Source code for nti.zope_catalog.catalog

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Catalog extensions.

"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# stdlib imports
import collections
import itertools
import warnings

import BTrees
from ZODB.POSException import POSError
from zope import interface
from zope.catalog.catalog import Catalog as _ZCatalog
from zope.catalog.interfaces import ICatalog

from nti.zodb import isBroken
from .interfaces import INoAutoIndex
from .interfaces import IDeferredCatalog


# Docstrings in this module are written in reStructuredText (English).
__docformat__ = "restructuredtext en"

# Module-level logger named after this module. ``logging`` is obtained via
# ``__import__`` instead of a separate top-level import statement.
logger = __import__('logging').getLogger(__name__)


class ResultSet(object):
    """
    A lazily resolved collection of objects identified by intids.

    It mirrors :class:`zope.catalog.catalog.ResultSet`, but carries more
    overhead (so it is slower) and can optionally skip broken objects.
    That option is a footgun: if broken objects exist, your code or your
    deployment is broken. Avoid this class in normal operation; it is
    of doubtful value even for recovery because it does not record
    which objects were "invalid".

    :param uids: A sized iterable of intids.
    :param uidutil: The intid utility; it must offer ``getObject`` and
        (when *ignore_invalid* is true) ``queryObject``.
    :param bool ignore_invalid: If true, missing or broken objects are
        silently skipped while iterating. A warning is issued at
        construction time when this is requested.
    """

    def __init__(self, uids, uidutil, ignore_invalid=False):
        self.uids = uids
        self.uidutil = uidutil
        self.ignore_invalid = ignore_invalid
        if not ignore_invalid:
            return
        # stacklevel=2 attributes the warning to whoever built the result set.
        warnings.warn("Please do not ignore corrupted databases.",
                      stacklevel=2)

    def __len__(self):
        """Return the number of uids, whether or not they resolve."""
        return len(self.uids)

    __length_hint__ = __len__

    def get_object(self, uid):
        """
        Resolve *uid* to its object.

        When ``ignore_invalid`` is set, unknown or broken objects come
        back as ``None``; otherwise the lookup is delegated to
        ``uidutil.getObject``. A ``None`` result is logged as database
        corruption.
        """
        utility = self.uidutil
        if not self.ignore_invalid:
            found = utility.getObject(uid)
        else:
            found = utility.queryObject(uid)
            if isBroken(found, uid):
                found = None

        if found is None:
            logger.warning("Your database is corrupted. There is no object for id %d",
                           uid)
        return found

    getObject = get_object

    def items(self):
        """Yield ``(uid, object)`` pairs, skipping uids that resolve to ``None``."""
        resolve = self.get_object
        for uid in self.uids:
            found = resolve(uid)
            if found is None:
                continue
            yield uid, found

    iter_pairs = items

    def __iter__(self):
        """Iterate over just the resolved objects."""
        for pair in self.items():
            yield pair[1]

    def count(self):
        """
        How many objects can actually be resolved?

        In a healthy database this equals ``len()`` of this object. The
        two differ only when the database is corrupt and needs to be
        fixed, which is a strong signal that your code is broken.
        """
        total = 0
        for _ in self.items():
            total += 1
        return total
class CatalogPrefetchIterator(object):
    """
    Given an iterator of ``(intid, object)``:

    - Breaks the iterator into chunks of a given size;
    - Detects any persistent objects in the chunk connected to a jar
      (this is done by checking for a ``_p_jar``);
    - Groups those objects by jar if needed (supporting multiple
      databases, because `ZODB 5 currently does not correctly do this
      <https://github.com/zopefoundation/ZODB/issues/273>`_);
    - Asks each jar to prefetch the given objects.
    - Finally, iterates over the chunk, yielding the pairs in the same
      order the underlying iterable produced them.

    This object is intended to be used with the ``_visitSublocations``
    method of a catalog, but may be useful in other cases. For example,
    one could enhance a standard :class:`zope.catalog.catalog.ResultSet`
    like so (note this won't work for the :class:`ResultSet` defined
    here)::

        from zope.catalog.catalog import ResultSet
        class PrefetchedResultSet(ResultSet, object):
            def __iter__(self):
                iterable = ((uid, self.uidutil.getObject(uid))
                            for uid in self.uids)
                for _, obj in CatalogPrefetchIterator(iterable, 512):
                    yield obj

    :param iterable: Any iterable of ``(intid, object)`` pairs.
    :param int chunk_size: How many pairs to pull (and prefetch) at once.

    .. versionadded:: 3.0
    """

    def __init__(self, iterable, chunk_size):
        self.iterable = iter(iterable)  # work if they pass a concrete collection
        self.chunk_size = chunk_size
        # Pending pairs of the current chunk, stored in *reverse* order so
        # that ``__next__`` can use the O(1) ``list.pop()``.
        self._chunk = None
        # The common case is that we only ever encounter one database;
        # the first time we see any jar, we'll know if we need to divvy
        # objects between different jars or not.
        self._prefetch = self._prefetch_unknown
        self._single_jar = None

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next ``(intid, object)`` pair, loading a chunk if needed."""
        if not self._chunk:
            self._get_next_chunk()
        if not self._chunk:
            raise StopIteration
        return self._chunk.pop()

    next = __next__  # Python 2

    def _get_next_chunk(self, _islice=itertools.islice):
        """Pull up to ``chunk_size`` pairs, prefetch them, and queue them."""
        if self.iterable is None:
            # The underlying iterable was exhausted by an earlier call.
            self._chunk = None
            return

        raw_chunk = list(_islice(self.iterable, self.chunk_size))
        self._prefetch(raw_chunk)
        exhausted = not raw_chunk or len(raw_chunk) < self.chunk_size
        # ``__next__`` pops from the tail; reversing here keeps the order
        # of the source iterable instead of flipping every chunk.
        raw_chunk.reverse()
        self._chunk = raw_chunk

        if exhausted:
            # We're done.
            self.iterable = None  # Signal that to __next__
            self._prefetch = None  # break the cycle
            self._single_jar = None  # why not

    def _prefetch_unknown(self, raw_chunk):
        """Pick the single- or multi-database strategy from the first jar seen."""
        for _, obj in raw_chunk:
            jar = getattr(obj, '_p_jar', None)
            if jar is None:
                continue
            # Hey hey, now we can find out if we need to
            # actually group or not.
            if len(jar.db().databases) > 1:
                self._prefetch = self._prefetch_multidb
            else:
                self._prefetch = self._prefetch_singledb
                self._single_jar = jar
            self._prefetch(raw_chunk)
            # We have our answer we can quit now.
            break
        # If we fall out of the loop without breaking, we never
        # encountered a persistent object. How sad.

    def _prefetch_multidb(self, raw_chunk, _defaultdict=collections.defaultdict):
        """Group the chunk's oids by jar and ask each jar to prefetch its own."""
        by_jar = _defaultdict(set)  # {jar: {oids}}
        for _, obj in raw_chunk:
            jar = getattr(obj, '_p_jar', None)
            by_jar[jar].add(getattr(obj, '_p_oid', None))

        # Lose the non-persistent objects. The ``None`` key is only present
        # if the chunk actually held such an object, so a default is
        # required to avoid a KeyError on all-persistent chunks.
        by_jar.pop(None, None)
        for jar, oids in by_jar.items():
            oids.discard(None)  # Lose persistent objects that aren't saved
            jar.prefetch(oids)

    def _prefetch_singledb(self, raw_chunk):
        """Ask the one known jar to prefetch every saved object in the chunk."""
        oids = {
            getattr(obj, '_p_oid', None)
            for _, obj in raw_chunk
        }
        oids.discard(None)  # lose the non-persistent objects, and those not saved
        self._single_jar.prefetch(oids)
class Catalog(_ZCatalog):
    """
    An extended catalog. Features include:

    * When manually calling :meth:`updateIndex` or :meth:`updateIndexes`,
      objects that provide
      :class:`nti.zope_catalog.interfaces.INoAutoIndex` are ignored.
      Note that if you have previously indexed objects that now provide
      this (i.e., class definition has changed) you need to
      :meth:`clear` the catalog first for this to be effective.

    * Updating indexes can optionally ignore certain errors related to
      persistence POSKeyErrors. Note that updating a single index does
      this by default (since it is usually called from the
      :class:`.IObjectAdded` event handler) but updating all indexes
      does not since it is usually called by hand.
    """

    family = BTrees.family64

    # Number of (uid, object) pairs handed to the prefetching iterator at once.
    PREFETCH_CHUNK_SIZE = 512

    # we may get TypeError: __setstate__() takes exactly 2 arguments (1 given)
    # error or creator cannot be resolved (if a user has been deleted)
    # catch and continue
    _PERSISTENCE_EXCEPTIONS = (POSError, TypeError)

    def _visitAllSublocations(self):
        """Return the superclass's unfiltered sublocation iteration."""
        return super(Catalog, self)._visitSublocations()

    def _visitSublocations(self):
        """
        Yield ``(uid, obj)`` for every sublocation that should be indexed,
        leaving out anything marked with ``INoAutoIndex``.
        """
        class_opts_out = INoAutoIndex.implementedBy
        instance_opts_out = INoAutoIndex.providedBy

        def _skip_opted_out_classes(pairs):
            # Try to avoid activating the object if not necessary
            # by first checking if the class is INoAutoIndex.
            # Instances still need to be checked further down.
            for pair in pairs:
                if class_opts_out(type(pair[1])):
                    continue
                yield pair

        candidates = CatalogPrefetchIterator(
            _skip_opted_out_classes(self._visitAllSublocations()),
            self.PREFETCH_CHUNK_SIZE)
        for uid, obj in candidates:
            if not instance_opts_out(obj):
                yield uid, obj

    # disable warning about different number of arguments than superclass
    # pylint: disable=I0011,W0221

    def updateIndex(self, index, ignore_persistence_exceptions=True):
        """
        Update a single index.

        :param index: The index to feed every visited object to.
        :param bool ignore_persistence_exceptions: If true (the default),
            persistence-related errors raised while indexing an object
            are logged and the object is skipped.
        """
        if ignore_persistence_exceptions:
            tolerated = self._PERSISTENCE_EXCEPTIONS
        else:
            tolerated = ()  # an empty tuple catches nothing

        for uid, obj in self._visitSublocations():
            try:
                index.index_doc(uid, obj)
            except tolerated as exc:
                logger.error("Error indexing object %s(%s); %s",
                             type(obj), uid, exc)

    def updateIndexes(self, ignore_persistence_exceptions=False):
        """
        Update all indexes in this catalog.

        :param bool ignore_persistence_exceptions: If true,
            persistence-related errors raised while indexing an object
            are logged and that (index, object) combination is skipped.
            Defaults to false.
        """
        if ignore_persistence_exceptions:
            tolerated = self._PERSISTENCE_EXCEPTIONS
        else:
            tolerated = ()  # an empty tuple catches nothing

        # avoid the btree iterator for each object
        all_indexes = list(self.values())
        for uid, obj in self._visitSublocations():
            for one_index in all_indexes:
                try:
                    one_index.index_doc(uid, obj)
                except tolerated as exc:
                    logger.error("Error indexing object %s(%s); %s",
                                 type(obj), uid, exc)
class DeferredCatalog(Catalog):
    """
    A :class:`Catalog` that implements
    :class:`nti.zope_catalog.interfaces.IDeferredCatalog`.

    It adds no behaviour of its own. The statements that follow this
    class replace ``ICatalog`` with ``IDeferredCatalog`` in the
    interfaces the class declares.
    """
def _redeclare_deferred_catalog_interfaces():
    # Take everything DeferredCatalog currently declares, drop ICatalog,
    # and put IDeferredCatalog first in the new declaration.
    inherited = list(interface.implementedBy(DeferredCatalog).interfaces())
    inherited.remove(ICatalog)
    interface.classImplementsOnly(DeferredCatalog, IDeferredCatalog, *inherited)


_redeclare_deferred_catalog_interfaces()
# Keep the module namespace free of the one-shot helper.
del _redeclare_deferred_catalog_interfaces