#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Support for writing topic indexes and the filtered sets that go with them.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# stdlib imports
try:
from collections.abc import Mapping
except ImportError: # pragma: no cover
from collections import Mapping
import BTrees
from zc.catalog.extentcatalog import FilterExtent
from zope import interface
from zope.catalog.interfaces import ICatalogIndex
from zope.container.contained import Contained
from zope.index.topic import TopicIndex as _TopicIndex
from zope.index.topic.filter import FilteredSetBase
__docformat__ = "restructuredtext en"
[docs]@interface.implementer(ICatalogIndex)
class TopicIndex(_TopicIndex, Contained):
"""
A topic index that implements ``IContained`` and ``ICatalogIndex``
for use with catalog indexes.
To summarize, a topic index is a way to divide objects into a set
of groups (aka topics). The groups are determined by the contents
of this object, which are called filters. Each filter is
conceptually like a mini-index itself, but in practice most of
them are simply used to store group membership when some criteria
are met; for that purpose the :class:`.ExtentFilteredSet` is
ideal.
"""
#: We default to 64-bit btrees.
family = BTrees.family64
# If we're not IContained, we get location proxied.
# If we're not ICatalogIndex, we don't get updated when
# we get put in a catalog.
def __getitem__(self, filterid):
return self._filters[filterid]
[docs] def apply(self, query):
"""
Queries this index and returns the set of matching docids.
The *query* can be in one of several formats:
* A single string or a list of strings. In that case,
docids that are in all the given topics (by id) are returned.
This is equivalent to zc.catalog-style ``all_of`` operator.
* A mapping containing exactly two keys, ``operator``
and ``query``. The value for ``operator`` is either
``and`` or ``or`` to specify intersection or union, respectively.
The value for query is again a string or list of strings.
* A dictionary containing exactly one key, either ``any_of``
or ``all_of``, whose value is the string or list of string
topic IDs.
"""
# The first two cases are handled natively. The later case,
# zc.catalog style, we handle by converting.
if isinstance(query, Mapping):
if 'any_of' in query:
query = {'operator': 'or', 'query': query['any_of']}
elif 'all_of' in query:
query = {'operator': 'and', 'query': query['all_of']}
return super(TopicIndex, self).apply(query)
[docs]class ExtentFilteredSet(FilteredSetBase):
"""
A filtered set that uses an :class:`zc.catalog.interfaces.IExtent`
to store document IDs; this can make for faster, easier querying
of other indexes.
"""
#: We default to 64-bit btrees
family = BTrees.family64
#: The extent object. We pull this apart to
#: get the value for `_ids` (used in the implementation
#: of `unindex_doc`)
_extent = None
#: The set-like object that holds docids. In our case,
#: this is an extent.
_ids = None
def __init__(self, fid, expr, family=None):
"""
Create a new filtered extent.
:param expr: A callable object of three parameters: this
object, the docid, and the document. This will be
available as the value of :meth:`getExpression`.
If you pass ``None``, you can override getExpression
yourself.
.. caution:: This is often a persistent object, so if you
pass a filter, it must be picklable. In general and for
the most flexibility, instead of passing something like
``IFoo.providedBy``, instead pass a global (function) object
in your own module.
"""
super(ExtentFilteredSet, self).__init__(fid, expr, family=family)
# The super implementation calls clear() to establish `_ids`
def ids(self):
return tuple(self._ids) if self._ids is not None else ()
def clear(self):
# Note that we ignore the super implementation.
self._extent = FilterExtent(self.getExpression(), family=self.family)
self._ids = self._extent.set
def index_doc(self, docid, context):
try:
self._extent.add(docid, context)
except ValueError:
# Only unindex if it was found in the index to start with.
# Trying to remove a missing docid still leads to readCurrent()
# being called on the underlying BTree set unnecessarily.
# This leads to traversing the BTree twice in the uncommon case
# of removing an existing object, but that's fine.
# (TODO: Can there be persistence errors that break `in` but let
# `remove()` keep working?)
# See https://github.com/NextThought/nti.zope_catalog/issues/12
if docid in self._ids:
self.unindex_doc(docid)
[docs] def getExtent(self):
"""
Returns the :class:`zc.catalog.interfaces.IFilterExtent` used.
This is always consistent with the return value of :meth:`getIds`.
"""
return self._extent