__revision__  = "$Revision: 1.6 $"
__copyright__ = "Copyright (c) 2005 by Simon Pamies"


__doc__ = """
Standalone indexer for the ZODB. Can be used to index
many objects in a database and search for attributes
without loading objects.

Example:
    
    >>> o = DummyObject()
    >>> from standaloneindexer import Indexer
    >>> from standaloneindexer import MetadataIndex
    >>> idx = Indexer()
    >>> idx.addIndex(MetadataIndex('name'))
    >>> idx.index_object(o)
    >>> idx.search(name='dummy')
    <standaloneindexer.ResultListItem object at 0x40298d2c>
    >>> str(idx.search(name='dummy'))
    '<ResultList with 1 results>'
    >>> result = idx.search(name='dummy')
    >>> result()[0]
"""

try:
    from ZODB.cPersistence import Persistent
except:
    from Persistence import Persistent

from BTrees.OOBTree import union as OOSetUnion
from BTrees.OOBTree import difference as OOSetDifference
from BTrees.OOBTree import intersection as OOSetIntersection

from BTrees.OOBTree import OOBTree, OOSet

import exceptions

from types import StringType
import sys
import random

class NotIndexable(Exception):
    """
    Raised when an object cannot be indexed.

    MetadataIndex._check_object raises it when the object lacks
    the 'id' attribute or the indexed attribute, or when the
    indexed attribute is callable and calling it fails.

    The builtin Exception is used as base class; it is the very
    same class the Python 2 only ``exceptions`` module exports.
    """

class MetadataIndex(Persistent):
    """
    A MetadataIndex indexes one single attribute (the *datum*)
    of many objects. It keeps two mappings:

      self._data[attribute value] = [uid, uid, ...]
      self._uids[uid]             = attribute value

    ex.:
    We have objects A,B,C with

      A.id = 'A'
      B.id = 'B'
      C.id = 'C'

    d = MetadataIndex(datum='id')
    d.index_object(A, 'uid-a')
    d.index_object(B, 'uid-b')
    d.index_object(C, 'uid-c')

    # searching for all objects that
    # have an attribute id and where
    # this attribute is either A or B;
    # the result is an OOSet of uids
    d.get_results(['A', 'B'])
    OOSet(['uid-a', 'uid-b'])
    """

    def __init__(self, datum):
        """
        datum is the name of the attribute we want to
        index with this MetadataIndex.

        Raises TypeError if datum is not a string.
        """

        if not isinstance(datum, str):
            raise TypeError('%s not a valid metadata field!' % str(datum))

        self._datum = datum

        # mapping for {attribute value -> [uid, ...]}
        # the value side is a list, because it is not
        # unthinkable that some objects have the same
        # content for a specific attribute
        self._data = OOBTree()

        # mapping for {uid -> attribute value}
        self._uids = OOBTree()

        # number of entries, maintained by _vindex / unindex_uid
        self._length = 0

    def __len__(self):
        """
        Number of uids currently stored in this index.
        """

        return self._length

    def _check_object(self, object):
        """
        Extracts the value to index from the object. The object
        needs both an 'id' attribute and the indexed attribute.
        If the attribute is callable it is called (without
        arguments) and the result is used, otherwise the raw
        value is returned.

        Raises NotIndexable if a required attribute is missing
        or if calling the attribute fails.
        """

        if not hasattr(object, self._datum) or not hasattr(object, 'id'):
            raise NotIndexable(
                'Object %s has not the required fields (id or %s)!'
                % (str(object), self._datum))

        value = getattr(object, self._datum)
        if callable(value):
            try:
                value = value()
            except Exception:
                # only real errors are translated; interpreter exit
                # requests are no longer swallowed by a bare except
                raise NotIndexable(
                    'Attribute %s of object %s not safely callable!'
                    % (self._datum, str(object)))

        return value

    def _vindex(self, uid, idx):
        """
        Creates the two mapping entries described in the
        class documentation for the pair (uid, idx) and
        returns the uid.
        """

        if self._data.has_key(idx):
            uids = self._data[idx]
            uids.append(uid)
            # store the list back into the tree (as the original
            # `+=` did) instead of relying on the in-place
            # mutation being noticed
            self._data[idx] = uids
        else:
            self._data[idx] = [uid]

        self._uids[uid] = idx
        self._length += 1

        self._p_changed = 1
        return uid

    def index_object(self, object, uid):
        """
        Indexes the object under the given uid and returns the
        uid. Objects that want to be indexable need at least
        the 'id' field and the indexed attribute.

        Raises NotIndexable (see _check_object).
        """

        # getting the attributes value, then creating the mapping
        return self._vindex(uid, self._check_object(object))

    def _virtual_index(self, uid):
        """
        In the current implementation we create index entries
        also for objects not having the specified attribute.
        Such an entry is stored with the value None.
        """

        return self._vindex(uid, None)

    def unindex_uid(self, uid):
        """
        Removes the entry stored under uid (the value that
        was handed to / returned by index_object).

        Raises KeyError if the uid is unknown to this index.
        """

        if not self._uids.has_key(uid):
            raise KeyError('%s does not exist in this Index!' % (uid,))

        idx = self._uids[uid]

        # One pass is enough: keep every other uid sharing the
        # value and drop the key once nobody is left. The former
        # loop rebuilt the list once per element.
        remaining = [item for item in self._data[idx] if item != uid]
        if remaining:
            self._data[idx] = remaining
        else:
            del self._data[idx]

        del self._uids[uid]

        self._length -= 1
        self._p_changed = 1

    def unindex_object(self, object):
        """
        Unindexes by looking at the object instead of a uid.

        The index only knows the attribute value of the object,
        so EVERY uid stored under that value is removed. Use
        unindex_uid to remove exactly one entry.

        Raises NotIndexable (see _check_object) or KeyError if
        the value of the object is not stored in this index.
        """

        idx = self._check_object(object)
        if not self._data.has_key(idx):
            raise KeyError('Object %s does not exists in this Index!' % str(object))

        # self._data[idx] is a list of uids; it used to be passed
        # as if it were one uid, which could never succeed. Work
        # on a copy, unindex_uid replaces the stored list.
        for uid in list(self._data[idx]):
            if self._uids.has_key(uid):
                self.unindex_uid(uid)

    def get_results(self, input):
        """
        Returns an OOSet with the uids of all entries whose
        value matches the input. input can be a list because
        we perhaps wanna search for all objects that have
        _datum in ['dummy1', 'dummy2']. Everything that is
        not a list is treated as one single search value.

        An empty input (None, '', [], ...) returns all uids
        of this index.
        """

        result = OOSet()
        if not isinstance(input, list):
            try:
                is_empty = len(input) == 0
            except TypeError:
                # unsized values: only None counts as empty,
                # numbers like 0 are real search values
                is_empty = input is None

            if is_empty:
                input = []
            else:
                input = [input]

        # returning all results if
        # the argument is empty
        if not input:
            result.update(self._uids.keys())
            return result

        for item in input:
            try:
                uids = self._data[item]
            except (KeyError, TypeError):
                # value not indexed (or not usable as a key):
                # simply no hit, saves a has_key call
                continue
            result.update(uids)

        return result
            
class IndexContainer(Persistent):
    """
    An IndexContainer can be used to
    collect many MetadataIndex objects
    at one place. The indexes are kept in
    a plain dict keyed by the name of the
    attribute they index (index._datum).

    @see: MetadataIndex
    """

    def __init__(self):
        self._indexes = {}

        # adding default index. all objects
        # must have an id...
        self.addIndex(MetadataIndex('id'))

    def hasIndexFor(self, field_name):
        """
        True if an index for field_name is registered.
        """

        return field_name in self._indexes

    def addIndex(self, index):
        """
        Registers the index under its _datum. An index that
        is already registered for that name gets replaced.
        """

        #if not IMetadataIndex.isImplementedBy(index):
        #    raise TypeError, '%s seems not to be an MetadataIndex!' % str(index)

        self._indexes[index._datum] = index

        # _indexes is a plain dict, so the change
        # is flagged by hand
        self._p_changed = 1

    def removeIndex(self, field_name):
        """
        Removes the index registered for field_name.

        Raises KeyError if there is no such index.
        """

        try:
            del self._indexes[field_name]
        except (KeyError, TypeError):
            # the message was never filled in before
            raise KeyError('Index %s not found in here :(' % (field_name,))

        # flag the dict mutation like addIndex and clear do
        self._p_changed = 1

    def clear(self):
        """
        Removing *ALL* indexes from this container,
        the default 'id' index included.
        """

        self._indexes = {}
        self._p_changed = 1

    def getIndex(self, field_name):
        """
        Returns the index registered for field_name.

        Raises KeyError if there is no such index.
        """

        try:
            return self._indexes[field_name]
        except (KeyError, TypeError):
            raise KeyError('Index for %s not found!' % (field_name,))

class ResultItem(Persistent):
    """
    One hit of a search. Indexer.search puts the indexed
    values directly into the __dict__ of the item (one
    attribute per index) and attaches the catalog uid.
    """

    # Default until Indexer.search attaches the real uid, so
    # that getCatalogUID never fails with an AttributeError.
    _ticle_catalog_uid = None

    def __init__(self):
        self._result_item = 1

    def getCatalogUID(self):
        """
        Returns the uid this object is
        indexed with in the catalog, or None
        if no uid was attached yet.
        """

        return self._ticle_catalog_uid

    def __call__(self):
        """ Wrapper around getCatalogUID """

        return self.getCatalogUID()

    def __str__(self):
        # 'id' is only present if an index for 'id' took part in
        # the search; fall back to None instead of failing. The
        # one-tuple keeps ids that are tuples printable.
        return '<ResultItem %s>' % (getattr(self, 'id', None),)

class ResultListItem(Persistent):
    """
    Container for the hits of one search. The hits are
    stored in an OOBTree keyed by their catalog uid; the
    number of distinct uids is counted separately.
    """

    def __init__(self, id):
        self._length = 0
        self._result_list = OOBTree()

        self.id = id
        self._p_changed = 1

    def addObject(self, other, uid):
        """
        Stores other under uid. An object already stored
        under that uid is replaced and not counted twice.
        Returns the class of this list.
        """

        already_known = self._result_list.has_key(uid)
        if not already_known:
            self._length = self._length + 1

        self._result_list[uid] = other
        self._p_changed = 1

        return self.__class__

    def __call__(self, *args, **kw):
        """
        Returns the stored objects exactly as
        OOBTree.values() hands them out. All
        arguments are ignored.
        """

        stored = self._result_list
        return stored.values()

    def lazy_results(self):
        """
        Same as calling the list object itself.
        """

        return self()

    def results(self):
        """
        Returns the stored objects copied
        into a normal python list.
        """

        values = self._result_list.values()
        return list(values)

    def __str__(self):
        return '<ResultList with %s results>' % (len(self),)

    def __len__(self):
        """
        Number of distinct uids added so far.
        """

        return self._length

# Persistent is inherited through IndexContainer. Listing it again in
# front of its own subclass is an inconsistent method resolution order
# for new-style classes, so it is not repeated here.
class Indexer(IndexContainer):
    """
    A little bit like the ZCatalog but
    more lightweight. We do not need
    that pluggable functionality and
    other special filtering stuff...

    Objects are indexed in every registered MetadataIndex
    under one generated uid; search returns ResultItem
    objects carrying the indexed values.
    """

    # we say that zeros are not handled
    # as None conditions...
    __empty_types__ = [type(0), type(0.0)]

    def __init__(self, id='std-indexer', servicehub=None):
        self.id = id

        self.servicehub = servicehub

        # counter of searches, part of the result list ids
        self._v_index = 0
        IndexContainer.__init__(self)

    def _ext_search(self, kws, logical_or=0):
        """
        Asks every index that has an entry in kws for its
        matching uids and combines the answers: intersection
        (AND) by default, union if logical_or is set.

        Returns an OOSet of uids. It is empty if no index is
        named in kws or, in AND mode, as soon as one index
        has no match.
        """

        matched = None
        for index in self._indexes.values():
            if index._datum not in kws:
                continue

            hits = index.get_results(kws[index._datum])
            if matched is None:
                matched = hits
            elif logical_or:
                matched = OOSetUnion(hits, matched)
            else:
                # take only these that are equal
                matched = OOSetIntersection(hits, matched)

            # AND can never recover from an empty set
            if not logical_or and len(matched) == 0:
                return OOSet()

        if matched is None:
            return OOSet()
        return matched

    def search(self, inverted=0, logical_or=0, include_empty=0, **kw):
        """
        Main method for dealing with data indexed here. The
        search terms (aka fields) are concatenated with AND.
        Terms without a registered index are ignored. Without
        any usable term all indexed data is returned.

        The param <inverted> can be used to say:
        'Give me all indexed data NOT matching that query'

        if <logical_or> is set, the search terms are
        concatenated with OR not with AND

        if <include_empty> is true, empty params (id=None,
        id='', ...) are handed to the index instead of being
        ignored. NOTE(review): MetadataIndex.get_results answers
        an empty value with ALL of its uids, not with the
        entries whose value is None - confirm what is wanted.

        Returns a ResultListItem filled with one ResultItem
        per matching uid.
        """

        # Here we eliminate all
        # entries not indexed herein
        kws = {}
        for key, value in kw.items():
            if not self.hasIndexFor(key):
                continue

            # only adding if there is really something we
            # wanna search for (zeros count as something),
            # unless the caller asked for empty values too
            if include_empty or value or \
                   (type(value) in Indexer.__empty_types__):
                kws[key] = value

        kwfilled = 1
        if not kws:

            # Here we indicate that we wanna
            # have ALL indexed data. If indexes
            # get an empty value they return all
            # data in there...
            kwfilled = 0
            for index in self._indexes.values():
                kws[index._datum] = None

        # It can happen, that we do not have
        # that volatile variable here - so we
        # start counting from zero again.
        self._v_index = getattr(self, '_v_index', 0) + 1

        # generate some list object with a random id
        results = ResultListItem('%s-%s' % (random.random(), self._v_index))

        rs = self._ext_search(kws, logical_or)
        if inverted and kwfilled:

            # getting ALL indexed data and do
            # a difference match on that
            everything = {}
            for index in self._indexes.values():
                everything[index._datum] = None

            rs = OOSetDifference(self._ext_search(everything), rs)

        # now we have a set of UIDs and create one
        # result item per uid, filled with the value
        # every index has stored for it
        indexes = self._indexes.values()
        for uid in rs:
            ob = ResultItem()
            for index in indexes:
                # an index added after the object was indexed
                # does not know the uid: report None, the same
                # value a virtual index entry carries
                ob.__dict__[index._datum] = index._uids.get(uid)

            # adding catalog uid, so that it can be
            # retrieved easily later
            ob._ticle_catalog_uid = uid
            results.addObject(ob, uid)

        return results

    def index_object(self, object):
        """
        Indexes the object in all registered indexes and
        returns the generated uid. Indexes for attributes
        the object does not have get a virtual (None) entry.

        If one index refuses the object, the entries already
        created for it are removed again before the error
        (e.g. NotIndexable) is passed on.
        """

        # NOTE(review): random.random() is not guaranteed to be
        # unique; kept because callers may rely on this uid format.
        uid = str(random.random())

        done = []
        try:
            for name, index in self._indexes.items():
                if hasattr(object, name):
                    index.index_object(object, uid)
                else:

                    # indexing virtually the metadata
                    # so that all objects contain all metadata
                    index._virtual_index(uid)

                done.append(index)
        except Exception:
            # do not leave a half indexed object behind
            for index in done:
                index.unindex_uid(uid)
            raise

        return uid

    def unindex_object(self, indexed_uid):
        """
        Removes the uid returned by index_object from all
        indexes knowing it. Indexes without that uid (for
        instance added later) are skipped.

        Raises KeyError if none of the indexes knows the uid.
        """

        found = 0
        for index in self._indexes.values():
            if index._uids.has_key(indexed_uid):
                index.unindex_uid(indexed_uid)
                found = 1

        if self._indexes and not found:
            raise KeyError('%s does not exist in this Index!' % (indexed_uid,))