"""OID tree traversal for the garbage collection phase of packing.Optimized for memory efficiency. Uses sets of native integersrather than Python integers because native integers take up a lot lessroom in RAM."""from__future__importabsolute_importimportcollectionsimportgcimportloggingimportBTreesfromrelstorage._compatimportiteritemsIIunion32=BTrees.family32.II.union# pylint:disable=no-memberIISet32=BTrees.family32.II.SetIISet64=BTrees.family64.II.Setlog=logging.getLogger(__name__)
[docs]classIISet32X(object):"""An IISet32 extended with a Python set layer for efficient inserts."""def__init__(self):self._base=IISet32()self._layer=set()defadd(self,key):layer=self._layerifkeynotinlayerandkeynotinself._base:layer.add(key)iflen(layer)>10000:self._apply()def_apply(self):"""Add the layer to the base and reset the layer."""ifself._layer:self._base=IIunion32(self._base,IISet32(self._layer))self._layer.clear()def__iter__(self):self._apply()returniter(self._base)def__contains__(self,key):returnkeyinself._layerorkeyinself._base
[docs]classTreeMarker(object):"""Finds all OIDs reachable from a set of root OIDs."""# This class groups OIDs by their upper 33 bits. Why 33 instead# of 32? Because IISet and IIBucket are signed, they can not accept# positive integers >= (1 << 31). The solution is to simply# add an extra grouping bit.hi=0xffffffff80000000# 33 high bitslo=0x000000007fffffff# 31 low bitsdef__init__(self):# self._refs:# {from_oid_hi: {to_oid_hi: IISet64([from_oid_lo << 32 | to_oid_lo])}}self._refs=collections.defaultdict(lambda:collections.defaultdict(IISet64))# self._reachable: {oid_hi: IISet32X}self._reachable=collections.defaultdict(IISet32X)self.reachable_count=0
[docs]defadd_refs(self,pairs):"""Add a list of (from_oid, to_oid) reference pairs. `from_oid` and `to_oid` must be 64 bit integers. """refs=self._refshi=self.hilo=self.loforfrom_oid,to_oidinpairs:s=refs[from_oid&hi][to_oid&hi]s.add(((from_oid&lo)<<32)|(to_oid&lo))
[docs]defmark(self,oids):"""Mark specific OIDs and descendants of those OIDs as reachable."""hi=self.hilo=self.lopass_count=1# this_pass: {oid_hi: IISet32X}this_pass=collections.defaultdict(IISet32X)foroidinsorted(oids):this_pass[oid&hi].add(int(oid&lo))whilethis_pass:gc.collect()found,next_pass=self._mark_pass(this_pass)log.debug("Found %d more referenced object(s) in pass %d",found,pass_count)ifnotfound:breakself.reachable_count+=foundpass_count+=1this_pass=next_passreturnpass_count
def_mark_pass(self,this_pass):"""Mark OIDs as reachable. Produce an OID set for the next pass. Return (found, next_pass), where `found` is the number of new OIDs marked and `next_pass` is the collection of OIDs to follow in the next pass. """# pylint:disable=too-many-locals# next_pass: {oid_hi: IISet32X}next_pass=collections.defaultdict(IISet32X)found=0refs=self._refsreachable=self._reachablelo=self.loforoid_hi,oids_loiniteritems(this_pass):from_reachable_set=reachable[oid_hi]foroid_loinoids_lo:ifoid_loinfrom_reachable_set:# This OID is already known to be reachable.continuefound+=1from_reachable_set.add(oid_lo)ifoid_hinotinrefs:# This OID doesn't reference anything.continue# Add the children of this OID to next_pass.forto_oid_hi,siniteritems(refs[oid_hi]):min_key=oid_lo<<32max_key=min_key|0xffffffffkeys=s.keys(min=min_key,max=max_key)ifnotkeys:# No references found here.continueto_reachable_set=reachable[to_oid_hi]next_pass_add=next_pass[to_oid_hi].addforkeyinkeys:child_oid_lo=int(key&lo)ifchild_oid_lonotinto_reachable_set:next_pass_add(child_oid_lo)returnfound,next_pass
[docs]deffree_refs(self):"""Free the collection of refs to save RAM."""self._refs=Nonegc.collect()
@propertydefreachable(self):"""Iterate over all the reachable OIDs."""foroid_hi,oids_loiniteritems(self._reachable):foroid_loinoids_lo:# Decode the OID.yieldoid_hi|oid_lo