#
# Python module for synchronizing the serializing and de-serializing of several
# different data sources that have a need to keep an updated file version of
# themselves.
#

'''
Usage:
    >>> from Serializer import SerializerMaster
    >>> master = SerializerMaster('my-file.ser', {
    ...     'journal': PendingQueue(),
    ...     'line': DictList()
    ... })
    >>> journal = master.value('journal')
    >>> line = master.value('line')

    # adds the value 'xyz' to the data, and updates the saved version of the
    # master file
    >>> journal.add('xyz')
'''

import os, pickle, types, time


class SerializerProxy:
    '''
    An object that a self-serialized class can use to easily notify a master
    that its structure changed.

    A proxy is bound to one master and one registered name; every method
    simply forwards to the master using that name.
    '''

    def __init__(self, master, name):
        '''
        Bind the proxy to a master and to the name of one registered object.
        Both arguments are required (asserted not None).
        '''
        assert master is not None
        assert name is not None
        self.master = master
        self.name = name

    def update(self):
        ''' Tell the master that the object registered under our name changed. '''
        self.master.update(self.name)

    def register(self, obj):
        ''' Store obj in the master under our name. '''
        self.master.set(self.name, obj)

    def value(self):
        ''' Return the object the master currently holds under our name. '''
        return self.master.value(self.name)


class Serializable:
    '''
    Class that can be registered with a proxy, and properly save itself
    without the proxy.  Useful for extending instances.

    Subclasses call _update() whenever their inner state changes.
    '''

    # NOTE: kept as a classic (no explicit base) class declaration so that
    # files pickled by the Python 2 version of this module remain loadable.

    def __init__(self):
        self.__serializer_proxy = None

    def register_serializer_proxy(self, serializer_proxy):
        '''
        Register this object with a serializer proxy instance.
        '''
        self.__serializer_proxy = serializer_proxy

    def _update(self):
        '''
        Update our pickled form in the master.  Called when the inner state
        changes.  Does nothing when no proxy has been registered.
        '''
        if self.__serializer_proxy:
            self.__serializer_proxy.update()

    def __getstate__(self):
        '''
        Return the instance dictionary to pickle, without the proxy.
        '''
        odict = self.__dict__.copy()  # copy the dict since we change it
        # The attribute is written as self.__serializer_proxy inside this
        # class, so name mangling stores it under this key; looking for the
        # unmangled '__serializer_proxy' would never match and the proxy
        # (and through it the master) would end up in the pickle.
        odict.pop('_Serializable__serializer_proxy', None)
        return odict

    def __setstate__(self, dict):
        '''
        Restore the pickled instance dictionary.  The proxy is always reset
        to None; the master hands out a fresh one when it registers us.
        '''
        # The parameter name 'dict' is kept from the original signature.
        self.__dict__.update(dict)
        self.__serializer_proxy = None


class SerializerMaster:
    '''
    The Master knows all the objects that should be serialized.  Registered
    objects must call update(obj) when their state is updated.

    For performance, the data in each saved item should be minimal.

    If a registered object has a method named "register_serializer_proxy",
    then that will be called on restore time with a proxy instance.

    One named item is always reserved, and will overwrite any changes:
    "last write".
    '''

    LAST_WRITE = 'last write'
    REGISTER_PROXY = 'register_serializer_proxy'

    def __init__(self, filename, defaults = None):
        '''
        Restores the master state from the filename, then adds every entry
        of the optional defaults mapping (name -> object) that the restored
        state does not already contain.  The file is written if any default
        was added, or if no defaults were given at all.
        '''
        assert filename is not None
        self.filename = filename
        self.__restore()

        changed = False
        if defaults is not None:
            for name, value in defaults.items():
                # always add the default, and set our "changed" flag to True
                # if either there already was a change, or if the add_default
                # incurred a change.
                changed = self.add_default(name, value, False) or changed
        else:
            # Even though we don't have anything to save, we should still mark
            # our presence.
            changed = True

        if changed:
            self.__save()

    def add_default(self, name, default_value, save_now = True):
        '''
        If the current form of the master does not contain an object for the
        name, then add it to the list.  Returns True if the default was
        added, otherwise False.
        '''
        if name in self.__object:
            return False
        self.set(name, default_value, save_now)
        return True

    def keys(self):
        '''
        Retrieve a list of names for all the known registered objects.
        '''
        # list() keeps the documented list return type on Python 3 as well.
        return list(self.__object.keys())

    def set(self, name, value, save_now = True):
        '''
        Sets the named element to a new value.  Nothing happens (and nothing
        is saved) when the name already maps to this very same object.
        '''
        # must be identically equal to skip the update.
        if name in self.__object and self.__object[name] is value:
            return
        self.__register(name, value)
        self.update(name, save_now)

    def update(self, name, save_now = True):
        '''
        An update happened to the object registered under the given name.
        The file is saved unless save_now is false.

        Raises pickle.PicklingError if nothing is registered under the name.
        '''
        if name not in self.__object:
            raise pickle.PicklingError("Unknown object updated: %s" % name)
        if save_now:
            self.__save()

    def proxy(self, name):
        '''
        Returns a serializer proxy instance for the object registered with
        the name, or None if nothing is registered with that name.
        '''
        if name in self.__object:
            return SerializerProxy(self, name)
        return None

    def value(self, name):
        '''
        Returns the object associated with this name, or None if nothing is
        registered with that name.
        '''
        return self.__object.get(name)

    def __restore(self):
        '''
        Restores the master to its previously saved state, if the file
        exists.  Otherwise starts empty with a "last write" time of 0.0.
        '''
        self.__object = dict()
        if not os.path.exists(self.filename):
            self.__object[SerializerMaster.LAST_WRITE] = 0.0
            return

        # NOTE(security): pickle.load can execute arbitrary code while
        # loading; only point a master at files from a trusted source.
        f = open(self.filename, "rb")
        try:
            self.__object = pickle.load(f)
        finally:
            # close on success as well as on failure
            f.close()

        # Iterate over a snapshot, since __register writes into the dict.
        for (name, obj) in list(self.__object.items()):
            self.__register(name, obj)

    def __register(self, name, obj):
        '''
        Put the object into the objectified list, and potentially register
        ourselves to the added item.  Doesn't do any saves.
        '''
        self.__object[name] = obj

        # If applicable, register the proxy with the object
        if self.REGISTER_PROXY in dir(obj):
            fnc = getattr(obj, self.REGISTER_PROXY)
            if callable(fnc):
                fnc(SerializerProxy(self, name))

    def __save(self):
        '''
        Saves the objects to the file, stamping the reserved "last write"
        entry with the current time.
        '''
        self.__object[SerializerMaster.LAST_WRITE] = time.time()
        # Pickle before opening the file: opening for writing truncates it,
        # so a pickling failure must happen while the old contents are
        # still intact.
        payload = pickle.dumps(self.__object)
        f = open(self.filename, "wb")
        try:
            f.write(payload)
        finally:
            f.close()


# -------------------------------------------------------------------------
# Sample Serializable Implementations


class SerializablePrimitive(Serializable):
    '''
    Example implementation of a Serializable: a holder for a single value.
    '''

    def __init__(self, value):
        Serializable.__init__(self)
        self.d = value

    def get(self):
        ''' Return the held value. '''
        return self.d

    def set(self, value):
        ''' Replace the held value and announce the change. '''
        self.d = value
        self._update()

    def save(self):
        '''
        call after the primitive's value was altered.
        '''
        self._update()


class DictList(Serializable):
    '''
    A dictionary of key -> []
    '''

    def __init__(self):
        Serializable.__init__(self)
        self.__d = {}

    def items(self):
        ''' Return the (key, list) pairs of the underlying dictionary. '''
        return self.__d.items()

    def get_list(self, key):
        '''
        Return the list stored for the key, or a new empty list if the key
        is unknown.  Will not force an update.
        '''
        if key in self.__d:
            return self.__d[key]
        return []

    def add(self, key, value):
        '''
        Add an element to the end of the key's list.
        '''
        self.__d.setdefault(key, []).append(value)
        self._update()

    def extend(self, key, valueList):
        '''
        Extend the key's list with the values in the argument's list.
        '''
        # Always extend a list we own: storing valueList itself for a new
        # key would share the caller's object, so later add() calls would
        # modify the caller's list (or fail if it is not a list).
        self.__d.setdefault(key, []).extend(valueList)
        self._update()

    def clear(self, key = None):
        '''
        If no key is given, then the dictionary is wiped clean, otherwise
        the list associated with the given key is removed.
        '''
        if key is None:
            self.__d = {}
            self._update()
        elif key in self.__d:
            del self.__d[key]
            self._update()

    def has_keys(self):
        ''' Return True if at least one key is stored. '''
        return len(self.__d) > 0

    def __len__(self):
        return len(self.__d)

    def __getitem__(self, key):
        return self.get_list(key)

    def __delitem__(self, key):
        self.clear(key)

    # Note: no __setitem__, as it doesn't make sense


class PendingQueue(Serializable):
    '''
    Allows for storing a list of objects, and keeping it synchronized with
    an on-disk store.  At creation time, the on-disk store will be loaded
    and restored, if it exists.

    This is not thread safe.
    '''

    def __init__(self):
        '''
        First load of a queue.
        '''
        Serializable.__init__(self)
        self.__queue = []

    def peek(self):
        '''
        Look at the next item from the queue, but don't remove it.  Returns
        None if the list is empty.
        '''
        if not self.__queue:
            return None
        return self.__queue[0]

    def is_empty(self):
        '''
        Returns a boolean reporting whether the queue is empty.
        '''
        return len(self.__queue) <= 0

    def has_contents(self):
        '''
        The opposite of is_empty
        '''
        return len(self.__queue) > 0

    def size(self):
        ''' Return the number of queued items. '''
        return len(self.__queue)

    def __len__(self):
        return self.size()

    def __nonzero__(self):
        return self.size() > 0

    # Python 3 spelling of __nonzero__.
    __bool__ = __nonzero__

    def __eq__(self, other):
        '''
        Two queues are equal when they hold equal items in the same order.
        '''
        # Replaces the former __cmp__, which returned a boolean: equal
        # queues produced 1 ("greater") and unequal ones 0 ("equal"), so
        # == gave the opposite of the intended answer.
        if isinstance(other, PendingQueue):
            return self.__queue == other.__queue
        return NotImplemented

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    # The queue is mutable and compares by content, so it is not hashable.
    # hash() raised TypeError before as well, since lists have no hash.
    __hash__ = None

    def next(self):
        '''
        Return and remove the next item from the queue, or None if the queue
        is empty.
        '''
        if not self.has_contents():
            return None
        ret = self.__queue.pop(0)
        # Our data changed, so announce it to the Serializer mechanism
        self._update()
        return ret

    def add(self, data):
        '''
        Add an item to the queue.
        '''
        self.__queue.append(data)
        # Our data changed, so announce it to the Serializer mechanism
        self._update()