Source code for ringo.lib.imexport

"""Modul for the messanging system in ringo"""
import logging
import datetime
import json
import csv
import codecs
import cStringIO
import sets
try:
        import cStringIO as StringIO
except ImportError:
        import StringIO
import xlsxwriter
import sqlalchemy as sa

from ringo.model.base import BaseItem
from ringo.model.user import UserSetting
from ringo.lib.helpers import serialize, deserialize
from ringo.lib.alchemy import get_props_from_instance

log = logging.getLogger(__name__)


class ExportConfiguration(object):
    """Parsed representation of a JSON export configuration.

    You can provide a JSON configuration file for the export to define
    in detail which fields of the given modul should be exported.
    Providing a configuration file allows you to export also
    `properties` and related items which are not part of the default
    export.

    Example export configuration::

        ["f1", "f2", "f3", {"bar": ["f4", "f5", {"baz": [...]}]}]

    In this configuration "f1", "f2" and "f3" are considered as fields
    of the exported item. In contrast, the keys of the nested
    dictionaries are taken as the names of relations. The following
    list defines again the fields of the items in the relation. So
    assuming you are exporting items of type "Foo" the export will
    include fields "f1", "f2", and "f3". Further "Foo" is related to
    "Bar" items in the relation "bar". From the "Bar" items the fields
    "f4" and "f5" are included. The "Bar" items have themselves a
    relation called "baz" and again you can follow the scheme to
    define a detailed configuration of what should be in the export.

    The configuration also supports wildcards. Use "*" to add all
    fields of the item or related item::

        ["*", {"bar": ["*", {"baz": [...]}]}]
    """

    def __init__(self, jsonconfig):
        # Keep the raw configuration; `relations` is the parsed
        # per-relation field mapping derived from it.
        self.config = jsonconfig
        self.relations = self._parse(jsonconfig)

    def _parse(self, config, relation="root"):
        """Return a dictionary with the field configuration for each
        relation found in the export configuration. The field
        configuration is a list of fieldnames.

        :config: Export configuration
        :relation: Name of the "current" relation. The name "root" is a
            placeholder for the elements on the first level of the export.
        :returns: Dict with relation configuration
        """
        relations = {relation: []}
        for field in config:
            if isinstance(field, dict):
                # Each key of the dict names a relation; recurse to
                # collect the field configuration of the related items.
                for rel in field:
                    relations.update(self._parse(field[rel], rel))
            else:
                relations[relation].append(field)
        return relations

    def includes_wildcard(self):
        """Return True if the top level of the configuration contains
        the "*" wildcard."""
        return "*" in self.config

    def get_relation_fields(self):
        """Return all top-level entries of the configuration except the
        "*" wildcard. Note that this includes the relation dicts as
        well as plain field names."""
        return [f for f in self.config if f != "*"]
class ExtendedJSONEncoder(json.JSONEncoder):
    """JSON encoder which additionally handles BaseItem, UserSetting
    and date/datetime values."""

    def default(self, obj):
        if isinstance(obj, BaseItem):
            # Linked items are exported as their id only.
            return obj.id
        elif isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(obj, datetime.date):
            return obj.strftime("%Y-%m-%d")
        elif isinstance(obj, UserSetting):
            return obj.id
        else:
            # Let the base class default method raise the TypeError
            return json.JSONEncoder.default(self, obj)


class UnicodeCSVWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, fields, dialect=csv.excel,
                 encoding="utf-8", **kwds):
        """
        :f: Target stream the encoded CSV output is written to.
        :fields: List of fieldnames for the csv.DictWriter.
        :dialect: CSV dialect, defaults to csv.excel.
        :encoding: Target encoding of the written data.
        """
        # Redirect output to a queue; rows are first written UTF-8
        # encoded into this buffer and then re-encoded into the
        # target encoding before being written to the stream.
        self.queue = cStringIO.StringIO()
        self.writer = csv.DictWriter(self.queue, fieldnames=fields,
                                     dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writeheader(self):
        """Write the CSV header row into the queue/stream."""
        self.writer.writeheader()

    def writerow(self, row):
        """Write a single dict `row`. String values are UTF-8 encoded
        first; non-string values are written as they are."""
        tmp_dict = {}
        for k, v in row.iteritems():
            try:
                tmp_dict[k] = v.encode("utf-8")
            except AttributeError:
                # Value has no encode method (e.g. int, None).
                tmp_dict[k] = v
        self.writer.writerow(tmp_dict)
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        # NOTE(review): truncate(0) without seek(0) relies on Python 2
        # cStringIO behaviour — confirm if this code is ever ported.
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write all dicts in `rows`, one CSV row each."""
        for row in rows:
            self.writerow(row)


class Exporter(object):
    """Base exporter to export items of the given class. The exporter
    will return a list of dictionaries with key value pairs of the
    values for each item which should be exported. The export is done
    by calling the `perform` method of this class.

    On default the exporter will return all fields of the item but no
    relations or related items. However the Exporter is able to export
    related items if configured correctly. In this case the Exporter
    will return a list of nested dictionaries.

    You can configure what will be exported on each item by using the
    `fields` parameter. If no fields are provided all fields excluding
    the relations will be exported. The order of the configured fields
    will determine the order of the fields in the export (if supported,
    e.g. CSV). As a more detailed option to configure the content of
    the export you can provide an ExportConfiguration to the exporter.

    On default the exported items will be `serialized`. This means that
    each value is converted into an export specific format. E.g. dates
    are converted into ISO8601 notation in the JSONExporter. If set to
    False the values are real python values.

    Using the `relations` parameter is deprecated. It can be used as a
    shortcut to add ORM relations of the item into the export. Exported
    relations will be the id of the linked items. In connection with
    the serialized parameter the string representation of the linked
    items are exported.
    """

    def __init__(self, clazz, fields=None, serialized=True,
                 relations=False, config=None):
        """
        :clazz: Clazz of the items which will be exported.
        :fields: List of fields and relations which should be exported.
        :serialized: Flag to indicate that the exported values should
            be serialized.
        :relations: Deprecated shortcut to include ORM relations.
        :config: ExportConfiguration for the exporter.
        """
        self._clazz = clazz
        self._fields = fields
        self._serialized = serialized
        self._relations = relations
        self._config = config

    def serialize(self, data):
        """Method to convert the given python listing with the exported
        items into a serialized form. This is dependent on the concrete
        exporter. This default implementation just returns the given
        data as it is. You can overwrite this method in a more specific
        renderer.

        :data: List containing exported items.
        :returns: String representing the data
        """
        return data

    def flatten(self, data):
        """Will flatten nested data structures as found in Blobforms.

        The JSON content of a "data" key is merged into the top level
        of the returned dict; all other keys are copied unchanged.
        """
        values = {}
        for key in data:
            # Handle data container of blobforms
            if key == "data":
                try:
                    jdata = json.loads(data[key])
                    for jkey in jdata:
                        values[jkey] = jdata[jkey]
                except ValueError:
                    # Not valid JSON -> keep the raw value.
                    values[key] = data[key]
            else:
                values[key] = data[key]
        return values

    def perform(self, items):
        """Will export the given items. Depending if the Exporter has
        been initialised with the `serialized` parameter the export
        will return a list of dictionaries (each with the values) or in
        the exporter specific format (e.g. JSON).

        :items: Items which will be exported.
        :returns: Exported items. (Format depends on the export
            configuration).
        """
        data = []
        # Check if the given item(s) is a list. If not we put it into a
        # temporary list.
        if not isinstance(items, list):
            _items = [items]
        else:
            _items = items
        for item in _items:
            # Ensure that every item has a UUID. Set missing UUID here
            # if the item has no uuid set yet.
            if not item.uuid:
                item.reset_uuid()
            # Check if a configuration is provided.
            # FIXME: Read support for deprecated "relations" argument?
            # Is missing here. (ti) <2017-05-23 14:02>
            if not self._config or len(self._config.config) == 0:
                # No configuration is provided. Export all fields
                # excluding relations.
                values = item.get_values(serialized=self._serialized)
            else:
                # Configuration is provided. Export fields and relations
                # based on the given configuration.
                values = {}
                if self._config.includes_wildcard():
                    # Expand "*" to all mapped properties of the item,
                    # then add the explicitly configured entries.
                    fields = [p.key for p in get_props_from_instance(item)]
                    fields.extend(self._config.get_relation_fields())
                else:
                    fields = self._config.config
                for field in fields:
                    if isinstance(field, dict):
                        # Relation entry: export the related items with
                        # a sub-exporter built from the nested config.
                        for relation in field:
                            clazz = getattr(self._clazz,
                                            relation).mapper.class_
                            exporter = Exporter(
                                clazz,
                                serialized=self._serialized,
                                config=ExportConfiguration(field[relation]))
                            value = item.get_value(relation)
                            value = exporter.perform(value)
                            values[relation] = value
                    else:
                        value = serialize(item.get_value(field))
                        values[field] = value
            data.append(self.flatten(values))
        # If the input to the method was a single item we will return a
        # single exported item.
        if not isinstance(items, list):
            if len(data) > 0:
                data = data[0]
            else:
                data = None
        return self.serialize(data)


class XLSXExporter(Exporter):
    """Exporter which serializes the exported items into a XLSX
    workbook with a single worksheet named after the clazz's table."""

    def serialize(self, data):
        """Return the XLSX file content as a string.

        :data: List of dicts with the exported values.
        :returns: Binary XLSX content.
        """
        output = StringIO.StringIO()
        book = xlsxwriter.Workbook(output)
        if len(data) > 0:
            # Column order is the sorted keys of the first item.
            keys = sorted(data[0].keys())
            sheet = book.add_worksheet(self._clazz.__tablename__)
            row = 0
            col = 0
            # write header
            for key in keys:
                sheet.write(row, col, key)
                col += 1
            # write data
            col = 0
            row = 1
            for item in data:
                for key in keys:
                    sheet.write(row, col, item[key])
                    col += 1
                row += 1
                col = 0
        book.close()
        output.seek(0)
        return output.read()


class JSONExporter(Exporter):
    """Exporter which serializes the exported items into JSON."""

    def serialize(self, data):
        """Return the exported items as a JSON string."""
        return json.dumps(data, cls=ExtendedJSONEncoder)


class CSVExporter(Exporter):
    """Exporter which serializes the exported items into CSV."""

    def _collect_keys(self, data):
        """The function will collect all keys (fields) within the given
        items.

        This is needed in case of blobform items as those items have a
        generic data field which may contain a variable number of
        fields depending on whether the item has a value for the fields
        or not."""
        # Builtin set replaces the deprecated sets.Set module.
        keys = set()
        for item in data:
            keys = keys.union(item.keys())
        return keys

    def serialize(self, data):
        """Return the exported items as CSV with a sorted union of all
        keys as header."""
        outfile = cStringIO.StringIO()
        writer = UnicodeCSVWriter(outfile,
                                  sorted(self._collect_keys(data)))
        writer.writeheader()
        writer.writerows(data)
        outfile.seek(0)
        return outfile.read()


class Importer(object):
    """Base importer. Subclasses implement `deserialize` for a concrete
    data format (JSON, CSV, ...)."""

    def __init__(self, clazz, db=None, use_strict=False):
        """
        :clazz: The clazz for which we will import data
        :db: Optional db session. Needed to resolve MANYTOMANY
            relations and to add newly created items.
        :use_strict: Flag passed through to the item factory and
            `set_values`.
        """
        self._clazz = clazz
        self._db = db
        self._clazz_type = self._get_types(clazz)
        self._use_strict = use_strict

    def _get_types(self, clazz):
        """Return a mapping of property name to a type string: the
        relation direction name for relationships (e.g. "MANYTOMANY")
        or the column type name for plain columns."""
        type_mapping = {}
        mapper = sa.orm.class_mapper(clazz)
        for prop in mapper.iterate_properties:
            if isinstance(prop, sa.orm.RelationshipProperty):
                type_mapping[prop.key] = str(prop.direction.name)
            else:
                type_mapping[prop.key] = str(prop.columns[0].type)
        return type_mapping

    def _deserialize_values(self, obj):
        """This function can be called after the basic deserialisation
        has finished. It is used to convert integer, date and datetime
        objects which are either not supported by the default decoders
        or not decoded correctly (NULL values).

        :obj: Deserialized dictionary from basic deserialisation
        :returns: Deserialized dictionary with additional integer, date
            and datetime deserialisation
        """
        for field in obj:
            if (field not in self._clazz_type
                    or obj[field] is None):
                continue
            else:
                datatype = self._clazz_type[field].lower()
                # Relations are handled in _deserialize_relations.
                if datatype in ['manytoone', 'manytomany',
                                'onetomany', 'onetoone']:
                    continue
                obj[field] = deserialize(obj[field], datatype)
        return obj

    def _deserialize_relations(self, obj):
        """Will deserialize items in a MANYTOMANY relation. Other
        relations do not need to be handled as they should have a
        foreign key to the related item which is part of the item's
        fields anyway.

        It will replace the id values of the related items with the
        loaded items. This only works if there is a db connection
        available.

        :obj: Deserialized dictionary from basic deserialisation
        :returns: Deserialized dictionary with additional MANYTOMANY
            relations.
        """
        # Iterate over a copy of the keys as entries may be deleted.
        for field in obj.keys():
            try:
                ftype = self._clazz_type[field]
            except KeyError:
                log.warning("Can not find field %s in %s"
                            % (field, self._clazz_type))
                continue
            # Handle all types of relations...
            if ftype in ["MANYTOMANY", "MANYTOONE",
                         "ONETOONE", "ONETOMANY"]:
                # Remove the items from the list if there is no db
                # connection or of there are not MANYTOMANY.
                if not self._db or (ftype != "MANYTOMANY"):
                    del obj[field]
                    continue
                clazz = getattr(self._clazz, field).mapper.class_
                tmp = []
                for item_id in obj[field]:
                    if isinstance(item_id, BaseItem):
                        # Item has been already be deserialized in the
                        # recursive calls.
                        tmp.append(item_id)
                    else:
                        q = self._db.query(clazz).filter(
                            clazz.id == item_id)
                        # Narrowed from a bare except: a missing or
                        # ambiguous related item is logged and skipped.
                        try:
                            tmp.append(q.one())
                        except Exception:
                            log.warning(("Can not load '%s' id: %s "
                                         "Relation '%s' of '%s' not set"
                                         % (clazz, item_id,
                                            field, self._clazz)))
                obj[field] = tmp
        return obj

    def deserialize(self, data):
        """Will convert the string data into a dictionary like data.

        :data: Importdata as string (JSON, XML...)
        :returns: Dictionary like data
        """
        return {}

    def perform(self, data, user=None, translate=lambda x: x,
                load_key="uuid"):
        """Will return a list of imported items. The list will contain
        a tuple of the item and a string which gives information on the
        operation (update, create). For create operations the new item
        will be created with the given user.

        :data: Importdata as string (JSON, XML...)
        :user: User object. Used when creating objects.
        :translate: Translation method.
        :load_key: Define name of the key which is used to load the
            item.
        :returns: List of imported items
        """
        data = self.deserialize(data)
        imported_items = []
        factory = self._clazz.get_item_factory()
        if self._use_strict:
            factory._use_strict = self._use_strict
        _ = translate
        for values in data:
            if load_key == "uuid":
                load_id = values.get('uuid')
                # The id must not be taken from the import data when
                # loading by uuid.
                if "id" in values:
                    del values["id"]
            else:
                load_id = values.get(load_key)
            try:
                # uuid might be empty for new items, which will raise an
                # error on loading.
                item = factory.load(load_id, field=load_key)
                item.set_values(values, use_strict=self._use_strict)
                operation = _("UPDATE")
            except sa.orm.exc.NoResultFound:
                if ("id" in values and not values["id"]):
                    del values["id"]
                item = factory.create(user=user, values=values)
                # NOTE(review): assumes self._db is set when new items
                # are created — confirm callers always pass a db here.
                self._db.add(item)
                operation = _("CREATE")
            imported_items.append((item, operation))
        return imported_items


class JSONImporter(Importer):
    """Importer which reads JSON data."""

    def _deserialize_recursive(self, obj):
        """Recursively import nested dicts/lists as related items using
        a sub-importer for the relation's class."""
        for field in obj:
            if isinstance(obj[field], (dict, list)):
                clazz = getattr(self._clazz, field).mapper.class_
                importer = JSONImporter(clazz, db=self._db,
                                        use_strict=self._use_strict)
                if not isinstance(obj[field], list):
                    # Single nested item.
                    import_data = [obj[field]]
                    imported_item = importer.perform(
                        json.dumps(import_data))
                    obj[field] = imported_item[0][0]
                elif obj[field] and isinstance(obj[field][0], dict):
                    # List of nested items.
                    import_data = obj[field]
                    imported_item = importer.perform(
                        json.dumps(import_data))
                    obj[field] = [x[0] for x in imported_item]
        return obj

    def _deserialize_hook(self, obj):
        """Apply recursive, value and relation deserialisation."""
        obj = self._deserialize_recursive(obj)
        obj = self._deserialize_values(obj)
        return self._deserialize_relations(obj)

    def deserialize(self, data):
        """Will convert the JSON data back into a dictionary with
        python values

        :data: String JSON data
        :returns: List of dictionary with python values
        """
        conv = json.loads(data)
        if isinstance(conv, dict):
            conv = [conv]
        return [self._deserialize_hook(c) for c in conv]


class CSVImporter(Importer):
    """Importer which reads CSV data."""

    def _deserialize_hook(self, obj):
        """Decode all raw CSV values to unicode and apply value
        deserialisation."""
        conv = {}
        for k, v in obj.iteritems():
            conv[k] = unicode(v, "utf-8")
        conv = self._deserialize_values(conv)
        return conv

    def deserialize(self, data):
        """Will convert the CSV data back into a dictionary with python
        values

        :data: String CSV data
        :returns: List of dictionary with python values
        """
        result = []
        infile = cStringIO.StringIO(data)
        reader = csv.DictReader(infile)
        for conv in reader:
            conv = self._deserialize_hook(conv)
            result.append(conv)
        return result