Source code for ringo.lib.imexport

"""Modul for the messanging system in ringo"""
import logging
import datetime
import json
import csv
import codecs
import cStringIO
import sets
try:
        import cStringIO as StringIO
except ImportError:
        import StringIO
import xlsxwriter
import sqlalchemy as sa

from ringo.model.base import BaseItem
from ringo.model.user import UserSetting
from ringo.lib.helpers import serialize, deserialize
from ringo.lib.alchemy import get_props_from_instance

log = logging.getLogger(__name__)


class ExportConfiguration(object):
    """Parsed representation of a JSON export configuration.

    You can provide a JSON configuration file for the export to define
    in detail which fields of the given modul should be exported.
    Providing a configuration file allows you to export also
    `properties` and related items which are not part of the default
    export.

    Example export configuration::

        ["f1", "f2", "f3", {"bar": ["f4", "f5", {"baz": [...]}]}]

    In this configuration "f1", "f2" and "f3" are considered as fields
    of the exported item. In contrast, the keys of the nested
    dictionaries are taken as the names of relations. The following
    list defines again the fields of the items in the relation. So
    assuming you are exporting items of type "Foo" the export will
    include fields "f1", "f2", and "f3". Further "Foo" is related to
    "Bar" items in the relation "bar". From the "Bar" items the fields
    "f4" and "f5" are included. The "Bar" items have themselves a
    relation called "baz" and again you can follow the scheme to
    define a detailed configuration of what should be in the export.

    The configuration also supports wildcards. Use "*" to add all
    fields of the item or related item::

        ["*", {"bar": ["*", {"baz": [...]}]}]
    """

    def __init__(self, jsonconfig):
        # Keep the raw configuration; `relations` is the parsed
        # per-relation field mapping derived from it.
        self.config = jsonconfig
        self.relations = self._parse(jsonconfig)

    def _parse(self, config, relation="root"):
        """Return a dictionary with the field configuration for each
        relation found in the export configuration. The field
        configuration is a list of fieldnames.

        :config: Export configuration
        :relation: Name of the "current" relation. The name "root" is a
            placeholder for the elements on the first level of the export.
        :returns: Dict with relation configuration
        """
        relations = {relation: []}
        for field in config:
            if isinstance(field, dict):
                # Each key of the dict names a relation; recurse to
                # collect the field configuration of the related items.
                for rel in field:
                    relations.update(self._parse(field[rel], rel))
            else:
                relations[relation].append(field)
        return relations

    def includes_wildcard(self):
        """Return True if the top level of the configuration contains
        the "*" wildcard."""
        return "*" in self.config

    def get_relation_fields(self):
        """Return all top-level entries of the configuration except the
        "*" wildcard. Note that this includes the relation dicts as
        well as plain field names."""
        return [f for f in self.config if f != "*"]
class ExtendedJSONEncoder(json.JSONEncoder):
    """JSON encoder which additionally handles BaseItem, UserSetting
    and date/datetime values."""

    def default(self, obj):
        if isinstance(obj, BaseItem):
            # Linked items are exported as their id only.
            return obj.id
        elif isinstance(obj, datetime.datetime):
            return obj.strftime("%Y-%m-%d %H:%M:%S")
        elif isinstance(obj, datetime.date):
            return obj.strftime("%Y-%m-%d")
        elif isinstance(obj, UserSetting):
            return obj.id
        else:
            # Let the base class default method raise the TypeError
            return json.JSONEncoder.default(self, obj)


class UnicodeCSVWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    """

    def __init__(self, f, fields, dialect=csv.excel,
                 encoding="utf-8", **kwds):
        """
        :f: Target stream the encoded CSV output is written to.
        :fields: List of fieldnames for the csv.DictWriter.
        :dialect: CSV dialect, defaults to csv.excel.
        :encoding: Target encoding of the written data.
        """
        # Redirect output to a queue; rows are first written UTF-8
        # encoded into this buffer and then re-encoded into the
        # target encoding before being written to the stream.
        self.queue = cStringIO.StringIO()
        self.writer = csv.DictWriter(self.queue, fieldnames=fields,
                                     dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writeheader(self):
        """Write the CSV header row into the queue/stream."""
        self.writer.writeheader()

    def writerow(self, row):
        """Write a single dict `row`. String values are UTF-8 encoded
        first; non-string values are written as they are."""
        tmp_dict = {}
        for k, v in row.iteritems():
            try:
                tmp_dict[k] = v.encode("utf-8")
            except AttributeError:
                # Value has no encode method (e.g. int, None).
                tmp_dict[k] = v
        self.writer.writerow(tmp_dict)
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        # NOTE(review): truncate(0) without seek(0) relies on Python 2
        # cStringIO behaviour — confirm if this code is ever ported.
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write all dicts in `rows`, one CSV row each."""
        for row in rows:
            self.writerow(row)


class Exporter(object):
    """Base exporter to export items of the given class. The exporter
    will return a list of dictionaries with key value pairs of the
    values for each item which should be exported. The export is done
    by calling the `perform` method of this class.

    On default the exporter will return all fields of the item but no
    relations or related items. However the Exporter is able to export
    related items if configured correctly. In this case the Exporter
    will return a list of nested dictionaries.

    You can configure what will be exported on each item by using the
    `fields` parameter. If no fields are provided all fields excluding
    the relations will be exported. The order of the configured fields
    will determine the order of the fields in the export (if supported,
    e.g. CSV). As a more detailed option to configure the content of
    the export you can provide an ExportConfiguration to the exporter.

    On default the exported items will be `serialized`. This means that
    each value is converted into an export specific format. E.g. dates
    are converted into ISO8601 notation in the JSONExporter. If set to
    False the values are real python values.

    Using the `relations` parameter is deprecated. It can be used as a
    shortcut to add ORM relations of the item into the export. Exported
    relations will be the id of the linked items. In connection with
    the serialized parameter the string representation of the linked
    items are exported.
    """

    def __init__(self, clazz, fields=None, serialized=True,
                 relations=False, config=None):
        """
        :clazz: Clazz of the items which will be exported.
        :fields: List of fields and relations which should be exported.
        :serialized: Flag to indicate that the exported values should
            be serialized.
        :relations: Deprecated shortcut to include ORM relations.
        :config: ExportConfiguration for the exporter.
        """
        self._clazz = clazz
        self._fields = fields
        self._serialized = serialized
        self._relations = relations
        self._config = config

    def serialize(self, data):
        """Method to convert the given python listing with the exported
        items into a serialized form. This is dependent on the concrete
        exporter. This default implementation just returns the given
        data as it is. You can overwrite this method in a more specific
        renderer.

        :data: List containing exported items.
        :returns: String representing the data
        """
        return data

    def flatten(self, data):
        """Will flatten nested data structures as found in Blobforms.

        The JSON content of a "data" key is merged into the top level
        of the returned dict; all other keys are copied unchanged.
        """
        values = {}
        for key in data:
            # Handle data container of blobforms
            if key == "data":
                try:
                    jdata = json.loads(data[key])
                    for jkey in jdata:
                        values[jkey] = jdata[jkey]
                except ValueError:
                    # Not valid JSON -> keep the raw value.
                    values[key] = data[key]
            else:
                values[key] = data[key]
        return values

    def perform(self, items):
        """Will export the given items. Depending if the Exporter has
        been initialised with the `serialized` parameter the export
        will return a list of dictionaries (each with the values) or in
        the exporter specific format (e.g. JSON).

        :items: Items which will be exported.
        :returns: Exported items. (Format depends on the export
            configuration).
        """
        data = []
        # Check if the given item(s) is a list. If not we put it into a
        # temporary list.
        if not isinstance(items, list):
            _items = [items]
        else:
            _items = items
        for item in _items:
            # Ensure that every item has a UUID. Set missing UUID here
            # if the item has no uuid set yet.
            if not item.uuid:
                item.reset_uuid()
            # Check if a configuration is provided.
            # FIXME: Read support for deprecated "relations" argument?
            # Is missing here. (ti) <2017-05-23 14:02>
            if not self._config or len(self._config.config) == 0:
                # No configuration is provided. Export all fields
                # excluding relations.
                values = item.get_values(serialized=self._serialized)
            else:
                # Configuration is provided. Export fields and relations
                # based on the given configuration.
                values = {}
                if self._config.includes_wildcard():
                    # Expand "*" to all mapped properties of the item,
                    # then add the explicitly configured entries.
                    fields = [p.key for p in get_props_from_instance(item)]
                    fields.extend(self._config.get_relation_fields())
                else:
                    fields = self._config.config
                for field in fields:
                    if isinstance(field, dict):
                        # Relation entry: export the related items with
                        # a sub-exporter built from the nested config.
                        for relation in field:
                            clazz = getattr(self._clazz,
                                            relation).mapper.class_
                            exporter = Exporter(
                                clazz,
                                serialized=self._serialized,
                                config=ExportConfiguration(field[relation]))
                            value = item.get_value(relation)
                            value = exporter.perform(value)
                            values[relation] = value
                    else:
                        value = serialize(item.get_value(field))
                        values[field] = value
            data.append(self.flatten(values))
        # If the input to the method was a single item we will return a
        # single exported item.
        if not isinstance(items, list):
            if len(data) > 0:
                data = data[0]
            else:
                data = None
        return self.serialize(data)


class XLSXExporter(Exporter):
    """Exporter which serializes the exported items into a XLSX
    workbook with a single worksheet named after the clazz's table."""

    def serialize(self, data):
        """Return the XLSX file content as a string.

        :data: List of dicts with the exported values.
        :returns: Binary XLSX content.
        """
        output = StringIO.StringIO()
        book = xlsxwriter.Workbook(output)
        if len(data) > 0:
            # Column order is the sorted keys of the first item.
            keys = sorted(data[0].keys())
            sheet = book.add_worksheet(self._clazz.__tablename__)
            row = 0
            col = 0
            # write header
            for key in keys:
                sheet.write(row, col, key)
                col += 1
            # write data
            col = 0
            row = 1
            for item in data:
                for key in keys:
                    sheet.write(row, col, item[key])
                    col += 1
                row += 1
                col = 0
        book.close()
        output.seek(0)
        return output.read()


class JSONExporter(Exporter):
    """Exporter which serializes the exported items into JSON."""

    def serialize(self, data):
        """Return the exported items as a JSON string."""
        return json.dumps(data, cls=ExtendedJSONEncoder)


class CSVExporter(Exporter):
    """Exporter which serializes the exported items into CSV."""

    def _collect_keys(self, data):
        """The function will collect all keys (fields) within the given
        items.

        This is needed in case of blobform items as those items have a
        generic data field which may contain a variable number of
        fields depending on whether the item has a value for the fields
        or not."""
        # Builtin set replaces the deprecated sets.Set module.
        keys = set()
        for item in data:
            keys = keys.union(item.keys())
        return keys

    def serialize(self, data):
        """Return the exported items as CSV with a sorted union of all
        keys as header."""
        outfile = cStringIO.StringIO()
        writer = UnicodeCSVWriter(outfile,
                                  sorted(self._collect_keys(data)))
        writer.writeheader()
        writer.writerows(data)
        outfile.seek(0)
        return outfile.read()


class Importer(object):
    """Base importer. Subclasses implement `deserialize` for a concrete
    data format (JSON, CSV, ...)."""

    def __init__(self, clazz, db=None, use_strict=False):
        """
        :clazz: The clazz for which we will import data
        :db: Optional db session. Needed to resolve MANYTOMANY
            relations and to add newly created items.
        :use_strict: Flag passed through to the item factory and
            `set_values`.
        """
        self._clazz = clazz
        self._db = db
        self._clazz_type = self._get_types(clazz)
        self._use_strict = use_strict

    def _get_types(self, clazz):
        """Return a mapping of property name to a type string: the
        relation direction name for relationships (e.g. "MANYTOMANY")
        or the column type name for plain columns."""
        type_mapping = {}
        mapper = sa.orm.class_mapper(clazz)
        for prop in mapper.iterate_properties:
            if isinstance(prop, sa.orm.RelationshipProperty):
                type_mapping[prop.key] = str(prop.direction.name)
            else:
                type_mapping[prop.key] = str(prop.columns[0].type)
        return type_mapping

    def _deserialize_values(self, obj):
        """This function can be called after the basic deserialisation
        has finished. It is used to convert integer, date and datetime
        objects which are either not supported by the default decoders
        or not decoded correctly (NULL values).

        :obj: Deserialized dictionary from basic deserialisation
        :returns: Deserialized dictionary with additional integer, date
            and datetime deserialisation
        """
        for field in obj:
            if (field not in self._clazz_type
                    or obj[field] is None):
                continue
            else:
                datatype = self._clazz_type[field].lower()
                # Relations are handled in _deserialize_relations.
                if datatype in ['manytoone', 'manytomany',
                                'onetomany', 'onetoone']:
                    continue
                obj[field] = deserialize(obj[field], datatype)
        return obj

    def _deserialize_relations(self, obj):
        """Will deserialize items in a MANYTOMANY relation. Other
        relations do not need to be handled as they should have a
        foreign key to the related item which is part of the item's
        fields anyway.

        It will replace the id values of the related items with the
        loaded items. This only works if there is a db connection
        available.

        :obj: Deserialized dictionary from basic deserialisation
        :returns: Deserialized dictionary with additional MANYTOMANY
            relations.
        """
        # Iterate over a copy of the keys as entries may be deleted.
        for field in obj.keys():
            try:
                ftype = self._clazz_type[field]
            except KeyError:
                log.warning("Can not find field %s in %s"
                            % (field, self._clazz_type))
                continue
            # Handle all types of relations...
            if ftype in ["MANYTOMANY", "MANYTOONE",
                         "ONETOONE", "ONETOMANY"]:
                # Remove the items from the list if there is no db
                # connection or of there are not MANYTOMANY.
                if not self._db or (ftype != "MANYTOMANY"):
                    del obj[field]
                    continue
                clazz = getattr(self._clazz, field).mapper.class_
                tmp = []
                for item_id in obj[field]:
                    if isinstance(item_id, BaseItem):
                        # Item has been already be deserialized in the
                        # recursive calls.
                        tmp.append(item_id)
                    else:
                        q = self._db.query(clazz).filter(
                            clazz.id == item_id)
                        # Narrowed from a bare except: a missing or
                        # ambiguous related item is logged and skipped.
                        try:
                            tmp.append(q.one())
                        except Exception:
                            log.warning(("Can not load '%s' id: %s "
                                         "Relation '%s' of '%s' not set"
                                         % (clazz, item_id,
                                            field, self._clazz)))
                obj[field] = tmp
        return obj

    def deserialize(self, data):
        """Will convert the string data into a dictionary like data.

        :data: Importdata as string (JSON, XML...)
        :returns: Dictionary like data
        """
        return {}

    def perform(self, data, user=None, translate=lambda x: x,
                load_key="uuid"):
        """Will return a list of imported items. The list will contain
        a tuple of the item and a string which gives information on the
        operation (update, create). For create operations the new item
        will be created with the given user.

        :data: Importdata as string (JSON, XML...)
        :user: User object. Used when creating objects.
        :translate: Translation method.
        :load_key: Define name of the key which is used to load the
            item.
        :returns: List of imported items
        """
        data = self.deserialize(data)
        imported_items = []
        factory = self._clazz.get_item_factory()
        if self._use_strict:
            factory._use_strict = self._use_strict
        _ = translate
        for values in data:
            if load_key == "uuid":
                load_id = values.get('uuid')
                # The id must not be taken from the import data when
                # loading by uuid.
                if "id" in values:
                    del values["id"]
            else:
                load_id = values.get(load_key)
            try:
                # uuid might be empty for new items, which will raise an
                # error on loading.
                item = factory.load(load_id, field=load_key)
                item.set_values(values, use_strict=self._use_strict)
                operation = _("UPDATE")
            except sa.orm.exc.NoResultFound:
                if ("id" in values and not values["id"]):
                    del values["id"]
                item = factory.create(user=user, values=values)
                # NOTE(review): assumes self._db is set when new items
                # are created — confirm callers always pass a db here.
                self._db.add(item)
                operation = _("CREATE")
            imported_items.append((item, operation))
        return imported_items


class JSONImporter(Importer):
    """Importer which reads JSON data."""

    def _deserialize_recursive(self, obj):
        """Recursively import nested dicts/lists as related items using
        a sub-importer for the relation's class."""
        for field in obj:
            if isinstance(obj[field], (dict, list)):
                clazz = getattr(self._clazz, field).mapper.class_
                importer = JSONImporter(clazz, db=self._db,
                                        use_strict=self._use_strict)
                if not isinstance(obj[field], list):
                    # Single nested item.
                    import_data = [obj[field]]
                    imported_item = importer.perform(
                        json.dumps(import_data))
                    obj[field] = imported_item[0][0]
                elif obj[field] and isinstance(obj[field][0], dict):
                    # List of nested items.
                    import_data = obj[field]
                    imported_item = importer.perform(
                        json.dumps(import_data))
                    obj[field] = [x[0] for x in imported_item]
        return obj

    def _deserialize_hook(self, obj):
        """Apply recursive, value and relation deserialisation."""
        obj = self._deserialize_recursive(obj)
        obj = self._deserialize_values(obj)
        return self._deserialize_relations(obj)

    def deserialize(self, data):
        """Will convert the JSON data back into a dictionary with
        python values

        :data: String JSON data
        :returns: List of dictionary with python values
        """
        conv = json.loads(data)
        if isinstance(conv, dict):
            conv = [conv]
        return [self._deserialize_hook(c) for c in conv]


class CSVImporter(Importer):
    """Importer which reads CSV data."""

    def _deserialize_hook(self, obj):
        """Decode all raw CSV values to unicode and apply value
        deserialisation."""
        conv = {}
        for k, v in obj.iteritems():
            conv[k] = unicode(v, "utf-8")
        conv = self._deserialize_values(conv)
        return conv

    def deserialize(self, data):
        """Will convert the CSV data back into a dictionary with python
        values

        :data: String CSV data
        :returns: List of dictionary with python values
        """
        result = []
        infile = cStringIO.StringIO(data)
        reader = csv.DictReader(infile)
        for conv in reader:
            conv = self._deserialize_hook(conv)
            result.append(conv)
        return result