dict_to_xml.py 11.2 KB
#!/usr/bin/env python
# coding: utf-8

"""
Converts a Python dictionary or other native data type into a valid XML string.
Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and dict-like objects) data types, with arbitrary nesting for the collections. Items with a `datetime` type are converted to ISO format strings. Items with a `None` type become empty XML elements.
This module works with both Python 2 and 3.
"""

# from __future__ import unicode_literals

from random import randint
import collections
import numbers
from xml.dom.minidom import parseString

# python 3 doesn't have a unicode type
unicode = str

# python 3 doesn't have a long type
long = int


def set_debug(debug=True, filename='dicttoxml.log'):
    if debug:
        import datetime
        print('Debug mode is on. Events are logged at: %s' % (filename))
    else:
        print('Debug mode is off.')


def unicode_me(something):
    """Converts strings with non-ASCII characters to unicode for LOG.
    Python 3 doesn't have a `unicode()` function, so `unicode()` is an alias
    for `str()`, but `str()` doesn't take a second argument, hence this kludge.
    """
    try:
        return unicode(something, 'utf-8')
    except:
        return unicode(something)


ids = []  # initialize list of unique ids


def make_id(element, start=100000, end=999999):
    """Returns a random integer"""
    return '%s_%s' % (element, randint(start, end))


def get_unique_id(element):
    """Returns a unique id for a given element"""
    this_id = make_id(element)
    dup = True
    while dup:
        if this_id not in ids:
            dup = False
            ids.append(this_id)
        else:
            this_id = make_id(element)
    return ids[-1]


def get_xml_type(val):
    """Returns the data type for the xml type attribute"""
    if type(val).__name__ in ('str', 'unicode'):
        return 'str'
    if type(val).__name__ in ('int', 'long'):
        return 'int'
    if type(val).__name__ == 'float':
        return 'float'
    if type(val).__name__ == 'bool':
        return 'bool'
    if isinstance(val, numbers.Number):
        return 'number'
    if type(val).__name__ == 'NoneType':
        return 'null'
    if isinstance(val, dict):
        return 'dict'
    if isinstance(val, collections.Iterable):
        return 'list'
    return type(val).__name__


def escape_xml(s):
    if type(s) in (str, unicode):
        s = unicode_me(s)  # avoid UnicodeDecodeError
        s = s.replace('&', '&')
        s = s.replace('"', '"')
        s = s.replace('\'', ''')
        s = s.replace('<', '&amp;lt;')
        s = s.replace('>', '&amp;gt;')
    return s


def make_attrstring(attr):
    """Returns an attribute string in the form key="val" """
    attrstring = ' '.join(['%s="%s"' % (k, v) for k, v in attr.items()])
    return '%s%s' % (' ' if attrstring != '' else '', attrstring)


def key_is_valid_xml(key):
    """Checks that a key is a valid XML name"""
    test_xml = '<?xml version="1.0" encoding="UTF-8" ?><%s>foo</%s>' % (key, key)
    try:
        parseString(test_xml)
        return True
    except Exception:  # minidom does not implement exceptions well
        return False


def make_valid_xml_name(key, attr):
    """Tests an XML name and fixes it if invalid"""
    key = escape_xml(key)
    attr = escape_xml(attr)

    # pass through if key is already valid
    if key_is_valid_xml(key):
        return key, attr

    # prepend a lowercase n if the key is numeric
    if key.isdigit():
        return 'n%s' % (key), attr

    # replace spaces with underscores if that fixes the problem
    if key_is_valid_xml(key.replace(' ', '_')):
        return key.replace(' ', '_'), attr

    # key is still invalid - move it into a name attribute
    attr['name'] = key
    key = 'key'
    return key, attr


def wrap_cdata(s):
    """Wraps a string into CDATA sections"""
    s = unicode_me(s).replace(']]>', ']]]]><![CDATA[>')
    return '<![CDATA[' + s + ']]>'


def default_item_func(parent):
    return 'item'


def convert(obj, ids, attr_type, item_func, cdata, parent='root'):
    """Routes the elements of an object to the right function to convert them
    based on their data type"""

    item_name = item_func(parent)

    if isinstance(obj, numbers.Number) or type(obj) in (str, unicode):
        return convert_kv(item_name, obj, attr_type, cdata)

    if hasattr(obj, 'isoformat'):
        return convert_kv(item_name, obj.isoformat(), attr_type, cdata)

    if type(obj) == bool:
        return convert_bool(item_name, obj, attr_type, cdata)

    if obj is None:
        return convert_none(item_name, '', attr_type, cdata)

    if isinstance(obj, dict):
        return convert_dict(obj, ids, parent, attr_type, item_func, cdata)

    if isinstance(obj, collections.Iterable):
        return convert_list(obj, ids, parent, attr_type, item_func, cdata)

    raise TypeError('Unsupported data type: %s (%s)' % (obj, type(obj).__name__))


def convert_dict(obj, ids, parent, attr_type, item_func, cdata):
    """Converts a dict into an XML string."""
    output = []
    addline = output.append

    item_name = item_func(parent)

    for key, val in obj.items():

        attr = {} if not ids else {'id': '%s' % (get_unique_id(parent))}

        key, attr = make_valid_xml_name(key, attr)

        if isinstance(val, numbers.Number) or type(val) in (str, unicode):
            addline(convert_kv(key, val, attr_type, attr, cdata))

        elif hasattr(val, 'isoformat'):  # datetime
            addline(convert_kv(key, val.isoformat(), attr_type, attr, cdata))

        elif type(val) == bool:
            addline(convert_bool(key, val, attr_type, attr, cdata))

        elif isinstance(val, dict):
            if attr_type:
                attr['type'] = get_xml_type(val)
            addline('<%s%s>%s</%s>' % (
                key, make_attrstring(attr),
                convert_dict(val, ids, key, attr_type, item_func, cdata),
                key
            )
                    )

        elif isinstance(val, collections.Iterable):
            if attr_type:
                attr['type'] = get_xml_type(val)
            # addline('<%s%s>%s</%s>' % (
            #     key,
            #     make_attrstring(attr),
            #     convert_list(val, ids, key, attr_type, item_func, cdata),
            #     key
            # )
            item_func = lambda x: key
            addline('%s' % (
                convert_list(val, ids, key, attr_type, item_func, cdata),
            ))

        elif val is None:
            addline(convert_none(key, val, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (
                val, type(val).__name__)
                            )

    return ''.join(output)


def convert_list(items, ids, parent, attr_type, item_func, cdata):
    """Converts a list into an XML string."""
    output = []
    addline = output.append

    item_name = item_func(parent)

    if ids:
        this_id = get_unique_id(parent)

    for i, item in enumerate(items):
        attr = {} if not ids else {'id': '%s_%s' % (this_id, i + 1)}
        if isinstance(item, numbers.Number) or type(item) in (str, unicode):
            addline(convert_kv(item_name, item, attr_type, attr, cdata))

        elif hasattr(item, 'isoformat'):  # datetime
            addline(convert_kv(item_name, item.isoformat(), attr_type, attr, cdata))

        elif type(item) == bool:
            addline(convert_bool(item_name, item, attr_type, attr, cdata))

        elif isinstance(item, dict):
            if not attr_type:
                addline('<%s>%s</%s>' % (
                    item_name,
                    convert_dict(item, ids, parent, attr_type, item_func, cdata),
                    item_name,
                )
                        )
            else:
                addline('<%s type="dict">%s</%s>' % (
                    item_name,
                    convert_dict(item, ids, parent, attr_type, item_func, cdata),
                    item_name,
                )
                        )

        elif isinstance(item, collections.Iterable):
            if not attr_type:
                addline('<%s %s>%s</%s>' % (
                    item_name, make_attrstring(attr),
                    convert_list(item, ids, item_name, attr_type, item_func, cdata),
                    item_name,
                )
                        )
            else:
                addline('<%s type="list"%s>%s</%s>' % (
                    item_name, make_attrstring(attr),
                    convert_list(item, ids, item_name, attr_type, item_func, cdata),
                    item_name,
                )
                        )

        elif item is None:
            addline(convert_none(item_name, None, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (
                item, type(item).__name__)
                            )
    return ''.join(output)


def convert_kv(key, val, attr_type, attr={}, cdata=False):
    """Converts a number or string into an XML element"""

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s>%s</%s>' % (
        key, attrstring,
        wrap_cdata(val) if cdata == True else escape_xml(val),
        key
    )


def convert_bool(key, val, attr_type, attr={}, cdata=False):
    """Converts a boolean into an XML element"""

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s>%s</%s>' % (key, attrstring, unicode(val).lower(), key)


def convert_none(key, val, attr_type, attr={}, cdata=False):
    """Converts a null value into an XML element"""

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s></%s>' % (key, attrstring, key)


def dicttoxml(obj, root=True, custom_root='root', ids=False, attr_type=True,
              item_func=default_item_func, cdata=False):
    """Converts a python object into XML.
    Arguments:
    - root specifies whether the output is wrapped in an XML root element
      Default is True
    - custom_root allows you to specify a custom root element.
      Default is 'root'
    - ids specifies whether elements get unique ids.
      Default is False
    - attr_type specifies whether elements get a data type attribute.
      Default is True
    - item_func specifies what function should generate the element name for
      items in a list.
      Default is 'item'
    - cdata specifies whether string values should be wrapped in CDATA sections.
      Default is False
    """
    output = []
    addline = output.append
    if root == True:
        addline('<?xml version="1.0" encoding="UTF-8" ?>')
        addline('<%s>%s</%s>' % (
            custom_root,
            convert(obj, ids, attr_type, item_func, cdata, parent=custom_root),
            custom_root,
        )
                )
    else:
        addline(convert(obj, ids, attr_type, item_func, cdata, parent=''))
    return ''.join(output)