# Source code for json2xml.dicttoxml

# coding: utf-8

"""
Converts a Python dictionary or other native data type into a valid XML string.

Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `None` and other
number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and
dict-like objects) data types, with arbitrary nesting for the collections.
Items with a `datetime` type are converted to ISO format strings.
Items with a `None` type become empty XML elements.
This module works with Python 3.7+.
"""

import collections
import logging
import numbers
from random import randint
from defusedxml.minidom import parseString

from typing import Dict, Any

# Module-level logger; the helpers in this module report their progress through it.
LOG = logging.getLogger("dicttoxml")


# Ids already issued by get_unique_id(). That function appends every accepted
# id here and checks new candidates against it; nothing in this module clears it.
ids = []  # initialize list of unique ids


def make_id(element, start=100000, end=999999):
    """Build a random id string of the form ``<element>_<number>``.

    The numeric suffix is drawn with ``randint`` from ``start`` to ``end``,
    both bounds included. The result is a string, not an integer.
    """
    suffix = randint(start, end)
    return f"{element}_{suffix}"
def get_unique_id(element):
    """Return an id for ``element`` that has not been issued before.

    Candidates come from make_id(); a candidate already present in the
    module-level ``ids`` list is discarded and a new one is drawn. The accepted
    id is appended to ``ids`` before it is returned.
    """
    candidate = make_id(element)
    while candidate in ids:
        candidate = make_id(element)
    ids.append(candidate)
    return candidate
def get_xml_type(val):
    """Return the label used for the XML ``type`` attribute of ``val``.

    Exact type names are matched first, then progressively broader
    categories (number, null, dict, iterable). Anything unrecognised falls
    back to the name of its own type.
    """
    type_name = type(val).__name__

    # Exact-name matches; these are mutually exclusive so lookup order is moot.
    by_name = {
        "str": "str",
        "unicode": "str",
        "int": "int",
        "long": "int",
        "float": "float",
        "bool": "bool",
    }
    if type_name in by_name:
        return by_name[type_name]

    if isinstance(val, numbers.Number):
        return "number"
    if type_name == "NoneType":
        return "null"
    if isinstance(val, dict):
        return "dict"
    if isinstance(val, collections.abc.Iterable):
        return "list"
    return type_name
def escape_xml(s: str) -> str:
    """Replace the five XML special characters in ``s`` with entities.

    Values that are not ``str`` instances are returned unchanged.
    """
    if not isinstance(s, str):
        return s

    escaped = str(s)  # avoid UnicodeDecodeError
    # "&" must be handled first, otherwise the ampersands introduced by the
    # later entities would themselves be escaped.
    replacements = (
        ("&", "&amp;"),
        ('"', "&quot;"),
        ("'", "&apos;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for raw, entity in replacements:
        escaped = escaped.replace(raw, entity)
    return escaped
def make_attrstring(attr):
    """Render a mapping as XML attributes, e.g. `` key="val" other="x"``.

    Every attribute is preceded by a single space, so the result can be
    appended directly after a tag name; an empty mapping gives ``""``.
    Values are inserted as-is (no escaping is done here).
    """
    return "".join(f' {name}="{value}"' for name, value in attr.items())
def key_is_valid_xml(key):
    """Report whether ``key`` can be used as an XML element name.

    The key is spliced into a minimal document as the tag name and handed to
    ``parseString``; any exception raised by the parser means "not valid".
    """
    LOG.info(f'Inside key_is_valid_xml(). Testing "{str(key)}"')
    probe = f'<?xml version="1.0" encoding="UTF-8" ?><{key}>foo</{key}>'
    try:
        parseString(probe)
    except Exception:  # minidom does not implement exceptions well
        return False
    return True
def make_valid_xml_name(key, attr: Dict[str, Any]):
    """Return a usable XML element name for ``key`` plus the attribute dict.

    Repair steps, tried in order:

    1. the escaped key is already a valid name -> use it unchanged;
    2. the key consists only of digits -> prefix it with ``n``;
    3. replacing spaces with underscores makes it valid -> use that;
    4. otherwise the element is called ``key`` and the original (escaped) key
       is stored in ``attr["name"]``.

    Args:
        key: the proposed element name. Non-string keys (ints, floats, ...)
            are converted with ``str()`` first.
        attr: attribute dict of the element; modified in place in case 4.

    Returns:
        A ``(name, attr)`` tuple.
    """
    LOG.info(
        f'Inside make_valid_xml_name(). Testing key "{str(key)}" with attr "{str(attr)}"'
    )
    # Coerce before escaping: dict keys are not necessarily strings, and the
    # str-only methods used below (isdigit/replace) would raise on e.g. an int.
    key = escape_xml(str(key))

    # attr is a dict, which escape_xml() would return untouched, so it is not
    # passed through it.

    # pass through if key is already valid
    if key_is_valid_xml(key):
        return key, attr

    # prepend a lowercase n if the key is numeric
    if key.isdigit():
        return f"n{key}", attr

    # replace spaces with underscores if that fixes the problem
    underscored = key.replace(" ", "_")
    if key_is_valid_xml(underscored):
        return underscored, attr

    # key is still invalid - move it into a name attribute
    attr["name"] = key
    return "key", attr
def wrap_cdata(s: str) -> str:
    """Return ``s`` (converted with ``str()``) enclosed in a CDATA section."""
    text = str(s)
    # A literal "]]>" would close the section early, so each occurrence is
    # split across two adjacent CDATA sections.
    safe = "]]]]><![CDATA[>".join(text.split("]]>"))
    return f"<![CDATA[{safe}]]>"
def default_item_func(parent):
    """Default element-name generator for list items.

    ``parent`` is accepted so the function matches the ``item_func``
    call signature, but it is ignored: the name is always ``"item"``.
    """
    item_tag = "item"
    return item_tag
def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"):
    """Dispatch ``obj`` to the converter that matches its data type.

    Args:
        obj: the value to serialise.
        ids: truthy if elements should receive unique ``id`` attributes.
        attr_type: truthy if elements should receive a ``type`` attribute.
        item_func: callable mapping a parent name to the list-item tag name.
        cdata: truthy if string values should be wrapped in CDATA sections.
        item_wrap: truthy if list items should be nested in item elements.
        parent: name of the enclosing element.

    Returns:
        The XML fragment for ``obj`` as a string.

    Raises:
        TypeError: if ``obj`` is of a type no branch handles.
    """
    LOG.info(f'Inside convert(). obj type is: "{type(obj).__name__}", obj="{str(obj)}"')

    item_name = item_func(parent)

    # bool has to be tested before numbers.Number: bool is a subclass of int,
    # so the numeric branch would otherwise claim it and convert_bool() would
    # never run.
    if isinstance(obj, bool):
        # attr/cdata are passed by keyword so cdata cannot land in the attr slot.
        return convert_bool(item_name, obj, attr_type, attr={}, cdata=cdata)

    if isinstance(obj, (numbers.Number, str)):
        return convert_kv(
            key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata
        )

    if hasattr(obj, "isoformat"):  # datetime-like
        return convert_kv(
            key=item_name,
            val=obj.isoformat(),
            attr_type=attr_type,
            attr={},
            cdata=cdata,
        )

    if obj is None:
        # A fresh attr dict is required: convert_none() stores the type
        # attribute in it, which fails if a bool is passed in its place.
        return convert_none(item_name, None, attr_type, attr={}, cdata=cdata)

    if isinstance(obj, dict):
        return convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap)

    if isinstance(obj, collections.abc.Iterable):
        return convert_list(obj, ids, parent, attr_type, item_func, cdata, item_wrap)

    raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})")
def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap):
    """Convert a dict into an XML string.

    Each key becomes an element (after repair by make_valid_xml_name()); the
    value decides which converter produces the element's content.

    Args:
        obj: the dict to serialise.
        ids: truthy if elements should receive unique ``id`` attributes.
        parent: name of the enclosing element (used as the id prefix).
        attr_type: truthy if elements should receive a ``type`` attribute.
        item_func: callable mapping a parent name to the list-item tag name.
        cdata: truthy if string values should be wrapped in CDATA sections.
        item_wrap: truthy if list items should be nested in item elements.

    Returns:
        The concatenated XML for all entries, without an enclosing element.

    Raises:
        TypeError: if a value is of a type no branch handles.
    """
    LOG.info(
        f'Inside convert_dict(): obj type is: "{type(obj).__name__}", obj="{str(obj)}"'
    )
    output = []
    addline = output.append

    for key, val in obj.items():
        LOG.info(
            f'Looping inside convert_dict(): key="{str(key)}", val="{str(val)}", type(val)="{type(val).__name__}"'
        )

        attr = {} if not ids else {"id": f"{get_unique_id(parent)}"}

        key, attr = make_valid_xml_name(key, attr)

        # bool is a subclass of int, so it must be tested before the generic
        # numeric branch or convert_bool() is never reached.
        if isinstance(val, bool):
            addline(convert_bool(key, val, attr_type, attr, cdata))

        elif isinstance(val, (numbers.Number, str)):
            addline(
                convert_kv(
                    key=key, val=val, attr_type=attr_type, attr=attr, cdata=cdata
                )
            )

        elif hasattr(val, "isoformat"):  # datetime
            addline(
                convert_kv(
                    key=key,
                    val=val.isoformat(),
                    attr_type=attr_type,
                    attr=attr,
                    cdata=cdata,
                )
            )

        elif isinstance(val, dict):
            if attr_type:
                attr["type"] = get_xml_type(val)
            dict_str = convert_dict(
                val, ids, key, attr_type, item_func, cdata, item_wrap
            )
            attrstring = make_attrstring(attr)
            addline(f"<{key}{attrstring}>{dict_str}</{key}>")

        elif isinstance(val, collections.abc.Iterable) and val:
            if attr_type:
                attr["type"] = get_xml_type(val)

            # Sets and other non-indexable iterables cannot be peeked with
            # val[0]; materialise them once so the first element can be read
            # and the same items are then converted.
            if isinstance(val, collections.abc.Sequence):
                items = val
            else:
                items = list(val)
            first = items[0] if items else None
            first_is_scalar = isinstance(first, (numbers.Number, str))

            # Scalars without item wrapping are emitted as repeated <key>
            # elements by convert_list(), so no enclosing element is added.
            # In every other case the list content is nested inside <key>.
            # (The scalar test is grouped explicitly: "a or b and c" would
            # apply the item_wrap check to strings only.)
            if first_is_scalar and not item_wrap:
                addline(
                    convert_list(
                        items, ids, key, attr_type, item_func, cdata, item_wrap
                    )
                )
            else:
                attrstring = make_attrstring(attr)
                list_str = convert_list(
                    items, ids, key, attr_type, item_func, cdata, item_wrap
                )
                addline(f"<{key}{attrstring}>{list_str}</{key}>")

        elif not val:
            # None and empty containers become an empty element.
            addline(convert_none(key, val, attr_type, attr, cdata))

        else:
            raise TypeError(f"Unsupported data type: {val} ({type(val).__name__})")

    return "".join(output)
def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap):
    """Convert an iterable into an XML string.

    Args:
        items: the iterable to serialise.
        ids: truthy if elements should receive ``id`` attributes; ids have
            the form ``<list id>_<1-based position>``.
        parent: name of the enclosing element.
        attr_type: truthy if elements should receive a ``type`` attribute.
        item_func: callable mapping a parent name to the list-item tag name.
        cdata: truthy if string values should be wrapped in CDATA sections.
        item_wrap: truthy if scalar and dict items should be nested in item
            elements; when falsy, scalars are emitted as ``<parent>``
            elements and dict items are emitted without a wrapper.

    Returns:
        The concatenated XML for all items, without an enclosing element.

    Raises:
        TypeError: if an item is of a type no branch handles.
    """
    LOG.info("Inside convert_list()")
    output = []
    addline = output.append

    item_name = item_func(parent)
    # Tag used for scalar items: the item name when wrapping, else the parent.
    scalar_name = item_name if item_wrap else parent

    this_id = None
    if ids:
        this_id = get_unique_id(parent)

    for i, item in enumerate(items):
        LOG.info(
            f'Looping inside convert_list(): item="{str(item)}", item_name="{item_name}", type="{type(item).__name__}"'
        )
        attr = {} if not ids else {"id": f"{this_id}_{i + 1}"}

        # bool is a subclass of int, so it must be tested before the generic
        # numeric branch or convert_bool() is never reached. It keeps the
        # same tag name the numeric branch would have given it.
        if isinstance(item, bool):
            addline(convert_bool(scalar_name, item, attr_type, attr, cdata))

        elif isinstance(item, (numbers.Number, str)):
            addline(
                convert_kv(
                    key=scalar_name,
                    val=item,
                    attr_type=attr_type,
                    attr=attr,
                    cdata=cdata,
                )
            )

        elif hasattr(item, "isoformat"):  # datetime
            addline(
                convert_kv(
                    key=item_name,
                    val=item.isoformat(),
                    attr_type=attr_type,
                    attr=attr,
                    cdata=cdata,
                )
            )

        elif isinstance(item, dict):
            item_dict_str = convert_dict(
                item,
                ids,
                parent,
                attr_type,
                item_func,
                cdata,
                item_wrap,
            )
            if not item_wrap:
                addline(f"{item_dict_str}")
            elif attr_type:
                addline(f'<{item_name} type="dict">{item_dict_str}</{item_name}>')
            else:
                addline(f"<{item_name}>{item_dict_str}</{item_name}>")

        elif isinstance(item, collections.abc.Iterable):
            attrstring = make_attrstring(attr)
            convert_list_str = convert_list(
                item, ids, item_name, attr_type, item_func, cdata, item_wrap
            )
            if not attr_type:
                # attrstring already carries its own leading space (or is
                # empty), so none is added here.
                addline(f"<{item_name}{attrstring}>{convert_list_str}</{item_name}>")
            else:
                addline(
                    f'<{item_name} type="list"{attrstring}>{convert_list_str}</{item_name}>'
                )

        elif item is None:
            addline(convert_none(item_name, None, attr_type, attr, cdata))

        else:
            raise TypeError(f"Unsupported data type: {item} ({type(item).__name__})")

    return "".join(output)
def convert_kv(key, val, attr_type, attr=None, cdata: bool = False):
    """Convert a number or string into an XML element.

    Args:
        key: proposed element name; repaired by make_valid_xml_name().
        val: the scalar value placed inside the element.
        attr_type: truthy to add a ``type`` attribute describing ``val``.
        attr: attribute dict for the element. It is updated in place; when
            omitted, a new dict is created for this call.
        cdata: if true the value is wrapped in a CDATA section, otherwise it
            is XML-escaped.

    Returns:
        The element as a string.
    """
    LOG.info(
        f'Inside convert_kv(): key="{str(key)}", val="{str(val)}", type(val) is: "{type(val).__name__}"'
    )

    # A dict literal as default would be shared by every call and collect
    # attributes ("type", "name") from earlier elements.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr["type"] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    content = wrap_cdata(val) if cdata else escape_xml(val)
    return f"<{key}{attrstring}>{content}</{key}>"
def convert_bool(key, val, attr_type, attr=None, cdata=False):
    """Convert a boolean into an XML element.

    The element text is the lowercase string form of ``val``.

    Args:
        key: proposed element name; repaired by make_valid_xml_name().
        val: the value to serialise.
        attr_type: truthy to add a ``type`` attribute describing ``val``.
        attr: attribute dict for the element. It is updated in place; when
            omitted, a new dict is created for this call.
        cdata: accepted for signature parity with the other converters; not
            used here.

    Returns:
        The element as a string.
    """
    LOG.info(
        f'Inside convert_bool(): key="{str(key)}", val="{str(val)}", type(val) is: "{type(val).__name__}"'
    )

    # A dict literal as default would be shared by every call and collect
    # attributes ("type", "name") from earlier elements.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr["type"] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return f"<{key}{attrstring}>{str(val).lower()}</{key}>"
def convert_none(key, val, attr_type, attr=None, cdata=False):
    """Convert a null value into an empty XML element.

    Args:
        key: proposed element name; repaired by make_valid_xml_name().
        val: only consulted to derive the ``type`` attribute.
        attr_type: truthy to add a ``type`` attribute describing ``val``.
        attr: attribute dict for the element. It is updated in place; when
            omitted, a new dict is created for this call.
        cdata: accepted for signature parity with the other converters; not
            used here.

    Returns:
        The empty element as a string.
    """
    LOG.info(f'Inside convert_none(): key="{str(key)}"')

    # A dict literal as default would be shared by every call and collect
    # attributes ("type", "name") from earlier elements.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr["type"] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return f"<{key}{attrstring}></{key}>"
def dicttoxml(
    obj,
    root: bool = True,
    custom_root="root",
    ids=False,
    attr_type=True,
    item_wrap=True,
    item_func=default_item_func,
    cdata=False,
):
    """Serialise a Python object to XML and return it as UTF-8 bytes.

    Arguments:
    - obj is the object to convert.
    - root: when true (the default) the output starts with an XML
      declaration and is wrapped in a root element; when false only the
      converted fragment is returned.
    - custom_root: name of the root element. Default is 'root'.
    - ids: whether elements get unique ids. Default is False.
    - attr_type: whether elements get a data type attribute.
      Default is True.
    - item_wrap: whether items of an array are nested in <item/>.
      Default is True.
    - item_func: function that generates the element name for items in a
      list. Default is default_item_func, which yields 'item'.
    - cdata: whether string values are wrapped in CDATA sections.
      Default is False.
    """
    LOG.info(
        f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}", obj="{str(obj)}"'
    )

    if not root:
        # Bare fragment: no declaration, no enclosing element, empty parent.
        fragment = convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="")
        return fragment.encode("utf-8")

    body = convert(
        obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root
    )
    document = (
        f'<?xml version="1.0" encoding="UTF-8" ?><{custom_root}>{body}</{custom_root}>'
    )
    return document.encode("utf-8")