Skip to content
Snippets Groups Projects
Commit 654dfc74 authored by Benedikt's avatar Benedikt
Browse files

Merge remote-tracking branch 'origin/devel_xmlschemaLibary' into devel_xmlschemaLibary

parents bedfd74c 18212f92
Branches
No related tags found
No related merge requests found
import os
from xml.etree.ElementTree import ElementTree
import xmlschema
from jinja2 import Environment, PackageLoader, select_autoescape
import lxml.etree as ElementTree
......@@ -10,7 +11,16 @@ from importlib import resources as imp_resources
from importlib.resources import files
import warnings
def strip_outer_xml_tag(xml_str, outer_tag):
def strip_outer_xml_tag(xml_str: str, outer_tag: str) -> str:
"""Remove xml schema declaration and outer xml tag
Args:
xml_str (str): xml str from which to strip
outer_tag (str): tag to remove
Returns:
str: stripped xml str
"""
#TODO raise/check error if outer not found
xml_str = re.sub(r'<\?xml.*?\?>', '', xml_str).strip()
xml_str = re.sub(rf'<{outer_tag}[^>]*>', '', xml_str, count=1).strip()
......@@ -18,16 +28,36 @@ def strip_outer_xml_tag(xml_str, outer_tag):
return xml_str
def get_from_dict(data_dict, map_list):
def get_from_dict(data_dict: dict, map_list: list[int|str]):
"""get entry from dict at specified location
Args:
data_dict (dict): dict from which to get the element
map_list (list[int | str]): list of keys describing the location at which to extract the element
Returns:
element at given location
"""
return reduce(operator.getitem, map_list, data_dict)
def set_in_dict(data_dict, map_list, value):
def set_in_dict(data_dict: dict, map_list: list[int|str], value):
"""insert value into dict
Args:
data_dict (dict): dict into which to insert the item
map_list (list[int | str]): location at which to insert the item
value: item to insert
"""
get_from_dict(data_dict, map_list[:-1])[map_list[-1]] = value
class XMLSchemaConverter:
"""Converter between XML and json/python dicts, based on a given XSD schema
"""
def __init__(self):
"""Converter instance. Converts between XML and json/python dicts
"""
schema_file = files("dccXMLJSONConv.schemata").joinpath('dcc.xsd')
# Set the base directory for relative includes/imports
schema_base_dir = os.path.dirname(schema_file)
......@@ -43,11 +73,28 @@ class XMLSchemaConverter:
def _load_jinja_template(self, template_name, data_fragment):
def _load_jinja_template(self, template_name: str, data_fragment: str) -> str:
"""Inserts XML fragment into template to create valid XML string
Args:
template_name (str): jinja template into which to insert
data_fragment (str): fragment to insert into template
Returns:
str: full xml string
"""
template = self.env.get_template(template_name)
return template.render(data_fragment=data_fragment)
def convert_xml_to_json(self, input_data):
def convert_xml_to_json(self, input_data) -> tuple[dict, list[Exception]]:
"""Deserialize XML to JSON
Args:
input_data: xml data to parse. Can be an XMLResource instance, a file-like object a path to a file or a URI of a resource or an Element instance or an ElementTree instance or a string containing the XML data.
Returns:
tuple[dict, list[Exception]]: converted dict; and list of exceptions that occurred during parsing
"""
try:
#TODO check if we have to parse the errors
return self.schema.to_dict(input_data, validation="lax")
......@@ -62,7 +109,18 @@ class XMLSchemaConverter:
converted_data = get_from_dict(conversion_result[0], template_info['json_path'])
return converted_data, conversion_result[1]
def convert_json_to_xml(self, input_data):
def convert_json_to_xml(self, input_data) -> tuple[str, ElementTree, list[Exception]]:
"""Serialize data back into XML format
Args:
input_data: the data that has to be encoded to XML data
Raises:
Exception: Any exceptions that made the serialization fail
Returns:
tuple[str, ElementTree, list[Exception]]: string containing the XML; ElementTree representation of the XML; and list of exceptions that occurred during serialization
"""
try:
conversion_result = self.schema.encode(input_data, validation="lax")
errors=conversion_result[1]
......@@ -91,20 +149,37 @@ class XMLSchemaConverter:
xml_str = strip_outer_xml_tag(xml_str, template_info["strip_outer_tag"])
return xml_str, converted_element, conversion_result[1]
def _getRootElement(self,xml_str:str)->str:
def _getRootElement(self, xml_str:str ) -> str:
"""Returns tag name of first element
Args:
xml_str (str): XML to get the tag from
Returns:
str: contents of first tag, without any attributes
"""
match = re.search(r'<\s*([\w:-]+)', xml_str)
return match.group(1) if match else 'default'
def _checkIfValidationErrorIsUncritical(self,errors):
crticalErrors=[]
def _checkIfValidationErrorIsUncritical(self, errors: list[Exception]) -> tuple[bool, list[Exception], list[Exception]]:
"""Sort validation errors into critical and uncritical errors
Args:
errors (list[Exception]): Exceptions that occurred during XML validation
Returns:
tuple[bool, list[Exception], list[Exception]]: whether any critical exceptions were found; list of critical exceptions; list of uncritical exceptions
"""
criticalErrors=[]
unCriticalErrors=[]
for error in errors:
if "value doesn't match any pattern of ['3\\\\.3\\\\.0']" in error.reason:
unCriticalErrors.append(error)
else:
crticalErrors.append(error)
isNotCritical=len(crticalErrors)==0
return isNotCritical,crticalErrors,unCriticalErrors
criticalErrors.append(error)
isNotCritical=len(criticalErrors)==0
return isNotCritical,criticalErrors,unCriticalErrors
converter=XMLSchemaConverter()
def XMLToJson(xml):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment