Skip to content
Snippets Groups Projects
Commit 769526b4 authored by Benedikt's avatar Benedikt
Browse files

improved error handling

parent 0ceecd01
No related branches found
No related tags found
No related merge requests found
{
"default": {
"template_path": "minimalDCC_3_3_0_jinjaTemplate",
"replacement_key": "data_fragment",
"json_path": [
"dcc:measurementResults",
"dcc:measurementResult",
0,
"dcc:results",
"dcc:result",
0,
"dcc:data"
],
"xPath": ".//dcc:measurementResults/dcc:measurementResult[1]/dcc:results/dcc:result[1]/dcc:data",
"strip_outer_tag": "dcc:data"
"templateInformation": {
"inData": {
"template_path": "minimalDCC_3_3_0_jinjaTemplate",
"replacement_key": "data_fragment",
"json_path": [
"dcc:measurementResults",
"dcc:measurementResult",
0,
"dcc:results",
"dcc:result",
0,
"dcc:data"
],
"xPath": ".//dcc:measurementResults/dcc:measurementResult[1]/dcc:results/dcc:result[1]/dcc:data",
"strip_outer_tag": "dcc:data"
}
},
"elementMapping": {
"default": "inData",
"dcc:quantity": "inData",
"dcc:data": "inData",
"dcc:measurementResults": "inData",
"dcc:measurementResult": "inData",
"dcc:result": "inData"
}
}
\ No newline at end of file
}
......@@ -8,8 +8,10 @@ from functools import reduce
import operator
from importlib import resources as imp_resources
from importlib.resources import files
import warnings
def strip_outer_xml_tag(xml_str, outer_tag):
#TODO raise/check error if outer not found
xml_str = re.sub(r'<\?xml.*?\?>', '', xml_str).strip()
xml_str = re.sub(rf'<{outer_tag}[^>]*>', '', xml_str, count=1).strip()
xml_str = re.sub(rf'</{outer_tag}>', '', xml_str, count=1).strip()
......@@ -32,7 +34,9 @@ class XMLSchemaConverter:
self.schema = xmlschema.XMLSchema(schema_file, base_url=schema_base_dir)
with imp_resources.files('dccXMLJSONConv.data').joinpath('template_descriptions.json').open('r',encoding='utf-8') as f:
self.templates = json.load(f)
jsonData=json.load(f)
self.templates=jsonData["templateInformation"]
self.templateMapping=jsonData["elementMapping"]
self.env = Environment(
loader=PackageLoader('dccXMLJSONConv.data', '.'),
autoescape=False)
......@@ -45,11 +49,14 @@ class XMLSchemaConverter:
def convert_xml_to_json(self, input_data):
try:
#TODO check if we have to parse the errors
return self.schema.to_dict(input_data, validation="lax")
except Exception as e:
print(f"Validation error: {e}. Attempting template insertion.")
root_tag = list(self.templates.keys())[0]
template_info = self.templates.get(root_tag, "default")
warnings.warn(f"XML->JSON Validation error: {e}. Attempting template insertion.")
#TODO add logic to find fragments root tag
root_tag = self._getRootElement(input_data)
templateName = self.templateMapping.get(root_tag, "default")
template_info=self.templates[templateName]
rendered_xml = self._load_jinja_template(template_info["template_path"] + '.xml', input_data)
conversion_result = self.schema.to_dict(rendered_xml, validation="lax")
converted_data = get_from_dict(conversion_result[0], template_info['json_path'])
......@@ -60,15 +67,22 @@ class XMLSchemaConverter:
conversion_result = self.schema.encode(input_data, validation="lax")
errors=conversion_result[1]
if len(errors)>0:
#TODO check how to handle schema facet validation errors and other uncritical stuff
raise Exception(errors)
converted_element = conversion_result[0] # we take all since we had a complete XML tree
xml_str = xmlschema.etree_tostring(converted_element, namespaces=self.schema.namespaces)
return xml_str, converted_element, conversion_result[1]
isNotCritical,errors,unCriticalErrors=self._checkIfValidationErrorIsUncritical(errors)
if isNotCritical:
converted_element = conversion_result[0] # we take all since we had a complete XML tree
xml_str = xmlschema.etree_tostring(converted_element, namespaces=self.schema.namespaces)
return xml_str, converted_element, conversion_result[1]
else:
raise Exception(f"Validation errors: {errors}")
except Exception as e:
print(f"Validation error: {e}. Attempting template insertion.")
root_tag = list(self.templates.keys())[0]
template_info = self.templates.get(root_tag, "default")
warnings.warn(f"JSON->XML Validation error: {e}. Attempting template insertion.")
dictKeys=list(input_data.keys())
if len(dictKeys)>1:
#we will not handle this since the caller should have taken care of this
raise Exception("Multiple root elements in JSON. Cannot determine template")
root_tag=dictKeys[0]
templateName = self.templateMapping.get(root_tag, "default")
template_info=self.templates[templateName]
rendered_json = self._load_jinja_template(template_info["template_path"] + '.json', json.dumps(input_data))
rendered_dict = json.loads(rendered_json)
conversion_result = self.schema.encode(rendered_dict, validation="lax")
......@@ -77,5 +91,19 @@ class XMLSchemaConverter:
xml_str = strip_outer_xml_tag(xml_str, template_info["strip_outer_tag"])
return xml_str, converted_element, conversion_result[1]
def _getRootElement(self,xml_str:str)->str:
match = re.search(r'<\s*([\w:-]+)', xml_str)
return match.group(1) if match else 'default'
def _checkIfValidationErrorIsUncritical(self,errors):
crticalErrors=[]
unCriticalErrors=[]
for error in errors:
if "value doesn't match any pattern of ['3\\\\.3\\\\.0']" in error.reason:
unCriticalErrors.append(error)
else:
crticalErrors.append(error)
isNotCritical=len(crticalErrors)==0
return isNotCritical,crticalErrors,unCriticalErrors
# Converter instance created at module level, so it is built when this module is imported.
converter=XMLSchemaConverter()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment