From e5dce42df2c545f0040298c55cdfb1368c5eb76c Mon Sep 17 00:00:00 2001 From: Benedikt Seeger <benedikt.seeger@ptb.de> Date: Mon, 17 Jun 2024 11:41:06 +0200 Subject: [PATCH] added logging for dsiUnits and regEx mismatch --- XMLUnitExtractor.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/XMLUnitExtractor.py b/XMLUnitExtractor.py index 7e493d3..0570067 100644 --- a/XMLUnitExtractor.py +++ b/XMLUnitExtractor.py @@ -1,6 +1,15 @@ import re from dsiUnits import dsiUnit +import logging +import json +from regexGenerator import generateRegex +dsiregExStr = generateRegex() +# Compile the regex pattern +dsiregExPattern = re.compile(dsiregExStr) + +# Configure logging to log to a file +logging.basicConfig(filename='unitValidationLog.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s') def parse_plain_utf8_xml(xml_string): result = {} @@ -28,26 +37,48 @@ def parse_plain_utf8_xml(xml_string): def process_units(unit_dict): + # Static regex parser function + def validate_dsi_unit(dsi_unit_str): + return dsiregExPattern.fullmatch(dsi_unit_str) is not None + valid_units = {} invalid_units = {} for key, value in unit_dict.items(): try: unit = dsiUnit(value) + regExresult = validate_dsi_unit(value) if unit.valid: valid_units[key] = value # Assuming you want to return the string value + if not regExresult: + discrepancy = { + "type": "Regex Error", + "message": "Unit parsed as valid by dsiUnit constructor but invalid by regex", + "key": key, + "value": value + } + logging.debug(json.dumps(discrepancy)) else: invalid_units[key] = { "unit": value, "warnings": unit.warnings } print(f"Warning: Invalid unit at {key} with value: {value}") + if regExresult: + discrepancy = { + "type": "Regex Error", + "message": "Unit parsed as invalid by dsiUnit constructor but valid by regex", + "key": key, + "value": value + } + logging.debug(json.dumps(discrepancy)) except Exception as e: print(f"Error processing unit at {key} with value: 
{value}. Error: {e}") invalid_units[key] = { "unit": value, "error": str(e) } + return valid_units, invalid_units def parse_and_process(xml_string): -- GitLab