From e5dce42df2c545f0040298c55cdfb1368c5eb76c Mon Sep 17 00:00:00 2001
From: Benedikt Seeger <benedikt.seeger@ptb.de>
Date: Mon, 17 Jun 2024 11:41:06 +0200
Subject: [PATCH] added logging for dsiUnit/regEx validation mismatches

---
 XMLUnitExtractor.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/XMLUnitExtractor.py b/XMLUnitExtractor.py
index 7e493d3..0570067 100644
--- a/XMLUnitExtractor.py
+++ b/XMLUnitExtractor.py
@@ -1,6 +1,15 @@
 import re
 from dsiUnits import dsiUnit
+import logging
+import json
+from regexGenerator import generateRegex
 
+dsiregExStr = generateRegex()
+# Compile the regex pattern
+dsiregExPattern = re.compile(dsiregExStr)
+
+# Configure logging to log to a file
+logging.basicConfig(filename='unitValidationLog.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s')
 
 def parse_plain_utf8_xml(xml_string):
     result = {}
@@ -28,26 +37,48 @@ def parse_plain_utf8_xml(xml_string):
 
 
 def process_units(unit_dict):
+    # Return True only if the whole unit string matches the precompiled regex
+    def validate_dsi_unit(dsi_unit_str):
+        return dsiregExPattern.fullmatch(dsi_unit_str) is not None
+
     valid_units = {}
     invalid_units = {}
 
     for key, value in unit_dict.items():
         try:
             unit = dsiUnit(value)
+            regExresult = validate_dsi_unit(value)
             if unit.valid:
                 valid_units[key] = value  # Assuming you want to return the string value
+                if not regExresult:
+                    discrepancy = {
+                        "type": "Regex Error",
+                        "message": "Unit parsed as valid by dsiUnit constructor but invalid by regex",
+                        "key": key,
+                        "value": value
+                    }
+                    logging.debug(json.dumps(discrepancy))
             else:
                 invalid_units[key] = {
                     "unit": value,
                     "warnings": unit.warnings
                 }
                 print(f"Warning: Invalid unit at {key} with value: {value}")
+                if regExresult:
+                    discrepancy = {
+                        "type": "Regex Error",
+                        "message": "Unit parsed as invalid by dsiUnit constructor but valid by regex",
+                        "key": key,
+                        "value": value
+                    }
+                    logging.debug(json.dumps(discrepancy))
         except Exception as e:
             print(f"Error processing unit at {key} with value: {value}. Error: {e}")
             invalid_units[key] = {
                 "unit": value,
                 "error": str(e)
             }
+
     return valid_units, invalid_units
 
 def parse_and_process(xml_string):
-- 
GitLab