From 0a2753b6fe62a1cc76848584bd235fc9131de4ed Mon Sep 17 00:00:00 2001 From: Benedikt Seeger <benedikt.seeger@ptb.de> Date: Mon, 17 Jun 2024 08:38:48 +0200 Subject: [PATCH] starting with XML Unit validator --- XMLUnitExtractor.py | 55 +++++++++++++++++++++++++++++++++++++++++++++ restAPIServer.py | 14 ++++++++++++ test_API.py | 37 ++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 XMLUnitExtractor.py diff --git a/XMLUnitExtractor.py b/XMLUnitExtractor.py new file mode 100644 index 0000000..d206a55 --- /dev/null +++ b/XMLUnitExtractor.py @@ -0,0 +1,55 @@ +import re +from dsiUnits import dsiUnit + + +def parse_plain_utf8_xml(xml_string): + result = {} + + # Regular expressions to match the required XML elements + unit_regex = re.compile(r'<si:unit>(.*?)</si:unit>') + unit_xml_list_regex = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>') + + lines = xml_string.split('\n') + for line_num, line in enumerate(lines, 1): + # Check for si:unit elements + unit_match = unit_regex.search(line) + if unit_match: + content = unit_match.group(1).strip() + result[f"{line_num}"] = content + + # Check for si:unitXMLList elements + unit_xml_list_match = unit_xml_list_regex.search(line) + if unit_xml_list_match: + contents = unit_xml_list_match.group(1).strip().split() + for idx, content in enumerate(contents): + result[f"{line_num}:{idx}"] = content + + return result + + +def process_units(unit_dict): + valid_units = {} + invalid_units = {} + + for key, value in unit_dict.items(): + try: + unit = dsiUnit(value) + if unit.valid: + valid_units[key] = value # Assuming you want to return the string value + else: + invalid_units[key] = value + print(f"Warning: Invalid unit at {key} with value: {value}") + except Exception as e: + print(f"Error processing unit at {key} with value: {value}. 
Error: {e}") + invalid_units[key] = value # Optionally store the raw value or create an error object + + return valid_units, invalid_units + + +def parse_and_process(xml_string): + unit_dict = parse_plain_utf8_xml(xml_string) + valid_units, invalid_units = process_units(unit_dict) + return { + "valid_units": valid_units, + "invalid_units": invalid_units + } \ No newline at end of file diff --git a/restAPIServer.py b/restAPIServer.py index 91170e0..7a083b8 100644 --- a/restAPIServer.py +++ b/restAPIServer.py @@ -2,6 +2,8 @@ import numpy as np from fastapi import FastAPI, HTTPException from pydantic import BaseModel from dsiUnits import dsiUnit +import re +import XMLUnitExtractor # Import the newly created module app = FastAPI() class UnitRequest(BaseModel): @@ -12,6 +14,10 @@ class UnitComparisonRequest(BaseModel): unit_string2: str complete: bool = False +class XMLRequest(BaseModel): + xml: str + + @app.post("/convert/utf8/") async def convert_to_utf8(request: UnitRequest): try: @@ -54,3 +60,11 @@ async def compare_units(request: UnitComparisonRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +@app.post("/validateUnitsInXML/") +async def parse_xml(request: XMLRequest): + try: + result = XMLUnitExtractor.parse_and_process(request.xml) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/test_API.py b/test_API.py index 9c57970..84f239f 100644 --- a/test_API.py +++ b/test_API.py @@ -34,3 +34,40 @@ def test_compare_units_not_equal_but_equal_WithCompleate(): def test_invalid_unit(): response = client.post("/convert/utf8/", json={"unit_string": "not_a_unit"}) assert response.status_code == 500 # Assuming your API returns 500 for invalid units + + +def test_parse_xml(): # XML is a raw string so the unit backslashes (e.g. \metre) stay literal + xml_content = r"""<root> + <si:unit>\metre</si:unit> + <si:unitXMLList>\metre \second \kilogram</si:unitXMLList> + <si:unit>not_a_unit</si:unit> + <si:unit>\seconds</si:unit> + </root> + """ + response = 
client.post("/validateUnitsInXML/", json={"xml": xml_content}) + assert response.status_code == 200 + result = response.json() + + assert "valid_units" in result + assert "invalid_units" in result + + valid_units = result["valid_units"] + invalid_units = result["invalid_units"] + + # Check valid units + assert "2" in valid_units # Line number for <si:unit>\metre</si:unit> + assert valid_units["2"] == "\\metre" + + assert "3:0" in valid_units # Line number and index for the first entry in <si:unitXMLList> + assert valid_units["3:0"] == "\\metre" + assert "3:1" in valid_units # Line number and index for the second entry in <si:unitXMLList> + assert valid_units["3:1"] == "\\second" + assert "3:2" in valid_units # Line number and index for the third entry in <si:unitXMLList> + assert valid_units["3:2"] == "\\kilogram" + + # Check invalid units + assert "4" in invalid_units # Line number for <si:unit>not_a_unit</si:unit> + assert invalid_units["4"] == "not_a_unit" + # Check the plural form \seconds, which the test expects to be rejected + assert "5" in invalid_units # Line number for <si:unit>\seconds</si:unit> + assert invalid_units["5"] == "\\seconds" -- GitLab