diff --git a/XMLUnitExtractor.py b/XMLUnitExtractor.py new file mode 100644 index 0000000000000000000000000000000000000000..057006725d5d645756464489d286daa76521e2e2 --- /dev/null +++ b/XMLUnitExtractor.py @@ -0,0 +1,90 @@ +import re +from dsiUnits import dsiUnit +import logging +import json +from regexGenerator import generateRegex + +dsiregExStr = generateRegex() +# Compile the regex pattern +dsiregExPattern = re.compile(dsiregExStr) + +# Configure logging to log to a file +logging.basicConfig(filename='unitValidationLog.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s') + +def parse_plain_utf8_xml(xml_string): + result = {} + + # Regular expressions to match the required XML elements + unit_regex = re.compile(r'<si:unit>(.*?)</si:unit>') + unit_xml_list_regex = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>') + + lines = xml_string.split('\n') + for line_num, line in enumerate(lines, 1): + # Check for si:unit elements + unit_match = unit_regex.search(line) + if unit_match: + content = unit_match.group(1).strip() + result[f"{line_num}"] = content + + # Check for si:unitXMLList elements + unit_xml_list_match = unit_xml_list_regex.search(line) + if unit_xml_list_match: + contents = unit_xml_list_match.group(1).strip().split() + for idx, content in enumerate(contents): + result[f"{line_num}:{idx}"] = content + + return result + + +def process_units(unit_dict): + # Static regex parser function + def validate_dsi_unit(dsi_unit_str): + return dsiregExPattern.fullmatch(dsi_unit_str) is not None + + valid_units = {} + invalid_units = {} + + for key, value in unit_dict.items(): + try: + unit = dsiUnit(value) + regExresult = validate_dsi_unit(value) + if unit.valid: + valid_units[key] = value # Assuming you want to return the string value + if not regExresult: + discrepancy = { + "type": "Regex Error", + "message": "Unit parsed as valid by dsiUnit constructor but invalid by regex", + "key": key, + "value": value + } + logging.debug(json.dumps(discrepancy)) + else: + invalid_units[key] = { + "unit": value, + "warnings": unit.warnings + } + print(f"Warning: Invalid unit at {key} with value: {value}") + if regExresult: + discrepancy = { + "type": "Regex Error", + "message": "Unit parsed as invalid by dsiUnit constructor but valid by regex", + "key": key, + "value": value + } + logging.debug(json.dumps(discrepancy)) + except Exception as e: + print(f"Error processing unit at {key} with value: {value}. Error: {e}") + invalid_units[key] = { + "unit": value, + "error": str(e) + } + + return valid_units, invalid_units + +def parse_and_process(xml_string): + unit_dict = parse_plain_utf8_xml(xml_string) + valid_units, invalid_units = process_units(unit_dict) + return { + "valid_units": valid_units, + "invalid_units": invalid_units + } \ No newline at end of file diff --git a/main.py b/main.py index be32efae02cf8ec5b4e5664fe9579f972c5f0ee4..4b46a596e6a2fa9ae113dcb1e0fc62478e8783ab 100644 --- a/main.py +++ b/main.py @@ -20,13 +20,16 @@ import math import bokehCssPTB from urllib.parse import quote from dsiUnits import dsiUnit -from bokeh.plotting import curdoc,figure -from bokeh.layouts import column, row -from bokeh.models import FileInput, Div, CustomJS, Button, TabPanel, Tabs, Dropdown, TextInput, Button, MathText, Label, Arrow, NormalHead,CheckboxGroup +from bokeh.plotting import curdoc, figure +from bokeh.layouts import column, row, GridBox +from bokeh.models import FileInput, Div, CustomJS, Button, TabPanel, Tabs, TextInput, Label, Arrow, NormalHead, CheckboxGroup from bokeh.palettes import Category10 from bokeh.events import ValueSubmit import numpy as np -colors=Category10[10] +from base64 import b64decode +import XMLUnitExtractor + +colors = Category10[10] VERSION = "0.1.0" import socket @@ -65,18 +68,18 @@ If there was an error with the calculation, please fill out the table above. Fee *Free text comment*""" -labelOffsetX=[0.0,0.0,-0.02,0.07,0.02,-0.07] -labelOffsetY=[-0.02,0.09,0,0.07,0,0.07] -class dsiparserInput(): +labelOffsetX = [0.0, 0.0, -0.02, 0.07, 0.02, -0.07] +labelOffsetY = [-0.02, 0.09, 0, 0.07, 0, 0.07] - def __init__(self,defaultInput="",additionalComparisonCallbacks=[]): - self.additionalComparisonCallbacks=additionalComparisonCallbacks +class dsiparserInput(): + def __init__(self, defaultInput="", additionalComparisonCallbacks=[]): + self.additionalComparisonCallbacks = additionalComparisonCallbacks self.dsiInput = TextInput(value=defaultInput, title="DSI unit string:", width=500) self.dsiInput.on_event(ValueSubmit, self.parseInput) self.dsiSubmitButton = Button(label="Convert", button_type="primary") self.dsiSubmitButton.on_click(self.parseInput) - self.inputRow = row(children = [self.dsiInput, self.dsiSubmitButton], css_classes = ["textInputRow"]) - self.results = column(children = []) + self.inputRow = row(children=[self.dsiInput, self.dsiSubmitButton], css_classes=["textInputRow"]) + self.results = column(children=[]) self.widget = column(children=[self.inputRow, self.results], css_classes=["doubleColumn"]) self.valideUnit = False self.dsiTree = None @@ -199,49 +202,47 @@ class dsiCompGraphGen: y2 = quant2['coords'][1] name2 = quant2['name'] - if not name1+'_'+name2 in self.arrows: + if not name1 + '_' + name2 in self.arrows: nh = NormalHead(fill_color=colors[colIDX], fill_alpha=0.5, line_color=colors[colIDX]) - self.arrows[name1+'_'+name2]=Arrow(end=nh, line_color=colors[colIDX], line_dash=[15, 5],x_start=x1, y_start=y1, x_end=x2, y_end=y2) - self.plot.add_layout(self.arrows[name1+'_'+name2]) - scale12, baseUnit = quant1['baseUnit'].isScalablyEqualTo(quant2['baseUnit'],complete=complete)# TODO remove this un neescary reclaculation - #isSpecial, Latex = format_special_scale_factor(scale12) + self.arrows[name1 + '_' + name2] = Arrow(end=nh, line_color=colors[colIDX], line_dash=[15, 5], x_start=x1, y_start=y1, x_end=x2, y_end=y2) + self.plot.add_layout(self.arrows[name1 + '_' + name2]) + scale12, baseUnit = quant1['baseUnit'].isScalablyEqualTo(quant2['baseUnit'], complete=complete) isSpecial = False - Latex="" + Latex = "" if isSpecial: - text="{:.4g}".format(scale12)+" = "+Latex + text = "{:.4g}".format(scale12) + " = " + Latex else: - text="{:.4g}".format(scale12) + text = "{:.4g}".format(scale12) if not name1 + '_' + name2 in self.scalFactorLables: - angle_deg = -1*np.arctan2(y2 - y1, x2 - x1) - if abs(angle_deg)>np.pi/8: - angle_deg+=-np.pi/2 - self.scalFactorLables[name1 + '_' + name2]=Label(x=np.abs(x1-x2)/2+np.min([x1,x2])+labelOffsetX[colIDX], y=np.abs(y1-y2)/2+np.min([y1,y2])+labelOffsetY[colIDX], text=text, text_font_size="24px", text_baseline=unitToDraw['text_baseLine'], text_align=unitToDraw['text_align'],text_color=colors[colIDX],angle=angle_deg) + angle_deg = -1 * np.arctan2(y2 - y1, x2 - x1) + if abs(angle_deg) > np.pi / 8: + angle_deg += -np.pi / 2 + self.scalFactorLables[name1 + '_' + name2] = Label(x=np.abs(x1 - x2) / 2 + np.min([x1, x2]) + labelOffsetX[colIDX], y=np.abs(y1 - y2) / 2 + np.min([y1, y2]) + labelOffsetY[colIDX], text=text, text_font_size="24px", text_baseline=unitToDraw['text_baseLine'], text_align=unitToDraw['text_align'], text_color=colors[colIDX], angle=angle_deg) self.plot.add_layout(self.scalFactorLables[name1 + '_' + name2]) else: - self.scalFactorLables[name1 + '_' + name2].text=text + self.scalFactorLables[name1 + '_' + name2].text = text if not name2 + '_' + name1 in self.arrows: - nh = NormalHead(fill_color=colors[colIDX+1], fill_alpha=0.5, line_color=colors[colIDX+1]) - self.arrows[name2+'_'+name1]=Arrow(end=nh, line_color=colors[colIDX+1], line_dash=[15, 5], x_start=x2, y_start=y2+0.05, x_end=x1, y_end=y1+0.05) - self.plot.add_layout(self.arrows[name2+'_'+name1]) - - scale21, baseUnit = quant2['baseUnit'].isScalablyEqualTo(quant1['baseUnit'],complete=complete) - #isSpecial, Latex = format_special_scale_factor(scale21) - isSpecial=False - Latex="" + nh = NormalHead(fill_color=colors[colIDX + 1], fill_alpha=0.5, line_color=colors[colIDX + 1]) + self.arrows[name2 + '_' + name1] = Arrow(end=nh, line_color=colors[colIDX + 1], line_dash=[15, 5], x_start=x2, y_start=y2 + 0.05, x_end=x1, y_end=y1 + 0.05) + self.plot.add_layout(self.arrows[name2 + '_' + name1]) + + scale21, baseUnit = quant2['baseUnit'].isScalablyEqualTo(quant1['baseUnit'], complete=complete) + isSpecial = False + Latex = "" if isSpecial: - text="{:.4g}".format(scale21)+" = "+Latex + text = "{:.4g}".format(scale21) + " = " + Latex else: - text="{:.4g}".format(scale21) + text = "{:.4g}".format(scale21) if not name2 + '_' + name1 in self.scalFactorLables: - angle_deg = -1*np.arctan2(y2 - y1, x2 - x1) - if abs(angle_deg)>np.pi/8: - angle_deg+=-np.pi/2 - self.scalFactorLables[name2 + '_' + name1]=Label(x=np.abs(x1-x2)/2+np.min([x1,x2])+labelOffsetX[colIDX+1], y=np.abs(y1-y2)/2+np.min([y1,y2])+labelOffsetY[colIDX+1], text=text, text_font_size="24px", text_baseline=unitToDraw['text_baseLine'], text_align=unitToDraw['text_align'],text_color=colors[colIDX+1],angle=angle_deg) + angle_deg = -1 * np.arctan2(y2 - y1, x2 - x1) + if abs(angle_deg) > np.pi / 8: + angle_deg += -np.pi / 2 + self.scalFactorLables[name2 + '_' + name1] = Label(x=np.abs(x1 - x2) / 2 + np.min([x1, x2]) + labelOffsetX[colIDX + 1], y=np.abs(y1 - y2) / 2 + np.min([y1, y2]) + labelOffsetY[colIDX + 1], text=text, text_font_size="24px", text_baseline=unitToDraw['text_baseLine'], text_align=unitToDraw['text_align'], text_color=colors[colIDX + 1], angle=angle_deg) self.plot.add_layout(self.scalFactorLables[name2 + '_' + name1]) else: - self.scalFactorLables[name2+ '_' + name1].text=text - colIDX+=2 + self.scalFactorLables[name2 + '_' + name1].text = text + colIDX += 2 class page(): def __init__(self): @@ -249,62 +250,116 @@ class page(): curdoc().title = "DSI to Latex" curdoc().add_root(bokehCssPTB.getStyleDiv()) curdoc().theme = bokehCssPTB.getTheme() - self.dsiInput1 = dsiparserInput(defaultInput="\\milli\\newton\\metre",additionalComparisonCallbacks=[self.clearComparison,self.tryComparison]) - self.dsiInput2 = dsiparserInput(defaultInput="\\kilo\\joule",additionalComparisonCallbacks=[self.clearComparison,self.tryComparison]) - self.inputs=row([self.dsiInput1.widget, self.dsiInput2.widget]) - curdoc().add_root(self.inputs) + # Tab 1: DSI Parser and Comparator + self.dsiInput1 = dsiparserInput(defaultInput="\\milli\\newton\\metre", additionalComparisonCallbacks=[self.clearComparison, self.tryComparison]) + self.dsiInput2 = dsiparserInput(defaultInput="\\kilo\\joule", additionalComparisonCallbacks=[self.clearComparison, self.tryComparison]) + self.inputs = row([self.dsiInput1.widget, self.dsiInput2.widget]) self.comapreButton = Button(label="Compare", button_type="primary") self.comapreButton.on_click(self.compare) LABELS = ['Complete Comparison'] self.completeComCBGPR = CheckboxGroup(labels=LABELS, active=[]) - self.compaReresult = Div(text = "", css_classes = ["msg-positive"],visible=False) - self.compareRow = row(children = [self.comapreButton,self.completeComCBGPR,self.compaReresult], css_classes = ["textInputRow"]) - curdoc().add_root(self.compareRow) - self.dsiCompGraphGen=dsiCompGraphGen(self.dsiInput1,self.dsiInput2) - curdoc().add_root(self.dsiCompGraphGen.widget) + self.compaReresult = Div(text="", css_classes=["msg-positive"], visible=False) + self.compareRow = row(children=[self.comapreButton, self.completeComCBGPR, self.compaReresult], css_classes=["textInputRow"]) + self.dsiCompGraphGen = dsiCompGraphGen(self.dsiInput1, self.dsiInput2) self.createIssueButton = Button(label="Report conversion error", disabled=True) - # self.createIssueButton.on_click(self.createIssueUrl) - curdoc().add_root(self.dsiCompGraphGen.widget) - curdoc().add_root(self.createIssueButton) + tab1_layout = column(self.inputs, self.compareRow, self.dsiCompGraphGen.widget, self.createIssueButton) + tab1 = TabPanel(child=tab1_layout, title="DSI Parser and Comparator") + + # Tab 2: XML Unit Validator + self.title = Div(text="<h4>Please upload DCC or other XML containing D-SI Unit Strings here</h4>") + self.upload_widget = FileInput(accept=".xml") + self.upload_widget.on_change('value', self.process_xml) + self.valid_units_message = Div(text="Upload an XML file to validate units", css_classes=["msg-neutral"]) + self.invalid_units_message = Div(text="") + self.invalid_units_list = GridBox() + + tab2_layout = column(self.title, self.upload_widget, self.valid_units_message, self.invalid_units_message, self.invalid_units_list) + tab2 = TabPanel(child=tab2_layout, title="XML Unit Validator") + + + # Handle URL parameter active tab + tabs = Tabs(tabs=[tab1, tab2]) + try: + active_tab = int(curdoc().session_context.request.arguments["active_tab"][0]) + except: + active_tab = 0 + if active_tab >= len(tabs.tabs): + active_tab = 0 + tabs.active=active_tab + + curdoc().add_root(tabs) + + def process_xml(self, attr, old, new): + decoded = b64decode(new).decode('utf-8') + result = XMLUnitExtractor.parse_and_process(decoded) + valid_units = result['valid_units'] + invalid_units = result['invalid_units'] + + # Update valid units message + if valid_units: + self.valid_units_message.text = f"{len(valid_units)} valid units found" + self.valid_units_message.css_classes = ["msg-positive"] + else: + self.valid_units_message.text = "No valid units found" + self.valid_units_message.css_classes = ["msg-negative"] + + # Update invalid units message + if invalid_units: + self.invalid_units_message.text = f"{len(invalid_units)} invalid units found" + self.invalid_units_message.css_classes = ["msg-negative"] + else: + self.invalid_units_message.text = "" + self.invalid_units_message.css_classes = [""] + + # Update invalid units content + self.invalid_units_list.children = [] + for row_num, (line_num, details) in enumerate(invalid_units.items()): + warnings_div = column([Div(text=warning, css_classes=["msg-negative"]) for warning in details.get("warnings", [])]) + unit_row = [ + (Div(text=str(line_num), height_policy="min", css_classes=["msg-neutral"]), 2*row_num, 0), + (Div(text=details["unit"], css_classes=["msg-raw"]), 2*row_num, 1), + (Div(text=f"{dsiUnit(details['unit']).toLatex()}", width=150), 2*row_num, 2), + (warnings_div, 2*row_num, 3), + (Div(text="", height=20), 2*row_num+1, 0, 1, 4) + ] + self.invalid_units_list.children += unit_row def compare(self): self.dsiInput1.parseInput() self.dsiInput2.parseInput() - completeConversion=self.completeComCBGPR.active==[0] + completeConversion = self.completeComCBGPR.active == [0] try: - scalfactor,baseUnit=self.dsiInput1.dsiTree.isScalablyEqualTo(self.dsiInput2.dsiTree,complete=completeConversion) + scalfactor, baseUnit = self.dsiInput1.dsiTree.isScalablyEqualTo(self.dsiInput2.dsiTree, complete=completeConversion) if not math.isnan(scalfactor): - self.compaReresult.text = "The two units are equal up to a scaling factor of "+str(scalfactor)+" and a base unit of "+str(baseUnit) - self.compaReresult.css_classes=["msg-positive"] + self.compaReresult.text = "The two units are equal up to a scaling factor of " + str(scalfactor) + " and a base unit of " + str(baseUnit) + self.compaReresult.css_classes = ["msg-positive"] else: self.compaReresult.text = "The two units are not equal" self.compaReresult.css_classes = ["msg-negative"] if self.dsiInput1.valideUnit and self.dsiInput2.valideUnit: - self.dsiCompGraphGen.reDraw(self.dsiInput1.dsiTree,self.dsiInput2.dsiTree,complete=completeConversion) + self.dsiCompGraphGen.reDraw(self.dsiInput1.dsiTree, self.dsiInput2.dsiTree, complete=completeConversion) else: self.dsiCompGraphGen.flush() except AttributeError as Ae: - warnings.warn("AttributeError: "+str(Ae)) + warnings.warn("AttributeError: " + str(Ae)) self.compaReresult.text = "The two units are not equal" self.compaReresult.css_classes = ["msg-negative"] self.dsiCompGraphGen.flush() self.compaReresult.visible = True - self.createIssueButton.disabled=False - self.createIssueButton.button_type="danger" - self.createIssueButton.js_on_event("button_click",CustomJS(code=f"window.open('{self.createIssueUrl()}', '_blank');")) + self.createIssueButton.disabled = False + self.createIssueButton.button_type = "danger" + self.createIssueButton.js_on_event("button_click", CustomJS(code=f"window.open('{self.createIssueUrl()}', '_blank');")) def createIssueUrl(self): - issueArgs=[self.dsiInput1.dsiInput.value,str(self.dsiInput1.dsiTree),self.dsiInput2.dsiInput.value,str(self.dsiInput2.dsiTree)] - comGenAtrssFroIssue=['baseUnit', 'scalfactorAB', 'scalfactorBA', 'scalfactorABase', 'scalfactorBaseA', 'scalfactorBBase', 'scalfactorBaseB'] + issueArgs = [self.dsiInput1.dsiInput.value, str(self.dsiInput1.dsiTree), self.dsiInput2.dsiInput.value, str(self.dsiInput2.dsiTree)] + comGenAtrssFroIssue = ['baseUnit', 'scalfactorAB', 'scalfactorBA', 'scalfactorABase', 'scalfactorBaseA', 'scalfactorBBase', 'scalfactorBaseB'] for comGenAtrss in comGenAtrssFroIssue: try: - issueArgs.append(str(getattr(self.dsiCompGraphGen,comGenAtrss))) + issueArgs.append(str(getattr(self.dsiCompGraphGen, comGenAtrss))) except AttributeError as Ae: - issueArgs.append("AttributeError: "+str(Ae)) - #quantitiesToAdd=[self.dsiInput1.dsiInput.value,str(self.dsiInput1.dsiTree),self.dsiInput2.dsiInput.value,str(self.dsiInput2.dsiTree),str(self.dsiCompGraphGen.baseUnit),self.dsiCompGraphGen.scalfactorAB,self.dsiCompGraphGen.scalfactorBA,self.dsiCompGraphGen.scalfactorABase,self.dsiCompGraphGen.scalfactorBaseA,self.dsiCompGraphGen.scalfactorBBase,self.dsiCompGraphGen.scalfactorBaseB] - #issueTemplate=open('./issue.md').read() #TODO add file inculde instead of the str.... - filledResult=issueTemplate.format(*issueArgs) + issueArgs.append("AttributeError: " + str(Ae)) + filledResult = issueTemplate.format(*issueArgs) filledTitle = f'Unexpected comparison result: {self.dsiInput1.dsiInput.value} to {self.dsiInput2.dsiInput.value}' issueUrl = r'https://gitlab1.ptb.de/digitaldynamicmeasurement/dsi-parser-frontend/-/issues/new?' title = quote(filledTitle) @@ -315,13 +370,12 @@ class page(): def clearComparison(self): self.dsiCompGraphGen.flush() self.compaReresult.text = "" - self.createIssueButton.disabled=True - self.createIssueButton.button_type="primary" + self.createIssueButton.disabled = True + self.createIssueButton.button_type = "primary" self.compaReresult.visible = False def tryComparison(self): if self.dsiInput1.valideUnit and self.dsiInput2.valideUnit: self.compare() - thisPage = page() diff --git a/restAPIServer.py b/restAPIServer.py index 91170e0e7a8fab09169c31bac471612f7429fa2f..7a083b8f2dee987765574cd276e1056aa3799947 100644 --- a/restAPIServer.py +++ b/restAPIServer.py @@ -2,6 +2,8 @@ import numpy as np from fastapi import FastAPI, HTTPException from pydantic import BaseModel from dsiUnits import dsiUnit +import re +import XMLUnitExtractor # Import the newly created module app = FastAPI() class UnitRequest(BaseModel): @@ -12,6 +14,10 @@ class UnitComparisonRequest(BaseModel): unit_string2: str complete: bool = False +class XMLRequest(BaseModel): + xml: str + + @app.post("/convert/utf8/") async def convert_to_utf8(request: UnitRequest): try: @@ -54,3 +60,11 @@ async def compare_units(request: UnitComparisonRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +@app.post("/validateUnitsInXML/") +async def parse_xml(request: XMLRequest): + try: + result = XMLUnitExtractor.parse_and_process(request.xml) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + diff --git a/test_API.py b/test_API.py index 9c57970a31477eede2567ef4a051fc142e93faee..317f870864c68cade380638a3a1528ef687c1ebc 100644 --- a/test_API.py +++ b/test_API.py @@ -34,3 +34,43 @@ def test_compare_units_not_equal_but_equal_WithCompleate(): def test_invalid_unit(): response = client.post("/convert/utf8/", json={"unit_string": "not_a_unit"}) assert response.status_code == 500 # Assuming your API returns 500 for invalid units + + +def test_parse_xml(): + xml_content = """<root> + <si:unit>\metre</si:unit> + <si:unitXMLList>\metre \second \kilogram</si:unitXMLList> + <si:unit>not_a_unit</si:unit> + <si:unit>\seconds</si:unit> + </root> + """ + response = client.post("/validateUnitsInXML/", json={"xml": xml_content}) + assert response.status_code == 200 + result = response.json() + + assert "valid_units" in result + assert "invalid_units" in result + + valid_units = result["valid_units"] + invalid_units = result["invalid_units"] + + # Check valid units + assert "2" in valid_units # Line number for <si:unit>\metre</si:unit> + assert valid_units["2"] == "\\metre" + + assert "3:0" in valid_units # Line number and index for the first entry in <si:unitXMLList> + assert valid_units["3:0"] == "\\metre" + assert "3:1" in valid_units # Line number and index for the second entry in <si:unitXMLList> + assert valid_units["3:1"] == "\\second" + assert "3:2" in valid_units # Line number and index for the third entry in <si:unitXMLList> + assert valid_units["3:2"] == "\\kilogram" + + # Check invalid units + assert "4" in invalid_units # Line number for <si:unit>not_a_unit</si:unit> + assert invalid_units["4"]['unit'] == "not_a_unit" + assert invalid_units["4"]['warnings']==[r'String should start with \, string given was «not_a_unit»', + r'The identifier «not_a_unit» does not match any D-SI units!'] + # Check invalid units + assert "5" in invalid_units # Line number for <si:unit>not_a_unit</si:unit> + assert invalid_units["5"]['unit'] == "\seconds" + assert invalid_units["5"]['warnings']==[r'The identifier «seconds» does not match any D-SI units! Did you mean one of these «\second, \arcsecond»?']