Skip to content
Snippets Groups Projects
XMLUnitExtractor.py 1.88 KiB
Newer Older
import re
from dsiUnits import dsiUnit


def parse_plain_utf8_xml(xml_string):
    """Collect unit strings from ``si:unit`` / ``si:unitXMLList`` elements.

    The input is scanned line by line (split on ``'\\n'``, numbered from 1)
    with regular expressions; no real XML parsing takes place. As a
    consequence, only elements that open and close on the same line, with
    no attributes on the opening tag, are detected, and only the first
    occurrence of each element type per line is considered.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict mapping a location key to the extracted unit text:

        * ``"<line>"`` -> stripped content of the ``<si:unit>`` element
          found on that line;
        * ``"<line>:<index>"`` -> the ``index``-th (0-based)
          whitespace-separated entry of the ``<si:unitXMLList>`` element
          found on that line.
    """
    single_unit_pattern = re.compile(r'<si:unit>(.*?)</si:unit>')
    unit_list_pattern = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>')

    found = {}
    for number, text in enumerate(xml_string.split('\n'), start=1):
        single = single_unit_pattern.search(text)
        if single is not None:
            found[str(number)] = single.group(1).strip()

        listed = unit_list_pattern.search(text)
        if listed is None:
            continue

        # split() without arguments already discards surrounding whitespace,
        # so an empty or blank list contributes no entries.
        found.update(
            (f"{number}:{position}", token)
            for position, token in enumerate(listed.group(1).split())
        )

    return found


def process_units(unit_dict):
    """Split unit strings into valid and invalid ones using ``dsiUnit``.

    Each value is passed to ``dsiUnit``; the resulting object's ``valid``
    attribute decides which bucket the entry lands in. Any exception raised
    while handling an entry is caught, reported on stdout and recorded, so
    one bad entry never aborts the whole run.

    Args:
        unit_dict: Mapping of location key -> unit string.

    Returns:
        A ``(valid_units, invalid_units)`` tuple of dicts keyed like
        ``unit_dict``:

        * ``valid_units[key]`` is the original unit string;
        * ``invalid_units[key]`` is ``{"unit": <string>, "warnings": ...}``
          when the unit was constructed but is not valid, or
          ``{"unit": <string>, "error": <message>}`` when an exception
          occurred.
    """
    accepted = {}
    rejected = {}

    for location, raw in unit_dict.items():
        try:
            parsed = dsiUnit(raw)
            if not parsed.valid:
                rejected[location] = {
                    "unit": raw,
                    "warnings": parsed.warnings,
                }
                print(f"Warning: Invalid unit at {location} with value: {raw}")
                continue
            # Only the original string is kept, not the dsiUnit object.
            accepted[location] = raw
        except Exception as err:
            print(f"Error processing unit at {location} with value: {raw}. Error: {err}")
            rejected[location] = {
                "unit": raw,
                "error": str(err),
            }

    return accepted, rejected

def parse_and_process(xml_string):
    """Extract unit strings from ``xml_string`` and validate them.

    Chains ``parse_plain_utf8_xml`` and ``process_units``.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict with two entries, ``"valid_units"`` and ``"invalid_units"``,
        holding the two dicts returned by ``process_units``.
    """
    valid, invalid = process_units(parse_plain_utf8_xml(xml_string))
    return dict(valid_units=valid, invalid_units=invalid)