import re

from dsiUnits import dsiUnit

# Non-greedy patterns capturing the text between the opening and closing tags
# of the two unit-bearing elements this module looks for.
_UNIT_PATTERN = re.compile(r'<si:unit>(.*?)</si:unit>')
_UNIT_XML_LIST_PATTERN = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>')


def parse_plain_utf8_xml(xml_string):
    """Collect unit strings from ``si:unit`` / ``si:unitXMLList`` elements.

    The input is treated as plain text, not parsed as XML: it is split on
    ``'\\n'`` and each line is scanned with regular expressions.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict mapping a location key to a unit string:

        * ``"<line>"`` -> stripped content of the ``<si:unit>`` element on
          that (1-based) line.
        * ``"<line>:<idx>"`` -> the ``idx``-th (0-based) whitespace-separated
          token of the ``<si:unitXMLList>`` element on that line.

    Note:
        Only the first occurrence of each element per line is picked up, and
        an element must open and close on the same line and carry no
        attributes on its opening tag to be matched.
    """
    found = {}
    for line_no, text in enumerate(xml_string.split('\n'), 1):
        # Single unit element: keyed by the line number alone.
        single = _UNIT_PATTERN.search(text)
        if single is not None:
            found[f"{line_no}"] = single.group(1).strip()

        # Unit list element: one entry per whitespace-separated token.
        listing = _UNIT_XML_LIST_PATTERN.search(text)
        if listing is not None:
            tokens = listing.group(1).strip().split()
            found.update(
                (f"{line_no}:{pos}", token) for pos, token in enumerate(tokens)
            )
    return found


def process_units(unit_dict):
    """Split unit strings into valid and invalid ones using ``dsiUnit``.

    Each value is passed to ``dsiUnit``; the resulting object's ``valid``
    attribute decides which bucket the entry lands in. A message is printed
    for every entry that is invalid or raises while being processed.

    Args:
        unit_dict: Mapping of location key -> unit string, e.g. the result of
            ``parse_plain_utf8_xml``.

    Returns:
        A ``(valid_units, invalid_units)`` tuple of dicts keyed like
        ``unit_dict``:

        * ``valid_units[key]`` is the original unit string.
        * ``invalid_units[key]`` is ``{"unit": ..., "warnings": ...}`` when
          the unit was constructed but flagged invalid, or
          ``{"unit": ..., "error": ...}`` when processing raised.
    """
    accepted = {}
    rejected = {}
    for key, value in unit_dict.items():
        try:
            parsed = dsiUnit(value)
            if not parsed.valid:
                rejected[key] = {"unit": value, "warnings": parsed.warnings}
                print(f"Warning: Invalid unit at {key} with value: {value}")
            else:
                # Keep the raw string rather than the parsed object.
                accepted[key] = value
        except Exception as exc:
            # Best-effort: a failure on one unit must not abort the others,
            # so it is reported and recorded against its key instead.
            print(f"Error processing unit at {key} with value: {value}. Error: {exc}")
            rejected[key] = {"unit": value, "error": str(exc)}
    return accepted, rejected


def parse_and_process(xml_string):
    """Extract unit strings from ``xml_string`` and classify them.

    Convenience wrapper chaining ``parse_plain_utf8_xml`` and
    ``process_units``.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict with the keys ``"valid_units"`` and ``"invalid_units"``,
        holding the two dicts returned by ``process_units``.
    """
    valid, invalid = process_units(parse_plain_utf8_xml(xml_string))
    return {"valid_units": valid, "invalid_units": invalid}