Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import re
from dsiUnits import dsiUnit
def parse_plain_utf8_xml(xml_string):
    """Extract unit strings from XML text with a line-by-line regex scan.

    The text is split on ``'\\n'`` and each line is searched for a
    ``<si:unit>...</si:unit>`` element and a
    ``<si:unitXMLList>...</si:unitXMLList>`` element.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict mapping a location key to the unit text found there:

        * ``"<line>"`` -> stripped content of the ``<si:unit>`` on that line
        * ``"<line>:<idx>"`` -> the ``idx``-th whitespace-separated entry of
          the ``<si:unitXMLList>`` on that line (``idx`` starts at 0)

        Line numbers start at 1.

    Note:
        No real XML parsing is done. Only the first occurrence of each
        element per line is recorded, and an element whose opening and
        closing tags sit on different lines is not matched at all.
    """
    single_unit_pattern = re.compile(r'<si:unit>(.*?)</si:unit>')
    unit_list_pattern = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>')

    found = {}
    for number, text in enumerate(xml_string.split('\n'), start=1):
        # A lone <si:unit> is keyed by its line number only.
        single = single_unit_pattern.search(text)
        if single is not None:
            found[str(number)] = single.group(1).strip()

        # List entries are keyed by line number plus position in the list.
        listed = unit_list_pattern.search(text)
        if listed is None:
            continue
        # str.split() without arguments already ignores surrounding whitespace.
        for position, entry in enumerate(listed.group(1).split()):
            found[f"{number}:{position}"] = entry

    return found
def process_units(unit_dict):
    """Split unit strings into valid and invalid groups using ``dsiUnit``.

    Each value is passed to ``dsiUnit`` and the resulting object's ``valid``
    attribute decides which group the entry lands in. A message is printed
    for every entry that is rejected.

    Args:
        unit_dict: Mapping of location key -> unit string.

    Returns:
        A ``(valid_units, invalid_units)`` tuple of dicts. Both map the
        original key to the original string value (not to the ``dsiUnit``
        object). Entries that raised during processing are placed in
        ``invalid_units``.
    """
    accepted = {}
    rejected = {}

    for location, raw_unit in unit_dict.items():
        try:
            if not dsiUnit(raw_unit).valid:
                rejected[location] = raw_unit
                print(f"Warning: Invalid unit at {location} with value: {raw_unit}")
                continue
            accepted[location] = raw_unit
        except Exception as err:
            # Best effort: any failure while handling one unit is reported
            # and the raw value is kept as invalid so the loop can continue.
            print(f"Error processing unit at {location} with value: {raw_unit}. Error: {err}")
            rejected[location] = raw_unit

    return accepted, rejected
def parse_and_process(xml_string):
    """Extract unit strings from XML text and classify them by validity.

    Runs ``parse_plain_utf8_xml`` on the input and feeds the result to
    ``process_units``.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict with two entries, ``"valid_units"`` and ``"invalid_units"``,
        each holding the corresponding dict returned by ``process_units``.
    """
    accepted, rejected = process_units(parse_plain_utf8_xml(xml_string))
    return dict(valid_units=accepted, invalid_units=rejected)