import re
from dsiUnits import dsiUnit
import logging
import json
from regexGenerator import generateRegex

# Build the D-SI unit regex source string once, at import time, using the
# project's generateRegex() helper.
dsiregExStr = generateRegex()
# Compile the regex pattern once so that every validation call can reuse it
dsiregExPattern = re.compile(dsiregExStr)

# Configure logging to log to a file.
# NOTE: this runs as an import-time side effect. Messages at DEBUG level and
# above are written to 'unitValidationLog.log' (a relative path), each prefixed
# with a timestamp and the level name. process_units() relies on this to record
# regex/dsiUnit discrepancies via logging.debug().
logging.basicConfig(filename='unitValidationLog.log', level=logging.DEBUG, format='%(asctime)s %(levelname)s %(message)s')

def parse_plain_utf8_xml(xml_string):
    """Collect unit strings from XML text using a line-by-line regex scan.

    The text is split on ``'\\n'`` and lines are numbered starting at 1.
    No XML parser is involved, so only elements that open and close on the
    same line (and carry no attributes) are recognised.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict, in order of discovery, mapping location keys to unit strings:

        * ``"<line>"`` -> whitespace-stripped content of the first
          ``<si:unit>`` element found on that line.
        * ``"<line>:<index>"`` -> the ``index``-th (0-based)
          whitespace-separated entry of the first ``<si:unitXMLList>``
          element found on that line.
    """
    single_unit_re = re.compile(r'<si:unit>(.*?)</si:unit>')
    unit_list_re = re.compile(r'<si:unitXMLList>(.*?)</si:unitXMLList>')

    found = {}
    for number, text in enumerate(xml_string.split('\n'), start=1):
        # A single unit is keyed by its line number alone.
        single = single_unit_re.search(text)
        if single is not None:
            found[str(number)] = single.group(1).strip()

        # A unit list contributes one entry per whitespace-separated token.
        listed = unit_list_re.search(text)
        if listed is None:
            continue
        # split() without arguments already discards surrounding whitespace.
        tokens = listed.group(1).split()
        found.update(
            {f"{number}:{position}": token for position, token in enumerate(tokens)}
        )

    return found


def process_units(unit_dict):
    """Validate each unit string with ``dsiUnit`` and cross-check it against the regex.

    For every ``key -> unit string`` pair a ``dsiUnit`` object is constructed
    and the string is also matched in full against the module-level
    ``dsiregExPattern``. The ``dsiUnit`` verdict (its ``valid`` attribute)
    decides which result dict the entry goes to; whenever the regex verdict
    disagrees, a JSON-encoded discrepancy record is written via
    ``logging.debug``.

    Args:
        unit_dict: Mapping of location keys to unit strings, e.g. the result
            of ``parse_plain_utf8_xml``.

    Returns:
        A ``(valid_units, invalid_units)`` tuple of dicts:

        * ``valid_units`` maps key -> the original unit string.
        * ``invalid_units`` maps key -> ``{"unit": ..., "warnings": ...}``
          when ``dsiUnit`` reports the unit as invalid, or
          ``{"unit": ..., "error": ...}`` when processing raised an exception.

    Side effects:
        Prints a message to stdout for every invalid or failing unit.
    """
    accepted = {}
    rejected = {}

    def log_discrepancy(message, key, value):
        # Both kinds of disagreement share one record layout and type label.
        record = {
            "type": "Regex Error",
            "message": message,
            "key": key,
            "value": value
        }
        logging.debug(json.dumps(record))

    for key, value in unit_dict.items():
        # Any failure for one entry is recorded and must not abort the batch.
        try:
            parsed = dsiUnit(value)
            regex_ok = dsiregExPattern.fullmatch(value) is not None

            if not parsed.valid:
                rejected[key] = {
                    "unit": value,
                    "warnings": parsed.warnings
                }
                print(f"Warning: Invalid unit at {key} with value: {value}")
                if regex_ok:
                    log_discrepancy(
                        "Unit parsed as invalid by dsiUnit constructor but valid by regex",
                        key,
                        value,
                    )
                continue

            # Valid according to dsiUnit: keep the plain string value.
            accepted[key] = value
            if not regex_ok:
                log_discrepancy(
                    "Unit parsed as valid by dsiUnit constructor but invalid by regex",
                    key,
                    value,
                )
        except Exception as e:
            print(f"Error processing unit at {key} with value: {value}. Error: {e}")
            rejected[key] = {
                "unit": value,
                "error": str(e)
            }

    return accepted, rejected

def parse_and_process(xml_string):
    """Extract all unit strings from ``xml_string`` and validate them.

    Chains ``parse_plain_utf8_xml`` and ``process_units``.

    Args:
        xml_string: The XML document as a single string.

    Returns:
        A dict with two entries, ``"valid_units"`` and ``"invalid_units"``,
        holding the two dicts returned by ``process_units``.
    """
    extracted = parse_plain_utf8_xml(xml_string)
    valid, invalid = process_units(extracted)
    return dict(valid_units=valid, invalid_units=invalid)