From 8ffa653a38f598ca41949fab19084e825a3b8f5b Mon Sep 17 00:00:00 2001 From: Benedikt Seeger <benedikt.seeger@ptb.de> Date: Mon, 14 Apr 2025 08:46:11 +0200 Subject: [PATCH] added BIPMRP syntax parsing to constructor --- pyproject.toml | 4 +- src/dsiParser.py | 59 +++++++++- src/dsiUnits.py | 58 ++++++++++ src/unitStrings.py | 80 +++++++++++++- tests/test_dsiUnits.py | 244 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 439 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9f51898..47c8548 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "dsiunits" # Ensure this is correctly specified -version = "2.4.2" +version = "2.5.0" description = "This is a Python module for handling the SI units as objects in Python, parsing them from strings and converting them to Latex and Unicode, as well as performing math operations and calculating scale factors." authors = [ { name="Benedikt Seeger", email="benedikt.seeger@ptb.de" }, @@ -17,4 +17,4 @@ classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", "Operating System :: OS Independent", -] \ No newline at end of file +] diff --git a/src/dsiParser.py b/src/dsiParser.py index 02f7dd0..29b63d1 100644 --- a/src/dsiParser.py +++ b/src/dsiParser.py @@ -18,7 +18,7 @@ import warnings from fractions import Fraction import difflib -from unitStrings import _dsiPrefixesLatex, _dsiUnitsLatex, _dsiKeyWords +from unitStrings import _dsiPrefixesLatex, _dsiUnitsLatex, _dsiKeyWords,_ascii_to_dsi_unit_map, _dsiPrefixesUTF8 from dsiUnitNode import dsiUnitNode @@ -84,6 +84,9 @@ class dsiParser: _warn(f"Double backslash found in string, treating as one backslash: «{dsiString}»", RuntimeWarning)) dsiString = dsiString.replace(r'\\', '\\') + if not dsiString.startswith("\\") and not dsiString.startswith("|") and len(dsiString)>0: + # if the string does not start with a 
def _parseBipmRp(self, rp_string: str):
    """
    Parses BIPM-RP or PID-style strings like 'kg.mm2.ns-2.℃' into D-SI trees.

    The input is split on '.'. Every component must be a symbol (an optional
    prefix symbol directly followed by a unit symbol) with an optional signed
    integer exponent appended, e.g. 'mm2' or 'ns-2'.

    A symbol is first looked up as a complete unit symbol and is only split
    into prefix + unit when that lookup fails. This keeps symbols that merely
    begin with a prefix letter (such as 'kg') from being mis-split.

    Args:
        rp_string (str): Unit expression in BIPM-RP syntax.

    Returns:
        (str, list[list[dsiUnitNode]], list of warnings, bool isNonDsi)
        On success the tree is ``[nodeList]`` with one node per component and
        isNonDsi is False. If a component is malformed or its unit is unknown,
        a RuntimeWarning is emitted through ``_warn`` and the tree contains a
        single invalid node carrying the whole input, with isNonDsi True.
    """
    warningMessages = []
    nodeList = []

    def invalidResult(message: str):
        # Record the warning and hand the whole input back as one invalid node.
        warningMessages.append(_warn(message, RuntimeWarning))
        return (rp_string, [[dsiUnitNode('', rp_string, valid=False)]], warningMessages, True)

    # UTF-8 prefix symbol -> D-SI prefix name, built once instead of scanning
    # the table for every component; the first name wins for duplicate symbols.
    prefixNameBySymbol = {}
    for prefixName, prefixSymbol in _dsiPrefixesUTF8.items():
        prefixNameBySymbol.setdefault(prefixSymbol, prefixName)
    # Longest symbols first so that e.g. 'da' is tried before 'd'.
    prefixSymbols = sorted((sym for sym in prefixNameBySymbol if sym), key=len, reverse=True)
    noPrefixName = prefixNameBySymbol.get('', '')

    for comp in rp_string.strip().split('.'):
        match = re.fullmatch(r"([a-zA-ZµΩ℃°]+)([-+]?[0-9]+)?", comp)
        if not match:
            return invalidResult(f"Invalid BIPM-RP component: «{comp}»")

        symbol, exponent_str = match.groups()
        exponent = Fraction(exponent_str) if exponent_str else Fraction(1)

        # Whole-symbol lookup has priority over any prefix + unit split.
        matched_prefix = noPrefixName
        matched_unit = _ascii_to_dsi_unit_map.get(symbol)
        if matched_unit is None:
            for prefixSymbol in prefixSymbols:
                if not symbol.startswith(prefixSymbol):
                    continue
                candidate = _ascii_to_dsi_unit_map.get(symbol[len(prefixSymbol):])
                if candidate is not None:
                    matched_prefix = prefixNameBySymbol[prefixSymbol]
                    matched_unit = candidate
                    break

        if matched_unit is None:
            return invalidResult(f"Unknown unit in BIPM-RP string: «{symbol}»")

        nodeList.append(dsiUnitNode(matched_prefix, matched_unit, exponent))

    return (rp_string, [nodeList], warningMessages, False)
def toSIRP(self, pid: bool = False) -> str:
    """
    Converts this D-SI unit to BIPM SI Reference Point (SI RP) endpoint syntax
    or full PID syntax if `pid=True`.

    The conversion works on a deep copy, so this instance is not modified.
    `_removePer()` is applied to the copy and the nodes of its first tree entry
    are joined with '.'; an exponent different from 1 is appended to its token.

    A scale factor different from 1.0 is expressed as a prefix on the first
    node. That is only done when the first node has no prefix and exponent 1,
    because otherwise overwriting its prefix would describe a different unit.

    Prefixes or units without a known symbol are emitted by name instead of
    being dropped, so an invalid unit does not turn into a valid-looking one.

    Args:
        pid (bool): If True, generate full PID URL instead of compact RP string.

    Returns:
        str: Compact SI RP string or full PID URL.

    Raises:
        NotImplementedError: If the scale factor matches no prefix scale, if it
            cannot be attached to the first node, or if a node has a
            non-integer exponent.
    """
    import copy
    import math

    unit_copy = copy.deepcopy(self)
    unit_copy._removePer()
    # Guard against an empty tree instead of failing with IndexError.
    nodes = unit_copy.tree[0] if unit_copy.tree else []

    scale_factor = unit_copy.scaleFactor
    if scale_factor != 1.0:
        # Tolerant comparison: scale factors produced by float arithmetic are
        # rarely bit-identical to the tabulated prefix scales.
        prefix_name = next(
            (pfx for pfx, factor in _dsiPrefixesScales.items()
             if math.isclose(factor, scale_factor)),
            None,
        )
        if prefix_name is None:
            raise NotImplementedError(f"Unsupported scale factor for SI RP: {scale_factor}")
        if not nodes or nodes[0].prefix or nodes[0].exponent != 1:
            raise NotImplementedError(
                f"Scale factor {scale_factor} cannot be expressed as a prefix on the first unit for SI RP."
            )
        nodes[0].prefix = prefix_name

    parts = []
    for node in nodes:
        if not float(node.exponent).is_integer():
            raise NotImplementedError("Non-integer exponents not supported in SI RP format.")
        exp = int(node.exponent)

        node_prefix = node.prefix or ""
        prefix_sym = _dsiPrefixesUTF8.get(node_prefix, node_prefix)
        unit_sym = _dsiUnitsUTF8.get(node.unit, node.unit)

        if pid:
            # Full PID format uses spelled-out prefix and unit names.
            token = (_prefix_symbol_to_pid.get(prefix_sym, node_prefix)
                     + _unit_symbol_to_pid.get(unit_sym, node.unit))
        else:
            # Short RP format uses the symbols.
            token = prefix_sym + unit_sym

        if exp != 1:
            token += str(exp)
        parts.append(token)

    joined = ".".join(parts)
    if pid:
        return "https://si-digital-framework.org/SI/units/" + joined
    return joined
diff --git a/src/unitStrings.py b/src/unitStrings.py index 31628e4..7fde553 100644 --- a/src/unitStrings.py +++ b/src/unitStrings.py @@ -179,7 +179,7 @@ _dsiUnitsLatex = { 'planckbar':r'\hbar' } # Comprehensive mapping from ASCII/UTF-8 representations to D-SI LaTeX strings -ascii_to_dsi_unit_map = { +_ascii_to_dsi_unit_map = { 'kg': 'kilogram', 'm': 'metre', 's': 'second', @@ -203,7 +203,7 @@ ascii_to_dsi_unit_map = { 'Wb': 'weber', 'T': 'tesla', 'H': 'henry', - '°C': 'degreecelsius', + '℃': 'degreecelsius', 'lm': 'lumen', 'lx': 'lux', 'Bq': 'becquerel', @@ -263,7 +263,7 @@ _dsiUnitsUTF8 = { 'weber': 'Wb', 'tesla': 'T', 'henry': 'H', - 'degreecelsius': '°C', + 'degreecelsius': '℃', 'lumen': 'lm', 'lux': 'lx', 'becquerel': 'Bq', @@ -300,6 +300,80 @@ _dsiUnitsUTF8 = { 'planckbar': 'ħ' } +_prefix_symbol_to_pid = { + 'q': 'quecto', + 'r': 'ronto', + 'y': 'yocto', + 'z': 'zepto', + 'a': 'atto', + 'f': 'femto', + 'p': 'pico', + 'n': 'nano', + 'µ': 'micro', + 'm': 'milli', + 'c': 'centi', + 'd': 'deci', + 'da': 'deca', + 'h': 'hecto', + 'k': 'kilo', + 'M': 'mega', + 'G': 'giga', + 'T': 'tera', + 'P': 'peta', + 'E': 'exa', + 'Z': 'zetta', + 'Y': 'yotta', + 'R': 'ronna', + 'Q': 'quetta', + '': '', # no prefix +} + +_unit_symbol_to_pid = { + 'A': 'ampere', + 'Bq': 'becquerel', + 'cd': 'candela', + 'C': 'coulomb', + '℃': 'degreeCelsius', + 'F': 'farad', + 'Gy': 'gray', + 'H': 'henry', + 'Hz': 'hertz', + 'J': 'joule', + 'kat': 'katal', + 'K': 'kelvin', + 'kg': 'kilogram', + 'lm': 'lumen', + 'lx': 'lux', + 'm': 'metre', + 'mol': 'mole', + 'N': 'newton', + 'Ω': 'ohm', + 'Pa': 'pascal', + 'rad': 'radian', + 's': 'second', + 'S': 'siemens', + 'Sv': 'sievert', + 'sr': 'steradian', + 'T': 'tesla', + 'V': 'volt', + 'W': 'watt', + 'Wb': 'weber', + '′': 'arcminute', + '″': 'arcsecond', + 'au': 'astronomicalunit', + 'B': 'bel', + 'Da': 'dalton', + 'd': 'day', + '°': 'degree', + 'eV': 'electronvolt', + 'ha': 'hectare', + 'h': 'hour', + 'L': 'litre', + 'min': 'minute', + 'Np': 'neper', + 
# Proxy URLs probed, in order, when the environment does not configure one.
PROXY_CANDIDATES = [
    "http://proxy:3128",
    "http://proxy:8080",
    "http://fw:8080",
    "http://firewall:8080",
    # Add your working one here directly to test!
    "http://webproxy.bs.ptb.de:8080",
]

# Environment variables that are checked for and set as a group.
_PROXY_ENV_VARS = ("http_proxy", "HTTP_PROXY", "https_proxy", "HTTPS_PROXY")


def can_proxy_connect(proxy_url):
    """Return True if http://example.com answers with status 200 through *proxy_url*.

    Connection problems are treated as "proxy not usable" and yield False.
    """
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        resp = requests.get("http://example.com", proxies=proxies, timeout=3)
    except (requests.RequestException, OSError):
        return False
    return resp.status_code == 200


@pytest.fixture(scope="module", autouse=True)
def configure_proxy_if_needed():
    """Export the first working proxy candidate for the duration of this module.

    Nothing is changed when any proxy variable (http or https, either case) is
    already present, so an existing configuration is never overwritten.
    Variables set here are removed again on teardown so they do not leak into
    other test modules.
    """
    if any(var in os.environ for var in _PROXY_ENV_VARS):
        print("✅ Proxy already configured in environment.")
        yield
        return

    working = next((url for url in PROXY_CANDIDATES if can_proxy_connect(url)), None)
    if working is None:
        print("⚠️ No working proxy configured or detected.")
        yield
        return

    for var in _PROXY_ENV_VARS:
        os.environ[var] = working
    print(f"✅ Proxy set from working candidate: {working}")
    try:
        yield
    finally:
        # None of these variables existed before, so removing them restores the environment.
        for var in _PROXY_ENV_VARS:
            os.environ.pop(var, None)
+ +def test_toSIRP_basic(): + u = dsiUnit(r"\kilogram\metre\tothe{2}\per\second\tothe{2}") + sirp = u.toSIRP() + assert sirp == "kg.m2.s-2" + +def test_toSIRP_prefix_scaling(): + u = dsiUnit(r"\milli\metre\tothe{2}\nano\second\tothe{-2}") + sirp = u.toSIRP() + assert sirp == "mm2.ns-2" + +def test_toSIRP_unit_order(): + # order must be preserved + u = dsiUnit(r"\kilogram\second\metre\tothe{2}\per\second\tothe{3}") + sirp = u.toSIRP() + assert sirp == "kg.s.m2.s-3" + +def test_toSIRP_invalid_fractional_exponent(): + u = dsiUnit.fromDsiTree( + dsiString="", + dsiTree=[ + [dsiUnitNode("kilo", "metre", Fraction(3, 2))], + ] + ) + with pytest.raises(NotImplementedError, match="Non-integer exponents not supported in SI RP format."): + u.toSIRP() + +def test_toSIRP_scaled_unit_uses_prefix(): + u = dsiUnit(r"\second") + u.scaleFactor = 1e-9 # emulate ns + # emulate that the exponent is still integer + u.tree[0][0].exponent = 1 + sirp = u.toSIRP() + assert sirp == "ns" + +def test_toSIRP_scaled_unit_invalid_scale(): + u = dsiUnit(r"\second") + u.scaleFactor = 3.14 # no matching SI prefix + with pytest.raises(NotImplementedError, match="Unsupported scale factor for SI RP: 3.14"): + u.toSIRP() + +def test_toSIRP_composite_watt_units(): + units_of_power_sirp = { + r"\volt\tothe{2}\per\ohm": "V2.Ω-1", + r"\ampere\tothe{2}\ohm": "A2.Ω", + r"\volt\ampere": "V.A", + r"\kilogram\metre\tothe{2}\per\second\tothe{3}": "kg.m2.s-3", + r"\joule\per\second": "J.s-1", + r"\newton\metre\per\second": "N.m.s-1", + r"\pascal\metre\tothe{3}\per\second": "Pa.m3.s-1", + r"\coulomb\volt\per\second": "C.V.s-1", + r"\farad\volt\tothe{2}\per\second": "F.V2.s-1", + r"\henry\ampere\tothe{2}\per\second": "H.A2.s-1", + r"\weber\ampere\per\second": "Wb.A.s-1", + r"\siemens\volt\tothe{2}": "S.V2", + } + for expr, expected_sirp in units_of_power_sirp.items(): + u = dsiUnit(expr) + assert u.toSIRP() == expected_sirp, f"{expr} -> {u.toSIRP()} != {expected_sirp}" + + + + +def test_bipmRp_basic(): + u = 
dsiUnit("kg.mm2.ns-2.℃") + assert u.valid + assert u.nonDsiUnit is False + assert str(u) == r"\kilogram\milli\metre\tothe{2}\nano\second\tothe{-2}\degreecelsius" + + t = u.tree[0] + assert t[0].prefix == "" + assert t[0].unit == "kilogram" + assert t[0].exponent == 1 + + assert t[1].prefix == "milli" + assert t[1].unit == "metre" + assert t[1].exponent == 2 + + assert t[2].prefix == "nano" + assert t[2].unit == "second" + assert t[2].exponent == -2 + + assert t[3].prefix == "" + assert t[3].unit == "degreecelsius" + assert t[3].exponent == 1 + + +def test_bipmRp_implicit_exponents(): + u = dsiUnit("mol.cd.m") + assert u.valid + assert [n.unit for n in u.tree[0]] == ["mole", "candela", "metre"] + assert [n.exponent for n in u.tree[0]] == [1, 1, 1] + + +def test_bipmRp_with_explicit_and_negative_exponents(): + u = dsiUnit("kg2.m-1.s3") + t = u.tree[0] + assert t[0].unit == "kilogram" and t[0].exponent == 2 + assert t[1].unit == "metre" and t[1].exponent == -1 + assert t[2].unit == "second" and t[2].exponent == 3 + + +def test_bipmRp_utf8_prefix_and_units(): + u = dsiUnit("µF.GΩ") + assert u.valid + assert [n.prefix for n in u.tree[0]] == ["micro", "giga"] + assert [n.unit for n in u.tree[0]] == ["farad", "ohm"] + + +def test_bipmRp_invalid_unit_warns(): + u = dsiUnit("kg.xunit") + assert not u.valid + assert u.nonDsiUnit + assert len(u.warnings) > 0 + assert "Unknown unit" in u.warnings[0] + + +def test_bipmRp_malformed_components(): + u = dsiUnit("kg..s") # double dot + assert not u.valid + assert u.nonDsiUnit + assert any("Invalid BIPM-RP component" in w for w in u.warnings) + + +def test_bipmRp_parse_equals_dsi(): + a = dsiUnit("kg.mm2.ns-2.℃") + b = dsiUnit(r"\kilogram\milli\metre\tothe{2}\nano\second\tothe{-2}\degreecelsius") + assert a == b + +def normalize_dsi_tree_to_tuples(unit: dsiUnit): + """ + Normalize a dsiUnit instance into a list of (prefix, unit, exponent) tuples. + The list will be flattened and _removePer will be applied to ensure compatibility. 
def normalize_dsi_tree_to_tuples(unit: dsiUnit):
    """
    Normalize a dsiUnit instance into a list of (prefix, unit, exponent) tuples.

    Works on a deep copy to which _removePer is applied, then reads the nodes of
    the first tree entry. A missing prefix becomes "" and the exponent is
    converted with int().
    """
    import copy
    unit_copy = copy.deepcopy(unit)
    unit_copy._removePer()
    return [(node.prefix or "", node.unit, int(node.exponent)) for node in unit_copy.tree[0]]


def normalize_label(label: str) -> str:
    """Normalize unit and prefix labels for consistent comparison."""
    return label.replace(" ", "").lower()


def _get_pid_or_skip(url: str):
    """GET *url*; skip the calling test when the service cannot be reached."""
    try:
        return requests.get(url, timeout=10)
    except requests.RequestException as err:
        # An unreachable network says nothing about dsiUnit, so do not fail on it.
        pytest.skip(f"BIPM PID service not reachable: {err}")


def test_bipm_pid_json_vs_dsiUnit_instances():
    """
    Validate that the BIPM PID JSON response matches the internal dsiUnit representation.
    """
    # ✅ Valid unit expressions; the expected tuples are derived from the parsed unit itself
    valid_units = [
        r"\kilogram\milli\metre\tothe{2}\nano\second\tothe{-2}",
        r"\kilogram\milli\metre\tothe{2}\nano\second\tothe{-2}\astronomicalunit\tothe{-4}\degreecelsius\micro\henry",
        r"\volt\tothe{2}\per\ohm",
        r"\ampere\tothe{2}\ohm",
        r"\joule\per\second",
        r"\pascal\metre\tothe{3}\per\second",
        r"\weber\ampere\per\second",
        r"\degreecelsius",
        r"\nano\second",
        r"\micro\henry"
    ]

    for expr in valid_units:
        unit = dsiUnit(expr)
        url = unit.toSIRP(pid=True)
        response = _get_pid_or_skip(url)
        assert response.status_code == 200, f"Expected 200 {url}, got {response.status_code}"

        json_data = response.json()
        try:
            bipm_units = json_data["resultsCombinedUnitList"]
        except KeyError:
            if len(unit.tree) != 1:
                raise RuntimeError("Expected that we started with a tree with just one entry since we didn't get a combined unit back ...")
            # Fake an ordinary combined-unit response for the simple unit.
            bipm_units = [{'unitName': json_data['unitId'], 'exponent': 1, 'prefixName': ''}]

        parsed_bipm = [
            (
                normalize_label(item.get("prefixName", "")),
                normalize_label(item["unitName"]),
                int(item["exponent"]),
            )
            for item in bipm_units
        ]

        local_tree = normalize_dsi_tree_to_tuples(unit)
        assert parsed_bipm == local_tree, f"\nExpression: {unit.dsiString}\nExpected: {local_tree}\nGot: {parsed_bipm}"


def test_bipm_pid_json_vs_dsiUnit_Invalide_instances():
    """Verify that PID URLs built from invalid D-SI expressions are answered with HTTP 400."""
    # ❌ Invalid unit expressions (syntactically wrong or non-existent)
    invalid_units = [
        dsiUnit(r"\molli\metre"),
        dsiUnit(r"\kilogram\milli\metre\tothe{2}\nano\sec\tothe{-2}"),  # typo in unit
        dsiUnit(r"\none"),
        dsiUnit(r"")
    ]

    for unit in invalid_units:
        url = unit.toSIRP(pid=True)
        response = _get_pid_or_skip(url)
        assert response.status_code == 400, f"Expected 400 for invalid PID: {unit.dsiString} → {url} but got {response.status_code}"