From dee38ad6d2daf9bbe00d2bf96390b1dde645381e Mon Sep 17 00:00:00 2001
From: Benedikt Seeger <benedikt.seeger@ptb.de>
Date: Tue, 15 Apr 2025 09:13:19 +0200
Subject: [PATCH] added BIPM-SI-RP '_' in exponents to support fractions as
 suggested

see https://github.com/TheBIPM/SI_Digital_Framework/issues/2
---
 README.md              |  3 +-
 pyproject.toml         |  2 +-
 src/dsiParser.py       | 28 ++++++++++----
 src/dsiUnits.py        |  9 ++++-
 tests/test_dsiUnits.py | 88 +++++++++++++++++++++++++++++++++++++-----
 5 files changed, 110 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index f065202..8cb434a 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,12 @@ pip install dsiUnits
 ```
 
 ## Usage
-The Constructor `dsiUnit(str)` will parse the string and create a dsiUnit object.
+The constructor `dsiUnit(str)` will parse the string and create a dsiUnit object. [BIPM-SI-RP](https://si-digital-framework.org/SI/unitExpr?lang=en) strings are also supported and will be converted to D-SI units.
 The dsiUnit object has the following methods:
 - `toLatex()`: returns the Latex representation of the unit
 - `toUTF8()`: returns the UTF8 representation of the unit
 - `isScalablyEqualTo(other)`: checks whether the unit is equal to another unit with scalar multiplication
+- `toSIRP(pid=False)`: returns the BIPM-SI-RP representation of the unit. If `pid` is `True`, the PID is returned as a URL.
   
 And following magic functions: 
 - `__mul__(other)`: "*" multiplies the unit with another unit or a scalar
diff --git a/pyproject.toml b/pyproject.toml
index 47c8548..4e861a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dsiunits"  # Ensure this is correctly specified
-version = "2.5.0"
+version = "2.5.1"
 description = "This is a Python module for handling the SI units as objects in Python, parsing them from strings and converting them to Latex and Unicode, as well as performing math operations and calculating scale factors."
 authors = [
     { name="Benedikt Seeger", email="benedikt.seeger@ptb.de" },
diff --git a/src/dsiParser.py b/src/dsiParser.py
index 29b63d1..896255c 100644
--- a/src/dsiParser.py
+++ b/src/dsiParser.py
@@ -250,6 +250,7 @@ class dsiParser:
     def _parseBipmRp(self, rp_string: str):
         """
         Parses BIPM-RP or PID-style strings like 'kg.mm2.ns-2.℃' into D-SI trees.
+        Accepts exponents in the form '2' or as fractions like '1_2' (1/2) or '2_3' (2/3).
 
         Returns:
             (str, list[list[dsiUnitNode]], list of warnings, bool isNonDsi)
@@ -259,26 +260,39 @@ class dsiParser:
 
         components = rp_string.strip().split('.')
         for comp in components:
-            match = re.fullmatch(r"([a-zA-ZµΩ℃°]+)([-+]?[0-9]+)?", comp)
+            # Updated regex: group 1 matches the letter part, group 2 optionally
+            # matches an exponent that can include an underscore (e.g., 1_2)
+            match = re.fullmatch(r"([a-zA-ZµΩ℃°]+)(?:([-+]?[0-9]+(?:_[0-9]+)?))?", comp)
             if not match:
                 warningMessages.append(_warn(f"Invalid BIPM-RP component: «{comp}»", RuntimeWarning))
                 return (rp_string, [[dsiUnitNode('', rp_string, valid=False)]], warningMessages, True)
 
             prefix_unit = match.group(1)
             exponent_str = match.group(2)
-            exponent = Fraction(exponent_str) if exponent_str else Fraction(1)
+            # Parse the exponent: check for the underscore indicating a fraction format
+            if exponent_str:
+                if "_" in exponent_str:
+                    try:
+                        num, den = exponent_str.split("_")
+                        exponent = Fraction(int(num), int(den))
+                    except Exception as e:
+                        warningMessages.append(_warn(f"Invalid fraction format in exponent: «{exponent_str}»", RuntimeWarning))
+                        return (rp_string, [[dsiUnitNode('', rp_string, valid=False)]], warningMessages, True)
+                else:
+                    exponent = Fraction(exponent_str)
+            else:
+                exponent = Fraction(1)
 
             matched_prefix = ''
             matched_unit = ''
 
-            # Try matching longest known prefix first
-            # Special case: 'kg' is NOT prefix + unit — it's 'kilogram'
+            # Try matching the longest known prefix first.
+            # Special case: 'kg' is NOT prefix + unit — it's the entire unit "kilogram"
             if prefix_unit == "kg":
                 matched_prefix = ""
                 matched_unit = "kilogram"
-
             else:
-                # Try matching longest known prefix first
+                # Iterate over known prefixes (using longest first)
                 for prefix in sorted(_dsiPrefixesUTF8.values(), key=len, reverse=True):
                     if prefix_unit.startswith(prefix):
                         possible_unit = prefix_unit[len(prefix):]
@@ -295,7 +309,7 @@ class dsiParser:
                             _warn(f"Unknown unit in BIPM-RP string: «{prefix_unit}»", RuntimeWarning))
                         return (rp_string, [[dsiUnitNode('', rp_string, valid=False)]], warningMessages, True)
 
-            # Convert prefix UTF8 → latex
+            # Convert prefix UTF8 → LaTeX (if needed)
             latex_prefix = next((k for k, v in _dsiPrefixesUTF8.items() if v == matched_prefix), '')
             nodeList.append(dsiUnitNode(latex_prefix, matched_unit, exponent))
 
diff --git a/src/dsiUnits.py b/src/dsiUnits.py
index e84b25a..3146c75 100644
--- a/src/dsiUnits.py
+++ b/src/dsiUnits.py
@@ -238,8 +238,13 @@ class dsiUnit:
         parts = []
         for node in unit_copy.tree[0]:
             if not float(node.exponent).is_integer():
-                raise NotImplementedError("Non-integer exponents not supported in SI RP format.")
-            exp = int(node.exponent)
+                warnings.warn("Using sugested integer fraction representation with '_' as seperator from Issue: https://github.com/TheBIPM/SI_Digital_Framework/issues/2")
+                try:
+                    exp=str(node.exponent.numerator)+'_'+str(node.exponent.denominator)
+                except Exception as e:
+                    raise e
+            else:
+                exp = int(node.exponent)
 
             if pid:
                 # Full PID format
diff --git a/tests/test_dsiUnits.py b/tests/test_dsiUnits.py
index b6dad17..1536e8a 100644
--- a/tests/test_dsiUnits.py
+++ b/tests/test_dsiUnits.py
@@ -87,23 +87,23 @@ def test_robustness():
     # Unknown unit
     with pytest.warns(RuntimeWarning, match='The identifier «foo» does not match any D-SI units!'):
         tree = dsiUnit(r'\foo')
-        assert tree.toLatex() == r'$${\color{red}\mathrm{foo}}$$'  
+        assert tree.toLatex() == r'$${\color{red}\mathrm{foo}}$$'
         assert not tree.valid
         assert len(tree.warnings) == 1
         assert tree.warnings == ['The identifier «foo» does not match any D-SI units!']
-    
+
     # Unknown string in the middle of input
     with pytest.warns(RuntimeWarning, match=r'The identifier «mini» does not match any D-SI units! Did you mean one of these «\\milli» ?'):
         tree = dsiUnit(r'\kilo\metre\per\mini\second')
-        assert tree.toLatex() == r'$$\frac{\mathrm{k}\mathrm{m}}{{\color{red}\mathrm{mini}}\,\mathrm{s}}$$'  
+        assert tree.toLatex() == r'$$\frac{\mathrm{k}\mathrm{m}}{{\color{red}\mathrm{mini}}\,\mathrm{s}}$$'
         assert not tree.valid
         assert len(tree.warnings) == 1
         assert tree.warnings == ['The identifier «mini» does not match any D-SI units! Did you mean one of these «\\milli»?']
-    
+
     # Base unit missing
     with pytest.warns(RuntimeWarning, match=r'This D-SI unit seems to be missing the base unit! «\\milli\\tothe\{2\}»'):
         tree = dsiUnit(r'\milli\tothe{2}')
-        assert tree.toLatex() == r'$${\color{red}\mathrm{m}{\color{red}\mathrm{}}^{2}}$$'  
+        assert tree.toLatex() == r'$${\color{red}\mathrm{m}{\color{red}\mathrm{}}^{2}}$$'
         assert not tree.valid
         assert len(tree.warnings) == 1
         assert tree.warnings == ['This D-SI unit seems to be missing the base unit! «\\milli\\tothe{2}»']
@@ -149,7 +149,7 @@ def test_fraction():
     # double fraction
     with pytest.warns(RuntimeWarning, match=r'The dsi string contains more than one \\per, does not match specs! Given string: \\metre\\per\\metre\\per\\metre'):
         tree = dsiUnit(r'\metre\per\metre\per\metre')
-        assert tree.toLatex() == r'$$\mathrm{m}{\color{red}/}\mathrm{m}{\color{red}/}\mathrm{m}$$'  
+        assert tree.toLatex() == r'$$\mathrm{m}{\color{red}/}\mathrm{m}{\color{red}/}\mathrm{m}$$'
         assert not tree.valid
         assert len(tree.warnings) == 1
         assert tree.warnings == [r'The dsi string contains more than one \per, does not match specs! Given string: \metre\per\metre\per\metre']
@@ -709,15 +709,16 @@ def test_toSIRP_unit_order():
     sirp = u.toSIRP()
     assert sirp == "kg.s.m2.s-3"
 
-def test_toSIRP_invalid_fractional_exponent():
+def test_toSIRP_fractional_exponent():
     u = dsiUnit.fromDsiTree(
         dsiString="",
         dsiTree=[
             [dsiUnitNode("kilo", "metre", Fraction(3, 2))],
         ]
     )
-    with pytest.raises(NotImplementedError, match="Non-integer exponents not supported in SI RP format."):
-        u.toSIRP()
+    assert u.toSIRP() =='km3_2'
+    assert dsiUnit(r'\ampere\tothe{0.5}').toSIRP()=='A1_2'
+    assert dsiUnit(r'\ampere\tothe{0.6666666666666666666666}').toSIRP() == 'A2_3'
 
 def test_toSIRP_scaled_unit_uses_prefix():
     u = dsiUnit(r"\second")
@@ -821,6 +822,75 @@ def test_bipmRp_parse_equals_dsi():
     b = dsiUnit(r"\kilogram\milli\metre\tothe{2}\nano\second\tothe{-2}\degreecelsius")
     assert a == b
 
+
+def test_bipmRp_fractional_underscore_parsing():
+    """
+    Test that a BIPM‐RP string with fractional exponents in underscore notation is
+    correctly parsed into dsiUnitNodes with Fraction exponents.
+
+    Input: "kg.mm1_2.ns-2.℃"
+        - "kg" should be recognized as kilogram with exponent 1.
+        - "mm1_2" should be parsed as milli + metre with exponent Fraction(1, 2).
+        - "ns-2" should be parsed as nano + second with exponent -2.
+        - "℃" should be parsed as degreecelsius with an implicit exponent of 1.
+    """
+    u = dsiUnit("kg.mm1_2.ns-2.℃")
+    assert u.valid, "Unit should be valid."
+    # There should be four components in the parsed tree.
+    assert len(u.tree[0]) == 4, "Expected four components in the unit tree."
+
+    # Component 0: "kg"
+    node0 = u.tree[0][0]
+    assert node0.unit == "kilogram", "First component should be 'kilogram'."
+    assert node0.exponent == Fraction(1), "First component exponent should be 1."
+
+    # Component 1: "mm1_2"
+    node1 = u.tree[0][1]
+    assert node1.prefix == "milli", "Second component should have prefix 'milli'."
+    assert node1.unit == "metre", "Second component should be 'metre'."
+    assert node1.exponent == Fraction(1, 2), "Second component exponent should be 1/2."
+
+    # Component 2: "ns-2"
+    node2 = u.tree[0][2]
+    assert node2.prefix == "nano", "Third component should have prefix 'nano'."
+    assert node2.unit == "second", "Third component should be 'second'."
+    assert node2.exponent == Fraction(-2), "Third component exponent should be -2."
+
+    # Component 3: "℃"
+    node3 = u.tree[0][3]
+    # Assuming the mapping converts ℃ to 'degreecelsius'
+    assert node3.unit == "degreecelsius", "Fourth component should be 'degreecelsius'."
+    assert node3.exponent == Fraction(1), "Fourth component exponent should be 1."
+
+
+def test_bipmRp_toSIRP_with_fractional_exponent():
+    r"""
+    Test that conversion to BIPM‐SI‐RP (toSIRP method) properly renders fractional exponents
+    using underscore notation.
+
+    Examples:
+        - \metre\tothe{0.5} should yield "m1_2"
+        - \ampere\tothe{0.6666666666666666} should yield "A2_3"
+    """
+    u = dsiUnit(r'\metre\tothe{0.5}')
+    sirp_output = u.toSIRP()
+    assert sirp_output == 'm1_2', f"Expected toSIRP() to return 'm1_2', but got '{sirp_output}'."
+
+    u2 = dsiUnit(r'\ampere\tothe{0.6666666666666666}')
+    sirp_output2 = u2.toSIRP()
+    assert sirp_output2 == 'A2_3', f"Expected toSIRP() to return 'A2_3', but got '{sirp_output2}'."
+
+
+def test_bipmRp_malformed_fraction_underscore():
+    """
+    Test that a malformed fractional exponent using an unexpected underscore format
+    (e.g. "m1_2_3") renders the unit invalid and produces an appropriate warning.
+    """
+    u = dsiUnit("m1_2_3")
+    assert not u.valid, "Unit should be invalid due to malformed fractional exponent."
+    assert any("Invalid BIPM-RP component" in warning for warning in u.warnings), \
+        "Expected a warning about Invalid BIPM-RP component"
+
 def normalize_dsi_tree_to_tuples(unit: dsiUnit):
     """
     Normalize a dsiUnit instance into a list of (prefix, unit, exponent) tuples.
-- 
GitLab