class dsiParser:
    # NOTE(review): only the parsing methods are reproduced here. The
    # constructor lies outside the visible part of the file; it is assumed to
    # set self.latexDefaultWrapper, self.latexDefaultPrefix and
    # self.latexDefaultSuffix (only the suffix assignment is visible), which
    # parse() forwards to _dsiTree -- confirm against the full file.

    def parse(self, dsiString: str):
        """Parse a D-SI unit string into a tree structure.

        Runs of backslashes are collapsed to a single backslash first. The
        string is then split into its fraction elements and every element is
        parsed into a list of nodes.

        Args:
            dsiString (str): D-SI unit raw string

        Warnings (reported through _warn and collected in the result):
            RuntimeWarning: double backslashes in D-SI string
            RuntimeWarning: empty D-SI string
            plus everything reported by the two helper methods

        Returns:
            _dsiTree: object built from the normalised D-SI string, the parsed
                tree (one list of nodes per fraction element) and the
                collected warning messages
        """
        warningMessages = []
        # Catch any double (triple...) \ before they annoy us. Every pass of
        # the loop halves the run length and records one warning.
        while r'\\' in dsiString:
            warningMessages.append(_warn(f"Double backslash found in string, treating as one backslash: \"{dsiString}\"", RuntimeWarning))
            dsiString = dsiString.replace(r'\\', '\\')

        if dsiString == "":
            warningMessages.append(_warn("Given D-SI string is empty!", RuntimeWarning))
            return _dsiTree(dsiString, [], warningMessages, self.latexDefaultWrapper, self.latexDefaultPrefix, self.latexDefaultSuffix)

        (fractionElements, fractionWarnings) = self._parseDsiFraction(dsiString)
        warningMessages += fractionWarnings

        tree = []
        for element in fractionElements:
            (nodes, elementWarnings) = self._parseFractionlessDsi(element)
            tree.append(nodes)
            warningMessages += elementWarnings
        return _dsiTree(dsiString, tree, warningMessages, self.latexDefaultWrapper, self.latexDefaultPrefix, self.latexDefaultSuffix)

    def _parseDsiFraction(self, dsiString: str):
        r"""Split a D-SI string into its fraction elements.

        The string is split at every complete ``\per`` token. Identifiers that
        merely start with "per" (for example ``\percent``) are left intact.

        Args:
            dsiString (str): D-SI unit raw string

        Warnings (reported through _warn and collected in the result):
            RuntimeWarning: the string contains more than one "per"

        Returns:
            list: strings separated by the "per"
            list: warning messages of problems encountered while parsing
        """
        warningMessages = []
        # The negative lookahead makes sure "\per" is a whole token and not
        # just the beginning of a longer identifier.
        tree = re.split(r"\\per(?![A-Za-z])", dsiString)
        if len(tree) > 2:
            warningMessages.append(_warn(r"The dsi string contains more than one \per, does not " +
                                         f"match specs! Given string: {dsiString}",
                                         RuntimeWarning))
        return (tree, warningMessages)

    def _parseFractionlessDsi(self, dsiString: str):
        r"""Parse a D-SI unit string that contains no fraction.

        The string is split at backslashes and consumed left to right. Each
        node takes an optional prefix, an optional unit and an optional
        ``tothe{...}`` exponent, in that order. An item that fits none of
        these is stored as the unit of its own node and reported.

        Args:
            dsiString (str): D-SI unit raw string, not containing any fractions

        Warnings (reported through _warn and collected in the result):
            RuntimeWarning: string is empty
            RuntimeWarning: string does not start with a backslash
            RuntimeWarning: exponent is not a number
            RuntimeWarning: identifier is not a known unit
            RuntimeWarning: prefix and/or exponent without a base unit

        Returns:
            list: list of nodes
            list: warning messages of problems encountered while parsing
        """
        warningMessages = []
        items = dsiString.split("\\")
        if items[0] == '':  # first item of list should be empty, remove it
            items.pop(0)
        else:
            warningMessages.append(_warn(f"string should start with \\, string given was \"{dsiString}\"", RuntimeWarning))

        if not items:
            # Only reachable for an empty string, i.e. an empty fraction
            # element. Report it instead of failing on the first pop below.
            warningMessages.append(_warn(r"Found an empty D-SI unit expression, is a unit missing before or after \per?", RuntimeWarning))
            return ([], warningMessages)

        def nextItem():
            # '' doubles as the end-of-input marker checked at the loop exit
            return items.pop(0) if items else ''

        nodes = []
        item = nextItem()
        while True:
            (prefix, unit, exponent) = ('', '', '')
            if item in _dsiPrefixesLatex:
                prefix = item
                item = nextItem()
            if item in _dsiUnitsLatex:
                unit = item
                item = nextItem()
            # integer or decimal exponent with any number of decimal places
            if re.match(r'tothe\{-?\d+\.?\d*\}', item):
                exponent = item.split('{')[1].split('}')[0]
                item = nextItem()
            elif re.match(r'tothe\{.*\}', item):
                exponent = item.split('{')[1].split('}')[0]
                # record the warning so the resulting tree is flagged invalid
                warningMessages.append(_warn(f"The exponent \"{exponent}\" is not a number!", RuntimeWarning))
                item = nextItem()

            if (prefix, unit, exponent) == ('', '', ''):
                # nothing recognised: keep the unknown identifier as the unit
                unit = item
                item = nextItem()
                warningMessages.append(_warn(f"The identifier \"{unit}\" does not match any D-SI units!", RuntimeWarning))
            elif unit == '':
                itemStr = ""
                if prefix != "":
                    itemStr = itemStr + "\\" + prefix
                if exponent != "":
                    itemStr = itemStr + r"\tothe{" + exponent + r"}"
                warningMessages.append(_warn(f"This D-SI unit seems to be missing the base unit! \"{itemStr}\"", RuntimeWarning))

            nodes.append(_node(prefix, unit, exponent))
            if not items and item == '':
                break

        return (nodes, warningMessages)