Skip to content
Snippets Groups Projects
Commit 17d3e100 authored by Vanessa Stehr's avatar Vanessa Stehr
Browse files

Refactor code to better represent the parser

parent 75edf1df
No related branches found
No related tags found
No related merge requests found
File added
File added
[metadata]
name = dsiParser
version = 0.2.0
version = 1.0.0
description = Parse D-SI unit strings to LaTeX
long_description = file: README.md
......
......@@ -17,27 +17,145 @@ class dsiParser:
self.latexDefaultSuffix = latexDefaultSuffix
def parse(self, dsiString: str):
    """Parse a D-SI unit string into a tree structure.

    Runs of backslashes are first collapsed to a single backslash. The
    string is then split into its fraction parts and every part is parsed
    into a list of nodes.

    Args:
        dsiString (str): D-SI unit raw string

    Warns:
        RuntimeWarning: double backslashes in D-SI string
        RuntimeWarning: empty D-SI string

    Returns:
        _dsiTree: tree object built from the (collapsed) D-SI string, the
            parsed fraction parts and all warnings collected on the way
    """
    warningMessages = []
    # Catch any double (triple...) \ before they annoy us; the loop repeats
    # because one replace pass only halves a longer run of backslashes.
    while r'\\' in dsiString:
        warningMessages.append(_warn(f"Double backslash found in string, treating as one backslash: \"{dsiString}\"", RuntimeWarning))
        dsiString = dsiString.replace(r'\\', '\\')

    if dsiString == "":
        # Nothing to parse: hand back an empty tree that carries the warning
        warningMessages.append(_warn("Given D-SI string is empty!", RuntimeWarning))
        return _dsiTree(dsiString, [], warningMessages, self.latexDefaultWrapper, self.latexDefaultPrefix, self.latexDefaultSuffix)

    (tree, fractionWarnings) = self._parseDsiFraction(dsiString)
    warningMessages += fractionWarnings
    # Replace every raw fraction part in place with its parsed list of nodes
    for i, node in enumerate(tree):
        (tree[i], fractionlessWarnings) = self._parseFractionlessDsi(node)
        warningMessages += fractionlessWarnings
    return _dsiTree(dsiString, tree, warningMessages, self.latexDefaultWrapper, self.latexDefaultPrefix, self.latexDefaultSuffix)
def _parseDsiFraction(self, dsiString: str):
"""parses D-SI fraction into list of fraction elements
Args:
dsiString (str): The D-SI string to be parsed
dsiString (str): D-SI unit raw string
Raises:
RuntimeWarning: String must not contain more than one "per",
as defined in the D-SI specs
Returns:
_dsiTree: The generated tree
list: strings separated by the "per"
list: warning messages of problems encountered while parsing
"""
return _dsiTree(dsiString, self.latexDefaultWrapper, self.latexDefaultPrefix, self.latexDefaultSuffix)
tree = []
warningMessages = []
tree = dsiString.split(r"\per")
if len(tree) > 2:
warningMessages.append(_warn(r"The dsi string contains more than one \per, does not "+
f"match specs! Given string: {dsiString}",
RuntimeWarning))
return (tree, warningMessages)
def _parseFractionlessDsi(self, dsiString: str):
    """Parse a D-SI unit string without fractions into a list of nodes.

    The string is split at backslashes and consumed from the left. Each node
    takes an optional prefix, an optional unit and an optional exponent
    (tothe{...}), in that order. An identifier that is none of these is
    stored as the unit of its own node and reported as a warning. An empty
    string is handled the same way, yielding one node with an empty unit.

    Args:
        dsiString (str): D-SI unit raw string, not containing any fractions

    Warns:
        RuntimeWarning: if string does not meet the specs

    Returns:
        list: list of nodes
        list: warning messages of problems encountered while parsing
    """
    warningMessages = []
    items = dsiString.split("\\")
    if items[0] == '':  # first item of list should be empty, remove it
        items.pop(0)
    else:
        warningMessages.append(_warn(f"string should start with \\, string given was \"{dsiString}\"", RuntimeWarning))

    def nextItem():
        # '' signals that the input is exhausted; this also covers an empty
        # fraction part, which previously raised IndexError.
        return items.pop(0) if items else ''

    nodes = []
    item = nextItem()
    while True:
        (prefix, unit, exponent) = ('', '', '')
        if item in _dsiPrefixesLatex:
            prefix = item
            item = nextItem()
        if item in _dsiUnitsLatex:
            unit = item
            item = nextItem()
        # Numeric exponent: optional sign, digits, optional decimal part of any length
        if re.match(r'tothe\{-?\d+\.?\d*\}', item):
            exponent = item.split('{')[1].split('}')[0]
            item = nextItem()
        elif re.match(r'tothe\{.*\}', item):
            exponent = item.split('{')[1].split('}')[0]
            # Record the warning like every other one so it reaches the caller
            warningMessages.append(_warn(f"The exponent \"{exponent}\" is not a number!", RuntimeWarning))
            item = nextItem()
        if (prefix, unit, exponent) == ('', '', ''):
            # Nothing was recognised: keep the unknown identifier as the unit
            unit = item
            item = nextItem()
            warningMessages.append(_warn(f"The identifier \"{unit}\" does not match any D-SI units!", RuntimeWarning))
        elif unit == '':
            # Prefix and/or exponent without a unit: rebuild the fragment for the message
            itemStr = ""
            if prefix != "":
                itemStr = itemStr + "\\" + prefix
            if exponent != "":
                itemStr = itemStr + r"\tothe{" + exponent + r"}"
            warningMessages.append(_warn(f"This D-SI unit seems to be missing the base unit! \"{itemStr}\"", RuntimeWarning))
        nodes.append(_node(prefix, unit, exponent))
        if not items and item == '':
            break
    return (nodes, warningMessages)
class _dsiTree:
"""D-SI representation in tree form, also includes validity check and warnings about D-SI string
"""D-SI representation in tree form, also includes validity check and warnings about D-SI string.
Tree format: list of lists:
List format:
First layer: items of the fraction
Second layer: nodes containing prefix, unit, power
"""
def __init__(self, dsiString: str, latexDefaultWrapper='$$', latexDefaultPrefix='', latexDefaultSuffix=''):
def __init__(self, dsiString: str, dsiTree, warningMessages, latexDefaultWrapper='$$', latexDefaultPrefix='', latexDefaultSuffix=''):
"""
Args:
dsiString (str): the D-SI unit string to be parsed
"""
self.dsiString = dsiString
(self.tree, self.warnings) = _parseDsi(dsiString)
self.tree = dsiTree
self.warnings = warningMessages
self.valid = len(self.warnings) == 0
self._latexDefaultWrapper = latexDefaultWrapper
self._latexDefaultPrefix = latexDefaultPrefix
......@@ -126,136 +244,6 @@ class _node:
return latexString
def _parseDsi(dsiString: str):
    """Turn a D-SI string into a nested list plus the warnings collected.

    Layout of the returned tree:
        First layer: items of the fraction
        Second layer: nodes containing prefix, unit, power

    Args:
        dsiString (str): D-SI unit raw string

    Warns:
        RuntimeWarning: double backslashes in D-SI string
        RuntimeWarning: empty D-SI string

    Returns:
        list: tree containing the D-SI unit
        list: warning messages of problems encountered while parsing
    """
    collected = []

    # Keep collapsing until no doubled backslash is left; a single replace
    # pass only halves longer runs.
    while r'\\' in dsiString:
        collected.append(_warn(f"Double backslash found in string, treating as one backslash: \"{dsiString}\"", RuntimeWarning))
        dsiString = dsiString.replace(r'\\', '\\')

    if not dsiString:
        collected.append(_warn("Given D-SI string is empty!", RuntimeWarning))
        return ([], collected)

    parts, fractionWarnings = _parseDsiFraction(dsiString)
    collected.extend(fractionWarnings)

    # Swap each raw fraction part for its parsed node list, in place
    for position, part in enumerate(parts):
        parsedNodes, partWarnings = _parseFractionlessDsi(part)
        parts[position] = parsedNodes
        collected.extend(partWarnings)

    return (parts, collected)
def _parseDsiFraction(dsiString: str):
"""parses D-SI fraction into list of fraction elements
Args:
dsiString (str): D-SI unit raw string
Raises:
RuntimeWarning: String must not contain more than one "per",
as defined in the D-SI specs
Returns:
list: strings separated by the "per"
list: warning messages of problems encountered while parsing
"""
tree = []
warningMessages = []
tree = dsiString.split(r"\per")
if len(tree) > 2:
warningMessages.append(_warn(r"The dsi string contains more than one \per, does not "+
f"match specs! Given string: {dsiString}",
RuntimeWarning))
return (tree, warningMessages)
def _parseFractionlessDsi(dsiString: str):
    """Parse a D-SI unit string without fractions into a list of nodes.

    The string is split at backslashes and consumed from the left. Each node
    takes an optional prefix, an optional unit and an optional exponent
    (tothe{...}), in that order. An identifier that is none of these is
    stored as the unit of its own node and reported as a warning. An empty
    string is handled the same way, yielding one node with an empty unit.

    Args:
        dsiString (str): D-SI unit raw string, not containing any fractions

    Warns:
        RuntimeWarning: if string does not meet the specs

    Returns:
        list: list of nodes
        list: warning messages of problems encountered while parsing
    """
    warningMessages = []
    items = dsiString.split("\\")
    if items[0] == '':  # first item of list should be empty, remove it
        items.pop(0)
    else:
        warningMessages.append(_warn(f"string should start with \\, string given was \"{dsiString}\"", RuntimeWarning))

    def nextItem():
        # '' signals that the input is exhausted; this also covers an empty
        # fraction part, which previously raised IndexError.
        return items.pop(0) if items else ''

    nodes = []
    item = nextItem()
    while True:
        (prefix, unit, exponent) = ('', '', '')
        if item in _dsiPrefixesLatex:
            prefix = item
            item = nextItem()
        if item in _dsiUnitsLatex:
            unit = item
            item = nextItem()
        # Numeric exponent: optional sign, digits, optional decimal part of any length
        if re.match(r'tothe\{-?\d+\.?\d*\}', item):
            exponent = item.split('{')[1].split('}')[0]
            item = nextItem()
        elif re.match(r'tothe\{.*\}', item):
            exponent = item.split('{')[1].split('}')[0]
            # Record the warning like every other one so it reaches the caller
            warningMessages.append(_warn(f"The exponent \"{exponent}\" is not a number!", RuntimeWarning))
            item = nextItem()
        if (prefix, unit, exponent) == ('', '', ''):
            # Nothing was recognised: keep the unknown identifier as the unit
            unit = item
            item = nextItem()
            warningMessages.append(_warn(f"The identifier \"{unit}\" does not match any D-SI units!", RuntimeWarning))
        elif unit == '':
            # Prefix and/or exponent without a unit: rebuild the fragment for the message
            itemStr = ""
            if prefix != "":
                itemStr = itemStr + "\\" + prefix
            if exponent != "":
                itemStr = itemStr + r"\tothe{" + exponent + r"}"
            warningMessages.append(_warn(f"This D-SI unit seems to be missing the base unit! \"{itemStr}\"", RuntimeWarning))
        nodes.append(_node(prefix, unit, exponent))
        if not items and item == '':
            break
    return (nodes, warningMessages)
def _warn(message: str, warningClass):
"""Output warning on command line and return warning message
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment