Skip to content
Snippets Groups Projects
Commit c428b910 authored by Daniele Nicolodi's avatar Daniele Nicolodi
Browse files

salaries: Move processing outside the SAP connection context and extend

parent b23f998d
No related branches found
No related tags found
No related merge requests found
......@@ -51,7 +51,8 @@ def sum_amounts(values):
@click.command()
def main():
@click.option('--save', metavar='FILENAME', help='Save data into CSV file.')
def main(save):
psp = '1K-43045'
fromdate = datetime.date(2020, 1, 1)
todate = datetime.date.today() + datetime.timedelta(days=1)
......@@ -85,29 +86,41 @@ def main():
# This could probably be done in the SAP query.
rows = list(filter(sieve, get_table_rows(table, fields)))
table = petl.fromdicts(rows) \
.convert('cost-element', int) \
.selecteq('cost-element', 5100900)
# There is no easy way to check whose salary each entry
# corresponds to. The only way is to extract the name from the
# narration string, which fortunately conforms to a fixed
# structure most of the time. However, there may be typos in the
# names as entered there. To get around this, we extract all
# the possible names from the narration fields and run a
# clustering algorithm to match the misspelled names to the
# correct ones.
names = [m.group(1) for m in [re.match(r'.*/([A-Z][a-z]+$)', v) for v in table.values('narration')] if m]
clusters = cluster(names, 2.0)
table = table.addfield('name', lambda x: categorize(x.narration, clusters))
data = []
for name in sorted(clusters.keys()):
total = sum_amounts(table.selecteq('name', name).values('amount'))
data.append({'account': psp, 'name': name, 'total': total})
res = petl.fromdicts(data)
print(res)
table = petl.fromdicts(rows) \
.convert('cost-element', int) \
.selectne('cost-element', 5100900) \
.addfield('account', psp, index=0)
print(table)
if save:
table.tocsv(save)
print(table.aggregate(('account', 'year'), sum_amounts, 'amount').rename('value', 'amount'))
table = petl.fromdicts(rows) \
.convert('cost-element', int) \
.selecteq('cost-element', 5100900) \
.addfield('account', psp, index=0)
# To associate an employee name with a table entry it is necessary to
# extract the employee name from the narration. In most cases, the
# narration text is in the form "Something/ $name" where $name is
# the employee name. However, employee names often contain typos.
# To handle these typos, we extract the names from the narration
# fields and run a clustering algorithm to match the misspelled
# names to the correct ones.
names = [m.group(1) for m in [re.match(r'.*/([A-Z][a-z]+$)', v) for v in table.values('narration')] if m]
clusters = cluster(names, 2.0)
table = table.addfield('name', lambda x: categorize(x.narration, clusters)) \
.cut('account', 'year', 'period', 'name', 'amount')
print(table)
print()
print(table.aggregate(('account', 'year', 'name'), sum_amounts, 'amount').rename('value', 'amount'))
print()
print(table.aggregate(('account', 'name'), sum_amounts, 'amount').rename('value', 'amount'))
print()
print(table.aggregate(('account', 'year'), sum_amounts, 'amount').rename('value', 'amount'))
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment