salaries: Move processing outside the SAP connection context and extend

c428b910 · Daniele Nicolodi · b23f998d · c428b910
Commit c428b910 authored 3 years ago by Daniele Nicolodi
--- a/salaries.py
+++ b/salaries.py
@@ -51,7 +51,8 @@ def sum_amounts(values):


 @click.command()
-def main():
+@click.option('--save', metavar='FILENAME', help='Save data into CSV file.')
+def main(save):
    psp = '1K-43045'
    fromdate = datetime.date(2020, 1, 1)
    todate = datetime.date.today() + datetime.timedelta(days=1)
@@ -85,29 +86,41 @@ def main():
        # This could probably be done in the SAP query.
        rows = list(filter(sieve, get_table_rows(table, fields)))

-        table = petl.fromdicts(rows) \
-                    .convert('cost-element', int) \
-                    .selecteq('cost-element', 5100900)
-
-        # There is no easy way to check whom salary each entry
-        # corresponds to. The only way is to extract the name from the
-        # narration string which fortunately most of the times conform
-        # to a fixed structure. However, there may be typos in the
-        # names as entered there. To get around this, we extract all
-        # the possible names from the narration fields and run a
-        # clustering algorithm to match the mispelled names to the
-        # correct ones.
-        names = [m.group(1) for m in [re.match(r'.*/([A-Z][a-z]+$)', v) for v in table.values('narration')] if m]
-        clusters = cluster(names, 2.0)
-        table = table.addfield('name', lambda x: categorize(x.narration, clusters))
-
-        data = []
-        for name in sorted(clusters.keys()):
-            total = sum_amounts(table.selecteq('name', name).values('amount'))
-            data.append({'account': psp, 'name': name, 'total': total})
-
-        res = petl.fromdicts(data)
-        print(res)
+    table = petl.fromdicts(rows) \
+                .convert('cost-element', int) \
+                .selectne('cost-element', 5100900) \
+                .addfield('account', psp, index=0)
+
+    print(table)
+    if save:
+        table.tocsv(save)
+
+    print(table.aggregate(('account', 'year'), sum_amounts, 'amount').rename('value', 'amount'))
+
+    table = petl.fromdicts(rows) \
+                .convert('cost-element', int) \
+                .selecteq('cost-element', 5100900) \
+                .addfield('account', psp, index=0)
+
+    # To associate an employee name with a table entry it is necessary to
+    # extract the employee name from the narration. In most cases, the
+    # narration text is in the form "Something/ $name" where $name is
+    # the employee name. However, employee names often contain typos.
+    # To handle these typos, we extract the names from the narration
+    # fields and run a clustering algorithm to match the misspelled
+    # names to the correct ones.
+    names = [m.group(1) for m in [re.match(r'.*/([A-Z][a-z]+$)', v) for v in table.values('narration')] if m]
+    clusters = cluster(names, 2.0)
+    table = table.addfield('name', lambda x: categorize(x.narration, clusters)) \
+                 .cut('account', 'year', 'period', 'name', 'amount')
+
+    print(table)
+    print()
+    print(table.aggregate(('account', 'year', 'name'), sum_amounts, 'amount').rename('value', 'amount'))
+    print()
+    print(table.aggregate(('account', 'name'), sum_amounts, 'amount').rename('value', 'amount'))
+    print()
+    print(table.aggregate(('account', 'year'), sum_amounts, 'amount').rename('value', 'amount'))


 if __name__ == '__main__':