-
Notifications
You must be signed in to change notification settings - Fork 1
/
rank.py
55 lines (48 loc) · 1.92 KB
/
rank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
This script reads a Cordis CSV file (from a URL or a local file) and
tries to rank a given company w.r.t. the EC contribution value.
It preprocesses the CSV first, calculates the overall budget,
and then ranks the single company branches (if they can be grouped) or the company itself.
"""
import sys
from settings import printerlog, print_configuration
from data_tools import (
get_cordis_data, company_filter, rename_df_columns,
groupby_sort, calculate_company_budget, get_company_ranking)
def rank():
    """
    Print a dataframe populated with the company data only.

    The function prints the active configuration, loads the Cordis data
    with ``get_cordis_data``, passes it through ``groupby_sort`` and
    ``company_filter``, renames the columns with ``rename_df_columns`` and
    logs the resulting table. It then logs the overall company budget
    (``calculate_company_budget``) and the company ranking
    (``get_company_ranking``) out of the number of rows in the grouped
    data frame.

    Output example with COMPANY_NAME = "R2M":

           Rank  Branch                 Country  EC Contribution
        0   264  R2M SOLUTION SRL       IT            6106138.88
        1   819  R2M SOLUTION           FR            2726124.69
        2  2825  R2M SOLUTION SPAIN SL  ES            1209711.38
        3  9799  R2M SOLUTION LTD       UK             248675.00

    Returns:
        None. Every result is emitted through ``printerlog``.

    Raises:
        SystemExit: with exit status 1 when the filtered data frame is
            empty or when any processing step raises an exception.
    """
    print_configuration()

    try:
        df = get_cordis_data()
        df_grouped = groupby_sort(df)
        df_rank = company_filter(df_grouped)

        if df_rank.empty:
            msg = "Empty data frame!"
            printerlog.error(msg)
            printerlog.error(" Check the company name and/or filter. Quitting")
            # A failed lookup is an error: signal it with a non-zero exit
            # status. SystemExit is not an Exception subclass, so it is not
            # swallowed by the handler below.
            sys.exit(1)

        df_company = rename_df_columns(df_rank)
        printerlog.info("-" * 50)
        printerlog.info("Ranking:\n{}".format(df_company))
        printerlog.info("-" * 50)

        # The budget is computed on the raw data, the ranking on the grouped one.
        budget = calculate_company_budget(df)
        ranking = get_company_ranking(df_grouped, budget)
        printerlog.info("Overall company budget: {}".format(budget))

        n_companies = df_grouped.shape[0]
        msg = "Company Ranking: {} out of {}".format(ranking, n_companies)
        printerlog.info(msg)
        printerlog.info("Done")
    except Exception as e:
        # Top-level boundary of the script: log the failure and stop with a
        # non-zero status so shells and CI jobs can detect it.
        printerlog.error("{}".format(e))
        printerlog.error("Quitting")
        sys.exit(1)
# Run the ranking only when this file is executed as a script, not on import.
if __name__ == "__main__":
    rank()