Skip to content

Commit 425d64d

Browse files
committed
Fixed pre-commit errors.
1 parent dae76f3 commit 425d64d

File tree

5 files changed

+96
-106
lines changed

5 files changed

+96
-106
lines changed

app/algo.py

Lines changed: 55 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,53 @@
33
import plotly.express as px
44
import plotly.graph_objects as go
55
import humanize
6+
from wordcloud import WordCloud
7+
import matplotlib.pyplot as plt
8+
69

710
# TODO add viz comment
811
# Viz 1 -
912
def number_of_tracked_reports(df):
1013
number_of_tracked_reports = len(df.groupby(["year", "mnc"])["mnc"])
1114
return number_of_tracked_reports
1215

16+
1317
# TODO add viz comment
1418
def number_of_tracked_reports_company(df_selected_company):
1519
number_of_tracked_reports_company = len(
1620
df_selected_company.groupby(["year"])["year"]
1721
)
1822
return number_of_tracked_reports_company
1923

24+
2025
# TODO add viz comment
2126
def number_of_tracked_reports_sector(df_selected_sector):
2227
number_of_tracked_reports_sector = len(
23-
df_selected_sector.groupby(["year", "mnc"])["year"]
24-
)
28+
df_selected_sector.groupby(["year", "mnc"])["year"]
29+
)
2530
return number_of_tracked_reports_sector
2631

32+
2733
# TODO add viz comment
28-
def number_of_tracked_reports_country(df_selected_country):
34+
def number_of_tracked_reports_country(df_selected_country):
2935
number_of_tracked_reports_country = len(
3036
df_selected_country.groupby(["year", "mnc"])["year"]
3137
)
3238
return number_of_tracked_reports_country
3339

40+
3441
# TODO add viz comment
3542
# Viz 2 - Number of tracked reports over time
3643
def number_of_tracked_reports_over_time(df):
3744
df_count = df.groupby(["year"])["mnc"].nunique().reset_index()
3845
return df_count
3946

47+
4048
# TODO add viz comment
4149
def number_of_tracked_reports_over_time_company(df_selected_company):
4250
df_count_company = (
4351
df_selected_company.groupby(["year"])["mnc"].nunique().reset_index()
44-
)
52+
)
4553
# df_count_all_company = df.groupby(["year"])["mnc"].nunique().reset_index()
4654

4755
# row[3].line_chart(df_count_all_company, x="year", y="mnc")
@@ -52,6 +60,7 @@ def number_of_tracked_reports_over_time_company(df_selected_company):
5260
# )
5361
return df_count_company
5462

63+
5564
# TODO add viz comment
5665
def number_of_tracked_reports_over_time_sector(df_selected_sector):
5766
df_count_sector = (
@@ -70,6 +79,7 @@ def number_of_tracked_reports_over_time_sector(df_selected_sector):
7079
# )
7180
return df_count_sector
7281

82+
7383
# TODO add viz comment
7484
def number_of_tracked_reports_over_time_country(df_selected_country):
7585
df_count_country = (
@@ -82,18 +92,20 @@ def number_of_tracked_reports_over_time_country(df_selected_country):
8292
# row[5].line_chart(df_count_all_country, x="year", y="mnc", color="jur_name")
8393
return df_count_country
8494

95+
8596
# Viz 16
8697

8798
# company’s % pre-tax profit and profit per employee
8899
# plot chart : x-axis = % profit, y axis = profit / employee
89100
# size of the bubble based on % profit and a color code for
90101
# tax havens vs others
91102
def company_pourcentage_pretax_profit_and_profit_per_employee(df_selected_company):
92-
pretax_profit_col_name = 'profit_before_tax'
103+
# pretax_profit_col_name = 'profit_before_tax'
93104
profit_col_name = ''
94105
employee_col_name = 'employees'
95106
df_selected_company[profit_col_name] / df_selected_company[employee_col_name]
96107

108+
97109
# Viz 19
98110
# what are the tax havens being used by the company
99111
# to test but could be a table with one row per jurisdiction (filtering on TH) with
@@ -111,9 +123,7 @@ def tax_haven_used_by_company(df_selected_company):
111123
df_selected_company_th = df_selected_company[df_selected_company['jur_tax_haven'] != 'not.TH']
112124
df_selected_company_nth = df_selected_company[df_selected_company['jur_tax_haven'] == 'not.TH']
113125

114-
115126
for col in pc_list:
116-
117127
df_selected_company.insert(
118128
len(df_selected_company.columns),
119129
col + '_domestic_sum',
@@ -137,11 +147,9 @@ def tax_haven_used_by_company(df_selected_company):
137147
df_selected_company.insert(
138148
len(df_selected_company.columns),
139149
col + '_pc',
140-
100 * df_selected_company[col] / df_selected_company[col+'_sum'])
150+
100 * df_selected_company[col] / df_selected_company[col + '_sum'])
141151
# df_selected_company[col + '_pc'] = 100 * df_selected_company[col] / df_selected_company[col+'_sum']
142152

143-
144-
145153
df_selected_company_th = df_selected_company[df_selected_company['jur_tax_haven'] != 'not.TH']
146154
df_selected_company_th_agg = df_selected_company_th.groupby(['mnc', 'jur_name']).agg(
147155
profit_before_tax=('profit_before_tax', 'sum'),
@@ -151,16 +159,18 @@ def tax_haven_used_by_company(df_selected_company):
151159
related_revenues_pc=('related_revenues_pc', 'sum')
152160
)
153161
df_selected_company_th_agg = df_selected_company_th_agg.reset_index()
154-
df_selected_company_th_agg['profit per employee'] =\
155-
df_selected_company_th_agg['profit_before_tax']/df_selected_company_th_agg['employees']
156-
df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace([np.inf, -np.inf], None)
162+
df_selected_company_th_agg['profit per employee'] = \
163+
df_selected_company_th_agg['profit_before_tax'] / df_selected_company_th_agg['employees']
164+
df_selected_company_th_agg['profit per employee'] = df_selected_company_th_agg['profit per employee'].replace(
165+
[np.inf, -np.inf], None)
157166

158167
return df_selected_company, df_selected_company_th_agg
159168

169+
160170
# TODO add viz comment
161171
# complete table showing for all jurisdictions revenues, profits, employees, taxes with % of total for each (color code for tax havens)
162172
def company_table(df_selected_company):
163-
company_upe_code = df_selected_company['upe_code'].unique()[0]
173+
# company_upe_code = df_selected_company['upe_code'].unique()[0]
164174
pc_list = ['employees', 'profit_before_tax', 'unrelated_revenues', 'related_revenues', 'total_revenues', 'tax_paid']
165175

166176
for col in pc_list:
@@ -190,12 +200,11 @@ def company_table(df_selected_company):
190200
return df_selected_company_by_jur.reset_index()
191201

192202

193-
194203
# Viz 4 - Breakdown of reports by sector (pie chart)
195204
def breakdown_of_reports_by_sector(df):
196-
197205
#Dataframe called df
198-
df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index(name='unique_company_count')
206+
df_reports_per_sector_year = df.groupby(['sector', 'year'])['mnc'].nunique().reset_index(
207+
name='unique_company_count')
199208

200209
# Aggregate the counts of unique companies across all years for each sector
201210
df_reports_per_sector = df_reports_per_sector_year.groupby('sector')['unique_company_count'].sum().reset_index()
@@ -204,27 +213,30 @@ def breakdown_of_reports_by_sector(df):
204213
total_companies = df_reports_per_sector['unique_company_count'].sum()
205214

206215
# Calculate the percentage of each sector's count relative to the total count and round to 2 decimals
207-
df_reports_per_sector['percent'] = ((df_reports_per_sector['unique_company_count'] / total_companies) * 100).round(2)
216+
df_reports_per_sector['percent'] = ((df_reports_per_sector['unique_company_count'] / total_companies) * 100).round(
217+
2)
208218

209219
# Sort the DataFrame by the count of unique companies in ascending order
210220
df_reports_per_sector = df_reports_per_sector.sort_values(by='unique_company_count', ascending=True)
211221

212222
return df_reports_per_sector
213223

224+
214225
def breakdown_of_reports_by_sector_viz(df_reports_per_sector):
215226
# Plotting the horizontal bar chart with Plotly Express
216227
fig = px.bar(df_reports_per_sector, y='sector', x='percent',
217-
orientation='h', # Horizontal orientation
218-
title='Breakdown of Reports by Sector (All Years)',
219-
labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'},
220-
text='percent', # Show the percentage as text label
221-
hover_data={'unique_company_count': True, 'percent': ':.2f%'}, # Add tooltip for count and rounded percentage
222-
)
228+
orientation='h', # Horizontal orientation
229+
title='Breakdown of Reports by Sector (All Years)',
230+
labels={'percent': 'Percentage of Companies (%)', 'sector': 'Sector'},
231+
text='percent', # Show the percentage as text label
232+
hover_data={'unique_company_count': True, 'percent': ':.2f%'},
233+
# Add tooltip for count and rounded percentage
234+
)
223235

224236
# Update layout to display the title above the chart
225237
fig.update_layout(title='Breakdown of Reports by Sector',
226-
title_x=0.5, title_y=0.9, # Adjust position
227-
title_font_size=20) # Adjust font size
238+
title_x=0.5, title_y=0.9, # Adjust position
239+
title_font_size=20) # Adjust font size
228240

229241
# Show the horizontal bar chart
230242
return go.Figure(fig)
@@ -244,13 +256,14 @@ def breakdown_of_reports_by_hq_country(df):
244256

245257
# Calculate the percentage of each HQ country's count relative to the total count and round to 2 decimals
246258
df_reports_per_country['percent'] = (
247-
(df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2)
259+
(df_reports_per_country['unique_company_count'] / total_companies) * 100).round(2)
248260

249261
# Sort the DataFrame by the count of unique companies in ascending order
250262
df_reports_per_country = df_reports_per_country.sort_values(by='unique_company_count', ascending=True)
251263

252264
return df_reports_per_country
253265

266+
254267
def breakdown_of_reports_by_hq_country_viz(df_reports_per_country):
255268
# Plotting the horizontal bar chart with Plotly Express
256269
fig = px.bar(df_reports_per_country, y='upe_name', x='percent',
@@ -271,11 +284,12 @@ def breakdown_of_reports_by_hq_country_viz(df_reports_per_country):
271284
# fig.show()
272285
return go.Figure(fig)
273286

287+
274288
## Viz 6 - Breakdown of reports by sector over time (bar chart)
275289

276290

277291
def breakdown_of_reports_by_sector_over_time(df):
278-
df_reports_per_sector_over_time = df
292+
# df_reports_per_sector_over_time = df
279293
# return df_reports_per_sector_over_time
280294

281295
# Step 1: Determine the top 10 sectors that released reports
@@ -285,14 +299,16 @@ def breakdown_of_reports_by_sector_over_time(df):
285299
df['Sectors'] = df['sector'].apply(lambda x: x if x in top_10_sectors else 'Others')
286300

287301
# Step 3: Group the DataFrame by 'year', 'Sectors', and count the number of unique companies for each year and sector
288-
df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index(name='unique_company_count')
302+
df_reports_per_year_sector = df.groupby(['year', 'Sectors'])['mnc'].nunique().reset_index(
303+
name='unique_company_count')
289304

290305
# Sort sectors in reverse alphabetical order (ascending=False)
291306
df_reports_per_year_sector = df_reports_per_year_sector.sort_values(by='Sectors', ascending=False)
292307

293308
return df_reports_per_year_sector, top_10_sectors
294-
def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors):
295309

310+
311+
def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top_10_sectors):
296312
# Define the order of sectors for the stacked bar chart and legend, reversed
297313
chart_order = ['Others'] + top_10_sectors[::-1]
298314
legend_order = ['Others'] + top_10_sectors[::-1]
@@ -319,10 +335,6 @@ def breakdown_of_reports_by_sector_over_time_viz(df_reports_per_year_sector, top
319335
return go.Figure(fig)
320336

321337

322-
323-
324-
325-
326338
## Viz 7 - Breakdown of reports by HQ country over time (bar chart)
327339
# TODO add code
328340

@@ -372,6 +384,7 @@ def compute_company_available_reports(df: pd.DataFrame, company: str) -> dict:
372384

373385
return data
374386

387+
375388
def display_company_available_reports(
376389
df: pd.DataFrame, company: str, hide_company: bool = True) -> pd.DataFrame:
377390
"""Display the number of reports tracked for a specific company and the
@@ -449,6 +462,8 @@ def compute_company_key_financials_kpis(
449462
data = df.to_dict(orient='index')
450463

451464
return data
465+
466+
452467
def display_company_key_financials_kpis(
453468
df: pd.DataFrame, company: str, year: int = None) -> pd.DataFrame:
454469
"""Display key financial KPIs for a company.
@@ -470,6 +485,7 @@ def display_company_key_financials_kpis(
470485

471486
return df
472487

488+
473489
# Viz 14
474490
def compute_top_jurisdictions_revenue(
475491
df: pd.DataFrame, company: str, year: int) -> dict:
@@ -728,8 +744,6 @@ def display_pretax_profit_and_employees_rank(
728744
return go.Figure(fig)
729745

730746

731-
732-
733747
# Viz 18
734748

735749
def compute_related_and_unrelated_revenues_breakdown(
@@ -852,7 +866,8 @@ def display_related_and_unrelated_revenues_breakdown(df: pd.DataFrame, company:
852866

853867
# fig.show()
854868
return go.Figure(fig)
855-
869+
870+
856871
# Viz 21 - evolution of tax havens use over time : % profit vs % employees in TH over time
857872
def compute_tax_havens_use_evolution(df: pd.DataFrame, company: str) -> dict:
858873
"""Compute the evolution of tax havens use by company over time.
@@ -946,8 +961,8 @@ def display_tax_havens_use_evolution(df: pd.DataFrame, company: str):
946961

947962

948963
# Viz 24
949-
from wordcloud import WordCloud
950-
import matplotlib.pyplot as plt
964+
965+
951966

952967
def viz_24_compute_data(df):
953968
# Drop duplicates to ensure each MNC appears only once per year
@@ -961,6 +976,7 @@ def viz_24_compute_data(df):
961976

962977
return mnc_report_count
963978

979+
964980
def viz_24_viz(mnc_report_count):
965981
# Generate the word cloud using the report counts as weights
966982
wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(mnc_report_count)
@@ -972,18 +988,6 @@ def viz_24_viz(mnc_report_count):
972988
return fig
973989

974990

975-
976-
977-
978-
979-
980-
981-
982-
983-
984-
985-
986-
987991
# Viz 25
988992

989993
# List financial columns
@@ -1090,9 +1094,6 @@ def compute_completness_score(df: pd.DataFrame, company: str, year: int) -> floa
10901094
return completness_score
10911095

10921096

1093-
1094-
1095-
10961097
def compute_transparency_score(df: pd.DataFrame, company: str) -> dict:
10971098
"""Compute the transparency score which is the average of component I
10981099
(geographic score) and component II (completness score).
@@ -1131,7 +1132,6 @@ def compute_transparency_score(df: pd.DataFrame, company: str) -> dict:
11311132
return data
11321133

11331134

1134-
11351135
def transparency_scores_to_csv(
11361136
df: pd.DataFrame, csv_path: str = './') -> pd.DataFrame:
11371137
"""Compute transparency score for all companies and all years into a
@@ -1221,9 +1221,6 @@ def display_transparency_score(df: pd.DataFrame, company: str, year: int = None)
12211221
fig.show()
12221222

12231223

1224-
1225-
1226-
12271224
# Viz 26
12281225

12291226
# Functions below use the 'financial_columns' list, and same computation
@@ -1322,6 +1319,3 @@ def display_transparency_score_over_time_details(
13221319
})
13231320

13241321
return df
1325-
1326-
1327-

app/requirements.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

app/viz_library.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def _viz_render(props: dict) -> str:
7575
# Get the property values
7676
title = props.get("title")
7777
subtitle = props.get("subtitle")
78-
data = props.get("data")
78+
# data = props.get("data")
7979

8080
return f'''
8181
<div class="viz">

0 commit comments

Comments
 (0)