Skip to content

Commit 7a2fade

Browse files
Merge pull request #58 from dataforgoodfr/ta/fix-wordcloud
Ta/fix wordcloud including Images Optimisation
2 parents 3b63eea + 2154da1 commit 7a2fade

34 files changed

+208
-513
lines changed

app/algo.py

Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@
33
are needed in Taxplorer tool. Below functions will be used in different pages of the website.
44
"""
55

6-
import pandas as pd
6+
import random
7+
8+
import humanize
79
import numpy as np
10+
import pandas as pd
811
import plotly.express as px
912
import plotly.graph_objects as go
10-
import humanize
11-
from wordcloud import WordCloud, get_single_color_func
13+
from PIL import Image, ImageColor, ImageDraw, ImageFont
1214

15+
from app import config as cfg
1316

1417
# Define custom template
1518
custom_template = {
@@ -925,42 +928,113 @@ def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure:
925928

926929

927930
# Viz 24 : mnc tracked
928-
def mnc_tracked(df: pd.DataFrame) -> go.Figure:
931+
def mnc_tracked(
932+
df: pd.DataFrame,
933+
image_width: int = 1200,
934+
image_height: int = 1000,
935+
margin: int = 10,
936+
min_font_size: int = 10,
937+
) -> go.Figure:
929938
"""Compute and plot the list of company names in a word cloud where the size of the font depends on the number
930939
of reports available.
931940
932941
Args:
933942
df (pd.DataFrame): CbCRs database.
943+
image_width (int, optional): Image width in pixels. Defaults to 1200.
944+
image_height (int, optional): Image height in pixels. Defaults to 1000.
945+
margin (int, optional): Margin around words in pixels. Defaults to 10.
946+
min_font_size (int, optional): Minimum font size. Defaults to 10.
934947
935948
Returns:
936949
go.Figure: word cloud with company name in a Plotly figure.
937950
"""
938951

939-
# Create dictionnary with company name as key and the number of reports as value
940-
data = df.groupby("mnc")["year"].nunique().to_dict()
941-
942-
color_func = get_single_color_func("#B8BEDB")
952+
# List of colors in hexadecimal format
953+
font_colors = ["#B8BEDB", "#8087A8", "#080F33", "#181F42", "#424A75"]
943954

944-
# Generate the word cloud using the report counts as weights
945-
wordcloud = WordCloud(
946-
width=1200, height=800, background_color="white", color_func=color_func
947-
).generate_from_frequencies(data)
955+
# Create a dictionary mapping each company name to its number of distinct report years, sorted in descending order
956+
data = df.groupby("mnc")["year"].nunique().sort_values(ascending=False).to_dict()
957+
958+
# Create a blank image
959+
image = Image.new("RGB", (image_width, image_height), (255, 255, 255))
960+
draw = ImageDraw.Draw(image)
961+
962+
# Path to the bundled Roboto TrueType (scalable) font
963+
font_path = f"{cfg.FONTS}/roboto/Roboto-Regular.ttf"
964+
965+
# Calculate max font size based on the most frequent word
966+
most_freq_word = max(data, key=data.get)
967+
max_width = image_width * 0.8
968+
969+
# Grow the font size in steps of 2 until the most frequent word reaches the maximum width
970+
max_font_size = min_font_size
971+
font = ImageFont.truetype(font_path, max_font_size)
972+
while draw.textlength(most_freq_word, font=font) < max_width:
973+
max_font_size += 2
974+
font = ImageFont.truetype(font_path, max_font_size)
975+
976+
# Compute and store in a dictionary the font size and the bounding-box width and height of each word
977+
word_sizes = {}
978+
max_frequency = max(data.values())
979+
for word, frequency in data.items():
980+
word_font_size = max(int(max_font_size * (frequency / max_frequency)), min_font_size)
981+
font = ImageFont.truetype(font_path, word_font_size)
982+
left, top, right, bottom = font.getbbox(word)
983+
word_sizes[word] = (word_font_size, (right - left), (bottom - top))
984+
985+
# Place words on the image
986+
# Initialize a list to store the bounding boxes of all placed words
987+
bboxes = []
988+
for word in data.keys():
989+
font_size, word_width, word_height = word_sizes[word]
990+
font = ImageFont.truetype(font_path, font_size)
991+
992+
word_placed = False
993+
tries = 0
994+
while not word_placed and tries < 100:
995+
# Pick a random x and y position to place the word
996+
x = random.randint(margin, image_width - word_width - margin)
997+
y = random.randint(margin, image_height - word_height - margin)
998+
999+
# Calculate the word's bounding box, padded by the margin on the right and bottom
1000+
bbox = [x, y, x + word_width + margin, y + word_height + margin]
1001+
1002+
# Check if the word overlaps with already placed words
1003+
overlap = False
1004+
for existing_bbox in bboxes:
1005+
if (
1006+
bbox[2] > existing_bbox[0]
1007+
and bbox[0] < existing_bbox[2]
1008+
and bbox[3] > existing_bbox[1]
1009+
and bbox[1] < existing_bbox[3]
1010+
):
1011+
overlap = True
1012+
break
1013+
1014+
# Add word to the image
1015+
if not overlap:
1016+
bboxes.append(bbox)
1017+
word_color = ImageColor.getrgb(random.choice(font_colors))
1018+
draw.text((x, y), word, font=font, fill=word_color)
1019+
word_placed = True
1020+
1021+
tries += 1
9481022

9491023
# Display the word cloud
950-
fig = px.imshow(wordcloud)
1024+
fig = px.imshow(image)
9511025

9521026
# Remove hover on image
9531027
fig.update_traces(hoverinfo="skip", hovertemplate="")
9541028

9551029
# Remove colorbar
9561030
fig.update_layout(coloraxis_showscale=False)
9571031

958-
# Remove axis
1032+
# Update axis layouts
9591033
fig.update_xaxes(showticklabels=False)
9601034
fig.update_yaxes(showticklabels=False)
9611035

962-
# Remove margins
963-
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
1036+
# Update layout setting
1037+
fig.update_layout(template=custom_template, height=400)
9641038

9651039
return go.Figure(fig)
9661040

app/css/style.css renamed to app/assets/css/style.css

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
@charset "UTF-8";
22
@import url("viz.css");
3-
@import url('https://fonts.googleapis.com/css2?family=Manrope:wght@200..800&display=swap');
4-
@import url('https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap');
3+
4+
@font-face {
5+
font-family: Manrope;
6+
src: url("../fonts/manrope/Manrope-VariableFont_wght.ttf") format("truetype");
7+
}
8+
9+
@font-face {
10+
font-family: Roboto;
11+
src: url("../fonts/roboto/Roboto-Regular.ttf") format("truetype");
12+
}
513

614
.viz-container {
715
display: flex;
@@ -272,11 +280,15 @@ color: rgba(0,0,0,0.0)
272280
}
273281

274282
.world-bg {
275-
background-image: url("../../images/world_map.png");
283+
background-image: url("../../assets/images/world_map.jpg");
284+
}
285+
286+
.company-image {
287+
border-radius: 22px;
276288
}
277289

278290
.footer-bg {
279-
background-color: #B8BEDB
291+
background-color: #B8BEDB;
280292
}
281293

282294
/* Taipy classes */
@@ -285,7 +297,7 @@ color: rgba(0,0,0,0.0)
285297
margin: 0 auto;
286298
background-color: white !important;
287299
box-shadow: none;
288-
-webkit-text-fill-color: initial !important
300+
-webkit-text-fill-color: initial !important;
289301
}
290302

291303
.container {
@@ -610,7 +622,7 @@ hr.header-hr35 {
610622
}
611623

612624
.world {
613-
background-image: url("../../images/world_map.png");
625+
background-image: url("../../images/world_map.jpg");
614626
background-size: cover;
615627
}
616628

File renamed without changes.
Binary file not shown.
164 KB
Binary file not shown.
File renamed without changes.

app/assets/images/company-image.jpg

28.1 KB
Loading
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

app/assets/images/world_map.jpg

92.1 KB
Loading

app/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
66
# AUTO
77
DATA = f"{APP}data"
8-
IMAGES = f"{APP}images"
8+
IMAGES = f"{APP}assets/images"
9+
FONTS = f"{APP}assets/fonts"
910
PAGES = f"{APP}pages"
1011
# DO NOT USE: css path in main is relative to main.py location
1112
# CSS_PATH = f"{APP}css"

app/images/Arrow 1.png

-216 Bytes
Binary file not shown.

app/images/Arrow_ 1 .png

-352 Bytes
Binary file not shown.

app/images/bahamas.png

-108 KB
Binary file not shown.

app/images/logo.png

-12.1 KB
Binary file not shown.
-378 KB
Binary file not shown.

app/images/viz.png

-48.6 KB
Binary file not shown.

app/images/world_map.png

-2.16 MB
Binary file not shown.

app/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# Global variables
1616
# APP
1717
APP_TITLE = "Taxplorer"
18-
FAVICON = "images/taxplorer-logo.svg"
18+
FAVICON = "assets/images/taxplorer-logo.svg"
1919
# DATA
2020
MAX_YEAR_OF_REPORTS = 2021
2121
PATH_TO_DATA = f"{cfg.DATA}/data_final_dataviz.csv"
@@ -83,7 +83,7 @@ def goto_download(state):
8383

8484

8585
# Initialise Gui with pages and style sheet
86-
gui_multi_pages = Gui(pages=pages, css_file="css/style.css")
86+
gui_multi_pages = Gui(pages=pages, css_file="assets/css/style.css")
8787

8888
# Customize the Stylekit
8989
stylekit = {

app/pages/company/company.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ We might have missed out in its report.<br/>
1616
{ .text-blue .text-weight300 .cpt12}
1717
|>
1818

19-
<|{company_image_path}|image|width=100%|>
19+
<|{company_image_path}|image|width=100%|class_name=company-image|>
2020

2121
|>
2222

app/pages/company/company.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from app import config as cfg
99
from app.viz import Viz
1010

11-
company_image_path = f"{cfg.IMAGES}/pexels-ingo-joseph-1880351.png"
11+
company_image_path = f"{cfg.IMAGES}/company-image.jpg"
1212

1313
DEFAULT_COMPANY = "SHELL"
1414

app/pages/home/home.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# from app.data.data import data
88

99
# Path to images
10-
world_map_path = f"{cfg.IMAGES}/world_map.png"
10+
world_map_path = f"{cfg.IMAGES}/world_map.jpg"
1111
download_icon_path = f"{cfg.IMAGES}/Vector.svg"
1212

1313

app/pages/methodology/methodology.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ the highest). The transparency score calculation follows the general formula :
157157
[//]: # ($$\text{Transparency Score} = \sum_{i=1}^{n} w_i \times \frac{\sum_{j\in J_i} | x_{ij} |}{\sum_{j} | x_{ij} |} \times 100\$$)
158158

159159
<|part|class_name=text-center|
160-
<img class="test" src="./images/transparency-score-equation.svg" height="40px"/>
160+
<img class="test" src="./assets/images/transparency-score-equation.svg" height="40px"/>
161161
|>
162162
<br/><br/>
163163

app/pages/methodology/methodology.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,5 @@
22

33
from app import config as cfg
44

5-
# Path to equation image
6-
equation = f"{cfg.IMAGES}/transparency-score-equation.svg"
7-
85
# Generate page from Markdown file
96
methodology_md = Markdown(f"{cfg.PAGES}/methodology/methodology.md")

app/pages/root.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Taxplorer
4545

4646
<|part|class_name=text-center|
4747

48-
<img class="cpb14" src="./images/data4good-logo.svg" height="130px"/>
48+
<img class="cpb14" src="./assets/images/data4good-logo.svg" height="130px"/>
4949

5050
DATA FOR GOOD
5151
{: .h4 .text-blue .text-footer .cpb6 }
@@ -55,7 +55,7 @@ Website
5555
{: .text-left }
5656

5757
<a class="text-center" href="https://dataforgood.fr/" target="_blank">
58-
<img src="./images/website-logo.svg"/>
58+
<img src="./assets/images/website-logo.svg"/>
5959
</a>
6060
|>
6161

@@ -64,7 +64,7 @@ Twitter
6464
{: .text-left }
6565

6666
<a class="text-center" href="https://twitter.com/dataforgood_fr" target="_blank">
67-
<img src="./images/twitter-logo.svg"/>
67+
<img src="./assets/images/twitter-logo.svg"/>
6868
</a>
6969
|>
7070

@@ -73,7 +73,7 @@ LinkedIn
7373
{: .text-left }
7474

7575
<a class="text-center" href="https://www.linkedin.com/company/dataforgood" target="_blank">
76-
<img src="./images/linkedin-logo.svg"/>
76+
<img src="./assets/images/linkedin-logo.svg"/>
7777
</a>
7878
|>
7979

@@ -87,7 +87,7 @@ LinkedIn
8787

8888
<|part|class_name=text-center|
8989

90-
<img class="cpb14" src="./images/eutax-logo.svg" height="130px"/>
90+
<img class="cpb14" src="./assets/images/eutax-logo.svg" height="130px"/>
9191

9292
EU TAX OBSERVATORY
9393
{: .h4 .text-blue .text-footer .cpb6 }
@@ -97,7 +97,7 @@ Website
9797
{: .text-left }
9898

9999
<a class="text-center" href="https://www.taxobservatory.eu/" target="_blank">
100-
<img src="./images/website-logo.svg"/>
100+
<img src="./assets/images/website-logo.svg"/>
101101
</a>
102102
|>
103103

@@ -106,7 +106,7 @@ Twitter
106106
{: .text-left }
107107

108108
<a class="text-center" href="https://twitter.com/taxobservatory" target="_blank">
109-
<img src="./images/twitter-logo.svg"/>
109+
<img src="./assets/images/twitter-logo.svg"/>
110110
</a>
111111
|>
112112

@@ -115,7 +115,7 @@ LinkedIn
115115
{: .text-left }
116116

117117
<a class="text-center" href="https://www.linkedin.com/company/70917369/" target="_blank">
118-
<img src="./images/linkedin-logo.svg"/>
118+
<img src="./assets/images/linkedin-logo.svg"/>
119119
</a>
120120
|>
121121

0 commit comments

Comments
 (0)