|
3 | 3 | are needed in the Taxplorer tool. The functions below are used on different pages of the website.
|
4 | 4 | """
|
5 | 5 |
|
6 |
| -import pandas as pd |
| 6 | +import random |
| 7 | + |
| 8 | +import humanize |
7 | 9 | import numpy as np
|
| 10 | +import pandas as pd |
8 | 11 | import plotly.express as px
|
9 | 12 | import plotly.graph_objects as go
|
10 |
| -import humanize |
11 |
| -from wordcloud import WordCloud, get_single_color_func |
| 13 | +from PIL import Image, ImageColor, ImageDraw, ImageFont |
12 | 14 |
|
| 15 | +from app import config as cfg |
13 | 16 |
|
14 | 17 | # Define custom template
|
15 | 18 | custom_template = {
|
@@ -925,42 +928,113 @@ def tax_havens_use_evolution(df: pd.DataFrame, company: str) -> go.Figure:
|
925 | 928 |
|
926 | 929 |
|
927 | 930 | # Viz 24 : mnc tracked
|
928 |
def mnc_tracked(
    df: pd.DataFrame,
    image_width: int = 1200,
    image_height: int = 1000,
    margin: int = 10,
    min_font_size: int = 10,
) -> go.Figure:
    """Plot company names as a word cloud sized by the number of reports available.

    The font size of each company name is proportional to the number of distinct
    years for which the company has a report. Words are placed at random,
    non-overlapping positions; a word that cannot be placed after 100 attempts,
    or that is too large for the drawable area, is left out of the image.

    Args:
        df (pd.DataFrame): CbCRs database. Must contain the columns "mnc" and "year".
        image_width (int, optional): Image width in pixels. Defaults to 1200.
        image_height (int, optional): Image height in pixels. Defaults to 1000.
        margin (int, optional): Margin around words in pixels. Defaults to 10.
        min_font_size (int, optional): Minimum font size. Defaults to 10.

    Returns:
        go.Figure: word cloud with company names in a Plotly figure. The image is
            blank when ``df`` contains no company.
    """

    # List of colors in hexadecimal format
    font_colors = ["#B8BEDB", "#8087A8", "#080F33", "#181F42", "#424A75"]

    # Create dictionary with company name as key and the number of reports as value,
    # ordered from the most to the least reported company so big words are placed first
    data = df.groupby("mnc")["year"].nunique().sort_values(ascending=False).to_dict()

    # Create a blank image
    image = Image.new("RGB", (image_width, image_height), (255, 255, 255))
    draw = ImageDraw.Draw(image)

    # Scalable font shipped with the application
    font_path = f"{cfg.FONTS}/roboto/Roboto-Regular.ttf"

    # Loading a TrueType font is costly: keep one instance per font size
    fonts = {}

    def load_font(size):
        if size not in fonts:
            fonts[size] = ImageFont.truetype(font_path, size)
        return fonts[size]

    # Nothing to draw when the database is empty: the blank image is displayed as is
    if data:
        most_freq_word = max(data, key=data.get)
        max_frequency = data[most_freq_word]
        max_width = image_width * 0.8

        # Grow the font until the most frequent word spans 80% of the image width.
        # The size is capped at the image height so the search always terminates,
        # even if the word has no rendered width (e.g. an empty name).
        max_font_size = min_font_size
        while (
            max_font_size < image_height
            and draw.textlength(most_freq_word, font=load_font(max_font_size)) < max_width
        ):
            max_font_size += 2

        # Boundary boxes of the words already drawn
        bboxes = []
        for word, frequency in data.items():
            # Font size proportional to the number of reports, never below the minimum.
            # max_frequency is 0 only when no valid year exists at all.
            ratio = frequency / max_frequency if max_frequency else 0
            font_size = max(int(max_font_size * ratio), min_font_size)
            font = load_font(font_size)

            left, top, right, bottom = font.getbbox(word)
            word_width = right - left
            word_height = bottom - top

            # Largest top-left corner that keeps the word and its margin inside the image
            max_x = image_width - word_width - margin
            max_y = image_height - word_height - margin

            # A word larger than the drawable area can never be placed:
            # skip it instead of letting random.randint fail on an empty range
            if max_x < margin or max_y < margin:
                continue

            for _ in range(100):
                # Pick a random x and y position to place the word
                x = random.randint(margin, max_x)
                y = random.randint(margin, max_y)

                # Word's boundary box, extended by the margin on the right and bottom
                bbox = (x, y, x + word_width + margin, y + word_height + margin)

                # Check if the word overlaps with already placed words
                overlap = any(
                    bbox[2] > placed[0]
                    and bbox[0] < placed[2]
                    and bbox[3] > placed[1]
                    and bbox[1] < placed[3]
                    for placed in bboxes
                )
                if overlap:
                    continue

                bboxes.append(bbox)
                word_color = ImageColor.getrgb(random.choice(font_colors))
                # getbbox is relative to the drawing origin: shift the origin by
                # (left, top) so the ink lands exactly inside the reserved box
                draw.text((x - left, y - top), word, font=font, fill=word_color)
                break

    # Display the word cloud
    fig = px.imshow(image)

    # Remove hover on image
    fig.update_traces(hoverinfo="skip", hovertemplate="")

    # Remove colorbar
    fig.update_layout(coloraxis_showscale=False)

    # Update axis layouts
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)

    # Update layout setting
    fig.update_layout(template=custom_template, height=400)

    return go.Figure(fig)
|
966 | 1040 |
|
|
0 commit comments