|
| 1 | +--- |
| 2 | +title: "pyOpenSci Editorial Dashboard" |
| 3 | +format: |
| 4 | + dashboard: |
| 5 | + scrolling: true |
| 6 | +execute: |
| 7 | + echo: false |
| 8 | +--- |
| 9 | + |
| 10 | + |
| 11 | +```{python} |
| 12 | +#| echo: false |
| 13 | +#| |
| 14 | +import os |
| 15 | +from pathlib import Path |
| 16 | +
|
| 17 | +import warnings |
| 18 | +from datetime import datetime, timezone |
| 19 | +
|
| 20 | +import altair as alt |
| 21 | +from itables import show |
| 22 | +import pandas as pd |
| 23 | +import pytz |
| 24 | +import plotly.express as px |
| 25 | +from plotly.subplots import make_subplots |
| 26 | +from pyosmeta import ProcessIssues |
| 27 | +from pyosmeta.github_api import GitHubAPI |
| 28 | +
|
# Silence every warning category so none leak into the rendered dashboard
warnings.filterwarnings(action="ignore")
# Never truncate long cell text when tables are displayed
pd.set_option("display.max_colwidth", None)
# Local calendar date at render time, plus a "DD Month YYYY" display string
current_date = datetime.now().date()
today = f"{current_date:%d %B %Y}"
| 35 | +``` |
| 36 | + |
| 37 | + |
| 38 | +```{python} |
def clean_and_order(df, DROP_COLS, order):
    """Drop columns from a DataFrame and return the rest in a given order.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to clean. It is not modified; a new DataFrame is returned.
    DROP_COLS : list of str
        Column names to remove. Names that are not present are ignored.
    order : list of str
        Column names to keep, in the order they should appear.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``DROP_COLS``, restricted to and ordered by ``order``.

    Raises
    ------
    KeyError
        If a name in ``order`` is missing after the drop.
    """
    # Index with the `order` argument, not a module-level constant, so the
    # caller's requested ordering is the one that is applied.
    return df.drop(columns=DROP_COLS, errors="ignore")[order]
| 42 | +
|
def get_status_df(df, status):
    """Return the rows of ``df`` whose ``active_status`` equals ``status``.

    The result is re-indexed from 0; the previous index is discarded.
    """
    matches_status = df["active_status"].eq(status)
    return df.loc[matches_status].reset_index(drop=True)
| 45 | +
|
# Columns removed before a review table is displayed
DROP_COLS = [
    "date_accepted",
    "labels",
    "editor",
    "status",
    "active_status",
]
# Left-to-right column order for displayed review tables
ORDER = [
    "Name",
    "Description",
    "Days Open",
    "Date Opened",
    "Last Comment",
    "Last User to Comment",
    "Issue",
    "Categories",
    "eic",
]
| 48 | +
|
# Maps each dashboard status to the issue labels that indicate it.
# Insertion order matters: get_active_status returns the first status whose
# labels match, so earlier entries take priority over later ones.
label_map = {
    "pre-review": [
        "New Submission!",
        "0/pre-review-checks",
    ],
    "seeking editor": ["0/seeking-editor"],
    "seeking reviewers": ["2/seeking-reviewers"],
    "under-review": [
        "1/editor-assigned",
        "3/reviewers-assigned",
        "4/reviews-in-awaiting-changes",
        "5/awaiting-reviewer-response",
    ],
    "accepted-open": ["6/pyOS-approved", "9/joss-approved"],
}


def get_active_status(labels):
    """Return the active review status implied by a set of issue labels.

    Parameters
    ----------
    labels : list of str, str, None, or float
        The labels attached to a review issue. Each known label is tested
        with ``in``, so a list is checked by membership and a string (for
        example a list serialized into a CSV cell) by substring. ``None``
        and floats (how pandas represents a missing cell, NaN) are treated
        as "no labels".

    Returns
    -------
    str
        The first status in ``label_map`` that has at least one of its
        labels present in ``labels``, or ``"unknown"`` if none match.
    """
    # A missing value does not support `in`; report it as unknown instead
    # of raising TypeError part-way through a DataFrame apply.
    if labels is None or isinstance(labels, float):
        return "unknown"
    for status, label_list in label_map.items():
        if any(label in labels for label in label_list):
            return status
    return "unknown"
| 70 | +``` |
| 71 | + |
| 72 | + |
| 73 | +```{python} |
| 74 | +#| echo: false |
| 75 | +
|
# Open submission review data
# The CSV is read from the `_data` directory one level above the current
# working directory; the three date columns are parsed as datetimes.
data_path = Path.cwd().parents[0] / "_data" / "review_submissions.csv"
reviews = pd.read_csv(
    data_path,
    parse_dates=["date_opened", "date_closed", "last_comment_date"],
)
# Switch to the display-friendly column names used by the rest of the page.
reviews = reviews.rename(
    columns={
        "package_name": "Name",
        "date_opened": "Date Opened",
        "date_closed": "Date Closed",
        "issue_num": "Issue",
        "description": "Description",
        "categories": "Categories",
        "last_comment_date": "Last Comment",
        "last_comment_user": "Last User to Comment",
    }
)

# A review with no close date is still open. Drop without `inplace` so the
# result is an independent DataFrame rather than a mutated slice of `reviews`.
# "Unnamed: 0" is presumably the index column saved with the CSV - TODO confirm.
open_reviews = reviews[reviews["Date Closed"].isna()].drop(
    columns=["Date Closed", "Unnamed: 0"]
)
| 98 | +
|
| 99 | +``` |
| 100 | + |
| 101 | +```{python} |
# Load the editorial team roster and keep only the columns shown on the page
editor_path = Path.cwd().parents[0] / "_data" / "editorial_team_domains.csv"
editor_data = pd.read_csv(editor_path).loc[
    :,
    ["gh_username", "first_name", "Domain_areas", "Description", "technical_areas"],
]
# One-column frame of GitHub usernames, keyed as "editor" to match the reviews
all_editors_df = editor_data["gh_username"].to_frame(name="editor")

# Busy editors: number of open reviews per assigned editor ("TBD" = unassigned)
has_editor = open_reviews["editor"] != "TBD"
busy_editors = (
    open_reviews.loc[has_editor, ["editor"]]
    .value_counts()
    .reset_index()
)
| 110 | +
|
| 111 | +``` |
| 112 | + |
| 113 | +```{python} |
| 114 | +#| echo: false |
# Every known editor with their open-review count; editors who appear only in
# the roster get a count of 0. Rows are sorted by ascending count.
all_editor_activity = (
    all_editors_df.merge(busy_editors, on="editor", how="outer")
    .fillna(0)
    .astype({"count": int})
    .sort_values(by="count")
    .reset_index(drop=True)
)

# Split editors into those with no open reviews and those with at least one
has_no_reviews = all_editor_activity["count"] == 0
available_editors = all_editor_activity[has_no_reviews]
all_busy_editors = all_editor_activity[~has_no_reviews]

# Counted before the roster details are merged in below
num_available_editors = len(available_editors)

# Attach roster details to each available editor and pick the display columns
available_editors = (
    available_editors.merge(
        editor_data, left_on="editor", right_on="gh_username", how="inner"
    )
    .rename(columns={"count": "review count"})
    .loc[
        :,
        [
            "editor",
            "review count",
            "Domain_areas",
            "Description",
            "technical_areas",
            "first_name",
        ],
    ]
)
| 130 | +``` |
| 131 | + |
| 132 | + |
| 133 | +## Row {height=1%} |
| 134 | + |
| 135 | +```{python} |
| 136 | +#| content: valuebox |
| 137 | +#| title: "Total Editors" |
| 138 | +
|
# Value box payload: number of rows in the editor roster
{
    "icon": "box2-heart",
    "color": "primary",
    "value": len(editor_data),
}
| 144 | +``` |
| 145 | + |
| 146 | +```{python} |
| 147 | +#| content: valuebox |
| 148 | +#| title: "Busy Editors" |
# fails if the value isn't an int
# Value box payload: number of editors with at least one open review
{
    "icon": "person-hearts",
    "color": "primary",
    "value": len(busy_editors),
}
| 155 | +``` |
| 156 | + |
| 157 | +```{python} |
| 158 | +#| content: valuebox |
| 159 | +#| title: "Available Editors" |
# fails if the value isn't an int
# Value box payload: number of rows in the available-editors table
{
    "icon": "person-hearts",
    "color": "primary",
    "value": len(available_editors),
}
| 166 | +``` |
| 167 | + |
| 168 | +## Row {height=3%} |
| 169 | +```{python} |
| 170 | +#| title: "pyOpenSci Editorial Team" |
# Known-issue notice printed into this card's output, ahead of the table.
print("Note: the total editor count is currently incorrect-we should open an issue.")

# Display the full editor roster using itables' `show`.
show(editor_data)
| 174 | +``` |
| 175 | + |
| 176 | +## Row {height=2%} |
| 177 | + |
| 178 | +```{python} |
| 179 | +#| title: "Busy editors running reviews " |
# Display the per-editor open-review counts using itables' `show`.
show(busy_editors)
| 181 | +``` |
| 182 | + |
| 183 | +## Row {height=2%} |
| 184 | +```{python} |
| 185 | +#| title: "Available Editors" |
# Display the editors with no open reviews using itables' `show`.
show(available_editors)
| 187 | +``` |
| 188 | + |
| 189 | + |
| 190 | + |
| 191 | +```{python} |
# TODO: make this focus only on current open reviews vs all reviews over time.

# Get a list of all editors over time that have supported pyOpenSci,
# leaving out the placeholder "TBD" and the usernames listed below.
ignore_editors = ["TBD", "lwasser", "xmnlab"]
editors = [
    name for name in reviews["editor"].unique() if name not in ignore_editors
]

# Number of reviews per editor per calendar quarter in which the review opened
opened_quarter = reviews["Date Opened"].dt.to_period("Q")
n_edits = (
    reviews.groupby([reviews["editor"], opened_quarter])
    .size()
    .reset_index(name="n_edits")  # Ensures a DataFrame with column name
)
| 207 | +``` |
| 208 | + |
| 209 | + |
| 210 | + |
| 211 | +```{python} |
| 212 | +
|
# Drop the ignored editors and rename the columns for later use
keep_row = ~n_edits["editor"].isin(ignore_editors)
n_edits_filtered = n_edits.loc[keep_row].rename(
    columns={"Date Opened": "Date", "n_edits": "count"}
)
# Date should be a standard date time
df = n_edits_filtered.copy()
# Pull the year and quarter number out of the period text (e.g. "2023Q2")
df[["Year", "Quarter"]] = (
    df["Date"].astype(str).str.extract(r"(\d{4})Q(\d)").astype(int)
)
# First month of each quarter: Q1 -> 1, Q2 -> 4, Q3 -> 7, Q4 -> 10
quarter_start_month = 3 * (df["Quarter"].astype(int) - 1) + 1
df["Date"] = pd.to_datetime(
    df["Year"].astype(str) + "-" + quarter_start_month.astype(str) + "-01"
)
| 219 | +``` |
| 220 | + |
| 221 | +```{python} |
# This likely should be active edits?
# NOTE(review): `edits` is reassigned from `reviews` in the following cell,
# so this copy (all review columns, "Date Opened" renamed to "Date") is
# replaced before anything visible here reads it.
edits = reviews.rename(columns={"Date Opened": "Date"}).copy()
| 224 | +``` |
| 225 | + |
| 226 | + |
| 227 | +```{python} |
| 228 | +
|
# TODO: If this uses open_reviews it's only showing current load
# if it uses the reviews df it's showing reviews all time 2019 to present. open_reviews has a slightly different structure
# One row per review: editor, package name and open date; rows whose editor
# is the placeholder "TBD" are removed.
edits = (
    reviews[["editor", "Name", "Date Opened"]]
    .rename(columns={"Date Opened": "Date", "Name": "package_name"})
    .loc[lambda frame: frame["editor"] != "TBD"]
)
| 234 | +
|
| 235 | +``` |
| 236 | + |
| 237 | +# Editors |
| 238 | + |
| 239 | +## Row {height=8%} |
| 240 | + |
| 241 | +```{python} |
# Cleanup
# Ensure datetime and a clean quarter label
edits["Date"] = pd.to_datetime(edits["Date"])
edits["Year"] = edits["Date"].dt.year
edits["QuarterNum"] = edits["Date"].dt.quarter
# Text label such as "2023 Q2"
edits["QuarterLabel"] = edits["Year"].astype(str) + " Q" + edits["QuarterNum"].astype(str)

# Labels start with a four-digit year, so a plain string sort is chronological.
quarter_order = sorted(edits["QuarterLabel"].unique())
# Ordered categorical so later grouping/plotting follows chronological order.
edits["QuarterLabel"] = pd.Categorical(edits["QuarterLabel"], categories=quarter_order, ordered=True)

# Count edits per editor and quarter
# NOTE(review): this rebinds `df`, replacing the frame built from `n_edits`
# in an earlier cell. Grouping on a categorical column relies on pandas'
# default for `observed`, which differs between pandas versions - confirm
# whether empty editor/quarter combinations are expected in `df`.
df = (
    edits.groupby(["editor", "QuarterLabel"])
    .size()
    .reset_index(name="count")
)

all_editors = df["editor"].unique()
all_quarters = df["QuarterLabel"].cat.categories

# Every (editor, quarter) pair, so combinations with no reviews get a row.
full_index = pd.MultiIndex.from_product(
    [all_editors, all_quarters],
    names=["editor", "QuarterLabel"]
)
# Same counts as `df`, with missing editor/quarter pairs filled in as 0.
# NOTE(review): the plotting cell that follows reads `df`, not `df_full`.
df_full = (
    df.set_index(["editor", "QuarterLabel"])
    .reindex(full_index, fill_value=0)
    .reset_index()
)
| 271 | +``` |
| 272 | + |
| 273 | +```{python} |
# Re-establish chronological ordering of the quarter labels for the x axis
quarter_order = sorted(df["QuarterLabel"].unique())
df["QuarterLabel"] = pd.Categorical(df["QuarterLabel"], categories=quarter_order, ordered=True)

# Layout: one facet per editor, `facet_wrap` facets per row, fixed row height
facet_wrap = 2
row_height = 400
num_editors = len(df["editor"].unique())
num_rows = -(-num_editors // facet_wrap)  # ceiling division


def _strip_facet_prefix(annotation):
    """Show just the editor name in a facet title instead of "editor=<name>"."""
    return annotation.update(
        text=annotation.text.split("=")[-1], font=dict(size=16)
    )


bar_options = dict(
    x="QuarterLabel",
    y="count",
    facet_col="editor",
    facet_row_spacing=0.04,
    facet_col_spacing=0.06,
    facet_col_wrap=facet_wrap,
    color_discrete_sequence=["indigo"],
    labels={"count": "Number of edits", "QuarterLabel": "Quarter"},
    title="Editor activity by quarter",
    height=row_height * num_rows,
    width=1200,
)
fig = px.bar(df, **bar_options)

# Each update call returns the figure, so the styling steps are chained
fig = (
    fig.for_each_annotation(_strip_facet_prefix)
    .update_xaxes(
        tickangle=45,
        tickfont=dict(size=10),
        title_text="Quarter",
        showticklabels=True,
    )
    # Integer ticks on a fixed 0-4 scale shared by every facet
    .update_yaxes(
        dtick=1,
        tickformat=",d",
        title_text="Number of edits",
        range=[0, 4],
    )
    .update_layout(
        showlegend=False,
        margin=dict(t=80),
        title_font_size=24,
    )
)

fig.show()
| 319 | +
|
| 320 | +``` |
0 commit comments