Skip to content

Commit 48c9e2e

Browse files
authored
Enh: editor & Peer review dashboard separated(#125)
* add: new dashboard * feat: peer review dashboard part 1 * Fix editor plot * Create editor dashboard * feat: editor dashboard separate
1 parent 2dddcc6 commit 48c9e2e

File tree

4 files changed

+343
-220
lines changed

4 files changed

+343
-220
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,9 @@ _output/*
2424
# pixi environments
2525
.pixi
2626
*.egg-info
27+
28+
# Quarto
29+
*.quarto_ipynb

_quarto.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,23 @@ website:
2424
menu:
2525
- text: Peer Review Status Dashboard
2626
href: peer-review/peer-review-status-dashboard.qmd
27+
- text: Editorial Dashboard
28+
href: peer-review/editorial-dashboard.qmd
2729
- text: Current Review Status
2830
href: peer-review/current-review-status.qmd
2931
- text: Over Time
3032
href: peer-review/reviews-over-time.qmd
3133
- text: Peer review trends
3234
href: peer-review/review-trends.qmd
35+
36+
- text: "pyOpenSci Package Metrics"
37+
menu:
3338
- text: Accepted Package Metrics
3439
href: peer-review/accepted-packages.qmd
3540
- text: Package Dashboard
3641
href: peer-review/pyos-package-dashboard.qmd
42+
43+
3744
- text: "Contributors"
3845
menu:
3946
- text: Contributor Data

peer-review/editorial-dashboard.qmd

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
---
2+
title: "pyOpenSci Editorial Dashboard"
3+
format:
4+
dashboard:
5+
scrolling: true
6+
execute:
7+
echo: false
8+
---
9+
10+
11+
```{python}
12+
#| echo: false
13+
#|
14+
import os
15+
from pathlib import Path
16+
17+
import warnings
18+
from datetime import datetime, timezone
19+
20+
import altair as alt
21+
from itables import show
22+
import pandas as pd
23+
import pytz
24+
import plotly.express as px
25+
from plotly.subplots import make_subplots
26+
from pyosmeta import ProcessIssues
27+
from pyosmeta.github_api import GitHubAPI
28+
29+
# Suppress all warnings
30+
warnings.filterwarnings("ignore")
31+
# Make tables nicer
32+
pd.set_option("display.max_colwidth", None)
33+
current_date = datetime.today().date()
34+
today = current_date.strftime("%d %B %Y")
35+
```
36+
37+
38+
```{python}
39+
def clean_and_order(df, DROP_COLS, order):
    """Drop unwanted columns and return the remaining ones in a fixed order.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to clean. It is not modified.
    DROP_COLS : list of str
        Column names to remove; names that are not in ``df`` are ignored.
    order : list of str
        Column names to keep, in the order they should appear.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the ``order`` columns.

    Raises
    ------
    KeyError
        If a name in ``order`` is not a column once ``DROP_COLS`` are removed.
    """
    # Index with the ``order`` argument. The previous version read the
    # module-level ``ORDER`` constant here, so the caller's argument was
    # silently ignored.
    return df.drop(columns=DROP_COLS, errors="ignore")[order]
42+
43+
def get_status_df(df, status):
    """Return the rows of ``df`` whose ``active_status`` equals ``status``.

    The result is re-indexed from zero.
    """
    matches_status = df["active_status"] == status
    selected = df.loc[matches_status]
    return selected.reset_index(drop=True)
45+
46+
DROP_COLS = ["date_accepted", "labels", "editor", "status", "active_status"]
47+
ORDER = ["Name", "Description", "Days Open", "Date Opened","Last Comment", "Last User to Comment", "Issue", "Categories", "eic"]
48+
49+
# Status buckets and the labels that place an item in each one.
# Insertion order matters: the first bucket with a matching label wins.
label_map = {
    "pre-review": ["New Submission!", "0/pre-review-checks"],
    "seeking editor": ["0/seeking-editor"],
    "seeking reviewers": ["2/seeking-reviewers"],
    "under-review": [
        "1/editor-assigned",
        "3/reviewers-assigned",
        "4/reviews-in-awaiting-changes",
        "5/awaiting-reviewer-response",
    ],
    "accepted-open": ["6/pyOS-approved", "9/joss-approved"],
}


def get_active_status(labels):
    """Return the first status in ``label_map`` that has a label in ``labels``.

    ``labels`` may be anything that supports ``in``. Returns ``"unknown"``
    when no bucket matches.
    """
    matching_statuses = (
        status
        for status, candidates in label_map.items()
        if any(candidate in labels for candidate in candidates)
    )
    return next(matching_statuses, "unknown")
70+
```
71+
72+
73+
```{python}
74+
#| echo: false
75+
76+
# Open submission review data.
# NOTE: the path is resolved against the parent of the current working
# directory, so this cell depends on where the document is rendered from.
data_path = Path.cwd().parents[0] / "_data" / "review_submissions.csv"
reviews = pd.read_csv(
    data_path,
    parse_dates=["date_opened", "date_closed", "last_comment_date"],
)

# Switch to the display-friendly column names used in the dashboard tables.
reviews = reviews.rename(
    columns={
        "package_name": "Name",
        "date_opened": "Date Opened",
        "date_closed": "Date Closed",
        "issue_num": "Issue",
        "description": "Description",
        "categories": "Categories",
        "last_comment_date": "Last Comment",
        "last_comment_user": "Last User to Comment",
    }
)

# A review with no close date is still open. drop() returns a new frame, so
# the filtered slice of ``reviews`` is never mutated in place.
# "Unnamed: 0" is presumably a saved index column (TODO confirm);
# errors="ignore" keeps this working when the CSV does not contain it.
open_reviews = reviews[reviews["Date Closed"].isna()].drop(
    columns=["Date Closed", "Unnamed: 0"],
    errors="ignore",
)
98+
99+
```
100+
101+
```{python}
102+
# Load the editorial team table and keep only the columns used on this page.
editor_path = Path.cwd().parents[0] / "_data" / "editorial_team_domains.csv"
editor_columns = [
    "gh_username",
    "first_name",
    "Domain_areas",
    "Description",
    "technical_areas",
]
editor_data = pd.read_csv(editor_path)[editor_columns]
all_editors_df = editor_data[["gh_username"]].rename(
    columns={"gh_username": "editor"}
)

# Count open reviews per editor, leaving out rows whose editor is "TBD".
has_editor = open_reviews.editor != "TBD"
busy_editors = (
    open_reviews.loc[has_editor, ["editor"]]
    .value_counts()
    .reset_index()
)
110+
111+
```
112+
113+
```{python}
114+
#| echo: false
115+
# One row per editor: an outer merge keeps editors with no open reviews,
# whose missing counts are filled with 0.
all_editor_activity = pd.merge(
    all_editors_df, busy_editors, on="editor", how="outer"
).fillna(0)
all_editor_activity = (
    all_editor_activity.astype({"count": int})
    .sort_values(by="count")
    .reset_index(drop=True)
)

# Split editors into those with no open reviews and those with at least one.
has_no_reviews = all_editor_activity["count"] == 0
available_editors = all_editor_activity[has_no_reviews]
all_busy_editors = all_editor_activity[~has_no_reviews]

num_available_editors = len(available_editors)

# Attach each available editor's details from the team table for display.
available_editors = available_editors.merge(
    editor_data, left_on="editor", right_on="gh_username", how="inner"
).rename(columns={"count": "review count"})
available_editors = available_editors[
    [
        "editor",
        "review count",
        "Domain_areas",
        "Description",
        "technical_areas",
        "first_name",
    ]
]
130+
```
131+
132+
133+
## Row {height=1%}
134+
135+
```{python}
136+
#| content: valuebox
137+
#| title: "Total Editors"
138+
139+
dict(
140+
icon = "box2-heart",
141+
color = "primary",
142+
value = len(editor_data)
143+
)
144+
```
145+
146+
```{python}
147+
#| content: valuebox
148+
#| title: "Busy Editors"
149+
# fails if the value isn't an int
150+
dict(
151+
icon = "person-hearts",
152+
color = "primary",
153+
value = len(busy_editors)
154+
)
155+
```
156+
157+
```{python}
158+
#| content: valuebox
159+
#| title: "Available Editors"
160+
# fails if the value isn't an int
161+
dict(
162+
icon = "person-hearts",
163+
color = "primary",
164+
value = len(available_editors)
165+
)
166+
```
167+
168+
## Row {height=3%}
169+
```{python}
170+
#| title: "pyOpenSci Editorial Team"
171+
print("Note: the total editor count is currently incorrect-we should open an issue.")
172+
173+
show(editor_data)
174+
```
175+
176+
## Row {height=2%}
177+
178+
```{python}
179+
#| title: "Busy editors running reviews "
180+
show(busy_editors)
181+
```
182+
183+
## Row {height=2%}
184+
```{python}
185+
#| title: "Available Editors"
186+
show(available_editors)
187+
```
188+
189+
190+
191+
```{python}
192+
# TODO: make this focus only on current open reviews vs all reviews over time.

# Editor values left out of the per-editor summaries.
ignore_editors = ["TBD", "lwasser", "xmnlab"]

# Every other editor value that appears in the reviews table.
editors = [
    name
    for name in pd.unique(reviews.editor)
    if name not in ignore_editors
]

# Reviews per editor, grouped by the quarter in which each was opened.
opened_quarter = reviews["Date Opened"].dt.to_period("Q")
n_edits = (
    reviews.groupby([reviews["editor"], opened_quarter])
    .size()
    .reset_index(name="n_edits")
)
207+
```
208+
209+
210+
211+
```{python}
212+
213+
# Remove the ignored editors and switch to generic column names.
keep_rows = ~n_edits["editor"].isin(ignore_editors)
n_edits_filtered = n_edits[keep_rows].rename(
    columns={"Date Opened": "Date", "n_edits": "count"}
)

# Convert the quarter period into a regular datetime: the first day of the
# quarter's first month.
df = n_edits_filtered.copy()
year_and_quarter = (
    df["Date"].astype(str).str.extract(r"(\d{4})Q(\d)").astype(int)
)
df[["Year", "Quarter"]] = year_and_quarter
first_month = 3 * (df["Quarter"].astype(int) - 1) + 1
df["Date"] = pd.to_datetime(
    df["Year"].astype(str) + "-" + first_month.astype(str) + "-01"
)
219+
```
220+
221+
```{python}
222+
# This likely should be active edits?
223+
edits = reviews.rename(columns={"Date Opened": "Date"}).copy()
224+
```
225+
226+
227+
```{python}
228+
229+
# TODO: If this uses open_reviews it's only showing current load
# if it uses the reviews df it's showing reviews all time 2019 to present. open_reviews has a slightly different structure

# One row per review with an assigned editor ("TBD" rows are excluded).
# The explicit copy matters: a later cell adds columns to ``edits``, and
# doing that on a filtered selection of ``reviews`` is a chained assignment
# (pandas' SettingWithCopy case), currently hidden only by the blanket
# warnings filter.
edits = (
    reviews.loc[reviews["editor"] != "TBD", ["editor", "Name", "Date Opened"]]
    .rename(columns={"Date Opened": "Date", "Name": "package_name"})
    .copy()
)
234+
235+
```
236+
237+
# Editors
238+
239+
## Row {height=8%}
240+
241+
```{python}
242+
# Derive a "<year> Q<n>" label for every review from its opening date.
edits["Date"] = pd.to_datetime(edits["Date"])
edits["Year"] = edits["Date"].dt.year
edits["QuarterNum"] = edits["Date"].dt.quarter
year_text = edits["Year"].astype(str)
quarter_text = edits["QuarterNum"].astype(str)
edits["QuarterLabel"] = year_text + " Q" + quarter_text

# Make the label an ordered categorical, with categories in sorted label order.
quarter_order = sorted(edits["QuarterLabel"].unique())
edits["QuarterLabel"] = pd.Categorical(
    edits["QuarterLabel"], categories=quarter_order, ordered=True
)

# Count edits per editor and quarter
per_editor_quarter = edits.groupby(["editor", "QuarterLabel"]).size()
df = per_editor_quarter.reset_index(name="count")

# Build the full editor x quarter grid, filling missing pairs with 0.
all_editors = df["editor"].unique()
all_quarters = df["QuarterLabel"].cat.categories
index_names = ["editor", "QuarterLabel"]
full_index = pd.MultiIndex.from_product(
    [all_editors, all_quarters], names=index_names
)
df_full = (
    df.set_index(index_names)
    .reindex(full_index, fill_value=0)
    .reset_index()
)
271+
```
272+
273+
```{python}
274+
# Re-apply the ordered categorical, with categories in sorted label order.
quarter_order = sorted(df["QuarterLabel"].unique())
df["QuarterLabel"] = pd.Categorical(
    df["QuarterLabel"], categories=quarter_order, ordered=True
)

# One facet per editor, two per row; the figure height grows with row count.
facet_wrap = 2
num_editors = len(df["editor"].unique())
num_rows = (num_editors + facet_wrap - 1) // facet_wrap
row_height = 400

# Upper bound for the shared y-axis. The axis used to be fixed at [0, 4],
# which clipped any editor/quarter with more than four edits; keep 4 as the
# minimum so the chart looks the same while counts stay small.
max_count = int(df["count"].max()) if not df.empty else 0
y_max = max(4, max_count)

fig = px.bar(
    df,
    x="QuarterLabel",
    y="count",
    facet_col="editor",
    facet_row_spacing=0.04,
    facet_col_spacing=0.06,
    facet_col_wrap=facet_wrap,
    color_discrete_sequence=["indigo"],
    labels={"count": "Number of edits", "QuarterLabel": "Quarter"},
    title="Editor activity by quarter",
    height=row_height * num_rows,
    width=1200,
)

# Shorten each facet title to the text after the last "=".
fig = fig.for_each_annotation(
    lambda a: a.update(text=a.text.split("=")[-1], font=dict(size=16))
)
fig = fig.update_xaxes(
    tickangle=45,
    tickfont=dict(size=10),
    title_text="Quarter",
    showticklabels=True,
)
fig = fig.update_yaxes(
    dtick=1,
    tickformat=",d",
    title_text="Number of edits",
    range=[0, y_max],
)
fig = fig.update_layout(
    showlegend=False,
    margin=dict(t=80),
    title_font_size=24,
)

fig.show()
319+
320+
```

0 commit comments

Comments
 (0)