Enh: editor & Peer review dashboard separated(#125)

lwasser · web-flow · commit 48c9e2e0fafa · 2025-06-17T18:52:19.000-04:00
* add: new dashboard

* feat: peer review dashboard part 1

* Fix editor plot

* Create editor dashboard

* feat: editor dashboard separate
diff --git a/.gitignore b/.gitignore
@@ -24,3 +24,9 @@ _output/*
 # pixi environments
 .pixi
 *.egg-info
+<<<<<<< HEAD
+
+# Quarto
+*.quarto_ipynb
+=======
+>>>>>>> 726e820 (feat: peer review dashboard part 1)
diff --git a/_quarto.yml b/_quarto.yml
@@ -24,16 +24,23 @@ website:
         menu:
           - text: Peer Review Status Dashboard
             href: peer-review/peer-review-status-dashboard.qmd
+          - text: Editorial Dashboard
+            href: peer-review/editorial-dashboard.qmd
           - text: Current Review Status
             href: peer-review/current-review-status.qmd
           - text: Over Time 
             href: peer-review/reviews-over-time.qmd
           - text: Peer review trends
             href: peer-review/review-trends.qmd
+
+      - text: "pyOpenSci Package Metrics"
+        menu:
           - text: Accepted Package Metrics
             href: peer-review/accepted-packages.qmd
           - text: Package Dashboard 
             href: peer-review/pyos-package-dashboard.qmd
+
+
       - text: "Contributors"
         menu:
           - text: Contributor Data
diff --git a/peer-review/editorial-dashboard.qmd b/peer-review/editorial-dashboard.qmd
@@ -0,0 +1,320 @@
+--- 
+title: "pyOpenSci Editorial Dashboard"
+format: 
+  dashboard:
+    scrolling: true 
+execute:
+  echo: false
+--- 
+
+
+```{python}
+#| echo: false
+#| 
+import os
+from pathlib import Path
+
+import warnings
+from datetime import datetime, timezone
+
+import altair as alt
+from itables import show
+import pandas as pd
+import pytz
+import plotly.express as px
+from plotly.subplots import make_subplots
+from pyosmeta import ProcessIssues
+from pyosmeta.github_api import GitHubAPI
+
+# Suppress all warnings
+warnings.filterwarnings("ignore")
+# Make tables nicer
+pd.set_option("display.max_colwidth", None)
+current_date = datetime.today().date()
+today = current_date.strftime("%d %B %Y") 
+```
+
+
+```{python}
+def clean_and_order(df, DROP_COLS, order):
+    """Drop columns and reorder DataFrame columns."""
+    return df.drop(columns=DROP_COLS, errors="ignore")[ORDER]
+
+def get_status_df(df, status):
+    return df[df["active_status"] == status].reset_index(drop=True)
+
+DROP_COLS = ["date_accepted", "labels", "editor", "status", "active_status"] 
+ORDER = ["Name", "Description", "Days Open", "Date Opened","Last Comment", "Last User to Comment", "Issue", "Categories", "eic"]
+
+label_map = {
+    "pre-review": [
+        "New Submission!", 
+        "0/pre-review-checks"],
+    "seeking editor": ["0/seeking-editor"],
+    "seeking reviewers": ["2/seeking-reviewers"],
+    "under-review": [
+        "1/editor-assigned",
+        "3/reviewers-assigned",
+        "4/reviews-in-awaiting-changes",
+        "5/awaiting-reviewer-response",
+    ],
+    "accepted-open": ["6/pyOS-approved", "9/joss-approved"],
+}
+
+def get_active_status(labels):
+    """A function that returns an active status value based on a list of label values"""
+    for status, label_list in label_map.items():
+        if any(label in labels for label in label_list):
+            return status
+    return "unknown"
+```
+
+
+```{python}
+#| echo: false
+
+# Open submission review data
+data_path = Path.cwd().parents[0] / "_data" / "review_submissions.csv"
+reviews = pd.read_csv(data_path,
+    parse_dates=["date_opened", "date_closed", "last_comment_date"],
+)
+reviews["date_opened"] = reviews["date_opened"]
+reviews["last_comment_date"] = reviews["last_comment_date"]
+reviews = reviews.rename(
+    columns={
+        "package_name": "Name",
+        "date_opened": "Date Opened",
+        "date_closed": "Date Closed",
+        "issue_num": "Issue",
+        "description": "Description",
+        "categories": "Categories",
+        "last_comment_date": "Last Comment",
+        "last_comment_user": "Last User to Comment",
+    }
+)
+
+open_reviews = reviews[reviews["Date Closed"].isna()]
+open_reviews.drop(columns=["Date Closed","Unnamed: 0"], inplace=True)
+
+```
+
+```{python}
+# get editor team data
+editor_path = Path.cwd().parents[0] / "_data" /  "editorial_team_domains.csv"
+editor_data = pd.read_csv(editor_path)
+editor_data = editor_data[["gh_username","first_name","Domain_areas","Description","technical_areas"]]
+all_editors_df = editor_data[["gh_username"]].rename(columns={"gh_username": "editor"})
+
+# Get busy editors
+busy_editors = open_reviews.loc[open_reviews.editor != "TBD", ["editor"]].value_counts().reset_index()
+
+```
+
+```{python}
+#| echo: false
+all_editor_activity = all_editors_df.merge(busy_editors, on="editor", how="outer").fillna(0)
+
+all_editor_activity["count"]= all_editor_activity["count"].astype(int)
+all_editor_activity.sort_values(by="count", inplace=True)
+all_editor_activity = all_editor_activity.reset_index(drop=True)
+
+# Get counts of available and unavailable editors
+available_editors = all_editor_activity[all_editor_activity["count"] == 0]
+all_busy_editors = all_editor_activity[all_editor_activity["count"] != 0]
+
+num_available_editors = len(available_editors)
+available_editors = available_editors.merge(editor_data, left_on="editor", right_on="gh_username", how="inner")
+
+available_editors = available_editors.rename(columns={"count": "review count"})
+available_editors = available_editors[["editor", "review count", "Domain_areas", "Description", "technical_areas", "first_name"]]
+```
+
+
+## Row {height=1%}
+
+```{python}
+#| content: valuebox
+#| title: "Total Editors"
+
+dict(
+  icon = "box2-heart",
+  color = "primary",
+  value = len(editor_data)
+)
+```
+
+```{python}
+#| content: valuebox
+#| title: "Busy Editors"
+# fails if the value isn't an int
+dict(
+  icon = "person-hearts",
+  color = "primary",
+  value = len(busy_editors)
+)
+```
+
+```{python}
+#| content: valuebox
+#| title: "Available Editors"
+# fails if the value isn't an int
+dict(
+  icon = "person-hearts",
+  color = "primary",
+  value = len(available_editors)
+)
+```
+
+## Row {height=3%}
+```{python}
+#| title: "pyOpenSci Editorial Team" 
+print("Note: the total editor count is currently incorrect-we should open an issue.")
+
+show(editor_data)
+```
+
+## Row {height=2%}
+
+```{python}
+#| title: "Busy editors running reviews "
+show(busy_editors)
+```
+
+## Row {height=2%}
+```{python}
+#| title: "Available Editors"
+show(available_editors)
+```
+
+
+
+```{python}
+# TODO: make this focus only on current open reviews vs all reviews over time. 
+
+# Get a list of all editors over time that have supported pyOpenSci
+ignore_editors = ["TBD"]
+ignore_editors += ["lwasser", "xmnlab"]
+editors = [
+    editor
+    for editor in pd.unique(reviews.editor)
+    if editor not in ignore_editors
+]
+n_edits = (
+    reviews.groupby([reviews["editor"], reviews["Date Opened"].dt.to_period("Q")])
+    .size()
+    .reset_index(name="n_edits")  # Ensures a DataFrame with column name
+)
+```
+
+
+
+```{python}
+
+n_edits_filtered = n_edits[~n_edits["editor"].isin(ignore_editors)]
+n_edits_filtered = n_edits_filtered.rename(columns={"Date Opened": "Date", "n_edits": "count"})
+# Date should be a standard date time
+df = n_edits_filtered.copy()
+df[["Year", "Quarter"]] = df["Date"].astype(str).str.extract(r"(\d{4})Q(\d)").astype(int)
+df["Date"] = pd.to_datetime(df["Year"].astype(str) + "-" + (3 * (df["Quarter"].astype(int) - 1) + 1).astype(str) + "-01")
+```
+
+```{python}
+# This likely should be active edits?
+edits = reviews.rename(columns={"Date Opened": "Date"}).copy()
+```
+
+
+```{python}
+
+# TODO: If this uses open_reviews it's only showing current load
+# if it uses the reviews df it's showing reviews all time 2019 to present. open_reviews has a slightly different structure
+edits = reviews[["editor", "Name", "Date Opened"]]
+edits = edits.rename(columns={"Date Opened": "Date", "Name":"package_name"})
+edits = edits[edits["editor"] != "TBD"]
+
+```
+
+# Editors 
+
+## Row {height=8%}
+
+```{python}
+# Cleanup 
+# Ensure datetime and a clean quarter label
+edits["Date"] = pd.to_datetime(edits["Date"])
+edits["Year"] = edits["Date"].dt.year
+edits["QuarterNum"] = edits["Date"].dt.quarter
+edits["QuarterLabel"] = edits["Year"].astype(str) + " Q" + edits["QuarterNum"].astype(str)
+
+quarter_order = sorted(edits["QuarterLabel"].unique())
+edits["QuarterLabel"] = pd.Categorical(edits["QuarterLabel"], categories=quarter_order, ordered=True)
+
+# Count edits per editor and quarter
+df = (
+    edits.groupby(["editor", "QuarterLabel"])
+    .size()
+    .reset_index(name="count")
+)
+
+all_editors = df["editor"].unique()
+all_quarters = df["QuarterLabel"].cat.categories
+
+full_index = pd.MultiIndex.from_product(
+    [all_editors, all_quarters],
+    names=["editor", "QuarterLabel"]
+)
+df_full = (
+    df.set_index(["editor", "QuarterLabel"])
+      .reindex(full_index, fill_value=0)
+      .reset_index()
+)
+```
+
+```{python}
+quarter_order = sorted(df["QuarterLabel"].unique())
+df["QuarterLabel"] = pd.Categorical(df["QuarterLabel"], categories=quarter_order, ordered=True)
+
+facet_wrap = 2  
+num_editors = len(df["editor"].unique())
+num_rows = (num_editors + facet_wrap - 1) // facet_wrap
+row_height = 400 
+
+fig = px.bar(
+    df,
+    x="QuarterLabel",
+    y="count",
+    facet_col="editor",
+    facet_row_spacing=0.04, 
+    facet_col_spacing=0.06, 
+    facet_col_wrap=facet_wrap,
+    color_discrete_sequence=["indigo"],
+    labels={"count": "Number of edits", "QuarterLabel": "Quarter"},
+    title="Editor activity by quarter",
+    height=row_height * num_rows,
+    width=1200,
+)
+
+fig = fig.for_each_annotation(
+    lambda a: a.update(text=a.text.split("=")[-1], font=dict(size=16))
+)
+fig = fig.update_xaxes(
+    tickangle=45,
+    tickfont=dict(size=10),
+    title_text="Quarter",
+    showticklabels=True
+)
+fig = fig.update_yaxes(
+    dtick=1,
+    tickformat=",d",
+    title_text="Number of edits",
+    range=[0, 4]
+)
+fig = fig.update_layout(
+    showlegend=False,
+    margin=dict(t=80),
+    title_font_size=24,
+)
+
+fig.show()
+
+```
diff --git a/peer-review/peer-review-status-dashboard.qmd b/peer-review/peer-review-status-dashboard.qmd