diff --git a/source-repo-scripts/project_contributors/README.md b/source-repo-scripts/project_contributors/README.md
new file mode 100644
index 000000000..4cc422eb6
--- /dev/null
+++ b/source-repo-scripts/project_contributors/README.md
@@ -0,0 +1,85 @@
+# GitHub Contributor Generator
+
+This project provides a set of tools to generate a list of contributors from
+merged pull requests in a GitHub organization.
+
+## Installation
+
+It is recommended to use a virtual environment to manage dependencies:
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install --upgrade pip
+pip install -e .
+```
+
+## Prerequisites
+
+This project requires the [GitHub CLI (`gh`)](https://cli.github.com/) to be
+installed and authenticated. Please ensure you have it set up before running
+the scripts.
+
+## Usage
+
+### 1. Get Merged Pull Requests
+
+The `get-merged-prs` script fetches all merged pull requests for a given
+GitHub organization and date range. It handles pagination and bypasses the
+GitHub API's 1000-item search limit.
+
+**Example:**
+
+To get all merged pull requests for the `gazebosim` organization from
+October 1, 2024, to September 23, 2025 (corresponding to the Jetty Release),
+run the following command:
+
+```bash
+get-merged-prs gazebosim 2024-10-01 2025-09-23 > gazebosim-prs.json
+```
+
+This will create a `gazebosim-prs.json` file containing the merged pull
+requests.
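+
+Each entry in the output records the pull request's URL, title, author,
+repository, creation and merge timestamps, and base branch. The values
+below are illustrative:
+
+```json
+[
+  {
+    "url": "https://github.com/gazebosim/gz-sim/pull/1234",
+    "title": "Example feature",
+    "author": "octocat",
+    "repository": "gazebosim/gz-sim",
+    "createdAt": "2024-10-02T12:00:00Z",
+    "mergedAt": "2024-10-05T08:30:00Z",
+    "baseRefName": "main"
+  }
+]
+```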
+readme = "README.md" +requires-python = ">=3.8" +license = { text = "Apache-2.0" } +authors = [ + { name = "Addisu Taddese", email = "addisuzt@intrinsic.ai" }, +] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Software Development :: Documentation", + "Topic :: Utilities", +] +dependencies = [ + "requests", + "Pillow", +] + +[project.scripts] +get-merged-prs = "get_merged_prs:main" +generate-contributors = "generate_contributors:main" + +[tool.setuptools.packages.find] +where = ["src"] + diff --git a/source-repo-scripts/project_contributors/src/collage.py b/source-repo-scripts/project_contributors/src/collage.py new file mode 100644 index 000000000..622814a20 --- /dev/null +++ b/source-repo-scripts/project_contributors/src/collage.py @@ -0,0 +1,126 @@ +# Copyright 2025 Open Source Robotics Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file was generated by Gemini 2.5 Pro. + +""" +Provides functionality to create a collage from a list of images. +""" + +import os +import math +import requests +from PIL import Image, ImageDraw, ImageOps +from urllib.parse import urlparse + +AVATAR_SIZE = 256 # The size (width/height) for each avatar in the collage +CACHE_DIR = ".avatar_cache" # Directory to store downloaded avatars + +def download_avatar(username: str, url: str) -> str | None: + """ + Downloads a user's avatar and saves it to a local cache. + + Args: + username: The GitHub username, used for the filename. + url: The URL of the avatar image. + + Returns: + The local file path to the cached avatar, or None on failure. + """ + os.makedirs(CACHE_DIR, exist_ok=True) + file_extension = os.path.splitext(urlparse(url).path)[1] or '.png' + cache_path = os.path.join(CACHE_DIR, f"{username}{file_extension}") + + if os.path.exists(cache_path): + return cache_path + + try: + response = requests.get(url, stream=True) + response.raise_for_status() + with open(cache_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + f.write(chunk) + return cache_path + except requests.exceptions.RequestException as e: + print(f"❌ Failed to download avatar for {username}: {e}") + return None + +def create_circular_avatar(image_path: str) -> Image: + """ + Opens an image, crops it to a circle, and returns it. + + Args: + image_path: Path to the square avatar image. + + Returns: + A PIL Image object with a transparent background, cropped to a circle. + """ + img = Image.open(image_path).convert("RGBA") + + mask = Image.new('L', img.size, 0) + draw = ImageDraw.Draw(mask) + draw.ellipse((0, 0) + img.size, fill=255) + + output = ImageOps.fit(img, mask.size, centering=(0.5, 0.5)) + output.putalpha(mask) + return output + +def create_collage(image_paths: list[str], output_path: str, rows: int | None = None, columns: int | None = None): + """ + Creates a collage from a list of circular avatars with a specified aspect ratio. 
+
+#### Generate Avatar Collage
+
+To generate a collage of contributor avatars, you can use either the JSON
+file or a list of usernames.
+
+**From a JSON file:**
+
+```bash
+generate-contributors collage --input-json gazebosim-prs.json \
+    media/contributors.png --columns 15
+```
+
+**From a list of usernames:**
+
+```bash
+generate-contributors collage --usernames user1 user2 user3 \
+    media/contributors.png --rows 5
+```
+
+This will create a `media/contributors.png` file with the collage of
+avatars. You can use the `--rows` or `--columns` flags to specify the
+layout of the collage.
+
+### Example Collage
+
+![Contributors Collage](media/contributors.png)
diff --git a/source-repo-scripts/project_contributors/media/contributors.png b/source-repo-scripts/project_contributors/media/contributors.png
new file mode 100644
index 000000000..d9f85a5a9
Binary files /dev/null and b/source-repo-scripts/project_contributors/media/contributors.png differ
diff --git a/source-repo-scripts/project_contributors/pyproject.toml b/source-repo-scripts/project_contributors/pyproject.toml
new file mode 100644
index 000000000..e2a2434d2
--- /dev/null
+++ b/source-repo-scripts/project_contributors/pyproject.toml
@@ -0,0 +1,33 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "github-contributor-generator"
+version = "0.1.0"
+description = "Generates a Markdown list and/or an avatar collage of contributors from a GitHub pull request JSON file or a list of usernames."
+readme = "README.md"
+# The sources use the `X | None` union syntax, which requires Python 3.10+.
+requires-python = ">=3.10"
+license = { text = "Apache-2.0" }
+authors = [
+    { name = "Addisu Taddese", email = "addisuzt@intrinsic.ai" },
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Topic :: Software Development :: Documentation",
+    "Topic :: Utilities",
+]
+dependencies = [
+    "requests",
+    "Pillow",
+]
+
+[project.scripts]
+get-merged-prs = "get_merged_prs:main"
+generate-contributors = "generate_contributors:main"
+
+# The console scripts refer to top-level modules under src/, so list them as
+# py-modules; packages.find would only discover packages, not single modules.
+[tool.setuptools]
+package-dir = { "" = "src" }
+py-modules = ["get_merged_prs", "generate_contributors", "collage", "github_api"]
+
diff --git a/source-repo-scripts/project_contributors/src/collage.py b/source-repo-scripts/project_contributors/src/collage.py
new file mode 100644
index 000000000..622814a20
--- /dev/null
+++ b/source-repo-scripts/project_contributors/src/collage.py
@@ -0,0 +1,126 @@
+# Copyright 2025 Open Source Robotics Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file was generated by Gemini 2.5 Pro.
+
+"""
+Provides functionality to create a collage from a list of images.
+"""
+
+import os
+import math
+import requests
+from PIL import Image, ImageDraw, ImageOps
+from urllib.parse import urlparse
+
+AVATAR_SIZE = 256  # The size (width/height) for each avatar in the collage
+CACHE_DIR = ".avatar_cache"  # Directory to store downloaded avatars
+
+def download_avatar(username: str, url: str) -> str | None:
+    """
+    Downloads a user's avatar and saves it to a local cache.
+
+    Args:
+        username: The GitHub username, used for the filename.
+        url: The URL of the avatar image.
+
+    Returns:
+        The local file path to the cached avatar, or None on failure.
+    """
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    file_extension = os.path.splitext(urlparse(url).path)[1] or '.png'
+    cache_path = os.path.join(CACHE_DIR, f"{username}{file_extension}")
+
+    if os.path.exists(cache_path):
+        return cache_path
+
+    try:
+        response = requests.get(url, stream=True, timeout=30)
+        response.raise_for_status()
+        with open(cache_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        return cache_path
+    except requests.exceptions.RequestException as e:
+        print(f"❌ Failed to download avatar for {username}: {e}")
+        return None
+
+def create_circular_avatar(image_path: str) -> Image.Image:
+    """
+    Opens an image, crops it to a circle, and returns it.
+
+    Args:
+        image_path: Path to the square avatar image.
+
+    Returns:
+        A PIL Image object with a transparent background, cropped to a circle.
+    """
+    img = Image.open(image_path).convert("RGBA")
+
+    mask = Image.new('L', img.size, 0)
+    draw = ImageDraw.Draw(mask)
+    draw.ellipse((0, 0) + img.size, fill=255)
+
+    output = ImageOps.fit(img, mask.size, centering=(0.5, 0.5))
+    output.putalpha(mask)
+    return output
+
+def create_collage(image_paths: list[str], output_path: str, rows: int | None = None, columns: int | None = None):
+    """
+    Creates a collage of circular avatars arranged in a grid.
+
+    Args:
+        image_paths: A list of file paths to the avatar images.
+        output_path: The path to save the final collage image.
+        rows: The number of rows in the collage grid.
+        columns: The number of columns in the collage grid.
+    """
+    num_images = len(image_paths)
+    if not num_images:
+        print("⚠️ No images provided to create a collage.")
+        return
+
+    if rows and not columns:
+        grid_rows = rows
+        grid_cols = int(math.ceil(num_images / grid_rows))
+    elif columns and not rows:
+        grid_cols = columns
+        grid_rows = int(math.ceil(num_images / grid_cols))
+    else:
+        # Neither was given: aim for a roughly square grid.
+        grid_cols = int(math.ceil(math.sqrt(num_images)))
+        grid_rows = int(math.ceil(num_images / grid_cols))
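+    # For example (illustrative): 47 avatars with columns=15 yields a
+    # 15x4 grid (ceil(47 / 15) = 4 rows); with rows=5 it yields
+    # ceil(47 / 5) = 10 columns; with neither set, ceil(sqrt(47)) = 7
+    # columns and ceil(47 / 7) = 7 rows.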
+
+    canvas_width = grid_cols * AVATAR_SIZE
+    canvas_height = grid_rows * AVATAR_SIZE
+
+    collage = Image.new('RGBA', (canvas_width, canvas_height), (255, 255, 255, 0))
+
+    print(f"Creating a {grid_cols}x{grid_rows} collage for {num_images} contributors...")
+    for i, path in enumerate(image_paths):
+        try:
+            avatar = create_circular_avatar(path)
+            if avatar.size != (AVATAR_SIZE, AVATAR_SIZE):
+                avatar = avatar.resize((AVATAR_SIZE, AVATAR_SIZE), Image.Resampling.LANCZOS)
+
+            x = (i % grid_cols) * AVATAR_SIZE
+            y = (i // grid_cols) * AVATAR_SIZE
+            collage.paste(avatar, (x, y), avatar)
+        except Exception as e:
+            print(f"❌ Error processing image {path}: {e}")
+
+    collage.save(output_path, 'PNG')
+    print(f"✅ Collage saved successfully to '{output_path}'")
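+
+# Minimal usage sketch (illustrative; 'users' is a hypothetical dict that
+# maps usernames to avatar URLs):
+#
+#   paths = [download_avatar(name, url) for name, url in users.items()]
+#   create_collage([p for p in paths if p], "collage.png", columns=15)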
diff --git a/source-repo-scripts/project_contributors/src/generate_contributors.py b/source-repo-scripts/project_contributors/src/generate_contributors.py
new file mode 100644
index 000000000..e70ab5c37
--- /dev/null
+++ b/source-repo-scripts/project_contributors/src/generate_contributors.py
@@ -0,0 +1,160 @@
+# Copyright 2025 Open Source Robotics Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file was generated by Gemini 2.5 Pro.
+
+"""
+Generates a Markdown list and/or an avatar collage of contributors.
+"""
+
+import json
+import argparse
+from collections import Counter
+from github_api import build_graphql_query, fetch_contributors_gh_cli
+from collage import download_avatar, create_collage
+
+def generate_md(args):
+    """Generates a Markdown file of contributors from a JSON file."""
+    try:
+        with open(args.input_json, 'r', encoding='utf-8') as f:
+            pull_requests = json.load(f)
+    except (IOError, json.JSONDecodeError) as e:
+        print(f"❌ Error reading or parsing '{args.input_json}': {e}")
+        return
+
+    if not pull_requests:
+        print("No pull requests found in the input file.")
+        return
+
+    author_counts = Counter(pr['author'] for pr in pull_requests if 'author' in pr)
+
+    if not author_counts:
+        print("No authors found in the input file.")
+        return
+
+    unique_authors = sorted(author_counts)
+    print(f"Found {len(unique_authors)} unique contributors. Building GraphQL query...")
+
+    graphql_query = build_graphql_query(unique_authors)
+    all_user_data = fetch_contributors_gh_cli(graphql_query)
+
+    if not all_user_data:
+        print("❌ Failed to fetch contributor data. Aborting.")
+        return
+
+    contributors_md = ["# Contributors\n"]
+
+    sorted_contributors = sorted(
+        filter(None, all_user_data.values()),
+        key=lambda u: (u.get('name') or u.get('login', '')).lower()
+    )
+
+    for user_data in sorted_contributors:
+        display_name = user_data.get('name') or user_data.get('login')
+        username = user_data.get('login', 'unknown')
+        profile_url = user_data.get('url', '#')
+        pr_count = author_counts.get(username, 0)
+        pr_plural = "PR" if pr_count == 1 else "PRs"
+
+        md_line = f"1. {display_name} ([@{username}]({profile_url})) - {pr_count} {pr_plural}"
+        contributors_md.append(md_line)
+
+    try:
+        with open(args.output_md, 'w', encoding='utf-8') as f:
+            f.write("\n".join(contributors_md) + "\n")
+        print(f"\n✅ Successfully wrote contributors list to '{args.output_md}'")
+    except IOError as e:
+        print(f"❌ Error writing to '{args.output_md}': {e}")
+
+def generate_collage(args):
+    """Generates a collage of avatars."""
+    if args.input_json:
+        try:
+            with open(args.input_json, 'r', encoding='utf-8') as f:
+                pull_requests = json.load(f)
+            usernames = sorted({pr['author'] for pr in pull_requests if 'author' in pr})
+        except (IOError, json.JSONDecodeError) as e:
+            print(f"❌ Error reading or parsing '{args.input_json}': {e}")
+            return
+    else:
+        usernames = args.usernames
+
+    if not usernames:
+        print("No usernames provided.")
+        return
+
+    graphql_query = build_graphql_query(usernames)
+    all_user_data = fetch_contributors_gh_cli(graphql_query)
+
+    if not all_user_data:
+        print("❌ Failed to fetch user data. Aborting.")
+        return
+
+    avatar_paths = []
+    for user_data in all_user_data.values():
+        if user_data:
+            username = user_data.get('login')
+            avatar_url = user_data.get('avatarUrl')
+            if username and avatar_url:
+                avatar_path = download_avatar(username, avatar_url)
+                if avatar_path:
+                    avatar_paths.append(avatar_path)
+
+    if avatar_paths:
+        create_collage(
+            avatar_paths,
+            args.output_png,
+            rows=args.rows,
+            columns=args.columns
+        )
+    else:
+        print("No avatars were downloaded. Collage not created.")
+
+def main():
+    """Parses arguments and dispatches to the selected sub-command."""
+    parser = argparse.ArgumentParser(
+        description="Generates a Markdown list and/or an avatar collage of contributors."
+    )
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # --- 'md' Sub-command ---
+    parser_md = subparsers.add_parser("md", help="Generate a Markdown file of contributors from a JSON file.")
+    parser_md.add_argument("input_json", help="Path to the input JSON file.")
+    parser_md.add_argument("output_md", help="Path for the output Markdown file.")
+    parser_md.set_defaults(func=generate_md)
+
+    # --- 'collage' Sub-command ---
+    parser_collage = subparsers.add_parser("collage", help="Generate a collage of avatars.")
+    input_group = parser_collage.add_mutually_exclusive_group(required=True)
+    input_group.add_argument("--input-json", help="Path to the input JSON file.")
+    input_group.add_argument("--usernames", nargs='+', help="A list of GitHub usernames.")
+    parser_collage.add_argument("output_png", help="Path to save the generated avatar collage.")
+    collage_group = parser_collage.add_mutually_exclusive_group()
+    collage_group.add_argument(
+        "--rows",
+        type=int,
+        help="Number of rows for the avatar collage. Cannot be used with --columns."
+    )
+    collage_group.add_argument(
+        "--columns",
+        type=int,
+        help="Number of columns for the avatar collage. Cannot be used with --rows."
+    )
+    parser_collage.set_defaults(func=generate_collage)
+
+    args = parser.parse_args()
+    args.func(args)
+
+if __name__ == "__main__":
+    main()
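+
+# Note on input: the JSON consumed here is the list written by
+# get-merged-prs, where each object carries at least an "author" field,
+# e.g. (illustrative): [{"author": "octocat", "title": "...", "url": "..."}]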
diff --git a/source-repo-scripts/project_contributors/src/get_merged_prs.py b/source-repo-scripts/project_contributors/src/get_merged_prs.py
new file mode 100755
index 000000000..cea11df9a
--- /dev/null
+++ b/source-repo-scripts/project_contributors/src/get_merged_prs.py
@@ -0,0 +1,302 @@
+# Copyright 2025 Open Source Robotics Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file was generated by Gemini 2.5 Pro.
+
+"""
+A script to find all merged pull requests in a GitHub organization, bypassing
+the 1000-item search limit.
+
+Description:
+This script works around the GitHub API's 1000-result search limit by breaking
+the requested date range into quarterly (3-month) chunks. For each chunk, it
+performs a paginated GraphQL search to fetch all merged PRs. The results are
+then aggregated and filtered into a single final JSON array.
+
+Usage:
+    ./get_merged_prs.py <ORG_NAME> <START_DATE> [END_DATE]
+
+Example:
+    # Get all PRs merged in the 'gazebosim' org during the first half of 2024
+    ./get_merged_prs.py gazebosim 2024-01-01 2024-06-30 > gazebo-prs.json
+
+    # Get all PRs merged since the start of 2025 (end date defaults to today)
+    ./get_merged_prs.py gazebosim 2025-01-01 > gazebo-prs-since.json
+
+Dependencies:
+    - gh (the GitHub CLI): https://cli.github.com/
+"""
+
+import sys
+import json
+import shutil
+import subprocess
+import argparse
+from datetime import datetime, date, timedelta
+
+# The GraphQL query to fetch merged PRs.
+GQL_QUERY = """
+query($searchQuery: String!, $afterCursor: String) {
+  search(query: $searchQuery, type: ISSUE, first: 100, after: $afterCursor) {
+    pageInfo {
+      hasNextPage
+      endCursor
+    }
+    edges {
+      node {
+        ... on PullRequest {
+          url
+          title
+          author { login }
+          repository { nameWithOwner }
+          createdAt
+          closedAt # closedAt equals the merge time for merged PRs
+          baseRefName
+        }
+      }
+    }
+  }
+}
+"""
+
+def check_dependencies():
+    """Checks if the 'gh' command is available in the system's PATH."""
+    if not shutil.which("gh"):
+        print(
+            "Error: 'gh' command not found. "
+            "Please install the GitHub CLI.",
+            file=sys.stderr
+        )
+        sys.exit(1)
+
+def parse_date(date_str: str) -> date:
+    """Helper function to parse YYYY-MM-DD strings into date objects."""
+    try:
+        return datetime.strptime(date_str, "%Y-%m-%d").date()
+    except ValueError:
+        print(
+            f"Error: Invalid date format '{date_str}'. "
+            "Please use YYYY-MM-DD.",
+            file=sys.stderr
+        )
+        sys.exit(1)
+
+def fetch_prs_for_range(
+    org_name: str,
+    chunk_start: date,
+    chunk_end: date
+) -> list:
+    """
+    Fetches all pages of PRs for a specific date range (a "chunk").
+    This function handles pagination within its given date range.
+    """
+    all_prs_in_range = []
+    end_cursor = None
+
+    # Use the 'closed' date filter as it corresponds to the 'closedAt' field
+    search_query = (
+        f"is:pr is:merged org:{org_name} "
+        f"closed:{chunk_start.isoformat()}..{chunk_end.isoformat()}"
+    )
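+    # For gazebosim and Q4 2024, for example (illustrative), the resulting
+    # query string is:
+    #   is:pr is:merged org:gazebosim closed:2024-10-01..2024-12-31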
+    print(
+        f"Fetching PRs for range: {chunk_start} to {chunk_end}",
+        file=sys.stderr
+    )
+
+    while True:
+        # Build the 'gh api' command
+        command = [
+            "gh", "api", "graphql",
+            "-f", f"query={GQL_QUERY}",
+            "-f", f"searchQuery={search_query}"
+        ]
+        if end_cursor:
+            command.extend(["-f", f"afterCursor={end_cursor}"])
+
+        try:
+            # Execute the command
+            result = subprocess.run(
+                command,
+                capture_output=True,
+                text=True,
+                check=True,
+                encoding="utf-8"
+            )
+            response = json.loads(result.stdout)
+
+            if "errors" in response:
+                print(
+                    f"Error in GraphQL query: {response['errors']}",
+                    file=sys.stderr
+                )
+                break
+
+            search_data = response.get("data", {}).get("search", {})
+            edges = search_data.get("edges", [])
+
+            # Add valid nodes to our list
+            for edge in edges:
+                if node := edge.get("node"):
+                    all_prs_in_range.append(node)
+
+            page_info = search_data.get("pageInfo", {})
+            has_next_page = page_info.get("hasNextPage", False)
+
+            if not has_next_page:
+                break  # Exit loop if no more pages
+
+            end_cursor = page_info.get("endCursor")
+
+        except subprocess.CalledProcessError as e:
+            print(
+                f"Error calling 'gh api': {e.stderr}",
+                file=sys.stderr
+            )
+            break
+        except json.JSONDecodeError as e:
+            print(
+                f"Error decoding JSON response from 'gh api': {e}",
+                file=sys.stderr
+            )
+            break
+
+    return all_prs_in_range
+
+def filter_pr(pr: dict) -> bool:
+    """
+    Filters out unwanted pull requests based on defined criteria.
+    Returns True if the PR should be KEPT, False if it should be FILTERED OUT.
+
+    Current filters (all are case-insensitive where applicable):
+    - Author is 'mergify' or 'renovate'
+    - Title contains 'backport'
+    - Title contains 'merge '
+    """
+    author = pr.get("author", {}).get("login")
+    title = pr.get("title", "").lower()  # Convert to lowercase once for efficiency
+
+    if author == "mergify":
+        return False
+
+    if author == "renovate":
+        return False
+
+    if "backport" in title:
+        return False
+
+    if "merge " in title:
+        return False
+
+    # This is a good PR, keep it
+    return True
+
+def main():
+    """
+    Main function to parse arguments and orchestrate the PR fetching.
+    """
+    check_dependencies()
+
+    parser = argparse.ArgumentParser(
+        description=(
+            "Find all merged PRs in a GitHub org, "
+            "bypassing the 1000-item search limit."
+        )
+    )
+    parser.add_argument(
+        "org_name",
+        help="GitHub organization name (e.g., 'gazebosim')"
+    )
+    parser.add_argument(
+        "start_date",
+        type=parse_date,
+        help="Start date in YYYY-MM-DD format"
+    )
+    parser.add_argument(
+        "end_date",
+        nargs="?",
+        default=date.today(),
+        type=parse_date,
+        help="End date in YYYY-MM-DD format (defaults to today)"
+    )
+    args = parser.parse_args()
+
+    # Ensure start_date is not after end_date
+    if args.start_date > args.end_date:
+        print(
+            "Error: Start date cannot be after end date.",
+            file=sys.stderr
+        )
+        sys.exit(1)
+
+    all_merged_prs = []
+    current_date = args.start_date
+
+    # Iterate from the start date to the end date, quarter by quarter
+    while current_date <= args.end_date:
+        chunk_start = current_date
+
+        # Get the first day of the current month
+        month_start = chunk_start.replace(day=1)
+
+        # Calculate the start of the next 3-month period
+        next_period_month_raw = month_start.month + 3
+        next_period_year = (
+            month_start.year + (next_period_month_raw - 1) // 12
+        )
+        next_period_month = (next_period_month_raw - 1) % 12 + 1
+
+        next_period_start = date(next_period_year, next_period_month, 1)
+
+        # The end of the current quarter is one day before the next period starts
+        quarter_end = next_period_start - timedelta(days=1)
+
+        # Ensure the chunk's end doesn't go past the requested end date
+        chunk_end = min(quarter_end, args.end_date)
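+        # Example (illustrative): a start date of 2024-10-01 produces the
+        # chunks 2024-10-01..2024-12-31, then 2025-01-01..2025-03-31, and
+        # so on, with the final chunk clamped to the requested end date.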
+
+        # Fetch PRs for this chunk
+        prs_in_chunk = fetch_prs_for_range(
+            args.org_name,
+            chunk_start,
+            chunk_end
+        )
+        all_merged_prs.extend(prs_in_chunk)
+
+        # Move to the first day of the next 3-month period
+        current_date = next_period_start
+
+    # Filter the aggregated PRs using our filter function
+    filtered_prs_list = [pr for pr in all_merged_prs if filter_pr(pr)]
+
+    print(f"Total PRs fetched: {len(all_merged_prs)}", file=sys.stderr)
+    print(f"Total PRs after filtering: {len(filtered_prs_list)}", file=sys.stderr)
+
+    # Format the final list of PRs, similar to the original jq filter
+    formatted_prs = [
+        {
+            "url": pr.get("url"),
+            "title": pr.get("title"),
+            "author": pr.get("author", {}).get("login"),
+            "repository": pr.get("repository", {}).get("nameWithOwner"),
+            "createdAt": pr.get("createdAt"),
+            "mergedAt": pr.get("closedAt"),  # Rename for clarity
+            "baseRefName": pr.get("baseRefName")
+        }
+        for pr in filtered_prs_list  # Use the filtered list here
+    ]
+
+    # Print the final aggregated JSON array to stdout
+    print(json.dumps(formatted_prs, indent=2))
+
+if __name__ == "__main__":
+    main()
diff --git a/source-repo-scripts/project_contributors/src/github_api.py b/source-repo-scripts/project_contributors/src/github_api.py
new file mode 100644
index 000000000..706613c74
--- /dev/null
+++ b/source-repo-scripts/project_contributors/src/github_api.py
@@ -0,0 +1,86 @@
+# Copyright 2025 Open Source Robotics Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file was generated by Gemini 2.5 Pro.
+
+"""
+Provides functionality to interact with the GitHub GraphQL API to fetch user data.
+"""
+
+import json
+import subprocess
+import re
+
+# The avatar size (in pixels) to request from the API; keep in sync with
+# collage.AVATAR_SIZE.
+AVATAR_SIZE = 256
+
+def build_graphql_query(usernames: list[str]) -> str:
+    """
+    Constructs a single GraphQL query to fetch multiple users by login.
+
+    Args:
+        usernames: A list of GitHub usernames.
+
+    Returns:
+        A formatted GraphQL query string.
+    """
+    query_parts = []
+    for username in usernames:
+        # Aliases must be valid GraphQL names, so replace any character
+        # that is not alphanumeric or an underscore.
+        alias = "user_" + re.sub(r'[^a-zA-Z0-9_]', '_', username)
+        query_parts.append(f"""
+        {alias}: user(login: "{username}") {{
+            name
+            login
+            url
+            avatarUrl(size: {AVATAR_SIZE})
+        }}
+        """)
+
+    return f"query GetContributors {{ {' '.join(query_parts)} }}"
+
+def fetch_contributors_gh_cli(query: str) -> dict | None:
+    """
+    Sends a query to the GitHub GraphQL API using the 'gh' CLI tool.
+
+    Args:
+        query: The GraphQL query string.
+
+    Returns:
+        A dictionary containing the user data from the API, or None on error.
+    """
+    command = ["gh", "api", "graphql", "-f", f"query={query}"]
+    try:
+        process = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            check=True,
+            encoding='utf-8'
+        )
+        result = json.loads(process.stdout)
+        if "errors" in result:
+            print(f"❌ GraphQL API returned errors: {result['errors']}")
+            return None
+        return result.get("data")
+    except FileNotFoundError:
+        print("❌ Error: The 'gh' command-line tool is not installed or not in your PATH.")
+        print("   Please install it from https://cli.github.com/ and authenticate with 'gh auth login'.")
+        return None
+    except subprocess.CalledProcessError as e:
+        print("⚠️ Error executing 'gh' command. Is 'gh auth login' configured?")
+        print(f"   Stderr: {e.stderr}")
+        return None
+    except json.JSONDecodeError as e:
+        print(f"❌ Error parsing JSON response from 'gh' CLI: {e}")
+        return None
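+
+# For a username such as "some-user" (hypothetical), build_graphql_query
+# emits an aliased block of the form:
+#
+#   user_some_user: user(login: "some-user") {
+#       name
+#       login
+#       url
+#       avatarUrl(size: 256)
+#   }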