From 6e15c0bb9aad852fa5ab42f6d9cc5590a7af2fa8 Mon Sep 17 00:00:00 2001 From: Kegan Maher Date: Mon, 3 Jun 2024 16:50:22 +0000 Subject: [PATCH 1/2] chore(dependencies): update GAM --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fdf9e49..5f522b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] requires-python = ">=3.11" dependencies = [ - "advanced-gam-for-google-workspace @ git+https://github.com/taers232c/GAMADV-XTD3.git@v6.71.15#subdirectory=src", + "advanced-gam-for-google-workspace @ git+https://github.com/taers232c/GAMADV-XTD3.git@v6.76.12#subdirectory=src", "pandas==2.2.2", ] From f003cdb3993a2e3b9e765a38524119427fd91445 Mon Sep 17 00:00:00 2001 From: Kegan Maher Date: Mon, 3 Jun 2024 19:26:13 +0000 Subject: [PATCH 2/2] refactor(notebook): simplify summarize notebook no longer using the Google process, so removing that code just output some summary info about the data --- notebooks/summarize-harvest.ipynb | 111 ++++++++++++++++++ notebooks/verify-harvest.ipynb | 179 ------------------------------ 2 files changed, 111 insertions(+), 179 deletions(-) create mode 100644 notebooks/summarize-harvest.ipynb delete mode 100644 notebooks/verify-harvest.ipynb diff --git a/notebooks/summarize-harvest.ipynb b/notebooks/summarize-harvest.ipynb new file mode 100644 index 0000000..59a1d51 --- /dev/null +++ b/notebooks/summarize-harvest.ipynb @@ -0,0 +1,111 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = os.environ[\"HARVEST_DATA\"]\n", + "df = pd.read_csv(data.removeprefix(\"notebooks/\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Date\"].min()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Date\"].max()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Hours\"].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "groupby = df.groupby([\"Project\"])\n", + "\n", + "for group in groupby.groups:\n", + " group_df = groupby.get_group((group, ))\n", + " sum = group_df[\"Hours\"].sum()\n", + "\n", + " print(f\"{group}: {sum}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "groupby = df.groupby([\"First name\", \"Last name\", \"Project\"])\n", + "\n", + "for group in groupby.groups:\n", + " group_df = groupby.get_group(group)\n", + " sum = group_df[\"Hours\"].sum()\n", + "\n", + " print(f\"{group}: {sum}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/verify-harvest.ipynb b/notebooks/verify-harvest.ipynb deleted file mode 100644 index bb39a43..0000000 --- a/notebooks/verify-harvest.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "google_file = os.environ[\"HARVEST_GOOGLE_DATA\"]\n", - "local_file = os.environ[\"HARVEST_DATA\"]\n", - "\n", - "df_google = pd.read_csv(google_file)\n", - "df_local = pd.read_csv(local_file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_google.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_local.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_google.columns.to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_local.columns.to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_local.columns.equals(df_google.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_google[\"Hours\"].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_local[\"Hours\"].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(df_google)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(df_local)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "google_groups = df_google.groupby([\"Project\", \"Last name\"])\n", - "google_sums = {}\n", - "for group in google_groups.groups:\n", - " project, last_name = group\n", - " group_df = google_groups.get_group(group)\n", - " google_sums[group] = group_df['Hours'].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_groups = df_local.groupby([\"Project\", \"Last name\"])\n", - "local_sums = {}\n", - "for group in local_groups.groups:\n", - " project, last_name = group\n", - " group_df = local_groups.get_group(group)\n", - " local_sums[group] = group_df['Hours'].sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(local_sums) == len(google_sums)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for k,v in local_sums.items():\n", - " print(k)\n", - " assert k in google_sums\n", - " assert v == google_sums[k]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}