You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
To get a credentials file for the GHTorrent (GitHub Torrent) data on Google Cloud (necessary for re-running the pipeline to generate images):
36
+
37
+
- Follow the instructions here to create and download a credentials file: https://developers.google.com/adwords/api/docs/guides/authentication#generate_oauth2_credentials
38
+
- Change the name of the file to `credentials_file.json` and put it in the root directory of the project (a sample file with the same name is included as a reference).
parser.add_argument("-rp", "--results_path", help="The folder to output results of the analysis. e.g. embeddings and plots", default="./results/")
86
92
parser.add_argument("-nw", "--n_workers", help="The number of workers to use when running the analysis.", default=8, type=int)
87
-
parser.add_argument("-dp", "--data_path", help="The path to the commits.feather file. e.g. /home/user/RStudio-Data-Repository/clean_data/commits.feather", default="./results/")
93
+
parser.add_argument("-dp", "--data_path", help="The path to the commits.feather file. e.g. /home/user/RStudio-Data-Repository/clean_data/commits_by_org.feather", default="/home/user/RStudio-Data-Repository/clean_data/commits_by_org.feather")
88
94
parser.add_argument("-np", "--n_projects", help="The number of projects to sample from the dataset.", default=1000, type=int)
89
-
parser.add_argument("-mc", "--min_commits", help="The minimum number of commits for a project to be included in the sample.", default=None, type=int)
95
+
parser.add_argument("-mc", "--min_commits", help="The minimum number of commits for a project to be included in the sample.", default=None, type=none_or_int)
90
96
parser.add_argument("-mcount", "--min_count", help="The min_count parameter for the graph2vec model.", default=5, type=int)
91
97
parser.add_argument("-nps", "--n_personas", help="The number of personas to extract from each cluster.", default=5, type=int)
92
98
parser.add_argument("-nn", "--n_neurons", help="The number of neurons to use for Graph2Vec (project level)", default=128, type=int)
@@ -134,4 +140,4 @@ def main(args):
134
140
# left join `ghtorrent-bq.ght.commit_parents` cp on (cp.commit_id = c.id)
0 commit comments