Skip to content

Commit 237cf4b

Browse files
authored
Merge pull request #1 from UBC-MDS/hackathon-end-to-end
Hackathon Day One - End to end
2 parents ddb83de + cc92650 commit 237cf4b

File tree

4 files changed

+41
-0
lines changed

4 files changed

+41
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
*ipynb_checkpoints*
22
*DS_Store*
33
*Rhistory*
4+
*.json

imgs/.gitignore

Whitespace-only changes.

imgs/branch_test.png

27.2 KB
Loading

src/big_cloud_scratch.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import pandas_gbq
2+
import networkx as nx
3+
import matplotlib.pyplot as plt
4+
import time
5+
6+
def query_ght(queryString):
    """Run a BigQuery SQL statement and return its result set.

    The statement is handed unchanged to ``pandas_gbq.read_gbq``; whatever
    that call returns (a pandas DataFrame) is passed straight back.

    Args:
        queryString: SQL text to execute.

    Returns:
        The query result as returned by ``pandas_gbq.read_gbq``.
    """
    # Dataset browser: https://bigquery.cloud.google.com/dataset/ghtorrent-bq:ght
    return pandas_gbq.read_gbq(queryString)
11+
12+
def plot_commits(commits):
    """Draw the parent -> child commit links found in ``commits``.

    Rows where either ``cp_parent_id`` or ``c_id`` is missing are dropped,
    both ids are cast to ``int64``, and the remaining pairs become the edges
    of a networkx graph drawn with the Kamada-Kawai layout.

    Args:
        commits: DataFrame with at least the columns ``cp_parent_id`` and
            ``c_id``.

    Returns:
        None. The drawing is issued through ``nx.draw_kamada_kawai``.
    """
    # Keep only complete (parent, child) pairs; NaN ids cannot be cast to int.
    edge_pairs = commits[["cp_parent_id", "c_id"]].dropna().astype("int64")
    edge_list = edge_pairs.rename(
        columns={"cp_parent_id": "source", "c_id": "target"}
    )

    graph = nx.from_pandas_edgelist(edge_list, source="source", target="target")
    nx.draw_kamada_kawai(graph, alpha=0.5, node_color='blue', node_size=2)
18+
19+
if __name__ == '__main__':
    # Fetch the commits of project 5524547 together with their parent links
    # (left joins, so commits without a recorded parent are still returned),
    # capped at 10000 rows.
    commitQuery = """
select
c.id as c_id,
p.id as p_id,
cp.commit_id as cp_commit_id,
cp.parent_id as cp_parent_id
from `ghtorrent-bq.ght.commits` c
left join `ghtorrent-bq.ght.projects` p on (p.id = c.project_id)
left join `ghtorrent-bq.ght.commit_parents` cp on (cp.commit_id = c.id)
where (p.id = 5524547)
limit 10000
"""

    # Use the monotonic perf_counter() for the stopwatch: time.time() follows
    # the wall clock and can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    commits = query_ght(commitQuery)
    getData = time.perf_counter()
    print("Query Time:\t" + str(getData - start))

    plot_commits(commits)
    plotTime = time.perf_counter()
    print("Plot Time:\t" + str(plotTime - getData))

    # Written after the last timestamp, so saving is not part of "Plot Time".
    plt.savefig("./imgs/branch_test")

0 commit comments

Comments
 (0)