@@ -101,8 +101,9 @@ def calc_conf(a):
101
101
return interval [1 ]- interval [0 ]
102
102
103
103
class Report :
104
+ """Class to generate images used in report and presentation."""
104
105
def __init__ (self , data_path = '/Users/richiezitomer/Documents/RStudio-Data-Repository/clean_data/commits_by_org.feather' ,
105
- embedding_path = 'results/embeddings.csv' , num_motifs_to_sample = 100 , motif_lengths = [5 ,10 ,25 ,50 ,100 ]):
106
+ embedding_path = 'results/embeddings.csv' , num_motifs_to_sample = 1000 , motif_lengths = [5 ,10 ,25 ,50 ,100 ]):
106
107
self .emb = pd .read_csv (embedding_path )
107
108
self .project_ids = self .emb .type .values
108
109
self .proj_ids_string = "," .join (self .project_ids .astype (str ))
@@ -113,7 +114,8 @@ def __init__(self, data_path='/Users/richiezitomer/Documents/RStudio-Data-Reposi
113
114
114
115
self .project_stats_created = False
115
116
116
- def make_proj_stats_df (self , read_from_motif_files = False ,do_cluster = False ):
117
+ def make_proj_stats_df (self ):
118
+ """Method to make dataframe with stats by project."""
117
119
# Load Data
118
120
comm_auth_by_proj = pull_queries (COMM_AUTH_BY_PROJ .format (proj_ids = self .proj_ids_string )).set_index (
119
121
'p_id' ) # pd.read_csv('data/author_commits_by_proj_100.csv').set_index('p_id')
@@ -140,7 +142,7 @@ def make_proj_stats_df(self, read_from_motif_files=False,do_cluster=False):
140
142
self .project_stats_created = True
141
143
142
144
def get_multi_chain_percent_by_proj (self , k ,proj_id ):
143
- """f """
145
+ """Method that gets multi-chain percentage of each project. """
144
146
projects_cluster = self .commits_dl .getCommitsByProjectId (proj_id )
145
147
G = git_graph (projects_cluster )
146
148
roots = [n for n , d in G .in_degree () if d == 0 ]
@@ -190,30 +192,41 @@ def get_multi_chain_percent_by_proj(self, k,proj_id):
190
192
# fig.savefig(output_path)
191
193
192
194
def get_most_common_motifs(self, motif_length=5):
    """Plot the most common motifs of a given length and save the figure.

    Motifs are obtained from ``mf.get_motifs`` for ``self.project_ids``,
    ranked by their count (most frequent first), and drawn one per subplot.
    Length-5 motifs use a 3x3 grid (top 9); every other length uses a 4x2
    grid (top 8). Each panel title shows the rank, the count as a rounded
    percentage of ``self.num_motifs_to_sample``, and the raw count.

    The figure is written to ``results/motif_<motif_length>_visual.png``.

    Args:
        motif_length: Length of the motifs to sample and draw.

    Returns:
        The matplotlib figure containing the motif grid.
    """
    # NOTE(review): `motifs` is used as a mapping of motif -> count whose keys
    # are drawable by networkx (presumably graphs) -- confirm against mf.get_motifs.
    motifs = mf.get_motifs(self.project_ids, motif_length, self.num_motifs_to_sample, self.commits_dl)

    if motif_length == 5:
        n_rows, n_cols = 3, 3
    else:
        n_rows, n_cols = 4, 2
    fig, axs = plt.subplots(n_rows, n_cols)
    fig.set_size_inches(18.5, 10.5)

    # Flatten once; the number of panels is also the cap on motifs drawn.
    panels = axs.flatten()
    ranked = sorted(motifs, key=motifs.get, reverse=True)[:len(panels)]

    for n, (key, ax) in enumerate(zip(ranked, panels)):
        count = motifs[key]
        percent = round(100 * (count / self.num_motifs_to_sample))

        if motif_length == 5:
            nx.draw_kamada_kawai(key, node_size=300, width=1.5, arrowsize=50, ax=ax)
            title_template = '{}. {}% (n={})'
        elif n == 0:
            # The top-ranked longer motif keeps the Kamada-Kawai layout.
            nx.draw_kamada_kawai(key, node_size=100, width=1, ax=ax)
            title_template = '{}. {}% (n={})'
        else:
            nx.draw_spring(key, node_size=100, width=.8, arrowsize=20, ax=ax)
            # Trailing space kept exactly as in the original title string.
            title_template = '{}. {}% (n={}) '

        ax.set_title(title_template.format(n + 1, percent, count), fontsize=20)

    # With fewer distinct motifs than panels, the leftover subplots would show
    # empty framed axes next to the frameless drawn ones; hide them.
    for unused_ax in panels[len(ranked):]:
        unused_ax.set_axis_off()

    fig.suptitle('Most Common Motifs Length {} Occurrence Rate and Count '.format(motif_length), fontsize=25)
    fig.savefig('results/motif_{}_visual.png'.format(motif_length))
    return fig
215
227
216
228
def get_motif_example (self , motif_length = 25 ):
229
+ """Method that gets an example motif of motif_length."""
217
230
motifs = mf .get_motifs (self .project_ids , motif_length , self .num_motifs_to_sample , self .commits_dl )
218
231
second_most_common_motif = sorted (motifs , key = motifs .get , reverse = True )[1 ]
219
232
@@ -225,6 +238,7 @@ def get_motif_example(self, motif_length=25):
225
238
return fig
226
239
227
240
def get_mcp_hist (self ):
241
+ """Method that makes a histogram of different motif lengths by project."""
228
242
if not self .project_stats_created :
229
243
self .make_proj_stats_df ()
230
244
df = self .project_stats [['mcp_5' , 'mcp_10' , 'mcp_25' , 'mcp_50' , 'mcp_100' ]]
@@ -238,6 +252,7 @@ def get_mcp_hist(self):
238
252
return fig
239
253
240
254
def get_gh_feature_comparison (self ):
255
+ """Method that gets relative GH features of high- and low-complexity projects."""
241
256
if not self .project_stats_created :
242
257
self .make_proj_stats_df ()
243
258
self .project_stats ['complexity' ] = self .project_stats .mcp_25 .apply (complexity_tag )
0 commit comments