 def is_fbcode():
-    return importlib.util.find_spec("tritonparse.fb.IS_FBCODE_CHECK") is not None
+    return importlib.util.find_spec("tritonparse.fb") is not None


 if is_fbcode():
-    from .fb.source_type import SourceType
+    from tritonparse.fb.source_type import SourceType
 else:
-    from .source_type import SourceType
+    from tritonparse.source_type import SourceType


 class Rank:
@@ -113,12 +113,12 @@ def from_cli_args(

         if rank is not None:
             return cls(rank=Rank(rank))
-        if is_fbcode():
-            from fb.utils import rank_config_from_cli_args
+        if source_type in [SourceType.LOCAL, SourceType.LOCAL_FILE]:
+            return cls(is_local=True)
+        elif is_fbcode():
+            from tritonparse.fb.utils import rank_config_from_cli_args

             return rank_config_from_cli_args(cls, source_type)
-        elif source_type in [SourceType.LOCAL, SourceType.LOCAL_FILE]:
-            return cls(is_local=True)
         else:
             return cls(all_ranks=True)
@@ -134,32 +134,32 @@ def to_rank(self) -> Rank:
         return Rank()


-def gzip_folder(folder: str, parsed_ranks: List[str], verbose: bool) -> None:
+def gzip_single_file(file_path: str, verbose: bool = False) -> str:
     """
-    Gzip all files in a folder.
-
+    Gzip a single file and delete the original file.
     Args:
-        folder: Path to folder
+        file_path: Path to the file to gzip
         verbose: Whether to print verbose information
+    Returns:
+        Path to the gzipped file
     """
-    for parsed_rank in parsed_ranks:
-        target_folder = os.path.join(folder, parsed_rank)
-        for filename in os.listdir(target_folder):
-            if filename.endswith(".gz"):
-                continue
-
-            file_path = os.path.join(target_folder, filename)
-            if os.path.isfile(file_path):
-                if verbose:
-                    logger.info(f"Gzipping {file_path}")
-                with open(file_path, "rb") as f_in:
-                    with gzip.open(file_path + ".gz", "wb") as f_out:
-                        shutil.copyfileobj(f_in, f_out)
-                # Delete the original file after successful compression
-                os.remove(file_path)
-                if verbose:
-                    logger.info(f"Deleted original file {file_path}")
-    logger.info(f"Compressed all files in {folder}")
+    if file_path.endswith(".gz"):
+        return file_path
+
+    gz_file_path = file_path + ".gz"
+    if verbose:
+        logger.info(f"Gzipping {file_path}")
+
+    with open(file_path, "rb") as f_in:
+        with gzip.open(gz_file_path, "wb") as f_out:
+            shutil.copyfileobj(f_in, f_out)
+
+    # Delete the original file after successful compression
+    os.remove(file_path)
+    if verbose:
+        logger.info(f"Deleted original file {file_path}")
+
+    return gz_file_path


 def copy_local_to_tmpdir(local_path: str, verbose: bool = False) -> str:
@@ -271,26 +271,32 @@ def parse_logs(
         # Parse the file
         parse_single_file(input_file, output_dir)

-        # Collect generated files after parsing
+        # Collect generated files after parsing and gzip them immediately
         if os.path.exists(output_dir):
             generated_files = []
             mapped_file = None

             for generated_item in os.listdir(output_dir):
                 generated_path = os.path.join(output_dir, generated_item)
                 if os.path.isfile(generated_path):
+                    # Gzip the file immediately after parsing
+                    gz_file_path = gzip_single_file(generated_path, verbose)
+                    gz_filename = os.path.basename(gz_file_path)
                     # Check if it's a mapped file (assuming files with 'mapped' in name)
                     if "mapped" in generated_item.lower():
-                        mapped_file = generated_item
+                        mapped_file = gz_filename
                     else:
-                        generated_files.append(generated_item)
-
+                        generated_files.append(gz_filename)
             # Initialize rank entry if not exists
             if rank_key not in file_mapping:
                 file_mapping[rank_key] = {"regular_files": [], "mapped_file": None}

-            # Add files to the mapping
+            # Add files to the mapping (now with .gz extensions)
             file_mapping[rank_key]["regular_files"].extend(generated_files)
+            # this is used to generate the tritonparse url
+            file_mapping[rank_key]["rank_suffix"] = rank_config.to_rank().to_string(
+                "/"
+            )
             if mapped_file:
                 file_mapping[rank_key]["mapped_file"] = mapped_file
@@ -304,14 +310,12 @@ def parse_logs(
         # Remove mapped_file if None
         if file_mapping[rank_key]["mapped_file"] is None:
             del file_mapping[rank_key]["mapped_file"]
-
     # Save file mapping to parsed_log_dir
     log_file_list_path = os.path.join(parsed_log_dir, "log_file_list.json")
     with open(log_file_list_path, "w") as f:
         json.dump(file_mapping, f, indent=2)
     # NOTICE: this print is required for tlparser-tritonparse integration
     print(f"tritonparse log file list: {log_file_list_path}")
-
     return parsed_log_dir, parsed_ranks
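
A minimal usage sketch of the new gzip_single_file helper, for reviewers who want to see the intended behavior end to end. The import path (tritonparse.utils) and the temporary file name below are assumptions made for illustration and are not shown in this diff; only gzip_single_file's signature, its ".gz" handling, and the gzipped names recorded by parse_logs in log_file_list.json come from the change itself.

# Editor's illustration only -- not part of the diff.
import gzip
import os
import tempfile

# Assumption: the helpers above live in a module importable as
# tritonparse.utils; adjust the import if they live elsewhere in the repo.
from tritonparse.utils import gzip_single_file

workdir = tempfile.mkdtemp()
raw_path = os.path.join(workdir, "example_trace.ndjson")  # hypothetical file name
with open(raw_path, "w") as f:
    f.write('{"event": "compilation"}\n')

# gzip_single_file() compresses the file, removes the original, and returns
# the new ".gz" path; calling it again on a ".gz" path is a no-op.
gz_path = gzip_single_file(raw_path, verbose=True)
assert gz_path == raw_path + ".gz"
assert not os.path.exists(raw_path)
assert gzip_single_file(gz_path) == gz_path

# The names recorded by parse_logs() in log_file_list.json now carry the
# ".gz" extension, so consumers should read them with gzip.open.
with gzip.open(gz_path, "rt") as f:
    print(f.readline())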