Skip to content

Commit 3c9db58

Browse files
FindHao authored and facebook-github-bot committed
test tritonparse only workflow
Summary: This diff includes several different changes: 1. removed `IS_FBCODE_CHECK` because `importlib.util.find_spec` only supports file search, not variable search 2. replaced `gzip_folder` with `gzip_single_file`, because it is better to update the file_mapping directly with compressed file names. 3. added `rank_suffix` to file_mapping to ensure the tlparse part can generate correct full URLs. 4. set `base_module` to "" in BUCK to ensure tritonparse works both in OSS and internally. 5. added dependent usage report libraries in BUCK Reviewed By: davidberard98 Differential Revision: D76053142 fbshipit-source-id: 6922c521d32c1ec3f0cdbf453e50b636dfbcd038
1 parent d05da75 commit 3c9db58

File tree

2 files changed

+43
-50
lines changed

2 files changed

+43
-50
lines changed

tritonparse/common.py

Lines changed: 40 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020

2121

2222
def is_fbcode():
23-
return importlib.util.find_spec("tritonparse.fb.IS_FBCODE_CHECK") is not None
23+
return importlib.util.find_spec("tritonparse.fb") is not None
2424

2525

2626
if is_fbcode():
27-
from .fb.source_type import SourceType
27+
from tritonparse.fb.source_type import SourceType
2828
else:
29-
from .source_type import SourceType
29+
from tritonparse.source_type import SourceType
3030

3131

3232
class Rank:
@@ -113,12 +113,12 @@ def from_cli_args(
113113

114114
if rank is not None:
115115
return cls(rank=Rank(rank))
116-
if is_fbcode():
117-
from fb.utils import rank_config_from_cli_args
116+
if source_type in [SourceType.LOCAL, SourceType.LOCAL_FILE]:
117+
return cls(is_local=True)
118+
elif is_fbcode():
119+
from tritonparse.fb.utils import rank_config_from_cli_args
118120

119121
return rank_config_from_cli_args(cls, source_type)
120-
elif source_type in [SourceType.LOCAL, SourceType.LOCAL_FILE]:
121-
return cls(is_local=True)
122122
else:
123123
return cls(all_ranks=True)
124124

@@ -134,32 +134,32 @@ def to_rank(self) -> Rank:
134134
return Rank()
135135

136136

137-
def gzip_folder(folder: str, parsed_ranks: List[str], verbose: bool) -> None:
137+
def gzip_single_file(file_path: str, verbose: bool = False) -> str:
138138
"""
139-
Gzip all files in a folder.
140-
139+
Gzip a single file and delete the original file.
141140
Args:
142-
folder: Path to folder
141+
file_path: Path to the file to gzip
143142
verbose: Whether to print verbose information
143+
Returns:
144+
Path to the gzipped file
144145
"""
145-
for parsed_rank in parsed_ranks:
146-
target_folder = os.path.join(folder, parsed_rank)
147-
for filename in os.listdir(target_folder):
148-
if filename.endswith(".gz"):
149-
continue
150-
151-
file_path = os.path.join(target_folder, filename)
152-
if os.path.isfile(file_path):
153-
if verbose:
154-
logger.info(f"Gzipping {file_path}")
155-
with open(file_path, "rb") as f_in:
156-
with gzip.open(file_path + ".gz", "wb") as f_out:
157-
shutil.copyfileobj(f_in, f_out)
158-
# Delete the original file after successful compression
159-
os.remove(file_path)
160-
if verbose:
161-
logger.info(f"Deleted original file {file_path}")
162-
logger.info(f"Compressed all files in {folder}")
146+
if file_path.endswith(".gz"):
147+
return file_path
148+
149+
gz_file_path = file_path + ".gz"
150+
if verbose:
151+
logger.info(f"Gzipping {file_path}")
152+
153+
with open(file_path, "rb") as f_in:
154+
with gzip.open(gz_file_path, "wb") as f_out:
155+
shutil.copyfileobj(f_in, f_out)
156+
157+
# Delete the original file after successful compression
158+
os.remove(file_path)
159+
if verbose:
160+
logger.info(f"Deleted original file {file_path}")
161+
162+
return gz_file_path
163163

164164

165165
def copy_local_to_tmpdir(local_path: str, verbose: bool = False) -> str:
@@ -271,26 +271,32 @@ def parse_logs(
271271
# Parse the file
272272
parse_single_file(input_file, output_dir)
273273

274-
# Collect generated files after parsing
274+
# Collect generated files after parsing and gzip them immediately
275275
if os.path.exists(output_dir):
276276
generated_files = []
277277
mapped_file = None
278278

279279
for generated_item in os.listdir(output_dir):
280280
generated_path = os.path.join(output_dir, generated_item)
281281
if os.path.isfile(generated_path):
282+
# Gzip the file immediately after parsing
283+
gz_file_path = gzip_single_file(generated_path, verbose)
284+
gz_filename = os.path.basename(gz_file_path)
282285
# Check if it's a mapped file (assuming files with 'mapped' in name)
283286
if "mapped" in generated_item.lower():
284-
mapped_file = generated_item
287+
mapped_file = gz_filename
285288
else:
286-
generated_files.append(generated_item)
287-
289+
generated_files.append(gz_filename)
288290
# Initialize rank entry if not exists
289291
if rank_key not in file_mapping:
290292
file_mapping[rank_key] = {"regular_files": [], "mapped_file": None}
291293

292-
# Add files to the mapping
294+
# Add files to the mapping (now with .gz extensions)
293295
file_mapping[rank_key]["regular_files"].extend(generated_files)
296+
# this is used to generate the tritonparse url
297+
file_mapping[rank_key]["rank_suffix"] = rank_config.to_rank().to_string(
298+
"/"
299+
)
294300
if mapped_file:
295301
file_mapping[rank_key]["mapped_file"] = mapped_file
296302

@@ -304,14 +310,12 @@ def parse_logs(
304310
# Remove mapped_file if None
305311
if file_mapping[rank_key]["mapped_file"] is None:
306312
del file_mapping[rank_key]["mapped_file"]
307-
308313
# Save file mapping to parsed_log_dir
309314
log_file_list_path = os.path.join(parsed_log_dir, "log_file_list.json")
310315
with open(log_file_list_path, "w") as f:
311316
json.dump(file_mapping, f, indent=2)
312317
# NOTICE: this print is required for tlparser-tritonparse integration
313318
print(f"tritonparse log file list: {log_file_list_path}")
314-
315319
return parsed_log_dir, parsed_ranks
316320

317321

tritonparse/utils.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,7 @@
66
# argument parser for OSS
77
parser = None
88

9-
from .common import (
10-
copy_local_to_tmpdir,
11-
gzip_folder,
12-
is_fbcode,
13-
parse_logs,
14-
RankConfig,
15-
save_logs,
16-
)
9+
from .common import copy_local_to_tmpdir, is_fbcode, parse_logs, RankConfig, save_logs
1710
from .source_type import Source, SourceType
1811

1912

@@ -51,7 +44,7 @@ def init_parser():
5144
)
5245
parser.add_argument("-v", "--verbose", help="Verbose logging", action="store_true")
5346
if is_fbcode():
54-
from .fb.utils import append_parser
47+
from tritonparse.fb.utils import append_parser
5548

5649
append_parser(parser)
5750
return parser
@@ -93,10 +86,6 @@ def oss_parse(args) -> int:
9386
os.makedirs(out_dir, exist_ok=True)
9487
return
9588

96-
parsed_log_dir, parsed_ranks = parse_logs(logs, rank_config, verbose) # type: ignore
97-
98-
# gzip all files in the parsed log directory
99-
gzip_folder(parsed_log_dir, parsed_ranks, verbose)
100-
89+
parsed_log_dir, _ = parse_logs(logs, rank_config, verbose)
10190
if args.out is not None:
10291
save_logs(Path(args.out), parsed_log_dir, args.overwrite, verbose)

0 commit comments

Comments
 (0)