From 4836c04734114e2f7faa2559c5178b3f93db0dbf Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 17:16:33 +0200 Subject: [PATCH 01/40] jobs --- setup.py | 1 + .../commands/huggingface_cli.py | 2 + src/huggingface_hub/commands/jobs/__init__.py | 46 +++++ .../commands/jobs/_cli_utils.py | 25 +++ src/huggingface_hub/commands/jobs/cancel.py | 35 ++++ src/huggingface_hub/commands/jobs/inspect.py | 40 ++++ src/huggingface_hub/commands/jobs/logs.py | 89 +++++++++ src/huggingface_hub/commands/jobs/ps.py | 186 ++++++++++++++++++ src/huggingface_hub/commands/jobs/run.py | 182 +++++++++++++++++ 9 files changed, 606 insertions(+) create mode 100644 src/huggingface_hub/commands/jobs/__init__.py create mode 100644 src/huggingface_hub/commands/jobs/_cli_utils.py create mode 100644 src/huggingface_hub/commands/jobs/cancel.py create mode 100644 src/huggingface_hub/commands/jobs/inspect.py create mode 100644 src/huggingface_hub/commands/jobs/logs.py create mode 100644 src/huggingface_hub/commands/jobs/ps.py create mode 100644 src/huggingface_hub/commands/jobs/run.py diff --git a/setup.py b/setup.py index 7de6594de1..b834fe7fcc 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ def get_version() -> str: "requests", "tqdm>=4.42.1", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias + "dotenv", ] extras = {} diff --git a/src/huggingface_hub/commands/huggingface_cli.py b/src/huggingface_hub/commands/huggingface_cli.py index 4e30f305c2..35b4395229 100644 --- a/src/huggingface_hub/commands/huggingface_cli.py +++ b/src/huggingface_hub/commands/huggingface_cli.py @@ -17,6 +17,7 @@ from huggingface_hub.commands.delete_cache import DeleteCacheCommand from huggingface_hub.commands.download import DownloadCommand from huggingface_hub.commands.env import EnvironmentCommand +from huggingface_hub.commands.jobs import JobsCommands from huggingface_hub.commands.lfs import LfsCommands from huggingface_hub.commands.repo import RepoCommands from huggingface_hub.commands.repo_files import RepoFilesCommand @@ -44,6 +45,7 @@ def main(): DeleteCacheCommand.register_subcommand(commands_parser) TagCommands.register_subcommand(commands_parser) VersionCommand.register_subcommand(commands_parser) + JobsCommands.register_subcommand(commands_parser) # Experimental UploadLargeFolderCommand.register_subcommand(commands_parser) diff --git a/src/huggingface_hub/commands/jobs/__init__.py b/src/huggingface_hub/commands/jobs/__init__.py new file mode 100644 index 0000000000..ea3535a27d --- /dev/null +++ b/src/huggingface_hub/commands/jobs/__init__.py @@ -0,0 +1,46 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains commands to interact with jobs on the Hugging Face Hub. 
+ +Usage: + # run a job + huggingface-cli jobs run image command +""" + +from argparse import _SubParsersAction + +from huggingface_hub.commands import BaseHuggingfaceCLICommand +from huggingface_hub.commands.jobs.cancel import CancelCommand +from huggingface_hub.commands.jobs.inspect import InspectCommand +from huggingface_hub.commands.jobs.logs import LogsCommand +from huggingface_hub.commands.jobs.ps import PsCommand +from huggingface_hub.commands.jobs.run import RunCommand +from huggingface_hub.utils import logging + + +logger = logging.get_logger(__name__) + + +class JobsCommands(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction): + jobs_parser = parser.add_parser("jobs", help="Commands to interact with your huggingface.co jobs.") + jobs_subparsers = jobs_parser.add_subparsers(help="huggingface.co jobs related commands") + + # Register commands + InspectCommand.register_subcommand(jobs_subparsers) + LogsCommand.register_subcommand(jobs_subparsers) + PsCommand.register_subcommand(jobs_subparsers) + RunCommand.register_subcommand(jobs_subparsers) + CancelCommand.register_subcommand(jobs_subparsers) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py new file mode 100644 index 0000000000..2ff27830bf --- /dev/null +++ b/src/huggingface_hub/commands/jobs/_cli_utils.py @@ -0,0 +1,25 @@ +import os +from typing import Union + +def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: + """ + Inspired by: + + - stackoverflow.com/a/8356620/593036 + - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data + """ + col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] + terminal_width = max(os.get_terminal_size().columns, len(headers) * 12) + while len(headers) + sum(col_widths) > terminal_width: + col_to_minimize = col_widths.index(max(col_widths)) + col_widths[col_to_minimize] //= 2 + if len(headers) + sum(col_widths) <= terminal_width: + col_widths[col_to_minimize] = terminal_width - sum(col_widths) - len(headers) + col_widths[col_to_minimize] + row_format = ("{{:{}}} " * len(headers)).format(*col_widths) + lines = [] + lines.append(row_format.format(*headers)) + lines.append(row_format.format(*["-" * w for w in col_widths])) + for row in rows: + row = [x[:col_width - 3] + "..." if len(str(x)) > col_width else x for x, col_width in zip(row, col_widths)] + lines.append(row_format.format(*row)) + return "\n".join(lines) diff --git a/src/huggingface_hub/commands/jobs/cancel.py b/src/huggingface_hub/commands/jobs/cancel.py new file mode 100644 index 0000000000..f9bfc6a1a2 --- /dev/null +++ b/src/huggingface_hub/commands/jobs/cancel.py @@ -0,0 +1,35 @@ +from argparse import Namespace, _SubParsersAction +from typing import Optional + +import requests + +from huggingface_hub import whoami +from huggingface_hub.utils import build_hf_headers + +from .. 
import BaseHuggingfaceCLICommand + + +class CancelCommand(BaseHuggingfaceCLICommand): + + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("jobs cancel", help="Cancel a Job") + run_parser.add_argument( + "job_id", type=str, help="Job ID" + ) + run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.set_defaults(func=CancelCommand) + + def __init__(self, args: Namespace) -> None: + self.job_id: str = args.job_id + self.token: Optional[str] = args.token or None + + def run(self) -> None: + username = whoami(self.token)["name"] + headers = build_hf_headers(token=self.token, library_name="hfjobs") + requests.post( + f"https://huggingface.co/api/jobs/{username}/{self.job_id}/cancel", + headers=headers, + ).raise_for_status() diff --git a/src/huggingface_hub/commands/jobs/inspect.py b/src/huggingface_hub/commands/jobs/inspect.py new file mode 100644 index 0000000000..061581494a --- /dev/null +++ b/src/huggingface_hub/commands/jobs/inspect.py @@ -0,0 +1,40 @@ +import json +from argparse import Namespace, _SubParsersAction +from typing import Optional + +import requests + +from huggingface_hub import whoami +from huggingface_hub.utils import build_hf_headers + +from .. import BaseHuggingfaceCLICommand + + +class InspectCommand(BaseHuggingfaceCLICommand): + + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("inspect", help="Display detailed information on one or more Jobs") + run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.add_argument( + "jobs", nargs="...", help="The jobs to inspect" + ) + run_parser.set_defaults(func=InspectCommand) + + def __init__(self, args: Namespace) -> None: + self.token: Optional[str] = args.token or None + self.jobs: list[str] = args.jobs + + def run(self) -> None: + username = whoami(self.token)["name"] + headers = build_hf_headers(token=self.token, library_name="hfjobs") + inspections = [ + requests.get( + f"https://huggingface.co/api/jobs/{username}/{job}", + headers=headers, + ).json() + for job in self.jobs + ] + print(json.dumps(inspections, indent=4)) diff --git a/src/huggingface_hub/commands/jobs/logs.py b/src/huggingface_hub/commands/jobs/logs.py new file mode 100644 index 0000000000..7f003fa4f9 --- /dev/null +++ b/src/huggingface_hub/commands/jobs/logs.py @@ -0,0 +1,89 @@ +import json +import time +from argparse import Namespace, _SubParsersAction +from typing import Optional + +import requests + +from huggingface_hub import whoami +from huggingface_hub.utils import build_hf_headers + +from .. 
import BaseHuggingfaceCLICommand + + +class LogsCommand(BaseHuggingfaceCLICommand): + + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("logs", help="Fetch the logs of a Job") + run_parser.add_argument( + "job_id", type=str, help="Job ID" + ) + run_parser.add_argument( + "-t", "--timestamps", action="store_true", help="Show timestamps" + ) + run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.set_defaults(func=LogsCommand) + + def __init__(self, args: Namespace) -> None: + self.job_id: str = args.job_id + self.timestamps: bool = args.timestamps + self.token: Optional[str] = args.token or None + + def run(self) -> None: + username = whoami(self.token)["name"] + headers = build_hf_headers(token=self.token, library_name="hfjobs") + requests.get( + f"https://huggingface.co/api/jobs/{username}/{self.job_id}", + headers=headers, + ).raise_for_status() + + logging_started = False + logging_finished = False + job_finished = False + # - We need to retry because sometimes the /logs doesn't return logs when the job just started. + # (for example it can return only two lines: one for "Job started" and one empty line) + # - Timeouts can happen in case of build errors + # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming + # - Infinite empty log stream can happen in case of build error + # (the logs stream is infinite and empty except for the Job started message) + # - there is a ": keep-alive" every 30 seconds + while True: + try: + resp = requests.get( + f"https://huggingface.co/api/jobs/{username}/{self.job_id}/logs", + headers=headers, + stream=True, + timeout=120, + ) + log = None + for line in resp.iter_lines(chunk_size=1): + line = line.decode("utf-8") + if line and line.startswith("data: {"): + data = json.loads(line[len("data: "):]) + # timestamp = data["timestamp"] + if not data["data"].startswith("===== Job started"): + logging_started = True + log = data["data"] + print(log) + logging_finished = logging_started + except requests.exceptions.ChunkedEncodingError: + # Response ended prematurely + break + except KeyboardInterrupt: + break + except requests.exceptions.ConnectionError as err: + is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) + if logging_started or not is_timeout: + raise + if logging_finished or job_finished: + break + job_status = requests.get( + f"https://huggingface.co/api/jobs/{username}/{self.job_id}", + headers=headers, + ).json() + if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): + job_finished = True + time.sleep(1) diff --git a/src/huggingface_hub/commands/jobs/ps.py b/src/huggingface_hub/commands/jobs/ps.py new file mode 100644 index 0000000000..8d1f32f2e3 --- /dev/null +++ b/src/huggingface_hub/commands/jobs/ps.py @@ -0,0 +1,186 @@ +import re +from argparse import Namespace, _SubParsersAction +from typing import Dict, Optional + +import requests + +from huggingface_hub import whoami +from huggingface_hub.utils import build_hf_headers + +from .. 
import BaseHuggingfaceCLICommand +from ._cli_utils import tabulate + + +class PsCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("ps", help="List Jobs") + run_parser.add_argument( + "-a", + "--all", + action="store_true", + help="Show all Jobs (default shows just running)", + ) + run_parser.add_argument( + "--token", + type=str, + help="A User Access Token generated from https://huggingface.co/settings/tokens", + ) + # Add Docker-style filtering argument + run_parser.add_argument( + "-f", + "--filter", + action="append", + default=[], + help="Filter output based on conditions provided (format: key=value)", + ) + # Add option to format output + run_parser.add_argument( + "--format", + type=str, + help="Format output using a custom template", + ) + run_parser.set_defaults(func=PsCommand) + + def __init__(self, args: Namespace) -> None: + self.all: bool = args.all + self.token: Optional[str] = args.token or None + self.format: Optional[str] = args.format + self.filters: Dict[str, str] = {} + + # Parse filter arguments (key=value pairs) + for f in args.filter: + if "=" in f: + key, value = f.split("=", 1) + self.filters[key.lower()] = value + else: + print( + f"Warning: Ignoring invalid filter format '{f}'. Use key=value format." + ) + + def run(self) -> None: + """ + Fetch and display job information for the current user. + Uses Docker-style filtering with -f/--filter flag and key=value pairs. + """ + try: + # Get current username + username = whoami(self.token)["name"] + # Build headers for API request + headers = build_hf_headers(token=self.token, library_name="hfjobs") + # Fetch jobs data + response = requests.get( + f"https://huggingface.co/api/jobs/{username}", + headers=headers, + timeout=30, # Add timeout to prevent hanging + ) + response.raise_for_status() + + # Define table headers + table_headers = ["JOB ID", "IMAGE/SPACE", "COMMAND", "CREATED", "STATUS"] + + # Process jobs data + rows = [] + jobs = response.json() + + for job in jobs: + # Extract job data for filtering + status = job.get("status", {}).get("stage", "UNKNOWN") + + # Skip job if not all jobs should be shown and status doesn't match criteria + if not self.all and status not in ("RUNNING", "UPDATING"): + continue + + # Extract job ID safely + job_id = job.get("id", "N/A") + + # Extract image or space information + if "spaceId" in job and job["spaceId"] is not None: + image_or_space = f"hf.co/spaces/{job['spaceId']}" + else: + image_or_space = job.get("dockerImage", "N/A") + + # Extract and format command + command = job.get("command", []) + command_str = " ".join(command) if command else "N/A" + + # Extract creation time + created_at = job.get("createdAt", "N/A") + + # Create a dict with all job properties for filtering + job_properties = { + "id": job_id, + "image": image_or_space, + "status": status.lower(), + "command": command_str, + } + + # Check if job matches all filters + if not self._matches_filters(job_properties): + continue + + # Create row + rows.append([job_id, image_or_space, command_str, created_at, status]) + + # Handle empty results + if not rows: + filters_msg = "" + if self.filters: + filters_msg = f" matching filters: {', '.join([f'{k}={v}' for k, v in self.filters.items()])}" + + print(f"No jobs found{filters_msg}") + return + + # Apply custom format if provided or use default tabular format + self._print_output(rows, table_headers) + + except requests.RequestException as e: + print(f"Error fetching 
jobs data: {e}") + except (KeyError, ValueError, TypeError) as e: + print(f"Error processing jobs data: {e}") + except Exception as e: + print(f"Unexpected error: {e}") + + def _matches_filters(self, job_properties: Dict[str, str]) -> bool: + """Check if job matches all specified filters.""" + for key, pattern in self.filters.items(): + # Check if property exists + if key not in job_properties: + return False + + # Support pattern matching with wildcards + if "*" in pattern or "?" in pattern: + # Convert glob pattern to regex + regex_pattern = pattern.replace("*", ".*").replace("?", ".") + if not re.search( + f"^{regex_pattern}$", job_properties[key], re.IGNORECASE + ): + return False + # Simple substring matching + elif pattern.lower() not in job_properties[key].lower(): + return False + + return True + + def _print_output(self, rows, headers): + """Print output according to the chosen format.""" + if self.format: + # Custom template formatting (simplified) + template = self.format + for row in rows: + line = template + for i, field in enumerate( + ["id", "image", "command", "created", "status"] + ): + placeholder = f"{{{{.{field}}}}}" + if placeholder in line: + line = line.replace(placeholder, str(row[i])) + print(line) + else: + # Default tabular format + print( + tabulate( + rows, + headers=headers, + ) + ) diff --git a/src/huggingface_hub/commands/jobs/run.py b/src/huggingface_hub/commands/jobs/run.py new file mode 100644 index 0000000000..4c03218533 --- /dev/null +++ b/src/huggingface_hub/commands/jobs/run.py @@ -0,0 +1,182 @@ +import io +import json +import time +from argparse import Namespace, _SubParsersAction +from typing import Optional + +import requests +from dotenv import dotenv_values + +from huggingface_hub import whoami +from huggingface_hub.utils import build_hf_headers + +from .. import BaseHuggingfaceCLICommand + + +def _parse_timeout(timeout: Optional[str]) -> Optional[int]: + """Get timeout in seconds""" + time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24} + if not timeout: + return None + elif timeout[-1] in time_units_factors: + return int(float(timeout[:-1]) * time_units_factors[timeout[-1]]) + else: + return int(timeout) + + +class RunCommand(BaseHuggingfaceCLICommand): + + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("run", help="Run a Job") + run_parser.add_argument( + "dockerImage", type=str, help="The Docker image to use." + ) + run_parser.add_argument( + "-e", "--env", action="append", help="Set environment variables." + ) + run_parser.add_argument( + "-s", "--secret", action="append", help="Set secret environment variables." + ) + run_parser.add_argument( + "--env-file", type=str, help="Read in a file of environment variables." + ) + run_parser.add_argument( + "--secret-env-file", type=str, help="Read in a file of secret environment variables." 
+ ) + run_parser.add_argument( + "--flavor", + type=str, + help="Flavor for the hardware, as in HF Spaces.", + default="cpu-basic", + ) + run_parser.add_argument( + "--timeout", + type=str, + help="Max duration: int/float with s (seconds, default), m (minutes), h (hours) or d (days).", + ) + run_parser.add_argument( + "-d", + "--detach", + action="store_true", + help="Run the Job in the background and print the Job ID.", + ) + run_parser.add_argument( + "--token", + type=str, + help="A User Access Token generated from https://huggingface.co/settings/tokens", + ) + run_parser.add_argument("command", nargs="...", help="The command to run.") + run_parser.set_defaults(func=RunCommand) + + def __init__(self, args: Namespace) -> None: + self.docker_image: str = args.dockerImage + self.environment: dict[str, str] = {} + for env_value in args.env or []: + self.environment.update(dotenv_values(stream=io.StringIO(env_value))) + if args.env_file: + self.environment.update(dotenv_values(args.env_file)) + self.secrets: dict[str, str] = {} + for secret in args.secret or []: + self.secrets.update(dotenv_values(stream=io.StringIO(secret))) + if args.secret_env_file: + self.secrets.update(dotenv_values(args.secret_env_file)) + self.flavor: str = args.flavor + self.timeout: Optional[int] = _parse_timeout(args.timeout) + self.detach: bool = args.detach + self.token: Optional[str] = args.token + self.command: list[str] = args.command + + def run(self) -> None: + # prepare paypload to send to HF Jobs API + input_json = { + "command": self.command, + "arguments": [], + "environment": self.environment, + "flavor": self.flavor, + } + # secrets are optional + if self.secrets: + input_json["secrets"] = self.secrets + # timeout is optional + if self.timeout: + input_json["timeoutSeconds"] = self.timeout + # input is either from docker hub or from HF spaces + for prefix in ( + "https://huggingface.co/spaces/", + "https://hf.co/spaces/", + "huggingface.co/spaces/", + "hf.co/spaces/", + ): + if self.docker_image.startswith(prefix): + input_json["spaceId"] = self.docker_image[len(prefix) :] + break + else: + input_json["dockerImage"] = self.docker_image + username = whoami(self.token)["name"] + headers = build_hf_headers(token=self.token, library_name="hfjobs") + resp = requests.post( + f"https://huggingface.co/api/jobs/{username}", + json=input_json, + headers=headers, + ) + resp.raise_for_status() + response = resp.json() + # Fix: Update job_id extraction to match new response format + job_id = response["id"] + + # Always print the job ID to the user + print(f"Job started with ID: {job_id}") + print(f"View at: https://huggingface.co/jobs/{username}/{job_id}") + + if self.detach: + return + + # Now let's stream the logs + + logging_finished = logging_started = False + job_finished = False + # - We need to retry because sometimes the /logs doesn't return logs when the job just started. 
+ # (for example it can return only two lines: one for "Job started" and one empty line) + # - Timeouts can happen in case of build errors + # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming + # - Infinite empty log stream can happen in case of build error + # (the logs stream is infinite and empty except for the Job started message) + # - there is a ": keep-alive" every 30 seconds + while True: + try: + resp = requests.get( + f"https://huggingface.co/api/jobs/{username}/{job_id}/logs", + headers=headers, + stream=True, + timeout=120, + ) + log = None + for line in resp.iter_lines(chunk_size=1): + line = line.decode("utf-8") + if line and line.startswith("data: {"): + data = json.loads(line[len("data: "):]) + # timestamp = data["timestamp"] + if not data["data"].startswith("===== Job started"): + logging_started = True + log = data["data"] + print(log) + logging_finished = logging_started + except requests.exceptions.ChunkedEncodingError: + # Response ended prematurely + break + except KeyboardInterrupt: + break + except requests.exceptions.ConnectionError as err: + is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) + if logging_started or not is_timeout: + raise + if logging_finished or job_finished: + break + job_status = requests.get( + f"https://huggingface.co/api/jobs/{username}/{job_id}", + headers=headers, + ).json() + if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): + job_finished = True + time.sleep(1) From 682a789683e8c1870f7304640c27820801c82746 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 17:19:28 +0200 Subject: [PATCH 02/40] style --- .../commands/jobs/_cli_utils.py | 3 ++- src/huggingface_hub/commands/jobs/cancel.py | 5 +--- src/huggingface_hub/commands/jobs/inspect.py | 5 +--- src/huggingface_hub/commands/jobs/logs.py | 11 +++------ src/huggingface_hub/commands/jobs/ps.py | 12 +++------- src/huggingface_hub/commands/jobs/run.py | 23 +++++-------------- 6 files changed, 16 insertions(+), 43 deletions(-) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py index 2ff27830bf..fea22ad8a9 100644 --- a/src/huggingface_hub/commands/jobs/_cli_utils.py +++ b/src/huggingface_hub/commands/jobs/_cli_utils.py @@ -1,6 +1,7 @@ import os from typing import Union + def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: """ Inspired by: @@ -20,6 +21,6 @@ def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: lines.append(row_format.format(*headers)) lines.append(row_format.format(*["-" * w for w in col_widths])) for row in rows: - row = [x[:col_width - 3] + "..." if len(str(x)) > col_width else x for x, col_width in zip(row, col_widths)] + row = [x[: col_width - 3] + "..." 
if len(str(x)) > col_width else x for x, col_width in zip(row, col_widths)] lines.append(row_format.format(*row)) return "\n".join(lines) diff --git a/src/huggingface_hub/commands/jobs/cancel.py b/src/huggingface_hub/commands/jobs/cancel.py index f9bfc6a1a2..4e4285ea3a 100644 --- a/src/huggingface_hub/commands/jobs/cancel.py +++ b/src/huggingface_hub/commands/jobs/cancel.py @@ -10,13 +10,10 @@ class CancelCommand(BaseHuggingfaceCLICommand): - @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("jobs cancel", help="Cancel a Job") - run_parser.add_argument( - "job_id", type=str, help="Job ID" - ) + run_parser.add_argument("job_id", type=str, help="Job ID") run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) diff --git a/src/huggingface_hub/commands/jobs/inspect.py b/src/huggingface_hub/commands/jobs/inspect.py index 061581494a..f93aae03ed 100644 --- a/src/huggingface_hub/commands/jobs/inspect.py +++ b/src/huggingface_hub/commands/jobs/inspect.py @@ -11,16 +11,13 @@ class InspectCommand(BaseHuggingfaceCLICommand): - @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("inspect", help="Display detailed information on one or more Jobs") run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) - run_parser.add_argument( - "jobs", nargs="...", help="The jobs to inspect" - ) + run_parser.add_argument("jobs", nargs="...", help="The jobs to inspect") run_parser.set_defaults(func=InspectCommand) def __init__(self, args: Namespace) -> None: diff --git a/src/huggingface_hub/commands/jobs/logs.py b/src/huggingface_hub/commands/jobs/logs.py index 7f003fa4f9..60f67769c2 100644 --- a/src/huggingface_hub/commands/jobs/logs.py +++ b/src/huggingface_hub/commands/jobs/logs.py @@ -12,16 +12,11 @@ class LogsCommand(BaseHuggingfaceCLICommand): - @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("logs", help="Fetch the logs of a Job") - run_parser.add_argument( - "job_id", type=str, help="Job ID" - ) - run_parser.add_argument( - "-t", "--timestamps", action="store_true", help="Show timestamps" - ) + run_parser.add_argument("job_id", type=str, help="Job ID") + run_parser.add_argument("-t", "--timestamps", action="store_true", help="Show timestamps") run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) @@ -62,7 +57,7 @@ def run(self) -> None: for line in resp.iter_lines(chunk_size=1): line = line.decode("utf-8") if line and line.startswith("data: {"): - data = json.loads(line[len("data: "):]) + data = json.loads(line[len("data: ") :]) # timestamp = data["timestamp"] if not data["data"].startswith("===== Job started"): logging_started = True diff --git a/src/huggingface_hub/commands/jobs/ps.py b/src/huggingface_hub/commands/jobs/ps.py index 8d1f32f2e3..36db6996ed 100644 --- a/src/huggingface_hub/commands/jobs/ps.py +++ b/src/huggingface_hub/commands/jobs/ps.py @@ -54,9 +54,7 @@ def __init__(self, args: Namespace) -> None: key, value = f.split("=", 1) self.filters[key.lower()] = value else: - print( - f"Warning: Ignoring invalid filter format '{f}'. Use key=value format." - ) + print(f"Warning: Ignoring invalid filter format '{f}'. 
Use key=value format.") def run(self) -> None: """ @@ -152,9 +150,7 @@ def _matches_filters(self, job_properties: Dict[str, str]) -> bool: if "*" in pattern or "?" in pattern: # Convert glob pattern to regex regex_pattern = pattern.replace("*", ".*").replace("?", ".") - if not re.search( - f"^{regex_pattern}$", job_properties[key], re.IGNORECASE - ): + if not re.search(f"^{regex_pattern}$", job_properties[key], re.IGNORECASE): return False # Simple substring matching elif pattern.lower() not in job_properties[key].lower(): @@ -169,9 +165,7 @@ def _print_output(self, rows, headers): template = self.format for row in rows: line = template - for i, field in enumerate( - ["id", "image", "command", "created", "status"] - ): + for i, field in enumerate(["id", "image", "command", "created", "status"]): placeholder = f"{{{{.{field}}}}}" if placeholder in line: line = line.replace(placeholder, str(row[i])) diff --git a/src/huggingface_hub/commands/jobs/run.py b/src/huggingface_hub/commands/jobs/run.py index 4c03218533..85188037bf 100644 --- a/src/huggingface_hub/commands/jobs/run.py +++ b/src/huggingface_hub/commands/jobs/run.py @@ -25,25 +25,14 @@ def _parse_timeout(timeout: Optional[str]) -> Optional[int]: class RunCommand(BaseHuggingfaceCLICommand): - @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("run", help="Run a Job") - run_parser.add_argument( - "dockerImage", type=str, help="The Docker image to use." - ) - run_parser.add_argument( - "-e", "--env", action="append", help="Set environment variables." - ) - run_parser.add_argument( - "-s", "--secret", action="append", help="Set secret environment variables." - ) - run_parser.add_argument( - "--env-file", type=str, help="Read in a file of environment variables." - ) - run_parser.add_argument( - "--secret-env-file", type=str, help="Read in a file of secret environment variables." - ) + run_parser.add_argument("dockerImage", type=str, help="The Docker image to use.") + run_parser.add_argument("-e", "--env", action="append", help="Set environment variables.") + run_parser.add_argument("-s", "--secret", action="append", help="Set secret environment variables.") + run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") + run_parser.add_argument("--secret-env-file", type=str, help="Read in a file of secret environment variables.") run_parser.add_argument( "--flavor", type=str, @@ -155,7 +144,7 @@ def run(self) -> None: for line in resp.iter_lines(chunk_size=1): line = line.decode("utf-8") if line and line.startswith("data: {"): - data = json.loads(line[len("data: "):]) + data = json.loads(line[len("data: ") :]) # timestamp = data["timestamp"] if not data["data"].startswith("===== Job started"): logging_started = True From af05c27e7d52526fd639e09572c014426f08c901 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 17:40:49 +0200 Subject: [PATCH 03/40] docs --- docs/source/en/guides/cli.md | 121 +++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index 481cbf4c79..6a48d1349e 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -604,3 +604,124 @@ Copy-and-paste the text below in your GitHub issue. - HF_HUB_ETAG_TIMEOUT: 10 - HF_HUB_DOWNLOAD_TIMEOUT: 10 ``` + +## huggingface-cli jobs + +Experimental. Run compute jobs on Hugging Face infrastructure with a familiar Docker-like interface. 
+
+`huggingface-cli jobs` is a command-line tool that lets you run anything on Hugging Face's infrastructure (including GPUs and TPUs!) with simple commands. Think `docker run`, but for running code on A100s.
+
+```bash
+# Directly run Python code
+>>> huggingface-cli jobs run python:3.12 python -c "print('Hello from the cloud!')"
+
+# Use GPUs without any setup
+>>> huggingface-cli jobs run --flavor a10g-small pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel \
+... python -c "import torch; print(torch.cuda.get_device_name())"
+
+# Run from Hugging Face Spaces
+>>> huggingface-cli jobs run hf.co/spaces/lhoestq/duckdb duckdb -c "select 'hello world'"
+```
+
+### ✨ Key Features
+
+- 🐳 **Docker-like CLI**: Familiar commands (`run`, `ps`, `logs`, `inspect`) to run and manage jobs
+- 🔥 **Any Hardware**: From CPUs to A100 GPUs and TPU pods - switch with a simple flag
+- 📦 **Run Anything**: Use Docker images, HF Spaces, or your custom containers
+- 🔐 **Simple Auth**: Just use your HF token
+- 📊 **Live Monitoring**: Stream logs in real-time, just like running locally
+- 💰 **Pay-as-you-go**: Only pay for the seconds you use
+
+### Prerequisites
+
+- A Hugging Face account (currently in testing for HF staff)
+- Authenticate with the Hugging Face Hub (e.g. `huggingface-cli login`)
+
+
+### Quick Start
+
+#### 1. Run your first job
+
+```bash
+# Run a simple Python script
+>>> huggingface-cli jobs run python:3.12 python -c "print('Hello from HF compute!')"
+```
+
+This command runs the job and shows the logs. You can pass `--detach` to run the Job in the background and only print the Job ID.
+
+#### 2. Check job status
+
+```bash
+# List your running jobs
+>>> huggingface-cli jobs ps
+
+# Inspect the status of a job
+>>> huggingface-cli jobs inspect <job_id>
+
+# View logs from a job
+>>> huggingface-cli jobs logs <job_id>
+
+# Cancel a job
+>>> huggingface-cli jobs cancel <job_id>
+```
+
+#### 3. Run on GPU
+
+You can also run jobs on GPUs or TPUs with the `--flavor` option. For example, to run a PyTorch job on an A10G GPU:
+
+```bash
+# Use an A10G GPU to check PyTorch CUDA
+>>> huggingface-cli jobs run --flavor a10g-small pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel \
+... python -c "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"
+```
+
+Running this will show the following output!
+
+```bash
+This code ran with the following GPU: NVIDIA A10G
+```
+
+That's it! You're now running code on Hugging Face's infrastructure. For more detailed information check out the [Quickstart Guide](docs/quickstart.md).
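+
+Putting it together, a minimal detached workflow could look like the following sketch, where `<job_id>` is a placeholder for the ID printed by `huggingface-cli jobs run --detach`:
+
+```bash
+# Start a job in the background; only the Job ID is printed
+>>> huggingface-cli jobs run --detach python:3.12 python -c "import time; time.sleep(60); print('done')"
+
+# Follow its logs, or cancel it if it is no longer needed
+>>> huggingface-cli jobs logs <job_id>
+>>> huggingface-cli jobs cancel <job_id>
+```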
+ +### Common Use Cases + +- **Model Training**: Fine-tune or train models on GPUs (T4, A10G, A100) without managing infrastructure +- **Synthetic Data Generation**: Generate large-scale datasets using LLMs on powerful hardware +- **Data Processing**: Process massive datasets with high-CPU configurations for parallel workloads +- **Batch Inference**: Run offline inference on thousands of samples using optimized GPU setups +- **Experiments & Benchmarks**: Run ML experiments on consistent hardware for reproducible results +- **Development & Debugging**: Test GPU code without local CUDA setup + +### Pass Environment variables and Secrets + +You can pass environment variables to your job using + +```bash +# Pass environment variables +>>> huggingface-cli jobs run -e FOO=foo -e BAR=bar python:3.12 python -c "import os; print(os.environ['FOO'], os.environ['BAR'])" +``` + +```bash +# Pass an environment from a local .env file +>>> huggingface-cli jobs run --env-file .env python:3.12 python -c "import os; print(os.environ['FOO'], os.environ['BAR'])" +``` + +```bash +# Pass secrets - they will be encrypted server side +>>> huggingface-cli jobs run -s MY_SECRET=psswrd python:3.12 python -c "import os; print(os.environ['MY_SECRET'])" +``` + +```bash +# Pass secrets from a local .secrets.env file - they will be encrypted server side +>>> huggingface-cli jobs run --secret-env-file .secrets.env python:3.12 python -c "import os; print(os.environ['MY_SECRET'])" +``` + +### Hardware + +Available `--flavor` options: + +- CPU: `cpu-basic`, `cpu-upgrade` +- GPU: `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`,`a100-large` +- TPU: `v5e-1x1`, `v5e-2x2`, `v5e-2x4` + +(updated in 03/25 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) From 3895c8e28b68092d578624b5dd38cdfbec150df2 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 17:55:58 +0200 Subject: [PATCH 04/40] mypy --- src/huggingface_hub/commands/jobs/_cli_utils.py | 4 ++-- src/huggingface_hub/commands/jobs/run.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py index fea22ad8a9..c0b6194194 100644 --- a/src/huggingface_hub/commands/jobs/_cli_utils.py +++ b/src/huggingface_hub/commands/jobs/_cli_utils.py @@ -21,6 +21,6 @@ def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: lines.append(row_format.format(*headers)) lines.append(row_format.format(*["-" * w for w in col_widths])) for row in rows: - row = [x[: col_width - 3] + "..." if len(str(x)) > col_width else x for x, col_width in zip(row, col_widths)] - lines.append(row_format.format(*row)) + row_format_args = [str(x)[: col_width - 3] + "..." 
if len(str(x)) > col_width else str(x) for x, col_width in zip(row, col_widths)] + lines.append(row_format.format(*row_format_args)) return "\n".join(lines) diff --git a/src/huggingface_hub/commands/jobs/run.py b/src/huggingface_hub/commands/jobs/run.py index 85188037bf..336257a54d 100644 --- a/src/huggingface_hub/commands/jobs/run.py +++ b/src/huggingface_hub/commands/jobs/run.py @@ -2,7 +2,7 @@ import json import time from argparse import Namespace, _SubParsersAction -from typing import Optional +from typing import Optional, Union import requests from dotenv import dotenv_values @@ -60,12 +60,12 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.docker_image: str = args.dockerImage - self.environment: dict[str, str] = {} + self.environment: dict[str, Optional[str]] = {} for env_value in args.env or []: self.environment.update(dotenv_values(stream=io.StringIO(env_value))) if args.env_file: self.environment.update(dotenv_values(args.env_file)) - self.secrets: dict[str, str] = {} + self.secrets: dict[str, Optional[str]] = {} for secret in args.secret or []: self.secrets.update(dotenv_values(stream=io.StringIO(secret))) if args.secret_env_file: @@ -78,7 +78,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: # prepare paypload to send to HF Jobs API - input_json = { + input_json: dict[str, Optional[Union[str, float, list[str], dict[str, Optional[str]]]]] = { "command": self.command, "arguments": [], "environment": self.environment, From 3661cb76a255cc435f39c48564ddc8bd9a58b68e Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 17:57:52 +0200 Subject: [PATCH 05/40] style --- src/huggingface_hub/commands/jobs/_cli_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py index c0b6194194..c5effb2993 100644 --- a/src/huggingface_hub/commands/jobs/_cli_utils.py +++ b/src/huggingface_hub/commands/jobs/_cli_utils.py @@ -21,6 +21,9 @@ def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: lines.append(row_format.format(*headers)) lines.append(row_format.format(*["-" * w for w in col_widths])) for row in rows: - row_format_args = [str(x)[: col_width - 3] + "..." if len(str(x)) > col_width else str(x) for x, col_width in zip(row, col_widths)] + row_format_args = [ + str(x)[: col_width - 3] + "..." if len(str(x)) > col_width else str(x) + for x, col_width in zip(row, col_widths) + ] lines.append(row_format.format(*row_format_args)) return "\n".join(lines) From 13f17c801413614758c95fbc53c73ce5c24b50e8 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 18:06:48 +0200 Subject: [PATCH 06/40] minor --- docs/source/en/guides/cli.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index 6a48d1349e..d8e1c5b818 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -607,7 +607,7 @@ Copy-and-paste the text below in your GitHub issue. ## huggingface-cli jobs -Experimental. Run compute jobs on Hugging Face infrastructure with a familiar Docker-like interface. +Run compute jobs on Hugging Face infrastructure with a familiar Docker-like interface. `huggingface-cli jobs` is a command-line tool that lets you run anything on Hugging Face's infrastructure (including GPUs and TPUs!) with simple commands. Think `docker run`, but for running code on A100s. 
From 5e99d64765588fb7b251c50faedcb1f5dcb5a434 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 10 Jul 2025 18:08:12 +0200 Subject: [PATCH 07/40] remove hfjobs mentions --- src/huggingface_hub/commands/jobs/cancel.py | 2 +- src/huggingface_hub/commands/jobs/inspect.py | 2 +- src/huggingface_hub/commands/jobs/logs.py | 2 +- src/huggingface_hub/commands/jobs/ps.py | 2 +- src/huggingface_hub/commands/jobs/run.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/huggingface_hub/commands/jobs/cancel.py b/src/huggingface_hub/commands/jobs/cancel.py index 4e4285ea3a..000ba66d4a 100644 --- a/src/huggingface_hub/commands/jobs/cancel.py +++ b/src/huggingface_hub/commands/jobs/cancel.py @@ -25,7 +25,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token, library_name="hfjobs") + headers = build_hf_headers(token=self.token) requests.post( f"https://huggingface.co/api/jobs/{username}/{self.job_id}/cancel", headers=headers, diff --git a/src/huggingface_hub/commands/jobs/inspect.py b/src/huggingface_hub/commands/jobs/inspect.py index f93aae03ed..bb27858bba 100644 --- a/src/huggingface_hub/commands/jobs/inspect.py +++ b/src/huggingface_hub/commands/jobs/inspect.py @@ -26,7 +26,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token, library_name="hfjobs") + headers = build_hf_headers(token=self.token) inspections = [ requests.get( f"https://huggingface.co/api/jobs/{username}/{job}", diff --git a/src/huggingface_hub/commands/jobs/logs.py b/src/huggingface_hub/commands/jobs/logs.py index 60f67769c2..efb224aae3 100644 --- a/src/huggingface_hub/commands/jobs/logs.py +++ b/src/huggingface_hub/commands/jobs/logs.py @@ -29,7 +29,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token, library_name="hfjobs") + headers = build_hf_headers(token=self.token) requests.get( f"https://huggingface.co/api/jobs/{username}/{self.job_id}", headers=headers, diff --git a/src/huggingface_hub/commands/jobs/ps.py b/src/huggingface_hub/commands/jobs/ps.py index 36db6996ed..473ccb617f 100644 --- a/src/huggingface_hub/commands/jobs/ps.py +++ b/src/huggingface_hub/commands/jobs/ps.py @@ -65,7 +65,7 @@ def run(self) -> None: # Get current username username = whoami(self.token)["name"] # Build headers for API request - headers = build_hf_headers(token=self.token, library_name="hfjobs") + headers = build_hf_headers(token=self.token) # Fetch jobs data response = requests.get( f"https://huggingface.co/api/jobs/{username}", diff --git a/src/huggingface_hub/commands/jobs/run.py b/src/huggingface_hub/commands/jobs/run.py index 336257a54d..360acd7822 100644 --- a/src/huggingface_hub/commands/jobs/run.py +++ b/src/huggingface_hub/commands/jobs/run.py @@ -103,7 +103,7 @@ def run(self) -> None: else: input_json["dockerImage"] = self.docker_image username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token, library_name="hfjobs") + headers = build_hf_headers(token=self.token) resp = requests.post( f"https://huggingface.co/api/jobs/{username}", json=input_json, From 7efe99800eb8100aaed24d63810420452b4cb36f Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Fri, 11 Jul 2025 14:47:37 +0200 Subject: [PATCH 08/40] add huggingface-cli jobs uv commands --- docs/source/en/guides/cli.md | 27 ++- 
src/huggingface_hub/commands/jobs/__init__.py | 2 + src/huggingface_hub/commands/jobs/uv.py | 213 ++++++++++++++++++ 3 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 src/huggingface_hub/commands/jobs/uv.py diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index d8e1c5b818..dd078313f4 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -621,6 +621,9 @@ Run compute jobs on Hugging Face infrastructure with a familiar Docker-like inte # Run from Hugging Face Spaces >>> huggingface-cli jobs run hf.co/spaces/lhoestq/duckdb duckdb -c "select 'hello world'" + +# Run a Python script with `uv` (experimental) +>>> huggingface-cli jobs uv run my_script.py ``` ### ✨ Key Features @@ -637,7 +640,6 @@ Run compute jobs on Hugging Face infrastructure with a familiar Docker-like inte - A Hugging Face account (currently in testing for HF staff) - Authenticate with the Hugging Gace Hub (e.g. `huggingface-cli login`) - ### Quick Start #### 1. Run your first job @@ -725,3 +727,26 @@ Available `--flavor` options: - TPU: `v5e-1x1`, `v5e-2x2`, `v5e-2x4` (updated in 03/25 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) + +### UV Scripts (Experimental) + +Run UV scripts (Python scripts with inline dependencies) on HF infrastructure: + +```bash +# Run a UV script (creates temporary repo) +>>> huggingface-cli jobs uv run my_script.py + +# Run with persistent repo +>>> huggingface-cli jobs uv run my_script.py --repo my-uv-scripts + +# Run with GPU +>>> huggingface-cli jobs uv run ml_training.py --flavor gpu-t4-small + +# Pass arguments to script +>>> huggingface-cli jobs uv run process.py input.csv output.parquet --repo data-scripts + +# Run a script directly from a URL +>>> huggingface-cli jobs uv run https://huggingface.co/datasets/username/scripts/resolve/main/example.py +``` + +UV scripts are Python scripts that include their dependencies directly in the file using a special comment syntax. This makes them perfect for self-contained tasks that don't require complex project setups. Learn more about UV scripts in the [UV documentation](https://docs.astral.sh/uv/guides/scripts/). diff --git a/src/huggingface_hub/commands/jobs/__init__.py b/src/huggingface_hub/commands/jobs/__init__.py index ea3535a27d..62b3d52af1 100644 --- a/src/huggingface_hub/commands/jobs/__init__.py +++ b/src/huggingface_hub/commands/jobs/__init__.py @@ -26,6 +26,7 @@ from huggingface_hub.commands.jobs.logs import LogsCommand from huggingface_hub.commands.jobs.ps import PsCommand from huggingface_hub.commands.jobs.run import RunCommand +from huggingface_hub.commands.jobs.uv import UvCommand from huggingface_hub.utils import logging @@ -44,3 +45,4 @@ def register_subcommand(parser: _SubParsersAction): PsCommand.register_subcommand(jobs_subparsers) RunCommand.register_subcommand(jobs_subparsers) CancelCommand.register_subcommand(jobs_subparsers) + UvCommand.register_subcommand(jobs_subparsers) diff --git a/src/huggingface_hub/commands/jobs/uv.py b/src/huggingface_hub/commands/jobs/uv.py new file mode 100644 index 0000000000..c50caf9b88 --- /dev/null +++ b/src/huggingface_hub/commands/jobs/uv.py @@ -0,0 +1,213 @@ +"""UV run command for huggingface-cli jobs - execute UV scripts on HF infrastructure.""" + +import hashlib +from argparse import Namespace +from datetime import datetime +from pathlib import Path + +from huggingface_hub import HfApi, create_repo +from huggingface_hub.utils import RepositoryNotFoundError + +from .. 
import BaseHuggingfaceCLICommand +from .run import RunCommand + + +class UvCommand(BaseHuggingfaceCLICommand): + """Run UV scripts on Hugging Face infrastructure.""" + + @staticmethod + def register_subcommand(parser): + """Register UV run subcommand.""" + uv_parser = parser.add_parser( + "uv", + help="Run UV scripts (Python with inline dependencies) on HF infrastructure", + ) + + subparsers = uv_parser.add_subparsers(dest="uv_command", help="UV commands", required=True) + + # Run command only + run_parser = subparsers.add_parser( + "run", + help="Run a UV script (local file or URL) on HF infrastructure", + ) + run_parser.add_argument("script", help="UV script to run (local file or URL)") + run_parser.add_argument("script_args", nargs="*", help="Arguments for the script", default=[]) + run_parser.add_argument( + "--repo", + help="Repository name for the script (creates ephemeral if not specified)", + ) + run_parser.add_argument("--flavor", default="cpu-basic", help="Hardware flavor (default: cpu-basic)") + run_parser.add_argument("-e", "--env", action="append", help="Environment variables") + run_parser.add_argument("-s", "--secret", action="append", help="Secret environment variables") + run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") + run_parser.add_argument( + "--secret-env-file", + type=str, + help="Read in a file of secret environment variables.", + ) + run_parser.add_argument("--timeout", help="Max duration (e.g., 30s, 5m, 1h)") + run_parser.add_argument("-d", "--detach", action="store_true", help="Run in background") + run_parser.add_argument("--token", help="HF token") + run_parser.set_defaults(func=UvCommand) + + def __init__(self, args): + """Initialize the command with parsed arguments.""" + self.args = args + + def run(self): + """Execute UV command.""" + if self.args.uv_command == "run": + self._run_script(self.args) + + def _run_script(self, args): + """Run a UV script on HF infrastructure.""" + print("Note: huggingface-cli jobs uv run is experimental and subject to change.") + api = HfApi(token=args.token) + + if args.script.startswith("http://") or args.script.startswith("https://"): + # Direct URL execution - no upload needed + script_url = args.script + print(f"Running script from URL: {script_url}") + else: + # Local file - upload to HF + script_path = Path(args.script) + if not script_path.exists(): + print(f"Error: Script not found: {args.script}") + return + + # Determine repository + repo_id = self._determine_repository(args, api) + is_ephemeral = args.repo is None + + # Create repo if needed + try: + api.repo_info(repo_id, repo_type="dataset") + if not is_ephemeral: + print(f"Using existing repository: {repo_id}") + except RepositoryNotFoundError: + print(f"Creating repository: {repo_id}") + create_repo(repo_id, repo_type="dataset", exist_ok=True) + + # Upload script + print(f"Uploading {script_path.name}...") + with open(script_path, "r") as f: + script_content = f.read() + + filename = script_path.name + + api.upload_file( + path_or_fileobj=script_content.encode(), + path_in_repo=filename, + repo_id=repo_id, + repo_type="dataset", + ) + + script_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}" + repo_url = f"https://huggingface.co/datasets/{repo_id}" + + print(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") + + # Create and upload minimal README + readme_content = self._create_minimal_readme(repo_id, filename, is_ephemeral) + api.upload_file( + 
path_or_fileobj=readme_content.encode(), + path_in_repo="README.md", + repo_id=repo_id, + repo_type="dataset", + ) + + if is_ephemeral: + print(f"✓ Temporary repository created: {repo_id}") + + # Prepare docker image (always use Python 3.12) + docker_image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" + + # Build command + command = ["uv", "run", script_url] + args.script_args + + # Create RunCommand args + run_args = Namespace( + dockerImage=docker_image, + command=command, + env=args.env, + secret=args.secret, + env_file=args.env_file, + secret_env_file=args.secret_env_file, + flavor=args.flavor, + timeout=args.timeout, + detach=args.detach, + token=args.token, + ) + + print("Starting job on HF infrastructure...") + RunCommand(run_args).run() + + def _determine_repository(self, args, api): + """Determine which repository to use for the script.""" + # Use provided repo + if args.repo: + repo_id = args.repo + if "/" not in repo_id: + username = api.whoami()["name"] + repo_id = f"{username}/{repo_id}" + return repo_id + + # Create ephemeral repo + username = api.whoami()["name"] + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + + # Simple hash for uniqueness + script_hash = hashlib.md5(Path(args.script).read_bytes()).hexdigest()[:8] + + return f"{username}/huggingface-cli-jobs-uv-run-{timestamp}-{script_hash}" + + def _create_minimal_readme(self, repo_id, script_name, is_ephemeral): + """Create minimal README content.""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + + if is_ephemeral: + # Ephemeral repository README + return f"""--- +tags: +- huggingface-cli-jobs-uv-script +- ephemeral +--- + +# UV Script: {script_name} + +Executed via `huggingface-cli jobs uv run` on {timestamp} + +## Run this script + +```bash +huggingface-cli jobs run ghcr.io/astral-sh/uv:python3.12-bookworm-slim \\ + uv run https://huggingface.co/datasets/{repo_id}/resolve/main/{script_name} +``` + +--- +*Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* +""" + # Named repository README + repo_name = repo_id.split("/")[-1] + return f"""--- +tags: +- huggingface-cli-jobs-uv-script +viewer: false +--- + +# {repo_name} + +UV scripts repository + +## Scripts +- `{script_name}` - Added {timestamp} + +## Run + +```bash +huggingface-cli jobs uv run {script_name} --repo {repo_name} +``` + +--- +*Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* +""" From ab8511ed470d928dbc3f2e2cb190b40fd6d89cdd Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Fri, 11 Jul 2025 16:02:10 +0200 Subject: [PATCH 09/40] add some uv options --- src/huggingface_hub/commands/jobs/uv.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/huggingface_hub/commands/jobs/uv.py b/src/huggingface_hub/commands/jobs/uv.py index c50caf9b88..a0b77cf8f4 100644 --- a/src/huggingface_hub/commands/jobs/uv.py +++ b/src/huggingface_hub/commands/jobs/uv.py @@ -31,12 +31,12 @@ def register_subcommand(parser): help="Run a UV script (local file or URL) on HF infrastructure", ) run_parser.add_argument("script", help="UV script to run (local file or URL)") - run_parser.add_argument("script_args", nargs="*", help="Arguments for the script", default=[]) + run_parser.add_argument("script_args", nargs="...", help="Arguments for the script", default=[]) run_parser.add_argument( "--repo", help="Repository name for the script (creates ephemeral if not specified)", ) - run_parser.add_argument("--flavor", default="cpu-basic", 
help="Hardware flavor (default: cpu-basic)") + run_parser.add_argument("--flavor", type=str, default="cpu-basic", help="Hardware flavor (default: cpu-basic)") run_parser.add_argument("-e", "--env", action="append", help="Environment variables") run_parser.add_argument("-s", "--secret", action="append", help="Secret environment variables") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") @@ -45,9 +45,14 @@ def register_subcommand(parser): type=str, help="Read in a file of secret environment variables.", ) - run_parser.add_argument("--timeout", help="Max duration (e.g., 30s, 5m, 1h)") + run_parser.add_argument("--timeout", type=str, help="Max duration (e.g., 30s, 5m, 1h)") run_parser.add_argument("-d", "--detach", action="store_true", help="Run in background") - run_parser.add_argument("--token", help="HF token") + run_parser.add_argument("--token", type=str, help="HF token") + # UV options + run_parser.add_argument("--with", action="append", help="Run with the given packages installed", dest="with_") + run_parser.add_argument( + "-p", "--python", type=str, help="The Python interpreter to use for the run environment" + ) run_parser.set_defaults(func=UvCommand) def __init__(self, args): @@ -123,7 +128,12 @@ def _run_script(self, args): docker_image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" # Build command - command = ["uv", "run", script_url] + args.script_args + uv_args = [] + for with_arg in args.with_: + uv_args += ["--with", with_arg] + if args.python: + uv_args += ["--python", args.python] + command = ["uv", "run"] + uv_args + [script_url] + args.script_args # Create RunCommand args run_args = Namespace( From 3c00292c71b4c9cc0293b3f7413cde1c639486ca Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Fri, 11 Jul 2025 16:48:41 +0200 Subject: [PATCH 10/40] add test --- tests/test_cli.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 21ea90b409..5ad29e320b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -9,6 +9,7 @@ from huggingface_hub.commands.delete_cache import DeleteCacheCommand from huggingface_hub.commands.download import DownloadCommand +from huggingface_hub.commands.jobs import JobsCommands, RunCommand from huggingface_hub.commands.repo_files import DeleteFilesSubCommand, RepoFilesCommand from huggingface_hub.commands.scan_cache import ScanCacheCommand from huggingface_hub.commands.tag import TagCommands @@ -837,3 +838,41 @@ def test_delete(self, delete_files_mock: Mock) -> None: assert kwargs == delete_files_args delete_files_mock.reset_mock() + + +class DummyResponse: + def __init__(self, json): + self._json = json + + def raise_for_status(self): + pass + + def json(self): + return self._json + + +class TestJobsCommand(unittest.TestCase): + def setUp(self) -> None: + """ + Set up CLI as in `src/huggingface_hub/commands/huggingface_cli.py`. 
+ """ + self.parser = ArgumentParser("huggingface-cli", usage="huggingface-cli []") + commands_parser = self.parser.add_subparsers() + JobsCommands.register_subcommand(commands_parser) + + @patch("requests.post", return_value=DummyResponse({"id": "my-job-id"})) + @patch("huggingface_hub.commands.jobs.run.whoami", return_value={"name": "my-username"}) + def test_run(self, whoami: Mock, requests_post: Mock) -> None: + input_args = ["jobs", "run", "--detach", "ubuntu", "echo", "hello"] + cmd = RunCommand(self.parser.parse_args(input_args)) + cmd.run() + assert requests_post.call_count == 1 + args, kwargs = requests_post.call_args_list[0] + assert args == ("https://huggingface.co/api/jobs/my-username",) + assert kwargs["json"] == { + "command": ["echo", "hello"], + "arguments": [], + "environment": {}, + "flavor": "cpu-basic", + "dockerImage": "ubuntu", + } From 3136ef43d9124f14cbcdcab85b695fc1b0681d77 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Fri, 11 Jul 2025 16:52:20 +0200 Subject: [PATCH 11/40] fix for 3.8 --- src/huggingface_hub/commands/jobs/_cli_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py index c5effb2993..d9d1f1411c 100644 --- a/src/huggingface_hub/commands/jobs/_cli_utils.py +++ b/src/huggingface_hub/commands/jobs/_cli_utils.py @@ -1,8 +1,8 @@ import os -from typing import Union +from typing import List, Union -def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str: +def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: """ Inspired by: From 9fc3c7874b352b5982d2a7c4ce315c9cffd03d80 Mon Sep 17 00:00:00 2001 From: Daniel van Strien Date: Mon, 14 Jul 2025 09:04:16 +0100 Subject: [PATCH 12/40] Update src/huggingface_hub/commands/jobs/uv.py Co-authored-by: Julien Chaumond --- src/huggingface_hub/commands/jobs/uv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/commands/jobs/uv.py b/src/huggingface_hub/commands/jobs/uv.py index a0b77cf8f4..630b726d60 100644 --- a/src/huggingface_hub/commands/jobs/uv.py +++ b/src/huggingface_hub/commands/jobs/uv.py @@ -91,7 +91,7 @@ def _run_script(self, args): print(f"Using existing repository: {repo_id}") except RepositoryNotFoundError: print(f"Creating repository: {repo_id}") - create_repo(repo_id, repo_type="dataset", exist_ok=True) + create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) # Upload script print(f"Uploading {script_path.name}...") From fd926b50ab3ed6d90a7a0346de047a6f7d543fab Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 16 Jul 2025 23:58:58 +0200 Subject: [PATCH 13/40] move to HfApi --- src/huggingface_hub/__init__.py | 15 + src/huggingface_hub/_jobs_api.py | 126 +++++ src/huggingface_hub/commands/jobs.py | 471 +++++++++++++++++ src/huggingface_hub/commands/jobs/__init__.py | 48 -- .../commands/jobs/_cli_utils.py | 29 -- src/huggingface_hub/commands/jobs/cancel.py | 32 -- src/huggingface_hub/commands/jobs/inspect.py | 37 -- src/huggingface_hub/commands/jobs/logs.py | 84 --- src/huggingface_hub/commands/jobs/ps.py | 180 ------- src/huggingface_hub/commands/jobs/run.py | 171 ------ src/huggingface_hub/commands/jobs/uv.py | 223 -------- src/huggingface_hub/hf_api.py | 493 ++++++++++++++++++ 12 files changed, 1105 insertions(+), 804 deletions(-) create mode 100644 src/huggingface_hub/_jobs_api.py create mode 100644 src/huggingface_hub/commands/jobs.py delete mode 100644 
src/huggingface_hub/commands/jobs/__init__.py delete mode 100644 src/huggingface_hub/commands/jobs/_cli_utils.py delete mode 100644 src/huggingface_hub/commands/jobs/cancel.py delete mode 100644 src/huggingface_hub/commands/jobs/inspect.py delete mode 100644 src/huggingface_hub/commands/jobs/logs.py delete mode 100644 src/huggingface_hub/commands/jobs/ps.py delete mode 100644 src/huggingface_hub/commands/jobs/run.py delete mode 100644 src/huggingface_hub/commands/jobs/uv.py diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index f74b00d5c8..deb63ffeef 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -165,6 +165,7 @@ "add_space_variable", "auth_check", "cancel_access_request", + "cancel_job", "change_discussion_status", "comment_discussion", "create_branch", @@ -194,6 +195,7 @@ "duplicate_space", "edit_discussion_comment", "enable_webhook", + "fetch_job_logs", "file_exists", "get_collection", "get_dataset_tags", @@ -210,11 +212,13 @@ "get_user_overview", "get_webhook", "grant_access", + "inspect_job", "list_accepted_access_requests", "list_collections", "list_datasets", "list_inference_catalog", "list_inference_endpoints", + "list_jobs", "list_lfs_files", "list_liked_repos", "list_models", @@ -251,6 +255,7 @@ "resume_inference_endpoint", "revision_exists", "run_as_future", + "run_job", "scale_to_zero_inference_endpoint", "set_space_sleep_time", "space_info", @@ -792,6 +797,7 @@ "auth_switch", "cached_assets_path", "cancel_access_request", + "cancel_job", "change_discussion_status", "comment_discussion", "configure_http_backend", @@ -825,6 +831,7 @@ "enable_webhook", "export_entries_as_dduf", "export_folder_as_dduf", + "fetch_job_logs", "file_exists", "from_pretrained_fastai", "from_pretrained_keras", @@ -851,12 +858,14 @@ "grant_access", "hf_hub_download", "hf_hub_url", + "inspect_job", "interpreter_login", "list_accepted_access_requests", "list_collections", "list_datasets", "list_inference_catalog", "list_inference_endpoints", + "list_jobs", "list_lfs_files", "list_liked_repos", "list_models", @@ -907,6 +916,7 @@ "resume_inference_endpoint", "revision_exists", "run_as_future", + "run_job", "save_pretrained_keras", "save_torch_model", "save_torch_state_dict", @@ -1143,6 +1153,7 @@ def __dir__(): add_space_variable, # noqa: F401 auth_check, # noqa: F401 cancel_access_request, # noqa: F401 + cancel_job, # noqa: F401 change_discussion_status, # noqa: F401 comment_discussion, # noqa: F401 create_branch, # noqa: F401 @@ -1172,6 +1183,7 @@ def __dir__(): duplicate_space, # noqa: F401 edit_discussion_comment, # noqa: F401 enable_webhook, # noqa: F401 + fetch_job_logs, # noqa: F401 file_exists, # noqa: F401 get_collection, # noqa: F401 get_dataset_tags, # noqa: F401 @@ -1188,11 +1200,13 @@ def __dir__(): get_user_overview, # noqa: F401 get_webhook, # noqa: F401 grant_access, # noqa: F401 + inspect_job, # noqa: F401 list_accepted_access_requests, # noqa: F401 list_collections, # noqa: F401 list_datasets, # noqa: F401 list_inference_catalog, # noqa: F401 list_inference_endpoints, # noqa: F401 + list_jobs, # noqa: F401 list_lfs_files, # noqa: F401 list_liked_repos, # noqa: F401 list_models, # noqa: F401 @@ -1229,6 +1243,7 @@ def __dir__(): resume_inference_endpoint, # noqa: F401 revision_exists, # noqa: F401 run_as_future, # noqa: F401 + run_job, # noqa: F401 scale_to_zero_inference_endpoint, # noqa: F401 set_space_sleep_time, # noqa: F401 space_info, # noqa: F401 diff --git a/src/huggingface_hub/_jobs_api.py 
b/src/huggingface_hub/_jobs_api.py
new file mode 100644
index 0000000000..8305d55c01
--- /dev/null
+++ b/src/huggingface_hub/_jobs_api.py
@@ -0,0 +1,126 @@
+# coding=utf-8
+# Copyright 2019-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from huggingface_hub import constants
+from huggingface_hub._space_api import SpaceHardware
+from huggingface_hub.utils._datetime import parse_datetime
+from huggingface_hub.utils._http import fix_hf_endpoint_in_url
+
+
+class JobStage(str, Enum):
+    """
+    Enumeration of the possible stages of a Job on the Hub.
+
+    Value can be compared to a string:
+    ```py
+    assert JobStage.COMPLETED == "COMPLETED"
+    ```
+
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/job_types/JobInfo.ts#L61 (private url).
+    """
+
+    # Copied from moon-landing > server > lib > Job.ts
+    COMPLETED = "COMPLETED"
+    CANCELED = "CANCELED"
+    ERROR = "ERROR"
+    DELETED = "DELETED"
+    RUNNING = "RUNNING"
+
+
+class JobUrl(str):
+    """Subclass of `str` describing a job URL on the Hub.
+
+    `JobUrl` is returned by `HfApi.run_job`. It inherits from `str` for backward
+    compatibility. At initialization, the URL is parsed to populate properties:
+    - endpoint (`str`)
+    - namespace (`Optional[str]`)
+    - job_id (`str`)
+    - url (`str`)
+
+    Args:
+        url (`Any`):
+            String value of the job url.
+        endpoint (`str`, *optional*):
+            Endpoint of the Hub. Defaults to `https://huggingface.co`.
+
+    Example:
+    ```py
+    >>> HfApi.run_job("ubuntu", ["echo", "hello"])
+    JobUrl('https://huggingface.co/jobs/lhoestq/6877b757344d8f02f6001012', endpoint='https://huggingface.co', job_id='6877b757344d8f02f6001012')
+    ```
+
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If URL cannot be parsed.
+ """ + + def __new__(cls, url: Any, endpoint: Optional[str] = None): + url = fix_hf_endpoint_in_url(url, endpoint=endpoint) + return super(JobUrl, cls).__new__(cls, url) + + def __init__(self, url: Any, endpoint: Optional[str] = None) -> None: + super().__init__() + # Parse URL + self.endpoint = endpoint or constants.ENDPOINT + namespace, job_id = url.split("/")[-2:] + + # Populate fields + self.namespace = namespace + self.job_id = job_id + self.url = str(self) # just in case it's needed + + def __repr__(self) -> str: + return f"JobUrl('{self}', endpoint='{self.endpoint}', job_id='{self.job_id}')" + + +@dataclass +class JobStatus: + stage: JobStage + message: Optional[str] + + def __init__(self, **kwargs) -> None: + self.stage = kwargs["stage"] + self.message = kwargs.get("message") + + +@dataclass +class JobInfo: + id: str + created_at: Optional[datetime] + docker_image: Optional[str] + space_id: Optional[str] + command: Optional[List[str]] + arguments: Optional[List[str]] + environment: Optional[Dict[str, Any]] + secrets: Optional[Dict[str, Any]] + flavor: Optional[SpaceHardware] + status: Optional[JobStatus] + + def __init__(self, **kwargs) -> None: + self.id = kwargs["id"] + created_at = kwargs.get("createdAt") or kwargs.get("created_at") + self.created_at = parse_datetime(created_at) if created_at else None + self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image") + self.space_id = kwargs.get("spaceId") or kwargs.get("space_id") + self.command = kwargs.get("command") + self.arguments = kwargs.get("arguments") + self.environment = kwargs.get("environment") + self.secrets = kwargs.get("secrets") + self.flavor = kwargs.get("flavor") + self.status = JobStatus(**(kwargs["status"] if isinstance(kwargs.get("status"), dict) else {})) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py new file mode 100644 index 0000000000..b81e7063b6 --- /dev/null +++ b/src/huggingface_hub/commands/jobs.py @@ -0,0 +1,471 @@ +# Copyright 2025 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains commands to interact with jobs on the Hugging Face Hub. + +Usage: + # run a job + huggingface-cli jobs run image command +""" + +import io +import json +import os +import re +from argparse import Namespace, _SubParsersAction +from dataclasses import asdict +from datetime import datetime +from typing import Any, Dict, List, Optional, Union + +import requests +from dotenv import dotenv_values + +from huggingface_hub import HfApi +from huggingface_hub.utils import logging + +from . 
import BaseHuggingfaceCLICommand + + +logger = logging.get_logger(__name__) + + +class JobsCommands(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction): + jobs_parser = parser.add_parser("jobs", help="Commands to interact with your huggingface.co jobs.") + jobs_subparsers = jobs_parser.add_subparsers(help="huggingface.co jobs related commands") + + # Register commands + InspectCommand.register_subcommand(jobs_subparsers) + LogsCommand.register_subcommand(jobs_subparsers) + PsCommand.register_subcommand(jobs_subparsers) + RunCommand.register_subcommand(jobs_subparsers) + CancelCommand.register_subcommand(jobs_subparsers) + UvCommand.register_subcommand(jobs_subparsers) + + +class RunCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("run", help="Run a Job") + run_parser.add_argument("image", type=str, help="The Docker image to use.") + run_parser.add_argument("-e", "--env", action="append", help="Set environment variables.") + run_parser.add_argument("-s", "--secret", action="append", help="Set secret environment variables.") + run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") + run_parser.add_argument("--secret-env-file", type=str, help="Read in a file of secret environment variables.") + run_parser.add_argument( + "--flavor", + type=str, + help="Flavor for the hardware, as in HF Spaces.", + default="cpu-basic", + ) + run_parser.add_argument( + "--timeout", + type=str, + help="Max duration: int/float with s (seconds, default), m (minutes), h (hours) or d (days).", + ) + run_parser.add_argument( + "-d", + "--detach", + action="store_true", + help="Run the Job in the background and print the Job ID.", + ) + run_parser.add_argument( + "--token", + type=str, + help="A User Access Token generated from https://huggingface.co/settings/tokens", + ) + run_parser.add_argument("command", nargs="...", help="The command to run.") + run_parser.set_defaults(func=RunCommand) + + def __init__(self, args: Namespace) -> None: + self.image: str = args.image + self.command: list[str] = args.command + self.env: dict[str, Optional[str]] = {} + for env_value in args.env or []: + self.env.update(dotenv_values(stream=io.StringIO(env_value))) + if args.env_file: + self.env.update(dotenv_values(args.env_file)) + self.secrets: dict[str, Optional[str]] = {} + for secret in args.secret or []: + self.secrets.update(dotenv_values(stream=io.StringIO(secret))) + if args.secret_env_file: + self.secrets.update(dotenv_values(args.secret_env_file)) + self.flavor: str = args.flavor + self.timeout: Optional[str] = args.timeout + self.detach: bool = args.detach + self.token: Optional[str] = args.token + + def run(self) -> None: + api = HfApi(token=self.token) + job_url = api.run_job( + image=self.image, + command=self.command, + env=self.env, + secrets=self.secrets, + flavor=self.flavor, + timeout=self.timeout, + token=self.token, + ) + # Always print the job ID to the user + print(f"Job started with ID: {job_url.job_id}") + print(f"View at: {job_url}") + + if self.detach: + return + + # Now let's stream the logs + for log in api.fetch_job_logs(job_id=job_url.job_id): + print(log) + + +class LogsCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("logs", help="Fetch the logs of a Job") + run_parser.add_argument("job_id", type=str, help="Job ID") + 
run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.set_defaults(func=LogsCommand) + + def __init__(self, args: Namespace) -> None: + self.job_id: str = args.job_id + self.token: Optional[str] = args.token + + def run(self) -> None: + api = HfApi(token=self.token) + for log in api.fetch_job_logs(job_id=self.job_id): + print(log) + + +def _tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: + """ + Inspired by: + + - stackoverflow.com/a/8356620/593036 + - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data + """ + col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] + terminal_width = max(os.get_terminal_size().columns, len(headers) * 12) + while len(headers) + sum(col_widths) > terminal_width: + col_to_minimize = col_widths.index(max(col_widths)) + col_widths[col_to_minimize] //= 2 + if len(headers) + sum(col_widths) <= terminal_width: + col_widths[col_to_minimize] = terminal_width - sum(col_widths) - len(headers) + col_widths[col_to_minimize] + row_format = ("{{:{}}} " * len(headers)).format(*col_widths) + lines = [] + lines.append(row_format.format(*headers)) + lines.append(row_format.format(*["-" * w for w in col_widths])) + for row in rows: + row_format_args = [ + str(x)[: col_width - 3] + "..." if len(str(x)) > col_width else str(x) + for x, col_width in zip(row, col_widths) + ] + lines.append(row_format.format(*row_format_args)) + return "\n".join(lines) + + +class PsCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("ps", help="List Jobs") + run_parser.add_argument( + "-a", + "--all", + action="store_true", + help="Show all Jobs (default shows just running)", + ) + run_parser.add_argument( + "--token", + type=str, + help="A User Access Token generated from https://huggingface.co/settings/tokens", + ) + # Add Docker-style filtering argument + run_parser.add_argument( + "-f", + "--filter", + action="append", + default=[], + help="Filter output based on conditions provided (format: key=value)", + ) + # Add option to format output + run_parser.add_argument( + "--format", + type=str, + help="Format output using a custom template", + ) + run_parser.set_defaults(func=PsCommand) + + def __init__(self, args: Namespace) -> None: + self.all: bool = args.all + self.token: Optional[str] = args.token or None + self.format: Optional[str] = args.format + self.filters: Dict[str, str] = {} + + # Parse filter arguments (key=value pairs) + for f in args.filter: + if "=" in f: + key, value = f.split("=", 1) + self.filters[key.lower()] = value + else: + print(f"Warning: Ignoring invalid filter format '{f}'. Use key=value format.") + + def run(self) -> None: + """ + Fetch and display job information for the current user. + Uses Docker-style filtering with -f/--filter flag and key=value pairs. 
+ """ + try: + api = HfApi(token=self.token) + + # Fetch jobs data + jobs = api.list_jobs() + + # Define table headers + table_headers = ["JOB ID", "IMAGE/SPACE", "COMMAND", "CREATED", "STATUS"] + + # Process jobs data + rows = [] + + for job in jobs: + # Extract job data for filtering + status = job.status.stage if job.status else "UNKNOWN" + + # Skip job if not all jobs should be shown and status doesn't match criteria + if not self.all and status not in ("RUNNING", "UPDATING"): + continue + + # Extract job ID + job_id = job.id + + # Extract image or space information + image_or_space = job.docker_image or "N/A" + + # Extract and format command + command = job.command or [] + command_str = " ".join(command) if command else "N/A" + + # Extract creation time + created_at = job.created_at or "N/A" + + # Create a dict with all job properties for filtering + job_properties = { + "id": job_id, + "image": image_or_space, + "status": status.lower(), + "command": command_str, + } + + # Check if job matches all filters + if not self._matches_filters(job_properties): + continue + + # Create row + rows.append([job_id, image_or_space, command_str, created_at, status]) + + # Handle empty results + if not rows: + filters_msg = "" + if self.filters: + filters_msg = f" matching filters: {', '.join([f'{k}={v}' for k, v in self.filters.items()])}" + + print(f"No jobs found{filters_msg}") + return + + # Apply custom format if provided or use default tabular format + self._print_output(rows, table_headers) + + except requests.RequestException as e: + print(f"Error fetching jobs data: {e}") + except (KeyError, ValueError, TypeError) as e: + print(f"Error processing jobs data: {e}") + except Exception as e: + print(f"Unexpected error - {type(e).__name__}: {e}") + + def _matches_filters(self, job_properties: Dict[str, str]) -> bool: + """Check if job matches all specified filters.""" + for key, pattern in self.filters.items(): + # Check if property exists + if key not in job_properties: + return False + + # Support pattern matching with wildcards + if "*" in pattern or "?" 
in pattern: + # Convert glob pattern to regex + regex_pattern = pattern.replace("*", ".*").replace("?", ".") + if not re.search(f"^{regex_pattern}$", job_properties[key], re.IGNORECASE): + return False + # Simple substring matching + elif pattern.lower() not in job_properties[key].lower(): + return False + + return True + + def _print_output(self, rows, headers): + """Print output according to the chosen format.""" + if self.format: + # Custom template formatting (simplified) + template = self.format + for row in rows: + line = template + for i, field in enumerate(["id", "image", "command", "created", "status"]): + placeholder = f"{{{{.{field}}}}}" + if placeholder in line: + line = line.replace(placeholder, str(row[i])) + print(line) + else: + # Default tabular format + print( + _tabulate( + rows, + headers=headers, + ) + ) + + +class JSONEncoder(json.JSONEncoder): + def default(self, o: Any) -> Any: + return str(o) if isinstance(o, datetime) else super().default(o) + + +class InspectCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("inspect", help="Display detailed information on one or more Jobs") + run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.add_argument("job_ids", nargs="...", help="The jobs to inspect") + run_parser.set_defaults(func=InspectCommand) + + def __init__(self, args: Namespace) -> None: + self.token: Optional[str] = args.token or None + self.job_ids: list[str] = args.job_ids + + def run(self) -> None: + api = HfApi(token=self.token) + jobs = [api.inspect_job(job_id) for job_id in self.job_ids] + print(JSONEncoder(indent=4).encode([asdict(job) for job in jobs])) + + +class CancelCommand(BaseHuggingfaceCLICommand): + @staticmethod + def register_subcommand(parser: _SubParsersAction) -> None: + run_parser = parser.add_parser("jobs cancel", help="Cancel a Job") + run_parser.add_argument("job_id", type=str, help="Job ID") + run_parser.add_argument( + "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" + ) + run_parser.set_defaults(func=CancelCommand) + + def __init__(self, args: Namespace) -> None: + self.job_id: str = args.job_id + self.token: Optional[str] = args.token or None + + def run(self) -> None: + api = HfApi(token=self.token) + api.cancel_job(self.job_id) + + +class UvCommand(BaseHuggingfaceCLICommand): + """Run UV scripts on Hugging Face infrastructure.""" + + @staticmethod + def register_subcommand(parser): + """Register UV run subcommand.""" + uv_parser = parser.add_parser( + "uv", + help="Run UV scripts (Python with inline dependencies) on HF infrastructure", + ) + + subparsers = uv_parser.add_subparsers(dest="uv_command", help="UV commands", required=True) + + # Run command only + run_parser = subparsers.add_parser( + "run", + help="Run a UV script (local file or URL) on HF infrastructure", + ) + run_parser.add_argument("script", help="UV script to run (local file or URL)") + run_parser.add_argument("script_args", nargs="...", help="Arguments for the script", default=[]) + run_parser.add_argument( + "--repo", + help="Repository name for the script (creates ephemeral if not specified)", + ) + run_parser.add_argument("--flavor", type=str, default="cpu-basic", help="Hardware flavor (default: cpu-basic)") + run_parser.add_argument("-e", "--env", action="append", help="Environment variables") + 
run_parser.add_argument("-s", "--secret", action="append", help="Secret environment variables") + run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") + run_parser.add_argument( + "--secret-env-file", + type=str, + help="Read in a file of secret environment variables.", + ) + run_parser.add_argument("--timeout", type=str, help="Max duration (e.g., 30s, 5m, 1h)") + run_parser.add_argument("-d", "--detach", action="store_true", help="Run in background") + run_parser.add_argument("--token", type=str, help="HF token") + # UV options + run_parser.add_argument("--with", action="append", help="Run with the given packages installed", dest="with_") + run_parser.add_argument( + "-p", "--python", type=str, help="The Python interpreter to use for the run environment" + ) + run_parser.set_defaults(func=UvCommand) + + def __init__(self, args: Namespace) -> None: + """Initialize the command with parsed arguments.""" + self.script = args.script + self.script_args = args.script_args + self.dependencies = args.with_ + self.python = args.python + self.env: dict[str, Optional[str]] = {} + for env_value in args.env or []: + self.env.update(dotenv_values(stream=io.StringIO(env_value))) + if args.env_file: + self.env.update(dotenv_values(args.env_file)) + self.secrets: dict[str, Optional[str]] = {} + for secret in args.secret or []: + self.secrets.update(dotenv_values(stream=io.StringIO(secret))) + if args.secret_env_file: + self.secrets.update(dotenv_values(args.secret_env_file)) + self.flavor: Optional[str] = args.flavor + self.timeout: Optional[str] = args.timeout + self.detach: bool = args.detach + self.token: Optional[str] = args.token + self._repo = args.repo + + def run(self) -> None: + """Execute UV command.""" + logging.set_verbosity(logging.INFO) + api = HfApi(token=self.token) + job_url = api.run_uv_job( + script=self.script, + script_args=self.script_args, + dependencies=self.dependencies, + python=self.python, + env=self.env, + secrets=self.secrets, + flavor=self.flavor, + timeout=self.timeout, + _repo=self._repo, + ) + + # Always print the job ID to the user + print(f"Job started with ID: {job_url.job_id}") + print(f"View at: {job_url}") + + if self.detach: + return + + # Now let's stream the logs + for log in api.fetch_job_logs(job_id=job_url.job_id): + print(log) diff --git a/src/huggingface_hub/commands/jobs/__init__.py b/src/huggingface_hub/commands/jobs/__init__.py deleted file mode 100644 index 62b3d52af1..0000000000 --- a/src/huggingface_hub/commands/jobs/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2025 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Contains commands to interact with jobs on the Hugging Face Hub. 
- -Usage: - # run a job - huggingface-cli jobs run image command -""" - -from argparse import _SubParsersAction - -from huggingface_hub.commands import BaseHuggingfaceCLICommand -from huggingface_hub.commands.jobs.cancel import CancelCommand -from huggingface_hub.commands.jobs.inspect import InspectCommand -from huggingface_hub.commands.jobs.logs import LogsCommand -from huggingface_hub.commands.jobs.ps import PsCommand -from huggingface_hub.commands.jobs.run import RunCommand -from huggingface_hub.commands.jobs.uv import UvCommand -from huggingface_hub.utils import logging - - -logger = logging.get_logger(__name__) - - -class JobsCommands(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction): - jobs_parser = parser.add_parser("jobs", help="Commands to interact with your huggingface.co jobs.") - jobs_subparsers = jobs_parser.add_subparsers(help="huggingface.co jobs related commands") - - # Register commands - InspectCommand.register_subcommand(jobs_subparsers) - LogsCommand.register_subcommand(jobs_subparsers) - PsCommand.register_subcommand(jobs_subparsers) - RunCommand.register_subcommand(jobs_subparsers) - CancelCommand.register_subcommand(jobs_subparsers) - UvCommand.register_subcommand(jobs_subparsers) diff --git a/src/huggingface_hub/commands/jobs/_cli_utils.py b/src/huggingface_hub/commands/jobs/_cli_utils.py deleted file mode 100644 index d9d1f1411c..0000000000 --- a/src/huggingface_hub/commands/jobs/_cli_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -import os -from typing import List, Union - - -def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str: - """ - Inspired by: - - - stackoverflow.com/a/8356620/593036 - - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data - """ - col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)] - terminal_width = max(os.get_terminal_size().columns, len(headers) * 12) - while len(headers) + sum(col_widths) > terminal_width: - col_to_minimize = col_widths.index(max(col_widths)) - col_widths[col_to_minimize] //= 2 - if len(headers) + sum(col_widths) <= terminal_width: - col_widths[col_to_minimize] = terminal_width - sum(col_widths) - len(headers) + col_widths[col_to_minimize] - row_format = ("{{:{}}} " * len(headers)).format(*col_widths) - lines = [] - lines.append(row_format.format(*headers)) - lines.append(row_format.format(*["-" * w for w in col_widths])) - for row in rows: - row_format_args = [ - str(x)[: col_width - 3] + "..." if len(str(x)) > col_width else str(x) - for x, col_width in zip(row, col_widths) - ] - lines.append(row_format.format(*row_format_args)) - return "\n".join(lines) diff --git a/src/huggingface_hub/commands/jobs/cancel.py b/src/huggingface_hub/commands/jobs/cancel.py deleted file mode 100644 index 000ba66d4a..0000000000 --- a/src/huggingface_hub/commands/jobs/cancel.py +++ /dev/null @@ -1,32 +0,0 @@ -from argparse import Namespace, _SubParsersAction -from typing import Optional - -import requests - -from huggingface_hub import whoami -from huggingface_hub.utils import build_hf_headers - -from .. 
import BaseHuggingfaceCLICommand - - -class CancelCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("jobs cancel", help="Cancel a Job") - run_parser.add_argument("job_id", type=str, help="Job ID") - run_parser.add_argument( - "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" - ) - run_parser.set_defaults(func=CancelCommand) - - def __init__(self, args: Namespace) -> None: - self.job_id: str = args.job_id - self.token: Optional[str] = args.token or None - - def run(self) -> None: - username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token) - requests.post( - f"https://huggingface.co/api/jobs/{username}/{self.job_id}/cancel", - headers=headers, - ).raise_for_status() diff --git a/src/huggingface_hub/commands/jobs/inspect.py b/src/huggingface_hub/commands/jobs/inspect.py deleted file mode 100644 index bb27858bba..0000000000 --- a/src/huggingface_hub/commands/jobs/inspect.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -from argparse import Namespace, _SubParsersAction -from typing import Optional - -import requests - -from huggingface_hub import whoami -from huggingface_hub.utils import build_hf_headers - -from .. import BaseHuggingfaceCLICommand - - -class InspectCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("inspect", help="Display detailed information on one or more Jobs") - run_parser.add_argument( - "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" - ) - run_parser.add_argument("jobs", nargs="...", help="The jobs to inspect") - run_parser.set_defaults(func=InspectCommand) - - def __init__(self, args: Namespace) -> None: - self.token: Optional[str] = args.token or None - self.jobs: list[str] = args.jobs - - def run(self) -> None: - username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token) - inspections = [ - requests.get( - f"https://huggingface.co/api/jobs/{username}/{job}", - headers=headers, - ).json() - for job in self.jobs - ] - print(json.dumps(inspections, indent=4)) diff --git a/src/huggingface_hub/commands/jobs/logs.py b/src/huggingface_hub/commands/jobs/logs.py deleted file mode 100644 index efb224aae3..0000000000 --- a/src/huggingface_hub/commands/jobs/logs.py +++ /dev/null @@ -1,84 +0,0 @@ -import json -import time -from argparse import Namespace, _SubParsersAction -from typing import Optional - -import requests - -from huggingface_hub import whoami -from huggingface_hub.utils import build_hf_headers - -from .. 
import BaseHuggingfaceCLICommand - - -class LogsCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("logs", help="Fetch the logs of a Job") - run_parser.add_argument("job_id", type=str, help="Job ID") - run_parser.add_argument("-t", "--timestamps", action="store_true", help="Show timestamps") - run_parser.add_argument( - "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" - ) - run_parser.set_defaults(func=LogsCommand) - - def __init__(self, args: Namespace) -> None: - self.job_id: str = args.job_id - self.timestamps: bool = args.timestamps - self.token: Optional[str] = args.token or None - - def run(self) -> None: - username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token) - requests.get( - f"https://huggingface.co/api/jobs/{username}/{self.job_id}", - headers=headers, - ).raise_for_status() - - logging_started = False - logging_finished = False - job_finished = False - # - We need to retry because sometimes the /logs doesn't return logs when the job just started. - # (for example it can return only two lines: one for "Job started" and one empty line) - # - Timeouts can happen in case of build errors - # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming - # - Infinite empty log stream can happen in case of build error - # (the logs stream is infinite and empty except for the Job started message) - # - there is a ": keep-alive" every 30 seconds - while True: - try: - resp = requests.get( - f"https://huggingface.co/api/jobs/{username}/{self.job_id}/logs", - headers=headers, - stream=True, - timeout=120, - ) - log = None - for line in resp.iter_lines(chunk_size=1): - line = line.decode("utf-8") - if line and line.startswith("data: {"): - data = json.loads(line[len("data: ") :]) - # timestamp = data["timestamp"] - if not data["data"].startswith("===== Job started"): - logging_started = True - log = data["data"] - print(log) - logging_finished = logging_started - except requests.exceptions.ChunkedEncodingError: - # Response ended prematurely - break - except KeyboardInterrupt: - break - except requests.exceptions.ConnectionError as err: - is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) - if logging_started or not is_timeout: - raise - if logging_finished or job_finished: - break - job_status = requests.get( - f"https://huggingface.co/api/jobs/{username}/{self.job_id}", - headers=headers, - ).json() - if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): - job_finished = True - time.sleep(1) diff --git a/src/huggingface_hub/commands/jobs/ps.py b/src/huggingface_hub/commands/jobs/ps.py deleted file mode 100644 index 473ccb617f..0000000000 --- a/src/huggingface_hub/commands/jobs/ps.py +++ /dev/null @@ -1,180 +0,0 @@ -import re -from argparse import Namespace, _SubParsersAction -from typing import Dict, Optional - -import requests - -from huggingface_hub import whoami -from huggingface_hub.utils import build_hf_headers - -from .. 
import BaseHuggingfaceCLICommand -from ._cli_utils import tabulate - - -class PsCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("ps", help="List Jobs") - run_parser.add_argument( - "-a", - "--all", - action="store_true", - help="Show all Jobs (default shows just running)", - ) - run_parser.add_argument( - "--token", - type=str, - help="A User Access Token generated from https://huggingface.co/settings/tokens", - ) - # Add Docker-style filtering argument - run_parser.add_argument( - "-f", - "--filter", - action="append", - default=[], - help="Filter output based on conditions provided (format: key=value)", - ) - # Add option to format output - run_parser.add_argument( - "--format", - type=str, - help="Format output using a custom template", - ) - run_parser.set_defaults(func=PsCommand) - - def __init__(self, args: Namespace) -> None: - self.all: bool = args.all - self.token: Optional[str] = args.token or None - self.format: Optional[str] = args.format - self.filters: Dict[str, str] = {} - - # Parse filter arguments (key=value pairs) - for f in args.filter: - if "=" in f: - key, value = f.split("=", 1) - self.filters[key.lower()] = value - else: - print(f"Warning: Ignoring invalid filter format '{f}'. Use key=value format.") - - def run(self) -> None: - """ - Fetch and display job information for the current user. - Uses Docker-style filtering with -f/--filter flag and key=value pairs. - """ - try: - # Get current username - username = whoami(self.token)["name"] - # Build headers for API request - headers = build_hf_headers(token=self.token) - # Fetch jobs data - response = requests.get( - f"https://huggingface.co/api/jobs/{username}", - headers=headers, - timeout=30, # Add timeout to prevent hanging - ) - response.raise_for_status() - - # Define table headers - table_headers = ["JOB ID", "IMAGE/SPACE", "COMMAND", "CREATED", "STATUS"] - - # Process jobs data - rows = [] - jobs = response.json() - - for job in jobs: - # Extract job data for filtering - status = job.get("status", {}).get("stage", "UNKNOWN") - - # Skip job if not all jobs should be shown and status doesn't match criteria - if not self.all and status not in ("RUNNING", "UPDATING"): - continue - - # Extract job ID safely - job_id = job.get("id", "N/A") - - # Extract image or space information - if "spaceId" in job and job["spaceId"] is not None: - image_or_space = f"hf.co/spaces/{job['spaceId']}" - else: - image_or_space = job.get("dockerImage", "N/A") - - # Extract and format command - command = job.get("command", []) - command_str = " ".join(command) if command else "N/A" - - # Extract creation time - created_at = job.get("createdAt", "N/A") - - # Create a dict with all job properties for filtering - job_properties = { - "id": job_id, - "image": image_or_space, - "status": status.lower(), - "command": command_str, - } - - # Check if job matches all filters - if not self._matches_filters(job_properties): - continue - - # Create row - rows.append([job_id, image_or_space, command_str, created_at, status]) - - # Handle empty results - if not rows: - filters_msg = "" - if self.filters: - filters_msg = f" matching filters: {', '.join([f'{k}={v}' for k, v in self.filters.items()])}" - - print(f"No jobs found{filters_msg}") - return - - # Apply custom format if provided or use default tabular format - self._print_output(rows, table_headers) - - except requests.RequestException as e: - print(f"Error fetching jobs data: {e}") - except 
(KeyError, ValueError, TypeError) as e: - print(f"Error processing jobs data: {e}") - except Exception as e: - print(f"Unexpected error: {e}") - - def _matches_filters(self, job_properties: Dict[str, str]) -> bool: - """Check if job matches all specified filters.""" - for key, pattern in self.filters.items(): - # Check if property exists - if key not in job_properties: - return False - - # Support pattern matching with wildcards - if "*" in pattern or "?" in pattern: - # Convert glob pattern to regex - regex_pattern = pattern.replace("*", ".*").replace("?", ".") - if not re.search(f"^{regex_pattern}$", job_properties[key], re.IGNORECASE): - return False - # Simple substring matching - elif pattern.lower() not in job_properties[key].lower(): - return False - - return True - - def _print_output(self, rows, headers): - """Print output according to the chosen format.""" - if self.format: - # Custom template formatting (simplified) - template = self.format - for row in rows: - line = template - for i, field in enumerate(["id", "image", "command", "created", "status"]): - placeholder = f"{{{{.{field}}}}}" - if placeholder in line: - line = line.replace(placeholder, str(row[i])) - print(line) - else: - # Default tabular format - print( - tabulate( - rows, - headers=headers, - ) - ) diff --git a/src/huggingface_hub/commands/jobs/run.py b/src/huggingface_hub/commands/jobs/run.py deleted file mode 100644 index 360acd7822..0000000000 --- a/src/huggingface_hub/commands/jobs/run.py +++ /dev/null @@ -1,171 +0,0 @@ -import io -import json -import time -from argparse import Namespace, _SubParsersAction -from typing import Optional, Union - -import requests -from dotenv import dotenv_values - -from huggingface_hub import whoami -from huggingface_hub.utils import build_hf_headers - -from .. 
import BaseHuggingfaceCLICommand - - -def _parse_timeout(timeout: Optional[str]) -> Optional[int]: - """Get timeout in seconds""" - time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24} - if not timeout: - return None - elif timeout[-1] in time_units_factors: - return int(float(timeout[:-1]) * time_units_factors[timeout[-1]]) - else: - return int(timeout) - - -class RunCommand(BaseHuggingfaceCLICommand): - @staticmethod - def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("run", help="Run a Job") - run_parser.add_argument("dockerImage", type=str, help="The Docker image to use.") - run_parser.add_argument("-e", "--env", action="append", help="Set environment variables.") - run_parser.add_argument("-s", "--secret", action="append", help="Set secret environment variables.") - run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") - run_parser.add_argument("--secret-env-file", type=str, help="Read in a file of secret environment variables.") - run_parser.add_argument( - "--flavor", - type=str, - help="Flavor for the hardware, as in HF Spaces.", - default="cpu-basic", - ) - run_parser.add_argument( - "--timeout", - type=str, - help="Max duration: int/float with s (seconds, default), m (minutes), h (hours) or d (days).", - ) - run_parser.add_argument( - "-d", - "--detach", - action="store_true", - help="Run the Job in the background and print the Job ID.", - ) - run_parser.add_argument( - "--token", - type=str, - help="A User Access Token generated from https://huggingface.co/settings/tokens", - ) - run_parser.add_argument("command", nargs="...", help="The command to run.") - run_parser.set_defaults(func=RunCommand) - - def __init__(self, args: Namespace) -> None: - self.docker_image: str = args.dockerImage - self.environment: dict[str, Optional[str]] = {} - for env_value in args.env or []: - self.environment.update(dotenv_values(stream=io.StringIO(env_value))) - if args.env_file: - self.environment.update(dotenv_values(args.env_file)) - self.secrets: dict[str, Optional[str]] = {} - for secret in args.secret or []: - self.secrets.update(dotenv_values(stream=io.StringIO(secret))) - if args.secret_env_file: - self.secrets.update(dotenv_values(args.secret_env_file)) - self.flavor: str = args.flavor - self.timeout: Optional[int] = _parse_timeout(args.timeout) - self.detach: bool = args.detach - self.token: Optional[str] = args.token - self.command: list[str] = args.command - - def run(self) -> None: - # prepare paypload to send to HF Jobs API - input_json: dict[str, Optional[Union[str, float, list[str], dict[str, Optional[str]]]]] = { - "command": self.command, - "arguments": [], - "environment": self.environment, - "flavor": self.flavor, - } - # secrets are optional - if self.secrets: - input_json["secrets"] = self.secrets - # timeout is optional - if self.timeout: - input_json["timeoutSeconds"] = self.timeout - # input is either from docker hub or from HF spaces - for prefix in ( - "https://huggingface.co/spaces/", - "https://hf.co/spaces/", - "huggingface.co/spaces/", - "hf.co/spaces/", - ): - if self.docker_image.startswith(prefix): - input_json["spaceId"] = self.docker_image[len(prefix) :] - break - else: - input_json["dockerImage"] = self.docker_image - username = whoami(self.token)["name"] - headers = build_hf_headers(token=self.token) - resp = requests.post( - f"https://huggingface.co/api/jobs/{username}", - json=input_json, - headers=headers, - ) - resp.raise_for_status() - response = resp.json() 
- # Fix: Update job_id extraction to match new response format - job_id = response["id"] - - # Always print the job ID to the user - print(f"Job started with ID: {job_id}") - print(f"View at: https://huggingface.co/jobs/{username}/{job_id}") - - if self.detach: - return - - # Now let's stream the logs - - logging_finished = logging_started = False - job_finished = False - # - We need to retry because sometimes the /logs doesn't return logs when the job just started. - # (for example it can return only two lines: one for "Job started" and one empty line) - # - Timeouts can happen in case of build errors - # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming - # - Infinite empty log stream can happen in case of build error - # (the logs stream is infinite and empty except for the Job started message) - # - there is a ": keep-alive" every 30 seconds - while True: - try: - resp = requests.get( - f"https://huggingface.co/api/jobs/{username}/{job_id}/logs", - headers=headers, - stream=True, - timeout=120, - ) - log = None - for line in resp.iter_lines(chunk_size=1): - line = line.decode("utf-8") - if line and line.startswith("data: {"): - data = json.loads(line[len("data: ") :]) - # timestamp = data["timestamp"] - if not data["data"].startswith("===== Job started"): - logging_started = True - log = data["data"] - print(log) - logging_finished = logging_started - except requests.exceptions.ChunkedEncodingError: - # Response ended prematurely - break - except KeyboardInterrupt: - break - except requests.exceptions.ConnectionError as err: - is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) - if logging_started or not is_timeout: - raise - if logging_finished or job_finished: - break - job_status = requests.get( - f"https://huggingface.co/api/jobs/{username}/{job_id}", - headers=headers, - ).json() - if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): - job_finished = True - time.sleep(1) diff --git a/src/huggingface_hub/commands/jobs/uv.py b/src/huggingface_hub/commands/jobs/uv.py deleted file mode 100644 index 630b726d60..0000000000 --- a/src/huggingface_hub/commands/jobs/uv.py +++ /dev/null @@ -1,223 +0,0 @@ -"""UV run command for huggingface-cli jobs - execute UV scripts on HF infrastructure.""" - -import hashlib -from argparse import Namespace -from datetime import datetime -from pathlib import Path - -from huggingface_hub import HfApi, create_repo -from huggingface_hub.utils import RepositoryNotFoundError - -from .. 
import BaseHuggingfaceCLICommand -from .run import RunCommand - - -class UvCommand(BaseHuggingfaceCLICommand): - """Run UV scripts on Hugging Face infrastructure.""" - - @staticmethod - def register_subcommand(parser): - """Register UV run subcommand.""" - uv_parser = parser.add_parser( - "uv", - help="Run UV scripts (Python with inline dependencies) on HF infrastructure", - ) - - subparsers = uv_parser.add_subparsers(dest="uv_command", help="UV commands", required=True) - - # Run command only - run_parser = subparsers.add_parser( - "run", - help="Run a UV script (local file or URL) on HF infrastructure", - ) - run_parser.add_argument("script", help="UV script to run (local file or URL)") - run_parser.add_argument("script_args", nargs="...", help="Arguments for the script", default=[]) - run_parser.add_argument( - "--repo", - help="Repository name for the script (creates ephemeral if not specified)", - ) - run_parser.add_argument("--flavor", type=str, default="cpu-basic", help="Hardware flavor (default: cpu-basic)") - run_parser.add_argument("-e", "--env", action="append", help="Environment variables") - run_parser.add_argument("-s", "--secret", action="append", help="Secret environment variables") - run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") - run_parser.add_argument( - "--secret-env-file", - type=str, - help="Read in a file of secret environment variables.", - ) - run_parser.add_argument("--timeout", type=str, help="Max duration (e.g., 30s, 5m, 1h)") - run_parser.add_argument("-d", "--detach", action="store_true", help="Run in background") - run_parser.add_argument("--token", type=str, help="HF token") - # UV options - run_parser.add_argument("--with", action="append", help="Run with the given packages installed", dest="with_") - run_parser.add_argument( - "-p", "--python", type=str, help="The Python interpreter to use for the run environment" - ) - run_parser.set_defaults(func=UvCommand) - - def __init__(self, args): - """Initialize the command with parsed arguments.""" - self.args = args - - def run(self): - """Execute UV command.""" - if self.args.uv_command == "run": - self._run_script(self.args) - - def _run_script(self, args): - """Run a UV script on HF infrastructure.""" - print("Note: huggingface-cli jobs uv run is experimental and subject to change.") - api = HfApi(token=args.token) - - if args.script.startswith("http://") or args.script.startswith("https://"): - # Direct URL execution - no upload needed - script_url = args.script - print(f"Running script from URL: {script_url}") - else: - # Local file - upload to HF - script_path = Path(args.script) - if not script_path.exists(): - print(f"Error: Script not found: {args.script}") - return - - # Determine repository - repo_id = self._determine_repository(args, api) - is_ephemeral = args.repo is None - - # Create repo if needed - try: - api.repo_info(repo_id, repo_type="dataset") - if not is_ephemeral: - print(f"Using existing repository: {repo_id}") - except RepositoryNotFoundError: - print(f"Creating repository: {repo_id}") - create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) - - # Upload script - print(f"Uploading {script_path.name}...") - with open(script_path, "r") as f: - script_content = f.read() - - filename = script_path.name - - api.upload_file( - path_or_fileobj=script_content.encode(), - path_in_repo=filename, - repo_id=repo_id, - repo_type="dataset", - ) - - script_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}" - 
repo_url = f"https://huggingface.co/datasets/{repo_id}" - - print(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") - - # Create and upload minimal README - readme_content = self._create_minimal_readme(repo_id, filename, is_ephemeral) - api.upload_file( - path_or_fileobj=readme_content.encode(), - path_in_repo="README.md", - repo_id=repo_id, - repo_type="dataset", - ) - - if is_ephemeral: - print(f"✓ Temporary repository created: {repo_id}") - - # Prepare docker image (always use Python 3.12) - docker_image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" - - # Build command - uv_args = [] - for with_arg in args.with_: - uv_args += ["--with", with_arg] - if args.python: - uv_args += ["--python", args.python] - command = ["uv", "run"] + uv_args + [script_url] + args.script_args - - # Create RunCommand args - run_args = Namespace( - dockerImage=docker_image, - command=command, - env=args.env, - secret=args.secret, - env_file=args.env_file, - secret_env_file=args.secret_env_file, - flavor=args.flavor, - timeout=args.timeout, - detach=args.detach, - token=args.token, - ) - - print("Starting job on HF infrastructure...") - RunCommand(run_args).run() - - def _determine_repository(self, args, api): - """Determine which repository to use for the script.""" - # Use provided repo - if args.repo: - repo_id = args.repo - if "/" not in repo_id: - username = api.whoami()["name"] - repo_id = f"{username}/{repo_id}" - return repo_id - - # Create ephemeral repo - username = api.whoami()["name"] - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - - # Simple hash for uniqueness - script_hash = hashlib.md5(Path(args.script).read_bytes()).hexdigest()[:8] - - return f"{username}/huggingface-cli-jobs-uv-run-{timestamp}-{script_hash}" - - def _create_minimal_readme(self, repo_id, script_name, is_ephemeral): - """Create minimal README content.""" - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") - - if is_ephemeral: - # Ephemeral repository README - return f"""--- -tags: -- huggingface-cli-jobs-uv-script -- ephemeral ---- - -# UV Script: {script_name} - -Executed via `huggingface-cli jobs uv run` on {timestamp} - -## Run this script - -```bash -huggingface-cli jobs run ghcr.io/astral-sh/uv:python3.12-bookworm-slim \\ - uv run https://huggingface.co/datasets/{repo_id}/resolve/main/{script_name} -``` - ---- -*Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* -""" - # Named repository README - repo_name = repo_id.split("/")[-1] - return f"""--- -tags: -- huggingface-cli-jobs-uv-script -viewer: false ---- - -# {repo_name} - -UV scripts repository - -## Scripts -- `{script_name}` - Added {timestamp} - -## Run - -```bash -huggingface-cli jobs uv run {script_name} --repo {repo_name} -``` - ---- -*Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* -""" diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index e2a32dd14a..0d41d8002f 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -14,11 +14,13 @@ # limitations under the License. 
from __future__ import annotations +import hashlib import inspect import io import json import re import struct +import time import warnings from collections import defaultdict from concurrent.futures import Future, ThreadPoolExecutor @@ -27,6 +29,7 @@ from functools import wraps from itertools import islice from pathlib import Path +from textwrap import dedent from typing import ( TYPE_CHECKING, Any, @@ -65,6 +68,7 @@ _warn_on_overwriting_operations, ) from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType +from ._jobs_api import JobInfo, JobUrl from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable from ._upload_large_folder import upload_large_folder_internal from .community import ( @@ -9940,6 +9944,488 @@ def auth_check( r = get_session().get(path, headers=headers) hf_raise_for_status(r) + def run_job( + self, + image: str, + command: List[str], + env: Optional[Dict[str, Any]] = None, + secrets: Optional[Dict[str, Any]] = None, + flavor: str = "cpu-basic", + timeout: Optional[Union[int, float, str]] = None, + token: Union[bool, str, None] = None, + ) -> JobUrl: + """ + Run compute Jobs on Hugging Face infrastructure. + + Args: + image (`str`): + The Docker image to use. + Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`. + Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`. + + command (`List[str]`): + The command to run. Example: `["echo", "hello"]`. + + env (`Dict[str, Any]`, *optional*): + Defines the environment variables for the Job. + + secrets (`Dict[str, Any]`, *optional*): + Defines the secret environment variables for the Job. + + flavor (`str`, defaults to `"cpu-basic"`): + "Flavor for the hardware, as in Hugging Face Spaces. + + timeout (`Union[int, float, str]`, *optional*): + Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). + Example: `300` or `"5m"` for 5 minutes. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. 
+ + Example: + Run your first Job: + + ```python + >>> from huggingface_hub import run_job + >>> run_job("python:3.12", ["python", "-c" ,"print('Hello from HF compute!')"]) + ``` + + Run a GPU Job: + + ``` + >>> from huggingface_hub import run_job + >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel" + >>> command = ["python", "-c", "import torch; print(f"This code ran with the following GPU: {torch.cuda.get_device_name()}")"] + >>> run_job(image, command, flavor="a10g-small") + ``` + + """ + # prepare payload to send to HF Jobs API + input_json: Dict[str, Optional[Union[str, float, list[str], Dict[str, Optional[str]]]]] = { + "command": command, + "arguments": [], + "environment": env or {}, + "flavor": flavor, + } + # secrets are optional + if secrets: + input_json["secrets"] = secrets + # timeout is optional + if timeout: + time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24} + if isinstance(timeout, str) and timeout[-1] in time_units_factors: + input_json["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]]) + else: + input_json["timeoutSeconds"] = int(timeout) + # input is either from docker hub or from HF spaces + for prefix in ( + "https://huggingface.co/spaces/", + "https://hf.co/spaces/", + "huggingface.co/spaces/", + "hf.co/spaces/", + ): + if image.startswith(prefix): + input_json["spaceId"] = image[len(prefix) :] + break + else: + input_json["dockerImage"] = image + username = self.whoami(token=token)["name"] + response = get_session().post( + f"https://huggingface.co/api/jobs/{username}", + json=input_json, + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + job_info = response.json() + job_id = job_info["id"] + job_url = f"{self.endpoint}/jobs/{username}/{job_id}" + return JobUrl(job_url, endpoint=self.endpoint) + + def fetch_job_logs( + self, + job_id: str, + token: Union[bool, str, None] = None, + ) -> Iterable[str]: + """ + Fetch all the logs from a compute Job on Hugging Face infrastructure. + + Args: + job_id (`str`): + ID of the Job. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + + Example: + + ```python + >>> from huggingface_hub import fetch_job_logs, run_job + >>> job = run_job("python:3.12", ["python", "-c" ,"print('Hello from HF compute!')"]) + >>> for log in fetch_job_logs(job.job_id): + ... print(log) + Hello from HF compute! + ``` + """ + username = self.whoami(token=token)["name"] + logging_finished = logging_started = False + job_finished = False + # - We need to retry because sometimes the /logs doesn't return logs when the job just started. 
+ # (for example it can return only two lines: one for "Job started" and one empty line) + # - Timeouts can happen in case of build errors + # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming + # - Infinite empty log stream can happen in case of build error + # (the logs stream is infinite and empty except for the Job started message) + # - there is a ": keep-alive" every 30 seconds + + # We don't use http_backoff since we need to check ourselves if ConnectionError.__context__ is a TimeoutError + max_retries = 5 + min_wait_time = 1 + max_wait_time = 10 + sleep_time = 0 + for _ in range(max_retries): + time.sleep(sleep_time) + sleep_time = min(max_wait_time, max(min_wait_time, sleep_time * 2)) + try: + resp = get_session().get( + f"https://huggingface.co/api/jobs/{username}/{job_id}/logs", + headers=self._build_hf_headers(token=token), + stream=True, + timeout=120, + ) + log = None + for line in resp.iter_lines(chunk_size=1): + line = line.decode("utf-8") + if line and line.startswith("data: {"): + data = json.loads(line[len("data: ") :]) + # timestamp = data["timestamp"] + if not data["data"].startswith("===== Job started"): + logging_started = True + log = data["data"] + yield log + logging_finished = logging_started + except requests.exceptions.ChunkedEncodingError: + # Response ended prematurely + break + except KeyboardInterrupt: + break + except requests.exceptions.ConnectionError as err: + is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) + if logging_started or not is_timeout: + raise + if logging_finished or job_finished: + break + job_status = ( + get_session() + .get( + f"https://huggingface.co/api/jobs/{username}/{job_id}", + headers=self._build_hf_headers(token=token), + ) + .json() + ) + if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): + job_finished = True + + def list_jobs( + self, + timeout: Optional[int] = None, + token: Union[bool, str, None] = None, + ) -> List[JobInfo]: + """ + List compute Jobs on Hugging Face infrastructure. + + Args: + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + """ + username = whoami(token=token)["name"] + response = get_session().get( + f"{self.endpoint}/api/jobs/{username}", + headers=self._build_hf_headers(token=token), + timeout=timeout, + ) + response.raise_for_status() + return [JobInfo(**job_info) for job_info in response.json()] + + def inspect_job( + self, + job_id: str, + token: Union[bool, str, None] = None, + ) -> JobInfo: + """ + Inspect a compute Job on Hugging Face infrastructure. + + Args: + job_id (`str`): + ID of the Job. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. 
+ + Example: + + ```python + >>> from huggingface_hub import inspect_job, run_job + >>> job = run_job("python:3.12", ["python", "-c" ,"print('Hello from HF compute!')"]) + >>> inspect_job(job.job_id) + JobInfo( + id='68780d00bbe36d38803f645f', + created_at=datetime.datetime(2025, 7, 16, 20, 35, 12, 808000, tzinfo=datetime.timezone.utc), + docker_image='python:3.12', + space_id=None, + command=['python', '-c', "print('Hello from HF compute!')"], + arguments=[], + environment={}, + secrets={}, + flavor='cpu-basic', + status=JobStatus(stage='RUNNING', message=None) + ) + ``` + """ + username = self.whoami(token=token)["name"] + response = get_session().get( + f"{self.endpoint}/api/jobs/{username}/{job_id}", + headers=self._build_hf_headers(token=token), + ) + response.raise_for_status() + return JobInfo(**response.json()) + + def cancel_job( + self, + job_id: str, + token: Union[bool, str, None] = None, + ) -> None: + """ + Cancel a compute Job on Hugging Face infrastructure. + + Args: + job_id (`str`): + ID of the Job. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. + """ + username = self.whoami(token=token)["name"] + get_session().post( + f"{self.endpoint}/api/jobs/{username}/{job_id}/cancel", + headers=self._build_hf_headers(token=token), + ).raise_for_status() + + @experimental + def run_uv_job( + self, + script: str, + script_args: Optional[List[str]] = None, + dependencies: Optional[List[str]] = None, + python: Optional[str] = None, + env: Optional[Dict[str, Any]] = None, + secrets: Optional[Dict[str, Any]] = None, + flavor: str = "cpu-basic", + timeout: Optional[Union[int, float, str]] = None, + token: Union[bool, str, None] = None, + _repo: Optional[str] = None, + ) -> JobUrl: + """ + Run a UV script Job on Hugging Face infrastructure. + + Args: + script (`str`): + Path or URL of the UV script. + + script_args (`List[str]`, *optional*) + Arguments to pass to the script. + + dependencies (`List[str]`, *optional*) + Dependencies to use to run the UV script. + + python (`str`, *optional*) + Use a specific Python version. Default is 3.12. + + env (`Dict[str, Any]`, *optional*): + Defines the environment variables for the Job. + + secrets (`Dict[str, Any]`, *optional*): + Defines the secret environment variables for the Job. + + flavor (`str`, defaults to `"cpu-basic"`): + "Flavor for the hardware, as in Hugging Face Spaces. + + timeout (`Union[int, float, str]`, *optional*): + Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). + Example: `300` or `"5m"` for 5 minutes. + + token `(Union[bool, str, None]`, *optional*): + A valid user access token. If not provided, the locally saved token will be used, which is the + recommended authentication method. Set to `False` to disable authentication. + Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. 
+ + Example: + + ```python + >>> from huggingface_hub import run_uv_job + >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py" + >>> run_uv_job(script, dependencies=["trl"], flavor="a10g-small") + ``` + """ + + if script.startswith("http://") or script.startswith("https://"): + # Direct URL execution - no upload needed + script_url = script + else: + # Local file - upload to HF + script_path = Path(script) + + def _determine_repository(): + """Determine which repository to use for the script.""" + # Use provided repo + if _repo: + repo_id = _repo + if "/" not in repo_id: + username = self.whoami(token=token)["name"] + repo_id = f"{username}/{repo_id}" + return repo_id + + # Create ephemeral repo + username = self.whoami(token=token)["name"] + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + + # Simple hash for uniqueness + script_hash = hashlib.md5(Path(script).read_bytes()).hexdigest()[:8] + + return f"{username}/huggingface-cli-jobs-uv-run-{timestamp}-{script_hash}" + + def _create_minimal_readme(repo_id, script_name, is_ephemeral): + """Create minimal README content.""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + + if is_ephemeral: + # Ephemeral repository README + return dedent( + f"""--- + tags: + - huggingface-cli-jobs-uv-script + - ephemeral + --- + + # UV Script: {script_name} + + Executed via `huggingface-cli jobs uv run` on {timestamp} + + ## Run this script + + ```bash + huggingface-cli jobs run ghcr.io/astral-sh/uv:python3.12-bookworm-slim \\ + uv run https://huggingface.co/datasets/{repo_id}/resolve/main/{script_name} + ``` + + --- + *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* + """ + ) + # Named repository README + repo_name = repo_id.split("/")[-1] + return dedent( + f"""--- + tags: + - huggingface-cli-jobs-uv-script + viewer: false + --- + + # {repo_name} + + UV scripts repository + + ## Scripts + - `{script_name}` - Added {timestamp} + + ## Run + + ```bash + huggingface-cli jobs uv run {script_name} --repo {repo_name} + ``` + + --- + *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* + """ + ) + + # Determine repository + repo_id = _determine_repository() + is_ephemeral = _repo is None + + # Create repo if needed + try: + api.repo_info(repo_id, repo_type="dataset") + if not is_ephemeral: + logger.info(f"Using existing repository: {repo_id}") + except RepositoryNotFoundError: + logger.info(f"Creating repository: {repo_id}") + create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) + + # Upload script + logger.info(f"Uploading {script_path.name}...") + with open(script_path, "r") as f: + script_content = f.read() + + filename = script_path.name + + api.upload_file( + path_or_fileobj=script_content.encode(), + path_in_repo=filename, + repo_id=repo_id, + repo_type="dataset", + ) + + script_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}" + repo_url = f"https://huggingface.co/datasets/{repo_id}" + + logger.info(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") + + # Create and upload minimal README + readme_content = _create_minimal_readme(repo_id, filename, is_ephemeral) + api.upload_file( + path_or_fileobj=readme_content.encode(), + path_in_repo="README.md", + repo_id=repo_id, + repo_type="dataset", + ) + + if is_ephemeral: + logger.info(f"✓ Temporary repository created: {repo_id}") + + # Prepare docker image (always use Python 3.12) + image = 
"ghcr.io/astral-sh/uv:python3.12-bookworm-slim" + + # Build command + uv_args = [] + if dependencies: + for dependency in dependencies: + uv_args += ["--with", dependency] + if python: + uv_args += ["--python", python] + script_args = script_args or [] + command = ["uv", "run"] + uv_args + [script_url] + script_args + + # Create RunCommand args + return self.run_job( + image=image, + command=command, + env=env, + secrets=secrets, + flavor=flavor, + timeout=timeout, + token=token, + ) + def _parse_revision_from_pr_url(pr_url: str) -> str: """Safely parse revision number from a PR url. @@ -10096,3 +10582,10 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: list_organization_members = api.list_organization_members list_user_followers = api.list_user_followers list_user_following = api.list_user_following + +# Jobs API +run_job = api.run_job +fetch_job_logs = api.fetch_job_logs +list_jobs = api.list_jobs +inspect_job = api.inspect_job +cancel_job = api.cancel_job From 1bf5f66e546b18a4fcade92da35f1128c909ecc1 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 16 Jul 2025 23:59:38 +0200 Subject: [PATCH 14/40] minor --- src/huggingface_hub/commands/jobs.py | 4 ++-- src/huggingface_hub/hf_api.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index b81e7063b6..4e72bcc501 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -90,7 +90,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.image: str = args.image - self.command: list[str] = args.command + self.command: List[str] = args.command self.env: dict[str, Optional[str]] = {} for env_value in args.env or []: self.env.update(dotenv_values(stream=io.StringIO(env_value))) @@ -351,7 +351,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.token: Optional[str] = args.token or None - self.job_ids: list[str] = args.job_ids + self.job_ids: List[str] = args.job_ids def run(self) -> None: api = HfApi(token=self.token) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 0d41d8002f..6cec8fe4e3 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -10003,7 +10003,7 @@ def run_job( """ # prepare payload to send to HF Jobs API - input_json: Dict[str, Optional[Union[str, float, list[str], Dict[str, Optional[str]]]]] = { + input_json: Dict[str, Optional[Union[str, float, List[str], Dict[str, Optional[str]]]]] = { "command": command, "arguments": [], "environment": env or {}, From aefb493d8db9d5f41c8e917576eed194770c44a4 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 17 Jul 2025 00:00:09 +0200 Subject: [PATCH 15/40] more comments --- src/huggingface_hub/commands/jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 4e72bcc501..7851c70530 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -362,7 +362,7 @@ def run(self) -> None: class CancelCommand(BaseHuggingfaceCLICommand): @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: - run_parser = parser.add_parser("jobs cancel", help="Cancel a Job") + run_parser = parser.add_parser("cancel", help="Cancel a Job") run_parser.add_argument("job_id", type=str, help="Job ID") run_parser.add_argument( "--token", type=str, 
help="A User Access Token generated from https://huggingface.co/settings/tokens" From 31a3d9742ab15beafff62f28f489d8ff1d754d8a Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 17 Jul 2025 16:58:40 +0200 Subject: [PATCH 16/40] uv run local_script.py --- src/huggingface_hub/hf_api.py | 171 ++++++++++++++-------------------- 1 file changed, 72 insertions(+), 99 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 6cec8fe4e3..c068d00a24 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -14,7 +14,6 @@ # limitations under the License. from __future__ import annotations -import hashlib import inspect import io import json @@ -10276,108 +10275,53 @@ def run_uv_job( >>> run_uv_job(script, dependencies=["trl"], flavor="a10g-small") ``` """ + env = env or {} + secrets = secrets or {} + + # Prepare docker image (always use Python 3.12) + image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" + + # Build command + uv_args = [] + if dependencies: + for dependency in dependencies: + uv_args += ["--with", dependency] + if python: + uv_args += ["--python", python] + script_args = script_args or [] if script.startswith("http://") or script.startswith("https://"): # Direct URL execution - no upload needed - script_url = script + command = ["uv", "run"] + uv_args + [script] + script_args else: # Local file - upload to HF script_path = Path(script) + filename = script_path.name - def _determine_repository(): - """Determine which repository to use for the script.""" - # Use provided repo - if _repo: - repo_id = _repo - if "/" not in repo_id: - username = self.whoami(token=token)["name"] - repo_id = f"{username}/{repo_id}" - return repo_id - - # Create ephemeral repo + # Parse repo + if _repo: + repo_id = _repo + if "/" not in repo_id: + username = self.whoami(token=token)["name"] + repo_id = f"{username}/{repo_id}" + repo_id = _repo + else: username = self.whoami(token=token)["name"] - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - - # Simple hash for uniqueness - script_hash = hashlib.md5(Path(script).read_bytes()).hexdigest()[:8] - - return f"{username}/huggingface-cli-jobs-uv-run-{timestamp}-{script_hash}" - - def _create_minimal_readme(repo_id, script_name, is_ephemeral): - """Create minimal README content.""" - timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") - - if is_ephemeral: - # Ephemeral repository README - return dedent( - f"""--- - tags: - - huggingface-cli-jobs-uv-script - - ephemeral - --- - - # UV Script: {script_name} - - Executed via `huggingface-cli jobs uv run` on {timestamp} - - ## Run this script - - ```bash - huggingface-cli jobs run ghcr.io/astral-sh/uv:python3.12-bookworm-slim \\ - uv run https://huggingface.co/datasets/{repo_id}/resolve/main/{script_name} - ``` - - --- - *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* - """ - ) - # Named repository README - repo_name = repo_id.split("/")[-1] - return dedent( - f"""--- - tags: - - huggingface-cli-jobs-uv-script - viewer: false - --- - - # {repo_name} - - UV scripts repository - - ## Scripts - - `{script_name}` - Added {timestamp} - - ## Run - - ```bash - huggingface-cli jobs uv run {script_name} --repo {repo_name} - ``` - - --- - *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* - """ - ) - - # Determine repository - repo_id = _determine_repository() - is_ephemeral = _repo is None + repo_id = f"{username}/huggingface-cli-jobs-uv-run-scripts" # Create 
repo if needed try: api.repo_info(repo_id, repo_type="dataset") - if not is_ephemeral: - logger.info(f"Using existing repository: {repo_id}") + logger.debug(f"Using existing repository: {repo_id}") except RepositoryNotFoundError: logger.info(f"Creating repository: {repo_id}") create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) # Upload script - logger.info(f"Uploading {script_path.name}...") + logger.info(f"Uploading {script_path.name} to {repo_id}...") with open(script_path, "r") as f: script_content = f.read() - filename = script_path.name - api.upload_file( path_or_fileobj=script_content.encode(), path_in_repo=filename, @@ -10388,10 +10332,33 @@ def _create_minimal_readme(repo_id, script_name, is_ephemeral): script_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}" repo_url = f"https://huggingface.co/datasets/{repo_id}" - logger.info(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") + logger.debug(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") # Create and upload minimal README - readme_content = _create_minimal_readme(repo_id, filename, is_ephemeral) + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") + readme_content = dedent( + f""" + --- + tags: + - huggingface-cli-jobs-uv-script + - ephemeral + viewer: false + --- + + # UV Script: {filename} + + Executed via `huggingface-cli jobs uv run` on {timestamp} + + ## Run this script + + ```bash + huggingface-cli jobs uv run {filename} + ``` + + --- + *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* + """ + ) api.upload_file( path_or_fileobj=readme_content.encode(), path_in_repo="README.md", @@ -10399,21 +10366,27 @@ def _create_minimal_readme(repo_id, script_name, is_ephemeral): repo_type="dataset", ) - if is_ephemeral: - logger.info(f"✓ Temporary repository created: {repo_id}") - - # Prepare docker image (always use Python 3.12) - image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" + secrets["UV_SCRIPT_HF_TOKEN"] = token or self.token or get_token() + secrets["UV_SCRIPT_URL"] = script_url - # Build command - uv_args = [] - if dependencies: - for dependency in dependencies: - uv_args += ["--with", dependency] - if python: - uv_args += ["--python", python] - script_args = script_args or [] - command = ["uv", "run"] + uv_args + [script_url] + script_args + pre_command = ( + dedent( + """ + import urllib.request + import os + from pathlib import Path + o = urllib.request.build_opener() + o.addheaders = [("Authorization", "Bearer " + os.environ["UV_SCRIPT_HF_TOKEN"])] + Path("/tmp/script.py").write_bytes(o.open(os.environ["UV_SCRIPT_URL"]).read()) + """ + ) + .strip() + .replace('"', r"\"") + .split("\n") + ) + pre_command = ["python", "-c", '"' + "; ".join(pre_command) + '"'] + command = ["uv", "run"] + uv_args + ["/tmp/script.py"] + script_args + command = ["bash", "-c", " ".join(pre_command) + " && " + " ".join(command)] # Create RunCommand args return self.run_job( From f7c8be9b11d79d1689c6451f28ead321c4dc1dab Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 17 Jul 2025 19:05:13 +0200 Subject: [PATCH 17/40] lucain's comments --- docs/source/en/guides/cli.md | 5 --- setup.py | 1 - src/huggingface_hub/commands/jobs.py | 42 ++++++++----------- src/huggingface_hub/hf_api.py | 4 +- src/huggingface_hub/utils/_dotenv.py | 50 ++++++++++++++++++++++ tests/test_cli.py | 4 +- tests/test_utils_dotenv.py | 63 ++++++++++++++++++++++++++++ 7 files changed, 135 insertions(+), 34 deletions(-) create mode 100644 
src/huggingface_hub/utils/_dotenv.py create mode 100644 tests/test_utils_dotenv.py diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index dd078313f4..fc9f8be8c5 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -635,11 +635,6 @@ Run compute jobs on Hugging Face infrastructure with a familiar Docker-like inte - 📊 **Live Monitoring**: Stream logs in real-time, just like running locally - 💰 **Pay-as-you-go**: Only pay for the seconds you use -### Prerequisites - -- A Hugging Face account (currently in testing for HF staff) -- Authenticate with the Hugging Gace Hub (e.g. `huggingface-cli login`) - ### Quick Start #### 1. Run your first job diff --git a/setup.py b/setup.py index b834fe7fcc..7de6594de1 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ def get_version() -> str: "requests", "tqdm>=4.42.1", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias - "dotenv", ] extras = {} diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 7851c70530..9f18cd301e 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -18,20 +18,19 @@ huggingface-cli jobs run image command """ -import io import json import os import re from argparse import Namespace, _SubParsersAction from dataclasses import asdict -from datetime import datetime -from typing import Any, Dict, List, Optional, Union +from pathlib import Path +from typing import Dict, List, Optional, Union import requests -from dotenv import dotenv_values from huggingface_hub import HfApi from huggingface_hub.utils import logging +from huggingface_hub.utils._dotenv import load_dotenv from . import BaseHuggingfaceCLICommand @@ -60,7 +59,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("run", help="Run a Job") run_parser.add_argument("image", type=str, help="The Docker image to use.") run_parser.add_argument("-e", "--env", action="append", help="Set environment variables.") - run_parser.add_argument("-s", "--secret", action="append", help="Set secret environment variables.") + run_parser.add_argument("-s", "--secrets", action="append", help="Set secret environment variables.") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") run_parser.add_argument("--secret-env-file", type=str, help="Read in a file of secret environment variables.") run_parser.add_argument( @@ -92,15 +91,15 @@ def __init__(self, args: Namespace) -> None: self.image: str = args.image self.command: List[str] = args.command self.env: dict[str, Optional[str]] = {} - for env_value in args.env or []: - self.env.update(dotenv_values(stream=io.StringIO(env_value))) if args.env_file: - self.env.update(dotenv_values(args.env_file)) + self.env.update(load_dotenv(Path(args.env_file).read_text())) + for env_value in args.env or []: + self.env.update(load_dotenv(env_value)) self.secrets: dict[str, Optional[str]] = {} - for secret in args.secret or []: - self.secrets.update(dotenv_values(stream=io.StringIO(secret))) if args.secret_env_file: - self.secrets.update(dotenv_values(args.secret_env_file)) + self.secrets.update(load_dotenv(Path(args.secret_env_file).read_text())) + for secret in args.secrets or []: + self.secrets.update(load_dotenv(secret)) self.flavor: str = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach @@ -334,11 +333,6 @@ def _print_output(self, rows, headers): ) -class JSONEncoder(json.JSONEncoder): - def 
default(self, o: Any) -> Any: - return str(o) if isinstance(o, datetime) else super().default(o) - - class InspectCommand(BaseHuggingfaceCLICommand): @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: @@ -356,7 +350,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: api = HfApi(token=self.token) jobs = [api.inspect_job(job_id) for job_id in self.job_ids] - print(JSONEncoder(indent=4).encode([asdict(job) for job in jobs])) + print(json.dumps([asdict(job) for job in jobs], indent=4, default=str)) class CancelCommand(BaseHuggingfaceCLICommand): @@ -404,7 +398,7 @@ def register_subcommand(parser): ) run_parser.add_argument("--flavor", type=str, default="cpu-basic", help="Hardware flavor (default: cpu-basic)") run_parser.add_argument("-e", "--env", action="append", help="Environment variables") - run_parser.add_argument("-s", "--secret", action="append", help="Secret environment variables") + run_parser.add_argument("-s", "--secrets", action="append", help="Secret environment variables") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") run_parser.add_argument( "--secret-env-file", @@ -428,15 +422,15 @@ def __init__(self, args: Namespace) -> None: self.dependencies = args.with_ self.python = args.python self.env: dict[str, Optional[str]] = {} - for env_value in args.env or []: - self.env.update(dotenv_values(stream=io.StringIO(env_value))) if args.env_file: - self.env.update(dotenv_values(args.env_file)) + self.env.update(load_dotenv(Path(args.env_file).read_text())) + for env_value in args.env or []: + self.env.update(load_dotenv(env_value)) self.secrets: dict[str, Optional[str]] = {} - for secret in args.secret or []: - self.secrets.update(dotenv_values(stream=io.StringIO(secret))) if args.secret_env_file: - self.secrets.update(dotenv_values(args.secret_env_file)) + self.secrets.update(load_dotenv(Path(args.secret_env_file).read_text())) + for secret in args.secrets or []: + self.secrets.update(load_dotenv(secret)) self.flavor: Optional[str] = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index c068d00a24..f3b406436e 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -10307,7 +10307,7 @@ def run_uv_job( repo_id = _repo else: username = self.whoami(token=token)["name"] - repo_id = f"{username}/huggingface-cli-jobs-uv-run-scripts" + repo_id = f"{username}/hf-cli-jobs-uv-run-scripts" # Create repo if needed try: @@ -10340,7 +10340,7 @@ def run_uv_job( f""" --- tags: - - huggingface-cli-jobs-uv-script + - hf-cli-jobs-uv-script - ephemeral viewer: false --- diff --git a/src/huggingface_hub/utils/_dotenv.py b/src/huggingface_hub/utils/_dotenv.py new file mode 100644 index 0000000000..a276881833 --- /dev/null +++ b/src/huggingface_hub/utils/_dotenv.py @@ -0,0 +1,50 @@ +import re +from typing import Dict + + +def load_dotenv(dotenv_str: str) -> Dict[str, str]: + """ + Parse a DOTENV-format string and return a dictionary of key-value pairs. + Handles quoted values, comments, export keyword, and blank lines. + """ + env: Dict[str, str] = {} + line_pattern = re.compile( + r""" + ^\s* + (?:export\s+)? # optional export + ([A-Za-z_][A-Za-z0-9_]*) # key + \s*=\s* + ( # value group + (?: + '(?:\\'|[^'])*' # single-quoted value + | "(?:\\"|[^"])*" # double-quoted value + | [^#\n\r]+? # unquoted value + ) + )? 
+ \s*(?:\#.*)?$ # optional inline comment + """, + re.VERBOSE, + ) + + for line in dotenv_str.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue # Skip comments and empty lines + + match = line_pattern.match(line) + if not match: + continue # Skip malformed lines + + key, raw_val = match.group(1), match.group(2) or "" + val = raw_val.strip() + + # Remove surrounding quotes if quoted + if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")): + val = val[1:-1] + val = val.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\"", '"').replace(r"\\", "\\") + if raw_val.startswith('"'): + val = val.replace(r"\$", "$") # only in double quotes + + env[key] = val + + return env diff --git a/tests/test_cli.py b/tests/test_cli.py index 5ad29e320b..fa047737ce 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -860,8 +860,8 @@ def setUp(self) -> None: commands_parser = self.parser.add_subparsers() JobsCommands.register_subcommand(commands_parser) - @patch("requests.post", return_value=DummyResponse({"id": "my-job-id"})) - @patch("huggingface_hub.commands.jobs.run.whoami", return_value={"name": "my-username"}) + @patch("requests.Session.post", return_value=DummyResponse({"id": "my-job-id"})) + @patch("huggingface_hub.hf_api.HfApi.whoami", return_value={"name": "my-username"}) def test_run(self, whoami: Mock, requests_post: Mock) -> None: input_args = ["jobs", "run", "--detach", "ubuntu", "echo", "hello"] cmd = RunCommand(self.parser.parse_args(input_args)) diff --git a/tests/test_utils_dotenv.py b/tests/test_utils_dotenv.py new file mode 100644 index 0000000000..63c2df22d7 --- /dev/null +++ b/tests/test_utils_dotenv.py @@ -0,0 +1,63 @@ +from huggingface_hub.utils._dotenv import load_dotenv + + +def test_basic_key_value(): + data = "KEY=value" + assert load_dotenv(data) == {"KEY": "value"} + + +def test_whitespace_and_comments(): + data = """ + # This is a comment + KEY = value # inline comment + EMPTY= + """ + assert load_dotenv(data) == {"KEY": "value", "EMPTY": ""} + + +def test_quoted_values(): + data = """ + SINGLE='single quoted' + DOUBLE="double quoted" + ESCAPED="line\\nbreak" + """ + assert load_dotenv(data) == {"SINGLE": "single quoted", "DOUBLE": "double quoted", "ESCAPED": "line\nbreak"} + + +def test_export_and_inline_comment(): + data = "export KEY=value # this is a comment" + assert load_dotenv(data) == {"KEY": "value"} + + +def test_ignore_invalid_lines(): + data = """ + this is not valid + KEY=value + """ + assert load_dotenv(data) == {"KEY": "value"} + + +def test_complex_quotes(): + data = r""" + QUOTED="some value with # not comment" + ESCAPE="escaped \$dollar and \\backslash" + """ + assert load_dotenv(data) == { + "QUOTED": "some value with # not comment", + "ESCAPE": "escaped $dollar and \\backslash", + } + + +def test_no_value(): + data = "NOVALUE=" + assert load_dotenv(data) == {"NOVALUE": ""} + + +def test_multiple_lines(): + data = """ + A=1 + B="two" + C='three' + D=4 + """ + assert load_dotenv(data) == {"A": "1", "B": "two", "C": "three", "D": "4"} From 541aa6a07810c9751715c8f57bf9e121a69d4ae1 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Thu, 17 Jul 2025 19:06:30 +0200 Subject: [PATCH 18/40] more lucain's comments --- src/huggingface_hub/commands/jobs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 9f18cd301e..b8a0012af6 100644 --- a/src/huggingface_hub/commands/jobs.py +++ 
b/src/huggingface_hub/commands/jobs.py @@ -61,7 +61,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: run_parser.add_argument("-e", "--env", action="append", help="Set environment variables.") run_parser.add_argument("-s", "--secrets", action="append", help="Set secret environment variables.") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") - run_parser.add_argument("--secret-env-file", type=str, help="Read in a file of secret environment variables.") + run_parser.add_argument("--secrets-file", type=str, help="Read in a file of secret environment variables.") run_parser.add_argument( "--flavor", type=str, @@ -96,8 +96,8 @@ def __init__(self, args: Namespace) -> None: for env_value in args.env or []: self.env.update(load_dotenv(env_value)) self.secrets: dict[str, Optional[str]] = {} - if args.secret_env_file: - self.secrets.update(load_dotenv(Path(args.secret_env_file).read_text())) + if args.secrets_file: + self.secrets.update(load_dotenv(Path(args.secrets_file).read_text())) for secret in args.secrets or []: self.secrets.update(load_dotenv(secret)) self.flavor: str = args.flavor @@ -401,7 +401,7 @@ def register_subcommand(parser): run_parser.add_argument("-s", "--secrets", action="append", help="Secret environment variables") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") run_parser.add_argument( - "--secret-env-file", + "--secrets-file", type=str, help="Read in a file of secret environment variables.", ) @@ -427,8 +427,8 @@ def __init__(self, args: Namespace) -> None: for env_value in args.env or []: self.env.update(load_dotenv(env_value)) self.secrets: dict[str, Optional[str]] = {} - if args.secret_env_file: - self.secrets.update(load_dotenv(Path(args.secret_env_file).read_text())) + if args.secrets_file: + self.secrets.update(load_dotenv(Path(args.secrets_file).read_text())) for secret in args.secrets or []: self.secrets.update(load_dotenv(secret)) self.flavor: Optional[str] = args.flavor From 251e719dda4d341a5125c0148ba572f26f0ab183 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Mon, 21 Jul 2025 18:20:52 +0200 Subject: [PATCH 19/40] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: célina Co-authored-by: Lucain --- src/huggingface_hub/hf_api.py | 12 +++++------- src/huggingface_hub/utils/_dotenv.py | 1 + tests/test_utils_dotenv.py | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index f3b406436e..d3bb97d3f2 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -10112,7 +10112,7 @@ def fetch_job_logs( except KeyboardInterrupt: break except requests.exceptions.ConnectionError as err: - is_timeout = err.__context__ and isinstance(err.__context__.__cause__, TimeoutError) + is_timeout = err.__context__ and isinstance(getattr(err.__context__, "__cause__", None), TimeoutError) if logging_started or not is_timeout: raise if logging_finished or job_finished: @@ -10297,21 +10297,19 @@ def run_uv_job( # Local file - upload to HF script_path = Path(script) filename = script_path.name - + username = self.whoami(token=token)["name"] # Parse repo if _repo: repo_id = _repo if "/" not in repo_id: - username = self.whoami(token=token)["name"] repo_id = f"{username}/{repo_id}" repo_id = _repo else: - username = self.whoami(token=token)["name"] 
repo_id = f"{username}/hf-cli-jobs-uv-run-scripts" # Create repo if needed try: - api.repo_info(repo_id, repo_type="dataset") + self.repo_info(repo_id, repo_type="dataset") logger.debug(f"Using existing repository: {repo_id}") except RepositoryNotFoundError: logger.info(f"Creating repository: {repo_id}") @@ -10322,7 +10320,7 @@ def run_uv_job( with open(script_path, "r") as f: script_content = f.read() - api.upload_file( + self.upload_file( path_or_fileobj=script_content.encode(), path_in_repo=filename, repo_id=repo_id, @@ -10359,7 +10357,7 @@ def run_uv_job( *Created with [huggingface-cli jobs](https://github.com/huggingface/huggingface-cli jobs)* """ ) - api.upload_file( + self.upload_file( path_or_fileobj=readme_content.encode(), path_in_repo="README.md", repo_id=repo_id, diff --git a/src/huggingface_hub/utils/_dotenv.py b/src/huggingface_hub/utils/_dotenv.py index a276881833..f5400e6190 100644 --- a/src/huggingface_hub/utils/_dotenv.py +++ b/src/huggingface_hub/utils/_dotenv.py @@ -1,3 +1,4 @@ +# AI-generated module (ChatGPT) import re from typing import Dict diff --git a/tests/test_utils_dotenv.py b/tests/test_utils_dotenv.py index 63c2df22d7..ae622262b4 100644 --- a/tests/test_utils_dotenv.py +++ b/tests/test_utils_dotenv.py @@ -1,3 +1,4 @@ +# AI-generated module (ChatGPT) from huggingface_hub.utils._dotenv import load_dotenv From 97a856ba5f1ac9fedca7496cb4bed743a3e867c1 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 21 Jul 2025 18:22:47 +0200 Subject: [PATCH 20/40] style --- src/huggingface_hub/utils/_dotenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/utils/_dotenv.py b/src/huggingface_hub/utils/_dotenv.py index f5400e6190..6e3c13d611 100644 --- a/src/huggingface_hub/utils/_dotenv.py +++ b/src/huggingface_hub/utils/_dotenv.py @@ -1,4 +1,4 @@ -# AI-generated module (ChatGPT) +# AI-generated module (ChatGPT) import re from typing import Dict From 1102968ab6c3848cf90843b4294c69ab52e9659c Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 21 Jul 2025 18:32:29 +0200 Subject: [PATCH 21/40] minor --- docs/source/en/guides/cli.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index fc9f8be8c5..e32ad0ec6b 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -709,8 +709,8 @@ You can pass environment variables to your job using ``` ```bash -# Pass secrets from a local .secrets.env file - they will be encrypted server side ->>> huggingface-cli jobs run --secret-env-file .secrets.env python:3.12 python -c "import os; print(os.environ['MY_SECRET'])" +# Pass secrets from a local .env.secrets file - they will be encrypted server side +>>> huggingface-cli jobs run --secrets-file .env.secrets python:3.12 python -c "import os; print(os.environ['MY_SECRET'])" ``` ### Hardware From 99b538af3c6129a24ed70770fe2254317306176b Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Tue, 22 Jul 2025 15:43:13 +0200 Subject: [PATCH 22/40] Remove JobUrl and add url in JobInfo directly --- src/huggingface_hub/_jobs_api.py | 63 +++++++--------------------- src/huggingface_hub/commands/jobs.py | 8 ++-- src/huggingface_hub/hf_api.py | 14 +++---- 3 files changed, 26 insertions(+), 59 deletions(-) diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 8305d55c01..6af39d3bfa 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -20,7 +20,6 @@ from huggingface_hub import constants from 
huggingface_hub._space_api import SpaceHardware from huggingface_hub.utils._datetime import parse_datetime -from huggingface_hub.utils._http import fix_hf_endpoint_in_url class JobStage(str, Enum): @@ -43,52 +42,6 @@ class JobStage(str, Enum): RUNNING = "RUNNING" -class JobUrl(str): - """Subclass of `str` describing a job URL on the Hub. - - `JobUrl` is returned by `HfApi.create_job`. It inherits from `str` for backward - compatibility. At initialization, the URL is parsed to populate properties: - - endpoint (`str`) - - namespace (`Optional[str]`) - - job_id (`str`) - - url (`str`) - - Args: - url (`Any`): - String value of the job url. - endpoint (`str`, *optional*): - Endpoint of the Hub. Defaults to . - - Example: - ```py - >>> HfApi.run_job("ubuntu", ["echo", "hello"]) - JobUrl('https://huggingface.co/jobs/lhoestq/6877b757344d8f02f6001012', endpoint='https://huggingface.co', job_id='6877b757344d8f02f6001012') - ``` - - Raises: - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) - If URL cannot be parsed. - """ - - def __new__(cls, url: Any, endpoint: Optional[str] = None): - url = fix_hf_endpoint_in_url(url, endpoint=endpoint) - return super(JobUrl, cls).__new__(cls, url) - - def __init__(self, url: Any, endpoint: Optional[str] = None) -> None: - super().__init__() - # Parse URL - self.endpoint = endpoint or constants.ENDPOINT - namespace, job_id = url.split("/")[-2:] - - # Populate fields - self.namespace = namespace - self.job_id = job_id - self.url = str(self) # just in case it's needed - - def __repr__(self) -> str: - return f"JobUrl('{self}', endpoint='{self.endpoint}', job_id='{self.job_id}')" - - @dataclass class JobStatus: stage: JobStage @@ -99,6 +52,12 @@ def __init__(self, **kwargs) -> None: self.message = kwargs.get("message") +@dataclass +class JobOwner: + id: str + name: str + + @dataclass class JobInfo: id: str @@ -111,6 +70,11 @@ class JobInfo: secrets: Optional[Dict[str, Any]] flavor: Optional[SpaceHardware] status: Optional[JobStatus] + owner: Optional[JobOwner] + + # Inferred fields + endpoint: str + url: str def __init__(self, **kwargs) -> None: self.id = kwargs["id"] @@ -118,9 +82,14 @@ def __init__(self, **kwargs) -> None: self.created_at = parse_datetime(created_at) if created_at else None self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image") self.space_id = kwargs.get("spaceId") or kwargs.get("space_id") + self.owner = JobOwner(**(kwargs["owner"] if isinstance(kwargs.get("owner"), dict) else {})) self.command = kwargs.get("command") self.arguments = kwargs.get("arguments") self.environment = kwargs.get("environment") self.secrets = kwargs.get("secrets") self.flavor = kwargs.get("flavor") self.status = JobStatus(**(kwargs["status"] if isinstance(kwargs.get("status"), dict) else {})) + + # Inferred fields + self.endpoint = kwargs.get("endpoint", constants.ENDPOINT) + self.url = f"{self.endpoint}/jobs/{self.owner.id}/{self.id}" diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index b8a0012af6..54b9becb8c 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -107,7 +107,7 @@ def __init__(self, args: Namespace) -> None: def run(self) -> None: api = HfApi(token=self.token) - job_url = api.run_job( + job = api.run_job( image=self.image, command=self.command, env=self.env, @@ -117,14 +117,14 @@ def run(self) -> None: token=self.token, ) # Always print the job ID to the user - print(f"Job started with ID: {job_url.job_id}") - print(f"View 
at: {job_url}") + print(f"Job started with ID: {job.id}") + print(f"View at: {job.url}") if self.detach: return # Now let's stream the logs - for log in api.fetch_job_logs(job_id=job_url.job_id): + for log in api.fetch_job_logs(job_id=job.id): print(log) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index d3bb97d3f2..47e603310a 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -67,7 +67,7 @@ _warn_on_overwriting_operations, ) from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType -from ._jobs_api import JobInfo, JobUrl +from ._jobs_api import JobInfo from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable from ._upload_large_folder import upload_large_folder_internal from .community import ( @@ -9952,7 +9952,7 @@ def run_job( flavor: str = "cpu-basic", timeout: Optional[Union[int, float, str]] = None, token: Union[bool, str, None] = None, - ) -> JobUrl: + ) -> JobInfo: """ Run compute Jobs on Hugging Face infrastructure. @@ -10038,9 +10038,7 @@ def run_job( ) hf_raise_for_status(response) job_info = response.json() - job_id = job_info["id"] - job_url = f"{self.endpoint}/jobs/{username}/{job_id}" - return JobUrl(job_url, endpoint=self.endpoint) + return JobInfo(**job_info, endpoint=self.endpoint) def fetch_job_logs( self, @@ -10149,7 +10147,7 @@ def list_jobs( timeout=timeout, ) response.raise_for_status() - return [JobInfo(**job_info) for job_info in response.json()] + return [JobInfo(**job_info, endpoint=self.endpoint) for job_info in response.json()] def inspect_job( self, @@ -10194,7 +10192,7 @@ def inspect_job( headers=self._build_hf_headers(token=token), ) response.raise_for_status() - return JobInfo(**response.json()) + return JobInfo(**response.json(), endpoint=self.endpoint) def cancel_job( self, @@ -10232,7 +10230,7 @@ def run_uv_job( timeout: Optional[Union[int, float, str]] = None, token: Union[bool, str, None] = None, _repo: Optional[str] = None, - ) -> JobUrl: + ) -> JobInfo: """ Run a UV script Job on Hugging Face infrastructure. From 53fb0aab5d2a0066023e212858a08646a8ef8547 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Tue, 22 Jul 2025 16:21:59 +0200 Subject: [PATCH 23/40] Apply suggestions from code review Co-authored-by: Lucain --- docs/source/en/guides/cli.md | 4 ++-- src/huggingface_hub/_jobs_api.py | 2 +- src/huggingface_hub/commands/jobs.py | 1 - src/huggingface_hub/hf_api.py | 11 ++++++++--- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index e32ad0ec6b..066b76a7ea 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -678,7 +678,7 @@ Running this will show the following output! This code ran with the following GPU: NVIDIA A10G ``` -That's it! You're now running code on Hugging Face's infrastructure. For more detailed information checkout the [Quickstart Guide](docs/quickstart.md). +That's it! You're now running code on Hugging Face's infrastructure. 
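For readers following along, a minimal sketch of the Python-API equivalent of the GPU example above may help. It is illustrative only (not part of this patch) and assumes the keyword-only `HfApi.run_job` / `HfApi.fetch_job_logs` signatures introduced in this patch series.

```python
# Illustrative sketch: launch the same GPU job via the Python API and stream its logs.
from huggingface_hub import HfApi

api = HfApi()  # uses the locally saved token by default

job = api.run_job(
    image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel",
    command=["python", "-c", "import torch; print(torch.cuda.get_device_name())"],
    flavor="a10g-small",
)
print(f"Job started with ID: {job.id}")
print(f"View at: {job.url}")

# Stream logs until the job finishes
for line in api.fetch_job_logs(job_id=job.id):
    print(line)
```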
### Common Use Cases @@ -721,7 +721,7 @@ Available `--flavor` options: - GPU: `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`,`a100-large` - TPU: `v5e-1x1`, `v5e-2x2`, `v5e-2x4` -(updated in 03/25 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) +(updated in 07/2025 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) ### UV Scripts (Experimental) diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 6af39d3bfa..3b96531d91 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2019-present, the HuggingFace Inc. team. +# Copyright 2025-present, the HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 54b9becb8c..e0cc026c7c 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -114,7 +114,6 @@ def run(self) -> None: secrets=self.secrets, flavor=self.flavor, timeout=self.timeout, - token=self.token, ) # Always print the job ID to the user print(f"Job started with ID: {job.id}") diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 47e603310a..864a8073f5 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -9945,6 +9945,7 @@ def auth_check( def run_job( self, + *, image: str, command: List[str], env: Optional[Dict[str, Any]] = None, @@ -9972,7 +9973,7 @@ def run_job( Defines the secret environment variables for the Job. flavor (`str`, defaults to `"cpu-basic"`): - "Flavor for the hardware, as in Hugging Face Spaces. + Flavor for the hardware, as in Hugging Face Spaces. timeout (`Union[int, float, str]`, *optional*): Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). 
@@ -9993,7 +9994,7 @@ def run_job( Run a GPU Job: - ``` + ```python >>> from huggingface_hub import run_job >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel" >>> command = ["python", "-c", "import torch; print(f"This code ran with the following GPU: {torch.cuda.get_device_name()}")"] @@ -10002,7 +10003,7 @@ def run_job( """ # prepare payload to send to HF Jobs API - input_json: Dict[str, Optional[Union[str, float, List[str], Dict[str, Optional[str]]]]] = { + input_json: Dict[str, Any] = { "command": command, "arguments": [], "environment": env or {}, @@ -10042,6 +10043,7 @@ def run_job( def fetch_job_logs( self, + *, job_id: str, token: Union[bool, str, None] = None, ) -> Iterable[str]: @@ -10128,6 +10130,7 @@ def fetch_job_logs( def list_jobs( self, + *, timeout: Optional[int] = None, token: Union[bool, str, None] = None, ) -> List[JobInfo]: @@ -10151,6 +10154,7 @@ def list_jobs( def inspect_job( self, + *, job_id: str, token: Union[bool, str, None] = None, ) -> JobInfo: @@ -10196,6 +10200,7 @@ def inspect_job( def cancel_job( self, + *, job_id: str, token: Union[bool, str, None] = None, ) -> None: From 4e3523dbca437ad5c4b9f57ddd99d46526dc7f75 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:15:12 +0200 Subject: [PATCH 24/40] add namespace arg --- docs/source/en/guides/cli.md | 3 ++ src/huggingface_hub/commands/jobs.py | 60 ++++++++++++++++++++----- src/huggingface_hub/hf_api.py | 65 ++++++++++++++++++++++------ 3 files changed, 103 insertions(+), 25 deletions(-) diff --git a/docs/source/en/guides/cli.md b/docs/source/en/guides/cli.md index 066b76a7ea..a7d2844dc4 100644 --- a/docs/source/en/guides/cli.md +++ b/docs/source/en/guides/cli.md @@ -619,6 +619,9 @@ Run compute jobs on Hugging Face infrastructure with a familiar Docker-like inte >>> huggingface-cli jobs run --flavor a10g-small pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel \ ... python -c "import torch; print(torch.cuda.get_device_name())" +# Run in an organization account +>>> huggingface-cli jobs run --namespace my-org-name python:3.12 python -c "print('Running in an org account')" + # Run from Hugging Face Spaces >>> huggingface-cli jobs run hf.co/spaces/lhoestq/duckdb duckdb -c "select 'hello world'" diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index e0cc026c7c..92b2e51c21 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -79,6 +79,11 @@ def register_subcommand(parser: _SubParsersAction) -> None: action="store_true", help="Run the Job in the background and print the Job ID.", ) + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace where the Job will be created. 
Defaults to the current user's namespace.", + ) run_parser.add_argument( "--token", type=str, @@ -103,6 +108,7 @@ def __init__(self, args: Namespace) -> None: self.flavor: str = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach + self.namespace: Optional[str] = args.namespace self.token: Optional[str] = args.token def run(self) -> None: @@ -114,6 +120,7 @@ def run(self) -> None: secrets=self.secrets, flavor=self.flavor, timeout=self.timeout, + namespace=self.namespace, ) # Always print the job ID to the user print(f"Job started with ID: {job.id}") @@ -132,6 +139,11 @@ class LogsCommand(BaseHuggingfaceCLICommand): def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("logs", help="Fetch the logs of a Job") run_parser.add_argument("job_id", type=str, help="Job ID") + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace where the job is running. Defaults to the current user's namespace.", + ) run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) @@ -139,11 +151,12 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.job_id: str = args.job_id + self.namespace: Optional[str] = args.namespace self.token: Optional[str] = args.token def run(self) -> None: api = HfApi(token=self.token) - for log in api.fetch_job_logs(job_id=self.job_id): + for log in api.fetch_job_logs(job_id=self.job_id, namespace=self.namespace): print(log) @@ -184,6 +197,11 @@ def register_subcommand(parser: _SubParsersAction) -> None: action="store_true", help="Show all Jobs (default shows just running)", ) + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace from where it lists the jobs. Defaults to the current user's namespace.", + ) run_parser.add_argument( "--token", type=str, @@ -207,7 +225,8 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.all: bool = args.all - self.token: Optional[str] = args.token or None + self.namespace: Optional[str] = args.namespace + self.token: Optional[str] = args.token self.format: Optional[str] = args.format self.filters: Dict[str, str] = {} @@ -228,7 +247,7 @@ def run(self) -> None: api = HfApi(token=self.token) # Fetch jobs data - jobs = api.list_jobs() + jobs = api.list_jobs(namespace=self.namespace) # Define table headers table_headers = ["JOB ID", "IMAGE/SPACE", "COMMAND", "CREATED", "STATUS"] @@ -336,6 +355,11 @@ class InspectCommand(BaseHuggingfaceCLICommand): @staticmethod def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("inspect", help="Display detailed information on one or more Jobs") + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace where the job is running. 
Defaults to the current user's namespace.", + ) run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) @@ -343,12 +367,13 @@ def register_subcommand(parser: _SubParsersAction) -> None: run_parser.set_defaults(func=InspectCommand) def __init__(self, args: Namespace) -> None: - self.token: Optional[str] = args.token or None + self.namespace: Optional[str] = args.namespace + self.token: Optional[str] = args.token self.job_ids: List[str] = args.job_ids def run(self) -> None: api = HfApi(token=self.token) - jobs = [api.inspect_job(job_id) for job_id in self.job_ids] + jobs = [api.inspect_job(job_id=job_id, namespace=self.namespace) for job_id in self.job_ids] print(json.dumps([asdict(job) for job in jobs], indent=4, default=str)) @@ -357,6 +382,11 @@ class CancelCommand(BaseHuggingfaceCLICommand): def register_subcommand(parser: _SubParsersAction) -> None: run_parser = parser.add_parser("cancel", help="Cancel a Job") run_parser.add_argument("job_id", type=str, help="Job ID") + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace where the job is running. Defaults to the current user's namespace.", + ) run_parser.add_argument( "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens" ) @@ -364,11 +394,12 @@ def register_subcommand(parser: _SubParsersAction) -> None: def __init__(self, args: Namespace) -> None: self.job_id: str = args.job_id - self.token: Optional[str] = args.token or None + self.namespace = args.namespace + self.token: Optional[str] = args.token def run(self) -> None: api = HfApi(token=self.token) - api.cancel_job(self.job_id) + api.cancel_job(job_id=self.job_id, namespace=self.namespace) class UvCommand(BaseHuggingfaceCLICommand): @@ -406,6 +437,11 @@ def register_subcommand(parser): ) run_parser.add_argument("--timeout", type=str, help="Max duration (e.g., 30s, 5m, 1h)") run_parser.add_argument("-d", "--detach", action="store_true", help="Run in background") + run_parser.add_argument( + "--namespace", + type=str, + help="The namespace where the Job will be created. 
Defaults to the current user's namespace.", + ) run_parser.add_argument("--token", type=str, help="HF token") # UV options run_parser.add_argument("--with", action="append", help="Run with the given packages installed", dest="with_") @@ -433,6 +469,7 @@ def __init__(self, args: Namespace) -> None: self.flavor: Optional[str] = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach + self.namespace: Optional[str] = args.namespace self.token: Optional[str] = args.token self._repo = args.repo @@ -440,7 +477,7 @@ def run(self) -> None: """Execute UV command.""" logging.set_verbosity(logging.INFO) api = HfApi(token=self.token) - job_url = api.run_uv_job( + job = api.run_uv_job( script=self.script, script_args=self.script_args, dependencies=self.dependencies, @@ -449,16 +486,17 @@ def run(self) -> None: secrets=self.secrets, flavor=self.flavor, timeout=self.timeout, + namespace=self.namespace, _repo=self._repo, ) # Always print the job ID to the user - print(f"Job started with ID: {job_url.job_id}") - print(f"View at: {job_url}") + print(f"Job started with ID: {job.id}") + print(f"View at: {job.url}") if self.detach: return # Now let's stream the logs - for log in api.fetch_job_logs(job_id=job_url.job_id): + for log in api.fetch_job_logs(job_id=job.id): print(log) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 864a8073f5..4e4e161893 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -9952,6 +9952,7 @@ def run_job( secrets: Optional[Dict[str, Any]] = None, flavor: str = "cpu-basic", timeout: Optional[Union[int, float, str]] = None, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, ) -> JobInfo: """ @@ -9979,6 +9980,9 @@ def run_job( Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). Example: `300` or `"5m"` for 5 minutes. + namespace (`str`, *optional*): + The namespace where the Job will be created. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. @@ -10031,9 +10035,10 @@ def run_job( break else: input_json["dockerImage"] = image - username = self.whoami(token=token)["name"] + if namespace is None: + namespace = self.whoami(token=token)["name"] response = get_session().post( - f"https://huggingface.co/api/jobs/{username}", + f"https://huggingface.co/api/jobs/{namespace}", json=input_json, headers=self._build_hf_headers(token=token), ) @@ -10045,6 +10050,7 @@ def fetch_job_logs( self, *, job_id: str, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, ) -> Iterable[str]: """ @@ -10054,6 +10060,9 @@ def fetch_job_logs( job_id (`str`): ID of the Job. + namespace (`str`, *optional*): + The namespace where the Job is running. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. @@ -10069,7 +10078,8 @@ def fetch_job_logs( Hello from HF compute! 
``` """ - username = self.whoami(token=token)["name"] + if namespace is None: + namespace = self.whoami(token=token)["name"] logging_finished = logging_started = False job_finished = False # - We need to retry because sometimes the /logs doesn't return logs when the job just started. @@ -10090,7 +10100,7 @@ def fetch_job_logs( sleep_time = min(max_wait_time, max(min_wait_time, sleep_time * 2)) try: resp = get_session().get( - f"https://huggingface.co/api/jobs/{username}/{job_id}/logs", + f"https://huggingface.co/api/jobs/{namespace}/{job_id}/logs", headers=self._build_hf_headers(token=token), stream=True, timeout=120, @@ -10120,7 +10130,7 @@ def fetch_job_logs( job_status = ( get_session() .get( - f"https://huggingface.co/api/jobs/{username}/{job_id}", + f"https://huggingface.co/api/jobs/{namespace}/{job_id}", headers=self._build_hf_headers(token=token), ) .json() @@ -10132,20 +10142,28 @@ def list_jobs( self, *, timeout: Optional[int] = None, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, ) -> List[JobInfo]: """ List compute Jobs on Hugging Face infrastructure. Args: + timeout (`float`, *optional*): + Whether to set a timeout for the request to the Hub. + + namespace (`str`, *optional*): + The namespace from where it lists the jobs. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. """ - username = whoami(token=token)["name"] + if namespace is None: + namespace = whoami(token=token)["name"] response = get_session().get( - f"{self.endpoint}/api/jobs/{username}", + f"{self.endpoint}/api/jobs/{namespace}", headers=self._build_hf_headers(token=token), timeout=timeout, ) @@ -10156,6 +10174,7 @@ def inspect_job( self, *, job_id: str, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, ) -> JobInfo: """ @@ -10165,6 +10184,9 @@ def inspect_job( job_id (`str`): ID of the Job. + namespace (`str`, *optional*): + The namespace where the Job is running. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. @@ -10190,9 +10212,10 @@ def inspect_job( ) ``` """ - username = self.whoami(token=token)["name"] + if namespace is None: + namespace = self.whoami(token=token)["name"] response = get_session().get( - f"{self.endpoint}/api/jobs/{username}/{job_id}", + f"{self.endpoint}/api/jobs/{namespace}/{job_id}", headers=self._build_hf_headers(token=token), ) response.raise_for_status() @@ -10202,6 +10225,7 @@ def cancel_job( self, *, job_id: str, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, ) -> None: """ @@ -10211,14 +10235,18 @@ def cancel_job( job_id (`str`): ID of the Job. + namespace (`str`, *optional*): + The namespace where the Job is running. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. 
""" - username = self.whoami(token=token)["name"] + if namespace is None: + namespace = self.whoami(token=token)["name"] get_session().post( - f"{self.endpoint}/api/jobs/{username}/{job_id}/cancel", + f"{self.endpoint}/api/jobs/{namespace}/{job_id}/cancel", headers=self._build_hf_headers(token=token), ).raise_for_status() @@ -10226,6 +10254,7 @@ def cancel_job( def run_uv_job( self, script: str, + *, script_args: Optional[List[str]] = None, dependencies: Optional[List[str]] = None, python: Optional[str] = None, @@ -10233,6 +10262,7 @@ def run_uv_job( secrets: Optional[Dict[str, Any]] = None, flavor: str = "cpu-basic", timeout: Optional[Union[int, float, str]] = None, + namespace: Optional[str] = None, token: Union[bool, str, None] = None, _repo: Optional[str] = None, ) -> JobInfo: @@ -10265,6 +10295,9 @@ def run_uv_job( Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). Example: `300` or `"5m"` for 5 minutes. + namespace (`str`, *optional*): + The namespace where the Job will be created. Defaults to the current user's namespace. + token `(Union[bool, str, None]`, *optional*): A valid user access token. If not provided, the locally saved token will be used, which is the recommended authentication method. Set to `False` to disable authentication. @@ -10293,6 +10326,9 @@ def run_uv_job( uv_args += ["--python", python] script_args = script_args or [] + if namespace is None: + namespace = self.whoami(token=token)["name"] + if script.startswith("http://") or script.startswith("https://"): # Direct URL execution - no upload needed command = ["uv", "run"] + uv_args + [script] + script_args @@ -10300,15 +10336,14 @@ def run_uv_job( # Local file - upload to HF script_path = Path(script) filename = script_path.name - username = self.whoami(token=token)["name"] # Parse repo if _repo: repo_id = _repo if "/" not in repo_id: - repo_id = f"{username}/{repo_id}" + repo_id = f"{namespace}/{repo_id}" repo_id = _repo else: - repo_id = f"{username}/hf-cli-jobs-uv-run-scripts" + repo_id = f"{namespace}/hf-cli-jobs-uv-run-scripts" # Create repo if needed try: @@ -10397,6 +10432,7 @@ def run_uv_job( secrets=secrets, flavor=flavor, timeout=timeout, + namespace=namespace, token=token, ) @@ -10563,3 +10599,4 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: list_jobs = api.list_jobs inspect_job = api.inspect_job cancel_job = api.cancel_job +run_uv_job = api.run_uv_job From 5db3b42e3d98a14533720610e6c0a60627ee030f Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:15:24 +0200 Subject: [PATCH 25/40] fix wrong job url --- src/huggingface_hub/_jobs_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 3b96531d91..2b8dfd36cf 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -92,4 +92,4 @@ def __init__(self, **kwargs) -> None: # Inferred fields self.endpoint = kwargs.get("endpoint", constants.ENDPOINT) - self.url = f"{self.endpoint}/jobs/{self.owner.id}/{self.id}" + self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}" From 76588ef1aeca4750a11fe8d80554d6f0a1120968 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:15:33 +0200 Subject: [PATCH 26/40] add missing methods at top level --- src/huggingface_hub/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index deb63ffeef..7d4a9fff11 100644 --- 
a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -256,6 +256,7 @@ "revision_exists", "run_as_future", "run_job", + "run_uv_job", "scale_to_zero_inference_endpoint", "set_space_sleep_time", "space_info", @@ -917,6 +918,7 @@ "revision_exists", "run_as_future", "run_job", + "run_uv_job", "save_pretrained_keras", "save_torch_model", "save_torch_state_dict", @@ -1244,6 +1246,7 @@ def __dir__(): revision_exists, # noqa: F401 run_as_future, # noqa: F401 run_job, # noqa: F401 + run_uv_job, # noqa: F401 scale_to_zero_inference_endpoint, # noqa: F401 set_space_sleep_time, # noqa: F401 space_info, # noqa: F401 From 63dd90f6a5d98fb8e928688d5e754aaeda7f2759 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:16:28 +0200 Subject: [PATCH 27/40] add docs --- docs/source/en/_toctree.yml | 2 + docs/source/en/guides/job.md | 217 +++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 docs/source/en/guides/job.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 17397ed2fa..c3841b0e97 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -40,6 +40,8 @@ title: Integrate a library - local: guides/webhooks title: Webhooks + - local: guides/jobs + title: Jobs - title: 'Conceptual guides' sections: - local: concepts/git_vs_http diff --git a/docs/source/en/guides/job.md b/docs/source/en/guides/job.md new file mode 100644 index 0000000000..149264ec55 --- /dev/null +++ b/docs/source/en/guides/job.md @@ -0,0 +1,217 @@ + +# Run and manage a Job + +The Hugging Face Hub provides compute for AI and data workflows via Jobs. +A job runs on Hugging Face infrastructure and are defined with a command to run (e.g. a python command), a Docker Image from Hugging Face Spaces or Docker Hub, and a hardware flavor (CPU, GPU, TPU). This guide will show you how to interact with Jobs on the Hub, especially: + +- Run a job. +- Check job status. +- Select the hardware. +- Configure environment variables and secrets. +- Run UV scripts. + +If you want to run and manage a job on the Hub, your machine must be logged in. If you are not, please refer to +[this section](../quick-start#authentication). In the rest of this guide, we will assume that your machine is logged in. + +## Run a Job + +Run compute Jobs defined with a command and a Docker Image on Hugging Face infrastructure (including GPUs and TPUs). + +You can only manage Jobs that you own (under your username namespace) or from organizations in which you have write permissions. +This feature is pay-as-you-go: you only pay for the seconds you use. + +[`run_job`] lets you run any command on Hugging Face's infrastructure: + +```python +# Directly run Python code +>>> from huggingface_hub import run_job +>>> run_job( +... image="python:3.12", +... command=["python", "-c", "print('Hello from the cloud!')"], +... ) + +# Use GPUs without any setup +>>> run_job( +... image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel", +... command=["python", "-c", "import torch; print(torch.cuda.get_device_name())"], +... flavor="a10g-small", +... ) + +# Run in an organization account +>>> run_job( +... image="python:3.12", +... command=["python", "-c", "print('Running in an org account')"], +... namespace="my-org-name", +... ) + +# Run from Hugging Face Spaces +>>> run_job( +... image="hf.co/spaces/lhoestq/duckdb", +... command=["duckdb", "-c", "select 'hello world'"], +... 
) + +# Run a Python script with `uv` (experimental) +>>> from huggingface_hub import run_uv_job +>>> run_uv_job("my_script.py") +``` + + + +Use [huggingface-cli jobs](./cli#huggingface-cli-jobs) to run jobs in the command line. + + + +[`run_job`] returns the `JobInfo` which has the URL of the Job on Hugging Face, where you can see the Job status and the logs. +Save the Job ID from `JobInfo` to manage the job: + +```python +>>> from huggingface_hub import run_job +>>> job = run_job( +... image="python:3.12", +... command=["python", "-c", "print('Hello from the cloud!')"] +... ) +>>> job.url +https://huggingface.co/jobs/lhoestq/687f911eaea852de79c4a50a +>>> job.id +687f911eaea852de79c4a50a +``` + +Jobs run in the background. The next section guides you through [`inspect_job`] to know a jobs' status and [`fetch_job_logs`] to the view the logs. + +## Check Job status + +```python +# List your jobs +>>> from huggingface_hub import list_jobs +>>> jobs = list_jobs() +>>> jobs[0] +JobInfo(id='687f911eaea852de79c4a50a', created_at=datetime.datetime(2025, 7, 22, 13, 24, 46, 909000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='COMPLETED', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687f911eaea852de79c4a50a') + +# List your running jobs +>>> running_jobs = [job for job in jobs if job.status.stage == "RUNNING"] + +# Inspect the status of a job +>>> from huggingface_hub import inspect_job +>>> inspect_job(job_id=job_id) +JobInfo(id='687f911eaea852de79c4a50a', created_at=datetime.datetime(2025, 7, 22, 13, 24, 46, 909000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='COMPLETED', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687f911eaea852de79c4a50a') + +# View logs from a job +>>> from huggingface_hub import fetch_job_logs +>>> for log in fetch_job_logs(job_id=job_id): +... print(log) +Hello from the cloud! + +# Cancel a job +>>> from huggingface_hub import cancel_job +>>> cancel_job(job_id=job_id) +``` + +Check the status of multiple jobs to know when they're all finished using a loop and [`inspect_job`]: + +```python +# Run multiple jobs in parallel and wait for their completions +>>> import time +>>> from huggingface_hub import inspect_job, run_job +>>> jobs = [run_job(image=image, command=command) for command in commands] +>>> for job in jobs: +... while inspect_job(job_id=job.id).status.stage not in ("COMPLETED", "ERROR"): +... 
time.sleep(10) +``` + +## Select the hardware + +There are numerous cases where running Jobs on GPUs are useful: + +- **Model Training**: Fine-tune or train models on GPUs (T4, A10G, A100) without managing infrastructure +- **Synthetic Data Generation**: Generate large-scale datasets using LLMs on powerful hardware +- **Data Processing**: Process massive datasets with high-CPU configurations for parallel workloads +- **Batch Inference**: Run offline inference on thousands of samples using optimized GPU setups +- **Experiments & Benchmarks**: Run ML experiments on consistent hardware for reproducible results +- **Development & Debugging**: Test GPU code without local CUDA setup + +Run jobs on GPUs or TPUs with the `flavor` argument. For example, to run a PyTorch job on an A10G GPU: + +```python +# Use an A10G GPU to check PyTorch CUDA +>>> from huggingface_hub import run_job +>>> run_job( +... image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel", +... command=["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"], +... flavor="a10g-small" +... ) +``` + +Running this will show the following output! + +```bash +This code ran with the following GPU: NVIDIA A10G +``` + +Use this to run a fine tuning script like [trl/scripts/sft.py](https://github.com/huggingface/trl/blob/main/trl/scripts/sft.py) with UV: + +```python +>>> from huggingface_hub import run_uv_job +>>> run_uv_job( +... "sft.py", +... script_args=["--model_name_or_path", "Qwen/Qwen2-0.5B", ...], +... dependencies=["trl"], +... env={"HF_TOKEN": ...}, +... flavor="a10g-small", +... ) +``` + +Available `flavor` options: + +- CPU: `cpu-basic`, `cpu-upgrade` +- GPU: `t4-small`, `t4-medium`, `l4x1`, `l4x4`, `a10g-small`, `a10g-large`, `a10g-largex2`, `a10g-largex4`,`a100-large` +- TPU: `v5e-1x1`, `v5e-2x2`, `v5e-2x4` + +(updated in 07/2025 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) + +That's it! You're now running code on Hugging Face's infrastructure. For more detailed information checkout the [Quickstart Guide](docs/quickstart.md). + +## Pass Environment variables and Secrets + +You can pass environment variables to your job using `env` and `secrets`: + +```python +# Pass environment variables +>>> from huggingface_hub import run_job +>>> run_job( +... image="python:3.12", +... command=["python", "-c", "import os; print(os.environ['FOO'], os.environ['BAR'])"], +... env={"FOO": "foo", "BAR": "bar"}, +... ) +``` + + +```python +# Pass secrets - they will be encrypted server side +>>> from huggingface_hub import run_job +>>> run_job( +... image="python:3.12", +... command=["python", "-c", "import os; print(os.environ['MY_SECRET'])"], +... secrets={"MY_SECRET": "psswrd"}, +... ) +``` + + +### UV Scripts (Experimental) + +Run UV scripts (Python scripts with inline dependencies) on HF infrastructure: + +```python +# Run a UV script (creates temporary repo) +>>> from huggingface_hub import run_uv_job +>>> run_uv_job("my_script.py") + +# Run with GPU +>>> run_uv_job("ml_training.py", flavor="gpu-t4-small") + +# Run a script directly from a URL +>>> run_uv_job("https://huggingface.co/datasets/username/scripts/resolve/main/example.py") +``` + +UV scripts are Python scripts that include their dependencies directly in the file using a special comment syntax. This makes them perfect for self-contained tasks that don't require complex project setups. 
Learn more about UV scripts in the [UV documentation](https://docs.astral.sh/uv/guides/scripts/). From bfd326a30b2e61fadfe89e088b49d5db3fdb3be2 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:20:39 +0200 Subject: [PATCH 28/40] uv script url as env, not secret --- src/huggingface_hub/hf_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 4e4e161893..b96573215d 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -10403,7 +10403,7 @@ def run_uv_job( ) secrets["UV_SCRIPT_HF_TOKEN"] = token or self.token or get_token() - secrets["UV_SCRIPT_URL"] = script_url + env["UV_SCRIPT_URL"] = script_url pre_command = ( dedent( From c9ab2f11145170ce938207b2991069d40424bfcc Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:21:27 +0200 Subject: [PATCH 29/40] rename docs --- docs/source/en/guides/{job.md => jobs.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename docs/source/en/guides/{job.md => jobs.md} (99%) diff --git a/docs/source/en/guides/job.md b/docs/source/en/guides/jobs.md similarity index 99% rename from docs/source/en/guides/job.md rename to docs/source/en/guides/jobs.md index 149264ec55..db2744e1a9 100644 --- a/docs/source/en/guides/job.md +++ b/docs/source/en/guides/jobs.md @@ -1,7 +1,7 @@ -# Run and manage a Job +# Run and manage Jobs The Hugging Face Hub provides compute for AI and data workflows via Jobs. A job runs on Hugging Face infrastructure and are defined with a command to run (e.g. a python command), a Docker Image from Hugging Face Spaces or Docker Hub, and a hardware flavor (CPU, GPU, TPU). This guide will show you how to interact with Jobs on the Hub, especially: From cf59dcade7e83c183a1a929b3262d0c17dcb628d Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:28:55 +0200 Subject: [PATCH 30/40] update test --- tests/test_cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index fa047737ce..12ee876d63 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -860,7 +860,12 @@ def setUp(self) -> None: commands_parser = self.parser.add_subparsers() JobsCommands.register_subcommand(commands_parser) - @patch("requests.Session.post", return_value=DummyResponse({"id": "my-job-id"})) + @patch( + "requests.Session.post", + return_value=DummyResponse( + {"id": "my-job-id", "owner": {"id": "userid", "name": "user"}, "status": {"stage": "RUNNING"}} + ), + ) @patch("huggingface_hub.hf_api.HfApi.whoami", return_value={"name": "my-username"}) def test_run(self, whoami: Mock, requests_post: Mock) -> None: input_args = ["jobs", "run", "--detach", "ubuntu", "echo", "hello"] From da1d40d0aab592c108866cbb1f75a9c675a0a7d7 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:29:16 +0200 Subject: [PATCH 31/40] again --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 12ee876d63..1c25f42783 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -863,7 +863,7 @@ def setUp(self) -> None: @patch( "requests.Session.post", return_value=DummyResponse( - {"id": "my-job-id", "owner": {"id": "userid", "name": "user"}, "status": {"stage": "RUNNING"}} + {"id": "my-job-id", "owner": {"id": "userid", "name": "my-username"}, "status": {"stage": "RUNNING"}} ), ) @patch("huggingface_hub.hf_api.HfApi.whoami", return_value={"name": "my-username"}) From 
334d8314da7c0cf4777c9ac37b307143839bda8f Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 22 Jul 2025 18:37:25 +0200 Subject: [PATCH 32/40] improve docs --- docs/source/en/guides/jobs.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/en/guides/jobs.md b/docs/source/en/guides/jobs.md index db2744e1a9..20d0e00954 100644 --- a/docs/source/en/guides/jobs.md +++ b/docs/source/en/guides/jobs.md @@ -78,7 +78,7 @@ https://huggingface.co/jobs/lhoestq/687f911eaea852de79c4a50a 687f911eaea852de79c4a50a ``` -Jobs run in the background. The next section guides you through [`inspect_job`] to know a jobs' status and [`fetch_job_logs`] to the view the logs. +Jobs run in the background. The next section guides you through [`inspect_job`] to know a jobs' status and [`fetch_job_logs`] to view the logs. ## Check Job status @@ -90,7 +90,7 @@ Jobs run in the background. The next section guides you through [`inspect_job`] JobInfo(id='687f911eaea852de79c4a50a', created_at=datetime.datetime(2025, 7, 22, 13, 24, 46, 909000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='COMPLETED', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687f911eaea852de79c4a50a') # List your running jobs ->>> running_jobs = [job for job in jobs if job.status.stage == "RUNNING"] +>>> running_jobs = [job for job in list_jobs() if job.status.stage == "RUNNING"] # Inspect the status of a job >>> from huggingface_hub import inspect_job @@ -139,7 +139,7 @@ Run jobs on GPUs or TPUs with the `flavor` argument. For example, to run a PyTor >>> run_job( ... image="pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel", ... command=["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"], -... flavor="a10g-small" +... flavor="a10g-small", ... ) ``` @@ -170,7 +170,7 @@ Available `flavor` options: (updated in 07/2025 from Hugging Face [suggested_hardware docs](https://huggingface.co/docs/hub/en/spaces-config-reference)) -That's it! You're now running code on Hugging Face's infrastructure. For more detailed information checkout the [Quickstart Guide](docs/quickstart.md). +That's it! You're now running code on Hugging Face's infrastructure. 
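The flavor list above can also be read programmatically. A minimal sketch, assuming the `SpaceHardware` string enum that a later patch in this series imports from `huggingface_hub` (its values are the same names accepted by the `flavor` argument):

```python
# Sketch: enumerate the hardware flavors accepted by `flavor=...`.
# SpaceHardware is a string enum, so its members can be passed directly
# to run_job / run_uv_job as the `flavor` argument.
from huggingface_hub import SpaceHardware

for hw in SpaceHardware:
    print(hw.value)  # e.g. "cpu-basic", "t4-small", "a10g-small", ...
```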
## Pass Environment variables and Secrets @@ -210,6 +210,9 @@ Run UV scripts (Python scripts with inline dependencies) on HF infrastructure: # Run with GPU >>> run_uv_job("ml_training.py", flavor="gpu-t4-small") +# Run with dependencies +>>> run_uv_job("inference.py", dependencies=["transformers", "torch"]) + # Run a script directly from a URL >>> run_uv_job("https://huggingface.co/datasets/username/scripts/resolve/main/example.py") ``` From fed719518b8515c9cd26a4d6a9e9e2916895dc22 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 14:37:19 +0200 Subject: [PATCH 33/40] add image arg to run_uv_job --- src/huggingface_hub/commands/jobs.py | 3 +++ src/huggingface_hub/hf_api.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 92b2e51c21..f2cd189c8e 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -422,6 +422,7 @@ def register_subcommand(parser): ) run_parser.add_argument("script", help="UV script to run (local file or URL)") run_parser.add_argument("script_args", nargs="...", help="Arguments for the script", default=[]) + run_parser.add_argument("--image", type=str, help="Use a custom Docker image with `uv` installed.") run_parser.add_argument( "--repo", help="Repository name for the script (creates ephemeral if not specified)", @@ -456,6 +457,7 @@ def __init__(self, args: Namespace) -> None: self.script_args = args.script_args self.dependencies = args.with_ self.python = args.python + self.image = args.image self.env: dict[str, Optional[str]] = {} if args.env_file: self.env.update(load_dotenv(Path(args.env_file).read_text())) @@ -482,6 +484,7 @@ def run(self) -> None: script_args=self.script_args, dependencies=self.dependencies, python=self.python, + image=self.image, env=self.env, secrets=self.secrets, flavor=self.flavor, diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index b96573215d..6610c1ffe9 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -10258,6 +10258,7 @@ def run_uv_job( script_args: Optional[List[str]] = None, dependencies: Optional[List[str]] = None, python: Optional[str] = None, + image: Optional[str] = None, env: Optional[Dict[str, Any]] = None, secrets: Optional[Dict[str, Any]] = None, flavor: str = "cpu-basic", @@ -10282,6 +10283,9 @@ def run_uv_job( python (`str`, *optional*) Use a specific Python version. Default is 3.12. + image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm-slim"): + Use a custom Docker image with `uv` installed. + env (`Dict[str, Any]`, *optional*): Defines the environment variables for the Job. 
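To make the new `image` parameter concrete, here is a hedged sketch of a call that overrides the default `uv` image; the image reference is purely illustrative and not something this patch adds:

```python
# Sketch: run a UV script on a custom Docker image that already has `uv` installed.
# "my-registry/uv-runtime:latest" is a placeholder image, not part of this patch.
from huggingface_hub import run_uv_job

job = run_uv_job(
    "my_script.py",
    image="my-registry/uv-runtime:latest",
    flavor="cpu-basic",
)
print(job.id, job.url)
```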
@@ -10311,12 +10315,10 @@ def run_uv_job( >>> run_uv_job(script, dependencies=["trl"], flavor="a10g-small") ``` """ + image = image or "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" env = env or {} secrets = secrets or {} - # Prepare docker image (always use Python 3.12) - image = "ghcr.io/astral-sh/uv:python3.12-bookworm-slim" - # Build command uv_args = [] if dependencies: From eaaa6a12a3ff7c1183e07a8d2137321b53f84cbe Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 23 Jul 2025 14:41:08 +0200 Subject: [PATCH 34/40] List flavors from SpaceHardware --- src/huggingface_hub/commands/jobs.py | 15 +++++++++------ src/huggingface_hub/hf_api.py | 17 +++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 92b2e51c21..2800ff1b1b 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -28,7 +28,7 @@ import requests -from huggingface_hub import HfApi +from huggingface_hub import HfApi, SpaceHardware from huggingface_hub.utils import logging from huggingface_hub.utils._dotenv import load_dotenv @@ -65,8 +65,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: run_parser.add_argument( "--flavor", type=str, - help="Flavor for the hardware, as in HF Spaces.", - default="cpu-basic", + help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SpaceHardware)}.", ) run_parser.add_argument( "--timeout", @@ -105,7 +104,7 @@ def __init__(self, args: Namespace) -> None: self.secrets.update(load_dotenv(Path(args.secrets_file).read_text())) for secret in args.secrets or []: self.secrets.update(load_dotenv(secret)) - self.flavor: str = args.flavor + self.flavor: Optional[SpaceHardware] = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach self.namespace: Optional[str] = args.namespace @@ -426,7 +425,11 @@ def register_subcommand(parser): "--repo", help="Repository name for the script (creates ephemeral if not specified)", ) - run_parser.add_argument("--flavor", type=str, default="cpu-basic", help="Hardware flavor (default: cpu-basic)") + run_parser.add_argument( + "--flavor", + type=str, + help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. 
Possible values: {', '.join(SpaceHardware)}.", + ) run_parser.add_argument("-e", "--env", action="append", help="Environment variables") run_parser.add_argument("-s", "--secrets", action="append", help="Secret environment variables") run_parser.add_argument("--env-file", type=str, help="Read in a file of environment variables.") @@ -466,7 +469,7 @@ def __init__(self, args: Namespace) -> None: self.secrets.update(load_dotenv(Path(args.secrets_file).read_text())) for secret in args.secrets or []: self.secrets.update(load_dotenv(secret)) - self.flavor: Optional[str] = args.flavor + self.flavor: Optional[SpaceHardware] = args.flavor self.timeout: Optional[str] = args.timeout self.detach: bool = args.detach self.namespace: Optional[str] = args.namespace diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index b96573215d..e568fc2da6 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -9950,7 +9950,7 @@ def run_job( command: List[str], env: Optional[Dict[str, Any]] = None, secrets: Optional[Dict[str, Any]] = None, - flavor: str = "cpu-basic", + flavor: Optional[SpaceHardware] = None, timeout: Optional[Union[int, float, str]] = None, namespace: Optional[str] = None, token: Union[bool, str, None] = None, @@ -9973,8 +9973,9 @@ def run_job( secrets (`Dict[str, Any]`, *optional*): Defines the secret environment variables for the Job. - flavor (`str`, defaults to `"cpu-basic"`): - Flavor for the hardware, as in Hugging Face Spaces. + flavor (`str`, *optional*): + Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. + Defaults to `"cpu-basic"`. timeout (`Union[int, float, str]`, *optional*): Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). @@ -10006,6 +10007,9 @@ def run_job( ``` """ + if flavor is None: + flavor = SpaceHardware.CPU_BASIC + # prepare payload to send to HF Jobs API input_json: Dict[str, Any] = { "command": command, @@ -10260,7 +10264,7 @@ def run_uv_job( python: Optional[str] = None, env: Optional[Dict[str, Any]] = None, secrets: Optional[Dict[str, Any]] = None, - flavor: str = "cpu-basic", + flavor: Optional[SpaceHardware] = None, timeout: Optional[Union[int, float, str]] = None, namespace: Optional[str] = None, token: Union[bool, str, None] = None, @@ -10288,8 +10292,9 @@ def run_uv_job( secrets (`Dict[str, Any]`, *optional*): Defines the secret environment variables for the Job. - flavor (`str`, defaults to `"cpu-basic"`): - "Flavor for the hardware, as in Hugging Face Spaces. + flavor (`str`, *optional*): + Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. + Defaults to `"cpu-basic"`. timeout (`Union[int, float, str]`, *optional*): Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). From af0e9fb5d2af107e84c38e6215c14130b155e8c2 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 14:54:38 +0200 Subject: [PATCH 35/40] add to overview --- docs/source/en/guides/overview.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/en/guides/overview.md b/docs/source/en/guides/overview.md index fd0c8c417f..84a846a997 100644 --- a/docs/source/en/guides/overview.md +++ b/docs/source/en/guides/overview.md @@ -127,5 +127,14 @@ Take a look at these guides to learn how to use huggingface_hub to solve real-wo

+
+<a href="./jobs" class="...">
+  <div class="...">
+    Jobs
+  </div>
+  <p class="...">
+    How to run and manage compute Jobs on Hugging Face infrastructure and select the hardware?
+  </p>
+</a>
+ From e6043aed890d7ab9821cd8ceaa63533e61de95da Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 23 Jul 2025 15:00:00 +0200 Subject: [PATCH 36/40] remove zero GPU from flavors --- src/huggingface_hub/commands/jobs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/huggingface_hub/commands/jobs.py b/src/huggingface_hub/commands/jobs.py index 92c6896ea4..9509458e9c 100644 --- a/src/huggingface_hub/commands/jobs.py +++ b/src/huggingface_hub/commands/jobs.py @@ -37,6 +37,8 @@ logger = logging.get_logger(__name__) +SUGGESTED_FLAVORS = [item.value for item in SpaceHardware if item.value != "zero-a10g"] + class JobsCommands(BaseHuggingfaceCLICommand): @staticmethod @@ -65,7 +67,7 @@ def register_subcommand(parser: _SubParsersAction) -> None: run_parser.add_argument( "--flavor", type=str, - help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SpaceHardware)}.", + help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SUGGESTED_FLAVORS)}.", ) run_parser.add_argument( "--timeout", @@ -429,7 +431,7 @@ def register_subcommand(parser): run_parser.add_argument( "--flavor", type=str, - help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SpaceHardware)}.", + help=f"Flavor for the hardware, as in HF Spaces. Defaults to `cpu-basic`. Possible values: {', '.join(SUGGESTED_FLAVORS)}.", ) run_parser.add_argument("-e", "--env", action="append", help="Environment variables") run_parser.add_argument("-s", "--secrets", action="append", help="Secret environment variables") From c4443912945144a5d9e46235d236f0979d73d45d Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 15:10:03 +0200 Subject: [PATCH 37/40] add JobInfo etc. 
from _jobs_api in top level __init__ --- src/huggingface_hub/__init__.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 21ed05c8dd..c58b3e4aca 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -62,6 +62,12 @@ "InferenceEndpointTimeoutError", "InferenceEndpointType", ], + "_jobs_api": [ + "JobInfo", + "JobOwner", + "JobStage", + "JobStatus", + ], "_login": [ "auth_list", "auth_switch", @@ -662,6 +668,10 @@ "InferenceEndpointTimeoutError", "InferenceEndpointType", "InferenceTimeoutError", + "JobInfo", + "JobOwner", + "JobStage", + "JobStatus", "KerasModelHubMixin", "MCPClient", "ModelCard", @@ -1056,6 +1066,12 @@ def __dir__(): InferenceEndpointTimeoutError, # noqa: F401 InferenceEndpointType, # noqa: F401 ) + from ._jobs_api import ( + JobInfo, # noqa: F401 + JobOwner, # noqa: F401 + JobStage, # noqa: F401 + JobStatus, # noqa: F401 + ) from ._login import ( auth_list, # noqa: F401 auth_switch, # noqa: F401 From ea6579a867fb4aeefe2acdfd455bf6d715c6e72d Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 15:10:51 +0200 Subject: [PATCH 38/40] add package_reference doc page --- docs/source/en/_toctree.yml | 2 ++ docs/source/en/package_reference/jobs.md | 33 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 docs/source/en/package_reference/jobs.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index c3841b0e97..7ee74a0830 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -94,3 +94,5 @@ title: Strict dataclasses - local: package_reference/oauth title: OAuth + - local: package_reference/jobs + title: Jobs diff --git a/docs/source/en/package_reference/jobs.md b/docs/source/en/package_reference/jobs.md new file mode 100644 index 0000000000..eca90bc3ad --- /dev/null +++ b/docs/source/en/package_reference/jobs.md @@ -0,0 +1,33 @@ + + +# Jobs + +Check the [`HfApi`] documentation page for the reference of methods to manage your Jobs on the Hub. + +- Run a Job: [`run_job`] +- Fetch logs: [`fetch_job_logs`] +- Inspect Job: [`inspect_job`] +- List Jobs: [`list_jobs`] +- Cancel Job: [`cancel_job`] +- Run a UV Job: [`run_uv_job`] + +## Data structures + +### JobInfo + +[[autodoc]] JobInfo + +### JobOwner + +[[autodoc]] JobOwner + + +### JobStage + +[[autodoc]] JobStage + +### JobStatus + +[[autodoc]] JobStatus From 3f6a2f794c7df663056c0ddfdd60ae5ca4653c82 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 15:17:31 +0200 Subject: [PATCH 39/40] minor - link JobInfo in docs --- docs/source/en/guides/jobs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/guides/jobs.md b/docs/source/en/guides/jobs.md index 20d0e00954..a4ce0b4cb3 100644 --- a/docs/source/en/guides/jobs.md +++ b/docs/source/en/guides/jobs.md @@ -63,8 +63,8 @@ Use [huggingface-cli jobs](./cli#huggingface-cli-jobs) to run jobs in the comman -[`run_job`] returns the `JobInfo` which has the URL of the Job on Hugging Face, where you can see the Job status and the logs. -Save the Job ID from `JobInfo` to manage the job: +[`run_job`] returns the [`JobInfo`] which has the URL of the Job on Hugging Face, where you can see the Job status and the logs. 
+Save the Job ID from [`JobInfo`] to manage the job: ```python >>> from huggingface_hub import run_job From 3e049db19a853478ba7c777ea19b1fead13d5c72 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 23 Jul 2025 15:31:42 +0200 Subject: [PATCH 40/40] JobInfo docstring --- src/huggingface_hub/_jobs_api.py | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/huggingface_hub/_jobs_api.py b/src/huggingface_hub/_jobs_api.py index 2b8dfd36cf..cdfed4f9dd 100644 --- a/src/huggingface_hub/_jobs_api.py +++ b/src/huggingface_hub/_jobs_api.py @@ -60,6 +60,56 @@ class JobOwner: @dataclass class JobInfo: + """ + Contains information about a Job. + + Args: + id (`str`): + Job ID. + created_at (`datetime` or `None`): + When the Job was created. + docker_image (`str` or `None`): + The Docker image from Docker Hub used for the Job. + Can be None if space_id is present instead. + space_id (`str` or `None`): + The Docker image from Hugging Face Spaces used for the Job. + Can be None if docker_image is present instead. + command (`List[str]` or `None`): + Command of the Job, e.g. `["python", "-c", "print('hello world')"]` + arguments (`List[str]` or `None`): + Arguments passed to the command + environment (`Dict[str]` or `None`): + Environment variables of the Job as a dictionary. + secrets (`Dict[str]` or `None`): + Secret environment variables of the Job (encrypted). + flavor (`str` or `None`): + Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. + E.g. `"cpu-basic"`. + status: (`JobStatus` or `None`): + Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)` + See [`JobStage`] for possible stage values. + status: (`JobOwner` or `None`): + Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")` + + Example: + + ```python + >>> from huggingface_hub import run_job + >>> job = run_job( + ... image="python:3.12", + ... command=["python", "-c", "print('Hello from the cloud!')"] + ... ) + >>> job + JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998') + >>> job.id + '687fb701029421ae5549d998' + >>> job.url + 'https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998' + >>> job.status.stage + 'RUNNING' + ``` + """ + id: str created_at: Optional[datetime] docker_image: Optional[str]
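As a closing illustration of the API assembled across these patches, here is a hedged end-to-end sketch that reuses the `namespace` plumbing and the `JobInfo` fields documented above (the organization name `my-org-name` is a placeholder, as in the guide):

```python
# Sketch: run a job under an organization namespace and follow it to completion.
# "my-org-name" is a placeholder namespace, as in the guide above.
import time

from huggingface_hub import fetch_job_logs, inspect_job, run_job

job = run_job(
    image="python:3.12",
    command=["python", "-c", "print('Hello from an org account!')"],
    namespace="my-org-name",
)
print(job.id, job.url)

# Follow-up calls take the same namespace so the job can be located.
while inspect_job(job_id=job.id, namespace="my-org-name").status.stage not in ("COMPLETED", "ERROR"):
    time.sleep(10)

for log in fetch_job_logs(job_id=job.id, namespace="my-org-name"):
    print(log)
```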