From c6bd94474797dacda88bb84495ee117e2b9589bf Mon Sep 17 00:00:00 2001 From: Sherin Date: Sat, 15 Mar 2025 13:11:34 +0200 Subject: [PATCH 1/3] Updated quick starts --- ...ng-workloads-with-dynamic-gpu-fractions.md | 112 ++++++++------- .../launching-workloads-with-gpu-fractions.md | 60 ++++---- ...aunching-workloads-with-gpu-memory-swap.md | 130 +++++++++++------- .../quickstart-distributed-training.md | 24 ++-- .../quickstart-standard-training.md | 12 +- .../workspaces/quickstart-jupyter.md | 19 ++- 6 files changed, 203 insertions(+), 154 deletions(-) diff --git a/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md b/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md index 2194b987ca..f9f1c139d2 100644 --- a/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md +++ b/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md @@ -93,44 +93,52 @@ Before you start, make sure: ```sh runai project set "project-name" - runai workspace submit "workload-name" --image gcr.io/run-ai-lab/pytorch-example-jupyter --gpu-memory-request 4G - --gpu-memory-limit 12G --large-shm --external-url container=8888 - --name-prefix jupyter --command -- start-notebook.sh - --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} - --NotebookApp.token= + runai workspace submit "workload-name" --image gcr.io/run-ai-lab/pytorch-example-jupyter --gpu-memory-request 4G \ + --gpu-memory-limit 12G --large-shm --external-url container=8888 \ + --name-prefix jupyter --command -- start-notebook.sh \ + --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token= ``` === "API" Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Workspaces API:](https://api-docs.run.ai/latest/tag/Workspaces) ```bash - curl -L 'https:///api/v1/workloads/workspaces' \ # is the link to the Run:ai user interface. 
+ curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", - "image": "gcr.io/run-ai-lab/pytorch-example-jupyter", - "exposedUrls": [ - { - "container": 8888, - "toolType": "jupyter-notebook", - "toolName": "Jupyter" - } - ], + "image": "jupyter/scipy-notebook", "compute": { - "gpuDevicesRequest": 1, - "gpuMemoryRequest": "4G", - "gpuMemoryLimit": "12G", - "largeShmRequest": true - } + "gpuDevicesRequest": 1, + "gpuMemoryRequest": "4G", + "gpuMemoryLimit": "12G", + "largeShmRequest": true + + }, + "exposedUrls" : [ + { + "container" : 8888, + "toolType": "jupyter-notebook", + "toolName": "Jupyter" + } + ] } - }' + } ``` + + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + 5. `toolType` will show the Jupyter icon when connecting to the Jupyter tool via the user interface. + 6. `toolName` text will show when connecting to the Jupyter tool via the user interface. + !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. 
@@ -157,49 +165,57 @@ Before you start, make sure: After the workspace is created, it is added to the [workloads](../../platform-admin/workloads/overviews/managing-workloads.md) table === "CLI v2" - Copy the following command to your terminal. Make sure to update the below with the name of your project and workload: + Copy the following command to your terminal. Make sure to update the below with the name of the project and workload. Use the **project** where the previous workspace was created: ```sh runai project set "project-name" - runai workspace submit "workload-name" --image gcr.io/run-ai-lab/pytorch-example-jupyter --gpu-memory-request 4G - --gpu-memory-limit 12G --large-shm --external-url container=8888 - --name-prefix jupyter --command -- start-notebook.sh - --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} - --NotebookApp.token= + runai workspace submit "workload-name" --image gcr.io/run-ai-lab/pytorch-example-jupyter --gpu-memory-request 4G \ + --gpu-memory-limit 12G --large-shm --external-url container=8888 \ + --name-prefix jupyter --command -- start-notebook.sh \ + --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token= ``` === "API" - Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): + Copy the following command to your terminal. Make sure to update the below parameters according to the comments. Use the **project** and **cluster** where the previous workspace was created. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): ```bash - curl -L 'https:///api/v1/workloads/workspaces' \ # is the link to the Run:ai user interface. + curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. 
+ -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", - "image": "gcr.io/run-ai-lab/pytorch-example-jupyter", - "exposedUrls": [ - { - "container": 8888, - "toolType": "jupyter-notebook", - "toolName": "Jupyter" - } - ], + "image": "jupyter/scipy-notebook", "compute": { - "gpuDevicesRequest": 1, - "gpuMemoryRequest": "4G", - "gpuMemoryLimit": "12G", - "largeShmRequest": true - } + "gpuDevicesRequest": 1, + "gpuMemoryRequest": "4G", + "gpuMemoryLimit": "12G", + "largeShmRequest": true + + }, + "exposedUrls" : [ + { + "container" : 8888, + "toolType": "jupyter-notebook", + "toolName": "Jupyter" + } + ] } - }' + } ``` + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + 5. `toolType` will show the Jupyter icon when connecting to the Jupyter tool via the user interface. + 6. `toolName` text will show when connecting to the Jupyter tool via the user interface. + + !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. 
diff --git a/docs/Researcher/scheduling/launching-workloads-with-gpu-fractions.md b/docs/Researcher/scheduling/launching-workloads-with-gpu-fractions.md index 4f96195c96..aed5edb11c 100644 --- a/docs/Researcher/scheduling/launching-workloads-with-gpu-fractions.md +++ b/docs/Researcher/scheduling/launching-workloads-with-gpu-fractions.md @@ -35,6 +35,7 @@ Before you start, make sure: === "API" To use the API, you will need to obtain a token. Please follow the [API authentication](../../developer/rest-auth.md) article. + ## Step 2: Submitting a workspace === "UI" @@ -104,11 +105,10 @@ Before you start, make sure: ```sh runai project set "project-name" - runai workspace submit "workload-name" --image jupyter/scipy-notebook - --gpu-devices-request 0.1 --command --external-url container=8888 - --name-prefix jupyter --command -- start-notebook.sh - --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} - --NotebookApp.token= + runai workspace submit "workload-name" --image jupyter/scipy-notebook \ + --gpu-devices-request 0.1 --command --external-url container=8888 \ + --name-prefix jupyter --command -- start-notebook.sh \ + --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token= ``` This would start a workspace with a pre-configured Jupyter image with 10% of the GPU memory allocated. @@ -125,37 +125,44 @@ Before you start, make sure: This would start a workspace with a pre-configured Jupyter image with 10% of the GPU memory allocated. === "API" - Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): - ```sh - curl -L 'https:///api/v1/workloads/workspaces' \ # is the link to the Run:ai user interface. + ```bash + curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. 
+ -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", - "image": "jupyter/base-notebook", - "exposedUrls": [ - { - "container": 8888, - "toolType": "jupyter-notebook", - "toolName": "Jupyter" - } - ], + "image": "jupyter/scipy-notebook", "compute": { - "gpuDevicesRequest": 1, - "gpuRequestType": "portion", - "gpuPortionRequest": 0.1 - - } + "gpuDevicesRequest": 1, + "gpuRequestType": "portion", + "gpuPortionRequest": 0.1 + + }, + "exposedUrls" : [ + { + "container" : 8888, + "toolType": "jupyter-notebook", + "toolName": "Jupyter" + } + ] } - }' - ``` + }' + ``` + + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is the ID of the project the workload is running on. You can get the project ID via the [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + 5. `toolType` will show the Jupyter icon when connecting to the Jupyter tool via the user interface. + 6. `toolName` text will show when connecting to the Jupyter tool via the user interface. + This would start a workspace with a pre-configured Jupyter image with 10% of the GPU memory allocated.
@@ -182,6 +189,7 @@ Before you start, make sure: === "API" To connect to the Jupyter Notebook, browse directly to `https:////` + ## Next Steps diff --git a/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md b/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md index d092eb4c7a..4e5268bcee 100644 --- a/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md +++ b/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md @@ -76,30 +76,30 @@ Before you start, make sure: Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Inferences API](https://api-docs.run.ai/latest/tag/Inferences#operation/create_inference1): - ```sh - curl -L 'https:///api/v1/workloads/inferences' \ # is the link to the Run:ai user interface. + ```bash + curl -L 'https:///api/v1/workloads/inferences' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", "useGivenNameAsPrefix": true, - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. 
+ "projectId": "", + "clusterId": "", "spec": { "image": "runai.jfrog.io/core-llm/runai-vllm:v0.6.4-0.10.0", "imagePullPolicy":"IfNotPresent", "environmentVariables": [ { - "name": "RUNAI_MODEL", - "value": "meta-lama/Llama-3.2-1B-Instruct" + "name": "RUNAI_MODEL", + "value": "meta-lama/Llama-3.2-1B-Instruct" }, { - "name": "VLLM_RPC_TIMEOUT", - "value": "60000" + "name": "VLLM_RPC_TIMEOUT", + "value": "60000" }, { - "name": "HF_TOKEN", - "value":"" + "name": "HF_TOKEN", + "value":"" } ], "compute": { @@ -116,10 +116,17 @@ Before you start, make sure: "container": 8000, "protocol": "http", "authorizationType": "public" + } } - }' + } ``` + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + + !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. @@ -143,32 +150,32 @@ Before you start, make sure: === "API" - Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Inferences API](https://api-docs.run.ai/latest/tag/Inferences#operation/create_inference1): + Copy the following command to your terminal. Make sure to update the below parameters according to the comments. Use the **project** and **cluster** where the previous inference workload was created. For more details, see [Inferences API](https://api-docs.run.ai/latest/tag/Inferences#operation/create_inference1): - ```sh - curl -L 'https:///api/v1/workloads/inferences' \ # is the link to the Run:ai user interface. 
+ ```bash + curl -L 'https:///api/v1/workloads/inferences' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", "useGivenNameAsPrefix": true, - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "image": "runai.jfrog.io/core-llm/runai-vllm:v0.6.4-0.10.0", "imagePullPolicy":"IfNotPresent", "environmentVariables": [ { - "name": "RUNAI_MODEL", - "value": "meta-lama/Llama-3.2-1B-Instruct" + "name": "RUNAI_MODEL", + "value": "meta-lama/Llama-3.2-1B-Instruct" }, { - "name": "VLLM_RPC_TIMEOUT", - "value": "60000" + "name": "VLLM_RPC_TIMEOUT", + "value": "60000" }, { - "name": "HF_TOKEN", - "value":"" + "name": "HF_TOKEN", + "value":"" } ], "compute": { @@ -185,10 +192,15 @@ Before you start, make sure: "container": 8000, "protocol": "http", "authorizationType": "public" + } } - }' + } ``` - + + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. @@ -255,35 +267,42 @@ Before you start, make sure: === "API" - Copy the following command to your terminal. Make sure to update the below parameters according to the comments. 
For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): + Copy the following command to your terminal. Make sure to update the below parameters according to the comments. Use the **project** and **cluster** where the previous inference workloads were created. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): - ```sh - curl -L 'https:///api/v1/workloads/workspaces' \ # is the link to the Run:ai user interface. + ```bash + curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "image": "runai.jfrog.io/core-llm/llm-app", "environmentVariables": [ { - "name": "RUNAI_MODEL_NAME", - "value": "meta-llama/Llama-3.2-1B-Instruct" + "name": "RUNAI_MODEL_NAME", + "value": "meta-llama/Llama-3.2-1B-Instruct" }, { - "name": "RUNAI_MODEL_BASE_URL", - "value": "" #The URL for connecting an external service related to the workload. You can get the URL via the List workloads API. + "name": "RUNAI_MODEL_BASE_URL", + "value": "" } ], "compute": { "cpuCoreRequest":0.1, "cpuMemoryRequest": "100M", + } } - }' + } ``` + 1. `` is the link to the Run:ai user interface. + 2. `` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. 
You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + + !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. @@ -316,35 +335,42 @@ Before you start, make sure: === "API" - Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): + Copy the following command to your terminal. Make sure to update the below parameters according to the comments. Use the **project** and **cluster** where the previous inference workloads were created. For more details, see [Workspaces API](https://api-docs.run.ai/latest/tag/Workspaces): - ```sh - curl -L 'https:///api/v1/workloads/workspaces' \ # is the link to the Run:ai user interface. + ```bash + curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # is the API access token obtained in Step 1. + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API. - "clusterId": "", \ # is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + "projectId": "", + "clusterId": "", "spec": { "image": "runai.jfrog.io/core-llm/llm-app", "environmentVariables": [ - { - "name": "RUNAI_MODEL_NAME", - "value": "meta-llama/Llama-3.2-1B-Instruct" - }, - { - "name": "RUNAI_MODEL_BASE_URL", - "value": "" #The URL for connecting an external service related to the workload. You can get the URL via the List workloads API. - } + { + "name": "RUNAI_MODEL_NAME", + "value": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "name": "RUNAI_MODEL_BASE_URL", + "value": "" + } ], "compute": { "cpuCoreRequest":0.1, "cpuMemoryRequest": "100M", + } } - }' + } ``` + 1. `` is the link to the Run:ai user interface. + 2. 
`` is the API access token obtained in Step 1. + 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. + + !!! Note The above API snippet runs with Run:ai clusters of 2.18 and above only. diff --git a/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md b/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md index 58a382ee1c..bc8d297112 100644 --- a/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md +++ b/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md @@ -82,9 +82,9 @@ Before you start, make sure: ``` bash runai project set "project-name" - runai distributed submit "workload-name" --framework PyTorch \ - -i kubeflow/pytorch-dist-mnist:latest --workers 2 - --gpu-request-type portion --gpu-portion-request 0.1 --gpu-devices-request 1 --cpu-memory-request 100M + runai distributed submit "workloand-name" --framework PyTorch \ + -i kubeflow/pytorch-dist-mnist:latest --workers 2 \ + --gpu-request-type portion --gpu-portion-request 0.1 --gpu-devices-request 1 --cpu-memory-request 100M ``` This would start a distributed training workload based on kubeflow/pytorch-dist-mnist:latest with one master and two workers. @@ -93,9 +93,9 @@ Before you start, make sure: Copy the following command to your terminal. 
Make sure to update the below with the name of your project and workload: ``` bash - runai config project "project-name" + runai config project "project-name" runai submit-dist pytorch "workload-name" --workers=2 -g 0.1 \ - -i kubeflow/pytorch-dist-mnist:latest + -i kubeflow/pytorch-dist-mnist:latest ``` This would start a distributed training workload based on kubeflow/pytorch-dist-mnist:latest with one master and two workers. @@ -104,13 +104,13 @@ Before you start, make sure: Copy the following command to your terminal. Make sure to update the below parameters according to the comments. For more details, see [Distributed API reference](https://api-docs.run.ai/latest/tag/Distributed): ``` bash - curl -L 'https:///api/v1/workloads/distributed' \ # (1) + curl -L 'https:///api/v1/workloads/distributed' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # (2) + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ # (3) - "clusterId": "", \ # (4) + "projectId": "", + "clusterId": "", "spec": { "compute": { "cpuCoreRequest": 0.1, "cpuMemoryRequest": "100M", "gpuDevicesRequest": 1, "gpuRequestType": "portion", "gpuPortionRequest": 0.1 }, "image": "kubeflow/pytorch-dist-mnist:latest", - "numWorkers": 2, \ - "distributedFramework": "PyTorch" \ + "numWorkers": 2, + "distributedFramework": "PyTorch" } - }' + }' ``` 1. `` is the link to the Run:ai user interface. diff --git a/docs/Researcher/workloads/training/standard-training/quickstart-standard-training.md b/docs/Researcher/workloads/training/standard-training/quickstart-standard-training.md index 8e5043228a..07ec8651d3 100644 --- a/docs/Researcher/workloads/training/standard-training/quickstart-standard-training.md +++ b/docs/Researcher/workloads/training/standard-training/quickstart-standard-training.md @@ -103,20 +103,20 @@ Before you start, make sure: Copy the following command to your terminal. Make sure to update the below parameters according to the comments.
For more details, see [Trainings API reference](https://api-docs.run.ai/latest/tag/Trainings): ``` bash - curl -L 'https:///api/v1/workloads/trainings' \ # (1) + curl -L 'https:///api/v1/workloads/trainings' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # (2) + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ # (3) - "clusterId": "", \ # (4) + "projectId": "", + "clusterId": "", "spec": { "image": "runai.jfrog.io/demo/quickstart", "compute": { "gpuDevicesRequest": 1 - }, + } } - }' + }' ``` 1. `` is the link to the Run:ai user interface. diff --git a/docs/Researcher/workloads/workspaces/quickstart-jupyter.md b/docs/Researcher/workloads/workspaces/quickstart-jupyter.md index b551bc9df5..5fc9356081 100644 --- a/docs/Researcher/workloads/workspaces/quickstart-jupyter.md +++ b/docs/Researcher/workloads/workspaces/quickstart-jupyter.md @@ -101,9 +101,8 @@ Before you start, make sure: ``` bash runai project set "project-name" - runai workspace submit jupyter-notebook -i jupyter/scipy-notebook -g 1 \ - --external-url container=8888 --command \ - -- start-notebook.sh --NotebookApp.base_url=/\${RUNAI_PROJECT}/\${RUNAI_JOB_NAME} --NotebookApp.token='' + runai workspace submit "workload-name" --image jupyter/scipy-notebook --gpu-devices-request 0 --command --external-url \ + container=8888 -- start-notebook.sh --NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token= ``` === "CLI V1 [Deprecated]" @@ -120,13 +119,13 @@ Before you start, make sure: Copy the following command to your terminal. Make sure to update the below parameters according to the comments.
For more details, see [Workspaces API reference](https://api-docs.run.ai/latest/tag/Workspaces): ``` bash - curl -L 'https:///api/v1/workloads/workspaces' \ # (1) + curl -L 'https:///api/v1/workloads/workspaces' \ -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer ' \ # (2) + -H 'Authorization: Bearer ' \ -d '{ "name": "workload-name", - "projectId": "", '\ # (3) - "clusterId": "", \ # (4) + "projectId": "", + "clusterId": "", "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", @@ -137,8 +136,8 @@ Before you start, make sure: "exposedUrls" : [ { "container" : 8888, - "toolType": "jupyter-notebook", \ # (5) - "toolName": "Jupyter" \ # (6) + "toolType": "jupyter-notebook", + "toolName": "Jupyter" } ] } @@ -147,7 +146,7 @@ Before you start, make sure: 1. `` is the link to the Run:ai user interface. 2. `` is the API access token obtained in Step 1. - 3. `` is #The ID of the Project the workspace is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. + 3. `` is #The ID of the Project the workload is running on. You can get the Project ID via the Get Projects API [Get Projects API](https://app.run.ai/api/docs#tag/Projects/operation/get_projects){target=_blank}. 4. `` is the unique identifier of the Cluster. You can get the Cluster UUID by adding the "Cluster ID" column to the Clusters view. 5. `toolType` will show the Jupyter icon when connecting to the Jupyter tool via the user interface. 6. `toolName` text will show when connecting to the Jupyter tool via the user interface. 
From a93001ad217c668aee46d7a402fe9374827ea64b Mon Sep 17 00:00:00 2001 From: Sherin Date: Tue, 18 Mar 2025 13:03:33 +0200 Subject: [PATCH 2/3] Updated quick start --- .../launching-workloads-with-dynamic-gpu-fractions.md | 4 ++-- .../distributed-training/quickstart-distributed-training.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md b/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md index f9f1c139d2..363b4027bf 100644 --- a/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md +++ b/docs/Researcher/scheduling/launching-workloads-with-dynamic-gpu-fractions.md @@ -113,7 +113,7 @@ Before you start, make sure: "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", - "image": "jupyter/scipy-notebook", + "image": "gcr.io/run-ai-lab/pytorch-example-jupyter", "compute": { "gpuDevicesRequest": 1, "gpuMemoryRequest": "4G", @@ -189,7 +189,7 @@ Before you start, make sure: "spec": { "command" : "start-notebook.sh", "args" : "--NotebookApp.base_url=/${RUNAI_PROJECT}/${RUNAI_JOB_NAME} --NotebookApp.token=''", - "image": "jupyter/scipy-notebook", + "image": "gcr.io/run-ai-lab/pytorch-example-jupyter", "compute": { "gpuDevicesRequest": 1, "gpuMemoryRequest": "4G", diff --git a/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md b/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md index bc8d297112..b6205520fc 100644 --- a/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md +++ b/docs/Researcher/workloads/training/distributed-training/quickstart-distributed-training.md @@ -83,8 +83,8 @@ Before you start, make sure: ``` bash runai project set "project-name" runai distributed submit "workloand-name" --framework PyTorch \ - -i 
kubeflow/pytorch-dist-mnist:latest --workers 2 \ - --gpu-request-type portion --gpu-portion-request 0.1 --gpu-devices-request 1 --cpu-memory-request 100M + -i kubeflow/pytorch-dist-mnist:latest --workers 2 \ + --gpu-request-type portion --gpu-portion-request 0.1 --gpu-devices-request 1 --cpu-memory-request 100M ``` This would start a distributed training workload based on kubeflow/pytorch-dist-mnist:latest with one master and two workers. @@ -95,7 +95,7 @@ Before you start, make sure: ``` bash runai config project "project-name" runai submit-dist pytorch "workload-name" --workers=2 -g 0.1 \ - -i kubeflow/pytorch-dist-mnist:latest + -i kubeflow/pytorch-dist-mnist:latest ``` This would start a distributed training workload based on kubeflow/pytorch-dist-mnist:latest with one master and two workers. From 1d09360ead7f0071eeda23ee7f5d6a57194ae2c3 Mon Sep 17 00:00:00 2001 From: Sherin Date: Tue, 18 Mar 2025 18:30:32 +0200 Subject: [PATCH 3/3] Update launching-workloads-with-gpu-memory-swap.md --- ...aunching-workloads-with-gpu-memory-swap.md | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md b/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md index 4e5268bcee..409cd19a22 100644 --- a/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md +++ b/docs/Researcher/scheduling/launching-workloads-with-gpu-memory-swap.md @@ -89,18 +89,18 @@ Before you start, make sure: "image": "runai.jfrog.io/core-llm/runai-vllm:v0.6.4-0.10.0", "imagePullPolicy":"IfNotPresent", "environmentVariables": [ - { + { "name": "RUNAI_MODEL", "value": "meta-lama/Llama-3.2-1B-Instruct" - }, - { + }, + { "name": "VLLM_RPC_TIMEOUT", "value": "60000" - }, - { + }, + { "name": "HF_TOKEN", "value":"" - } + } ], "compute": { "gpuDevicesRequest": 1, @@ -165,18 +165,18 @@ Before you start, make sure: "image": "runai.jfrog.io/core-llm/runai-vllm:v0.6.4-0.10.0", 
"imagePullPolicy":"IfNotPresent", "environmentVariables": [ - { + { "name": "RUNAI_MODEL", "value": "meta-lama/Llama-3.2-1B-Instruct" - }, - { + }, + { "name": "VLLM_RPC_TIMEOUT", "value": "60000" - }, - { + }, + { "name": "HF_TOKEN", "value":"" - } + } ], "compute": { "gpuDevicesRequest": 1, @@ -280,14 +280,14 @@ Before you start, make sure: "spec": { "image": "runai.jfrog.io/core-llm/llm-app", "environmentVariables": [ - { + { "name": "RUNAI_MODEL_NAME", "value": "meta-llama/Llama-3.2-1B-Instruct" - }, - { + }, + { "name": "RUNAI_MODEL_BASE_URL", "value": "" - } + } ], "compute": { "cpuCoreRequest":0.1,