From 7ef1b6549969a9a86c35fe03903457ad4cfa5322 Mon Sep 17 00:00:00 2001
From: manu-sj <152865565+manu-sj@users.noreply.github.com>
Date: Thu, 3 Jul 2025 09:42:07 +0200
Subject: [PATCH] [HWORKS-2190][APPEND] Updating job configuration to include
file, pyfiles, archives and jars (#478)
* updating docs for jobs configs to include files, pyFiles, jars and archives
* updating based on review comments
* updating documentation for notebooks and python Jobs
---
docs/user_guides/projects/jobs/notebook_job.md | 1 +
docs/user_guides/projects/jobs/pyspark_job.md | 6 +++++-
docs/user_guides/projects/jobs/python_job.md | 1 +
docs/user_guides/projects/jobs/spark_job.md | 7 ++++++-
4 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/docs/user_guides/projects/jobs/notebook_job.md b/docs/user_guides/projects/jobs/notebook_job.md
index 364b5900e..7c724bcb7 100644
--- a/docs/user_guides/projects/jobs/notebook_job.md
+++ b/docs/user_guides/projects/jobs/notebook_job.md
@@ -179,6 +179,7 @@ The following table describes the JSON payload returned by `jobs_api.get_configu
| `resourceConfig.gpus` | number (int) | Number of GPUs to be allocated | `0` |
| `logRedirection` | boolean | Whether logs are redirected | `true` |
| `jobType` | string | Type of job | `"PYTHON"` |
+| `files` | string | HDFS path(s) to files to be provided to the Notebook Job. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/file1.py,hdfs:///Project//Resources/file2.txt"` | `null` |
## Accessing project data
diff --git a/docs/user_guides/projects/jobs/pyspark_job.md b/docs/user_guides/projects/jobs/pyspark_job.md
index c0cb7e804..e329312f3 100644
--- a/docs/user_guides/projects/jobs/pyspark_job.md
+++ b/docs/user_guides/projects/jobs/pyspark_job.md
@@ -217,7 +217,7 @@ The following table describes the JSON payload returned by `jobs_api.get_configu
| Field | Type | Description | Default |
| ------------------------------------------ | -------------- |-----------------------------------------------------| -------------------------- |
| `type` | string | Type of the job configuration | `"sparkJobConfiguration"` |
-| `appPath` | string | Project path to script (e.g `Resources/foo.py`) | `null` |
+| `appPath` | string | Project path to script (e.g `Resources/foo.py`) | `null` |
| `environmentName` | string | Name of the project spark environment | `"spark-feature-pipeline"` |
| `spark.driver.cores` | number (float) | Number of CPU cores allocated for the driver | `1.0` |
| `spark.driver.memory` | number (int) | Memory allocated for the driver (in MB) | `2048` |
@@ -229,6 +229,10 @@ The following table describes the JSON payload returned by `jobs_api.get_configu
| `spark.dynamicAllocation.maxExecutors` | number (int) | Maximum number of executors with dynamic allocation | `2` |
| `spark.dynamicAllocation.initialExecutors` | number (int) | Initial number of executors with dynamic allocation | `1` |
| `spark.blacklist.enabled` | boolean | Whether executor/node blacklisting is enabled | `false` |
+| `files` | string | HDFS path(s) to files to be provided to the Spark application. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/file1.py,hdfs:///Project//Resources/file2.txt"` | `null` |
+| `pyFiles` | string | HDFS path(s) to Python files to be provided to the Spark application. These will be added to the `PYTHONPATH` so they can be imported as modules. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/module1.py,hdfs:///Project//Resources/module2.py"` | `null` |
+| `jars` | string | HDFS path(s) to JAR files to be provided to the Spark application. These will be added to the classpath. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/lib1.jar,hdfs:///Project//Resources/lib2.jar"` | `null` |
+| `archives` | string | HDFS path(s) to archive files to be provided to the Spark application. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/archive1.zip,hdfs:///Project//Resources/archive2.tar.gz"` | `null` |
## Accessing project data
diff --git a/docs/user_guides/projects/jobs/python_job.md b/docs/user_guides/projects/jobs/python_job.md
index 420e38e49..0fa2a9e9f 100644
--- a/docs/user_guides/projects/jobs/python_job.md
+++ b/docs/user_guides/projects/jobs/python_job.md
@@ -177,6 +177,7 @@ The following table describes the JSON payload returned by `jobs_api.get_configu
| `resourceConfig.gpus` | number (int) | Number of GPUs to be allocated | `0` |
| `logRedirection` | boolean | Whether logs are redirected | `true` |
| `jobType` | string | Type of job | `"PYTHON"` |
+| `files` | string | HDFS path(s) to files to be provided to the Python Job. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/file1.py,hdfs:///Project//Resources/file2.txt"` | `null` |
## Accessing project data
diff --git a/docs/user_guides/projects/jobs/spark_job.md b/docs/user_guides/projects/jobs/spark_job.md
index 6d0f0510b..6345d5a65 100644
--- a/docs/user_guides/projects/jobs/spark_job.md
+++ b/docs/user_guides/projects/jobs/spark_job.md
@@ -230,7 +230,12 @@ The following table describes the JSON payload returned by `jobs_api.get_configu
| `spark.dynamicAllocation.minExecutors` | number (int) | Minimum number of executors with dynamic allocation | `1` |
| `spark.dynamicAllocation.maxExecutors` | number (int) | Maximum number of executors with dynamic allocation | `2` |
| `spark.dynamicAllocation.initialExecutors` | number (int) | Initial number of executors with dynamic allocation | `1` |
-| `spark.blacklist.enabled` | boolean | Whether executor/node blacklisting is enabled | `false` |
+| `spark.blacklist.enabled` | boolean | Whether executor/node blacklisting is enabled | `false` |
+| `files` | string | HDFS path(s) to files to be provided to the Spark application. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/file1.py,hdfs:///Project//Resources/file2.txt"` | `null` |
+| `pyFiles` | string | HDFS path(s) to Python files to be provided to the Spark application. These will be added to the `PYTHONPATH` so they can be imported as modules. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/module1.py,hdfs:///Project//Resources/module2.py"` | `null` |
+| `jars` | string | HDFS path(s) to JAR files to be provided to the Spark application. These will be added to the classpath. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/lib1.jar,hdfs:///Project//Resources/lib2.jar"` | `null` |
+| `archives` | string | HDFS path(s) to archive files to be provided to the Spark application. Multiple files can be included in a single string, separated by commas. Example: `"hdfs:///Project//Resources/archive1.zip,hdfs:///Project//Resources/archive2.tar.gz"` | `null` |
+
## Accessing project data