From f00659875dd5719fe3a30483aec29079fbba5830 Mon Sep 17 00:00:00 2001
From: gwdgithubnom
Date: Thu, 3 Jul 2025 15:25:46 +0800
Subject: [PATCH] [fix] Resolve the issue where PySpark fails to run due to different Python minor versions. Run Python directly. (#1)

---
 python/pyspark/core/context.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/core/context.py b/python/pyspark/core/context.py
index f4d3bbcf8f5b9..ee42f00cad4b2 100644
--- a/python/pyspark/core/context.py
+++ b/python/pyspark/core/context.py
@@ -340,7 +340,7 @@ def _do_init(
         )
         os.environ["SPARK_BUFFER_SIZE"] = str(self._jvm.PythonUtils.getSparkBufferSize(self._jsc))
 
-        self.pythonExec = os.environ.get("PYSPARK_PYTHON", "python3")
+        self.pythonExec = os.environ.get("PYSPARK_PYTHON", self._get_python_exec_from_conf())
         self.pythonVer = "%d.%d" % sys.version_info[:2]
 
         # Broadcast's __reduce__ method stores Broadcast instances here.
@@ -414,6 +414,32 @@ def signal_handler(signal: Any, frame: Any) -> NoReturn:
         ):
             signal.signal(signal.SIGINT, signal_handler)
 
+    def _get_python_exec_from_conf(self) -> str:
+        """
+        Resolve the Python executable from the Spark configuration.
+
+        Supplies the default for ``pythonExec`` when the PYSPARK_PYTHON environment
+        variable is not set. The keys below are checked in order and the first
+        non-blank value wins; surrounding whitespace is stripped.
+
+        Returns:
+            str: The configured Python executable, or "python3" when no key holds
+            a non-blank value.
+        """
+        # Checked in priority order; the first non-blank value is used.
+        config_keys = [
+            "spark.pyspark.driver.python",
+            "spark.pyspark.python",
+            "spark.executorEnv.PYSPARK_DRIVER_PYTHON",
+            "spark.executorEnv.PYSPARK_PYTHON",
+        ]
+        for key in config_keys:
+            candidate = self._conf.get(key, None)
+            # Never let an unset or blank entry replace the "python3" fallback.
+            if candidate is not None and candidate.strip():
+                return candidate.strip()
+        return "python3"
+
     def __repr__(self) -> str:
         return "".format(
             master=self.master,