bug: tolerating metadata written by different engines with Delta Lake #830

Description

@djouallah

I was appending data to a Delta table; this worked fine before, but after I upgraded I am getting the following error. Note that the table is being saved to an abfss location.

QueryExecutionException                   Traceback (most recent call last)
File <timed exec>:16

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/readwriter.py:679, in DataFrameWriter.save(self, path, format, mode, partitionBy, **options)
    677     self.format(format)
    678 self._write.path = path
--> 679 _, _, ei = self._spark.client.execute_command(
    680     self._write.command(self._spark.client), self._write.observations
    681 )
    682 self._callback(ei)

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py:1148, in SparkConnectClient.execute_command(self, command, observations)
   1146     req.user_context.user_id = self._user_id
   1147 req.plan.command.CopyFrom(command)
-> 1148 data, _, metrics, observed_metrics, properties = self._execute_and_fetch(
   1149     req, observations or {}
   1150 )
   1151 # Create a query execution object.
   1152 ei = ExecutionInfo(metrics, observed_metrics)

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py:1560, in SparkConnectClient._execute_and_fetch(self, req, observations, self_destruct)
   1557 properties: Dict[str, Any] = {}
   1559 with Progress(handlers=self._progress_handlers, operation_id=req.operation_id) as progress:
-> 1560     for response in self._execute_and_fetch_as_iterator(
   1561         req, observations, progress=progress
   1562     ):
   1563         if isinstance(response, StructType):
   1564             schema = response

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py:1537, in SparkConnectClient._execute_and_fetch_as_iterator(self, req, observations, progress)
   1535     raise kb
   1536 except Exception as error:
-> 1537     self._handle_error(error)

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py:1811, in SparkConnectClient._handle_error(self, error)
   1809     self.thread_local.inside_error_handling = True
   1810     if isinstance(error, grpc.RpcError):
-> 1811         self._handle_rpc_error(error)
   1812     raise error
   1813 finally:

File ~/jupyter-env/python3.11/lib/python3.11/site-packages/pyspark/sql/connect/client/core.py:1882, in SparkConnectClient._handle_rpc_error(self, rpc_error)
   1879             if info.metadata["errorClass"] == "INVALID_HANDLE.SESSION_CHANGED":
   1880                 self._closed = True
-> 1882             raise convert_exception(
   1883                 info,
   1884                 status.message,
   1885                 self._fetch_enriched_error(info),
   1886                 self._display_server_stack_trace(),
   1887             ) from None
   1889     raise SparkConnectGrpcException(status.message) from None
   1890 else:

QueryExecutionException: Delta transaction failed, version 0 already exists.
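For context, "version 0 already exists" generally means the Delta writer tried to commit _delta_log/00000000000000000000.json, but that file was already present on storage, which is exactly what happens when the table's version 0 was written by a different engine, as the issue title describes. A minimal sketch of that cross-engine pattern, assuming the table was first created with the deltalake (delta-rs) package; the abfss path, storage account, and data below are placeholders, not values from the actual environment:

import pandas as pd
from deltalake import write_deltalake
from pyspark.sql import SparkSession

# Placeholder abfss location; real credentials would be supplied via
# storage_options / Spark configuration.
path = "abfss://container@account.dfs.core.windows.net/tables/demo"

# Engine 1 (delta-rs) creates the table and commits version 0,
# i.e. _delta_log/00000000000000000000.json.
write_deltalake(path, pd.DataFrame({"id": [1, 2, 3]}))

# Engine 2 (Spark + Delta Lake) appends. If the Spark side does not
# recognize the existing log, it also attempts to commit version 0 and
# fails with "Delta transaction failed, version 0 already exists."
spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(4,), (5,)], ["id"])
df.write.format("delta").mode("append").save(path)

Listing the table's _delta_log directory and checking whether 00000000000000000000.json is already there (and which engine wrote it) should confirm whether this is the same cross-engine conflict.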
