@@ -229,6 +229,12 @@ object AnalysisContext {
set(context)
try f finally { set(originContext) }
}

private[sql] def withAnalysisContext[A](context: AnalysisContext)(f: => A): A = {
Comment (Contributor Author): Needed for testing below.

val originContext = value.get()
set(context)
try f finally { set(originContext) }
}
}
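A minimal sketch (not part of this diff) of how the new helper might be exercised from a test in the org.apache.spark.sql package (the method is private[sql]), assuming AnalysisContext is the analyzer's case class so copy() is available:

    // Hypothetical test usage; the exact fields of AnalysisContext are assumptions.
    val customContext = AnalysisContext.get.copy()
    AnalysisContext.withAnalysisContext(customContext) {
      // Anything analyzed here sees customContext (e.g. its relationCache);
      // the previous context is restored when the block exits, even on failure.
      assert(AnalysisContext.get eq customContext)
    }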

object Analyzer {
@@ -139,7 +139,8 @@ class RelationResolution(override val catalogManager: CatalogManager)
ident,
table,
u.clearWritePrivileges.options,
u.isStreaming
u.isStreaming,
finalTimeTravelSpec
)
loaded.foreach(AnalysisContext.get.relationCache.update(key, _))
u.getTagValue(LogicalPlan.PLAN_ID_TAG)
@@ -162,7 +163,8 @@ class RelationResolution(override val catalogManager: CatalogManager)
ident: Identifier,
table: Option[Table],
options: CaseInsensitiveStringMap,
isStreaming: Boolean): Option[LogicalPlan] = {
isStreaming: Boolean,
timeTravelSpec: Option[TimeTravelSpec]): Option[LogicalPlan] = {
table.map {
// To utilize this code path to execute V1 commands, e.g. INSERT,
// either it must be session catalog, or tracksPartitionsInCatalog
@@ -189,6 +191,7 @@

case table =>
if (isStreaming) {
assert(timeTravelSpec.isEmpty, "time travel is not allowed in streaming")
Comment (Contributor Author): It should be impossible to reach this line with a valid time travel spec. Just a sanity check.

val v1Fallback = table match {
case withFallback: V2TableWithV1Fallback =>
Some(UnresolvedCatalogRelation(withFallback.v1Table, isStreaming = true))
@@ -210,7 +213,7 @@
} else {
SubqueryAlias(
catalog.name +: ident.asMultipartIdentifier,
DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)
DataSourceV2Relation.create(table, Some(catalog), Some(ident), options, timeTravelSpec)
)
}
}
@@ -27,8 +27,12 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap

sealed trait TimeTravelSpec

case class AsOfTimestamp(timestamp: Long) extends TimeTravelSpec
case class AsOfVersion(version: String) extends TimeTravelSpec
case class AsOfTimestamp(timestamp: Long) extends TimeTravelSpec {
override def toString: String = s"TIMESTAMP AS OF $timestamp"
Comment (Contributor Author): Needed for proper simpleString implementation in DataSourceV2Relation. See tests below.

}
case class AsOfVersion(version: String) extends TimeTravelSpec {
Comment (Member): nit: add a blank line between the two classes

override def toString: String = s"VERSION AS OF '$version'"
}
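For reference, the new toString implementations render as follows (the values are illustrative):

    AsOfTimestamp(1697241600000000L).toString  // "TIMESTAMP AS OF 1697241600000000"
    AsOfVersion("v2").toString                 // "VERSION AS OF 'v2'"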

object TimeTravelSpec {
def create(
@@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.datasources.v2

import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation}
import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation, TimeTravelSpec}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Expression, SortOrder}
import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, ExposesMetadataColumns, Histogram, HistogramBin, LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
@@ -45,7 +45,8 @@ abstract class DataSourceV2RelationBase(
output: Seq[AttributeReference],
catalog: Option[CatalogPlugin],
identifier: Option[Identifier],
options: CaseInsensitiveStringMap)
options: CaseInsensitiveStringMap,
timeTravelSpec: Option[TimeTravelSpec] = None)
Comment (Contributor): I think it's nice to have this field so that Spark is aware of the version of the table explicitly. But I don't quite understand why it's necessary, as an implementation can remember the time travel spec in the v2 Table returned by loadTable with a time travel spec. The v2 Table#currentVersion can be used to get the table version explicitly.

Reply (Contributor Author, @aokolnychyi, Oct 14, 2025): One of the use cases that both Iceberg and Delta struggle with today is checking that a query uses consistent versions of the table throughout the plan. Having currentVersion is one step, but we need to distinguish time travel, as it is OK to have different versions in that case. I want Spark to handle these checks and also reload tables to consistent versions whenever that's needed (will be done in subsequent PRs). Today both Iceberg and Delta try to implement this check/reload on their side, but it is really tricky in connectors. There are still unhandled edge cases.

Another, even bigger, use case is tracking read sets in DELETE, UPDATE, and MERGE. I have a proposal/PR about a transactional catalog that allows one to capture all operations that happened during an operation for snapshot and serializable isolation. It is also important to track and distinguish time travel there.

Does this make sense?

Reply (Contributor Author): The third use case would be views that capture logical plans. They currently rely on tricks in connectors with refresh. I want to simplify/fix that by moving the refresh to Spark so that DSv2 connectors can pin versions correctly.

extends LeafNode with MultiInstanceRelation with NamedRelation {

import DataSourceV2Implicits._
@@ -65,7 +66,12 @@ abstract class DataSourceV2RelationBase(
override def skipSchemaResolution: Boolean = table.supports(TableCapability.ACCEPT_ANY_SCHEMA)

override def simpleString(maxFields: Int): String = {
s"RelationV2${truncatedString(output, "[", ", ", "]", maxFields)} $name"
val outputString = truncatedString(output, "[", ", ", "]", maxFields)
Comment (Contributor Author): Covered with tests.

val nameWithTimeTravelSpec = timeTravelSpec match {
case Some(spec) => s"$name $spec"
case _ => name
}
s"RelationV2$outputString $nameWithTimeTravelSpec"
}
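As a rough illustration (table name, columns, and expression IDs below are made up), the rendered plan line now carries the time travel spec when one is present:

    // timeTravelSpec = None:
    //   RelationV2[id#1, data#2] testcat.ns.tbl
    // timeTravelSpec = Some(AsOfVersion("v2")):
    //   RelationV2[id#1, data#2] testcat.ns.tbl VERSION AS OF 'v2'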

override def computeStats(): Statistics = {
@@ -96,8 +102,9 @@ case class DataSourceV2Relation(
override val output: Seq[AttributeReference],
catalog: Option[CatalogPlugin],
identifier: Option[Identifier],
options: CaseInsensitiveStringMap)
extends DataSourceV2RelationBase(table, output, catalog, identifier, options)
options: CaseInsensitiveStringMap,
timeTravelSpec: Option[TimeTravelSpec] = None)
extends DataSourceV2RelationBase(table, output, catalog, identifier, options, timeTravelSpec)
with ExposesMetadataColumns {

import DataSourceV2Implicits._
@@ -117,7 +124,7 @@ case class DataSourceV2Relation(
def withMetadataColumns(): DataSourceV2Relation = {
val newMetadata = metadataOutput.filterNot(outputSet.contains)
if (newMetadata.nonEmpty) {
DataSourceV2Relation(table, output ++ newMetadata, catalog, identifier, options)
copy(output = output ++ newMetadata)
} else {
this
}
@@ -151,7 +158,12 @@ case class DataSourceV2ScanRelation(
override def name: String = relation.name

override def simpleString(maxFields: Int): String = {
s"RelationV2${truncatedString(output, "[", ", ", "]", maxFields)} $name"
val outputString = truncatedString(output, "[", ", ", "]", maxFields)
val nameWithTimeTravelSpec = relation.timeTravelSpec match {
case Some(spec) => s"$name $spec"
case _ => name
}
s"RelationV2$outputString $nameWithTimeTravelSpec"
}

override def computeStats(): Statistics = {
@@ -235,17 +247,29 @@ object ExtractV2Table {
def unapply(relation: DataSourceV2Relation): Option[Table] = Some(relation.table)
}

object ExtractV2CatalogAndIdentifier {
def unapply(relation: DataSourceV2Relation): Option[(CatalogPlugin, Identifier)] = {
relation match {
case DataSourceV2Relation(_, _, Some(catalog), Some(identifier), _, _) =>
Some((catalog, identifier))
case _ =>
None
}
}
}
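A small sketch (hypothetical caller, not from this PR) of how the new extractor might be used; it only matches relations that carry both a catalog and an identifier:

    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper

    def qualifiedName(relation: DataSourceV2Relation): Option[Seq[String]] = relation match {
      case ExtractV2CatalogAndIdentifier(catalog, ident) =>
        Some(ident.toQualifiedNameParts(catalog))
      case _ => None
    }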

object DataSourceV2Relation {
def create(
table: Table,
catalog: Option[CatalogPlugin],
identifier: Option[Identifier],
options: CaseInsensitiveStringMap): DataSourceV2Relation = {
options: CaseInsensitiveStringMap,
timeTravelSpec: Option[TimeTravelSpec] = None): DataSourceV2Relation = {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
// The v2 source may return schema containing char/varchar type. We replace char/varchar
// with "annotated" string type here as the query engine doesn't support char/varchar yet.
val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(table.columns.asSchema)
DataSourceV2Relation(table, toAttributes(schema), catalog, identifier, options)
DataSourceV2Relation(table, toAttributes(schema), catalog, identifier, options, timeTravelSpec)
}
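A minimal sketch of building a pinned relation through the extended factory method; table, catalog, and ident are placeholders obtained from an earlier loadTable call:

    // Hypothetical call site.
    val pinned = DataSourceV2Relation.create(
      table,
      Some(catalog),
      Some(ident),
      CaseInsensitiveStringMap.empty(),
      Some(AsOfVersion("v1")))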

def create(
@@ -730,6 +730,10 @@ abstract class InMemoryBaseTable(
}
}
}

def copy(): Table = {
throw new UnsupportedOperationException(s"copy is not supported for ${getClass.getName}")
}
}

object InMemoryBaseTable {
@@ -125,6 +125,41 @@ class InMemoryTable(
new InMemoryWriterBuilderWithOverWrite(info)
}

override def copy(): Table = {
val copiedTable = new InMemoryTable(
name,
columns(),
partitioning,
properties,
constraints,
distribution,
ordering,
numPartitions,
advisoryPartitionSize,
isDistributionStrictlyRequired,
numRowsPerSplit)

copiedTable.dataMap.synchronized {
dataMap.foreach { case (key, splits) =>
val copiedSplits = splits.map { bufferedRows =>
val copiedBufferedRows = new BufferedRows(bufferedRows.key, bufferedRows.schema)
copiedBufferedRows.rows ++= bufferedRows.rows.map(_.copy())
copiedBufferedRows
}
copiedTable.dataMap.put(key, copiedSplits)
}
}

copiedTable.commits ++= commits.map(_.copy())

copiedTable.setCurrentVersion(currentVersion())
if (validatedVersion() != null) {
copiedTable.setValidatedVersion(validatedVersion())
}

copiedTable
}
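The deep copy matters because the catalog below pins versions by snapshotting the table; a rough sanity check (test-only, names are illustrative) might look like:

    val snapshot = table.copy().asInstanceOf[InMemoryTable]
    // Writes applied to `table` after this point must not show up in `snapshot`,
    // otherwise a pinned version would drift along with the live table.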

class InMemoryWriterBuilderWithOverWrite(override val info: LogicalWriteInfo)
extends InMemoryWriterBuilder(info) with SupportsOverwrite {

@@ -66,6 +66,19 @@ class BasicInMemoryTableCatalog extends TableCatalog {
}
}

def pinTable(ident: Identifier, version: String): Unit = {
Comment (Contributor Author): Used in time travel tests.

Option(tables.get(ident)) match {
case Some(table: InMemoryBaseTable) =>
val versionIdent = Identifier.of(ident.namespace, ident.name + version)
val versionTable = table.copy()
tables.put(versionIdent, versionTable)
case Some(table) =>
throw new UnsupportedOperationException(s"Can't pin ${table.getClass.getName}")
case _ =>
throw new NoSuchTableException(ident.asMultipartIdentifier)
}
}
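A rough sketch of how a time travel test might use this hook (the SQL, helper names, and identifiers are illustrative, not from this PR): pin the current contents under a version string, mutate the table, then query the pinned version.

    sql("INSERT INTO testcat.ns.tbl VALUES (1, 'a')")
    inMemoryCatalog.pinTable(Identifier.of(Array("ns"), "tbl"), "v1")
    sql("INSERT INTO testcat.ns.tbl VALUES (2, 'b')")
    // loadTable(ident, "v1") resolves the copy stored under "tblv1",
    // so only the first row is visible here.
    sql("SELECT * FROM testcat.ns.tbl VERSION AS OF 'v1'")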

override def loadTable(ident: Identifier, version: String): Table = {
val versionIdent = Identifier.of(ident.namespace, ident.name + version)
Option(tables.get(versionIdent)) match {
@@ -21,18 +21,21 @@ import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.internal.{Logging, MessageWithContext}
import org.apache.spark.internal.LogKeys._
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression}
import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint
import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, View}
import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION
import org.apache.spark.sql.catalyst.util.sideBySide
import org.apache.spark.sql.classic.{Dataset, SparkSession}
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.execution.columnar.InMemoryRelation
import org.apache.spark.sql.execution.command.CommandUtils
import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation, LogicalRelationWithTable}
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, ExtractV2Table, FileTable}
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation,
ExtractV2CatalogAndIdentifier, ExtractV2Table, FileTable}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK
@@ -82,6 +85,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
cachedData.isEmpty
}

Comment (Contributor): The changes in this file are quite confusing. Before this PR, DataSourceV2Relation did not contain the time travel spec, but the Table instance should contain it. This means that if a table scan is cached with version 1, and then we uncache the same table scan but with version 2, we won't uncache the version 1 scan.

Now we put the time travel spec in DataSourceV2Relation, which makes this behavior more reliable in case the Table instance does not contain the version.

I don't quite understand what we are trying to do here. If we want to bring back the old behavior, we can simply clear out the time travel spec in DataSourceV2Relation#canonicalized.

private[sql] def numCachedEntries: Int = {
Comment (Member): Shall we add a "// Test-only" comment?

cachedData.size
}

// Test-only
def cacheQuery(query: Dataset[_]): Unit = {
cacheQuery(query, tableName = None, storageLevel = MEMORY_AND_DISK)
@@ -211,15 +218,29 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
plan: LogicalPlan,
cascade: Boolean,
blocking: Boolean): Unit = {
uncacheByCondition(spark, _.sameResult(plan), cascade, blocking)
EliminateSubqueryAliases(plan) match {
Comment (Contributor Author): Added this branch to avoid changes in behavior. Some connectors (like Iceberg) use these methods for their custom commands. I wanted to be on the safer side and keep the old behavior for these calls. That is, if any of these methods are called with a DataSourceV2Relation without a time travel spec, we will invalidate all cache entries (including time travel) like before.

Comment (Member): So when r.timeTravelSpec.isEmpty, plan1.sameResult(plan2) won't work with DSv2? Could you add a comment in the code?

case r @ ExtractV2CatalogAndIdentifier(catalog, ident) if r.timeTravelSpec.isEmpty =>
val nameParts = ident.toQualifiedNameParts(catalog)
uncacheTableOrView(spark, nameParts, cascade, blocking)
case _ =>
uncacheByCondition(spark, _.sameResult(plan), cascade, blocking)
}
}
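A condensed sketch of the resulting semantics (the relations below are illustrative):

    // Built without a time travel spec: uncaching this relation falls back to
    // name-based matching, so every cache entry on the table is invalidated,
    // including entries cached with VERSION AS OF / TIMESTAMP AS OF.
    val current = DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)

    // Built with a time travel spec: uncaching goes through sameResult, so only
    // plans that reference this exact pinned relation are dropped.
    val pinned = DataSourceV2Relation.create(
      table, Some(catalog), Some(ident), options, Some(AsOfVersion("v1")))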

def uncacheTableOrView(spark: SparkSession, name: Seq[String], cascade: Boolean): Unit = {
def uncacheTableOrView(
spark: SparkSession,
name: Seq[String],
cascade: Boolean,
blocking: Boolean = false): Unit = {
uncacheByCondition(
spark, isMatchedTableOrView(_, name, spark.sessionState.conf), cascade, blocking = false)
spark, isMatchedTableOrView(_, name, spark.sessionState.conf), cascade, blocking)
}

private def isMatchedTableOrView(plan: LogicalPlan, name: Seq[String], conf: SQLConf): Boolean = {
private def isMatchedTableOrView(
plan: LogicalPlan,
name: Seq[String],
conf: SQLConf,
includeTimeTravel: Boolean = true): Boolean = {
def isSameName(nameInCache: Seq[String]): Boolean = {
nameInCache.length == name.length && nameInCache.zip(name).forall(conf.resolver.tupled)
}
@@ -228,9 +249,9 @@
case LogicalRelationWithTable(_, Some(catalogTable)) =>
isSameName(catalogTable.identifier.nameParts)

case DataSourceV2Relation(_, _, Some(catalog), Some(v2Ident), _) =>
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
isSameName(v2Ident.toQualifiedNameParts(catalog))
case DataSourceV2Relation(_, _, Some(catalog), Some(v2Ident), _, timeTravelSpec) =>
val nameInCache = v2Ident.toQualifiedNameParts(catalog)
isSameName(nameInCache) && (includeTimeTravel || timeTravelSpec.isEmpty)

case v: View =>
isSameName(v.desc.identifier.nameParts)
@@ -299,8 +320,27 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper {
* normalized before being used further.
*/
def recacheByPlan(spark: SparkSession, plan: LogicalPlan): Unit = {
val normalized = QueryExecution.normalize(spark, plan)
recacheByCondition(spark, _.plan.exists(_.sameResult(normalized)))
EliminateSubqueryAliases(plan) match {
case r @ ExtractV2CatalogAndIdentifier(catalog, ident) if r.timeTravelSpec.isEmpty =>
val nameParts = ident.toQualifiedNameParts(catalog)
recacheByTableName(spark, nameParts)
case _ =>
val normalized = QueryExecution.normalize(spark, plan)
recacheByCondition(spark, _.plan.exists(_.sameResult(normalized)))
}
}

/**
* Re-caches all cache entries that reference the given table name.
*/
def recacheByTableName(
Comment (Contributor Author): I will need this in subsequent PRs.

spark: SparkSession,
name: Seq[String],
includeTimeTravel: Boolean = true): Unit = {
def shouldInvalidate(entry: CachedData): Boolean = {
entry.plan.exists(isMatchedTableOrView(_, name, spark.sessionState.conf, includeTimeTravel))
}
recacheByCondition(spark, shouldInvalidate)
}
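For illustration (the name parts are hypothetical), a caller could refresh everything cached for a table while leaving explicitly time-travelled entries untouched:

    // Re-cache all entries that reference testcat.ns.tbl, skipping time travel entries.
    cacheManager.recacheByTableName(spark, Seq("testcat", "ns", "tbl"), includeTimeTravel = false)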

/**