
Commit 34416f7

vladimirg-db authored and yhuang-db committed
[SPARK-52392][SQL] New single-pass Analyzer functionality
### What changes were proposed in this pull request?

New iteration of single-pass Analyzer improvements:
- Implement HAVING
- Remove excessive stack frames by flattening `withNewScope`-shaped methods
- Implement default view collation in the single-pass Analyzer
- Move Hive table resolution to MetadataResolver extensions
- Other bugfixes

### Why are the changes needed?

To replace the existing Spark Analyzer with the single-pass one.

### Does this PR introduce _any_ user-facing change?

No, the single-pass Analyzer is not yet enabled.

### How was this patch tested?

CI with `ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER`.

### Was this patch authored or co-authored using generative AI tooling?

Yes.

Closes apache#51078 from vladimirg-db/vladimir-golubev_data/single-pass-analyzer/improvements.

Authored-by: Vladimir Golubev <vladimir.golubev@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 255249f commit 34416f7
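
Editor's note: the `withNewScope` flattening shows up in the AttributeScopeStack and AggregateResolver diffs below. As a rough, self-contained sketch of the idea (not the actual Spark code): the old callback shape pays an extra closure frame per nested operator, while the flattened shape has the caller push and pop explicitly, keeping exception safety with try/finally.

object WithNewScopeFlatteningSketch {
  private val stack = scala.collection.mutable.Stack.empty[String]

  // Old shape: a higher-order method runs the resolution body inside
  // itself, adding a closure stack frame on every nested call.
  def withNewScope[R](body: => R): R = {
    stack.push("scope")
    try body
    finally stack.pop()
  }

  // New shape: plain push/pop; callers inline the try/finally.
  def pushScope(): Unit = stack.push("scope")
  def popScope(): Unit = stack.pop()

  def main(args: Array[String]): Unit = {
    val viaCallback = withNewScope { "resolved via callback" }

    pushScope()
    val viaExplicitPushPop =
      try "resolved via explicit push/pop" // resolution work would happen here
      finally popScope()

    assert(stack.isEmpty)
    println(s"$viaCallback / $viaExplicitPushPop")
  }
}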

50 files changed: +1743 -794 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AggregateExpressionResolver.scala

Lines changed: 35 additions & 61 deletions
@@ -17,18 +17,14 @@
 
 package org.apache.spark.sql.catalyst.analysis.resolver
 
-import java.util.IdentityHashMap
-
-import org.apache.spark.SparkException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{
   AnsiTypeCoercion,
   CollationTypeCoercion,
   TypeCoercion
 }
-import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, OuterReference, SubExprUtils}
+import org.apache.spark.sql.catalyst.expressions.{Expression, OuterReference, SubExprUtils}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, ListAgg}
-import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Sort}
 import org.apache.spark.sql.catalyst.util.toPrettySQL
 import org.apache.spark.sql.errors.QueryCompilationErrors
 
@@ -90,8 +86,6 @@ class AggregateExpressionResolver(
    * 1. Update the [[ExpressionResolver.expressionResolutionContextStack]];
    * 2. Handle [[OuterReference]] in [[AggregateExpression]], if there are any (see
    *    `handleOuterAggregateExpression`);
-   * 3. Handle [[AggregateExpression]] in [[Sort]] operator (see
-   *    `handleAggregateExpressionInSort`);
    * - Validation:
    * 1. [[ListAgg]] is not allowed in DISTINCT aggregates if it contains [[SortOrder]] different
    *    from its child;
@@ -124,12 +118,7 @@ class AggregateExpressionResolver(
     if (expressionResolutionContext.hasOuterReferences) {
       handleOuterAggregateExpression(aggregateExpressionWithChildrenResolved)
     } else {
-      traversals.current.parentOperator match {
-        case Sort(_, _, aggregate: Aggregate, _) =>
-          handleAggregateExpressionInSort(aggregateExpressionWithChildrenResolved, aggregate)
-        case other =>
-          aggregateExpressionWithChildrenResolved
-      }
+      aggregateExpressionWithChildrenResolved
     }
   }
 
@@ -163,12 +152,15 @@ class AggregateExpressionResolver(
    * - Create a new subtree without [[OuterReference]]s;
    * - Alias this subtree and put it inside the current [[SubqueryScope]];
    * - If outer aggregates are allowed, replace the [[AggregateExpression]] with an
-   *   [[OuterReference]] to the auto-generated [[Alias]] that we created. This alias will later
-   *   be injected into the outer [[Aggregate]]; We store the name that needs to be used for the
-   *   [[OuterReference]] in [[OuterReference.SINGLE_PASS_SQL_STRING_OVERRIDE]] computed based on
-   *   the [[AggregateExpression]] without [[OuterReference]] pulled out.
+   *   [[OuterReference]] to the auto-generated [[Alias]] that we created in case the subtree
+   *   without [[OuterReference]]s can't be found in the outer
+   *   [[Aggregate.aggregateExpressions]] list. Otherwise, use the [[Alias]] from the outer
+   *   [[Aggregate]]. This alias will later be injected into the outer [[Aggregate]];
+   * - Store the name that needs to be used for the [[OuterReference]] in
+   *   [[OuterReference.SINGLE_PASS_SQL_STRING_OVERRIDE]] computed based on the
+   *   [[AggregateExpression]] without [[OuterReference]] pulled out.
    * - In case we have an [[AggregateExpression]] inside a [[Sort]] operator, we need to handle it
-   *   in a special way (see [[handleAggregateExpressionInSort]] for more details).
+   *   in a special way (see [[handleAggregateExpressionOutsideAggregate]] for more details).
    * - Return the original [[AggregateExpression]] otherwise. This is done to stay compatible
    *   with the fixed-point Analyzer - a proper exception will be thrown later by
    *   [[ValidateSubqueryExpression]].
@@ -183,19 +175,12 @@ class AggregateExpressionResolver(
     }
 
     val resolvedOuterAggregateExpression =
-      if (subqueryRegistry.currentScope.isOuterAggregateAllowed) {
-        val aggregateExpressionWithStrippedOuterReferences =
-          SubExprUtils.stripOuterReference(aggregateExpression)
-
-        val outerAggregateExpressionAlias = autoGeneratedAliasProvider.newOuterAlias(
-          child = aggregateExpressionWithStrippedOuterReferences
-        )
-        subqueryRegistry.currentScope.addOuterAggregateExpression(
-          outerAggregateExpressionAlias,
-          aggregateExpressionWithStrippedOuterReferences
+      if (subqueryRegistry.currentScope.aggregateExpressionsExtractor.isDefined) {
+        extractOuterAggregateExpression(
+          aggregateExpression = aggregateExpression,
+          aggregateExpressionsExtractor =
+            subqueryRegistry.currentScope.aggregateExpressionsExtractor.get
         )
-
-        OuterReference(outerAggregateExpressionAlias.toAttribute)
       } else {
         aggregateExpression
       }
@@ -211,41 +196,30 @@ class AggregateExpressionResolver(
     }
   }
 
-  /**
-   * If we order by an [[AggregateExpression]] which is not present in the [[Aggregate]] operator
-   * (child of the [[Sort]]) we have to extract it (by adding it to the
-   * `extractedAggregateExpressionAliases` list of the current expression tree traversal) and add
-   * it to the [[Aggregate]] operator afterwards (this is done in the [[SortResolver]]).
-   */
-  private def handleAggregateExpressionInSort(
-      aggregateExpression: Expression,
-      aggregate: Aggregate): Expression = {
-    val aliasChildToAliasInAggregateExpressions = new IdentityHashMap[Expression, Alias]
-    val aggregateExpressionsSemanticComparator = new SemanticComparator(
-      aggregate.aggregateExpressions.collect {
-        case alias: Alias =>
-          aliasChildToAliasInAggregateExpressions.put(alias.child, alias)
-          alias.child
-      }
+  private def extractOuterAggregateExpression(
+      aggregateExpression: AggregateExpression,
+      aggregateExpressionsExtractor: GroupingAndAggregateExpressionsExtractor): OuterReference = {
+    val aggregateExpressionWithStrippedOuterReferences =
+      SubExprUtils.stripOuterReference(aggregateExpression)
+
+    val outerAggregateExpressionAlias = autoGeneratedAliasProvider.newOuterAlias(
+      child = aggregateExpressionWithStrippedOuterReferences
     )
 
-    val referencedAggregateExpression =
-      aggregateExpressionsSemanticComparator.collectFirst(aggregateExpression)
+    val (_, referencedAggregateExpressionAlias) =
+      aggregateExpressionsExtractor.collectFirstAggregateExpression(
+        aggregateExpressionWithStrippedOuterReferences
+      )
 
-    referencedAggregateExpression match {
-      case Some(expression) =>
-        aliasChildToAliasInAggregateExpressions.get(expression) match {
-          case null =>
-            throw SparkException.internalError(
-              s"No parent alias for expression $expression while extracting aggregate" +
-              s"expressions in Sort operator."
-            )
-          case alias: Alias => alias.toAttribute
-        }
+    referencedAggregateExpressionAlias match {
+      case Some(alias) =>
+        subqueryRegistry.currentScope.addAliasForOuterAggregateExpression(alias)
+        OuterReference(alias.toAttribute)
      case None =>
-        val alias = autoGeneratedAliasProvider.newAlias(child = aggregateExpression)
-        traversals.current.extractedAggregateExpressionAliases.add(alias)
-        alias.toAttribute
+        subqueryRegistry.currentScope.addAliasForOuterAggregateExpression(
+          outerAggregateExpressionAlias
+        )
+        OuterReference(outerAggregateExpressionAlias.toAttribute)
     }
   }
 
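
A note on the reuse-or-create decision documented in the scaladoc above: the resolver now prefers an [[Alias]] that already exists in the outer [[Aggregate.aggregateExpressions]] list and only falls back to the auto-generated one. A minimal sketch of that decision against catalyst types (the helper name and shape are hypothetical, not the resolver's API):

import org.apache.spark.sql.catalyst.expressions.{Alias, Expression}

object OuterAliasChoiceSketch {
  // Reuse an existing Alias from the outer Aggregate's output when its
  // child semantically matches the stripped AggregateExpression;
  // otherwise fall back to the freshly generated alias.
  def chooseOuterAlias(
      strippedAggregate: Expression,
      outerAggregateList: Seq[Expression],
      autoGeneratedAlias: Alias): Alias =
    outerAggregateList
      .collectFirst {
        case alias: Alias if alias.child.semanticEquals(strippedAggregate) => alias
      }
      .getOrElse(autoGeneratedAlias)
}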

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AggregateResolver.scala

Lines changed: 18 additions & 15 deletions
@@ -27,12 +27,11 @@ import org.apache.spark.sql.catalyst.analysis.{
   UnresolvedAttribute
 }
 import org.apache.spark.sql.catalyst.expressions.{
+  Alias,
   AttributeReference,
   Expression,
   ExprId,
   ExprUtils,
-  IntegerLiteral,
-  Literal,
   NamedExpression
 }
 import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project}
@@ -69,7 +68,9 @@ class AggregateResolver(operatorResolver: Resolver, expressionResolver: Expressi
    * output of [[Aggregate]] and return the result.
    */
  def resolve(unresolvedAggregate: Aggregate): LogicalPlan = {
-    val resolvedAggregate = scopes.withNewScope() {
+    scopes.pushScope()
+
+    val resolvedAggregate = try {
      val resolvedChild = operatorResolver.resolve(unresolvedAggregate.child)
 
      val resolvedAggregateExpressions = expressionResolver.resolveAggregateExpressions(
@@ -124,9 +125,11 @@ class AggregateResolver(operatorResolver: Resolver, expressionResolver: Expressi
         operator = resolvedAggregate,
         outputList = resolvedAggregate.aggregateExpressions,
         groupingAttributeIds = Some(getGroupingAttributeIds(resolvedAggregate)),
-        aggregateListAliases = scopes.current.getAggregateListAliases
+        aggregateListAliases = scopes.current.getTopAggregateExpressionAliases
       )
     }
+    } finally {
+      scopes.popScope()
     }
 
     scopes.overwriteOutputAndExtendHiddenOutput(
@@ -179,10 +182,13 @@ class AggregateResolver(operatorResolver: Resolver, expressionResolver: Expressi
    *
    * Example 5:
    *
-   * {{{ SELECT col1, 5 FROM VALUES(1) GROUP BY ALL; }}}
-   * this one should be grouped by keyword `ALL`. If there is an aggregate expression which is a
-   * [[Literal]] with the Integer data type - preserve the ordinal literal in order to pass logical
-   * plan comparison. The grouping expressions list will be [col1, 2].
+   * {{{ SELECT col1 AS b, sum(col2) + col1 FROM VALUES (1, 2) GROUP BY ALL; }}}
+   * this one should be grouped by keyword `ALL`. It means that the grouping expressions list is
+   * going to contain all the aggregate expressions that don't have aggregate expressions in their
+   * subtrees. The grouping expressions list will be [col1 AS `col1`].
+   * All the [[Alias]]es should be stripped in order to pass logical plan comparison and to prevent
+   * unintentional exceptions from being thrown by [[ExprUtils.assertValidAggregation]], so the
+   * final grouping expressions list will be [col1].
    */
  private def tryResolveGroupByAll(
      aggregateExpressions: ResolvedAggregateExpressions,
@@ -195,13 +201,10 @@ class AggregateResolver(operatorResolver: Resolver, expressionResolver: Expressi
       )
     }
 
-    aggregateExpressions.resolvedExpressionsWithoutAggregates.zipWithIndex.map {
-      case (expression, index) =>
-        expression match {
-          case IntegerLiteral(_) =>
-            Literal(index + 1)
-          case _ => expression
-        }
+    aggregateExpressions.resolvedExpressionsWithoutAggregates.map {
+      case alias: Alias =>
+        alias.child
+      case other => other
     }
   }
 
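
The GROUP BY ALL rewrite described in Example 5 above comes down to stripping top-level [[Alias]]es from the aggregate-free output expressions, which is what the new `map` in `tryResolveGroupByAll` does. A minimal standalone sketch of that step (hypothetical helper name):

import org.apache.spark.sql.catalyst.expressions.{Alias, Expression}

object GroupByAllSketch {
  // For GROUP BY ALL, the grouping expressions are the output
  // expressions without aggregates in their subtrees, with top-level
  // aliases stripped: `col1 AS b` groups by `col1`.
  def stripTopLevelAliases(expressionsWithoutAggregates: Seq[Expression]): Seq[Expression] =
    expressionsWithoutAggregates.map {
      case alias: Alias => alias.child
      case other => other
    }
}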

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AliasResolver.scala

Lines changed: 15 additions & 3 deletions
@@ -18,7 +18,12 @@
 package org.apache.spark.sql.catalyst.analysis.resolver
 
 import org.apache.spark.sql.catalyst.analysis.{AliasResolution, MultiAlias, UnresolvedAlias}
-import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, NamedExpression}
+import org.apache.spark.sql.catalyst.expressions.{
+  Alias,
+  Expression,
+  NamedExpression,
+  OuterReference
+}
 
 /**
  * Resolver class that resolves unresolved aliases and handles user-specified aliases.
@@ -29,12 +34,17 @@ class AliasResolver(expressionResolver: ExpressionResolver)
   private val scopes = expressionResolver.getNameScopes
   private val expressionResolutionContextStack =
     expressionResolver.getExpressionResolutionContextStack
+  private val autoGeneratedAliasProvider = new AutoGeneratedAliasProvider(
+    expressionResolver.getExpressionIdAssigner
+  )
 
   /**
    * Resolves [[UnresolvedAlias]] by resolving its child and computing the alias name by calling
    * [[AliasResolution]] on the result. After resolving it, we assign a correct exprId to the
-   * resulting [[Alias]]. Here we allow inner aliases to persist until the end of single-pass
-   * resolution, after which they will be removed in the post-processing phase.
+   * resulting [[Alias]]. In case result of the [[AliasResolution]] call is an [[OuterReference]],
+   * we create a new [[Alias]] using the [[AutoGeneratedAliasProvider]]. Here we allow inner
+   * aliases to persist until the end of single-pass resolution, after which they will be removed
+   * in the post-processing phase.
    */
  override def resolve(unresolvedAlias: UnresolvedAlias): NamedExpression =
    scopes.current.lcaRegistry.withNewLcaScope {
@@ -52,6 +62,8 @@ class AliasResolver(expressionResolver: ExpressionResolver)
         )
       case alias: Alias =>
         expressionResolver.getExpressionIdAssigner.mapExpression(alias)
+      case outerReference: OuterReference =>
+        autoGeneratedAliasProvider.newAlias(outerReference)
     }
   }
 
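
In the new [[OuterReference]] branch above, a bare outer reference produced by [[AliasResolution]] is wrapped into a fresh [[Alias]] instead of being returned directly. A hand-built illustration of the resulting expression shape (the real code delegates naming and expression IDs to [[AutoGeneratedAliasProvider]]):

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, OuterReference}
import org.apache.spark.sql.types.IntegerType

object OuterReferenceAliasSketch {
  // An outer column wrapped into an alias, so the expression is exposed
  // under a regular named output.
  val outerColumn = AttributeReference("col1", IntegerType)()
  val wrapped: Alias = Alias(OuterReference(outerColumn), outerColumn.name)()
}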

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AnalyzerBridgeState.scala

Lines changed: 21 additions & 4 deletions
@@ -34,22 +34,39 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
  * @param catalogRelationsWithResolvedMetadata A map from [[UnresolvedCatalogRelation]] to the
  *   relations with resolved metadata. It allows us to reuse the relation metadata and avoid
  *   duplicate catalog/table lookups.
+ * @param hiveRelationsWithResolvedMetadata A map from [[HiveTableRelation]] to their resolved
+ *   [[LogicalRelation]] counterparts. We cannot import those nodes here because of recursive
+ *   dependencies, so we rely on overridden [[LogicalPlan.equals]] and [[LogicalPlan.hashCode]].
+ *   Keys are canonicalized to compensate for stats added by [[DetermineTableStats]].
  */
 case class AnalyzerBridgeState(
     relationsWithResolvedMetadata: AnalyzerBridgeState.RelationsWithResolvedMetadata =
       new AnalyzerBridgeState.RelationsWithResolvedMetadata,
     catalogRelationsWithResolvedMetadata: AnalyzerBridgeState.CatalogRelationsWithResolvedMetadata =
-      new AnalyzerBridgeState.CatalogRelationsWithResolvedMetadata
+      new AnalyzerBridgeState.CatalogRelationsWithResolvedMetadata,
+    hiveRelationsWithResolvedMetadata: AnalyzerBridgeState.HiveRelationsWithResolvedMetadata =
+      new AnalyzerBridgeState.HiveRelationsWithResolvedMetadata
 ) {
   def addUnresolvedRelation(unresolvedRelation: UnresolvedRelation, relation: LogicalPlan): Unit = {
     relationsWithResolvedMetadata.put(
-        BridgedRelationId(unresolvedRelation, AnalysisContext.get.catalogAndNamespace),
-        relation
-      )
+      BridgedRelationId(unresolvedRelation, AnalysisContext.get.catalogAndNamespace),
+      relation
+    )
+  }
+
+  def addLogicalRelationForHiveRelation(
+      hiveRelation: LogicalPlan,
+      logicalRelation: LogicalPlan): Unit = {
+    hiveRelationsWithResolvedMetadata.put(hiveRelation.canonicalized, logicalRelation)
+  }
+
+  def getLogicalRelationForHiveRelation(hiveRelation: LogicalPlan): Option[LogicalPlan] = {
+    Option(hiveRelationsWithResolvedMetadata.get(hiveRelation.canonicalized))
   }
 }
 
 object AnalyzerBridgeState {
   type RelationsWithResolvedMetadata = HashMap[BridgedRelationId, LogicalPlan]
   type CatalogRelationsWithResolvedMetadata = HashMap[UnresolvedCatalogRelation, LogicalPlan]
+  type HiveRelationsWithResolvedMetadata = HashMap[LogicalPlan, LogicalPlan]
 }
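
Why the Hive relation map above keys by canonicalized plans: [[DetermineTableStats]] can decorate an already-resolved [[HiveTableRelation]] with statistics, so the same logical table may show up as two non-equal plan objects, while their canonical forms still match. A minimal sketch of the idea (a hypothetical wrapper, not Spark API):

import java.util.HashMap
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// A map whose keys are canonicalized plans, so lookups ignore
// post-analysis decoration such as filled-in statistics.
class CanonicalizedPlanMap[V] {
  private val underlying = new HashMap[LogicalPlan, V]

  def put(key: LogicalPlan, value: V): Unit = {
    underlying.put(key.canonicalized, value)
  }

  def get(key: LogicalPlan): Option[V] =
    Option(underlying.get(key.canonicalized))
}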

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AttributeScopeStack.scala

Lines changed: 9 additions & 7 deletions
@@ -80,21 +80,23 @@ class AttributeScopeStack {
   }
 
   /**
-   * Execute `body` in the context of a fresh attribute scope. Used by [[Project]] and [[Aggregate]]
-   * validation code since those operators introduce a new scope with fresh expression IDs.
+   * Push a fresh attribute scope. Used by [[Project]] and [[Aggregate]] validation code since
+   * those operators introduce a new scope with fresh expression IDs.
    */
-  def withNewScope[R](isSubqueryRoot: Boolean = false)(body: => R): Unit = {
+  def pushScope(isSubqueryRoot: Boolean = false): Unit = {
     stack.push(
       AttributeScope(
         attributes = AttributeSet(Seq.empty),
         isSubqueryRoot = isSubqueryRoot
       )
     )
-    try {
-      body
-    } finally {
+  }
+
+  /**
+   * Pop current attribute scope.
+   */
+  def popScope(): Unit = {
     stack.pop()
-    }
   }
 
   override def toString: String = stack.toString