@@ -1060,15 +1060,6 @@ class TYtNativeGateway : public IYtGateway {
1060
1060
}
1061
1061
const bool initial = NYql::HasSetting (publish.Settings ().Ref (), EYtSettingType::Initial);
1062
1062
1063
- std::unordered_map<EYtSettingType, TString> strOpts;
1064
- for (const auto & setting : publish.Settings ().Ref ().Children ()) {
1065
- if (setting->ChildrenSize () == 2 ) {
1066
- strOpts.emplace (FromString<EYtSettingType>(setting->Head ().Content ()), setting->Tail ().Content ());
1067
- } else if (setting->ChildrenSize () == 1 ) {
1068
- strOpts.emplace (FromString<EYtSettingType>(setting->Head ().Content ()), TString ());;
1069
- }
1070
- }
1071
-
1072
1063
YQL_CLOG (INFO, ProviderYt) << " Mode: " << mode << " , IsInitial: " << initial;
1073
1064
1074
1065
TSession::TPtr session = GetSession (options.SessionId ());
@@ -1079,15 +1070,35 @@ class TYtNativeGateway : public IYtGateway {
1079
1070
TVector<TSrcTable> src;
1080
1071
ui64 chunksCount = 0 ;
1081
1072
ui64 dataSize = 0 ;
1082
- std::unordered_set<TString> columnGroups;
1073
+ TSet<TString> srcColumnGroupAlts;
1074
+ bool first = true ;
1075
+ const TStructExprType* itemType = nullptr ;
1083
1076
for (auto out: publish.Input ()) {
1084
1077
auto outTableWithCluster = GetOutTableWithCluster (out);
1085
1078
auto outTable = outTableWithCluster.first .Cast <TYtOutTable>();
1086
1079
src.emplace_back (outTable.Name ().StringValue (), outTableWithCluster.second );
1087
- if (auto columnGroupSetting = NYql::GetSetting (outTable.Settings ().Ref (), EYtSettingType::ColumnGroups)) {
1088
- columnGroups.emplace (columnGroupSetting->Tail ().Content ());
1089
- } else {
1090
- columnGroups.emplace ();
1080
+ if (first) {
1081
+ itemType = GetSeqItemType (*outTable.Ref ().GetTypeAnn ()).Cast <TStructExprType>();
1082
+ if (auto columnGroupSetting = NYql::GetSetting (outTable.Settings ().Ref (), EYtSettingType::ColumnGroups)) {
1083
+ srcColumnGroupAlts.emplace (columnGroupSetting->Tail ().Content ());
1084
+ TString expanded;
1085
+ if (ExpandDefaultColumnGroup (columnGroupSetting->Tail ().Content (), *itemType, expanded)) {
1086
+ srcColumnGroupAlts.insert (expanded);
1087
+ }
1088
+ }
1089
+ first = false ;
1090
+ } else if (!srcColumnGroupAlts.empty ()) {
1091
+ if (auto columnGroupSetting = NYql::GetSetting (outTable.Settings ().Ref (), EYtSettingType::ColumnGroups)) {
1092
+ if (!srcColumnGroupAlts.contains (columnGroupSetting->Tail ().Content ())) {
1093
+ TString expanded;
1094
+ if (!ExpandDefaultColumnGroup (columnGroupSetting->Tail ().Content (), *GetSeqItemType (*outTable.Ref ().GetTypeAnn ()).Cast <TStructExprType>(), expanded)
1095
+ || !srcColumnGroupAlts.contains (expanded)) {
1096
+ srcColumnGroupAlts.clear ();
1097
+ }
1098
+ }
1099
+ } else {
1100
+ srcColumnGroupAlts.clear ();
1101
+ }
1091
1102
}
1092
1103
auto stat = TYtTableStatInfo (outTable.Stat ());
1093
1104
chunksCount += stat.ChunkCount ;
@@ -1099,7 +1110,38 @@ class TYtNativeGateway : public IYtGateway {
1099
1110
if (src.size () > 10 ) {
1100
1111
YQL_CLOG (INFO, ProviderYt) << " ...total input tables=" << src.size ();
1101
1112
}
1102
- TString srcColumnGroups = columnGroups.size () == 1 ? *columnGroups.cbegin () : TString ();
1113
+
1114
+ bool forceMerge = false ;
1115
+ bool forceTransform = false ;
1116
+ std::unordered_map<EYtSettingType, TString> strOpts;
1117
+ for (const auto & setting : publish.Settings ().Ref ().Children ()) {
1118
+ const auto settingType = FromString<EYtSettingType>(setting->Head ().Content ());
1119
+ if (setting->ChildrenSize () == 2 ) {
1120
+ TString value = TString{setting->Tail ().Content ()};
1121
+ if (EYtSettingType::ColumnGroups == settingType) {
1122
+ bool groupDiff = false ;
1123
+ if (srcColumnGroupAlts.empty ()) {
1124
+ groupDiff = true ;
1125
+ } else {
1126
+ if (!srcColumnGroupAlts.contains (value)) {
1127
+ TString expanded;
1128
+ YQL_ENSURE (itemType);
1129
+ if (ExpandDefaultColumnGroup (value, *itemType, expanded)) {
1130
+ value = std::move (expanded);
1131
+ groupDiff = !srcColumnGroupAlts.contains (value);
1132
+ }
1133
+ }
1134
+ }
1135
+ if (groupDiff) {
1136
+ forceMerge = forceTransform = true ;
1137
+ YQL_CLOG (INFO, ProviderYt) << " Column groups diff forces merge" ;
1138
+ }
1139
+ }
1140
+ strOpts.emplace (settingType, value);
1141
+ } else if (setting->ChildrenSize () == 1 ) {
1142
+ strOpts.emplace (settingType, TString ());
1143
+ }
1144
+ }
1103
1145
1104
1146
bool combineChunks = false ;
1105
1147
if (auto minChunkSize = options.Config ()->MinPublishedAvgChunkSize .Get ()) {
@@ -1111,6 +1153,7 @@ class TYtNativeGateway : public IYtGateway {
1111
1153
YQL_CLOG (INFO, ProviderYt) << " Output: " << cluster << ' .' << dst;
1112
1154
if (combineChunks) {
1113
1155
YQL_CLOG (INFO, ProviderYt) << " Use chunks combining" ;
1156
+ forceMerge = true ;
1114
1157
}
1115
1158
if (Services_.Config ->GetLocalChainTest ()) {
1116
1159
if (!src.empty ()) {
@@ -1130,9 +1173,9 @@ class TYtNativeGateway : public IYtGateway {
1130
1173
const ui32 dstEpoch = TEpochInfo::Parse (publish.Publish ().Epoch ().Ref ()).GetOrElse (0 );
1131
1174
auto execCtx = MakeExecCtx (std::move (options), session, cluster, node.Get (), &ctx);
1132
1175
1133
- return session->Queue_ ->Async ([execCtx, src = std::move (src), dst, dstEpoch, isAnonymous, mode, initial, srcColumnGroups, combineChunks , strOpts = std::move (strOpts)] () mutable {
1176
+ return session->Queue_ ->Async ([execCtx, src = std::move (src), dst, dstEpoch, isAnonymous, mode, initial, combineChunks, forceMerge, forceTransform , strOpts = std::move (strOpts)] () mutable {
1134
1177
YQL_LOG_CTX_ROOT_SESSION_SCOPE (execCtx->LogCtx_ );
1135
- return ExecPublish (execCtx, std::move (src), dst, dstEpoch, isAnonymous, mode, initial, srcColumnGroups, combineChunks , strOpts);
1178
+ return ExecPublish (execCtx, std::move (src), dst, dstEpoch, isAnonymous, mode, initial, combineChunks, forceMerge, forceTransform , strOpts);
1136
1179
})
1137
1180
.Apply ([nodePos] (const TFuture<void >& f) {
1138
1181
try {
@@ -2414,8 +2457,9 @@ class TYtNativeGateway : public IYtGateway {
2414
2457
const bool isAnonymous,
2415
2458
EYtWriteMode mode,
2416
2459
const bool initial,
2417
- const TString& srcColumnGroups,
2418
2460
const bool combineChunks,
2461
+ bool forceMerge,
2462
+ bool forceTransform,
2419
2463
const std::unordered_map<EYtSettingType, TString>& strOpts)
2420
2464
{
2421
2465
TString tmpFolder = GetTablesTmpFolder (*execCtx->Options_ .Config ());
@@ -2490,8 +2534,6 @@ class TYtNativeGateway : public IYtGateway {
2490
2534
);
2491
2535
}
2492
2536
2493
- bool forceMerge = combineChunks;
2494
-
2495
2537
NYT::MergeNodes (yqlAttrs, GetUserAttributes (execCtx->GetEntryForCluster (src.back ().Cluster )->Tx , src.back ().Name , true ));
2496
2538
NYT::MergeNodes (yqlAttrs, YqlOpOptionsToAttrs (execCtx->Session_ ->OperationOptions_ ));
2497
2539
if (EYtWriteMode::RenewKeepMeta == mode) {
@@ -2582,8 +2624,6 @@ class TYtNativeGateway : public IYtGateway {
2582
2624
}
2583
2625
}
2584
2626
2585
- bool forceTransform = false ;
2586
-
2587
2627
#define DEFINE_OPT (name, attr, transform ) \
2588
2628
auto dst##name = isAnonymous \
2589
2629
? execCtx->Options_ .Config ()->Temporary ##name.Get (cluster) \
@@ -2612,10 +2652,6 @@ class TYtNativeGateway : public IYtGateway {
2612
2652
NYT::TNode columnGroupsSpec;
2613
2653
if (const auto it = strOpts.find (EYtSettingType::ColumnGroups); it != strOpts.cend () && execCtx->Options_ .Config ()->OptimizeFor .Get (cluster).GetOrElse (NYT::OF_LOOKUP_ATTR) != NYT::OF_LOOKUP_ATTR) {
2614
2654
columnGroupsSpec = NYT::NodeFromYsonString (it->second );
2615
- if (it->second != srcColumnGroups) {
2616
- forceMerge = forceTransform = true ;
2617
- YQL_CLOG (INFO, ProviderYt) << " Column groups diff forces merge, src=" << srcColumnGroups << " , dst=" << it->second ;
2618
- }
2619
2655
}
2620
2656
2621
2657
TFuture<void > res;
@@ -2656,7 +2692,7 @@ class TYtNativeGateway : public IYtGateway {
2656
2692
input = TRichYPath (std::get<0 >(*p)).TransactionId (std::get<1 >(*p)).OriginalPath (NYT::AddPathPrefix (dstPath, NYT::TConfig::Get ()->Prefix )).Columns (columns);
2657
2693
}
2658
2694
} else {
2659
- input = TRichYPath (dstPath).Columns (columns);
2695
+ input = TRichYPath (dstPath).Columns (columns);
2660
2696
}
2661
2697
mergeSpec.AddInput (input);
2662
2698
}
0 commit comments