10
10
#include < ydb/library/yql/minikql/computation/mkql_computation_node_holders.h>
11
11
#include < ydb/library/yql/dq/runtime/dq_async_stats.h>
12
12
#include < ydb/public/sdk/cpp/client/ydb_topic/topic.h>
13
+
14
+ #include < util/string/join.h>
13
15
#include < util/generic/queue.h>
14
16
15
17
#include < ydb/core/fq/libs/row_dispatcher/json_parser.h>
@@ -62,13 +64,11 @@ struct TEvPrivate {
62
64
struct TEvCreateSession : public NActors ::TEventLocal<TEvCreateSession, EvCreateSession> {};
63
65
struct TEvPrintState : public NActors ::TEventLocal<TEvPrintState, EvPrintState> {};
64
66
struct TEvStatus : public NActors ::TEventLocal<TEvStatus, EvStatus> {};
65
-
66
67
struct TEvDataFiltered : public NActors ::TEventLocal<TEvDataFiltered, EvDataFiltered> {
67
68
TEvDataFiltered (ui64 offset, ui64 numberValues)
68
69
: Offset(offset)
69
70
, NumberValues(numberValues)
70
71
{}
71
-
72
72
const ui64 Offset;
73
73
const ui64 NumberValues;
74
74
};
@@ -95,7 +95,7 @@ TVector<TString> GetVector(const google::protobuf::RepeatedPtrField<TString>& va
95
95
class TTopicSession : public TActorBootstrapped <TTopicSession> {
96
96
97
97
private:
98
- using TParserInputType = std::pair< TVector< TString>, TVector< TString>>; // TODO: remove after YQ-3594
98
+ using TParserInputType = TSet< std::pair<TString, TString>>;
99
99
100
100
struct ClientsInfo {
101
101
ClientsInfo (const NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev)
@@ -114,6 +114,7 @@ class TTopicSession : public TActorBootstrapped<TTopicSession> {
114
114
bool DataArrivedSent = false ;
115
115
TMaybe<ui64> NextMessageOffset;
116
116
ui64 LastSendedNextMessageOffset = 0 ;
117
+ TVector<ui64> FieldsIds;
117
118
};
118
119
119
120
struct TTopicEventProcessor {
@@ -130,6 +131,11 @@ class TTopicSession : public TActorBootstrapped<TTopicSession> {
130
131
const TString& LogPrefix;
131
132
};
132
133
134
+ struct TParserSchema {
135
+ TVector<ui64> FieldsMap; // index - FieldId (from FieldsIndexes), value - parsing schema offset
136
+ TParserInputType InputType;
137
+ };
138
+
133
139
const TString TopicPath;
134
140
NActors::TActorId RowDispatcherActorId;
135
141
ui32 PartitionId;
@@ -147,9 +153,10 @@ class TTopicSession : public TActorBootstrapped<TTopicSession> {
147
153
std::unique_ptr<TJsonParser> Parser;
148
154
NConfig::TRowDispatcherConfig Config;
149
155
ui64 UsedSize = 0 ;
150
- TMaybe<TParserInputType> CurrentParserTypes;
151
156
const ::NMonitoring::TDynamicCounterPtr Counters;
152
157
TTopicSessionMetrics Metrics;
158
+ TParserSchema ParserSchema;
159
+ THashMap<TString, ui64> FieldsIndexes;
153
160
154
161
public:
155
162
explicit TTopicSession (
@@ -176,7 +183,7 @@ class TTopicSession : public TActorBootstrapped<TTopicSession> {
176
183
void SendToParsing (const TVector<NYdb::NTopic::TReadSessionEvent::TDataReceivedEvent::TMessage>& messages);
177
184
void SendToFiltering (ui64 offset, const TVector<TVector<std::string_view>>& parsedValues);
178
185
void SendData (ClientsInfo& info);
179
- void InitParser ( const NYql::NPq::NProto::TDqPqTopicSource& sourceParams );
186
+ void UpdateParser ( );
180
187
void FatalError (const TString& message, const std::unique_ptr<TJsonFilter>* filter = nullptr );
181
188
void SendDataArrived (ClientsInfo& client);
182
189
void StopReadSession ();
@@ -200,6 +207,9 @@ class TTopicSession : public TActorBootstrapped<TTopicSession> {
200
207
201
208
void PrintInternalState ();
202
209
void SendSessionError (NActors::TActorId readActorId, const TString& message);
210
+ TVector<TVector<std::string_view>> RebuildJson (const ClientsInfo& info, const TVector<TVector<std::string_view>>& parsedValues);
211
+ void UpdateParserSchema (const TParserInputType& inputType);
212
+ void UpdateFieldsIds (ClientsInfo& clientInfo);
203
213
204
214
private:
205
215
@@ -340,11 +350,11 @@ void TTopicSession::CreateTopicSession() {
340
350
return ;
341
351
}
342
352
343
- // Use any sourceParams.
344
- const NYql::NPq::NProto::TDqPqTopicSource& sourceParams = Clients.begin ()->second .Settings .GetSource ();
345
-
346
353
if (!ReadSession) {
347
- InitParser (sourceParams);
354
+ UpdateParser ();
355
+
356
+ // Use any sourceParams.
357
+ const NYql::NPq::NProto::TDqPqTopicSource& sourceParams = Clients.begin ()->second .Settings .GetSource ();
348
358
ReadSession = GetTopicClient (sourceParams).CreateReadSession (GetReadSessionSettings (sourceParams));
349
359
SubscribeOnNextEvent ();
350
360
}
@@ -362,8 +372,21 @@ void TTopicSession::Handle(NFq::TEvPrivate::TEvCreateSession::TPtr&) {
362
372
CreateTopicSession ();
363
373
}
364
374
375
+ TVector<TVector<std::string_view>> TTopicSession::RebuildJson (const ClientsInfo& info, const TVector<TVector<std::string_view>>& parsedValues) {
376
+ TVector<TVector<std::string_view>> result;
377
+ const auto & offsets = ParserSchema.FieldsMap ;
378
+ result.reserve (info.FieldsIds .size ());
379
+ for (auto fieldId : info.FieldsIds ) {
380
+ Y_ENSURE (fieldId < offsets.size (), " fieldId " << fieldId << " , offsets.size() " << offsets.size ());
381
+ auto offset = offsets[fieldId];
382
+ Y_ENSURE (offset < parsedValues.size (), " offset " << offset << " , jsonBatch.size() " << parsedValues.size ());
383
+ result.push_back (parsedValues[offset]);
384
+ }
385
+ return result;
386
+ }
387
+
365
388
void TTopicSession::Handle (NFq::TEvPrivate::TEvDataAfterFilteration::TPtr& ev) {
366
- LOG_ROW_DISPATCHER_TRACE (" TEvDataAfterFilteration, read actor id " << ev->Get ()->ReadActorId .ToString ());
389
+ LOG_ROW_DISPATCHER_TRACE (" TEvDataAfterFilteration, read actor id " << ev->Get ()->ReadActorId .ToString () << " , " << ev-> Get ()-> Json );
367
390
auto it = Clients.find (ev->Get ()->ReadActorId );
368
391
if (it == Clients.end ()) {
369
392
LOG_ROW_DISPATCHER_ERROR (" Skip DataAfterFilteration, wrong read actor, id " << ev->Get ()->ReadActorId .ToString ());
@@ -537,12 +560,12 @@ void TTopicSession::SendToParsing(const TVector<NYdb::NTopic::TReadSessionEvent:
537
560
538
561
void TTopicSession::SendToFiltering (ui64 offset, const TVector<TVector<std::string_view>>& parsedValues) {
539
562
Y_ENSURE (parsedValues, " Expected non empty schema" );
540
- LOG_ROW_DISPATCHER_TRACE (" TEvDataParsed , offset " << offset << " , data:\n " << Parser->GetDebugString (parsedValues));
563
+ LOG_ROW_DISPATCHER_TRACE (" SendToFiltering , offset " << offset << " , data:\n " << Parser->GetDebugString (parsedValues));
541
564
542
565
for (auto & [actorId, info] : Clients) {
543
566
try {
544
567
if (info.Filter ) {
545
- info.Filter ->Push (offset, parsedValues);
568
+ info.Filter ->Push (offset, RebuildJson (info, parsedValues) );
546
569
}
547
570
} catch (const std::exception& e) {
548
571
FatalError (e.what (), &info.Filter );
@@ -590,28 +613,38 @@ void TTopicSession::SendData(ClientsInfo& info) {
590
613
info.LastSendedNextMessageOffset = *info.NextMessageOffset ;
591
614
}
592
615
616
+ void TTopicSession::UpdateFieldsIds (ClientsInfo& info) {
617
+ for (auto name : info.Settings .GetSource ().GetColumns ()) {
618
+ auto it = FieldsIndexes.find (name);
619
+ if (it == FieldsIndexes.end ()) {
620
+ auto nextIndex = FieldsIndexes.size ();
621
+ info.FieldsIds .push_back (nextIndex);
622
+ FieldsIndexes[name] = nextIndex;
623
+ } else {
624
+ info.FieldsIds .push_back (it->second );
625
+ }
626
+ }
627
+ }
628
+
593
629
void TTopicSession::Handle (NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
594
630
auto it = Clients.find (ev->Sender );
595
631
if (it != Clients.end ()) {
596
632
FatalError (" Internal error: sender " + ev->Sender .ToString ());
597
633
return ;
598
634
}
599
635
600
- LOG_ROW_DISPATCHER_INFO (" New client, read actor id " << ev->Sender .ToString ());
636
+ LOG_ROW_DISPATCHER_INFO (" New client: read actor id " << ev->Sender .ToString () << " , predicate: "
637
+ << ev->Get ()->Record .GetSource ().GetPredicate () << " , offset: " << ev->Get ()->Record .GetOffset ());
601
638
602
639
auto columns = GetVector (ev->Get ()->Record .GetSource ().GetColumns ());
603
640
auto types = GetVector (ev->Get ()->Record .GetSource ().GetColumnTypes ());
604
- auto parserType = std::make_pair (columns, types);
605
- if (CurrentParserTypes && *CurrentParserTypes != parserType) {
606
- SendSessionError (ev->Sender , " Different columns/types, use same in all queries" );
607
- return ;
608
- }
609
641
610
642
try {
611
643
auto & clientInfo = Clients.emplace (
612
644
std::piecewise_construct,
613
645
std::forward_as_tuple (ev->Sender ),
614
646
std::forward_as_tuple (ev)).first ->second ;
647
+ UpdateFieldsIds (clientInfo);
615
648
616
649
TString predicate = clientInfo.Settings .GetSource ().GetPredicate ();
617
650
if (!predicate.empty ()) {
@@ -626,11 +659,9 @@ void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
626
659
ClientsWithoutPredicate.insert (ev->Sender );
627
660
}
628
661
629
- LOG_ROW_DISPATCHER_INFO (" New client: offset " << clientInfo.NextMessageOffset << " , predicate: " << clientInfo.Settings .GetSource ().GetPredicate ());
630
-
631
662
if (ReadSession) {
632
663
if (clientInfo.Settings .HasOffset () && (clientInfo.Settings .GetOffset () <= LastMessageOffset)) {
633
- LOG_ROW_DISPATCHER_INFO (" New client has less offset than the last message, stop (restart) topic session" );
664
+ LOG_ROW_DISPATCHER_INFO (" New client has less offset ( " << clientInfo. Settings . GetOffset () << " ) than the last message ( " << LastMessageOffset << " ) , stop (restart) topic session" );
634
665
StopReadSession ();
635
666
}
636
667
}
@@ -641,7 +672,7 @@ void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStartSession::TPtr& ev) {
641
672
} catch (...) {
642
673
FatalError (" Adding new client failed, " + CurrentExceptionMessage ());
643
674
}
644
-
675
+ UpdateParser ();
645
676
PrintInternalState ();
646
677
if (!ReadSession) {
647
678
Schedule (TDuration::Seconds (Config.GetTimeoutBeforeStartSessionSec ()), new NFq::TEvPrivate::TEvCreateSession ());
@@ -665,20 +696,69 @@ void TTopicSession::Handle(NFq::TEvRowDispatcher::TEvStopSession::TPtr& ev) {
665
696
666
697
auto it = Clients.find (ev->Sender );
667
698
if (it == Clients.end ()) {
668
- LOG_ROW_DISPATCHER_DEBUG (" Wrong ClientSettings" ); // TODO
699
+ LOG_ROW_DISPATCHER_DEBUG (" Wrong ClientSettings" );
669
700
return ;
670
701
}
671
702
Clients.erase (it);
672
703
ClientsWithoutPredicate.erase (ev->Sender );
704
+ if (Clients.empty ()) {
705
+ StopReadSession ();
706
+ }
707
+ UpdateParser ();
673
708
}
674
709
675
- void TTopicSession::InitParser (const NYql::NPq::NProto::TDqPqTopicSource& sourceParams) {
676
- if (Parser) {
710
+ void CollectColumns (const NYql::NPq::NProto::TDqPqTopicSource& sourceParams, TSet<std::pair<TString, TString>>& columns) {
711
+ auto size = sourceParams.GetColumns ().size ();
712
+ Y_ENSURE (size == sourceParams.GetColumnTypes ().size ());
713
+
714
+ for (int i = 0 ; i < size; ++i) {
715
+ auto name = sourceParams.GetColumns ().Get (i);
716
+ auto type = sourceParams.GetColumnTypes ().Get (i);
717
+ columns.emplace (name, type);
718
+ }
719
+ }
720
+
721
+ void TTopicSession::UpdateParserSchema (const TParserInputType& inputType) {
722
+ ParserSchema.FieldsMap .clear ();
723
+ ParserSchema.FieldsMap .resize (FieldsIndexes.size ());
724
+ ui64 offset = 0 ;
725
+ for (const auto & [name, type]: inputType) {
726
+ Y_ENSURE (FieldsIndexes.contains (name));
727
+ ui64 index = FieldsIndexes[name];
728
+ ParserSchema.FieldsMap [index] = offset++;
729
+ }
730
+ ParserSchema.InputType = inputType;
731
+ }
732
+
733
+ void TTopicSession::UpdateParser () {
734
+ TSet<std::pair<TString, TString>> namesWithTypes;
735
+ for (auto & [readActorId, info] : Clients) {
736
+ CollectColumns (info.Settings .GetSource (), namesWithTypes);
737
+ }
738
+
739
+ if (namesWithTypes == ParserSchema.InputType ) {
677
740
return ;
678
741
}
742
+ if (namesWithTypes.empty ()) {
743
+ LOG_ROW_DISPATCHER_INFO (" No columns to parse, reset parser" );
744
+ Parser.reset ();
745
+ return ;
746
+ }
747
+
679
748
try {
680
- CurrentParserTypes = std::make_pair (GetVector (sourceParams.GetColumns ()), GetVector (sourceParams.GetColumnTypes ()));
681
- Parser = NewJsonParser (GetVector (sourceParams.GetColumns ()), GetVector (sourceParams.GetColumnTypes ()));
749
+ UpdateParserSchema (namesWithTypes);
750
+
751
+ TVector<TString> names;
752
+ TVector<TString> types;
753
+ names.reserve (namesWithTypes.size ());
754
+ types.reserve (namesWithTypes.size ());
755
+ for (const auto & [name, type] : namesWithTypes) {
756
+ names.push_back (name);
757
+ types.push_back (type);
758
+ }
759
+
760
+ LOG_ROW_DISPATCHER_TRACE (" Init JsonParser with columns: " << JoinSeq (' ,' , names));
761
+ Parser = NewJsonParser (names, types);
682
762
} catch (const NYql::NPureCalc::TCompileError& e) {
683
763
FatalError (e.GetIssues ());
684
764
}
0 commit comments