@@ -1021,7 +1021,8 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil
1021
1021
return false ;
1022
1022
}
1023
1023
1024
- if (!buildInfo.Sample .Rows .empty ()) {
1024
+ if (buildInfo.KMeans .State == TIndexBuildInfo::TKMeans::Sample &&
1025
+ !buildInfo.Sample .Rows .empty ()) {
1025
1026
if (buildInfo.Sample .State == TIndexBuildInfo::TSample::EState::Collect) {
1026
1027
LOG_D (" FillVectorIndex SendUploadSampleKRequest " << buildInfo.DebugString ());
1027
1028
SendUploadSampleKRequest (buildInfo);
@@ -1033,7 +1034,8 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil
1033
1034
ClearDoneShards (txc, buildInfo);
1034
1035
1035
1036
if (!buildInfo.Sample .Rows .empty ()) {
1036
- if (buildInfo.KMeans .NextState ()) {
1037
+ if (buildInfo.KMeans .State == TIndexBuildInfo::TKMeans::Sample) {
1038
+ buildInfo.KMeans .State = TIndexBuildInfo::TKMeans::Reshuffle;
1037
1039
LOG_D (" FillVectorIndex NextState " << buildInfo.DebugString ());
1038
1040
PersistKMeansState (txc, buildInfo);
1039
1041
Progress (BuildId);
@@ -1603,29 +1605,6 @@ struct TSchemeShard::TIndexBuilder::TTxReplySampleK: public TSchemeShard::TIndex
1603
1605
}
1604
1606
1605
1607
NIceDb::TNiceDb db (txc.DB );
1606
- if (record.ProbabilitiesSize ()) {
1607
- Y_ENSURE (record.RowsSize ());
1608
- auto & probabilities = record.GetProbabilities ();
1609
- auto & rows = *record.MutableRows ();
1610
- Y_ENSURE (probabilities.size () == rows.size ());
1611
- auto & sample = buildInfo.Sample .Rows ;
1612
- auto from = sample.size ();
1613
- for (int i = 0 ; i != probabilities.size (); ++i) {
1614
- if (probabilities[i] >= buildInfo.Sample .MaxProbability ) {
1615
- break ;
1616
- }
1617
- sample.emplace_back (probabilities[i], std::move (rows[i]));
1618
- }
1619
- if (buildInfo.Sample .MakeWeakTop (buildInfo.KMeans .K )) {
1620
- from = 0 ;
1621
- }
1622
- for (; from < sample.size (); ++from) {
1623
- db.Table <Schema::KMeansTreeSample>().Key (buildInfo.Id , from).Update (
1624
- NIceDb::TUpdate<Schema::KMeansTreeSample::Probability>(sample[from].P ),
1625
- NIceDb::TUpdate<Schema::KMeansTreeSample::Data>(sample[from].Row )
1626
- );
1627
- }
1628
- }
1629
1608
1630
1609
TBillingStats stats{0 , 0 , record.GetReadRows (), record.GetReadBytes ()};
1631
1610
shardStatus.Processed += stats;
@@ -1639,6 +1618,32 @@ struct TSchemeShard::TIndexBuilder::TTxReplySampleK: public TSchemeShard::TIndex
1639
1618
switch (shardStatus.Status ) {
1640
1619
case NKikimrIndexBuilder::EBuildStatus::DONE:
1641
1620
if (buildInfo.InProgressShards .erase (shardIdx)) {
1621
+ if (record.ProbabilitiesSize ()) {
1622
+ Y_ENSURE (record.RowsSize ());
1623
+ auto & probabilities = record.GetProbabilities ();
1624
+ auto & rows = *record.MutableRows ();
1625
+ Y_ENSURE (probabilities.size () == rows.size ());
1626
+ auto & sample = buildInfo.Sample .Rows ;
1627
+ auto from = sample.size ();
1628
+ for (int i = 0 ; i != probabilities.size (); ++i) {
1629
+ if (probabilities[i] >= buildInfo.Sample .MaxProbability ) {
1630
+ break ;
1631
+ }
1632
+ sample.emplace_back (probabilities[i], std::move (rows[i]));
1633
+ }
1634
+ if (buildInfo.Sample .MakeWeakTop (buildInfo.KMeans .K )) {
1635
+ from = 0 ;
1636
+ }
1637
+ for (; from < sample.size (); ++from) {
1638
+ db.Table <Schema::KMeansTreeSample>().Key (buildInfo.Id , from).Update (
1639
+ NIceDb::TUpdate<Schema::KMeansTreeSample::Probability>(sample[from].P ),
1640
+ NIceDb::TUpdate<Schema::KMeansTreeSample::Data>(sample[from].Row )
1641
+ );
1642
+ }
1643
+ for (; from < 2 *buildInfo.KMeans .K ; ++from) {
1644
+ db.Table <Schema::KMeansTreeSample>().Key (buildInfo.Id , from).Delete ();
1645
+ }
1646
+ }
1642
1647
buildInfo.DoneShards .emplace_back (shardIdx);
1643
1648
}
1644
1649
break ;
0 commit comments