@@ -4516,6 +4516,36 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4516
4516
4517
4517
// Read index build
4518
4518
{
4519
+ auto fillBuildInfoSafe = [&](TIndexBuildInfo& buildInfo, const TString& stepName, const auto & fill) {
4520
+ try {
4521
+ fill (buildInfo);
4522
+ } catch (const std::exception& exc) {
4523
+ LOG_ERROR_S (ctx, NKikimrServices::BUILD_INDEX,
4524
+ " Init " << stepName << " unhandled exception, id#" << buildInfo.Id
4525
+ << " " << TypeName (exc) << " : " << exc.what () << Endl
4526
+ << TBackTrace::FromCurrentException ().PrintToString ()
4527
+ << " , TIndexBuildInfo: " << buildInfo);
4528
+
4529
+ // in-memory volatile state:
4530
+ buildInfo.IsBroken = true ;
4531
+ buildInfo.AddIssue (TStringBuilder () << " Init " << stepName << " unhandled exception " << exc.what ());
4532
+ }
4533
+ };
4534
+
4535
+ auto fillBuildInfoByIdSafe = [&](TIndexBuildId id, const TString& stepName, const auto & fill) {
4536
+ const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4537
+ Y_ASSERT (buildInfoPtr);
4538
+ if (!buildInfoPtr) {
4539
+ LOG_ERROR_S (ctx, NKikimrServices::BUILD_INDEX,
4540
+ " Init " << stepName << " BuildInfo not found: id#" << id);
4541
+ return ;
4542
+ }
4543
+ auto & buildInfo = *buildInfoPtr->Get ();
4544
+ if (!buildInfo.IsBroken ) {
4545
+ fillBuildInfoSafe (buildInfo, stepName, fill);
4546
+ }
4547
+ };
4548
+
4519
4549
// read main info
4520
4550
{
4521
4551
auto rowset = db.Table <Schema::IndexBuild>().Range ().Select ();
@@ -4524,17 +4554,21 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4524
4554
}
4525
4555
4526
4556
while (!rowset.EndOfSet ()) {
4527
- TIndexBuildInfo::TPtr indexInfo = TIndexBuildInfo::FromRow (rowset);
4528
-
4529
- auto [it, emplaced] = Self->IndexBuilds .emplace (indexInfo->Id , indexInfo);
4530
- Y_ABORT_UNLESS (emplaced);
4531
- if (indexInfo->Uid ) {
4532
- // TODO(mbkkt) It also should be unique, but we're not sure.
4533
- Y_ASSERT (!Self->IndexBuildsByUid .contains (indexInfo->Uid ));
4534
- Self->IndexBuildsByUid [indexInfo->Uid ] = indexInfo;
4557
+ TIndexBuildInfo::TPtr buildInfo = new TIndexBuildInfo ();
4558
+ fillBuildInfoSafe (*buildInfo, " IndexBuild" , [&](TIndexBuildInfo& buildInfo) {
4559
+ TIndexBuildInfo::FillFromRow (rowset, &buildInfo);
4560
+ });
4561
+
4562
+ // Note: broken builds are also added to IndexBuilds
4563
+ Y_ASSERT (!Self->IndexBuilds .contains (buildInfo->Id ));
4564
+ Self->IndexBuilds [buildInfo->Id ] = buildInfo;
4565
+
4566
+ if (buildInfo->Uid ) {
4567
+ Y_ASSERT (!Self->IndexBuildsByUid .contains (buildInfo->Uid ));
4568
+ Self->IndexBuildsByUid [buildInfo->Uid ] = buildInfo;
4535
4569
}
4536
4570
4537
- OnComplete.ToProgress (indexInfo ->Id );
4571
+ OnComplete.ToProgress (buildInfo ->Id );
4538
4572
4539
4573
if (!rowset.Next ()) {
4540
4574
return false ;
@@ -4556,19 +4590,18 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4556
4590
4557
4591
while (!rowset.EndOfSet ()) {
4558
4592
TIndexBuildId id = rowset.GetValue <Schema::KMeansTreeProgress::Id>();
4559
- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4560
- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found: id# " << id);
4561
- auto & buildInfo = *buildInfoPtr->Get ();
4562
- buildInfo.KMeans .Set (
4563
- rowset.GetValue <Schema::KMeansTreeProgress::Level>(),
4564
- rowset.GetValue <Schema::KMeansTreeProgress::ParentBegin>(),
4565
- rowset.GetValue <Schema::KMeansTreeProgress::Parent>(),
4566
- rowset.GetValue <Schema::KMeansTreeProgress::ChildBegin>(),
4567
- rowset.GetValue <Schema::KMeansTreeProgress::Child>(),
4568
- rowset.GetValue <Schema::KMeansTreeProgress::State>(),
4569
- rowset.GetValue <Schema::KMeansTreeProgress::TableSize>()
4570
- );
4571
- buildInfo.Sample .Rows .reserve (buildInfo.KMeans .K * 2 );
4593
+ fillBuildInfoByIdSafe (id, " KMeansTreeProgress" , [&](TIndexBuildInfo& buildInfo) {
4594
+ buildInfo.KMeans .Set (
4595
+ rowset.GetValue <Schema::KMeansTreeProgress::Level>(),
4596
+ rowset.GetValue <Schema::KMeansTreeProgress::ParentBegin>(),
4597
+ rowset.GetValue <Schema::KMeansTreeProgress::Parent>(),
4598
+ rowset.GetValue <Schema::KMeansTreeProgress::ChildBegin>(),
4599
+ rowset.GetValue <Schema::KMeansTreeProgress::Child>(),
4600
+ rowset.GetValue <Schema::KMeansTreeProgress::State>(),
4601
+ rowset.GetValue <Schema::KMeansTreeProgress::TableSize>()
4602
+ );
4603
+ buildInfo.Sample .Rows .reserve (buildInfo.KMeans .K * 2 );
4604
+ });
4572
4605
4573
4606
if (!rowset.Next ()) {
4574
4607
return false ;
@@ -4587,13 +4620,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4587
4620
size_t sampleCount = 0 ;
4588
4621
while (!rowset.EndOfSet ()) {
4589
4622
TIndexBuildId id = rowset.GetValue <Schema::KMeansTreeSample::Id>();
4590
- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4591
- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found: id# " << id);
4592
- auto & buildInfo = *buildInfoPtr->Get ();
4593
- buildInfo.Sample .Add (
4594
- rowset.GetValue <Schema::KMeansTreeSample::Probability>(),
4595
- rowset.GetValue <Schema::KMeansTreeSample::Data>()
4596
- );
4623
+ fillBuildInfoByIdSafe (id, " KMeansTreeSample" , [&](TIndexBuildInfo& buildInfo) {
4624
+ buildInfo.Sample .Add (
4625
+ rowset.GetValue <Schema::KMeansTreeSample::Probability>(),
4626
+ rowset.GetValue <Schema::KMeansTreeSample::Data>()
4627
+ );
4628
+ });
4597
4629
sampleCount++;
4598
4630
4599
4631
if (!rowset.Next ()) {
@@ -4615,11 +4647,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4615
4647
4616
4648
while (!rowset.EndOfSet ()) {
4617
4649
TIndexBuildId id = rowset.GetValue <Schema::IndexBuildColumns::Id>();
4618
- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4619
- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4620
- << " : id# " << id);
4621
- auto & buildInfo = *buildInfoPtr->Get ();
4622
- buildInfo.AddIndexColumnInfo (rowset);
4650
+ fillBuildInfoByIdSafe (id, " IndexBuildColumns" , [&](TIndexBuildInfo& buildInfo) {
4651
+ buildInfo.AddIndexColumnInfo (rowset);
4652
+ });
4623
4653
4624
4654
if (!rowset.Next ()) {
4625
4655
return false ;
@@ -4635,11 +4665,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4635
4665
4636
4666
while (!rowset.EndOfSet ()) {
4637
4667
TIndexBuildId id = rowset.GetValue <Schema::BuildColumnOperationSettings::Id>();
4638
- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4639
- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4640
- << " : id# " << id);
4641
- auto & buildInfo = *buildInfoPtr->Get ();
4642
- buildInfo.AddBuildColumnInfo (rowset);
4668
+ fillBuildInfoByIdSafe (id, " BuildColumnOperationSettings" , [&](TIndexBuildInfo& buildInfo) {
4669
+ buildInfo.AddBuildColumnInfo (rowset);
4670
+ });
4643
4671
4644
4672
if (!rowset.Next ()) {
4645
4673
return false ;
@@ -4656,11 +4684,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase<TSchemeShard> {
4656
4684
4657
4685
while (!rowset.EndOfSet ()) {
4658
4686
TIndexBuildId id = rowset.GetValue <Schema::IndexBuildShardStatus::Id>();
4659
- const auto * buildInfoPtr = Self->IndexBuilds .FindPtr (id);
4660
- Y_VERIFY_S (buildInfoPtr, " BuildIndex not found"
4661
- << " : id# " << id);
4662
- auto & buildInfo = *buildInfoPtr->Get ();
4663
- buildInfo.AddShardStatus (rowset);
4687
+ fillBuildInfoByIdSafe (id, " IndexBuildShardStatus" , [&](TIndexBuildInfo& buildInfo) {
4688
+ buildInfo.AddShardStatus (rowset);
4689
+ });
4664
4690
4665
4691
if (!rowset.Next ()) {
4666
4692
return false ;
0 commit comments