Skip to content

Commit 771fd96

Browse files
authored
[ENH]: soft delete databases, add FinishDatabaseDeletion gRPC method to hard delete databases (#4627)
## Description of changes Databases are now always soft deleted instead of being hard deleted. Databases are hard deleted when the new `FinishDatabaseDeletion` method is called (the garbage collector calls this in the next PR). ## Test plan _How are these changes tested?_ - [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust Updated existing test to cover soft delete behavior. ## Documentation Changes _Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs section](https://github.com/chroma-core/chroma/tree/main/docs/docs.trychroma.com)?_ n/a
1 parent 4e79f10 commit 771fd96

File tree

10 files changed

+189
-46
lines changed

10 files changed

+189
-46
lines changed

go/pkg/sysdb/coordinator/coordinator.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package coordinator
22

33
import (
44
"context"
5+
"time"
56

67
"github.com/chroma-core/chroma/go/pkg/common"
78
"github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb"
@@ -255,3 +256,15 @@ func (s *Coordinator) BatchGetCollectionVersionFilePaths(ctx context.Context, re
255256
func (s *Coordinator) BatchGetCollectionSoftDeleteStatus(ctx context.Context, req *coordinatorpb.BatchGetCollectionSoftDeleteStatusRequest) (*coordinatorpb.BatchGetCollectionSoftDeleteStatusResponse, error) {
256257
return s.catalog.BatchGetCollectionSoftDeleteStatus(ctx, req.CollectionIds)
257258
}
259+
260+
func (s *Coordinator) FinishDatabaseDeletion(ctx context.Context, req *coordinatorpb.FinishDatabaseDeletionRequest) (*coordinatorpb.FinishDatabaseDeletionResponse, error) {
261+
numDeleted, err := s.catalog.FinishDatabaseDeletion(ctx, time.Unix(req.CutoffTime.Seconds, int64(req.CutoffTime.Nanos)))
262+
if err != nil {
263+
return nil, err
264+
}
265+
266+
res := &coordinatorpb.FinishDatabaseDeletionResponse{
267+
NumDeleted: numDeleted,
268+
}
269+
return res, nil
270+
}

go/pkg/sysdb/coordinator/table_catalog.go

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -183,27 +183,36 @@ func (tc *Catalog) DeleteDatabase(ctx context.Context, deleteDatabase *model.Del
183183
if len(databases) == 0 {
184184
return common.ErrDatabaseNotFound
185185
}
186-
err = tc.metaDomain.DatabaseDb(txCtx).Delete(databases[0].ID)
186+
err = tc.metaDomain.DatabaseDb(txCtx).SoftDelete(databases[0].ID)
187187
if err != nil {
188188
return err
189189
}
190+
191+
collections, err := tc.metaDomain.CollectionDb(txCtx).GetCollections(nil, nil, deleteDatabase.Tenant, deleteDatabase.Name, nil, nil)
192+
if err != nil {
193+
return err
194+
}
195+
196+
for _, collection := range collections {
197+
collectionID, err := types.Parse(collection.Collection.ID)
198+
if err != nil {
199+
return err
200+
}
201+
202+
err = tc.softDeleteCollection(txCtx, &model.DeleteCollection{
203+
ID: collectionID,
204+
TenantID: deleteDatabase.Tenant,
205+
DatabaseName: deleteDatabase.Name,
206+
})
207+
if err != nil {
208+
return err
209+
}
210+
}
211+
190212
return nil
191213
})
192214
}
193215

194-
func (tc *Catalog) GetAllDatabases(ctx context.Context, ts types.Timestamp) ([]*model.Database, error) {
195-
databases, err := tc.metaDomain.DatabaseDb(ctx).GetAllDatabases()
196-
if err != nil {
197-
log.Error("error getting all databases", zap.Error(err))
198-
return nil, err
199-
}
200-
result := make([]*model.Database, 0, len(databases))
201-
for _, database := range databases {
202-
result = append(result, convertDatabaseToModel(database))
203-
}
204-
return result, nil
205-
}
206-
207216
func (tc *Catalog) CreateTenant(ctx context.Context, createTenant *model.CreateTenant, ts types.Timestamp) (*model.Tenant, error) {
208217
var result *model.Tenant
209218

@@ -681,7 +690,7 @@ func (tc *Catalog) softDeleteCollection(ctx context.Context, deleteCollection *m
681690

682691
// Generate new name with timestamp and random number
683692
oldName := *collections[0].Collection.Name
684-
newName := fmt.Sprintf("_deleted_%s_%s", oldName, *types.FromUniqueID(deleteCollection.ID))
693+
newName := fmt.Sprintf("_deleted_%s_%s", oldName, deleteCollection.ID.String())
685694

686695
dbCollection := &dbmodel.Collection{
687696
ID: deleteCollection.ID.String(),
@@ -2211,3 +2220,7 @@ func (tc *Catalog) GetVersionFileNamesForCollection(ctx context.Context, tenantI
22112220

22122221
return collectionEntry.VersionFileName, nil
22132222
}
2223+
2224+
func (tc *Catalog) FinishDatabaseDeletion(ctx context.Context, cutoffTime time.Time) (uint64, error) {
2225+
return tc.metaDomain.DatabaseDb(ctx).FinishDatabaseDeletion(cutoffTime)
2226+
}

go/pkg/sysdb/grpc/collection_service.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ func (s *Server) GetCollectionWithSegments(ctx context.Context, req *coordinator
253253
collection, segments, err := s.coordinator.GetCollectionWithSegments(ctx, parsedCollectionID)
254254
if err != nil {
255255
log.Error("GetCollectionWithSegments failed. ", zap.Error(err), zap.String("collection_id", collectionID))
256-
if err == common.ErrCollectionNotFound {
256+
if err == common.ErrCollectionNotFound || err == common.ErrCollectionSoftDeleted {
257257
return res, grpcutils.BuildNotFoundGrpcError(err.Error())
258258
}
259259
return res, grpcutils.BuildInternalGrpcError(err.Error())
@@ -404,7 +404,7 @@ func (s *Server) ForkCollection(ctx context.Context, req *coordinatorpb.ForkColl
404404
collection, segments, err := s.coordinator.ForkCollection(ctx, forkCollection)
405405
if err != nil {
406406
log.Error("ForkCollection failed. ", zap.Error(err), zap.String("collection_id", sourceCollectionID))
407-
if err == common.ErrCollectionNotFound {
407+
if err == common.ErrCollectionNotFound || err == common.ErrCollectionSoftDeleted {
408408
return res, grpcutils.BuildNotFoundGrpcError(err.Error())
409409
}
410410
if err == common.ErrCollectionLogPositionStale {

go/pkg/sysdb/grpc/tenant_database_service.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,11 @@ func (s *Server) GetLastCompactionTimeForTenant(ctx context.Context, req *coordi
158158
}
159159
return res, nil
160160
}
161+
162+
func (s *Server) FinishDatabaseDeletion(ctx context.Context, req *coordinatorpb.FinishDatabaseDeletionRequest) (*coordinatorpb.FinishDatabaseDeletionResponse, error) {
163+
res, err := s.coordinator.FinishDatabaseDeletion(ctx, req)
164+
if err != nil {
165+
return nil, grpcutils.BuildInternalGrpcError(err.Error())
166+
}
167+
return res, nil
168+
}

go/pkg/sysdb/grpc/tenant_database_service_test.go

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/chroma-core/chroma/go/pkg/sysdb/metastore/db/dbcore"
1515
"github.com/chroma-core/chroma/go/pkg/sysdb/metastore/db/dbmodel"
1616
s3metastore "github.com/chroma-core/chroma/go/pkg/sysdb/metastore/s3"
17+
"github.com/chroma-core/chroma/go/pkg/types"
1718
"github.com/google/uuid"
1819
"github.com/pingcap/log"
1920
"github.com/stretchr/testify/suite"
@@ -116,7 +117,7 @@ func (suite *TenantDatabaseServiceTestSuite) TestServer_DeleteDatabase() {
116117
tenantName := "TestDeleteDatabase"
117118
databaseName := "TestDeleteDatabase"
118119
// Generate random uuid for db id
119-
databaseeId := uuid.New().String()
120+
databaseId := uuid.New().String()
120121

121122
_, err := suite.catalog.CreateTenant(context.Background(), &model.CreateTenant{
122123
Name: tenantName,
@@ -127,29 +128,61 @@ func (suite *TenantDatabaseServiceTestSuite) TestServer_DeleteDatabase() {
127128
_, err = suite.catalog.CreateDatabase(context.Background(), &model.CreateDatabase{
128129
Tenant: tenantName,
129130
Name: databaseName,
130-
ID: databaseeId,
131+
ID: databaseId,
131132
Ts: time.Now().Unix(),
132133
}, time.Now().Unix())
133134
suite.NoError(err)
134135

136+
collectionID := types.NewUniqueID()
135137
_, _, err = suite.catalog.CreateCollection(context.Background(), &model.CreateCollection{
138+
ID: collectionID,
136139
TenantID: tenantName,
137140
DatabaseName: databaseName,
138141
Name: "TestCollection",
139142
}, time.Now().Unix())
140143
suite.NoError(err)
141144

145+
timeBeforeSoftDelete := time.Now()
146+
142147
err = suite.catalog.DeleteDatabase(context.Background(), &model.DeleteDatabase{
143148
Tenant: tenantName,
144149
Name: databaseName,
145150
})
146151
suite.NoError(err)
147152

148-
// Check that associated collection was deleted
149-
var count int64
153+
// Check that associated collection was soft deleted
150154
var collections []*dbmodel.Collection
151-
suite.NoError(suite.db.Find(&collections).Count(&count).Error)
152-
suite.Equal(int64(0), count)
155+
suite.NoError(suite.db.Find(&collections).Error)
156+
suite.Equal(1, len(collections))
157+
suite.Equal(true, collections[0].IsDeleted)
158+
159+
// Database should not be eligible for hard deletion yet because it still has a (soft deleted) collection
160+
numDeleted, err := suite.catalog.FinishDatabaseDeletion(context.Background(), time.Now())
161+
suite.NoError(err)
162+
suite.Equal(uint64(0), numDeleted)
163+
164+
// Hard delete associated collection
165+
suite.NoError(err)
166+
suite.NoError(suite.catalog.DeleteCollection(context.Background(), &model.DeleteCollection{
167+
TenantID: tenantName,
168+
DatabaseName: databaseName,
169+
ID: collectionID,
170+
}, false))
171+
172+
// Database should now be eligible for hard deletion, but first verify that database is not deleted if cutoff time is prior to soft delete
173+
numDeleted, err = suite.catalog.FinishDatabaseDeletion(context.Background(), timeBeforeSoftDelete)
174+
suite.NoError(err)
175+
suite.Equal(uint64(0), numDeleted)
176+
177+
// Hard delete database
178+
numDeleted, err = suite.catalog.FinishDatabaseDeletion(context.Background(), time.Now())
179+
suite.NoError(err)
180+
suite.Equal(uint64(1), numDeleted)
181+
182+
// Verify that database is hard deleted
183+
var databases []*dbmodel.Database
184+
suite.NoError(suite.db.Debug().Where("id = ?", databaseId).Find(&databases).Error)
185+
suite.Equal(0, len(databases))
153186
}
154187

155188
func TestTenantDatabaseServiceTestSuite(t *testing.T) {

go/pkg/sysdb/metastore/db/dao/database.go

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package dao
22

33
import (
44
"errors"
5+
"time"
56

67
"github.com/chroma-core/chroma/go/pkg/common"
78
"github.com/chroma-core/chroma/go/pkg/sysdb/metastore/db/dbmodel"
@@ -28,21 +29,12 @@ func (s *databaseDb) DeleteByTenantIdAndName(tenantId string, databaseName strin
2829
return len(databases), err
2930
}
3031

31-
func (s *databaseDb) GetAllDatabases() ([]*dbmodel.Database, error) {
32-
var databases []*dbmodel.Database
33-
query := s.db.Table("databases")
34-
35-
if err := query.Find(&databases).Error; err != nil {
36-
return nil, err
37-
}
38-
return databases, nil
39-
}
40-
4132
func (s *databaseDb) ListDatabases(limit *int32, offset *int32, tenantID string) ([]*dbmodel.Database, error) {
4233
var databases []*dbmodel.Database
4334
query := s.db.Table("databases").
4435
Select("databases.id, databases.name, databases.tenant_id").
4536
Where("databases.tenant_id = ?", tenantID).
37+
Where("databases.is_deleted = ?", false).
4638
Order("databases.created_at ASC")
4739

4840
if limit != nil {
@@ -65,7 +57,8 @@ func (s *databaseDb) GetDatabases(tenantID string, databaseName string) ([]*dbmo
6557
query := s.db.Table("databases").
6658
Select("databases.id, databases.name, databases.tenant_id").
6759
Where("databases.name = ?", databaseName).
68-
Where("databases.tenant_id = ?", tenantID)
60+
Where("databases.tenant_id = ?", tenantID).
61+
Where("databases.is_deleted = ?", false)
6962

7063
if err := query.Find(&databases).Error; err != nil {
7164
log.Error("GetDatabases", zap.Error(err))
@@ -95,13 +88,13 @@ func (s *databaseDb) Insert(database *dbmodel.Database) error {
9588
return err
9689
}
9790

98-
func (s *databaseDb) Delete(databaseID string) error {
91+
func (s *databaseDb) SoftDelete(databaseID string) error {
9992
return s.db.Transaction(func(tx *gorm.DB) error {
100-
if err := tx.Where("id = ?", databaseID).Delete(&dbmodel.Database{}).Error; err != nil {
101-
return err
102-
}
103-
104-
if err := tx.Where("database_id = ?", databaseID).Delete(&dbmodel.Collection{}).Error; err != nil {
93+
if err := tx.Table("databases").
94+
Where("id = ?", databaseID).
95+
Update("is_deleted", true).
96+
Update("updated_at", time.Now()).
97+
Error; err != nil {
10598
return err
10699
}
107100

@@ -113,11 +106,44 @@ func (s *databaseDb) GetDatabasesByTenantID(tenantID string) ([]*dbmodel.Databas
113106
var databases []*dbmodel.Database
114107
query := s.db.Table("databases").
115108
Select("databases.id, databases.name, databases.tenant_id").
116-
Where("databases.tenant_id = ?", tenantID)
109+
Where("databases.tenant_id = ?", tenantID).
110+
Where("databases.is_deleted = ?", false)
117111

118112
if err := query.Find(&databases).Error; err != nil {
119113
log.Error("GetDatabasesByTenantID", zap.Error(err))
120114
return nil, err
121115
}
122116
return databases, nil
123117
}
118+
119+
func (s *databaseDb) FinishDatabaseDeletion(cutoffTime time.Time) (uint64, error) {
120+
numDeleted := uint64(0)
121+
122+
for {
123+
// Only hard delete databases that were soft deleted prior to the cutoff time and have no collections
124+
databasesSubQuery := s.db.
125+
Table("databases d").
126+
Select("d.id").
127+
Joins("LEFT JOIN collections c ON c.database_id = d.id").
128+
Where("d.is_deleted = ?", true).
129+
Where("d.updated_at < ?", cutoffTime).
130+
Group("d.id").
131+
Having("COUNT(c.id) = 0").
132+
Limit(1000)
133+
134+
res := s.db.Table("databases").
135+
Where("id IN (?)", databasesSubQuery).
136+
Delete(&dbmodel.Database{})
137+
if res.Error != nil {
138+
return numDeleted, res.Error
139+
}
140+
141+
numDeleted += uint64(res.RowsAffected)
142+
143+
if res.RowsAffected == 0 {
144+
break
145+
}
146+
}
147+
148+
return numDeleted, nil
149+
}

go/pkg/sysdb/metastore/db/dbmodel/database.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ func (v Database) TableName() string {
2222

2323
//go:generate mockery --name=IDatabaseDb
2424
type IDatabaseDb interface {
25-
GetAllDatabases() ([]*Database, error)
2625
GetDatabases(tenantID string, databaseName string) ([]*Database, error)
2726
ListDatabases(limit *int32, offset *int32, tenantID string) ([]*Database, error)
2827
Insert(in *Database) error
2928
DeleteAll() error
30-
Delete(databaseID string) error
29+
SoftDelete(databaseID string) error
30+
FinishDatabaseDeletion(cutoffTime time.Time) (uint64, error)
3131
}

idl/chromadb/proto/coordinator.proto

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ message DeleteDatabaseRequest {
4646

4747
message DeleteDatabaseResponse {}
4848

49+
message FinishDatabaseDeletionRequest {
50+
google.protobuf.Timestamp cutoff_time = 1;
51+
}
52+
53+
message FinishDatabaseDeletionResponse {
54+
uint64 num_deleted = 1;
55+
}
56+
4957
message CreateTenantRequest {
5058
string name = 2; // Names are globally unique
5159
}
@@ -496,6 +504,7 @@ service SysDB {
496504
rpc GetDatabase(GetDatabaseRequest) returns (GetDatabaseResponse) {}
497505
rpc ListDatabases(ListDatabasesRequest) returns (ListDatabasesResponse) {}
498506
rpc DeleteDatabase(DeleteDatabaseRequest) returns (DeleteDatabaseResponse) {}
507+
rpc FinishDatabaseDeletion(FinishDatabaseDeletionRequest) returns (FinishDatabaseDeletionResponse) {}
499508
rpc CreateTenant(CreateTenantRequest) returns (CreateTenantResponse) {}
500509
rpc GetTenant(GetTenantRequest) returns (GetTenantResponse) {}
501510
rpc CreateSegment(CreateSegmentRequest) returns (CreateSegmentResponse) {}

0 commit comments

Comments
 (0)