Skip to content

Commit ea9e5c0

Browse files
authored
* add test case for non-versioned content
* change vector search filter syntax, update dataset filter to match
1 parent 3901211 commit ea9e5c0

File tree

4 files changed

+24
-16
lines changed

4 files changed

+24
-16
lines changed

packages/datasets/src/bin/huggingFace/uploadCodeExampleDatasetToHuggingFace.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,6 @@ function makeLoadPagesFilter(
127127
sourceType: { $in: publicDatasetSourceTypes },
128128
url: { $nin: forbiddenUrls },
129129
action: { $ne: "deleted" },
130-
$or: [
131-
{ "metadata.version.isCurrent": { $exists: false } },
132-
{ "metadata.version.isCurrent": true },
133-
],
130+
"metadata.version.isCurrent": { $ne: false },
134131
};
135132
}

packages/datasets/src/pageDataset/loadPageDataset.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,7 @@ export async function loadPagesDataset({
2828
sourceType: { $in: dataSourceTypes },
2929
url: { $nin: forbiddenUrls },
3030
action: { $ne: "deleted" },
31-
$or: [
32-
{ "metadata.version.isCurrent": { $exists: false } },
33-
{ "metadata.version.isCurrent": true },
34-
],
31+
"metadata.version.isCurrent": { $ne: false },
3532
},
3633
},
3734
{

packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.test.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,26 @@ describe("nearest neighbor search", () => {
335335
).toHaveLength(5);
336336
});
337337

338+
it("Should be able to handle content that is not versioned", async () => {
339+
assert(store);
340+
const query = "tell me about MongoDB";
341+
const filter = {
342+
sourceName: "blog", // blog is not versioned
343+
};
344+
const { embedding } = await embedder.embed({
345+
text: query,
346+
});
347+
348+
const matches = await store.findNearestNeighbors(embedding, {
349+
...findNearestNeighborOptions,
350+
filter,
351+
});
352+
353+
expect(
354+
matches.filter((match) => match.metadata?.version === undefined)
355+
).toHaveLength(5);
356+
});
357+
338358
it("Should filter content to sourceType requested", async () => {
339359
assert(store);
340360
const query = "db.collection.insertOne()";

packages/mongodb-rag-core/src/contentStore/MongoDbEmbeddedContentStore.ts

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,7 @@ export function makeMongoDbEmbeddedContentStore({
263263
type MongoDbAtlasVectorSearchFilter = {
264264
sourceName?: string;
265265
"metadata.version.label"?: string;
266-
"metadata.version.isCurrent"?: boolean;
267-
$or?: {
268-
"metadata.version.isCurrent": boolean | null;
269-
}[];
266+
"metadata.version.isCurrent"?: boolean | { $ne: boolean };
270267
sourceType?: string;
271268
};
272269

@@ -289,10 +286,7 @@ const handleFilters = (
289286
// 1. current=true was explicitly requested, or
290287
// 2. [Default] no version filters were specified (current and label are undefined)
291288
else if (current === true || current === undefined) {
292-
vectorSearchFilter["$or"] = [
293-
{ "metadata.version.isCurrent": true },
294-
{ "metadata.version.isCurrent": null },
295-
];
289+
vectorSearchFilter["metadata.version.isCurrent"] = { $ne: false }; // Include unversioned embeddings
296290
}
297291
// Only find embeddings that are explicitly marked as non-current (isCurrent: false)
298292
else if (current === false) {

0 commit comments

Comments
 (0)