Skip to content

Commit 9fe310e

Browse files
Autocomplete: Ship jaccard similarity changes (#3135)
This PR ships the new jaccard similarity fork. A/B tests show no regressions and changes over the baseline, so there's no need to have two implementations. ## Test plan - This PR just removes a feature flag and dead code.
1 parent 80dbffa commit 9fe310e

File tree

11 files changed

+366
-861
lines changed

11 files changed

+366
-861
lines changed

lib/shared/src/experimentation/FeatureFlagProvider.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ export enum FeatureFlag {
1919
// Enables the bfg-mixed context retriever that will combine BFG with the default local editor
2020
// context.
2121
CodyAutocompleteContextBfgMixed = 'cody-autocomplete-context-bfg-mixed',
22-
// Enables the new-jaccard-similarity context strategy that can find more than one match per
23-
// open file and includes matches from the same file.
24-
CodyAutocompleteContextNewJaccardSimilarity = 'cody-autocomplete-new-jaccard-similarity',
2522
// Enable latency adjustments based on accept/reject streaks
2623
CodyAutocompleteUserLatency = 'cody-autocomplete-user-latency',
2724
// Dynamically decide wether to show a single line or multiple lines for completions.

vscode/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a
1212

1313
### Changed
1414

15+
- Autocomplete: Enable the recent jaccard similarity improvements by default. [pull/3135](https://github.com/sourcegraph/cody/pull/3135)
16+
1517
## [1.4.3]
1618

1719
### Changed

vscode/src/completions/completion-provider-config.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ class CompletionProviderConfig {
1111

1212
private flagsToResolve = [
1313
FeatureFlag.CodyAutocompleteContextBfgMixed,
14-
FeatureFlag.CodyAutocompleteContextNewJaccardSimilarity,
1514
FeatureFlag.CodyAutocompleteDynamicMultilineCompletions,
1615
FeatureFlag.CodyAutocompleteHotStreak,
1716
FeatureFlag.CodyAutocompleteSingleMultilineRequest,
@@ -77,9 +76,6 @@ class CompletionProviderConfig {
7776
const { config } = this
7877

7978
const bfgMixedContextFlag = this.getPrefetchedFlag(FeatureFlag.CodyAutocompleteContextBfgMixed)
80-
const newJaccardSimilarityContextFlag = this.getPrefetchedFlag(
81-
FeatureFlag.CodyAutocompleteContextNewJaccardSimilarity
82-
)
8379

8480
const contextStrategy: ContextStrategy =
8581
config.autocompleteExperimentalGraphContext === 'bfg'
@@ -94,9 +90,7 @@ class CompletionProviderConfig {
9490
? 'new-jaccard-similarity'
9591
: bfgMixedContextFlag
9692
? 'bfg-mixed'
97-
: newJaccardSimilarityContextFlag
98-
? 'new-jaccard-similarity'
99-
: 'jaccard-similarity'
93+
: 'jaccard-similarity'
10094

10195
return contextStrategy
10296
}

vscode/src/completions/context/context-strategy.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import type { ContextRetriever } from '../types'
44

55
import type { BfgRetriever } from './retrievers/bfg/bfg-retriever'
66
import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jaccard-similarity-retriever'
7-
import { JaccardSimilarityRetriever as NewJaccardSimilarityRetriever } from './retrievers/new-jaccard-similarity/jaccard-similarity-retriever'
87
import { SectionHistoryRetriever } from './retrievers/section-history/section-history-retriever'
98

109
export type ContextStrategy =
@@ -47,10 +46,6 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
4746
this.localRetriever = new JaccardSimilarityRetriever()
4847
this.disposables.push(this.localRetriever)
4948
break
50-
case 'new-jaccard-similarity':
51-
this.localRetriever = new NewJaccardSimilarityRetriever()
52-
this.disposables.push(this.localRetriever)
53-
break
5449
case 'local-mixed':
5550
this.localRetriever = new JaccardSimilarityRetriever()
5651
// Filling the graphRetriever field with another local retriever but that's alright
@@ -101,8 +96,7 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
10196
break
10297

10398
// The jaccard similarity strategies only uses the local retriever
104-
case 'jaccard-similarity':
105-
case 'new-jaccard-similarity': {
99+
case 'jaccard-similarity': {
106100
if (this.localRetriever) {
107101
retrievers.push(this.localRetriever)
108102
}
Lines changed: 133 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import dedent from 'dedent'
12
import { describe, expect, it } from 'vitest'
23

3-
import { bestJaccardMatch, getWords } from './bestJaccardMatch'
4+
import { bestJaccardMatches, getWordOccurrences } from './bestJaccardMatch'
45

56
const targetSnippet = `
67
import { bestJaccardMatch, getWords } from './context'
@@ -25,30 +26,18 @@ describe('getWords', () => {
2526
})
2627
`
2728

28-
const matchSnippet = `
29-
describe('bestJaccardMatch', () => {
30-
it('should return the best match', () => {
31-
const matchText = [
32-
'foo',
33-
'bar',
34-
'baz',
35-
'qux',
36-
'quux',
37-
].join('\n')
38-
})
39-
})
40-
`
29+
const MAX_MATCHES = 50
4130

4231
describe('getWords', () => {
4332
it('works with regular text', () => {
44-
expect(getWords('foo bar baz')).toEqual(
33+
expect(getWordOccurrences('foo bar baz')).toEqual(
4534
new Map<string, number>([
4635
['foo', 1],
4736
['bar', 1],
4837
['baz', 1],
4938
])
5039
)
51-
expect(getWords('running rocks slipped over')).toEqual(
40+
expect(getWordOccurrences('running rocks slipped over')).toEqual(
5241
new Map<string, number>([
5342
['run', 1],
5443
['rock', 1],
@@ -58,7 +47,7 @@ describe('getWords', () => {
5847
})
5948

6049
it('works with code snippets', () => {
61-
expect(getWords(targetSnippet)).toEqual(
50+
expect(getWordOccurrences(targetSnippet)).toEqual(
6251
new Map<string, number>([
6352
['import', 1],
6453
['bestjaccardmatch', 1],
@@ -87,61 +76,158 @@ describe('getWords', () => {
8776
})
8877

8978
describe('bestJaccardMatch', () => {
90-
it('should return the best match in 5 line windows', () => {
91-
const matchText = [
92-
'foo',
93-
'bar',
94-
'baz',
95-
'qux',
96-
'quux',
97-
'quuz',
98-
'corge',
99-
'grault',
100-
'garply',
101-
'waldo',
102-
'fred',
103-
'plugh',
104-
'xyzzy',
105-
'thud',
106-
].join('\n')
107-
expect(bestJaccardMatch('foo\nbar\nbaz', matchText, 3)).toEqual({
79+
it('should return the best match', () => {
80+
const matchText = dedent`
81+
foo
82+
bar
83+
baz
84+
qux
85+
quux
86+
quuz
87+
corge
88+
grault
89+
garply
90+
waldo
91+
fred
92+
plugh
93+
xyzzy
94+
thud
95+
`
96+
expect(bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)[0]).toEqual({
10897
score: 1,
10998
content: 'foo\nbar\nbaz',
110-
startLine: 0,
11199
endLine: 2,
100+
startLine: 0,
112101
})
113-
expect(bestJaccardMatch('bar\nquux', matchText, 4)).toEqual({
102+
expect(bestJaccardMatches('bar\nquux', matchText, 4, MAX_MATCHES)[0]).toEqual({
114103
score: 0.5,
115104
content: 'bar\nbaz\nqux\nquux',
116-
startLine: 1,
117105
endLine: 4,
106+
startLine: 1,
118107
})
119108
expect(
120-
bestJaccardMatch(
109+
bestJaccardMatches(
121110
['grault', 'notexist', 'garply', 'notexist', 'waldo', 'notexist', 'notexist'].join('\n'),
122111
matchText,
123-
6
124-
)
112+
6,
113+
MAX_MATCHES
114+
)[0]
125115
).toEqual({
126116
score: 0.3,
127-
content: ['corge', 'grault', 'garply', 'waldo', 'fred', 'plugh'].join('\n'),
128-
startLine: 6,
129-
endLine: 11,
117+
startLine: 4,
118+
endLine: 9,
119+
content: ['quux', 'quuz', 'corge', 'grault', 'garply', 'waldo'].join('\n'),
130120
})
131121
})
132122

123+
it('returns more than one match', () => {
124+
const matchText = dedent`
125+
foo
126+
bar
127+
baz
128+
qux
129+
foo
130+
quuz
131+
corge
132+
grault
133+
garply
134+
waldo
135+
fred
136+
plugh
137+
xyzzy
138+
thud`
139+
140+
const matches = bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)
141+
142+
expect(matches).toHaveLength(4)
143+
expect(matches.map(match => match.content.split('\n'))).toEqual([
144+
['foo', 'bar', 'baz'],
145+
['qux', 'foo', 'quuz'],
146+
['corge', 'grault', 'garply'],
147+
['waldo', 'fred', 'plugh'],
148+
])
149+
})
150+
133151
it('works with code snippets', () => {
134-
expect(bestJaccardMatch(targetSnippet, matchSnippet, 5)).toMatchInlineSnapshot(`
152+
expect(
153+
bestJaccardMatches(
154+
targetSnippet,
155+
dedent`
156+
describe('bestJaccardMatch', () => {
157+
it('should return the best match', () => {
158+
const matchText = [
159+
'foo',
160+
'bar',
161+
'baz',
162+
'qux',
163+
'quux',
164+
].join('\n')
165+
})
166+
})
167+
`,
168+
5,
169+
MAX_MATCHES
170+
)[0]
171+
).toMatchInlineSnapshot(`
135172
{
136173
"content": "describe('bestJaccardMatch', () => {
137174
it('should return the best match', () => {
138175
const matchText = [
139176
'foo',
140177
'bar',",
141-
"endLine": 5,
178+
"endLine": 4,
142179
"score": 0.08695652173913043,
143-
"startLine": 1,
180+
"startLine": 0,
144181
}
145182
`)
146183
})
184+
185+
it('works for input texts that are shorter than the window size', () => {
186+
expect(bestJaccardMatches('foo', 'foo', 10, MAX_MATCHES)[0]).toEqual({
187+
content: 'foo',
188+
endLine: 0,
189+
score: 1,
190+
startLine: 0,
191+
})
192+
})
193+
194+
it('skips over windows with empty start lines', () => {
195+
const matches = bestJaccardMatches(
196+
'foo',
197+
dedent`
198+
// foo
199+
// unrelated 1
200+
// unrelated 2
201+
202+
203+
// foo
204+
// unrelated 3
205+
// unrelated 4
206+
`,
207+
3,
208+
MAX_MATCHES
209+
)
210+
211+
expect(matches[0].content).toBe('// foo\n// unrelated 1\n// unrelated 2')
212+
expect(matches[1].content).toBe('// foo\n// unrelated 3\n// unrelated 4')
213+
})
214+
215+
it("does not skips over windows with empty start lines if we're at the en", () => {
216+
const matches = bestJaccardMatches(
217+
targetSnippet,
218+
dedent`
219+
// foo
220+
// unrelated
221+
// unrelated
222+
223+
224+
// foo
225+
`,
226+
3,
227+
MAX_MATCHES
228+
)
229+
230+
expect(matches[0].content).toBe('\n\n// foo')
231+
expect(matches[1].content).toBe('// foo\n// unrelated\n// unrelated')
232+
})
147233
})

0 commit comments

Comments
 (0)