Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit abf9ce3

Browse files
authored
symbols: respect limits, merge properly (#61056)
fixes #60698 This fixes the following bugs for unindexed symbol search: Now we respect the limit given by "frontend". Before, the upper limit of unindexed symbol search was hard coded to 500. This limit could not be overwritten with the count: filter. Merge symbol matches properly, applying the same logic as we do for indexed search: Before, symbol matches within the same file were simply appended which created duplicates for AND/OR queries if separate terms of the query matched the same symbol. Test plan: New unit test Partially covered by existing tests
1 parent 2b9ddff commit abf9ce3

File tree

6 files changed

+162
-38
lines changed

6 files changed

+162
-38
lines changed

cmd/symbols/internal/api/handler.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,10 @@ import (
1111
internalgrpc "github.com/sourcegraph/sourcegraph/internal/grpc"
1212
"github.com/sourcegraph/sourcegraph/internal/grpc/defaults"
1313
"github.com/sourcegraph/sourcegraph/internal/search"
14-
"github.com/sourcegraph/sourcegraph/internal/search/result"
1514
proto "github.com/sourcegraph/sourcegraph/internal/symbols/v1"
1615
internaltypes "github.com/sourcegraph/sourcegraph/internal/types"
1716
)
1817

19-
const maxNumSymbolResults = 500
20-
2118
type grpcService struct {
2219
searchFunc types.SearchFunc
2320
readFileFunc func(context.Context, internaltypes.RepoCommitPath) ([]byte, error)
@@ -60,21 +57,12 @@ func NewHandler(
6057
handleStatus func(http.ResponseWriter, *http.Request),
6158
ctagsBinary string,
6259
) http.Handler {
63-
searchFuncWrapper := func(ctx context.Context, args search.SymbolsParameters) (result.Symbols, error) {
64-
// Massage the arguments to ensure that First is set to a reasonable value.
65-
if args.First < 0 || args.First > maxNumSymbolResults {
66-
args.First = maxNumSymbolResults
67-
}
68-
69-
return searchFunc(ctx, args)
70-
}
71-
7260
rootLogger := logger.Scoped("symbolsServer")
7361

7462
// Initialize the gRPC server
7563
grpcServer := defaults.NewServer(rootLogger)
7664
proto.RegisterSymbolsServiceServer(grpcServer, &grpcService{
77-
searchFunc: searchFuncWrapper,
65+
searchFunc: searchFunc,
7866
readFileFunc: readFileFunc,
7967
ctagsBinary: ctagsBinary,
8068
logger: rootLogger.Scoped("grpc"),

cmd/symbols/internal/database/store/search.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,16 @@ func scanSymbols(rows *sql.Rows, queryErr error) (symbols []result.Symbol, err e
4444
return symbols, nil
4545
}
4646

47+
// This limit prevents users from accidentally running a query that returns an
48+
// extremely large number of results. It is arbitrary, but it should be at least
49+
// as high as the default limit frontend sends to the symbol service.
50+
const maxSymbolLimit = 50_000
51+
4752
func (s *store) Search(ctx context.Context, args search.SymbolsParameters) ([]result.Symbol, error) {
53+
if args.First < 0 || args.First > maxSymbolLimit {
54+
return nil, errors.Newf("limit %d out of bounds [0, %d]", args.First, maxSymbolLimit)
55+
}
56+
4857
return scanSymbols(s.Query(ctx, sqlf.Sprintf(
4958
`
5059
SELECT

internal/search/result/file.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ package result
33
import (
44
"net/url"
55
"path"
6+
"slices"
67
"strings"
78
"unicode/utf8"
89

910
"github.com/sourcegraph/log"
11+
1012
"github.com/sourcegraph/sourcegraph/internal/api"
1113
"github.com/sourcegraph/sourcegraph/internal/search/filter"
1214
"github.com/sourcegraph/sourcegraph/internal/types"
@@ -164,15 +166,19 @@ func (fm *FileMatch) Select(selectPath filter.SelectPath) Match {
164166
return nil
165167
}
166168

167-
// AppendMatches appends the line matches from src as well as updating match
168-
// counts and limit.
169169
func (fm *FileMatch) AppendMatches(src *FileMatch) {
170170
// TODO merge hunk matches smartly
171171
fm.ChunkMatches = append(fm.ChunkMatches, src.ChunkMatches...)
172-
fm.Symbols = append(fm.Symbols, src.Symbols...)
172+
fm.mergeSymbols(src)
173173
fm.LimitHit = fm.LimitHit || src.LimitHit
174174
}
175175

176+
func (fm *FileMatch) mergeSymbols(src *FileMatch) {
177+
fm.Symbols = append(fm.Symbols, src.Symbols...)
178+
slices.SortFunc(fm.Symbols, compareSymbolMatches)
179+
fm.Symbols = DedupSymbols(fm.Symbols)
180+
}
181+
176182
// Limit will mutate fm such that it only has limit results. limit is a number
177183
// greater than 0.
178184
//

internal/search/result/file_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,107 @@ import (
66
"github.com/stretchr/testify/require"
77
)
88

9+
func mkSymbolMatch(name string, line int) *SymbolMatch {
10+
return &SymbolMatch{
11+
Symbol: Symbol{
12+
Name: name,
13+
Line: line,
14+
},
15+
}
16+
}
17+
18+
func TestAppendSymbols(t *testing.T) {
19+
cases := []struct {
20+
name string
21+
input1 *FileMatch
22+
input2 *FileMatch
23+
output *FileMatch
24+
}{
25+
{
26+
name: "duplicate symbol",
27+
input1: &FileMatch{
28+
Symbols: []*SymbolMatch{
29+
mkSymbolMatch("sym2", 42),
30+
mkSymbolMatch("sym1", 41),
31+
},
32+
},
33+
input2: &FileMatch{
34+
Symbols: []*SymbolMatch{
35+
mkSymbolMatch("sym2", 42),
36+
mkSymbolMatch("sym3", 43),
37+
},
38+
},
39+
output: &FileMatch{
40+
Symbols: []*SymbolMatch{
41+
mkSymbolMatch("sym1", 41),
42+
mkSymbolMatch("sym2", 42),
43+
mkSymbolMatch("sym3", 43),
44+
},
45+
},
46+
},
47+
{
48+
name: "same line, different symbol",
49+
input1: &FileMatch{
50+
Symbols: []*SymbolMatch{
51+
mkSymbolMatch("sym1", 41),
52+
},
53+
},
54+
input2: &FileMatch{
55+
Symbols: []*SymbolMatch{
56+
mkSymbolMatch("sym2", 41),
57+
},
58+
},
59+
output: &FileMatch{
60+
Symbols: []*SymbolMatch{
61+
mkSymbolMatch("sym1", 41),
62+
mkSymbolMatch("sym2", 41),
63+
},
64+
},
65+
},
66+
{
67+
name: "empty left side",
68+
input1: &FileMatch{},
69+
input2: &FileMatch{
70+
Symbols: []*SymbolMatch{
71+
mkSymbolMatch("sym1", 41),
72+
},
73+
},
74+
output: &FileMatch{
75+
Symbols: []*SymbolMatch{
76+
mkSymbolMatch("sym1", 41),
77+
},
78+
},
79+
},
80+
{
81+
name: "empty right side",
82+
input1: &FileMatch{
83+
Symbols: []*SymbolMatch{
84+
mkSymbolMatch("sym1", 41),
85+
},
86+
},
87+
input2: &FileMatch{},
88+
output: &FileMatch{
89+
Symbols: []*SymbolMatch{
90+
mkSymbolMatch("sym1", 41),
91+
},
92+
},
93+
},
94+
{
95+
name: "both empty",
96+
input1: &FileMatch{},
97+
input2: &FileMatch{},
98+
output: &FileMatch{},
99+
},
100+
}
101+
102+
for _, tc := range cases {
103+
t.Run(tc.name, func(t *testing.T) {
104+
tc.input1.mergeSymbols(tc.input2)
105+
require.Equal(t, tc.output, tc.input1)
106+
})
107+
}
108+
}
109+
9110
func TestConvertMatches(t *testing.T) {
10111
t.Run("AsLineMatches", func(t *testing.T) {
11112
cases := []struct {

internal/search/result/symbol.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package result
22

33
import (
4+
"cmp"
45
"fmt"
56
"net/url"
67
"strconv"
@@ -142,6 +143,45 @@ func (s *SymbolMatch) URL() *url.URL {
142143
return base
143144
}
144145

146+
func compareSymbolMatches(a, b *SymbolMatch) int {
147+
if v := cmp.Compare(a.Symbol.Line, b.Symbol.Line); v != 0 {
148+
return v
149+
}
150+
151+
if v := cmp.Compare(a.Symbol.Name, b.Symbol.Name); v != 0 {
152+
return v
153+
}
154+
155+
if v := cmp.Compare(a.Symbol.Kind, b.Symbol.Kind); v != 0 {
156+
return v
157+
}
158+
159+
if v := cmp.Compare(a.Symbol.Parent, b.Symbol.Parent); v != 0 {
160+
return v
161+
}
162+
163+
return cmp.Compare(a.Symbol.ParentKind, b.Symbol.ParentKind)
164+
}
165+
166+
// DedupSymbols removes duplicate symbols from the list. We use a heuristic to
167+
// determine duplicate matches. We regard a match as the same if they have the
168+
// same symbol info and appear on the same line. I am unaware of a language
169+
// where you can break that assumption.
170+
func DedupSymbols(symbols []*SymbolMatch) []*SymbolMatch {
171+
if len(symbols) <= 1 {
172+
return symbols
173+
}
174+
dedup := symbols[:1]
175+
for _, sym := range symbols[1:] {
176+
last := dedup[len(dedup)-1]
177+
if compareSymbolMatches(sym, last) == 0 {
178+
continue
179+
}
180+
dedup = append(dedup, sym)
181+
}
182+
return dedup
183+
}
184+
145185
func urlFragmentFromRange(lspRange lsp.Range) string {
146186
if lspRange.Start == lspRange.End {
147187
return "L" + lineSpecFromPosition(lspRange.Start, false)

internal/search/zoekt/indexed_search.go

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -532,28 +532,8 @@ func zoektFileMatchToSymbolResults(repoName types.MinimalRepo, inputRev string,
532532

533533
// We deduplicate symbol matches. For example searching for "foo AND bar"
534534
// will return the symbol "foobar" twice. However, sourcegraph's result
535-
// type for symbol is modeled as a result per symbol. Additionally we use
536-
// a heuristic to determine duplicate matches. We regard a match as the
537-
// same if they have the same symbol info and appear on the same line. I
538-
// am unaware of a language where you can break that assumption.
539-
if len(symbols) <= 1 {
540-
return symbols
541-
}
542-
dedup := symbols[:1]
543-
for _, sym := range symbols[1:] {
544-
last := dedup[len(dedup)-1]
545-
if (sym.Symbol.Line == last.Symbol.Line) &&
546-
(sym.Symbol.Name == last.Symbol.Name) &&
547-
(sym.Symbol.Kind == last.Symbol.Kind) &&
548-
(sym.Symbol.Parent == last.Symbol.Parent) &&
549-
(sym.Symbol.ParentKind == last.Symbol.ParentKind) {
550-
continue
551-
}
552-
dedup = append(dedup, sym)
553-
}
554-
symbols = dedup
555-
556-
return symbols
535+
// type for symbol is modeled as a result per symbol.
536+
return result.DedupSymbols(symbols)
557537
}
558538

559539
// contextWithoutDeadline returns a context which will cancel if the cOld is

0 commit comments

Comments
 (0)