From 83948ccf190752c8c7054621cc6c4da37e0b03c0 Mon Sep 17 00:00:00 2001 From: Cerek Hillen Date: Wed, 18 Oct 2023 22:26:11 -0700 Subject: [PATCH 1/4] make it easier to see which value is which --- github/pull_request.go | 56 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/github/pull_request.go b/github/pull_request.go index ea0b7f0..f63db8e 100644 --- a/github/pull_request.go +++ b/github/pull_request.go @@ -3,6 +3,7 @@ package github import ( "bufio" "context" + "regexp" "strconv" "strings" @@ -140,6 +141,10 @@ func sdkFilesToInternalFiles(sdkFiles []*github.CommitFile) ([]*pullRequestFile, return internalFiles, nil } +var patchLinesRegexp = regexp.MustCompile( + `^@@ -(?P\d+),(?P\d+) \+(?P\d+),(?P\d) @@.*$`, +) + // Convert a file patch to an array of lineBounds. // // A couple optimization are used specific to our use case: @@ -153,29 +158,40 @@ func patchToLineBounds(patch string) ([]lineBound, error) { scanner := bufio.NewScanner(strings.NewReader(patch)) for scanner.Scan() { line := scanner.Text() - // patch header lines are formatted like: - // @@ -0,0 +1,5 @@ - if len(line) >= 15 && strings.HasPrefix(line, "@@ -") && strings.Contains(line[2:], " @@") { - // split into four pieces: (0) @@, (1) old line number and offset, (2), new line number and offset, (3) @@ - segments := strings.Split(line, " ") - if len(segments) < 4 { - continue - } - - // split and parse new line numbers - bounds := strings.Split(segments[2], ",") - start, err := strconv.Atoi(bounds[0][1:]) // drop the "+" - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", bounds[0][1:], line) - } - endOffset, err := strconv.Atoi(bounds[1]) // one-indexed offset (subtract 1 when using) - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", bounds[0][1:], line) - } + match := namedMatch(patchLinesRegexp, line) + if match == nil { + continue + } - lineBounds = append(lineBounds, lineBound{start: start, end: start + endOffset - 1}) + start, err := strconv.Atoi(match["new_start"]) + if err != nil { + return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_start"], line) } + endOffset, err := strconv.Atoi(match["new_rows"]) // one-indexed offset (subtract 1 when using) + if err != nil { + return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_rows"], line) + } + + lineBounds = append(lineBounds, lineBound{ + start: start, + end: start + endOffset - 1, + }) } return lineBounds, nil } + +func namedMatch(re *regexp.Regexp, text string) map[string]string { + matches := patchLinesRegexp.FindStringSubmatch(text) + if matches == nil { + return nil + } + matchesByName := map[string]string{} + for i, name := range re.SubexpNames() { + if i == 0 || name == "" { + continue + } + matchesByName[name] = matches[i] + } + return matchesByName +} From 710ec439cdf9a9e66ddef6eae12bfd1707ddaa62 Mon Sep 17 00:00:00 2001 From: Cerek Hillen Date: Wed, 18 Oct 2023 22:39:17 -0700 Subject: [PATCH 2/4] test simulating commits that don't all belong to the same commit --- github/multi_stage.patch | 25 +++++++++++++++++++++++++ github/pull_request.go | 19 +++++++++++++++++++ github/pull_request_test.go | 12 ++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 github/multi_stage.patch diff --git a/github/multi_stage.patch b/github/multi_stage.patch new file mode 100644 index 0000000..0051dd1 --- /dev/null +++ b/github/multi_stage.patch @@ -0,0 +1,25 @@ +From c0c36c8231066951d08ddca7325858603b591343 Mon Sep 17 00:00:00 2001 +From: Test Data +Date: Mon, 17 Sep 2001 00:00:00 +0000 +Subject: [PATCH 1/2] patch number one + +--- +diff --git a/path/to/file/py b/path/to/file.py +index c0c36c82..7b224fef 10281 +--- a/path/to/file.py b/path/to/file.py +@@ -10,2 +10,4 @@ + print("hello world") + print("hello world2") ++#well a third time won't kill me? ++print("hello world3") + +From db640e639d3676e5d5faad6057c999a32d37d9ac Mon Sep 17 00:00:00 2001 +From: Test Data +Date: Mon, 17 Sep 2001 00:00:10 +0000 +Subject: [PATCH 2/2] patch number two + +--- +diff --git a/path/to/file.py b/path/to/file.py +index 7b224fef..c0c36c8. 13021 +@@ -0,0 +1,1 @@ ++import("pandas") # who doesn't like pandas diff --git a/github/pull_request.go b/github/pull_request.go index f63db8e..8bf4773 100644 --- a/github/pull_request.go +++ b/github/pull_request.go @@ -163,6 +163,14 @@ func patchToLineBounds(patch string) ([]lineBound, error) { continue } + oldStart, err := strconv.Atoi(match["old_start"]) + if err != nil { + return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["old_start"], line) + } + oldEndOffset, err := strconv.Atoi(match["old_rows"]) + if err != nil { + return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["old_rows"], line) + } start, err := strconv.Atoi(match["new_start"]) if err != nil { return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_start"], line) @@ -172,6 +180,17 @@ func patchToLineBounds(patch string) ([]lineBound, error) { return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_rows"], line) } + diff := endOffset - oldEndOffset + for i, prevBound := range lineBounds { + if oldStart < prevBound.start { + prevBound.start += diff + prevBound.end += diff + } else if oldStart < prevBound.end { + prevBound.end += diff + } + lineBounds[i] = prevBound + } + lineBounds = append(lineBounds, lineBound{ start: start, end: start + endOffset - 1, diff --git a/github/pull_request_test.go b/github/pull_request_test.go index 6f64c9b..e00d4c0 100644 --- a/github/pull_request_test.go +++ b/github/pull_request_test.go @@ -1,6 +1,7 @@ package github import ( + _ "embed" "fmt" "reflect" "testing" @@ -268,6 +269,9 @@ func TestSdkFilesToInternalFiles(t *testing.T) { } } +//go:embed multi_stage.patch +var multiStagePatch string + func TestPatchToLineBounds(t *testing.T) { tests := []struct { name, patch string @@ -306,6 +310,14 @@ def foo(): print("a print statement") + print("and another.")`, []lineBound{{4, 10}, {24, 27}}}, + { + "file simulating multiple changes in sequence", + multiStagePatch, + []lineBound{ + {11,14}, + {1,1}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From ca34797447865ea943d372271aac107bb6f8d871 Mon Sep 17 00:00:00 2001 From: Cerek Hillen Date: Wed, 18 Oct 2023 23:19:54 -0700 Subject: [PATCH 3/4] clean it up a bit --- github/pull_request.go | 50 +++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/github/pull_request.go b/github/pull_request.go index 8bf4773..bae0ea4 100644 --- a/github/pull_request.go +++ b/github/pull_request.go @@ -141,7 +141,7 @@ func sdkFilesToInternalFiles(sdkFiles []*github.CommitFile) ([]*pullRequestFile, return internalFiles, nil } -var patchLinesRegexp = regexp.MustCompile( +var diffRangeRegexp = regexp.MustCompile( `^@@ -(?P\d+),(?P\d+) \+(?P\d+),(?P\d) @@.*$`, ) @@ -158,42 +158,32 @@ func patchToLineBounds(patch string) ([]lineBound, error) { scanner := bufio.NewScanner(strings.NewReader(patch)) for scanner.Scan() { line := scanner.Text() - match := namedMatch(patchLinesRegexp, line) + match := namedMatch(diffRangeRegexp, line) if match == nil { continue } - oldStart, err := strconv.Atoi(match["old_start"]) - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["old_start"], line) - } - oldEndOffset, err := strconv.Atoi(match["old_rows"]) - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["old_rows"], line) - } - start, err := strconv.Atoi(match["new_start"]) - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_start"], line) - } - endOffset, err := strconv.Atoi(match["new_rows"]) // one-indexed offset (subtract 1 when using) - if err != nil { - return nil, errors.Wrapf(err, "failed to convert %v to integer while processing %q", match["new_rows"], line) - } - - diff := endOffset - oldEndOffset - for i, prevBound := range lineBounds { - if oldStart < prevBound.start { - prevBound.start += diff - prevBound.end += diff - } else if oldStart < prevBound.end { - prevBound.end += diff + // NOTE: each of these named matches must have integers + // because the regexp specifically checks for `\d+`. + oldStart, _ := strconv.Atoi(match["old_start"]) + oldRows, _ := strconv.Atoi(match["old_rows"]) + newStart, _ := strconv.Atoi(match["new_start"]) + newRows, _ := strconv.Atoi(match["new_rows"]) // one-indexed offset (subtract 1 when using) + + rowDiff := newRows - oldRows + for i, bound := range lineBounds { + if oldStart < bound.start { + bound.start += rowDiff + bound.end += rowDiff + } else if oldStart < bound.end { + bound.end += rowDiff } - lineBounds[i] = prevBound + lineBounds[i] = bound } lineBounds = append(lineBounds, lineBound{ - start: start, - end: start + endOffset - 1, + start: newStart, + end: newStart + newRows - 1, }) } @@ -201,7 +191,7 @@ func patchToLineBounds(patch string) ([]lineBound, error) { } func namedMatch(re *regexp.Regexp, text string) map[string]string { - matches := patchLinesRegexp.FindStringSubmatch(text) + matches := re.FindStringSubmatch(text) if matches == nil { return nil } From 4a4d9d4cdd5214b05428a83e409401d24406c013 Mon Sep 17 00:00:00 2001 From: Cerek Hillen Date: Wed, 18 Oct 2023 23:20:21 -0700 Subject: [PATCH 4/4] move patch to its own directory --- github/{ => fixtures}/multi_stage.patch | 0 github/pull_request_test.go | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename github/{ => fixtures}/multi_stage.patch (100%) diff --git a/github/multi_stage.patch b/github/fixtures/multi_stage.patch similarity index 100% rename from github/multi_stage.patch rename to github/fixtures/multi_stage.patch diff --git a/github/pull_request_test.go b/github/pull_request_test.go index e00d4c0..5a16895 100644 --- a/github/pull_request_test.go +++ b/github/pull_request_test.go @@ -269,7 +269,7 @@ func TestSdkFilesToInternalFiles(t *testing.T) { } } -//go:embed multi_stage.patch +//go:embed fixtures/multi_stage.patch var multiStagePatch string func TestPatchToLineBounds(t *testing.T) {