Skip to content
This repository was archived by the owner on May 24, 2024. It is now read-only.

Commit be8da62

Browse files
Shard cache per half year (#95)
* shard cache per half year Signed-off-by: Ayman <enkhalifapro@gmail.com> * clean up Signed-off-by: Ayman <enkhalifapro@gmail.com> * clean up code Signed-off-by: Ayman <enkhalifapro@gmail.com> * handle next year cache Signed-off-by: Ayman <enkhalifapro@gmail.com> * clean up Signed-off-by: Ayman <enkhalifapro@gmail.com> * clean up Signed-off-by: Ayman <enkhalifapro@gmail.com> * code clean up Signed-off-by: Ayman <enkhalifapro@gmail.com> --------- Signed-off-by: Ayman <enkhalifapro@gmail.com> Co-authored-by: Ayman <enkhalifapro@gmail.com>
1 parent 2967014 commit be8da62

File tree

3 files changed

+60
-31
lines changed

3 files changed

+60
-31
lines changed

cmd/git/git.go

Lines changed: 57 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,11 @@ const (
103103
// Success status
104104
Success = "success"
105105
// GitConnector ...
106-
GitConnector = "git-connector"
107-
PackSize = 1000
108-
HotRepoCount = 50000
106+
GitConnector = "git-connector"
107+
PackSize = 1000
108+
HotRepoCount = 50000
109+
YearFirstHalf = "first-half"
110+
YearSecondHalf = "second-half"
109111
)
110112

111113
var (
@@ -524,16 +526,18 @@ var (
524526
// GitTrailerPPAuthors - trailer name to authors map (for pair programming)
525527
GitTrailerPPAuthors = map[string]string{"Signed-off-by": "authors_signed_off", "Co-authored-by": "co_authors"}
526528
// max upstream date
527-
gMaxUpstreamDt time.Time
528-
gMaxUpstreamDtMtx = &sync.Mutex{}
529-
cachedCommits = make(map[string]CommitCache)
530-
commitsCacheFile = "commits-cache.csv"
531-
createdCommits = make(map[string]bool)
532-
IsHotRep = false
533-
CommitsByYearCacheFile = "commits-cache-%s.csv"
534-
CommitsUpdateCacheFile = "commits-update-cache.csv"
535-
CurrentCacheYear = 1970
536-
CachedCommitsUpdates = make(map[string]CommitCache)
529+
gMaxUpstreamDt time.Time
530+
gMaxUpstreamDtMtx = &sync.Mutex{}
531+
cachedCommits = make(map[string]CommitCache)
532+
commitsCacheFile = "commits-cache.csv"
533+
createdCommits = make(map[string]bool)
534+
IsHotRep = false
535+
CommitsByYearCacheFile = "commits-cache-%s.csv"
536+
CommitsUpdateCacheFile = "commits-update-cache.csv"
537+
CurrentCacheYear = 1970
538+
CachedCommitsUpdates = make(map[string]CommitCache)
539+
CommitsByYearHalfCacheFile = "commits-cache-%s-%s.csv"
540+
CurrentCacheYearHalf = YearFirstHalf
537541
)
538542

539543
// Publisher - for streaming data to Kinesis
@@ -1869,7 +1873,7 @@ func (j *DSGit) GitEnrichItems(ctx *shared.Ctx, thrN int, items []interface{}, d
18691873
return
18701874
}
18711875
} else {
1872-
if err = j.createYearCacheFile(commits, path); err != nil {
1876+
if err = j.createYearHalfCacheFile(commits, path); err != nil {
18731877
return
18741878
}
18751879
}
@@ -3160,7 +3164,10 @@ func (j *DSGit) SyncV2(ctx *shared.Ctx) (err error) {
31603164
if commitsCount >= HotRepoCount {
31613165
IsHotRep = true
31623166
CurrentCacheYear = from.Year()
3163-
j.getYearCache(lastSync)
3167+
if int(from.Month()) > 6 {
3168+
CurrentCacheYearHalf = YearSecondHalf
3169+
}
3170+
j.getYearHalfCache(lastSync)
31643171
j.getUpdateCache(lastSync)
31653172
} else {
31663173
j.getCache(lastSync)
@@ -3420,14 +3427,15 @@ func (j *DSGit) createCacheFile(cache []CommitCache, path string) error {
34203427
return nil
34213428
}
34223429

3423-
func (j *DSGit) createYearCacheFile(cache []CommitCache, path string) error {
3424-
nextYearCache := make([]CommitCache, 0)
3430+
func (j *DSGit) createYearHalfCacheFile(cache []CommitCache, path string) error {
3431+
nextYearHalfCache := make([]CommitCache, 0)
34253432
for _, comm := range cache {
34263433
comm.FileLocation = path
3427-
if comm.CommitDate.Year() == CurrentCacheYear {
3434+
commitYearHalf := getDateYearHalf(comm.CommitDate)
3435+
if comm.CommitDate.Year() == CurrentCacheYear && commitYearHalf == CurrentCacheYearHalf {
34283436
cachedCommits[comm.EntityID] = comm
34293437
} else {
3430-
nextYearCache = append(nextYearCache, comm)
3438+
nextYearHalfCache = append(nextYearHalfCache, comm)
34313439
}
34323440
}
34333441
records := [][]string{
@@ -3438,7 +3446,7 @@ func (j *DSGit) createYearCacheFile(cache []CommitCache, path string) error {
34383446
}
34393447

34403448
yearSTR := strconv.Itoa(CurrentCacheYear)
3441-
cacheFile := fmt.Sprintf(CommitsByYearCacheFile, yearSTR)
3449+
cacheFile := fmt.Sprintf(CommitsByYearHalfCacheFile, yearSTR, CurrentCacheYearHalf)
34423450
csvFile, err := os.Create(cacheFile)
34433451
if err != nil {
34443452
return err
@@ -3453,7 +3461,6 @@ func (j *DSGit) createYearCacheFile(cache []CommitCache, path string) error {
34533461
if err != nil {
34543462
return err
34553463
}
3456-
cachedCommits = make(map[string]CommitCache)
34573464
err = j.cacheProvider.UpdateMultiPartFileByKey(j.endpoint, cacheFile)
34583465
if err != nil {
34593466
return err
@@ -3463,18 +3470,40 @@ func (j *DSGit) createYearCacheFile(cache []CommitCache, path string) error {
34633470
if err != nil {
34643471
return err
34653472
}
3466-
loadCacheToMemory(records)
3467-
if len(nextYearCache) > 0 {
3468-
CurrentCacheYear = nextYearCache[0].CommitDate.Year()
3469-
if err = j.createYearCacheFile(nextYearCache, path); err != nil {
3473+
if len(nextYearHalfCache) > 0 {
3474+
//CurrentCacheYear = nextYearHalfCache[0].CommitDate.Year()
3475+
updateYearHalf(nextYearHalfCache[0].CommitDate)
3476+
if err = j.createYearHalfCacheFile(nextYearHalfCache, path); err != nil {
34703477
return err
34713478
}
34723479
cachedCommits = make(map[string]CommitCache)
3473-
j.getYearCache(os.Getenv("LAST_SYNC"))
3480+
j.getYearHalfCache(os.Getenv("LAST_SYNC"))
34743481
}
34753482
return nil
34763483
}
34773484

3485+
func getDateYearHalf(commitDate time.Time) string {
3486+
monthNumber := int(commitDate.Month())
3487+
if monthNumber > 6 {
3488+
return YearSecondHalf
3489+
}
3490+
return YearFirstHalf
3491+
}
3492+
3493+
func updateYearHalf(commitDate time.Time) {
3494+
cuHalf := getDateYearHalf(commitDate)
3495+
if cuHalf == CurrentCacheYearHalf {
3496+
return
3497+
}
3498+
3499+
if CurrentCacheYearHalf == YearFirstHalf {
3500+
CurrentCacheYearHalf = YearSecondHalf
3501+
return
3502+
}
3503+
CurrentCacheYearHalf = YearFirstHalf
3504+
CurrentCacheYear += 1
3505+
}
3506+
34783507
func (j *DSGit) createUpdateCacheFile(cache []CommitCache, path string) error {
34793508
for _, comm := range cache {
34803509
comm.FileLocation = path
@@ -3631,9 +3660,9 @@ func (j *DSGit) getUpdateCache(lastSync string) {
36313660
}
36323661
}
36333662

3634-
func (j *DSGit) getYearCache(lastSync string) {
3663+
func (j *DSGit) getYearHalfCache(lastSync string) {
36353664
yearSTR := strconv.Itoa(CurrentCacheYear)
3636-
commentBytes, err := j.cacheProvider.GetFileByKey(j.endpoint, fmt.Sprintf(CommitsByYearCacheFile, yearSTR))
3665+
commentBytes, err := j.cacheProvider.GetFileByKey(j.endpoint, fmt.Sprintf(CommitsByYearHalfCacheFile, yearSTR, CurrentCacheYearHalf))
36373666
if err != nil {
36383667
return
36393668
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module github.com/LF-Engineering/insights-datasource-git
33
go 1.17
44

55
require (
6-
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230410030513-945f1d5a92a4
6+
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230411073313-68b5e7a0b0ef
77
github.com/LF-Engineering/lfx-event-schema v0.1.37
88
github.com/aws/aws-lambda-go v1.27.1
99
github.com/aws/aws-sdk-go v1.42.25

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
2-
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230410030513-945f1d5a92a4 h1:q9DZVrh19QzM0s6EEbAHFkf51Ubw4jw5g1qlKASlHvc=
3-
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230410030513-945f1d5a92a4/go.mod h1:9DmFQbC8nnm1C7k+/tDo3Rmqzzx7AzmhPBlFouXaBZ8=
2+
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230411073313-68b5e7a0b0ef h1:Mwv6SkvJgLQi2/jdJCSWbjG/CFolOiQtRb3Ydhb4Oe8=
3+
github.com/LF-Engineering/insights-datasource-shared v1.5.30-0.20230411073313-68b5e7a0b0ef/go.mod h1:9DmFQbC8nnm1C7k+/tDo3Rmqzzx7AzmhPBlFouXaBZ8=
44
github.com/LF-Engineering/lfx-event-schema v0.1.14/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs=
55
github.com/LF-Engineering/lfx-event-schema v0.1.37 h1:ny46D2NdCXokvJZ01GJcw2RfQM64ousJjaYsrRj5zzg=
66
github.com/LF-Engineering/lfx-event-schema v0.1.37/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs=

0 commit comments

Comments
 (0)