@@ -1,9 +1,11 @@
 package main
 
 import (
+	"bytes"
 	"context"
 	"crypto/sha256"
 	"encoding/base64"
+	"encoding/csv"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -57,14 +59,16 @@ const (
 var (
 	gMaxUpdatedAt    time.Time
 	gMaxUpdatedAtMtx = &sync.Mutex{}
+	cachedSpaces     = make(map[string]EntityCache)
+	spacesCacheFile  = "spaces-cache.csv"
 	// ConfluenceDataSource - constant
 	//ConfluenceDataSource = &models.DataSource{Name: "Confluence", Slug: "confluence", Model: "documentation"}
 	//gConfluenceMetaData = &models.MetaData{BackendName: "confluence", BackendVersion: ConfluenceBackendVersion}
 )
 
 // Publisher - publish data to S3
 type Publisher interface {
-	PushEvents(action, source, eventType, subEventType, env string, data []interface{}) error
+	PushEvents(action, source, eventType, subEventType, env string, data []interface{}) (string, error)
 }
 
 // DSConfluence - DS implementation for confluence - does nothing at all, just presents a skeleton code
@@ -344,8 +348,8 @@ func (j *DSConfluence) GetHistoricalContents(ctx *shared.Ctx, content map[string
 		headers,
 		nil,
 		nil,
-		map[[2]int]struct{}{{200, 200}: {}},                                 // JSON statuses: 200
-		nil,                                                                 // Error statuses
+		map[[2]int]struct{}{{200, 200}: {}}, // JSON statuses: 200
+		nil,                                 // Error statuses
 		map[[2]int]struct{}{{200, 200}: {}, {500, 500}: {}, {404, 404}: {}}, // OK statuses: 200
 		map[[2]int]struct{}{{200, 200}: {}}, // Cache statuses: 200
 		false, // retry
@@ -574,6 +578,7 @@ func (j *DSConfluence) Sync(ctx *shared.Ctx) (err error) {
 	if ctx.DateTo != nil {
 		j.log.WithFields(logrus.Fields{"operation": "Sync"}).Infof("%s fetching till %v", j.URL, ctx.DateTo)
 	}
+	j.getCachedContent()
 	// NOTE: Non-generic starts here
 	var (
 		sDateFrom string
@@ -1291,8 +1296,7 @@ func (j *DSConfluence) GetModelData(ctx *shared.Ctx, docs []interface{}) (data m
 			SourceTimestamp: updatedOn,
 			Children:        kids,
 		}
-		cacheID := fmt.Sprintf("content-%s", confluenceContentID)
-		isCreated, err := j.cacheProvider.IsKeyCreated(j.endpoint, cacheID)
+		isCreated := isKeyCreated(confluenceContentID)
 		if err != nil {
 			j.log.WithFields(logrus.Fields{"operation": "GetModelData"}).Errorf("error getting cache for endpoint %s. error: %+v", j.endpoint, err)
 			return data, err
@@ -1338,30 +1342,26 @@ func (j *DSConfluence) ConfluenceEnrichItems(ctx *shared.Ctx, thrN int, items []
 			contentsStr := "contents"
 			envStr := os.Getenv("STAGE")
 			// Push the event
-			d := make([]map[string]interface{}, 0)
 			for k, v := range data {
 				switch k {
 				case "created":
 					ev, _ := v[0].(insightsConf.ContentCreatedEvent)
-					err = j.Publisher.PushEvents(ev.Event(), insightsStr, ConfluenceDataSource, contentsStr, envStr, v)
-					cacheData, err := j.cachedCreatedContent(v)
+					path, err := j.Publisher.PushEvents(ev.Event(), insightsStr, ConfluenceDataSource, contentsStr, envStr, v)
+					err = j.cachedCreatedContent(v, path)
 					if err != nil {
 						j.log.WithFields(logrus.Fields{"operation": "ConfluenceEnrichItems"}).Errorf("cachedCreatedContent error: %+v", err)
 						return
 					}
-					d = append(d, cacheData...)
 				case "updated":
-					updates, cacheData, err := j.preventUpdateDuplication(v)
+					updates, err := j.preventUpdateDuplication(v)
 					if err != nil {
 						j.log.WithFields(logrus.Fields{"operation": "ConfluenceEnrichItems"}).Errorf("preventUpdateDuplication error: %+v", err)
 						return
 					}
-					if len(cacheData) > 0 {
-						d = append(d, cacheData...)
-					}
+
 					if len(updates) > 0 {
 						ev, _ := updates[0].(insightsConf.ContentUpdatedEvent)
-						err = j.Publisher.PushEvents(ev.Event(), insightsStr, ConfluenceDataSource, contentsStr, envStr, updates)
+						_, err = j.Publisher.PushEvents(ev.Event(), insightsStr, ConfluenceDataSource, contentsStr, envStr, updates)
 					}
 				default:
 					err = fmt.Errorf("unknown confluence event type '%s'", k)
@@ -1370,11 +1370,8 @@ func (j *DSConfluence) ConfluenceEnrichItems(ctx *shared.Ctx, thrN int, items []
 					break
 				}
 			}
-			if len(d) > 0 {
-				err = j.cacheProvider.Create(j.endpoint, d)
-				if err != nil {
-					j.log.WithFields(logrus.Fields{"operation": "ConfluenceEnrichItems"}).Errorf("error creating cache for endpoint %s. Error: %+v", j.endpoint, err)
-				}
+			if err = j.createCacheFile([]EntityCache{}, ""); err != nil {
+				j.log.WithFields(logrus.Fields{"operation": "ConfluenceEnrichItems"}).Errorf("error creating cache for endpoint %s. Error: %+v", j.endpoint, err)
 			}
 		} else {
 			jsonBytes, err = jsoniter.Marshal(data)
@@ -1567,11 +1564,9 @@ func (j *DSConfluence) AddCacheProvider() {
 	j.endpoint = strings.ReplaceAll(strings.TrimPrefix(strings.TrimPrefix(j.URL, "https://"), "http://"), "/", "-")
 }
 
-func (j *DSConfluence) cachedCreatedContent(v []interface{}) ([]map[string]interface{}, error) {
-	cacheData := make([]map[string]interface{}, 0)
+func (j *DSConfluence) cachedCreatedContent(v []interface{}, path string) error {
 	for _, val := range v {
 		content := val.(insightsConf.ContentCreatedEvent).Payload
-		id := fmt.Sprintf("%s-%s", "content", val.(insightsConf.ContentCreatedEvent).Payload.ID)
 		c := insightsConf.Content{
 			ID:         content.ID,
 			EndpointID: content.EndpointID,
@@ -1587,22 +1582,24 @@ func (j *DSConfluence) cachedCreatedContent(v []interface{}) ([]map[string]inter
 		}
 		b, err := json.Marshal(c)
 		if err != nil {
-			return cacheData, err
+			return err
 		}
 		contentHash := fmt.Sprintf("%x", sha256.Sum256(b))
-		cacheData = append(cacheData, map[string]interface{}{
-			"id": id,
-			"data": map[string]interface{}{
-				contentHashField: contentHash,
-			},
-		})
+		tStamp := content.SyncTimestamp.Unix()
+		cachedSpaces[content.ID] = EntityCache{
+			Timestamp:      fmt.Sprintf("%v", tStamp),
+			EntityID:       content.ID,
+			SourceEntityID: content.ContentID,
+			FileLocation:   path,
+			Hash:           contentHash,
+			Orphaned:       false,
+		}
 	}
-	return cacheData, nil
+	return nil
 }
 
-func (j *DSConfluence) preventUpdateDuplication(v []interface{}) ([]interface{}, []map[string]interface{}, error) {
+func (j *DSConfluence) preventUpdateDuplication(v []interface{}) ([]interface{}, error) {
 	updatedVals := make([]interface{}, 0, len(v))
-	cacheData := make([]map[string]interface{}, 0)
 	for _, val := range v {
 		content := val.(insightsConf.ContentUpdatedEvent).Payload
 		c := insightsConf.Content{
@@ -1620,25 +1617,108 @@ func (j *DSConfluence) preventUpdateDuplication(v []interface{}) ([]interface{},
 		}
 		b, err := json.Marshal(c)
 		if err != nil {
-			return updatedVals, cacheData, nil
+			return updatedVals, nil
 		}
 		contentHash := fmt.Sprintf("%x", sha256.Sum256(b))
-		cacheID := fmt.Sprintf("content-%s", content.ID)
-		byt, err := j.cacheProvider.GetFileByKey(j.endpoint, cacheID)
-		if err != nil {
-			return updatedVals, cacheData, nil
+		cacheCon, ok := cachedSpaces[content.ID]
+		if !ok {
+			continue
 		}
-		cachedHash := make(map[string]interface{})
-		err = json.Unmarshal(byt, &cachedHash)
-		if contentHash != cachedHash["contentHash"] {
+		if contentHash != cacheCon.Hash {
 			updatedVals = append(updatedVals, val)
-			cacheData = append(cacheData, map[string]interface{}{
-				"id": cacheID,
-				"data": map[string]interface{}{
-					contentHashField: contentHash,
-				},
-			})
+			cacheCon.Hash = contentHash
+			cachedSpaces[content.ID] = cacheCon
 		}
 	}
-	return updatedVals, cacheData, nil
+	return updatedVals, nil
+}
+
+func (j *DSConfluence) getCachedContent() {
+	comB, err := j.cacheProvider.GetFileByKey(j.endpoint, spacesCacheFile)
+	if err != nil {
+		return
+	}
+	reader := csv.NewReader(bytes.NewBuffer(comB))
+	records, err := reader.ReadAll()
+	if err != nil {
+		return
+	}
+	for i, record := range records {
+		if i == 0 {
+			continue
+		}
+		orphaned, err := strconv.ParseBool(record[5])
+		if err != nil {
+			orphaned = false
+		}
+
+		cachedSpaces[record[1]] = EntityCache{
+			Timestamp:      record[0],
+			EntityID:       record[1],
+			SourceEntityID: record[2],
+			FileLocation:   record[3],
+			Hash:           record[4],
+			Orphaned:       orphaned,
+		}
+	}
+}
+
+func (j *DSConfluence) createCacheFile(cache []EntityCache, path string) error {
+	for _, comm := range cache {
+		comm.FileLocation = path
+		cachedSpaces[comm.EntityID] = comm
+	}
+	records := [][]string{
+		{"timestamp", "entity_id", "source_entity_id", "file_location", "hash", "orphaned"},
+	}
+	for _, c := range cachedSpaces {
+		records = append(records, []string{c.Timestamp, c.EntityID, c.SourceEntityID, c.FileLocation, c.Hash, strconv.FormatBool(c.Orphaned)})
+	}
+
+	csvFile, err := os.Create(spacesCacheFile)
+	if err != nil {
+		return err
+	}
+
+	w := csv.NewWriter(csvFile)
+	err = w.WriteAll(records)
+	if err != nil {
+		return err
+	}
+	err = csvFile.Close()
+	if err != nil {
+		return err
+	}
+	file, err := os.ReadFile(spacesCacheFile)
+	if err != nil {
+		return err
+	}
+	err = os.Remove(spacesCacheFile)
+	if err != nil {
+		return err
+	}
+	err = j.cacheProvider.UpdateFileByKey(j.endpoint, spacesCacheFile, file)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func isKeyCreated(id string) bool {
+	_, ok := cachedSpaces[id]
+	if ok {
+		return true
+	}
+	return false
+}
+
+// EntityCache single content cache schema
+type EntityCache struct {
+	Timestamp      string `json:"timestamp"`
+	EntityID       string `json:"entity_id"`
+	SourceEntityID string `json:"source_entity_id"`
+	FileLocation   string `json:"file_location"`
+	Hash           string `json:"hash"`
+	Orphaned       bool   `json:"orphaned"`
 }
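
Note: below is a minimal standalone sketch of the duplicate-suppression idea in preventUpdateDuplication above, hashing the marshalled payload and comparing it with the hash last published for that ID. The payload type and values are made up for illustration; the connector hashes insightsConf.Content and keys the map by content ID (and, unlike this sketch, it skips IDs that are not in the cache at all).

package main

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
)

// payload is a hypothetical stand-in for the connector's event payload.
type payload struct {
	ID   string `json:"id"`
	Body string `json:"body"`
}

// seen maps a content ID to the hash it was last published with,
// playing the role of the cachedSpaces map.
var seen = map[string]string{}

// changed reports whether p differs from the last published version.
func changed(p payload) (bool, error) {
	b, err := json.Marshal(p)
	if err != nil {
		return false, err
	}
	h := fmt.Sprintf("%x", sha256.Sum256(b))
	if prev, ok := seen[p.ID]; ok && prev == h {
		return false, nil // identical payload: suppress the update event
	}
	seen[p.ID] = h
	return true, nil
}

func main() {
	p := payload{ID: "42", Body: "hello"}
	for i := 0; i < 2; i++ {
		publish, _ := changed(p)
		fmt.Println("publish:", publish) // prints true, then false
	}
}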
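And a sketch of the spaces-cache.csv round trip introduced by createCacheFile and getCachedContent: write the header row plus one EntityCache record, then read it back. This is an assumption-laden demo, not the connector's code path; it uses an in-memory buffer instead of the temp file and cacheProvider, and every field value is invented.

package main

import (
	"bytes"
	"encoding/csv"
	"fmt"
	"strconv"
)

// EntityCache mirrors the struct added in the diff, minus the JSON tags.
type EntityCache struct {
	Timestamp, EntityID, SourceEntityID, FileLocation, Hash string
	Orphaned                                                bool
}

func main() {
	// Hypothetical record; FileLocation would be the path returned by PushEvents.
	c := EntityCache{Timestamp: "1700000000", EntityID: "abc", SourceEntityID: "12345", FileLocation: "insights/confluence/example.json", Hash: "deadbeef"}

	var buf bytes.Buffer
	w := csv.NewWriter(&buf)
	// Same column order as createCacheFile's header row.
	_ = w.WriteAll([][]string{
		{"timestamp", "entity_id", "source_entity_id", "file_location", "hash", "orphaned"},
		{c.Timestamp, c.EntityID, c.SourceEntityID, c.FileLocation, c.Hash, strconv.FormatBool(c.Orphaned)},
	})

	records, _ := csv.NewReader(&buf).ReadAll()
	for i, rec := range records {
		if i == 0 {
			continue // skip the header row, as getCachedContent does
		}
		orphaned, err := strconv.ParseBool(rec[5])
		if err != nil {
			orphaned = false // same fallback as getCachedContent
		}
		fmt.Printf("%s -> hash %s, orphaned %v\n", rec[1], rec[4], orphaned)
	}
}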