Skip to content

Commit 5d1fb3c

Browse files
committed
draft implementation of github tag crawler
1 parent 8270688 commit 5d1fb3c

28 files changed

+451
-198
lines changed

Package.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ let package:Package = .init(
7575
.package(url: "https://github.com/tayloraswift/swift-mongodb", .upToNextMinor(
7676
from: "0.8.2")),
7777

78+
.package(url: "https://github.com/apple/swift-atomics", .upToNextMinor(
79+
from: "1.1.0")),
7880
.package(url: "https://github.com/apple/swift-nio", .upToNextMinor(
7981
from: "2.57.0")),
8082
.package(url: "https://github.com/apple/swift-nio-http2", .upToNextMinor(
@@ -379,6 +381,7 @@ let package:Package = .init(
379381
.target(name: "GitHubIntegration"),
380382
.target(name: "UnidocAnalysis"),
381383
.target(name: "UnidocLinker"),
384+
.target(name: "UnixTime"),
382385
.product(name: "MongoDB", package: "swift-mongodb"),
383386
]),
384387

@@ -454,6 +457,8 @@ let package:Package = .init(
454457
.target(name: "Multiparts"),
455458
.target(name: "System"),
456459
.target(name: "UnidocPages"),
460+
461+
.product(name: "Atomics", package: "swift-atomics"),
457462
]),
458463

459464

Sources/UnidocDB/Packages/PackageDatabase.Packages.swift

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,56 @@ extension PackageDatabase.Packages:DatabaseCollection
4040
$0[PackageRecord[.cell]] = (+)
4141
}
4242
},
43+
.init
44+
{
45+
$0[.unique] = false
46+
$0[.name] = "crawled"
47+
$0[.key] = .init
48+
{
49+
$0[PackageRecord[.crawled]] = (+)
50+
}
51+
},
4352
]
4453
}
4554
extension PackageDatabase.Packages
55+
{
56+
public
57+
func recode(with session:Mongo.Session) async throws -> (modified:Int, of:Int)
58+
{
59+
try await self.recode(through: PackageRecord.self,
60+
with: session,
61+
by: .now.advanced(by: .seconds(30)))
62+
}
63+
}
64+
extension PackageDatabase.Packages
65+
{
66+
public
67+
func stalest(_ limit:Int, with session:Mongo.Session) async throws -> [PackageRecord]
68+
{
69+
try await session.run(
70+
command: Mongo.Find<Mongo.SingleBatch<PackageRecord>>.init(Self.name,
71+
limit: limit)
72+
{
73+
$0[.sort] = .init
74+
{
75+
$0[PackageRecord[.crawled]] = (+)
76+
}
77+
$0[.hint] = .init
78+
{
79+
$0[PackageRecord[.crawled]] = (+)
80+
}
81+
},
82+
against: self.database)
83+
}
84+
85+
public
86+
func update(record:PackageRecord,
87+
with session:Mongo.Session) async throws -> Bool?
88+
{
89+
try await self.update(record, with: session)
90+
}
91+
}
92+
extension PackageDatabase.Packages
4693
{
4794
/// Registers the given package identifier in the database, returning its package
4895
/// coordinate. This is really just a glorified string interning system.
@@ -157,19 +204,6 @@ extension PackageDatabase.Packages
157204
return placement.first ?? .first
158205
}
159206

160-
@discardableResult
161-
public
162-
func update(_ package:Int32,
163-
repo:PackageRepo,
164-
with session:Mongo.Session) async throws -> Bool?
165-
{
166-
try await self.update(field: PackageRecord[.repo],
167-
by: PackageRecord[.cell],
168-
of: package,
169-
to: repo,
170-
with: session)
171-
}
172-
173207
private
174208
func scan(with session:Mongo.Session) async throws -> SearchIndex<Int32>
175209
{

Sources/UnidocDB/Packages/PackageRecord.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import BSONEncoding
33
import ModuleGraphs
44
import MongoQL
55
import SymbolGraphs
6+
import UnixTime
67

78
@frozen public
89
struct PackageRecord:Identifiable, Equatable, Sendable
@@ -13,15 +14,24 @@ struct PackageRecord:Identifiable, Equatable, Sendable
1314
/// but packages that track remote repositories will always have positive cell numbers.
1415
public
1516
let cell:Int32
17+
18+
/// When this package *record* was last crawled. This is different from the time when the
19+
/// package itself was last updated.
20+
public
21+
var crawled:BSON.Millisecond
1622
/// The repo this package tracks. Currently only GitHub repos are supported.
1723
public
1824
var repo:PackageRepo?
1925

2026
@inlinable public
21-
init(id:PackageIdentifier, cell:Int32, repo:PackageRepo? = nil)
27+
init(id:PackageIdentifier,
28+
cell:Int32,
29+
crawled:BSON.Millisecond = .now(),
30+
repo:PackageRepo? = nil)
2231
{
2332
self.id = id
2433
self.cell = cell
34+
self.crawled = crawled
2535
self.repo = repo
2636
}
2737
}
@@ -32,6 +42,7 @@ extension PackageRecord:MongoMasterCodingModel
3242
{
3343
case id = "_id"
3444
case cell = "P"
45+
case crawled = "T"
3546
case repo = "R"
3647
}
3748
}
@@ -42,6 +53,7 @@ extension PackageRecord:BSONDocumentEncodable
4253
{
4354
bson[.id] = self.id
4455
bson[.cell] = self.cell
56+
bson[.crawled] = self.crawled
4557
bson[.repo] = self.repo
4658
}
4759
}
@@ -52,6 +64,7 @@ extension PackageRecord:BSONDocumentDecodable
5264
{
5365
self.init(id: try bson[.id].decode(),
5466
cell: try bson[.cell].decode(),
67+
crawled: try bson[.crawled]?.decode() ?? .now(),
5568
repo: try bson[.repo]?.decode())
5669
}
5770
}

Sources/UnidocPages/Templates/Site.Admin.Action.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ extension Site.Admin
1010
case lintPackageEditions = "lint-package-editions"
1111

1212
case recodePackageEditions = "recode-package-editions"
13+
case recodePackageRecords = "recode-package-records"
1314
case recodeUnidocVertices = "recode-unidoc-vertices"
1415

1516
case rebuild = "rebuild"
@@ -27,6 +28,7 @@ extension Site.Admin.Action
2728
case .dropUnidocDB: return "Drop Unidoc Database"
2829
case .lintPackageEditions: return "Lint Package Editions"
2930
case .recodePackageEditions: return "Recode Package Editions"
31+
case .recodePackageRecords: return "Recode Package Records"
3032
case .recodeUnidocVertices: return "Recode Unidoc Vertices"
3133
case .rebuild: return "Rebuild Collections"
3234
case .upload: return "Upload Snapshots"

Sources/UnidocPages/Templates/Site.Admin.Confirm.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ extension Site.Admin.Confirm
4141
case .recodePackageEditions:
4242
prompt = "This will recode all package editions. Are you sure?"
4343

44+
case .recodePackageRecords:
45+
prompt = "This will recode all package records. Are you sure?"
46+
4447
case .recodeUnidocVertices:
4548
prompt = "This will recode all Unidoc vertices. Are you sure?"
4649

Sources/UnidocPages/Templates/Site.Admin.swift

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,41 @@ extension Site
1111
{
1212
public
1313
let configuration:Mongo.ReplicaSetConfiguration
14+
15+
public
16+
let crawlingErrors:Int
17+
public
18+
let packagesCrawled:Int
19+
public
20+
let packagesUpdated:Int
21+
public
22+
let tagsCrawled:Int
23+
public
24+
let tagsUpdated:Int
25+
1426
public
1527
let tour:ServerTour
1628
public
1729
let real:Bool
1830

1931
@inlinable public
20-
init(configuration:Mongo.ReplicaSetConfiguration, tour:ServerTour, real:Bool)
32+
init(configuration:Mongo.ReplicaSetConfiguration,
33+
crawlingErrors:Int,
34+
packagesCrawled:Int,
35+
packagesUpdated:Int,
36+
tagsCrawled:Int,
37+
tagsUpdated:Int,
38+
tour:ServerTour,
39+
real:Bool)
2140
{
2241
self.configuration = configuration
42+
43+
self.crawlingErrors = crawlingErrors
44+
self.packagesCrawled = packagesCrawled
45+
self.packagesUpdated = packagesUpdated
46+
self.tagsCrawled = tagsCrawled
47+
self.tagsUpdated = tagsUpdated
48+
2349
self.tour = tour
2450
self.real = real
2551
}
@@ -148,6 +174,7 @@ extension Site.Admin:AdministrativePage
148174
[
149175
.lintPackageEditions,
150176
.recodePackageEditions,
177+
.recodePackageRecords,
151178
.recodeUnidocVertices,
152179

153180
.dropUnidocDB,
@@ -186,6 +213,21 @@ extension Site.Admin:AdministrativePage
186213

187214
$0[.dt] = "bytes transferred (content only)"
188215
$0[.dd] = "\(self.tour.stats.bytes.total)"
216+
217+
$0[.dt] = "crawling errors"
218+
$0[.dd] = "\(self.crawlingErrors)"
219+
220+
$0[.dt] = "packages crawled"
221+
$0[.dd] = "\(self.packagesCrawled)"
222+
223+
$0[.dt] = "packages updated"
224+
$0[.dd] = "\(self.packagesUpdated)"
225+
226+
$0[.dt] = "tags crawled"
227+
$0[.dd] = "\(self.tagsCrawled)"
228+
229+
$0[.dt] = "tags updated"
230+
$0[.dd] = "\(self.tagsUpdated)"
189231
}
190232

191233
main += ServerTour.StatsBreakdown.init(self.tour.stats)

Sources/UnidocServer/Caching/Cache.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ actor Cache<Key> where Key:CacheKey
1313
private
1414
var table:[Key: ServerResource]
1515

16-
init(source assets:FilePath, reload:Bool)
16+
init(source assets:FilePath = "Assets", reload:Bool)
1717
{
1818
self.assets = assets
1919
self.reload = reload

Sources/UnidocServer/DatabaseOperation.swift

Lines changed: 0 additions & 20 deletions
This file was deleted.

Sources/UnidocServer/StatefulOperation.swift renamed to Sources/UnidocServer/InteractiveOperation.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import HTTP
22
import UnidocPages
33

4-
protocol StatefulOperation:Sendable
4+
protocol InteractiveOperation:Sendable
55
{
66
var statisticalType:WritableKeyPath<ServerTour.Stats.ByType, Int> { get }
77

8-
func load(from state:ServerState,
8+
func load(from server:Server.State,
99
with cookies:Server.Request.Cookies) async throws -> ServerResponse?
1010
}
11-
extension StatefulOperation
11+
extension InteractiveOperation
1212
{
1313
var statisticalType:WritableKeyPath<ServerTour.Stats.ByType, Int>
1414
{

0 commit comments

Comments
 (0)