Skip to content

Commit cdc4e80

Browse files
committed
factor the GitHub crawling functions into a type GitHubPlugin.Crawler
1 parent 0250517 commit cdc4e80

11 files changed

+284
-215
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<div align="center">
22

3-
<strong><em><code>unidoc</code></em></strong><br><small><code>0.2.2</code></small>
3+
<strong><em><code>unidoc</code></em></strong><br><small><code>0.2.3</code></small>
44

55
[![ci build status](https://github.com/kelvin13/swift-unidoc/actions/workflows/build.yml/badge.svg)](https://github.com/kelvin13/swift-unidoc/actions/workflows/build.yml)
66

Sources/UnidocPages/Templates/Site.Admin.swift

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ extension Site
1313
let configuration:Mongo.ReplicaSetConfiguration
1414

1515
public
16-
let crawlingErrors:Int
16+
let errorsCrawling:Int
1717
public
18-
let packagesCrawled:Int
18+
let reposCrawled:Int
1919
public
20-
let packagesUpdated:Int
20+
let reposUpdated:Int
2121
public
2222
let tagsCrawled:Int
2323
public
@@ -30,19 +30,19 @@ extension Site
3030

3131
@inlinable public
3232
init(configuration:Mongo.ReplicaSetConfiguration,
33-
crawlingErrors:Int,
34-
packagesCrawled:Int,
35-
packagesUpdated:Int,
33+
errorsCrawling:Int,
34+
reposCrawled:Int,
35+
reposUpdated:Int,
3636
tagsCrawled:Int,
3737
tagsUpdated:Int,
3838
tour:ServerTour,
3939
real:Bool)
4040
{
4141
self.configuration = configuration
4242

43-
self.crawlingErrors = crawlingErrors
44-
self.packagesCrawled = packagesCrawled
45-
self.packagesUpdated = packagesUpdated
43+
self.errorsCrawling = errorsCrawling
44+
self.reposCrawled = reposCrawled
45+
self.reposUpdated = reposUpdated
4646
self.tagsCrawled = tagsCrawled
4747
self.tagsUpdated = tagsUpdated
4848

@@ -214,19 +214,19 @@ extension Site.Admin:AdministrativePage
214214
$0[.dt] = "bytes transferred (content only)"
215215
$0[.dd] = "\(self.tour.stats.bytes.total)"
216216

217-
$0[.dt] = "crawling errors"
218-
$0[.dd] = "\(self.crawlingErrors)"
217+
$0[.dt] = "GitHub crawling errors"
218+
$0[.dd] = "\(self.errorsCrawling)"
219219

220-
$0[.dt] = "packages crawled"
221-
$0[.dd] = "\(self.packagesCrawled)"
220+
$0[.dt] = "GitHub repos crawled"
221+
$0[.dd] = "\(self.reposCrawled)"
222222

223-
$0[.dt] = "packages updated"
224-
$0[.dd] = "\(self.packagesUpdated)"
223+
$0[.dt] = "GitHub repos updated"
224+
$0[.dd] = "\(self.reposUpdated)"
225225

226-
$0[.dt] = "tags crawled"
226+
$0[.dt] = "GitHub tags crawled"
227227
$0[.dd] = "\(self.tagsCrawled)"
228228

229-
$0[.dt] = "tags updated"
229+
$0[.dt] = "GitHub tags updated"
230230
$0[.dd] = "\(self.tagsUpdated)"
231231
}
232232

Sources/UnidocServer/Operations/Server.Operation.AdminDashboard.swift

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@ extension Server.Operation.AdminDashboard:RestrictedOperation
1414
func load(from server:Server.State) async throws -> ServerResponse?
1515
{
1616
let page:Site.Admin = .init(configuration: try await server.db.sessions.run(
17-
command: Mongo.ReplicaSetGetConfiguration.init(),
18-
against: .admin),
19-
crawlingErrors: server._crawlingErrors.load(ordering: .relaxed),
20-
packagesCrawled: server._packagesCrawled.load(ordering: .relaxed),
21-
packagesUpdated: server._packagesUpdated.load(ordering: .relaxed),
22-
tagsCrawled: server._tagsCrawled.load(ordering: .relaxed),
23-
tagsUpdated: server._tagsUpdated.load(ordering: .relaxed),
24-
tour: server.tour,
25-
real: server.mode == .secured)
17+
command: Mongo.ReplicaSetGetConfiguration.init(),
18+
against: .admin),
19+
errorsCrawling: server.github?.errors ?? 0,
20+
reposCrawled: server.github?.reposCrawled ?? 0,
21+
reposUpdated: server.github?.reposUpdated ?? 0,
22+
tagsCrawled: server.github?.tagsCrawled ?? 0,
23+
tagsUpdated: server.github?.tagsUpdated ?? 0,
24+
tour: server.tour,
25+
real: server.mode == .secured)
2626

2727
return .resource(page.rendered())
2828
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import Atomics
2+
3+
extension GitHubPlugin
{
    /// A bundle of atomic integer tallies describing GitHub crawling activity.
    ///
    /// All five tallies are created with the value zero when the instance is
    /// initialized, and their storage is destroyed again when the instance is
    /// deallocated.
    final
    class Counters:Sendable
    {
        /// Tally of repos crawled.
        let reposCrawled:UnsafeAtomic<Int>
        /// Tally of repos updated.
        let reposUpdated:UnsafeAtomic<Int>
        /// Tally of tags crawled.
        let tagsCrawled:UnsafeAtomic<Int>
        /// Tally of tags updated.
        let tagsUpdated:UnsafeAtomic<Int>
        /// Tally of errors.
        let errors:UnsafeAtomic<Int>

        /// Creates a fresh set of tallies, all starting at zero.
        init()
        {
            self.reposCrawled = UnsafeAtomic<Int>.create(0)
            self.reposUpdated = UnsafeAtomic<Int>.create(0)
            self.tagsCrawled = UnsafeAtomic<Int>.create(0)
            self.tagsUpdated = UnsafeAtomic<Int>.create(0)
            self.errors = UnsafeAtomic<Int>.create(0)
        }

        deinit
        {
            // Every `create` in `init` needs a matching `destroy`; the tallies are
            // released in declaration order.
            let tallies:[UnsafeAtomic<Int>] =
            [
                self.reposCrawled,
                self.reposUpdated,
                self.tagsCrawled,
                self.tagsUpdated,
                self.errors,
            ]
            for tally:UnsafeAtomic<Int> in tallies
            {
                tally.destroy()
            }
        }
    }
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import GitHubAPI
2+
import GitHubClient
3+
import MongoDB
4+
import SemanticVersions
5+
import UnidocDB
6+
import UnixTime
7+
8+
extension GitHubPlugin
{
    /// Holds everything the crawling loop works with: the tallies it increments, the
    /// GitHub API client it connects through, and the server database it reads from
    /// and writes to.
    struct Crawler
    {
        /// Tallies incremented as repos, tags, and errors are encountered.
        private
        let count:Counters
        /// Client used to open connections to the GitHub API.
        private
        let api:GitHubClient<GitHubOAuth.API>
        /// Database handle used to create sessions and to access package records.
        private
        let db:Server.DB

        /// Stores the given tallies, API client, and database handle without doing
        /// any other work.
        init(count:Counters, api:GitHubClient<GitHubOAuth.API>, db:Server.DB)
        {
            self.db = db
            self.api = api
            self.count = count
        }
    }
}
27+
extension GitHubPlugin.Crawler
{
    /// Crawls GitHub in an endless loop.
    ///
    /// Each pass starts a 30-second timer, creates a database session, and refreshes
    /// the ten stalest packages over a single API connection. The next pass does not
    /// begin until the timer has elapsed.
    ///
    /// A rate-limit error makes the loop sleep until the time carried by the error.
    /// Any other error thrown while refreshing is printed and counted, and the loop
    /// carries on.
    ///
    /// -   Throws:
    ///     Any error thrown while creating the database session or while sleeping.
    ///     NOTE(review): session creation sits outside the `do` block, so a failure
    ///     there ends the crawler rather than being counted — confirm this is
    ///     intended.
    func run() async throws
    {
        while true
        {
            // Started before the work begins, so that the time spent crawling counts
            // toward the 30-second spacing between passes.
            async
            let interval:Void = Task.sleep(for: .seconds(30))

            let session:Mongo.Session = try await .init(from: self.db.sessions)
            do
            {
                try await self.api.connect
                {
                    try await self.refresh(stalest: 10, from: $0, with: session)
                }
            }
            catch let limit as any GitHubRateLimitError
            {
                // Wait out the rate limit; this is not counted as an error.
                try await Task.sleep(for: limit.until - .now())
            }
            catch let failure
            {
                print("Crawling error: \(failure)")
                self.count.errors.wrappingIncrement(ordering: .relaxed)
            }

            try await interval
        }
    }

    /// Re-fetches the `count` stalest packages from GitHub, stores the refreshed repo
    /// metadata, and registers each package’s semver tags.
    ///
    /// -   Parameters:
    ///     -   count: How many of the stalest package records to refresh.
    ///     -   github: The API connection used for all requests in this pass.
    ///     -   session: The database session used for all queries in this pass.
    ///
    /// -   Throws:
    ///     Any error thrown by a GitHub request or a database operation. The pass
    ///     stops at the first error; packages already processed stay updated.
    private
    func refresh(stalest count:Int,
        from github:GitHubClient<GitHubOAuth.API>.Connection,
        with session:Mongo.Session) async throws
    {
        let packages:[PackageRecord] = try await self.db.package.packages.stalest(count,
            with: session)

        for package:PackageRecord in packages
        {
            guard case .github(let old) = package.repo
            else
            {
                fatalError("unreachable: non-GitHub package was marked as stale!")
            }

            let repo:GitHub.Repo = try await github.get(
                from: "/repos/\(old.owner.login)/\(old.name)")

            // A `nil` result might happen if the package database is dropped while
            // crawling; in that case, skip the package entirely.
            guard try await self.db.package.packages.update(record: .init(id: package.id,
                    cell: package.cell,
                    repo: .github(repo)),
                with: session) != nil
            else
            {
                continue
            }

            // To MongoDB, all repo updates look like modifications, since the package
            // record contains a timestamp. So every successful write counts as a
            // crawl, and only an actual difference counts as an update.
            self.count.reposCrawled.wrappingIncrement(ordering: .relaxed)

            if  repo != old
            {
                self.count.reposUpdated.wrappingIncrement(ordering: .relaxed)
            }

            let tags:[GitHub.Tag] = try await github.get(
                from: "/repos/\(repo.owner.login)/\(repo.name)/tags")

            // Import tags in chronological order.
            for tag:GitHub.Tag in tags.reversed()
            {
                // We don’t care about non-semver tags.
                guard SemanticVersion.init(refname: tag.name) != nil
                else
                {
                    continue
                }

                // A non-nil result counts as an update; every registered tag counts
                // as crawled.
                if  try await self.db.package.editions.register(tag,
                        package: package.cell,
                        with: session) != nil
                {
                    self.count.tagsUpdated.wrappingIncrement(ordering: .relaxed)
                }

                self.count.tagsCrawled.wrappingIncrement(ordering: .relaxed)
            }
        }
    }
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import Atomics
2+
import GitHubClient
3+
import GitHubAPI
4+
5+
extension GitHubPlugin
{
    /// Pairs the plugin’s tallies with two GitHub clients: one built around
    /// `GitHubOAuth` and one built around `GitHubOAuth.API`.
    ///
    /// The type is marked `@dynamicMemberLookup`; the subscript that backs the
    /// lookup lives in an extension.
    @dynamicMemberLookup
    struct Partner
    {
        /// Tallies shared with the rest of the plugin; not exposed directly.
        private
        let count:Counters

        /// Client built around `GitHubOAuth`.
        let oauth:GitHubClient<GitHubOAuth>
        // let app:GitHubClient<GitHubApp>
        /// Client built around `GitHubOAuth.API`.
        let api:GitHubClient<GitHubOAuth.API>

        /// Stores the given tallies and clients without doing any other work.
        init(count:Counters,
            oauth:GitHubClient<GitHubOAuth>,
            api:GitHubClient<GitHubOAuth.API>)
        {
            self.api = api
            self.oauth = oauth
            self.count = count
        }
    }
}
27+
extension GitHubPlugin.Partner
{
    /// Reads the current value of one of the plugin’s tallies, so that a tally can
    /// be accessed as if it were a plain `Int` property of this type.
    ///
    /// The value is loaded with relaxed ordering.
    ///
    /// This subscript is deliberately not generic. It was previously declared as
    /// `subscript<Int>`, which introduced a generic parameter that happened to be
    /// named `Int` and shadowed the standard integer type throughout the signature.
    /// Every tally in ``GitHubPlugin.Counters`` is an `UnsafeAtomic<Int>`, so the
    /// concrete signature accepts exactly the same key paths.
    ///
    /// -   Parameters:
    ///     -   keyPath: A key path from the counters to the atomic tally to read.
    ///
    /// -   Returns:
    ///     The value of the tally at the time of the load.
    subscript(
        dynamicMember keyPath:KeyPath<GitHubPlugin.Counters, UnsafeAtomic<Int>>) -> Int
    {
        self.count[keyPath: keyPath].load(ordering: .relaxed)
    }
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import Atomics
2+
import GitHubClient
3+
import GitHubAPI
4+
import HTTPClient
5+
import NIOPosix
6+
import NIOSSL
7+
8+
/// Groups the values needed to talk to GitHub: a TLS context, a set of activity
/// tallies, and the `GitHubOAuth` and `GitHubApp` values supplied by the caller.
struct GitHubPlugin:Sendable
{
    /// TLS context passed to every HTTP/2 client this plugin creates.
    let niossl:NIOSSLContext
    /// Activity tallies; a fresh set is created by the initializer.
    let count:Counters

    /// The OAuth value supplied at initialization.
    let oauth:GitHubOAuth
    /// The app value supplied at initialization.
    let app:GitHubApp

    /// Stores the given values and creates a new, zeroed set of tallies.
    init(niossl:NIOSSLContext, oauth:GitHubOAuth, app:GitHubApp)
    {
        self.niossl = niossl
        self.count = Counters.init()

        self.oauth = oauth
        self.app = app
    }
}
25+
extension GitHubPlugin
{
    /// Builds a ``Partner`` that shares this plugin’s tallies and owns two new
    /// HTTP/2 clients, one for `github.com` and one for `api.github.com`.
    ///
    /// -   Parameters:
    ///     -   threads: The event loop group the HTTP/2 clients are created on.
    ///
    /// -   Returns:
    ///     A partner wrapping the OAuth client and the API client.
    func partner(on threads:MultiThreadedEventLoopGroup) throws -> Partner
    {
        let web:HTTP2Client = HTTP2Client.init(threads: threads,
            niossl: self.niossl,
            remote: "github.com")
        let rest:HTTP2Client = HTTP2Client.init(threads: threads,
            niossl: self.niossl,
            remote: "api.github.com")

        let partner:Partner = .init(count: self.count,
            oauth: .init(http2: web, app: self.oauth),
            api: .init(http2: rest, app: self.oauth.api))

        return partner
    }

    /// Creates a ``Crawler`` that shares this plugin’s tallies, talks to
    /// `api.github.com` over a new HTTP/2 client, and runs it.
    ///
    /// -   Parameters:
    ///     -   threads: The event loop group the HTTP/2 client is created on.
    ///     -   db: The server database handed to the crawler.
    ///
    /// -   Throws:
    ///     Any error thrown by the crawler’s `run` method.
    func crawl(on threads:MultiThreadedEventLoopGroup, db:Server.DB) async throws
    {
        let rest:HTTP2Client = HTTP2Client.init(threads: threads,
            niossl: self.niossl,
            remote: "api.github.com")

        let crawler:Crawler = Crawler.init(count: self.count,
            api: .init(http2: rest, app: self.oauth.api),
            db: db)

        try await crawler.run()
    }
}

Sources/UnidocServer/Server/Server.GitHubPartner.swift

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)