Skip to content

Commit b40a3ed

Browse files
authored
Merge pull request #300 from tayloraswift/crawling-tickets
Crawling tickets
2 parents 7139803 + 69bced9 commit b40a3ed

24 files changed

+454
-219
lines changed

Package.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,6 @@ let package:Package = .init(
476476

477477
.target(name: "UnidocDB",
478478
dependencies: [
479-
.target(name: "GitHubAPI"),
480479
.target(name: "UnidocRecords_LZ77"),
481480
.target(name: "UnidocLinker"),
482481
.target(name: "UnidocRecords"),
@@ -506,6 +505,7 @@ let package:Package = .init(
506505
.target(name: "UnidocRecords",
507506
dependencies: [
508507
.target(name: "FNV1"),
508+
.target(name: "GitHubAPI"),
509509
.target(name: "SymbolGraphs"),
510510
.target(name: "UnidocAPI"),
511511
.product(name: "MD5", package: "swift-hash"),

Sources/GitHubAPI/GitHub.Node.swift

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import JSON
2+
3+
extension GitHub
{
    /// A string-backed identifier. The wrapped string is stored verbatim in ``rawValue``;
    /// no validation or normalization is performed.
    @frozen public
    struct Node:RawRepresentable, Equatable, Hashable, Sendable
    {
        /// The underlying string, exactly as it was supplied to the initializer.
        public
        let rawValue:String

        /// Wraps `rawValue` as-is. This initializer is non-failable.
        @inlinable public
        init(rawValue:String) { self.rawValue = rawValue }
    }
}
18+
extension GitHub.Node:CustomStringConvertible
{
    /// The textual form of this node identifier, which is exactly its ``rawValue``.
    @inlinable public
    var description:String
    {
        get { self.rawValue }
    }
}
23+
extension GitHub.Node:LosslessStringConvertible
{
    /// Creates a node identifier from its textual form. Every string is accepted, so this
    /// initializer is non-failable and simply forwards to ``init(rawValue:)``.
    @inlinable public
    init(_ description:String)
    {
        self.init(rawValue: description)
    }
}
28+
extension GitHub.Node:ExpressibleByStringLiteral
{
    /// Allows a node identifier to be written as a string literal. The contents of the
    /// literal become the ``rawValue`` unchanged.
    @inlinable public
    init(stringLiteral:String)
    {
        self.init(rawValue: stringLiteral)
    }
}
33+
//  Neither conformance declares any members here, so the protocol requirements must be
//  satisfied by default implementations defined outside this file.
//  NOTE(review): presumably these are the JSON module’s defaults for `RawRepresentable`
//  types with a string raw value — confirm.
extension GitHub.Node:JSONEncodable
{
}
extension GitHub.Node:JSONDecodable
{
}

Sources/GitHubAPI/GitHub.Repo.swift

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ extension GitHub
77
{
88
public
99
let id:Int32
10+
11+
public
12+
let owner:Owner
1013
public
11-
var owner:Owner
14+
let name:String
1215
public
13-
var name:String
16+
let node:Node
1417

1518
/// The repo’s license, if GitHub was able to detect it.
1619
public
@@ -76,6 +79,7 @@ extension GitHub
7679
init(id:Int32,
7780
owner:Owner,
7881
name:String,
82+
node:Node,
7983
license:License? = nil,
8084
topics:[String] = [],
8185
master:String?,
@@ -97,6 +101,7 @@ extension GitHub
97101
self.id = id
98102
self.owner = owner
99103
self.name = name
104+
self.node = node
100105
self.license = license
101106
self.topics = topics
102107
self.master = master
@@ -123,30 +128,28 @@ extension GitHub.Repo:JSONObjectDecodable
123128
enum CodingKey:String, Sendable
124129
{
125130
case id
131+
case node_id
132+
126133
case owner
127134
case name
128-
129-
@available(*, unavailable)
130-
case node = "node_id"
131-
132135
case license
133136
case topics
134-
case master = "default_branch"
137+
case default_branch
135138
// not `watchers_count`, which is just stargazers
136-
case watchers = "subscribers_count"
137-
case forks = "forks_count"
138-
case stars = "stargazers_count"
139+
case subscribers_count
140+
case forks_count
141+
case stargazers_count
139142
case size
140143
case archived
141144
case disabled
142145
case fork
143146
case visibility
144147
case language
145148
case homepage
146-
case about = "description"
147-
case created = "created_at"
148-
case updated = "updated_at"
149-
case pushed = "pushed_at"
149+
case description
150+
case created_at
151+
case updated_at
152+
case pushed_at
150153
}
151154

152155
public
@@ -158,23 +161,24 @@ extension GitHub.Repo:JSONObjectDecodable
158161
self.init(id: try json[.id].decode(),
159162
owner: try json[.owner].decode(),
160163
name: try json[.name].decode(),
164+
node: try json[.node_id].decode(),
161165
license: try json[.license]?.decode(),
162166
topics: try json[.topics]?.decode() ?? [],
163-
master: try json[.master]?.decode(),
164-
watchers: try json[.watchers]?.decode(),
165-
forks: try json[.forks].decode(),
166-
stars: try json[.stars].decode(),
167+
master: try json[.default_branch]?.decode(),
168+
watchers: try json[.subscribers_count]?.decode(),
169+
forks: try json[.forks_count].decode(),
170+
stars: try json[.stargazers_count].decode(),
167171
size: try json[.size].decode(),
168172
archived: try json[.archived].decode(),
169173
disabled: try json[.disabled].decode(),
170174
fork: try json[.fork].decode(),
171175
visibility: try json[.visibility]?.decode(),
172176
language: try json[.language]?.decode(),
173177
homepage: try json[.homepage]?.decode(),
174-
about: try json[.about]?.decode(),
175-
created: try json[.created].decode(),
176-
updated: try json[.updated].decode(),
177-
pushed: try json[.pushed].decode())
178+
about: try json[.description]?.decode(),
179+
created: try json[.created_at].decode(),
180+
updated: try json[.updated_at].decode(),
181+
pushed: try json[.pushed_at].decode())
178182

179183
// String field normalization.
180184
if case true? = self.homepage?.isEmpty
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import BSON
2+
import GitHubAPI
3+
import MongoQL
4+
import UnidocRecords
5+
import UnixTime
6+
7+
extension Unidoc
{
    /// A record that associates an identifier with a GitHub node and a pair of timestamps.
    @frozen public
    struct CrawlingTicket<ID>:Identifiable, Sendable where ID:Hashable & Sendable
    {
        /// The identity of this ticket.
        public
        let id:ID
        /// The GitHub node this ticket refers to.
        public
        let node:GitHub.Node
        /// The time associated with this ticket.
        ///
        /// NOTE(review): presumably the time at which the next crawl is scheduled — confirm.
        public
        var time:UnixMillisecond
        /// An optional second timestamp.
        ///
        /// NOTE(review): presumably the time of the most recent crawl, if any — confirm.
        public
        var last:UnixMillisecond?

        /// Creates a ticket from its components.
        ///
        /// -   Parameters:
        ///     -   id: The identity of the ticket.
        ///     -   node: The GitHub node the ticket refers to.
        ///     -   time: Defaults to `.zero`.
        ///     -   last: Defaults to nil.
        @inlinable public
        init(id:ID,
            node:GitHub.Node,
            time:UnixMillisecond = .zero,
            last:UnixMillisecond? = nil)
        {
            self.id = id
            self.node = node
            self.time = time
            self.last = last
        }
    }
}
31+
extension Unidoc.CrawlingTicket:Mongo.MasterCodingModel
{
    /// The field names used when a ticket is stored as a document. Apart from `_id`, each
    /// field is abbreviated to a single letter.
    @frozen public
    enum CodingKey:String, Sendable
    {
        /// The ticket’s identity, stored under `_id`.
        case id = "_id"
        /// The GitHub node, stored under `N`.
        case node = "N"
        /// The ticket’s ``time``, stored under `T`.
        case time = "T"
        /// The ticket’s ``last`` timestamp, stored under `L`.
        case last = "L"
    }
}
42+
extension Unidoc.CrawlingTicket:BSONDocumentEncodable, BSONEncodable where ID:BSONEncodable
{
    /// Writes this ticket’s fields to the document encoder, in the order `id`, `node`,
    /// `time`, `last`.
    public
    func encode(to document:inout BSON.DocumentEncoder<CodingKey>)
    {
        document[.id] = self.id
        document[.node] = self.node
        document[.time] = self.time
        //  `last` is optional.
        //  NOTE(review): presumably the encoder omits the field when it is nil — confirm.
        document[.last] = self.last
    }
}
53+
extension Unidoc.CrawlingTicket:BSONDocumentDecodable, BSONDecodable where ID:BSONDecodable
{
    /// Reads a ticket back from a decoded document. The `id`, `node`, and `time` fields are
    /// required; `last` is decoded only if it is present.
    ///
    /// -   Throws: Any error raised while decoding one of the fields.
    @inlinable public
    init(bson:BSON.DocumentDecoder<CodingKey>) throws
    {
        //  Fields are decoded in the same order as they are encoded.
        let id:ID = try bson[.id].decode()
        let node:GitHub.Node = try bson[.node].decode()
        let time:UnixMillisecond = try bson[.time].decode()
        let last:UnixMillisecond? = try bson[.last]?.decode()

        self.init(id: id, node: node, time: time, last: last)
    }
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import BSON
2+
import MongoDB
3+
import UnidocRecords
4+
import UnixTime
5+
6+
extension Unidoc.DB
{
    /// A lightweight handle that remembers which database its commands should run against.
    @frozen public
    struct CrawlingTickets
    {
        /// The database that commands issued through this handle are run against.
        public
        let database:Mongo.Database

        /// Creates a handle for `database`. This initializer is internal to the module.
        @inlinable internal
        init(database:Mongo.Database) { self.database = database }
    }
}
21+
extension Unidoc.DB.CrawlingTickets
{
    /// A non-unique index named `Time`, keyed on the ticket’s `time` field with the `(+)`
    /// direction.
    public static
    let indexTime:Mongo.CollectionIndex = .init("Time", unique: false)
    {
        keys in keys[Unidoc.CrawlingTicket<Unidoc.Package>[.time]] = (+)
    }
}
30+
extension Unidoc.DB.CrawlingTickets:Mongo.CollectionModel
{
    /// The document type stored in this collection: a crawling ticket identified by a
    /// package.
    public
    typealias Element = Unidoc.CrawlingTicket<Unidoc.Package>

    /// The name of the collection, `CrawlingTickets`.
    @inlinable public static
    var name:Mongo.Collection
    {
        "CrawlingTickets"
    }

    /// The indexes declared for this collection. Currently this is only ``indexTime``.
    @inlinable public static
    var indexes:[Mongo.CollectionIndex]
    {
        [Self.indexTime]
    }
}
41+
extension Unidoc.DB.CrawlingTickets
{
    /// Upserts one document per ticket, matching on the ticket’s `id`.
    ///
    /// If no document with that `id` exists, one is inserted with the ticket’s `id` and
    /// `time`. Whether or not a document already existed, its `node` and `last` fields are
    /// set from the ticket. Because `time` is only written on insertion, the scheduled
    /// times of existing tickets are left untouched.
    ///
    /// -   Parameters:
    ///     -   tickets: The tickets to create or refresh.
    ///     -   session: The session to run the update command with.
    /// -   Returns: The result of calling `updates()` on the server’s response.
    public
    func create(tickets:[Unidoc.CrawlingTicket<Unidoc.Package>],
        with session:Mongo.Session) async throws -> Mongo.Updates<Unidoc.Package>
    {
        let command:Mongo.Update<Mongo.Many, Unidoc.Package> = .init(Self.name)
        {
            batch in

            for ticket:Element in tickets
            {
                batch
                {
                    statement in

                    statement[.upsert] = true
                    statement[.q] { $0[Element[.id]] = ticket.id }
                    statement[.u]
                    {
                        update in

                        //  Only applied when the upsert inserts a new document.
                        update[.setOnInsert]
                        {
                            $0[Element[.id]] = ticket.id
                            $0[Element[.time]] = ticket.time
                        }
                        //  Applied to new and existing documents alike.
                        update[.set]
                        {
                            $0[Element[.node]] = ticket.node
                            $0[Element[.last]] = ticket.last
                        }
                    }
                }
            }
        }

        let response:Mongo.UpdateResponse<Unidoc.Package> = try await session.run(
            command: command,
            against: self.database)

        return try response.updates()
    }

    /// Loads at most `limit` tickets, sorted by their `time` field in the `(+)` direction,
    /// so the tickets with the smallest `time` come first. The query is hinted to use
    /// ``indexTime``.
    ///
    /// -   Parameters:
    ///     -   limit: The maximum number of tickets to return.
    ///     -   session: The session to run the find command with.
    public
    func find(stalest limit:Int,
        with session:Mongo.Session) async throws -> [Unidoc.CrawlingTicket<Unidoc.Package>]
    {
        try await session.run(
            command: Mongo.Find<Mongo.SingleBatch<Element>>.init(Self.name, limit: limit)
            {
                query in

                query[.sort] { $0[Element[.time]] = (+) }
                query[.hint] = Self.indexTime.id
            },
            against: self.database)
    }

    /// Reschedules an existing crawling ticket by assigning it a new `time` and `last`.
    ///
    /// -   Parameters:
    ///     -   ticket: The `id` of the ticket to modify.
    ///     -   time: The new value for the ticket’s `time` field.
    ///     -   last: The new value for the ticket’s `last` field. Defaults to nil.
    ///     -   session: The session to run the update with.
    /// -   Returns: Whatever `update(with:_:)` reports for the statement.
    ///     NOTE(review): presumably nil when no ticket matched — confirm.
    @discardableResult
    public
    func move(ticket:Unidoc.Package,
        time:UnixMillisecond,
        last:UnixMillisecond? = nil,
        with session:Mongo.Session) async throws -> Bool?
    {
        try await self.update(with: session)
        {
            batch in batch
            {
                statement in

                statement[.q] { $0[Element[.id]] = ticket }
                statement[.u]
                {
                    $0[.set]
                    {
                        //  The caller supplies `last` explicitly. Copying the stored
                        //  `time` into `last` would be wrong, because the stored value
                        //  might be an extreme placeholder, like 0.
                        $0[Element[.time]] = time
                        $0[Element[.last]] = last
                    }
                }
            }
        }
    }
}

Sources/UnidocDB/Crawling/Unidoc.DB.CrawlingWindows.swift

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import BSON
22
import MongoDB
3-
import SymbolGraphs
4-
import Symbols
53
import UnidocRecords
64
import UnixTime
75

Sources/UnidocDB/Editions/Unidoc.DB.Editions.swift

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import BSON
2-
import GitHubAPI
32
import JSONEncoding
43
import MongoDB
54
import SemanticVersions

0 commit comments

Comments
 (0)