Skip to content

Commit 1c20a2c

Browse files
committed
Added github remote repository access.
You turn it on via `--access=github`. Note: a local git checkout is still the default value for `--access`; I do not want to make GitHub access the default until we make the experience a bit more user-friendly. The most notable to-dos are: (1) fall back to the local checkout if the GitHub accessor errors out; (2) add a warning to stdout if the user runs GitHub access without passing an access token via the GITHUB_TOKEN env var (since doing that will hit the rate limit much faster).
1 parent f17ff8d commit 1c20a2c

File tree

5 files changed

+227
-9
lines changed

5 files changed

+227
-9
lines changed

Cargo.lock

Lines changed: 23 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ git2 = "0.11"
2121
log = "0.4"
2222
pbr = "1.0.2"
2323
regex = "1.3.4"
24-
reqwest = { version = "0.10.2", features = ["blocking"] }
24+
reqwest = { version = "0.10.2", features = ["blocking", "json"] }
2525
rustc_version = "0.2"
26+
serde = { version = "1.0.104", features = ["derive"] }
27+
serde_json = "1.0"
2628
structopt = "0.3.9"
2729
tar = "0.4"
2830
tee = "0.1"

src/github.rs

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
use failure::Error;
2+
use reqwest::{self, blocking::Client, blocking::Response};
3+
use serde::{Deserialize, Serialize};
4+
5+
use crate::Commit;
6+
7+
#[derive(Serialize, Deserialize, Debug)]
8+
struct GithubCommitElem { commit: GithubCommit, sha: String }
9+
#[derive(Serialize, Deserialize, Debug)]
10+
struct GithubCommit { author: GithubAuthor, committer: GithubAuthor, message: String, }
11+
#[derive(Serialize, Deserialize, Debug)]
12+
struct GithubAuthor { date: String, email: String, name: String }
13+
14+
/// UTC timestamp type that commit dates are parsed into.
type GitDate = chrono::DateTime<chrono::Utc>;
15+
16+
impl GithubCommitElem {
17+
fn date(&self) -> Result<GitDate, Error> {
18+
Ok(self.commit.committer.date.parse()?)
19+
}
20+
21+
fn git_commit(self) -> Result<Commit, Error> {
22+
let date = self.date()?;
23+
Ok(Commit {
24+
sha: self.sha,
25+
date,
26+
summary: self.commit.message,
27+
})
28+
}
29+
}
30+
31+
/// Builds the default headers sent with every GitHub API request.
///
/// A `User-Agent` of the form `<crate name>/<crate version>` is always set.
/// If the `GITHUB_TOKEN` environment variable can be read, an
/// `Authorization: token <value>` header is added as well.
///
/// # Errors
///
/// Fails if the token contains bytes that are not valid in a header value.
fn headers() -> Result<reqwest::header::HeaderMap, Error> {
    use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, USER_AGENT};

    let mut headers = HeaderMap::new();
    let user_agent = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
    headers.insert(USER_AGENT, HeaderValue::from_static(user_agent));
    if let Ok(token) = std::env::var("GITHUB_TOKEN") {
        eprintln!("adding local env GITHUB_TOKEN value to headers in github query");
        let mut value = HeaderValue::from_str(&format!("token {}", token))?;
        // The value is a credential: flag it as sensitive so that the header
        // value's `Debug` output does not print the token.
        value.set_sensitive(true);
        headers.insert(AUTHORIZATION, value);
    }
    Ok(headers)
}
43+
44+
pub(crate) fn get_commit(sha: &str) -> Result<Commit, Error> {
45+
let url = SingleCommitUrl { sha }.url();
46+
let client = Client::builder()
47+
.default_headers(headers()?)
48+
.build()?;
49+
let response: Response = client.get(&url).send()?;
50+
let elem: GithubCommitElem = response.json()?;
51+
elem.git_commit()
52+
}
53+
54+
/// Parameters of a commit-range lookup against the GitHub API.
#[derive(Copy, Clone, Debug)]
pub(crate) struct CommitsQuery<'a> {
    /// Value sent as the `since` URL parameter of the commit listing.
    pub since_date: &'a str,
    /// Value sent as the `sha` URL parameter; the listing starts from here.
    pub most_recent_sha: &'a str,
    /// The listing stops once a commit with this sha has been collected.
    pub earliest_sha: &'a str,
}
60+
61+
/// Returns the bors merge commits between the two specified boundaries
62+
/// (boundaries inclusive).
63+
64+
impl<'a> CommitsQuery<'a> {
65+
pub fn get_commits(&self) -> Result<Vec<Commit>, Error> {
66+
get_commits(*self)
67+
}
68+
}
69+
70+
/// Value sent as the `per_page` URL parameter of commit listing requests.
const PER_PAGE: usize = 100;
71+
/// Owner segment of the repository path used in GitHub API URLs.
const OWNER: &str = "rust-lang";
72+
/// Repository-name segment of the repository path used in GitHub API URLs.
const REPO: &str = "rust";
73+
74+
75+
/// Something that can render itself as a request URL.
trait ToUrl {
    /// Returns the URL as an owned string.
    fn url(&self) -> String;
}
76+
/// URL parameters for one page of a commit listing.
struct CommitsUrl<'a> {
    /// Page number to request.
    page: usize,
    /// Value of the `author` parameter.
    author: &'a str,
    /// Value of the `since` parameter.
    since: &'a str,
    /// Value of the `sha` parameter.
    sha: &'a str,
}
77+
/// URL parameters for looking up a single commit.
struct SingleCommitUrl<'a> {
    /// Commit reference placed at the end of the URL path.
    sha: &'a str,
}
78+
79+
impl<'a> ToUrl for CommitsUrl<'a> {
80+
fn url(&self) -> String {
81+
format!("https://api.github.com/repos/{OWNER}/{REPO}/commits\
82+
?page={PAGE}&per_page={PER_PAGE}\
83+
&author={AUTHOR}&since={SINCE}&sha={SHA}",
84+
OWNER=OWNER, REPO=REPO,
85+
PAGE=self.page, PER_PAGE=PER_PAGE,
86+
AUTHOR=self.author, SINCE=self.since, SHA=self.sha)
87+
}
88+
}
89+
90+
impl<'a> ToUrl for SingleCommitUrl<'a> {
91+
fn url(&self) -> String {
92+
format!("https://api.github.com/repos/{OWNER}/{REPO}/commits/{REF}",
93+
OWNER=OWNER, REPO=REPO, REF=self.sha)
94+
}
95+
}
96+
97+
fn get_commits(q: CommitsQuery) -> Result<Vec<Commit>, Error> {
98+
// build up commit sequence, by feeding in `sha` as the starting point, and
99+
// working way backwards to max(`q.since_date`, `q.earliest_sha`).
100+
let mut commits = Vec::new();
101+
102+
// focus on Pull Request merges, all authored and committed by bors.
103+
let author = "bors";
104+
105+
let client = Client::builder()
106+
.default_headers(headers()?)
107+
.build()?;
108+
for page in 1.. {
109+
let url = CommitsUrl { page, author, since: q.since_date, sha: q.most_recent_sha }.url();
110+
111+
let response: Response = client.get(&url).send()?;
112+
113+
let action = parse_paged_elems(response, |elem: GithubCommitElem| {
114+
let date: chrono::DateTime<chrono::Utc> = match elem.commit.committer.date.parse() {
115+
Ok(date) => date,
116+
Err(err) => return Loop::Err(err.into()),
117+
};
118+
let sha = elem.sha.clone();
119+
let summary = elem.commit.message;
120+
let commit = Commit { sha, date, summary };
121+
commits.push(commit);
122+
123+
if elem.sha == q.earliest_sha {
124+
eprintln!("ending github query because we found starting sha: {}", elem.sha);
125+
return Loop::Break;
126+
}
127+
128+
Loop::Next
129+
})?;
130+
131+
if let Loop::Break = action { break; }
132+
}
133+
134+
eprintln!("get_commits_between returning commits, len: {}", commits.len());
135+
136+
// reverse to obtain chronological order
137+
commits.reverse();
138+
Ok(commits)
139+
}
140+
141+
/// Control signal used while walking paged results.
enum Loop<E> {
    /// Stop walking.
    Break,
    /// Keep going.
    Next,
    /// Stop walking and report the carried error.
    Err(E),
}
142+
/// Uninhabited type; a `Loop<Void>` can therefore never hold `Loop::Err`.
enum Void {}
143+
144+
fn parse_paged_elems<Elem: for<'a> serde::Deserialize<'a>>(response: Response,
145+
mut k: impl FnMut(Elem) -> Loop<Error>)
146+
-> Result<Loop<Void>, Error>
147+
{
148+
// parse the JSON into an array of the expected Elem type
149+
let elems: Vec<Elem> = response.json()?;
150+
151+
// if `elems` is empty, then we've run out of useful pages to lookup.
152+
if elems.len() == 0 { return Ok(Loop::Break); }
153+
154+
for elem in elems.into_iter() {
155+
let act = k(elem);
156+
157+
// the callback will tell us if we should terminate loop early (e.g. due to matching `sha`)
158+
match act {
159+
Loop::Break => return Ok(Loop::Break),
160+
Loop::Err(e) => return Err(e),
161+
Loop::Next => continue,
162+
}
163+
}
164+
165+
// by default, we keep searching on next page from github.
166+
Ok(Loop::Next)
167+
}

src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ mod least_satisfying;
3535
mod repo_access;
3636

3737
use crate::least_satisfying::{least_satisfying, Satisfies};
38-
use crate::repo_access::{AccessViaLocalGit, RustRepositoryAccessor};
38+
use crate::repo_access::{AccessViaGithub, AccessViaLocalGit, RustRepositoryAccessor};
3939

4040
#[derive(Debug, Clone, PartialEq)]
4141
pub struct Commit {
@@ -946,7 +946,7 @@ impl Config {
946946
let repo_access: Box<dyn RustRepositoryAccessor>;
947947
repo_access = match args.access.as_ref().map(|x|x.as_str()) {
948948
None | Some("checkout") => Box::new(AccessViaLocalGit),
949-
Some("github") => unimplemented!(),
949+
Some("github") => Box::new(AccessViaGithub),
950950
Some(other) => bail!("unknown access argument: {}", other),
951951
};
952952

src/repo_access.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,13 @@ pub(crate) trait RustRepositoryAccessor {
2424
#[path="git.rs"]
2525
mod git;
2626

27+
#[path="github.rs"]
28+
mod github;
29+
2730
/// Repository accessor backed by the `git` module.
pub(crate) struct AccessViaLocalGit;
2831

32+
/// Repository accessor backed by the `github` module.
pub(crate) struct AccessViaGithub;
33+
2934
impl RustRepositoryAccessor for AccessViaLocalGit {
3035
fn commit(&self, commit_ref: &str) -> Result<Commit, Error> {
3136
self::git::get_commit(commit_ref)
@@ -38,3 +43,30 @@ impl RustRepositoryAccessor for AccessViaLocalGit {
3843
})
3944
}
4045
}
46+
47+
impl RustRepositoryAccessor for AccessViaGithub {
48+
fn commit(&self, commit_ref: &str) -> Result<Commit, Error> {
49+
github::get_commit(commit_ref)
50+
}
51+
52+
fn commits(&self, start_sha: &str, end_sha: &str) -> Result<Vec<Commit>, Error> {
53+
// `earliest_date` is an lower bound on what we should search in our
54+
// github query. Why is it `start` date minus 1?
55+
//
56+
// Because: the "since" parameter in the github API is an exclusive
57+
// bound. We need an inclusive bound, so we go yet another day prior for
58+
// this bound on the github search.
59+
let since_date = self.bound_to_date(Bound::Commit(start_sha.to_string()))? - chrono::Duration::days(1);
60+
61+
eprintln!("fetching (via remote github) commits from max({}, {}) to {}",
62+
start_sha, since_date, end_sha);
63+
64+
let query = github::CommitsQuery {
65+
since_date: &since_date.format(crate::YYYY_MM_DD).to_string(),
66+
earliest_sha: start_sha,
67+
most_recent_sha: end_sha,
68+
};
69+
70+
query.get_commits()
71+
}
72+
}

0 commit comments

Comments
 (0)