Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only fetch PR diff once. #1749

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 73 additions & 28 deletions src/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,30 @@ pub struct Label {
/// needed at this time (merged_at, diff_url, html_url, patch_url, url).
#[derive(Debug, serde::Deserialize)]
pub struct PullRequestDetails {
// none for now
/// This is a slot to hold the diff for a PR.
///
/// This will be filled in only once as an optimization since multiple
/// handlers want to see PR changes, and getting the diff can be
/// expensive.
#[serde(skip)]
files_changed: tokio::sync::OnceCell<Vec<FileDiff>>,
}

/// Representation of a diff to a single file.
#[derive(Debug)]
pub struct FileDiff {
/// The full path of the file.
pub path: String,
/// The diff for the file.
pub diff: String,
}

impl PullRequestDetails {
pub fn new() -> PullRequestDetails {
PullRequestDetails {
files_changed: tokio::sync::OnceCell::new(),
}
}
}

/// An issue or pull request.
Expand Down Expand Up @@ -786,23 +809,33 @@ impl Issue {
}

/// Returns the diff in this event, for Open and Synchronize events for now.
pub async fn diff(&self, client: &GithubClient) -> anyhow::Result<Option<String>> {
///
/// Returns `None` if the issue is not a PR.
pub async fn diff(&self, client: &GithubClient) -> anyhow::Result<Option<&[FileDiff]>> {
let Some(pr) = &self.pull_request else {
return Ok(None);
};
let (before, after) = if let (Some(base), Some(head)) = (&self.base, &self.head) {
(base.sha.clone(), head.sha.clone())
(&base.sha, &head.sha)
} else {
return Ok(None);
};

let mut req = client.get(&format!(
"{}/compare/{}...{}",
self.repository().url(),
before,
after
));
req = req.header("Accept", "application/vnd.github.v3.diff");
let (diff, _) = client.send_req(req).await?;
let body = String::from_utf8_lossy(&diff).to_string();
Ok(Some(body))
let diff = pr
.files_changed
.get_or_try_init::<anyhow::Error, _, _>(|| async move {
let url = format!("{}/compare/{before}...{after}", self.repository().url());
let mut req = client.get(&url);
req = req.header("Accept", "application/vnd.github.v3.diff");
let (diff, _) = client
.send_req(req)
.await
.with_context(|| format!("failed to fetch diff comparison for {url}"))?;
let body = String::from_utf8_lossy(&diff);
Ok(parse_diff(&body))
})
.await?;
Ok(Some(diff))
}

/// Returns the commits from this pull request (no commits are returned if this `Issue` is not
Expand Down Expand Up @@ -982,19 +1015,29 @@ pub struct CommitBase {
pub repo: Repository,
}

pub fn files_changed(diff: &str) -> Vec<&str> {
let mut files = Vec::new();
for line in diff.lines() {
// mostly copied from highfive
if line.starts_with("diff --git ") {
files.push(
line[line.find(" b/").unwrap()..]
.strip_prefix(" b/")
.unwrap(),
);
}
}
pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
// This does not properly handle filenames with spaces.
let re = regex::Regex::new("(?m)^diff --git .* b/(.*)").unwrap();
let mut files: Vec<_> = re
.captures_iter(diff)
.map(|cap| {
let start = cap.get(0).unwrap().start();
let path = cap.get(1).unwrap().as_str().to_string();
(start, path)
})
.collect();
// Break the list up into (start, end) pairs starting from the "diff --git" line.
files.push((diff.len(), String::new()));
files
.windows(2)
.map(|w| {
let (start, end) = (&w[0], &w[1]);
FileDiff {
path: start.1.clone(),
diff: diff[start.0..end.0].to_string(),
}
})
.collect()
}

#[derive(Debug, serde::Deserialize)]
Expand Down Expand Up @@ -1503,7 +1546,7 @@ impl Repository {
self.full_name
)
})?;
issue.pull_request = Some(PullRequestDetails {});
issue.pull_request = Some(PullRequestDetails::new());
Ok(issue)
}

Expand Down Expand Up @@ -2484,7 +2527,8 @@ index fb9cee43b2d..b484c25ea51 100644
zulip_stream = 245100 # #t-compiler/wg-prioritization/alerts
topic = "#{number} {title}"
"##;
assert_eq!(files_changed(input), vec!["triagebot.toml".to_string()]);
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
assert_eq!(files, vec!["triagebot.toml".to_string()]);
}

#[test]
Expand Down Expand Up @@ -2516,8 +2560,9 @@ index c58310947d2..3b0854d4a9b 100644
}
+
"##;
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
assert_eq!(
files_changed(input),
files,
vec![
"library/stdarch".to_string(),
"src/librustdoc/clean/types.rs".to_string(),
Expand Down
138 changes: 68 additions & 70 deletions src/handlers/assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

use crate::{
config::AssignConfig,
github::{self, Event, Issue, IssuesAction, Selection},
github::{self, Event, FileDiff, Issue, IssuesAction, Selection},
handlers::{Context, GithubClient, IssuesEvent},
interactions::EditIssueBody,
};
Expand Down Expand Up @@ -80,13 +80,11 @@ struct AssignData {
}

/// Input for auto-assignment when a PR is created.
pub(super) struct AssignInput {
git_diff: String,
}
pub(super) struct AssignInput {}

/// Prepares the input when a new PR is opened.
pub(super) async fn parse_input(
ctx: &Context,
_ctx: &Context,
event: &IssuesEvent,
config: Option<&AssignConfig>,
) -> Result<Option<AssignInput>, String> {
Expand All @@ -100,15 +98,7 @@ pub(super) async fn parse_input(
{
return Ok(None);
}
let git_diff = match event.issue.diff(&ctx.github).await {
Ok(None) => return Ok(None),
Err(e) => {
log::error!("failed to fetch diff: {:?}", e);
return Ok(None);
}
Ok(Some(diff)) => diff,
};
Ok(Some(AssignInput { git_diff }))
Ok(Some(AssignInput {}))
}

/// Handles the work of setting an assignment for a new PR and posting a
Expand All @@ -117,11 +107,18 @@ pub(super) async fn handle_input(
ctx: &Context,
config: &AssignConfig,
event: &IssuesEvent,
input: AssignInput,
_input: AssignInput,
) -> anyhow::Result<()> {
let Some(diff) = event.issue.diff(&ctx.github).await? else {
bail!(
"expected issue {} to be a PR, but the diff could not be determined",
event.issue.number
)
};

// Don't auto-assign or welcome if the user manually set the assignee when opening.
if event.issue.assignees.is_empty() {
let (assignee, from_comment) = determine_assignee(ctx, event, config, &input).await?;
let (assignee, from_comment) = determine_assignee(ctx, event, config, &diff).await?;
if assignee.as_deref() == Some("ghost") {
// "ghost" is GitHub's placeholder account for deleted accounts.
// It is used here as a convenient way to prevent assignment. This
Expand Down Expand Up @@ -180,7 +177,7 @@ pub(super) async fn handle_input(
if config.warn_non_default_branch {
warnings.extend(non_default_branch(event));
}
warnings.extend(modifies_submodule(&input.git_diff));
warnings.extend(modifies_submodule(diff));
if !warnings.is_empty() {
let warnings: Vec<_> = warnings
.iter()
Expand Down Expand Up @@ -222,9 +219,9 @@ fn non_default_branch(event: &IssuesEvent) -> Option<String> {
}

/// Returns a message if the PR modifies a git submodule.
fn modifies_submodule(diff: &str) -> Option<String> {
fn modifies_submodule(diff: &[FileDiff]) -> Option<String> {
let re = regex::Regex::new(r"\+Subproject\scommit\s").unwrap();
if re.is_match(diff) {
if diff.iter().any(|fd| re.is_match(&fd.diff)) {
Some(SUBMODULE_WARNING_MSG.to_string())
} else {
None
Expand Down Expand Up @@ -278,7 +275,7 @@ async fn determine_assignee(
ctx: &Context,
event: &IssuesEvent,
config: &AssignConfig,
input: &AssignInput,
diff: &[FileDiff],
) -> anyhow::Result<(Option<String>, bool)> {
let teams = crate::team_data::teams(&ctx.github).await?;
if let Some(name) = find_assign_command(ctx, event) {
Expand All @@ -298,7 +295,7 @@ async fn determine_assignee(
}
}
// Errors fall-through to try fallback group.
match find_reviewers_from_diff(config, &input.git_diff) {
match find_reviewers_from_diff(config, diff) {
Ok(candidates) if !candidates.is_empty() => {
match find_reviewer_from_names(&teams, config, &event.issue, &candidates) {
Ok(assignee) => return Ok((Some(assignee), false)),
Expand Down Expand Up @@ -346,60 +343,61 @@ async fn determine_assignee(
/// May return an error if the owners map is misconfigured.
///
/// Beware this may return an empty list if nothing matches.
fn find_reviewers_from_diff(config: &AssignConfig, diff: &str) -> anyhow::Result<Vec<String>> {
fn find_reviewers_from_diff(
config: &AssignConfig,
diff: &[FileDiff],
) -> anyhow::Result<Vec<String>> {
// Map of `owners` path to the number of changes found in that path.
// This weights the reviewer choice towards places where the most edits are done.
let mut counts: HashMap<&str, u32> = HashMap::new();
// List of the longest `owners` patterns that match the current path. This
// prefers choosing reviewers from deeply nested paths over those defined
// for top-level paths, under the assumption that they are more
// specialized.
//
// This is a list to handle the situation if multiple paths of the same
// length match.
let mut longest_owner_patterns = Vec::new();
// Iterate over the diff, finding the start of each file. After each file
// is found, it counts the number of modified lines in that file, and
// tracks those in the `counts` map.
for line in diff.split('\n') {
if line.starts_with("diff --git ") {
// Start of a new file.
longest_owner_patterns.clear();
let path = line[line.find(" b/").unwrap()..]
.strip_prefix(" b/")
.unwrap();
// Find the longest `owners` entries that match this path.
let mut longest = HashMap::new();
for owner_pattern in config.owners.keys() {
let ignore = ignore::gitignore::GitignoreBuilder::new("/")
.add_line(None, owner_pattern)
.with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
.build()?;
if ignore.matched_path_or_any_parents(path, false).is_ignore() {
let owner_len = owner_pattern.split('/').count();
longest.insert(owner_pattern, owner_len);
}
}
let max_count = longest.values().copied().max().unwrap_or(0);
longest_owner_patterns.extend(
longest
.iter()
.filter(|(_, count)| **count == max_count)
.map(|x| *x.0),
);
// Give some weight to these patterns to start. This helps with
// files modified without any lines changed.
for owner_pattern in &longest_owner_patterns {
*counts.entry(owner_pattern).or_default() += 1;
// Iterate over the diff, counting the number of modified lines in each
// file, and tracks those in the `counts` map.
for file_diff in diff {
// List of the longest `owners` patterns that match the current path. This
// prefers choosing reviewers from deeply nested paths over those defined
// for top-level paths, under the assumption that they are more
// specialized.
//
// This is a list to handle the situation if multiple paths of the same
// length match.
let mut longest_owner_patterns = Vec::new();

// Find the longest `owners` entries that match this path.
let mut longest = HashMap::new();
for owner_pattern in config.owners.keys() {
let ignore = ignore::gitignore::GitignoreBuilder::new("/")
.add_line(None, owner_pattern)
.with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
.build()?;
if ignore
.matched_path_or_any_parents(&file_diff.path, false)
.is_ignore()
{
let owner_len = owner_pattern.split('/').count();
longest.insert(owner_pattern, owner_len);
}
continue;
}
// Check for a modified line.
if (!line.starts_with("+++") && line.starts_with('+'))
|| (!line.starts_with("---") && line.starts_with('-'))
{
for owner_path in &longest_owner_patterns {
*counts.entry(owner_path).or_default() += 1;
let max_count = longest.values().copied().max().unwrap_or(0);
longest_owner_patterns.extend(
longest
.iter()
.filter(|(_, count)| **count == max_count)
.map(|x| *x.0),
);
// Give some weight to these patterns to start. This helps with
// files modified without any lines changed.
for owner_pattern in &longest_owner_patterns {
*counts.entry(owner_pattern).or_default() += 1;
}

// Count the modified lines.
for line in file_diff.diff.lines() {
if (!line.starts_with("+++") && line.starts_with('+'))
|| (!line.starts_with("---") && line.starts_with('-'))
{
for owner_path in &longest_owner_patterns {
*counts.entry(owner_path).or_default() += 1;
}
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/handlers/assign/tests/tests_from_diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

use super::super::*;
use crate::config::AssignConfig;
use crate::github::parse_diff;
use std::fmt::Write;

fn test_from_diff(diff: &str, config: toml::Value, expected: &[&str]) {
let files = parse_diff(diff);
let aconfig: AssignConfig = config.try_into().unwrap();
assert_eq!(
find_reviewers_from_diff(&aconfig, diff).unwrap(),
find_reviewers_from_diff(&aconfig, &files).unwrap(),
expected.iter().map(|x| x.to_string()).collect::<Vec<_>>()
);
}
Expand Down
Loading
Loading