From d9d5925ad1b76651107963f4af04fc8b32461d01 Mon Sep 17 00:00:00 2001 From: Martin Nordholts Date: Thu, 9 Nov 2023 21:31:16 +0100 Subject: [PATCH] Implement logic to find old E-needs-mcve issues to close This is the first commit in a series of commits to implement the automatic triaging of old E-needs-mcve issues that was proposed and discussed in [t-release/triage] and cross-posted to [t-compiler]. This commit only implements the logic to find which issues to close, and prints that info to stdout for inspection. Think of it as a dry run. After we have convinced ourselves this logic works as it should, we can implement the final steps: * Actually close the issue * Report closures to the "triagebot closed issues" topic in the "t-release/triage" Zulip stream [t-release/triage]: https://rust-lang.zulipchat.com/#narrow/stream/242269-t-release.2Ftriage/topic/auto-close.20E-needs-mcve/near/400273684 [t-compiler]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/auto.20closing.20E-meeds-mcve/near/399663832 --- .github/workflows/automatic-triage.yml | 13 +++ github-graphql/src/lib.rs | 116 +++++++++++++++++++++++ src/bin/automatic-triage/main.rs | 24 +++++ src/bin/automatic-triage/old_label.rs | 123 +++++++++++++++++++++++++ src/github.rs | 64 ++++++++++++- 5 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/automatic-triage.yml create mode 100644 src/bin/automatic-triage/main.rs create mode 100644 src/bin/automatic-triage/old_label.rs diff --git a/.github/workflows/automatic-triage.yml b/.github/workflows/automatic-triage.yml new file mode 100644 index 00000000..5be805b2 --- /dev/null +++ b/.github/workflows/automatic-triage.yml @@ -0,0 +1,13 @@ +name: Automatic triage +on: + workflow_dispatch: {} + schedule: + - cron: "0 12 * * 1" # Every Monday at 12:00 UTC + +jobs: + automatic-triage: + name: Automatic triage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - run: cargo run --bin automatic-triage diff 
--git a/github-graphql/src/lib.rs b/github-graphql/src/lib.rs index 7230bc68..965103bf 100644 --- a/github-graphql/src/lib.rs +++ b/github-graphql/src/lib.rs @@ -30,6 +30,64 @@ pub mod queries { pub repository: Option, } + #[derive(cynic::Enum, Clone, Copy, Debug)] + #[cynic(rename_all = "SCREAMING_SNAKE_CASE")] + pub enum IssueTimelineItemsItemType { + AddedToProjectEvent, + AssignedEvent, + ClosedEvent, + CommentDeletedEvent, + ConnectedEvent, + ConvertedNoteToIssueEvent, + ConvertedToDiscussionEvent, + CrossReferencedEvent, + DemilestonedEvent, + DisconnectedEvent, + IssueComment, + LabeledEvent, + LockedEvent, + MarkedAsDuplicateEvent, + MentionedEvent, + MilestonedEvent, + MovedColumnsInProjectEvent, + PinnedEvent, + ReferencedEvent, + RemovedFromProjectEvent, + RenamedTitleEvent, + ReopenedEvent, + SubscribedEvent, + TransferredEvent, + UnassignedEvent, + UnlabeledEvent, + UnlockedEvent, + UnmarkedAsDuplicateEvent, + UnpinnedEvent, + UnsubscribedEvent, + UserBlockedEvent, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(graphql_type = "IssueTimelineItemsConnection")] + pub struct IssueTimelineItemsConnection { + pub total_count: i32, + pub page_info: PageInfo, + #[cynic(flatten)] + pub nodes: Vec, + } + + #[derive(cynic::InlineFragments, Debug)] + pub enum IssueTimelineItems { + LabeledEvent(LabeledEvent), + #[cynic(fallback)] + Other, // Implement more when needed + } + + #[derive(cynic::QueryFragment, Debug)] + pub struct LabeledEvent { + pub label: Label, + pub created_at: DateTime, + } + #[derive(cynic::QueryFragment, Debug)] #[cynic(variables = "LeastRecentlyReviewedPullRequestsArguments")] pub struct Repository { @@ -268,6 +326,64 @@ pub mod docs_update_queries { pub struct GitObjectID(pub String); } +#[cynic::schema_for_derives(file = "src/github.graphql", module = "schema")] +pub mod old_label_queries { + use super::queries::*; + use super::schema; + + #[derive(cynic::QueryVariables, Debug, Clone)] + pub struct OldLabelArguments { + pub 
repository_owner: String, + pub repository_name: String, + pub label: String, + pub after: Option, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(graphql_type = "Query", variables = "OldLabelArguments")] + pub struct OldLabelIssuesQuery { + #[arguments(owner: $repository_owner, name: $repository_name)] + pub repository: Option, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(graphql_type = "Repository", variables = "OldLabelArguments")] + pub struct OldLabelRepository { + #[arguments( + states: "OPEN", + first: 30, + after: $after, + labels: [$label], + orderBy: {direction: "ASC", field: "CREATED_AT"} + )] + pub issues: OldLabelIssueConnection, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(graphql_type = "IssueConnection")] + pub struct OldLabelIssueConnection { + pub total_count: i32, + pub page_info: PageInfo, + #[cynic(flatten)] + pub nodes: Vec, + } + + #[derive(cynic::QueryFragment, Debug)] + #[cynic(graphql_type = "Issue")] + pub struct OldLabelCandidateIssue { + pub number: i32, + pub created_at: DateTime, + pub url: Uri, + pub title: String, + #[arguments(first = 100)] + pub labels: Option, + #[arguments(last = 1)] + pub comments: IssueCommentConnection, + #[arguments(last = 250, itemTypes = Some(vec![IssueTimelineItemsItemType::LabeledEvent]))] + pub timeline_items: Option, + } +} + #[allow(non_snake_case, non_camel_case_types)] mod schema { cynic::use_schema!("src/github.graphql"); diff --git a/src/bin/automatic-triage/main.rs b/src/bin/automatic-triage/main.rs new file mode 100644 index 00000000..85177a0c --- /dev/null +++ b/src/bin/automatic-triage/main.rs @@ -0,0 +1,24 @@ +use reqwest::Client; +use triagebot::github::GithubClient; + +mod old_label; + +#[tokio::main(flavor = "current_thread")] +async fn main() -> anyhow::Result<()> { + dotenv::dotenv().ok(); + tracing_subscriber::fmt::init(); + + let client = GithubClient::new_with_default_token(Client::new()); + + old_label::triage_old_label( + "rust-lang", + "rust", + 
"E-needs-mcve", + "triaged", // Exclude e.g. label "AsyncAwait-Triaged" + chrono::Duration::days(30 * 12 * 4), + &client, + ) + .await?; + + Ok(()) +} diff --git a/src/bin/automatic-triage/old_label.rs b/src/bin/automatic-triage/old_label.rs new file mode 100644 index 00000000..9c3d0450 --- /dev/null +++ b/src/bin/automatic-triage/old_label.rs @@ -0,0 +1,123 @@ +use chrono::{DateTime, Duration, Utc}; +use triagebot::github::GithubClient; + +use github_graphql::old_label_queries::*; +use github_graphql::queries::*; +use triagebot::github::issues_with_label; + +struct AnalyzedIssue { + number: i32, + url: String, + time_until_close: Duration, +} + +pub async fn triage_old_label( + repository_owner: &str, + repository_name: &str, + label: &str, + exclude_labels_containing: &str, + minimum_age: Duration, + client: &GithubClient, +) -> anyhow::Result<()> { + let now = chrono::Utc::now(); + + let mut issues = issues_with_label(repository_owner, repository_name, label, client) + .await? + .into_iter() + .filter(|issue| filter_excluded_labels(issue, exclude_labels_containing)) + .map(|issue| { + // If an issue is actively discussed, there is no limit on the age of the + // label. We don't want to close issues that people are actively commenting on. + // So require the last comment to also be old. 
+ let last_comment_age = last_comment_age(&issue, &now); + + let label_age = label_age(&issue, label, &now); + + AnalyzedIssue { + number: issue.number, + url: issue.url.0, + time_until_close: minimum_age - std::cmp::min(label_age, last_comment_age), + } + }) + .collect::>(); + + issues.sort_by_key(|issue| std::cmp::Reverse(issue.time_until_close)); + + for issue in issues { + if issue.time_until_close.num_days() > 0 { + println!( + "{} will be closed after {} months", + issue.url, + issue.time_until_close.num_days() / 30 + ); + } else { + println!( + "{} will be closed now (FIXME: Actually implement closing)", + issue.url, + ); + close_issue(issue.number, client).await; + } + } + + Ok(()) +} + +fn filter_excluded_labels(issue: &OldLabelCandidateIssue, exclude_labels_containing: &str) -> bool { + !issue.labels.as_ref().unwrap().nodes.iter().any(|label| { + label + .name + .to_lowercase() + .contains(exclude_labels_containing) + }) +} + +fn last_comment_age(issue: &OldLabelCandidateIssue, now: &DateTime) -> Duration { + let last_comment_at = issue + .comments + .nodes + .last() + .map(|c| c.created_at) + .unwrap_or_else(|| issue.created_at); + + *now - last_comment_at +} + +pub fn label_age(issue: &OldLabelCandidateIssue, label: &str, now: &DateTime) -> Duration { + let timeline_items = &issue.timeline_items.as_ref().unwrap(); + + if timeline_items.page_info.has_next_page { + eprintln!( + "{} has more than 250 `LabeledEvent`s. We need to implement paging!", + issue.url.0 + ); + return Duration::days(30 * 999999); + } + + let mut last_labeled_at = None; + + // The way the GraphQL query is constructed guarantees that we see the + // oldest event first, so we can simply iterate sequentially. And we don't + // need to bother with UnlabeledEvent since in the query we require the + // label to be present, so we know it has not been unlabeled in the last + // event. 
+ for timeline_item in &timeline_items.nodes { + if let IssueTimelineItems::LabeledEvent(LabeledEvent { + label: Label { name }, + created_at, + }) = timeline_item + { + if name == label { + last_labeled_at = Some(created_at); + } + } + } + + now.signed_duration_since( + *last_labeled_at.expect("The GraphQL query only includes issues that has the label"), + ) +} + +async fn close_issue(_number: i32, _client: &GithubClient) { + // FIXME: Actually close the issue + // FIXME: Report to "triagebot closed issues" topic in "t-release/triage" Zulip +} diff --git a/src/github.rs b/src/github.rs index 310d71c3..9347c5c1 100644 --- a/src/github.rs +++ b/src/github.rs @@ -2,8 +2,13 @@ use anyhow::{anyhow, Context}; use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, FixedOffset, Utc}; +use cynic::QueryBuilder; use futures::{future::BoxFuture, FutureExt}; +use github_graphql::old_label_queries::{ + OldLabelArguments, OldLabelCandidateIssue, OldLabelIssuesQuery, +}; use hyper::header::HeaderValue; +use log::{debug, info}; use once_cell::sync::OnceCell; use reqwest::header::{AUTHORIZATION, USER_AGENT}; use reqwest::{Client, Request, RequestBuilder, Response, StatusCode}; @@ -1281,7 +1286,6 @@ impl Repository { // commits will only show up once). let mut prs_seen = HashSet::new(); let mut recent_commits = Vec::new(); // This is the final result. 
- use cynic::QueryBuilder; use github_graphql::docs_update_queries::{ GitObject, RecentCommits, RecentCommitsArguments, }; @@ -2302,7 +2306,6 @@ async fn project_items_by_status( client: &GithubClient, status_filter: impl Fn(Option<&str>) -> bool, ) -> anyhow::Result> { - use cynic::QueryBuilder; use github_graphql::project_items; const DESIGN_MEETING_PROJECT: i32 = 31; @@ -2348,6 +2351,63 @@ async fn project_items_by_status( Ok(all_items) } +pub async fn issues_with_label( + repository_owner: &str, + repository_name: &str, + label: &str, + client: &GithubClient, +) -> anyhow::Result> { + let mut issues: Vec = vec![]; + + let mut args = OldLabelArguments { + repository_owner: repository_owner.to_owned(), + repository_name: repository_name.to_owned(), + label: label.to_owned(), + after: None, + }; + + let mut max_iterations_left = 100; + loop { + max_iterations_left -= 1; + if max_iterations_left < 0 { + anyhow::bail!("Bailing to avoid rate limit depletion in case of buggy code/queries."); + } + + let query = OldLabelIssuesQuery::build(args.clone()); + let req = client.post(Repository::GITHUB_GRAPHQL_API_URL); + let req = req.json(&query); + + info!("GitHub GraphQL API endpoint request (affects rate limit)"); + let data: cynic::GraphQlResponse = client.json(req).await?; + + if let Some(errors) = data.errors { + anyhow::bail!("There were graphql errors. {:?}", errors); + } + + let repository = data + .data + .ok_or_else(|| anyhow::anyhow!("No data returned."))? + .repository + .ok_or_else(|| anyhow::anyhow!("No repository."))?; + + issues.extend(repository.issues.nodes); + + debug!( + "Now have {} issues of {}", + issues.len(), + repository.issues.total_count + ); + + let page_info = repository.issues.page_info; + if !page_info.has_next_page || page_info.end_cursor.is_none() { + break; + } + args.after = page_info.end_cursor; + } + + Ok(issues) +} + pub enum DesignMeetingStatus { Proposed, Scheduled,