Implement logic to find old E-needs-mcve issues to close
This is the first commit in a series of commits to implement the
automatic triaging of old E-needs-mcve issues that was proposed and
discussed in [t-release/triage] and cross-posted to [T-compiler].

This commit only implements the logic to find the issues to close, and
prints that info to stdout for inspection. Think of it as a dry run.

After we have convinced ourselves this logic works as it should, we can
implement the final steps:
* Actually close the issues
* Report closures to the "triagebot closed issues" topic in the "t-release/triage" Zulip stream

[t-release/triage]: https://rust-lang.zulipchat.com/#narrow/stream/242269-t-release.2Ftriage/topic/auto-close.20E-needs-mcve/near/400273684
[t-compiler]: https://rust-lang.zulipchat.com/#narrow/stream/131828-t-compiler/topic/auto.20closing.20E-meeds-mcve/near/399663832
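
For inspection purposes, the dry-run output (produced by the println! calls in
old_label.rs below) looks roughly like this, with placeholder issue numbers and
month counts:

https://github.com/rust-lang/rust/issues/<number> will be closed after <months> months
https://github.com/rust-lang/rust/issues/<number> will be closed now (FIXME: Actually implement closing)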
Enselic committed Nov 10, 2023
1 parent 619e2f1 commit d9d5925
Showing 5 changed files with 338 additions and 2 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/automatic-triage.yml
@@ -0,0 +1,13 @@
name: Automatic triage
on:
  workflow_dispatch: {}
  schedule:
    - cron: "0 12 * * 1" # Every Monday at 12:00 UTC

jobs:
  automatic-triage:
    name: Automatic triage
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - run: cargo run --bin automatic-triage
116 changes: 116 additions & 0 deletions github-graphql/src/lib.rs
@@ -30,6 +30,64 @@ pub mod queries {
        pub repository: Option<Repository>,
    }

    #[derive(cynic::Enum, Clone, Copy, Debug)]
    #[cynic(rename_all = "SCREAMING_SNAKE_CASE")]
    pub enum IssueTimelineItemsItemType {
        AddedToProjectEvent,
        AssignedEvent,
        ClosedEvent,
        CommentDeletedEvent,
        ConnectedEvent,
        ConvertedNoteToIssueEvent,
        ConvertedToDiscussionEvent,
        CrossReferencedEvent,
        DemilestonedEvent,
        DisconnectedEvent,
        IssueComment,
        LabeledEvent,
        LockedEvent,
        MarkedAsDuplicateEvent,
        MentionedEvent,
        MilestonedEvent,
        MovedColumnsInProjectEvent,
        PinnedEvent,
        ReferencedEvent,
        RemovedFromProjectEvent,
        RenamedTitleEvent,
        ReopenedEvent,
        SubscribedEvent,
        TransferredEvent,
        UnassignedEvent,
        UnlabeledEvent,
        UnlockedEvent,
        UnmarkedAsDuplicateEvent,
        UnpinnedEvent,
        UnsubscribedEvent,
        UserBlockedEvent,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(graphql_type = "IssueTimelineItemsConnection")]
    pub struct IssueTimelineItemsConnection {
        pub total_count: i32,
        pub page_info: PageInfo,
        #[cynic(flatten)]
        pub nodes: Vec<IssueTimelineItems>,
    }

    #[derive(cynic::InlineFragments, Debug)]
    pub enum IssueTimelineItems {
        LabeledEvent(LabeledEvent),
        #[cynic(fallback)]
        Other, // Implement more when needed
    }

    #[derive(cynic::QueryFragment, Debug)]
    pub struct LabeledEvent {
        pub label: Label,
        pub created_at: DateTime,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(variables = "LeastRecentlyReviewedPullRequestsArguments")]
    pub struct Repository {
@@ -268,6 +326,64 @@ pub mod docs_update_queries {
    pub struct GitObjectID(pub String);
}

#[cynic::schema_for_derives(file = "src/github.graphql", module = "schema")]
pub mod old_label_queries {
    use super::queries::*;
    use super::schema;

    #[derive(cynic::QueryVariables, Debug, Clone)]
    pub struct OldLabelArguments {
        pub repository_owner: String,
        pub repository_name: String,
        pub label: String,
        pub after: Option<String>,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(graphql_type = "Query", variables = "OldLabelArguments")]
    pub struct OldLabelIssuesQuery {
        #[arguments(owner: $repository_owner, name: $repository_name)]
        pub repository: Option<OldLabelRepository>,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(graphql_type = "Repository", variables = "OldLabelArguments")]
    pub struct OldLabelRepository {
        #[arguments(
            states: "OPEN",
            first: 30,
            after: $after,
            labels: [$label],
            orderBy: {direction: "ASC", field: "CREATED_AT"}
        )]
        pub issues: OldLabelIssueConnection,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(graphql_type = "IssueConnection")]
    pub struct OldLabelIssueConnection {
        pub total_count: i32,
        pub page_info: PageInfo,
        #[cynic(flatten)]
        pub nodes: Vec<OldLabelCandidateIssue>,
    }

    #[derive(cynic::QueryFragment, Debug)]
    #[cynic(graphql_type = "Issue")]
    pub struct OldLabelCandidateIssue {
        pub number: i32,
        pub created_at: DateTime,
        pub url: Uri,
        pub title: String,
        #[arguments(first = 100)]
        pub labels: Option<LabelConnection>,
        #[arguments(last = 1)]
        pub comments: IssueCommentConnection,
        #[arguments(last = 250, itemTypes = Some(vec![IssueTimelineItemsItemType::LabeledEvent]))]
        pub timeline_items: Option<IssueTimelineItemsConnection>,
    }
}

#[allow(non_snake_case, non_camel_case_types)]
mod schema {
    cynic::use_schema!("src/github.graphql");
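
As a quick sanity check of the fragments above, the query they describe can be
built with cynic's QueryBuilder and printed before anything is sent to GitHub.
A minimal sketch, assuming cynic's Operation type exposes the generated GraphQL
document via its query field (as in current cynic releases); the helper name
print_generated_query is made up for illustration:

use cynic::QueryBuilder;
use github_graphql::old_label_queries::{OldLabelArguments, OldLabelIssuesQuery};

fn print_generated_query() {
    let operation = OldLabelIssuesQuery::build(OldLabelArguments {
        repository_owner: "rust-lang".to_owned(),
        repository_name: "rust".to_owned(),
        label: "E-needs-mcve".to_owned(),
        after: None,
    });
    // The GraphQL document that cynic derived from the fragments above;
    // issues_with_label in src/github.rs below posts the same kind of query
    // to the GitHub GraphQL API.
    println!("{}", operation.query);
}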
24 changes: 24 additions & 0 deletions src/bin/automatic-triage/main.rs
@@ -0,0 +1,24 @@
use reqwest::Client;
use triagebot::github::GithubClient;

mod old_label;

#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
    dotenv::dotenv().ok();
    tracing_subscriber::fmt::init();

    let client = GithubClient::new_with_default_token(Client::new());

    old_label::triage_old_label(
        "rust-lang",
        "rust",
        "E-needs-mcve",
        "triaged", // Exclude e.g. label "AsyncAwait-Triaged"
        chrono::Duration::days(30 * 12 * 4),
        &client,
    )
    .await?;

    Ok(())
}
123 changes: 123 additions & 0 deletions src/bin/automatic-triage/old_label.rs
@@ -0,0 +1,123 @@
use chrono::{DateTime, Duration, Utc};
use triagebot::github::GithubClient;

use github_graphql::old_label_queries::*;
use github_graphql::queries::*;
use triagebot::github::issues_with_label;

struct AnalyzedIssue {
    number: i32,
    url: String,
    time_until_close: Duration,
}

pub async fn triage_old_label(
    repository_owner: &str,
    repository_name: &str,
    label: &str,
    exclude_labels_containing: &str,
    minimum_age: Duration,
    client: &GithubClient,
) -> anyhow::Result<()> {
    let now = chrono::Utc::now();

    let mut issues = issues_with_label(repository_owner, repository_name, label, client)
        .await?
        .into_iter()
        .filter(|issue| filter_excluded_labels(issue, exclude_labels_containing))
        .map(|issue| {
            // If an issue is actively discussed, there is no limit on the age of the
            // label. We don't want to close issues that people are actively commenting on.
            // So require the last comment to also be old.
            let last_comment_age = last_comment_age(&issue, &now);

            let label_age = label_age(&issue, label, &now);

            AnalyzedIssue {
                number: issue.number,
                url: issue.url.0,
                time_until_close: minimum_age - std::cmp::min(label_age, last_comment_age),
            }
        })
        .collect::<Vec<_>>();

    issues.sort_by_key(|issue| std::cmp::Reverse(issue.time_until_close));

    for issue in issues {
        if issue.time_until_close.num_days() > 0 {
            println!(
                "{} will be closed after {} months",
                issue.url,
                issue.time_until_close.num_days() / 30
            );
        } else {
            println!(
                "{} will be closed now (FIXME: Actually implement closing)",
                issue.url,
            );
            close_issue(issue.number, client).await;
        }
    }

    Ok(())
}

fn filter_excluded_labels(issue: &OldLabelCandidateIssue, exclude_labels_containing: &str) -> bool {
    !issue.labels.as_ref().unwrap().nodes.iter().any(|label| {
        label
            .name
            .to_lowercase()
            .contains(exclude_labels_containing)
    })
}

fn last_comment_age(issue: &OldLabelCandidateIssue, now: &DateTime<Utc>) -> Duration {
    let last_comment_at = issue
        .comments
        .nodes
        .last()
        .map(|c| c.created_at)
        .unwrap_or_else(|| issue.created_at);

    *now - last_comment_at
}

pub fn label_age(issue: &OldLabelCandidateIssue, label: &str, now: &DateTime<Utc>) -> Duration {
    let timeline_items = &issue.timeline_items.as_ref().unwrap();

    if timeline_items.page_info.has_next_page {
        eprintln!(
            "{} has more than 250 `LabeledEvent`s. We need to implement paging!",
            issue.url.0
        );
        return Duration::days(30 * 999999);
    }

    let mut last_labeled_at = None;

    // The way the GraphQL query is constructed guarantees that we see the
    // oldest event first, so we can simply iterate sequentially. And we don't
    // need to bother with UnlabeledEvent since the query requires the label
    // to be present, so we know it has not been unlabeled in the last event.
    for timeline_item in &timeline_items.nodes {
        if let IssueTimelineItems::LabeledEvent(LabeledEvent {
            label: Label { name },
            created_at,
        }) = timeline_item
        {
            if name == label {
                last_labeled_at = Some(created_at);
            }
        }
    }

    now.signed_duration_since(
        *last_labeled_at.expect("The GraphQL query only includes issues that have the label"),
    )
}

async fn close_issue(_number: i32, _client: &GithubClient) {
    // FIXME: Actually close the issue
    // FIXME: Report to "triagebot closed issues" topic in "t-release/triage" Zulip
}
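
To make the age arithmetic in triage_old_label concrete: main.rs passes a
minimum_age of 30 * 12 * 4 days (roughly four years), and an issue only becomes
a closing candidate once both its label and its last comment are older than
that. A minimal sketch of the same calculation with made-up ages:

use chrono::Duration;

fn main() {
    let minimum_age = Duration::days(30 * 12 * 4); // as passed from main.rs
    let label_age = Duration::days(30 * 12 * 5); // hypothetical: labeled ~5 years ago
    let last_comment_age = Duration::days(30 * 12 * 2); // hypothetical: last comment ~2 years ago

    // Same expression as in triage_old_label: the more recent of the two
    // activities (the smaller age) is what counts against the minimum age.
    let time_until_close = minimum_age - std::cmp::min(label_age, last_comment_age);

    assert!(time_until_close.num_days() > 0);
    // Prints "will be closed after 24 months", matching the dry-run output format.
    println!("will be closed after {} months", time_until_close.num_days() / 30);
}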
64 changes: 62 additions & 2 deletions src/github.rs
@@ -2,8 +2,13 @@ use anyhow::{anyhow, Context};
use async_trait::async_trait;
use bytes::Bytes;
use chrono::{DateTime, FixedOffset, Utc};
use cynic::QueryBuilder;
use futures::{future::BoxFuture, FutureExt};
use github_graphql::old_label_queries::{
    OldLabelArguments, OldLabelCandidateIssue, OldLabelIssuesQuery,
};
use hyper::header::HeaderValue;
use log::{debug, info};
use once_cell::sync::OnceCell;
use reqwest::header::{AUTHORIZATION, USER_AGENT};
use reqwest::{Client, Request, RequestBuilder, Response, StatusCode};
@@ -1281,7 +1286,6 @@ impl Repository {
        // commits will only show up once).
        let mut prs_seen = HashSet::new();
        let mut recent_commits = Vec::new(); // This is the final result.
        use cynic::QueryBuilder;
        use github_graphql::docs_update_queries::{
            GitObject, RecentCommits, RecentCommitsArguments,
        };
@@ -2302,7 +2306,6 @@ async fn project_items_by_status(
    client: &GithubClient,
    status_filter: impl Fn(Option<&str>) -> bool,
) -> anyhow::Result<Vec<github_graphql::project_items::ProjectV2Item>> {
    use cynic::QueryBuilder;
    use github_graphql::project_items;

    const DESIGN_MEETING_PROJECT: i32 = 31;
@@ -2348,6 +2351,63 @@
    Ok(all_items)
}

pub async fn issues_with_label(
    repository_owner: &str,
    repository_name: &str,
    label: &str,
    client: &GithubClient,
) -> anyhow::Result<Vec<OldLabelCandidateIssue>> {
    let mut issues: Vec<OldLabelCandidateIssue> = vec![];

    let mut args = OldLabelArguments {
        repository_owner: repository_owner.to_owned(),
        repository_name: repository_name.to_owned(),
        label: label.to_owned(),
        after: None,
    };

    let mut max_iterations_left = 100;
    loop {
        max_iterations_left -= 1;
        if max_iterations_left < 0 {
            anyhow::bail!("Bailing to avoid rate limit depletion in case of buggy code/queries.");
        }

        let query = OldLabelIssuesQuery::build(args.clone());
        let req = client.post(Repository::GITHUB_GRAPHQL_API_URL);
        let req = req.json(&query);

        info!("GitHub GraphQL API endpoint request (affects rate limit)");
        let data: cynic::GraphQlResponse<OldLabelIssuesQuery> = client.json(req).await?;

        if let Some(errors) = data.errors {
            anyhow::bail!("There were graphql errors. {:?}", errors);
        }

        let repository = data
            .data
            .ok_or_else(|| anyhow::anyhow!("No data returned."))?
            .repository
            .ok_or_else(|| anyhow::anyhow!("No repository."))?;

        issues.extend(repository.issues.nodes);

        debug!(
            "Now have {} issues of {}",
            issues.len(),
            repository.issues.total_count
        );

        let page_info = repository.issues.page_info;
        if !page_info.has_next_page || page_info.end_cursor.is_none() {
            break;
        }
        args.after = page_info.end_cursor;
    }

    Ok(issues)
}

pub enum DesignMeetingStatus {
    Proposed,
    Scheduled,