Skip to content

Commit

Permalink
feat(web-scraping): introduce Web Page Content trackers
Browse files Browse the repository at this point in the history
  • Loading branch information
azasypkin committed Nov 17, 2023
1 parent a18521b commit b879bf1
Show file tree
Hide file tree
Showing 39 changed files with 5,203 additions and 5,144 deletions.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions assets/templates/web_page_content_tracker_changes_email.hbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!DOCTYPE html>
{{!--
  Email body sent when a web page content tracker detects changes.

  Template context used below:
    tracker_name - name of the tracker that detected the change
    content      - the changed content, shown in the heading
    back_link    - URL of the trackers page the reader is directed to
    home_link    - URL wrapped around the logo image

  Requires the `email_styles` partial and an inline attachment with the
  content ID `secutils-logo`.
--}}
<html lang="en">
<head>
  <title>"{{tracker_name}}" content tracker detected changes</title>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  {{> email_styles}}
  <style>
    .navigate-link {
      display: block;
      width: 250px;
      margin: auto;
      padding: 10px 20px;
      text-align: center;
      text-decoration: none;
      color: #5e1d3f;
      background-color: #fed047;
      border-radius: 5px;
      font-weight: bold;
    }
  </style>
</head>
<body>
<div class="container">
  <h1>"{{tracker_name}}" content tracker detected changes: "{{content}}"</h1>
  <p>To learn more, visit the <b>Content Trackers</b> page:</p>
  <a class="navigate-link" href="{{back_link}}">Web Scraping → Content Trackers</a>
  <p>If the button above doesn't work, you can navigate to the following URL directly: </p>
  <p>{{back_link}}</p>
  <a href="{{home_link}}"><img src="cid:secutils-logo" alt="Secutils.dev logo" width="89" height="14" /></a>
</div>
</body>
</html>
4 changes: 2 additions & 2 deletions migrations/20231109215110_web_scraping.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ CREATE TABLE IF NOT EXISTS user_data_web_scraping_trackers
id BLOB PRIMARY KEY,
name TEXT NOT NULL COLLATE NOCASE,
url TEXT NOT NULL,
kind INTEGER NOT NULL,
kind BLOB NOT NULL,
schedule TEXT,
job_id BLOB UNIQUE,
data BLOB NOT NULL,
created_at INTEGER NOT NULL,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
UNIQUE (name, user_id)
UNIQUE (name, kind, user_id)
) STRICT;

-- Create table to store web page trackers history.
Expand Down
8 changes: 4 additions & 4 deletions src/config/scheduler_jobs_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ use cron::Schedule;
/// Configuration for the Secutils.dev scheduler jobs.
///
/// Every field holds a `Schedule` (from the `cron` crate), one per scheduler job.
#[derive(Clone, Debug)]
pub struct SchedulerJobsConfig {
/// The schedule to use for the `ResourcesTrackersSchedule` job.
pub resources_trackers_schedule: Schedule,
/// The schedule to use for the `ResourcesTrackersFetch` job.
pub resources_trackers_fetch: Schedule,
/// The schedule to use for the `WebPageTrackersSchedule` job.
pub web_page_trackers_schedule: Schedule,
/// The schedule to use for the `WebPageTrackersFetch` job.
pub web_page_trackers_fetch: Schedule,
/// The schedule to use for the `NotificationsSend` job.
pub notifications_send: Schedule,
}
36 changes: 18 additions & 18 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,21 +100,21 @@ fn process_command(version: &str, matches: ArgMatches) -> Result<(), anyhow::Err
search_index_version: 3,
},
jobs: SchedulerJobsConfig {
resources_trackers_schedule: matches
.get_one::<String>("JOBS_RESOURCES_TRACKERS_SCHEDULE")
web_page_trackers_schedule: matches
.get_one::<String>("JOBS_WEB_PAGE_TRACKERS_SCHEDULE")
.ok_or_else(|| {
anyhow!("<JOBS_RESOURCES_TRACKERS_SCHEDULE> argument is not provided.")
anyhow!("<JOBS_WEB_PAGE_TRACKERS_SCHEDULE> argument is not provided.")
})
.and_then(|schedule| {
Schedule::try_from(schedule.as_str())
.with_context(|| "Cannot parse resources trackers schedule job schedule.")
.with_context(|| "Cannot parse web page trackers schedule job schedule.")
})?,
resources_trackers_fetch: matches
.get_one::<String>("JOBS_RESOURCES_TRACKERS_FETCH")
.ok_or_else(|| anyhow!("<JOBS_RESOURCES_TRACKERS_FETCH> argument is not provided."))
web_page_trackers_fetch: matches
.get_one::<String>("JOBS_WEB_PAGE_TRACKERS_FETCH")
.ok_or_else(|| anyhow!("<JOBS_WEB_PAGE_TRACKERS_FETCH> argument is not provided."))
.and_then(|schedule| {
Schedule::try_from(schedule.as_str())
.with_context(|| "Cannot parse resources trackers fetch job schedule.")
.with_context(|| "Cannot parse web page trackers fetch job schedule.")
})?,
notifications_send: matches
.get_one::<String>("JOBS_NOTIFICATIONS_SEND")
Expand Down Expand Up @@ -254,20 +254,20 @@ fn main() -> Result<(), anyhow::Error> {
.help("The URL to access the Web Scraper component."),
)
.arg(
Arg::new("JOBS_RESOURCES_TRACKERS_SCHEDULE")
.long("jobs-resources-trackers-schedule")
Arg::new("JOBS_WEB_PAGE_TRACKERS_SCHEDULE")
.long("jobs-web-page-trackers-schedule")
.global(true)
.env("SECUTILS_JOBS_RESOURCES_TRACKERS_SCHEDULE")
.env("SECUTILS_JOBS_WEB_PAGE_TRACKERS_SCHEDULE")
.default_value("0 * * * * * *")
.help("The cron schedule to use for the resources trackers schedule job."),
.help("The cron schedule to use for the web page trackers schedule job."),
)
.arg(
Arg::new("JOBS_RESOURCES_TRACKERS_FETCH")
.long("jobs-resources-trackers-fetch")
Arg::new("JOBS_WEB_PAGE_TRACKERS_FETCH")
.long("jobs-web-page-trackers-fetch")
.global(true)
.env("SECUTILS_JOBS_RESOURCES_TRACKERS_FETCH")
.env("SECUTILS_JOBS_WEB_PAGE_TRACKERS_FETCH")
.default_value("0 * * * * * *")
.help("The cron schedule to use for the resources trackers fetch job."),
.help("The cron schedule to use for the web page trackers fetch job."),
).arg(
Arg::new("JOBS_NOTIFICATIONS_SEND")
.long("jobs-notifications-send")
Expand Down Expand Up @@ -480,8 +480,8 @@ mod tests {
search_index_version: 3,
},
jobs: SchedulerJobsConfig {
resources_trackers_schedule: Schedule::try_from("0 * 0 * * * *")?,
resources_trackers_fetch: Schedule::try_from("0 * 1 * * * *")?,
web_page_trackers_schedule: Schedule::try_from("0 * 0 * * * *")?,
web_page_trackers_fetch: Schedule::try_from("0 * 1 * * * *")?,
notifications_send: Schedule::try_from("0 * 2 * * * *")?,
},
})
Expand Down
Loading

0 comments on commit b879bf1

Please sign in to comment.