Skip to content

Commit

Permalink
housenumberless-settlements, cron: fetch list of settlements in count…
Browse files Browse the repository at this point in the history
…ry once

And add an sql index.

Before:
sqlite> select count(*) from stats_settlements s where not exists ( select 1 from whole_country c where c.city = s.name );
486
Run Time: real 25.977 (seconds)

After:
sqlite> select count(*) from stats_settlements s where not exists ( select 1 from whole_country c where c.city = s.name );
486
Run Time: real 0.007 (seconds)

Change-Id: I247dc9b861851750d6c186e6fec2410e73507338
  • Loading branch information
vmiklos committed Feb 10, 2025
1 parent 199c3c4 commit 2416650
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 1 deletion.
34 changes: 34 additions & 0 deletions src/area_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,5 +277,39 @@ pub fn write_whole_country(ctx: &context::Context, result: &str) -> anyhow::Resu
Ok(())
}

/// Replaces the contents of the `stats_settlements` table with the elements of an overpass JSON
/// result, and records the result's OSM base and areas base timestamps in `mtimes`.
///
/// All writes go through a single transaction that is committed at the end.
///
/// # Errors
///
/// Returns an error if `result` can't be deserialized as an overpass result, or if any of the
/// database operations fail.
pub fn write_settlements_whole_country(ctx: &context::Context, result: &str) -> anyhow::Result<()> {
    let overpass: crate::serde::OverpassResult = serde_json::from_str(result)?;

    let mut conn = ctx.get_database_connection()?;
    let tx = conn.transaction()?;
    tx.execute("delete from stats_settlements", [])?;
    {
        // Prepare the insert once and reuse it for every element, instead of preparing the same
        // SQL again for each row. The statement borrows the transaction, so keep it in its own
        // scope to have it dropped before the commit below.
        let mut insert = tx.prepare(
            "insert into stats_settlements (osm_id, osm_type, name) values (?1, ?2, ?3)",
        )?;
        for element in overpass.elements {
            let osm_type = element.osm_type.to_string();
            let osm_id = element.id.to_string();
            // Elements without a name are stored with an empty name.
            let name = element.tags.name.unwrap_or_default();
            insert.execute([osm_id, osm_type, name])?;
        }
    }

    // Both timestamps are stored as stringified unix timestamps in nanoseconds; an existing row
    // for the same page is overwritten.
    let osm_time = overpass.osm3s.timestamp_osm_base.unix_timestamp_nanos();
    tx.execute(
        r#"insert into mtimes (page, last_modified) values ('stats-settlements/osm-base', ?1)
on conflict(page) do update set last_modified = excluded.last_modified"#,
        [osm_time.to_string()],
    )?;

    let areas_time = overpass.osm3s.timestamp_areas_base.unix_timestamp_nanos();
    tx.execute(
        r#"insert into mtimes (page, last_modified) values ('stats-settlements/areas-base', ?1)
on conflict(page) do update set last_modified = excluded.last_modified"#,
        [areas_time.to_string()],
    )?;
    tx.commit()?;

    Ok(())
}

#[cfg(test)]
mod tests;
27 changes: 27 additions & 0 deletions src/cron.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,32 @@ pub fn update_stats_overpass(ctx: &context::Context) -> anyhow::Result<()> {
Ok(())
}

/// Performs the update of the stats_settlements table.
///
/// Does nothing if the table already has at least one row. Otherwise it sends the
/// `data/housenumberless-settlements-hungary.overpassql` query to overpass and, if the response
/// is non-empty, stores it via `area_files::write_settlements_whole_country()`.
///
/// # Errors
///
/// Returns an error if the database can't be queried, the query file can't be read, the overpass
/// request fails or the response can't be stored.
pub fn update_settlement_stats_overpass(ctx: &context::Context) -> anyhow::Result<()> {
    // This changes so infrequently that we just update it when the table is empty, for now: doing
    // it daily is a waste.
    {
        // The connection lives in its own scope, so it's dropped before the response is written
        // below.
        let conn = ctx.get_database_connection()?;
        let mut stmt = conn.prepare("select count(*) from stats_settlements")?;
        let mut rows = stmt.query([])?;
        let row = rows.next()?.context("no row")?;
        // Propagate a failed column read to the caller instead of panicking.
        let count: i64 = row.get(0)?;
        if count > 0 {
            return Ok(());
        }
    }

    let query = ctx
        .get_file_system()
        .read_to_string(&ctx.get_abspath("data/housenumberless-settlements-hungary.overpassql"))?;
    info!("update_settlement_stats_overpass: talking to overpass");
    let response = overpass_query_with_retry(ctx, &query)?;
    // An empty response is not an error, there is just nothing to store.
    if !response.is_empty() {
        area_files::write_settlements_whole_country(ctx, &response)?;
    }
    Ok(())
}

/// Performs the update of country-level stats.
fn update_stats(ctx: &context::Context, overpass: bool) -> anyhow::Result<()> {
// Fetch house numbers for the whole country.
Expand All @@ -432,6 +458,7 @@ fn update_stats(ctx: &context::Context, overpass: bool) -> anyhow::Result<()> {

if overpass {
update_stats_overpass(ctx)?;
update_settlement_stats_overpass(ctx)?;
}

info!("update_stats: updating count");
Expand Down
60 changes: 60 additions & 0 deletions src/cron/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,16 @@ fn test_update_stats() {
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-stats.json",
),
context::tests::URLRoute::new(
/*url=*/ "https://overpass-api.de/api/status",
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-status-happy.txt",
),
context::tests::URLRoute::new(
/*url=*/ "https://overpass-api.de/api/interpreter",
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-settlement-stats.json",
),
];
let network = context::tests::TestNetwork::new(&routes);
let network_rc: Rc<dyn context::Network> = Rc::new(network);
Expand All @@ -711,6 +721,7 @@ fn test_update_stats() {
.borrow_mut()
.write_all("first line\nsecond line\n".as_bytes())
.unwrap();
let settlements_overpass_template = context::tests::TestFileSystem::make_file();
let files = context::tests::TestFileSystem::make_files(
&ctx,
&[
Expand All @@ -719,6 +730,10 @@ fn test_update_stats() {
"data/street-housenumbers-hungary.overpassql",
&overpass_template,
),
(
"data/housenumberless-settlements-hungary.overpassql",
&settlements_overpass_template,
),
],
);
let mut file_system = context::tests::TestFileSystem::new();
Expand All @@ -737,6 +752,14 @@ fn test_update_stats() {
)
.unwrap();
assert!(!last_modified.is_empty());
let last_modified: String = conn
.query_row(
"select last_modified from mtimes where page = ?1",
["stats-settlements/osm-base"],
|row| row.get(0),
)
.unwrap();
assert!(!last_modified.is_empty());

let mut stmt = conn
.prepare("select count from counts where category = 'ref'")
Expand All @@ -748,6 +771,23 @@ fn test_update_stats() {
assert_eq!(num_ref, 300);
}

/// Tests update_settlement_stats_overpass(): an already populated table means there is nothing to
/// do.
#[test]
fn test_update_settlement_stats_overpass() {
    let ctx = context::tests::make_test_context().unwrap();
    let insert_sql =
        "insert into stats_settlements (osm_id, osm_type, name) values (1, 'node', 'mysettlement');";
    // The connection is only a temporary of this statement, so it's gone by the time the update
    // runs.
    ctx.get_database_connection()
        .unwrap()
        .execute_batch(insert_sql)
        .unwrap();

    // Expected to succeed: the table was non-empty already, so no network traffic is needed.
    update_settlement_stats_overpass(&ctx).unwrap();
}

/// Tests update_stats(): the case when we keep getting HTTP errors.
#[test]
fn test_update_stats_http_error() {
Expand All @@ -768,6 +808,7 @@ fn test_update_stats_http_error() {
.unwrap();
let stats_json = context::tests::TestFileSystem::make_file();
let overpass_template = context::tests::TestFileSystem::make_file();
let settlements_overpass_template = context::tests::TestFileSystem::make_file();
let files = context::tests::TestFileSystem::make_files(
&ctx,
&[
Expand All @@ -776,6 +817,10 @@ fn test_update_stats_http_error() {
"data/street-housenumbers-hungary.overpassql",
&overpass_template,
),
(
"data/housenumberless-settlements-hungary.overpassql",
&settlements_overpass_template,
),
],
);
let file_system = context::tests::TestFileSystem::from_files(&files);
Expand Down Expand Up @@ -973,13 +1018,24 @@ fn test_our_main_stats() {
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-stats.csv",
),
context::tests::URLRoute::new(
/*url=*/ "https://overpass-api.de/api/status",
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-status-happy.txt",
),
context::tests::URLRoute::new(
/*url=*/ "https://overpass-api.de/api/interpreter",
/*data_path=*/ "",
/*result_path=*/ "src/fixtures/network/overpass-settlement-stats.json",
),
];
let network = context::tests::TestNetwork::new(&routes);
let network_rc: Rc<dyn context::Network> = Rc::new(network);
ctx.set_network(network_rc);
let mut file_system = context::tests::TestFileSystem::new();
let stats_value = context::tests::TestFileSystem::make_file();
let overpass_template = context::tests::TestFileSystem::make_file();
let settlements_overpass_template = context::tests::TestFileSystem::make_file();
let files = context::tests::TestFileSystem::make_files(
&ctx,
&[
Expand All @@ -988,6 +1044,10 @@ fn test_our_main_stats() {
"data/street-housenumbers-hungary.overpassql",
&overpass_template,
),
(
"data/housenumberless-settlements-hungary.overpassql",
&settlements_overpass_template,
),
],
);
file_system.set_files(&files);
Expand Down
17 changes: 17 additions & 0 deletions src/fixtures/network/overpass-settlement-stats.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"osm3s": {
"timestamp_osm_base": "2025-02-07T21:11:12Z",
"timestamp_areas_base": "2025-02-06T02:17:44Z"
},
"elements": [
{
"type": "node",
"id": 12353447948,
"lat": 46.8016087,
"lon": 16.3840838,
"tags": {
"name": "Baj\u00e1nsenye"
}
}
]
}
11 changes: 10 additions & 1 deletion src/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,16 @@ pub fn init(conn: &mut rusqlite::Connection) -> anyhow::Result<()> {
)?;
}

tx.execute("pragma user_version = 20", [])?;
if user_version < 21 {
// Speeds up access on whole_country.city.
tx.execute(
"create index idx_whole_country_cities
on whole_country(city);",
[],
)?;
}

tx.execute("pragma user_version = 21", [])?;
tx.commit()?;
Ok(())
}
Expand Down

0 comments on commit 2416650

Please sign in to comment.