From a93405b2f1b1f1899fcca8114a46642c4e00383d Mon Sep 17 00:00:00 2001
From: Valentin Lorentz
Date: Mon, 9 Sep 2024 17:03:41 +0200
Subject: [PATCH] provenance: update parquet to 53.0 to avoid OOMs due to
 storing Bloom Filters in memory while writing, see
 https://github.com/apache/arrow-rs/pull/5860

---
NOTE(review): dataset-writer moves from the crates.io 1.0.0 release to a
pinned git revision and the old line is kept commented out -- presumably
until a release compatible with parquet 53 is published; confirm, then
restore a versioned dependency and drop the commented-out line.

 db-build/Cargo.toml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/db-build/Cargo.toml b/db-build/Cargo.toml
index 8d203ed..43acf4f 100644
--- a/db-build/Cargo.toml
+++ b/db-build/Cargo.toml
@@ -5,19 +5,20 @@ version.workspace = true
 edition = "2021"
 
 [dependencies]
-ar_row = { git = "https://gitlab.softwareheritage.org/swh/devel/ar_row-rs.git", rev = "c4e9f147b75a1e0eabdace9e82ca6432ea11fb20" }
-ar_row_derive = { git = "https://gitlab.softwareheritage.org/swh/devel/ar_row-rs.git", rev = "c4e9f147b75a1e0eabdace9e82ca6432ea11fb20" }
-arrow = { version = "52.0", default-features = false }
+ar_row = { git = "https://gitlab.softwareheritage.org/swh/devel/ar_row-rs.git", rev = "02413b8a74b2fafb4ecc811513b1040dc949c656" }
+ar_row_derive = { git = "https://gitlab.softwareheritage.org/swh/devel/ar_row-rs.git", rev = "02413b8a74b2fafb4ecc811513b1040dc949c656" }
+arrow = { version = "53.0", default-features = false }
 anyhow = {version="1.0.71", features=["backtrace"]}
 bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
 chrono = { version = "0.4.31", features = ["serde"] }
 clap = { version = "4.1.6", features = ["derive"] }
 csv = "1.3.0"
-dataset-writer = { version = "1.0.0", features = ["parquet"] }
+#dataset-writer = { version = "1.0.0", features = ["parquet"] }
+dataset-writer = { git = "https://gitlab.softwareheritage.org/swh/devel/dataset-writer-rs.git", rev = "9b9d11b952c175c6a1e7d56f8f17c1b3ab11c3dd", features = ["parquet"] }
 dsi-progress-logger = "0.2.4"
 log = "0.4.17"
-parquet = { version = "52.0.0", default-features = false, features = ["arrow", "zstd"] }
+parquet = { version = "53.0.0", default-features = false, features = ["arrow", "zstd"] }
 rayon = "1.9.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_bytes = "0.11.14"