Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

graph, store: Do not create GIN indexes on array attributes #4933

Merged
merged 1 commit into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions graph/src/env/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ pub struct EnvVarsStore {
/// is 10_000 which corresponds to 10MB. Setting this to 0 disables
/// write batching.
pub write_batch_size: usize,
/// Whether to create GIN indexes for array attributes. Set by
/// `GRAPH_STORE_CREATE_GIN_INDEXES`. The default is `false`.
pub create_gin_indexes: bool,
}

// This does not print any values, to avoid accidentally leaking any sensitive env vars
Expand Down Expand Up @@ -150,6 +153,7 @@ impl From<InnerStore> for EnvVarsStore {
history_slack_factor: x.history_slack_factor.0,
write_batch_duration: Duration::from_secs(x.write_batch_duration_in_secs),
write_batch_size: x.write_batch_size * 1_000,
create_gin_indexes: x.create_gin_indexes,
}
}
}
Expand Down Expand Up @@ -203,6 +207,8 @@ pub struct InnerStore {
write_batch_duration_in_secs: u64,
#[envconfig(from = "GRAPH_STORE_WRITE_BATCH_SIZE", default = "10000")]
write_batch_size: usize,
#[envconfig(from = "GRAPH_STORE_CREATE_GIN_INDEXES", default = "false")]
create_gin_indexes: bool,
}

#[derive(Clone, Copy, Debug)]
Expand Down
30 changes: 17 additions & 13 deletions store/postgres/src/relational/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{
iter,
};

use graph::prelude::BLOCK_NUMBER_MAX;
use graph::prelude::{BLOCK_NUMBER_MAX, ENV_VARS};

use crate::block_range::CAUSALITY_REGION_COLUMN;
use crate::relational::{
Expand Down Expand Up @@ -225,7 +225,7 @@ impl Table {
.filter(not_immutable_pk)
.filter(not_numeric_list);

for (i, column) in columns.enumerate() {
for (column_index, column) in columns.enumerate() {
let (method, index_expr) = if column.is_reference() && !column.is_list() {
// For foreign keys, index the key together with the block range
// since we almost always also have a block_range clause in
Expand Down Expand Up @@ -268,17 +268,21 @@ impl Table {

(method, index_expr)
};
write!(
out,
"create index attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n",
table_index = self.position,
table_name = self.name,
column_index = i,
column_name = column.name,
qname = self.qualified_name,
method = method,
index_expr = index_expr,
)?;
// If `create_gin_indexes` is set to false, we don't create
// indexes on array attributes. Experience has shown that these
// indexes are very expensive to update and can have a very bad
// impact on the write performance of the database, but are
// hardly ever used or needed by queries.
if !column.is_list() || ENV_VARS.store.create_gin_indexes {
write!(
out,
"create index attr_{table_index}_{column_index}_{table_name}_{column_name}\n on {qname} using {method}({index_expr});\n",
table_index = self.position,
table_name = self.name,
column_name = column.name,
qname = self.qualified_name,
)?;
}
}
writeln!(out)
}
Expand Down
8 changes: 0 additions & 8 deletions store/postgres/src/relational/ddl_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,6 @@ create index attr_0_1_musician_name
on "sgd0815"."musician" using btree(left("name", 256));
create index attr_0_2_musician_main_band
on "sgd0815"."musician" using gist("main_band", block_range);
create index attr_0_3_musician_bands
on "sgd0815"."musician" using gin("bands");

create table "sgd0815"."band" (
vid bigserial primary key,
Expand All @@ -368,8 +366,6 @@ create index attr_1_0_band_id
on "sgd0815"."band" using btree("id");
create index attr_1_1_band_name
on "sgd0815"."band" using btree(left("name", 256));
create index attr_1_2_band_original_songs
on "sgd0815"."band" using gin("original_songs");

create table "sgd0815"."song" (
vid bigserial primary key,
Expand Down Expand Up @@ -484,8 +480,6 @@ create index attr_2_0_habitat_id
on "sgd0815"."habitat" using btree("id");
create index attr_2_1_habitat_most_common
on "sgd0815"."habitat" using gist("most_common", block_range);
create index attr_2_2_habitat_dwellers
on "sgd0815"."habitat" using gin("dwellers");

"#;
const FULLTEXT_GQL: &str = r#"
Expand Down Expand Up @@ -583,8 +577,6 @@ create index attr_2_0_habitat_id
on "sgd0815"."habitat" using btree("id");
create index attr_2_1_habitat_most_common
on "sgd0815"."habitat" using gist("most_common", block_range);
create index attr_2_2_habitat_dwellers
on "sgd0815"."habitat" using gin("dwellers");

"#;

Expand Down