graphprotocol · lutter · Oct 24, 2023 · Oct 24, 2023
diff --git a/graph/src/env/store.rs b/graph/src/env/store.rs
@@ -109,6 +109,9 @@ pub struct EnvVarsStore {
     /// is 10_000 which corresponds to 10MB. Setting this to 0 disables
     /// write batching.
     pub write_batch_size: usize,
+    /// Whether to create GIN indexes for array attributes. Set by
+    /// `GRAPH_STORE_CREATE_GIN_INDEXES`. The default is `false`.
+    pub create_gin_indexes: bool,
 }
 
 // This does not print any values avoid accidentally leaking any sensitive env vars
@@ -150,6 +153,7 @@ impl From<InnerStore> for EnvVarsStore {
             history_slack_factor: x.history_slack_factor.0,
             write_batch_duration: Duration::from_secs(x.write_batch_duration_in_secs),
             write_batch_size: x.write_batch_size * 1_000,
+            create_gin_indexes: x.create_gin_indexes,
         }
     }
 }
@@ -203,6 +207,8 @@ pub struct InnerStore {
     write_batch_duration_in_secs: u64,
     #[envconfig(from = "GRAPH_STORE_WRITE_BATCH_SIZE", default = "10000")]
     write_batch_size: usize,
+    #[envconfig(from = "GRAPH_STORE_CREATE_GIN_INDEXES", default = "false")]
+    create_gin_indexes: bool,
 }
 
 #[derive(Clone, Copy, Debug)]

diff --git a/store/postgres/src/relational/ddl.rs b/store/postgres/src/relational/ddl.rs
@@ -3,7 +3,7 @@ use std::{
     iter,
 };
 
-use graph::prelude::BLOCK_NUMBER_MAX;
+use graph::prelude::{BLOCK_NUMBER_MAX, ENV_VARS};
 
 use crate::block_range::CAUSALITY_REGION_COLUMN;
 use crate::relational::{
@@ -225,7 +225,7 @@ impl Table {
             .filter(not_immutable_pk)
             .filter(not_numeric_list);
 
-        for (i, column) in columns.enumerate() {
+        for (column_index, column) in columns.enumerate() {
             let (method, index_expr) = if column.is_reference() && !column.is_list() {
                 // For foreign keys, index the key together with the block range
                 // since we almost always also have a block_range clause in
@@ -268,17 +268,21 @@ impl Table {
 
                 (method, index_expr)
             };
-            write!(
-            out,
-            "create index attr_{table_index}_{column_index}_{table_name}_{column_name}\n    on {qname} using {method}({index_expr});\n",
-            table_index = self.position,
-            table_name = self.name,
-            column_index = i,
-            column_name = column.name,
-            qname = self.qualified_name,
-            method = method,
-            index_expr = index_expr,
-        )?;
+            // If `create_gin_indexes` is set to false, we don't create
+            // indexes on array attributes. Experience has shown that these
+            // indexes are very expensive to update and can have a very bad
+            // impact on the write performance of the database, but are
+            // hardly ever used or needed by queries.
+            if !column.is_list() || ENV_VARS.store.create_gin_indexes {
+                write!(
+                    out,
+                    "create index attr_{table_index}_{column_index}_{table_name}_{column_name}\n    on {qname} using {method}({index_expr});\n",
+                    table_index = self.position,
+                    table_name = self.name,
+                    column_name = column.name,
+                    qname = self.qualified_name,
+                )?;
+            }
         }
         writeln!(out)
     }

diff --git a/store/postgres/src/relational/ddl_tests.rs b/store/postgres/src/relational/ddl_tests.rs
@@ -346,8 +346,6 @@ create index attr_0_1_musician_name
     on "sgd0815"."musician" using btree(left("name", 256));
 create index attr_0_2_musician_main_band
     on "sgd0815"."musician" using gist("main_band", block_range);
-create index attr_0_3_musician_bands
-    on "sgd0815"."musician" using gin("bands");
 
 create table "sgd0815"."band" (
         vid                  bigserial primary key,
@@ -368,8 +366,6 @@ create index attr_1_0_band_id
     on "sgd0815"."band" using btree("id");
 create index attr_1_1_band_name
     on "sgd0815"."band" using btree(left("name", 256));
-create index attr_1_2_band_original_songs
-    on "sgd0815"."band" using gin("original_songs");
 
 create table "sgd0815"."song" (
         vid                    bigserial primary key,
@@ -484,8 +480,6 @@ create index attr_2_0_habitat_id
     on "sgd0815"."habitat" using btree("id");
 create index attr_2_1_habitat_most_common
     on "sgd0815"."habitat" using gist("most_common", block_range);
-create index attr_2_2_habitat_dwellers
-    on "sgd0815"."habitat" using gin("dwellers");
 
 "#;
 const FULLTEXT_GQL: &str = r#"
@@ -583,8 +577,6 @@ create index attr_2_0_habitat_id
     on "sgd0815"."habitat" using btree("id");
 create index attr_2_1_habitat_most_common
     on "sgd0815"."habitat" using gist("most_common", block_range);
-create index attr_2_2_habitat_dwellers
-    on "sgd0815"."habitat" using gin("dwellers");
 
 "#;