From a9437c50ac5b608f457d20cfd96dbe7dbde0bca4 Mon Sep 17 00:00:00 2001 From: kberket Date: Wed, 17 Apr 2024 13:50:55 -0400 Subject: [PATCH] fix indexing when doing rebuild from s3 --- src/maggma/stores/open_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maggma/stores/open_data.py b/src/maggma/stores/open_data.py index 5a0d1f492..2e4fd9295 100644 --- a/src/maggma/stores/open_data.py +++ b/src/maggma/stores/open_data.py @@ -613,7 +613,7 @@ def _get_full_key_path(self, index: pd.DataFrame) -> str: return f"{self.prefix}{id}{self.object_file_extension}" def _gather_indexable_data(self, df: pd.DataFrame) -> pd.DataFrame: - return df[self.searchable_fields] + return self._json_normalize_and_filter(df) def _json_normalize_and_filter(self, docs: pd.DataFrame) -> pd.DataFrame: dfs = [] @@ -689,7 +689,7 @@ def rebuild_index_from_s3_data(self) -> pd.DataFrame: for page in page_iterator: for file in page["Contents"]: key = file["Key"] - if key != self.index._get_manifest_full_key_path(): + if key != self.index._get_manifest_full_key_path() and key.endswith(self.object_file_extension): all_index_docs.append(self._index_for_doc_from_s3(key)) ret = pd.concat(all_index_docs, ignore_index=True) self.index.set_index_data(ret)