Skip to content

Commit

Permalink
fix indexing when doing rebuild from s3
Browse files Browse the repository at this point in the history
  • Loading branch information
kbuma committed Apr 17, 2024
1 parent a03bc7c commit a9437c5
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/maggma/stores/open_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ def _get_full_key_path(self, index: pd.DataFrame) -> str:
return f"{self.prefix}{id}{self.object_file_extension}"

def _gather_indexable_data(self, df: pd.DataFrame) -> pd.DataFrame:
- return df[self.searchable_fields]
+ return self._json_normalize_and_filter(df)

def _json_normalize_and_filter(self, docs: pd.DataFrame) -> pd.DataFrame:
dfs = []
Expand Down Expand Up @@ -689,7 +689,7 @@ def rebuild_index_from_s3_data(self) -> pd.DataFrame:
for page in page_iterator:
for file in page["Contents"]:
key = file["Key"]
- if key != self.index._get_manifest_full_key_path():
+ if key != self.index._get_manifest_full_key_path() and key.endswith(self.object_file_extension):
all_index_docs.append(self._index_for_doc_from_s3(key))
ret = pd.concat(all_index_docs, ignore_index=True)
self.index.set_index_data(ret)
Expand Down

0 comments on commit a9437c5

Please sign in to comment.