Skip to content

Commit

Permalink
Simplify SQL for query with filters in the MariaDB vector store integration
Browse files Browse the repository at this point in the history

The subquery-based workaround we previously used for queries with a WHERE
clause is no longer needed, because bug MDEV-34774 is fixed in MariaDB 11.7.
  • Loading branch information
karsov committed Jan 15, 2025
1 parent fd65100 commit 08010f6
Showing 1 changed file with 8 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -371,33 +371,17 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
text,
embedding,
metadata,
VEC_DISTANCE_COSINE(embedding, vec_fromtext('{query.query_embedding}')) AS distance
FROM `{self.table_name}`
ORDER BY distance
LIMIT {query.similarity_top_k}
"""
VEC_DISTANCE_COSINE(embedding, VEC_FromText('{query.query_embedding}')) AS distance
FROM `{self.table_name}`"""

if query.filters:
where = self._filters_to_where_clause(query.filters)
stmt += f"""
WHERE {self._filters_to_where_clause(query.filters)}"""

# We cannot use the query above when there is a WHERE clause,
# because of a bug in MariaDB: https://jira.mariadb.org/browse/MDEV-34774.
# The following query works around it.
stmt = f"""
SELECT * FROM (
SELECT
node_id,
text,
embedding,
metadata,
VEC_DISTANCE_COSINE(embedding, vec_fromtext('{query.query_embedding}')) AS distance
FROM `{self.table_name}`
WHERE {where}
LIMIT 1000000
) AS unordered
ORDER BY distance
LIMIT {query.similarity_top_k}
"""
stmt += f"""
ORDER BY distance
LIMIT {query.similarity_top_k}
"""

with self._engine.connect() as connection:
result = connection.execute(sqlalchemy.text(stmt))
Expand Down

0 comments on commit 08010f6

Please sign in to comment.