Skip to content

Commit

Permalink
Merge branch 'norm-by-tile'
Browse files Browse the repository at this point in the history
* norm-by-tile:
  Tile-based normalization
  • Loading branch information
davenquinn committed Sep 21, 2024
2 parents b220a9c + 4e66bf7 commit 716aa5d
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 16 deletions.
9 changes: 8 additions & 1 deletion macrostrat_tileserver/vector_search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@

@router.get("/{model}/tiles/{z}/{x}/{y}")
async def get_tile(
request: Request, model: str, z: int, x: int, y: int, term: str = Query(None)
request: Request,
model: str,
z: int,
x: int,
y: int,
term: str = Query(None),
norm_method: str = Query("tile"),
):
"""Get a tile from the tileserver."""
pool = request.app.state.pool
Expand Down Expand Up @@ -52,6 +58,7 @@ async def get_tile(
model_name=model_name,
linesize=linesize,
term_id=term_id,
norm_method=norm_method,
layer_name="units",
)

Expand Down
49 changes: 34 additions & 15 deletions macrostrat_tileserver/vector_search/queries/units.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,38 @@ f1 AS (
JOIN maps.sources
ON z.source_id = sources.source_id
WHERE sources.status_code = 'active'
),
res AS (
-- One row per f1 feature that has a matching legend embedding, carrying the
-- term id and the un-normalized similarity to the search term.
SELECT f1.*,
term.id AS term_id,
-- cosine similarity between the term and the legend embedding
text_vectors.distance(le.embedding_vector, term.text_vector) AS raw_similarity
FROM f1
-- Unconditional join: every f1 row is paired with every row of term
-- (presumably term holds exactly one row — confirm against its definition).
JOIN term ON true
JOIN text_vectors.legend_embedding AS le
ON f1.legend_id = le.legend_id
-- Compare only against embeddings whose model_id matches the term's.
AND le.model_id = term.model_id
-- Rows without a geometry are dropped.
WHERE geom IS NOT NULL
),
boundaries AS (
    -- Normalization bounds for raw_similarity, selected by :norm_method.
    -- Exactly one branch is meant to contribute a single row.

    -- 'global': use the bounds stored on the term row.
    SELECT
        term.lower_bound,
        term.upper_bound
    FROM term
    WHERE :norm_method = 'global'
    UNION ALL
    -- 'tile': use the min/max of the similarities present in this tile.
    SELECT
        min(raw_similarity) AS lower_bound,
        max(raw_similarity) AS upper_bound
    FROM res
    WHERE :norm_method = 'tile'
    -- An aggregate query without GROUP BY always yields one row, even when
    -- WHERE filters out every input row. Without this HAVING, the 'global'
    -- method would receive an extra (NULL, NULL) bounds row and every feature
    -- would be emitted twice downstream. HAVING removes that group row.
    HAVING :norm_method = 'tile'
    -- NOTE(review): any other :norm_method value now produces no bounds row,
    -- and therefore no features; validate the parameter in the caller.
),
res2 AS (
    -- Adds the normalized similarity column to every row of res.
    SELECT
        res.*,
        -- raw_similarity rescaled linearly so lower_bound maps to 0 and
        -- upper_bound maps to 1. NULLIF guards the degenerate case where both
        -- bounds are equal (e.g. every feature in the tile has the same
        -- similarity), which would otherwise raise a division-by-zero error;
        -- similarity is NULL in that case.
        (raw_similarity - lower_bound)
            / NULLIF(upper_bound - lower_bound, 0) AS similarity
    FROM res
    JOIN boundaries ON true
)
SELECT
f1.*,
term.id AS term_id,
-- cosine similarity between the term and the legend embedding
text_vectors.distance(le.embedding_vector, term.text_vector) AS raw_similarity,
(text_vectors.distance(le.embedding_vector, term.text_vector) - term.lower_bound) / (term.upper_bound - term.lower_bound) AS similarity,
-- normalized inner product between the term and the legend embedding (same as cosine similarity)
text_vectors.norm_distance(le.normalized_vector, term.norm_vector) AS raw_norm_similarity,
(text_vectors.norm_distance(le.normalized_vector, term.norm_vector) - term.lower_bound_norm) / (term.upper_bound_norm - term.lower_bound_norm) AS norm_similarity
FROM f1
JOIN term ON true
JOIN text_vectors.legend_embedding AS le
ON f1.legend_id = le.legend_id
AND le.model_id = term.model_id
WHERE geom IS NOT NULL
SELECT * FROM res2

0 comments on commit 716aa5d

Please sign in to comment.