-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathexample.php
65 lines (55 loc) · 1.95 KB
/
example.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?php
// good resources
// https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/
// https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1
//
// run with
// text-embeddings-router --model-id opensearch-project/opensearch-neural-sparse-encoding-v1 --pooling splade
require_once __DIR__ . '/vendor/autoload.php';
use Pgvector\SparseVector;
// Open the connection used by the rest of the script. pg_connect() returns false
// on failure, so stop here with a clear message instead of handing false to the
// pg_* calls below.
$db = pg_connect('postgres://localhost/pgvector_example');
if ($db === false) {
    throw new \RuntimeException('Could not connect to postgres://localhost/pgvector_example');
}

// Start from a clean slate: make sure the vector extension exists, then recreate
// the documents table. The sparsevec dimension (30522) must match the dimension
// passed to SparseVector when rows are inserted and queried.
// NOTE(review): 30522 is presumably the vocabulary size of the encoder named in
// the header comment — confirm against the model card.
$setupStatements = [
    'CREATE EXTENSION IF NOT EXISTS vector',
    'DROP TABLE IF EXISTS documents',
    'CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))',
];
foreach ($setupStatements as $sql) {
    // pg_query() returns false when the statement fails; do not continue with a
    // half-initialised schema.
    if (pg_query($db, $sql) === false) {
        throw new \RuntimeException('Setup statement failed (' . $sql . '): ' . pg_last_error($db));
    }
}
/**
 * Requests sparse embeddings for a batch of inputs from an embedding endpoint.
 *
 * Sends `{"inputs": ...}` as a JSON POST body and converts every returned list
 * of `{"index": ..., "value": ...}` entries into an `index => value` map.
 *
 * @param array  $inputs Inputs to embed; sent verbatim as the "inputs" field.
 * @param string $url    Endpoint to read from. Defaults to the local
 *                       embed_sparse route, so existing callers are unaffected.
 *
 * @return array One `index => value` map per item of the response, in response order.
 *
 * @throws \RuntimeException If the inputs cannot be encoded, the request fails,
 *                           or the response does not have the expected shape.
 */
function fetchEmbeddings($inputs, string $url = 'http://localhost:3000/embed_sparse'): array
{
    $payload = json_encode(['inputs' => $inputs]);
    if ($payload === false) {
        throw new \RuntimeException('Could not encode inputs as JSON: ' . json_last_error_msg());
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'POST',
            'header' => "Content-Type: application/json\r\n",
            'content' => $payload,
        ],
    ]);

    // file_get_contents() returns false (and emits a warning) when the request
    // fails; without this check the failure would silently become an empty result.
    $response = file_get_contents($url, false, $context);
    if ($response === false) {
        throw new \RuntimeException('Request to ' . $url . ' failed');
    }

    $decoded = json_decode($response, true);
    if (!is_array($decoded)) {
        $reason = json_last_error() === JSON_ERROR_NONE ? 'expected a JSON array' : json_last_error_msg();
        throw new \RuntimeException('Unexpected response from ' . $url . ': ' . $reason);
    }

    $embeddings = [];
    foreach ($decoded as $item) {
        if (!is_array($item)) {
            throw new \RuntimeException('Unexpected response from ' . $url . ': embedding is not a list');
        }

        // Collapse the list of {index, value} entries into a sparse index => value map.
        $embedding = [];
        foreach ($item as $entry) {
            if (!is_array($entry) || !isset($entry['index'], $entry['value'])) {
                throw new \RuntimeException('Unexpected response from ' . $url . ': malformed embedding entry');
            }
            $embedding[$entry['index']] = $entry['value'];
        }
        $embeddings[] = $embedding;
    }

    return $embeddings;
}
// Dimension of every sparse vector built below; it must match the sparsevec(...)
// dimension of the documents.embedding column.
$dimensions = 30522;

// Sample documents to index.
$input = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling',
];

// Embed all documents in one request, then store each text with its embedding.
$embeddings = fetchEmbeddings($input);
foreach ($input as $i => $content) {
    // Guard against a short or empty response rather than indexing blindly.
    if (!isset($embeddings[$i])) {
        throw new \RuntimeException('No embedding returned for input #' . $i);
    }

    $inserted = pg_query_params(
        $db,
        'INSERT INTO documents (content, embedding) VALUES ($1, $2)',
        [$content, new SparseVector($embeddings[$i], $dimensions)]
    );
    if ($inserted === false) {
        throw new \RuntimeException('Insert failed: ' . pg_last_error($db));
    }
}

// Embed the search text and list the closest documents. "<#>" is pgvector's
// negative inner product operator, so ascending order puts the best match first.
$query = 'forest';
$queryEmbedding = fetchEmbeddings([$query])[0] ?? null;
if ($queryEmbedding === null) {
    throw new \RuntimeException('No embedding returned for the query');
}

$result = pg_query_params(
    $db,
    'SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5',
    [new SparseVector($queryEmbedding, $dimensions)]
);
// pg_query_params() returns false on failure; passing that to the fetch loop
// would fail with an unrelated type error.
if ($result === false) {
    throw new \RuntimeException('Search query failed: ' . pg_last_error($db));
}

// Only the named column is read, so fetch associative rows.
while ($row = pg_fetch_assoc($result)) {
    echo $row['content'] . "\n";
}

pg_free_result($result);
pg_close($db);