Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISSUE-325: Vectors for each king, one float to rule them all #326

Merged
merged 15 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
uuid: 05342da1-9340-4e4b-8697-0bbdfc8d80fe
langcode: en
status: true
dependencies:
module:
- strawberryfield
- search_api_solr
id: dense_vector_field_1024_und
label: 'Dense Vector Field of 1024 dimensions suitable for mobileNetV3 feature extraction (embeddings) using dot_product as comparison algorithm'
minimum_solr_version: 9.0.0
field_type_language_code: und
domains: { }
field_type:
name: knn_vector_1024
class: solr.DenseVectorField
vectorDimension: 1024
similarityFunction: dot_product
unstemmed_field_type: null
spellcheck_field_type: null
collated_field_type: null
text_files: { }
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
uuid: 3ac7a111-8af9-40d8-9d41-5edfa8e3971a
langcode: en
status: true
dependencies:
module:
- strawberryfield
- search_api_solr
id: dense_vector_field_384_und
label: 'Dense Vector Field of 384 dimensions suitable for Bert text feature extraction (embeddings) using dot_product as comparison algorithm'
minimum_solr_version: 9.0.0
field_type_language_code: und
domains: { }
field_type:
name: knn_vector_384
class: solr.DenseVectorField
vectorDimension: 384
similarityFunction: dot_product
unstemmed_field_type: null
spellcheck_field_type: null
collated_field_type: null
text_files: { }
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
uuid: eaf9db6d-bf73-4800-9e61-19a0b084c9bc
langcode: en
status: true
dependencies:
module:
- strawberryfield
- search_api_solr
id: dense_vector_field_512_und
label: 'Dense Vector Field of 512 dimensions suitable for Apple Vision ML Image FingerPrint (embeddings) using dot_product as comparison algorithm'
minimum_solr_version: 9.0.0
field_type_language_code: und
domains: { }
field_type:
name: knn_vector_512
class: solr.DenseVectorField
vectorDimension: 512
similarityFunction: dot_product
unstemmed_field_type: null
spellcheck_field_type: null
collated_field_type: null
text_files: { }
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
uuid: 38af5bbc-99de-43cb-9916-a70be8f1f9da
langcode: en
status: true
dependencies:
module:
- strawberryfield
- search_api_solr
id: dense_vector_field_576_und
label: 'Dense Vector Field of 576 dimensions suitable for YOLOv8 feature extraction using dot_product as comparison algorithm'
minimum_solr_version: 9.0.0
field_type_language_code: und
domains: { }
field_type:
name: knn_vector_576
class: solr.DenseVectorField
vectorDimension: 576
similarityFunction: dot_product
unstemmed_field_type: null
spellcheck_field_type: null
collated_field_type: null
text_files: { }
50 changes: 50 additions & 0 deletions src/EventSubscriber/SearchApiSolrEventSubscriber.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

use Drupal\search_api\Event\IndexingItemsEvent;
use Drupal\search_api\Event\SearchApiEvents;
use Drupal\search_api_solr\Event\PostCreateIndexDocumentEvent;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
use Drupal\search_api_solr\Event\SearchApiSolrEvents;
use Drupal\search_api_solr\Event\PreQueryEvent;
use Drupal\search_api_solr\Event\PostConvertedQueryEvent;
use Drupal\search_api_solr\Event\PostConfigFilesGenerationEvent;
use Solarium\Component\ComponentAwareQueryInterface;
use Drupal\strawberryfield\StrawberryfieldSearchAPIUtilityServiceInterface;

Expand All @@ -23,6 +25,8 @@ public static function getSubscribedEvents() {
SearchApiSolrEvents::PRE_QUERY => 'preQuery',
SearchApiSolrEvents::POST_CONVERT_QUERY => 'convertedQuery',
SearchApiEvents::INDEXING_ITEMS => 'indexingItems',
SearchApiSolrEvents::POST_CONFIG_FILES_GENERATION => 'overrideExtraFields',
SearchApiSolrEvents::POST_CREATE_INDEX_DOCUMENT => 'RestoreLostZerosVector',
];
}

Expand Down Expand Up @@ -130,6 +134,23 @@ public function convertedQuery(PostConvertedQueryEvent $event): void {
$hl->setFields(['*']);
}
}
if ($query->getOption('sbf_knn')) {
//@TODO this is overly nested. I should also allow multiple ones like this
// then move any non KNN queries to "before" or to "filters"
$solarium_query->setQuery($query->getOption('sbf_knn')[0][0]);
$hl = $solarium_query->getHighlighting();
// We can't highlight when doing Vector Queries or we will get
// [ x:drupal] o.a.s.h.RequestHandlerBase java.lang.UnsupportedOperationException => java.lang.UnsupportedOperationException
// at org.apache.lucene.search.highlight.WeightedSpanTermExtractor$DelegatingLeafReader.getFieldInfos(WeightedSpanTermExtractor.java:460)
//java.lang.UnsupportedOperationException: null

$hl->clearFields();
$hl->setUsePhraseHighlighter(FALSE);
// Just in case?
$solarium_query->removeComponent(
ComponentAwareQueryInterface::COMPONENT_EDISMAX
);
}
}
/**
* Reacts to the indexing items event.
Expand All @@ -154,4 +175,33 @@ public function indexingItems(IndexingItemsEvent $event) {
public function finishedIndexingItems(IndexingItemsEvent $event) {
  // Clear the "is indexing" flag held by the search API state service.
  $indexing_state = $this->search_api_state;
  $indexing_state->setIsIndexing(FALSE);
}

/**
 * Writes the values of dense vector fields back onto the Solr document.
 *
 * For every field of the indexed item whose Search API data type starts with
 * "densevector_", the field's values are set again on the Solarium document
 * under the matching Solr field name. Fields that have no values, or for
 * which the backend reports no Solr field name, are left untouched.
 *
 * NOTE(review): judging by the method name this compensates for zero-valued
 * vector components being dropped while the document is built; confirm
 * against the search_api_solr backend.
 *
 * @param \Drupal\search_api_solr\Event\PostCreateIndexDocumentEvent $event
 *   The event carrying the Search API item and its Solarium document.
 */
public function RestoreLostZerosVector(PostCreateIndexDocumentEvent $event) {
  $item = $event->getSearchApiItem();

  // Collect the identifiers of all dense vector fields present on the item.
  $names = [];
  foreach ($item->getFields(FALSE) as $field) {
    if (str_starts_with($field->getType(), 'densevector_')) {
      $names[] = $field->getFieldIdentifier();
    }
  }

  // Without vector fields there is no need to resolve Solr field names.
  if ($names === []) {
    return;
  }

  $document = $event->getSolariumDocument();
  $index = $item->getIndex();
  $solr_names = $index->getServerInstance()->getBackend()->getSolrFieldNames($index);
  foreach ($names as $name) {
    if (!isset($solr_names[$name])) {
      continue;
    }
    // Per the original author's note an empty value can not be sent, so a
    // field without values is skipped instead of being set on the document.
    $values = $item->getField($name)->getValues();
    if (!empty($values)) {
      $document->setField($solr_names[$name], $values);
    }
  }
}


/**
 * Placeholder for adjusting generated Solr config files; currently a no-op.
 *
 * @param \Drupal\search_api_solr\Event\PostConfigFilesGenerationEvent $event
 *   The post config files generation event. It is not read or modified yet.
 */
public function overrideExtraFields(PostConfigFilesGenerationEvent $event): void {
  // @todo Override the extra fields to work around Search API defining every
  //   field as multivalued.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public function process($langcode = NULL)
$delta = 0;
foreach ($this->processed as $reference) {
// No way we can use $this->createItem($delta, $reference); here
// Because our public facing datatype is not what it seems
// Because our public facing data_type is not what it seems
// We can not use DataReferenceDefinitions here, we need actually EntityAdapter!
// Because \Drupal\search_api\Utility\FieldsHelper::extractFields can only act
// on Complexdatainterface elements
Expand Down
30 changes: 30 additions & 0 deletions src/Plugin/search_api/data_type/DenseVector1024DataType.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

namespace Drupal\strawberryfield\Plugin\search_api\data_type;

use Drupal\search_api\DataType\DataTypePluginBase;

/**
* Provides a Vector data type for 1024 length search api fields.
*
* @SearchApiDataType(
* id = "densevector_1024",
* label = @Translation("Dense Vector of 1024 length"),
* description = @Translation("Contains Dense Vectors, float values."),
* fallback_type = "decimal",
* prefix = "knn1024"
* )
*/
class DenseVector1024DataType extends DataTypePluginBase {

  /**
   * {@inheritdoc}
   */
  public function getValue($value) {
    // NULL is passed through as is; any other value is cast to a float.
    return $value === NULL ? NULL : (float) $value;
  }

}
30 changes: 30 additions & 0 deletions src/Plugin/search_api/data_type/DenseVector384DataType.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

namespace Drupal\strawberryfield\Plugin\search_api\data_type;

use Drupal\search_api\DataType\DataTypePluginBase;

/**
* Provides a Vector data type for 384 length search api fields.
*
* @SearchApiDataType(
* id = "densevector_384",
* label = @Translation("Dense Vector of 384 length"),
* description = @Translation("Contains Dense Vectors, float values."),
* fallback_type = "decimal",
* prefix = "knn384"
* )
*/
class DenseVector384DataType extends DataTypePluginBase {

  /**
   * {@inheritdoc}
   */
  public function getValue($value) {
    // NULL is passed through as is; any other value is cast to a float.
    return $value === NULL ? NULL : (float) $value;
  }

}
30 changes: 30 additions & 0 deletions src/Plugin/search_api/data_type/DenseVector512DataType.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

namespace Drupal\strawberryfield\Plugin\search_api\data_type;

use Drupal\search_api\DataType\DataTypePluginBase;

/**
* Provides a Vector data type for 512 length search api fields.
*
* @SearchApiDataType(
* id = "densevector_512",
* label = @Translation("Dense Vector of 512 length"),
* description = @Translation("Contains Dense Vectors, float values."),
* fallback_type = "decimal",
* prefix = "knn512"
* )
*/
class DenseVector512DataType extends DataTypePluginBase {

  /**
   * {@inheritdoc}
   */
  public function getValue($value) {
    // NULL is passed through as is; any other value is cast to a float.
    return $value === NULL ? NULL : (float) $value;
  }

}
30 changes: 30 additions & 0 deletions src/Plugin/search_api/data_type/DenseVector576DataType.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

namespace Drupal\strawberryfield\Plugin\search_api\data_type;

use Drupal\search_api\DataType\DataTypePluginBase;

/**
* Provides a Vector data type for 576 length search api fields.
*
* @SearchApiDataType(
* id = "densevector_576",
* label = @Translation("Dense Vector of 576 length"),
* description = @Translation("Contains Dense Vectors, float values."),
* fallback_type = "decimal",
* prefix = "knn576"
* )
*/
class DenseVector576DataType extends DataTypePluginBase {

  /**
   * {@inheritdoc}
   */
  public function getValue($value) {
    // NULL is passed through as is; any other value is cast to a float.
    return $value === NULL ? NULL : (float) $value;
  }

}
Loading