Skip to content

Commit

Permalink
Merge pull request #47 from weaviate/fix-direct-tokenize-setting
Browse files (browse the repository at this point in the history)
Fix T2V_TRANSFORMERS_DIRECT_TOKENIZE setting
  • Loading branch information
antas-marcin authored Jan 30, 2023
2 parents 7ff4780 + 9f627d9 commit 7035c9a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ def startup_event():
logger.info("Running on CPU")

# Batch text tokenization enabled by default
- direct_tokenize = True
+ direct_tokenize = False
transformers_direct_tokenize = os.getenv("T2V_TRANSFORMERS_DIRECT_TOKENIZE")
- if transformers_direct_tokenize is not None and transformers_direct_tokenize == "false" or transformers_direct_tokenize == "0":
- direct_tokenize = False
+ if transformers_direct_tokenize is not None and transformers_direct_tokenize == "true" or transformers_direct_tokenize == "1":
+ direct_tokenize = True

meta_config = Meta('./models/model')
vec = Vectorizer('./models/model', cuda_support, cuda_core, cuda_per_process_memory_fraction,
Expand Down
2 changes: 1 addition & 1 deletion vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def pool_embedding(self, batch_results, tokens, config):

async def vectorize(self, text: str, config: VectorInputConfig):
with torch.no_grad():
- if not self.direct_tokenize:
+ if self.direct_tokenize:
# create embeddings without tokenizing text
tokens = self.tokenize(text)
if self.cuda:
Expand Down

0 comments on commit 7035c9a

Please sign in to comment.