From 1dc978e597e51fd616c746f99856816080a5eaea Mon Sep 17 00:00:00 2001 From: taokz Date: Fri, 2 Sep 2022 11:00:30 +0800 Subject: [PATCH 1/2] Fix windows dtype bug of neural search --- applications/neural_search/recall/simcse/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/applications/neural_search/recall/simcse/inference.py b/applications/neural_search/recall/simcse/inference.py index 8788b35cf1cc..e0cd813cdb66 100644 --- a/applications/neural_search/recall/simcse/inference.py +++ b/applications/neural_search/recall/simcse/inference.py @@ -65,8 +65,8 @@ def convert_example(example, tokenizer, max_seq_length=512, do_evalute=False): max_seq_length=max_seq_length) batchify_fn = lambda samples, fn=Tuple( - Pad(axis=0, pad_val=tokenizer.pad_token_id), # text_input - Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # text_segment + Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"), # text_input + Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"), # text_segment ): [data for data in fn(samples)] pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") From 1774d4b9bd455119e8a85e08e5a813d946827835 Mon Sep 17 00:00:00 2001 From: kztao Date: Wed, 7 Sep 2022 11:23:41 +0800 Subject: [PATCH 2/2] Fix windows dtype bug of neural search --- applications/neural_search/recall/simcse/inference.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/applications/neural_search/recall/simcse/inference.py b/applications/neural_search/recall/simcse/inference.py index bf1eea8fc64f..097c348c736f 100644 --- a/applications/neural_search/recall/simcse/inference.py +++ b/applications/neural_search/recall/simcse/inference.py @@ -66,8 +66,10 @@ def convert_example(example, tokenizer, max_seq_length=512, do_evalute=False): max_seq_length=max_seq_length) batchify_fn = lambda samples, fn=Tuple( - Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"), # text_input - Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"), # text_segment + Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64" + ), # text_input + Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64" + ), # text_segment ): [data for data in fn(samples)] pretrained_model = AutoModel.from_pretrained(model_name_or_path)