From 9a51345a907165510ede734d06d00bd270b5c5d2 Mon Sep 17 00:00:00 2001 From: w5688414 Date: Thu, 1 Sep 2022 03:02:57 +0000 Subject: [PATCH 1/3] Integrate Neural Search models into Pipelines --- .../recall/in_batch_negative/README.md | 69 +++++--- .../deploy/{C++ => cpp}/http_client.py | 2 +- .../deploy/{C++ => cpp}/rpc_client.py | 2 +- .../deploy/{C++ => cpp}/start_server.sh | 0 .../deploy/python/predict.py | 35 ++-- .../deploy/python/web_service.py | 5 +- .../recall/in_batch_negative/evaluate.py | 3 - .../recall/in_batch_negative/export_model.py | 5 +- .../recall/in_batch_negative/inference.py | 5 +- .../recall/in_batch_negative/predict.py | 5 +- .../recall/in_batch_negative/recall.py | 6 +- .../in_batch_negative/scripts/export_model.sh | 4 +- .../in_batch_negative/scripts/predict.sh | 4 +- .../scripts/run_build_index.sh | 1 + .../scripts/train_batch_neg.sh | 62 ------- .../in_batch_negative/train_batch_neg.py | 5 +- .../neural_search/recall/simcse/README.md | 31 ++-- .../recall/simcse/deploy/python/predict.py | 35 ++-- .../neural_search/recall/simcse/evaluate.py | 1 - .../recall/simcse/export_model.py | 6 +- .../neural_search/recall/simcse/inference.py | 5 +- .../neural_search/recall/simcse/predict.py | 6 +- .../neural_search/recall/simcse/recall.py | 8 +- .../recall/simcse/scripts/export_model.sh | 4 +- .../recall/simcse/scripts/predict.sh | 3 +- .../recall/simcse/scripts/run_build_index.sh | 5 +- .../recall/simcse/scripts/train.sh | 4 +- .../neural_search/recall/simcse/train.py | 4 +- pipelines/examples/semantic-search/README.md | 2 +- .../run_neural_search_server.sh | 5 + .../semantic-search/run_search_web.sh | 1 + .../semantic_search_example.py | 85 +++++++--- pipelines/pipelines/nodes/models/__init__.py | 15 ++ .../nodes/models/neural_search_model.py | 151 ++++++++++++++++++ pipelines/pipelines/nodes/retriever/dense.py | 31 +++- .../rest_api/pipeline/semantic_search.yaml | 2 +- .../pipeline/semantic_search_custom.yaml | 67 ++++++++ pipelines/utils/offline_ann.py | 40 +++-- 38 files changed, 502 insertions(+), 222 deletions(-) rename applications/neural_search/recall/in_batch_negative/deploy/{C++ => cpp}/http_client.py (97%) rename applications/neural_search/recall/in_batch_negative/deploy/{C++ => cpp}/rpc_client.py (97%) rename applications/neural_search/recall/in_batch_negative/deploy/{C++ => cpp}/start_server.sh (100%) delete mode 100644 applications/neural_search/recall/in_batch_negative/scripts/train_batch_neg.sh create mode 100644 pipelines/examples/semantic-search/run_neural_search_server.sh create mode 100644 pipelines/pipelines/nodes/models/__init__.py create mode 100644 pipelines/pipelines/nodes/models/neural_search_model.py create mode 100644 pipelines/rest_api/pipeline/semantic_search_custom.yaml diff --git a/applications/neural_search/recall/in_batch_negative/README.md b/applications/neural_search/recall/in_batch_negative/README.md index 326b34bf89c1..151a26f10b4a 100644 --- a/applications/neural_search/recall/in_batch_negative/README.md +++ b/applications/neural_search/recall/in_batch_negative/README.md @@ -42,7 +42,7 @@ In-batch Negatives 策略的训练数据为语义相似的 Pair 对,策略核 ### 技术方案 -双塔模型,采用ERNIE1.0热启,在召回训练阶段引入In-batch Negatives 策略,使用hnswlib建立索引库,进行召回测试。 +双塔模型,在召回训练阶段引入In-batch Negatives 策略,使用hnswlib建立索引库,进行召回测试。 ### 评估指标 @@ -53,10 +53,10 @@ Recall@K召回率是指预测的前topK(top-k是指从最后的按得分排序 **效果评估** -| 模型 | Recall@1 | Recall@5 |Recall@10 |Recall@20 |Recall@50 |策略简要说明| +| 策略 | 模型 | Recall@1 | Recall@5 |Recall@10 |Recall@20 |Recall@50 | | ------------ | ------------ | ------------ |--------- 
|--------- |--------- |--------- | -| In-batch Negatives | 51.301 | 65.309| 69.878| 73.996|78.881| Inbatch-negative有监督训练| - +| In-batch Negatives | ernie 1.0 | 51.301 | 65.309| 69.878| 73.996|78.881| +| In-batch Negatives | rocketqa-zh-base-query-encoder | **59.622** | **75.089**| **79.668**| **83.404**|**87.773**| @@ -166,10 +166,10 @@ Recall@K召回率是指预测的前topK(top-k是指从最后的按得分排序 |Model|训练参数配置|硬件|MD5| | ------------ | ------------ | ------------ |-----------| -|[batch_neg](https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip)|
margin:0.2 scale:30 epoch:3 lr:5E-5 bs:64 max_len:64|4卡 v100-16g|f3e5c7d7b0b718c2530c5e1b136b2d74|
+|[batch_neg](https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip)|ernie 1.0 margin:0.2 scale:30 epoch:3 lr:5E-5 bs:64 max_len:64|4卡 v100-16g
|f3e5c7d7b0b718c2530c5e1b136b2d74| -### 训练环境说明 +### 训练环境说明 - NVIDIA Driver Version: 440.64.00 - Ubuntu 16.04.6 LTS (Docker) @@ -185,7 +185,7 @@ Recall@K召回率是指预测的前topK(top-k是指从最后的按得分排序 然后运行下面的命令使用GPU训练,得到语义索引模型: ``` -root_path=recall +root_path=inbatch python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ train_batch_neg.py \ --device gpu \ @@ -194,11 +194,11 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ --learning_rate 5E-5 \ --epochs 3 \ --output_emb_size 256 \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --save_steps 10 \ --max_seq_length 64 \ --margin 0.2 \ --train_set_file recall/train.csv \ - --evaluate \ --recall_result_dir "recall_result_dir" \ --recall_result_file "recall_result.txt" \ --hnsw_m 100 \ @@ -217,6 +217,7 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ * `learning_rate`: 训练的学习率的大小 * `epochs`: 训练的epoch数 * `output_emb_size`: Transformer 顶层输出的文本向量维度 +* `model_name_or_path`: 预训练模型,用于模型和`Tokenizer`的参数初始化 * `save_steps`: 模型存储 checkpoint 的间隔 steps 个数 * `max_seq_length`: 输入序列的最大长度 * `margin`: 正样本相似度与负样本之间的目标 Gap @@ -234,7 +235,7 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ 也可以使用bash脚本: ``` -sh scripts/train_batch_neg.sh +sh scripts/train.sh ``` @@ -270,6 +271,7 @@ python -u -m paddle.distributed.launch --gpus "3" --log_dir "recall_log/" \ --recall_result_dir "recall_result_dir" \ --recall_result_file "recall_result.txt" \ --params_path "${root_dir}/model_40/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --hnsw_m 100 \ --hnsw_ef 100 \ --batch_size 64 \ @@ -280,16 +282,17 @@ python -u -m paddle.distributed.launch --gpus "3" --log_dir "recall_log/" \ --corpus_file "recall/corpus.csv" ``` 参数含义说明 -* `device`: 使用 cpu/gpu 进行训练 -* `recall_result_dir`: 召回结果存储目录 -* `recall_result_file`: 召回结果的文件名 +* `device`: 使用 cpu/gpu 进行训练 +* `recall_result_dir`: 召回结果存储目录 +* `recall_result_file`: 召回结果的文件名 * `params_path`: 待评估模型的参数文件名 -* `hnsw_m`: hnsw 算法相关参数,保持默认即可 -* `hnsw_ef`: hnsw 算法相关参数,保持默认即可 -* `output_emb_size`: Transformer 顶层输出的文本向量维度 -* `recall_num`: 对 1 个文本召回的相似文本数量 -* `similar_text_pair`: 由相似文本对构成的评估集 -* `corpus_file`: 召回库数据 corpus_file +* `model_name_or_path`: 预训练模型,用于模型和`Tokenizer`的参数初始化 +* `hnsw_m`: hnsw 算法相关参数,保持默认即可 +* `hnsw_ef`: hnsw 算法相关参数,保持默认即可 +* `output_emb_size`: Transformer 顶层输出的文本向量维度 +* `recall_num`: 对 1 个文本召回的相似文本数量 +* `similar_text_pair`: 由相似文本对构成的评估集 +* `corpus_file`: 召回库数据 corpus_file 也可以使用下面的bash脚本: @@ -383,10 +386,11 @@ python inference.py ``` root_dir="checkpoints/inbatch" -python -u -m paddle.distributed.launch --gpus "3" \ +python -u -m paddle.distributed.launch --gpus "0" \ predict.py \ --device gpu \ --params_path "${root_dir}/model_40/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --output_emb_size 256 \ --batch_size 128 \ --max_seq_length 64 \ @@ -396,6 +400,7 @@ python -u -m paddle.distributed.launch --gpus "3" \ 参数含义说明 * `device`: 使用 cpu/gpu 进行训练 * `params_path`: 预训练模型的参数文件名 +* `model_name_or_path`: 预训练模型,用于模型和`Tokenizer`的参数初始化 * `output_emb_size`: Transformer 顶层输出的文本向量维度 * `text_pair_file`: 由文本 Pair 构成的待预测数据集 @@ -423,7 +428,9 @@ predict.sh文件包含了cpu和gpu运行的脚本,默认是gpu运行的脚本 首先把动态图模型转换为静态图: ``` -python export_model.py --params_path checkpoints/inbatch/model_40/model_state.pdparams --output_path=./output +python export_model.py --params_path checkpoints/inbatch/model_40/model_state.pdparams \ + --model_name_or_path rocketqa-zh-base-query-encoder \ + --output_path=./output ``` 也可以运行下面的bash脚本: @@ -449,7 +456,9 @@ 
corpus_list=[['中西方语言与文化的差异','中西方文化差异以及 然后使用PaddleInference ``` -python deploy/python/predict.py --model_dir=./output +python deploy/python/predict.py \ + --model_dir=./output \ + --model_name_or_path rocketqa-zh-base-query-encoder ``` 也可以运行下面的bash脚本: @@ -501,9 +510,16 @@ Paddle Serving的部署有两种方式,第一种方式是Pipeline的方式, #### Pipeline方式 -启动 Pipeline Server: +修改模型需要用到的`Tokenizer` + +``` +self.tokenizer = AutoTokenizer.from_pretrained("rocketqa-zh-base-query-encoder") +``` + +然后启动 Pipeline Server: ``` +cd deploy/python python web_service.py ``` @@ -520,7 +536,7 @@ list_data = [ 然后运行: ``` -python rpc_client.py +python deploy/python/rpc_client.py ``` 模型的输出为: @@ -547,12 +563,12 @@ python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_i 也可以使用脚本: ``` -sh deploy/C++/start_server.sh +sh deploy/cpp/start_server.sh ``` Client 可以使用 http 或者 rpc 两种方式,rpc 的方式为: ``` -python deploy/C++/rpc_client.py +python deploy/cpp/rpc_client.py ``` 运行的输出为: ``` @@ -571,7 +587,7 @@ time to cost :0.3960278034210205 seconds 或者使用 http 的客户端访问模式: ``` -python deploy/C++/http_client.py +python deploy/cpp/http_client.py ``` 运行的输出为: @@ -599,6 +615,7 @@ python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ train_batch_neg.py \ --device gpu \ --save_dir ./checkpoints/simcse_inbatch_negative \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --batch_size 64 \ --learning_rate 5E-5 \ --epochs 3 \ diff --git a/applications/neural_search/recall/in_batch_negative/deploy/C++/http_client.py b/applications/neural_search/recall/in_batch_negative/deploy/cpp/http_client.py similarity index 97% rename from applications/neural_search/recall/in_batch_negative/deploy/C++/http_client.py rename to applications/neural_search/recall/in_batch_negative/deploy/cpp/http_client.py index 320c97166936..164038f46dab 100644 --- a/applications/neural_search/recall/in_batch_negative/deploy/C++/http_client.py +++ b/applications/neural_search/recall/in_batch_negative/deploy/cpp/http_client.py @@ -54,7 +54,7 @@ def convert_example(example, print(fetch_names) # 创建tokenizer -tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') +tokenizer = AutoTokenizer.from_pretrained('rocketqa-zh-base-query-encoder') max_seq_len = 64 # 数据预处理 diff --git a/applications/neural_search/recall/in_batch_negative/deploy/C++/rpc_client.py b/applications/neural_search/recall/in_batch_negative/deploy/cpp/rpc_client.py similarity index 97% rename from applications/neural_search/recall/in_batch_negative/deploy/C++/rpc_client.py rename to applications/neural_search/recall/in_batch_negative/deploy/cpp/rpc_client.py index 43275e8d7117..1b257b8d2fb7 100644 --- a/applications/neural_search/recall/in_batch_negative/deploy/C++/rpc_client.py +++ b/applications/neural_search/recall/in_batch_negative/deploy/cpp/rpc_client.py @@ -50,7 +50,7 @@ def convert_example(example, print(fetch_names) # 创建tokenizer -tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') +tokenizer = AutoTokenizer.from_pretrained('rocketqa-zh-base-query-encoder') max_seq_len = 64 # 数据预处理 diff --git a/applications/neural_search/recall/in_batch_negative/deploy/C++/start_server.sh b/applications/neural_search/recall/in_batch_negative/deploy/cpp/start_server.sh similarity index 100% rename from applications/neural_search/recall/in_batch_negative/deploy/C++/start_server.sh rename to applications/neural_search/recall/in_batch_negative/deploy/cpp/start_server.sh diff --git a/applications/neural_search/recall/in_batch_negative/deploy/python/predict.py 
b/applications/neural_search/recall/in_batch_negative/deploy/python/predict.py index 5e592b5c502b..0c2f2209051d 100644 --- a/applications/neural_search/recall/in_batch_negative/deploy/python/predict.py +++ b/applications/neural_search/recall/in_batch_negative/deploy/python/predict.py @@ -40,7 +40,7 @@ help="Batch size per GPU/CPU for training.") parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.") - +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="model name.") parser.add_argument('--use_tensorrt', default=False, type=eval, choices=[True, False], help='Enable to use tensorrt to speed up.') parser.add_argument("--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], @@ -156,22 +156,21 @@ def __init__(self, if args.benchmark: import auto_log pid = os.getpid() - self.autolog = auto_log.AutoLogger(model_name="ernie-3.0-medium-zh", - model_precision=precision, - batch_size=self.batch_size, - data_shape="dynamic", - save_path=args.save_log_path, - inference_config=config, - pids=pid, - process_name=None, - gpu_ids=0, - time_keys=[ - 'preprocess_time', - 'inference_time', - 'postprocess_time' - ], - warmup=0, - logger=logger) + self.autolog = auto_log.AutoLogger( + model_name=args.model_name_or_path, + model_precision=precision, + batch_size=self.batch_size, + data_shape="dynamic", + save_path=args.save_log_path, + inference_config=config, + pids=pid, + process_name=None, + gpu_ids=0, + time_keys=[ + 'preprocess_time', 'inference_time', 'postprocess_time' + ], + warmup=0, + logger=logger) def extract_embedding(self, data, tokenizer): """ @@ -279,7 +278,7 @@ def predict(self, data, tokenizer): # ErnieTinyTokenizer is special for ernie-tiny pretained model. 
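    # The tokenizer must match the backbone the static graph was exported
    # from (rocketqa-zh-base-query-encoder by default); otherwise the token
    # ids fed to the exported model will not line up with training.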
output_emb_size = 256 - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} corpus_list = [{idx: text} for idx, text in id2corpus.items()] res = predictor.extract_embedding(corpus_list, tokenizer) diff --git a/applications/neural_search/recall/in_batch_negative/deploy/python/web_service.py b/applications/neural_search/recall/in_batch_negative/deploy/python/web_service.py index 42be3d9c2029..af239075b0bc 100644 --- a/applications/neural_search/recall/in_batch_negative/deploy/python/web_service.py +++ b/applications/neural_search/recall/in_batch_negative/deploy/python/web_service.py @@ -40,7 +40,8 @@ class ErnieOp(Op): def init_op(self): from paddlenlp.transformers import AutoTokenizer - self.tokenizer = AutoTokenizer.from_pretrained('ernie-1.0') + self.tokenizer = AutoTokenizer.from_pretrained( + "rocketqa-zh-base-query-encoder") def preprocess(self, input_dicts, data_id, log_id): from paddlenlp.data import Stack, Tuple, Pad @@ -56,7 +57,7 @@ def preprocess(self, input_dicts, data_id, log_id): batchify_fn = lambda samples, fn=Tuple( Pad(axis=0, pad_val=self.tokenizer.pad_token_id, dtype="int64" ), # input - Pad(axis=0, pad_val=self.tokenizer.pad_token_id, dtype="int64" + Pad(axis=0, pad_val=self.tokenizer.pad_token_type_id, dtype="int64" ), # segment ): fn(samples) input_ids, segment_ids = batchify_fn(examples) diff --git a/applications/neural_search/recall/in_batch_negative/evaluate.py b/applications/neural_search/recall/in_batch_negative/evaluate.py index 262dca418bb6..449887306067 100644 --- a/applications/neural_search/recall/in_batch_negative/evaluate.py +++ b/applications/neural_search/recall/in_batch_negative/evaluate.py @@ -76,8 +76,6 @@ def recall(rs, N=10): relevance_labels.append(1) else: relevance_labels.append(0) - # print(len(rs)) - # print(rs[:50]) recall_N = [] recall_num = [1, 5, 10, 20, 50] @@ -92,4 +90,3 @@ def recall(rs, N=10): print('recall@{}={}'.format(key, val)) res.append(str(val)) result.write('\t'.join(res) + '\n') - # print("\t".join(recall_N)) diff --git a/applications/neural_search/recall/in_batch_negative/export_model.py b/applications/neural_search/recall/in_batch_negative/export_model.py index 3b98c4fb9134..b2bed9a0bbcf 100644 --- a/applications/neural_search/recall/in_batch_negative/export_model.py +++ b/applications/neural_search/recall/in_batch_negative/export_model.py @@ -28,6 +28,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--params_path", type=str, required=True, default='./checkpoint/model_900/model_state.pdparams', help="The path to model parameters to be loaded.") +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="Select model to train, defaults to rocketqa-zh-base-query-encoder.") parser.add_argument("--output_path", type=str, default='./output', help="The path of model parameter in static graph to be saved.") args = parser.parse_args() @@ -35,8 +36,8 @@ if __name__ == "__main__": output_emb_size = 256 - pretrained_model = AutoModel.from_pretrained("ernie-1.0") - tokenizer = AutoTokenizer.from_pretrained('ernie-1.0') + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) model = SemanticIndexBaseStatic(pretrained_model, output_emb_size=output_emb_size) if args.params_path and os.path.isfile(args.params_path): diff --git 
a/applications/neural_search/recall/in_batch_negative/inference.py b/applications/neural_search/recall/in_batch_negative/inference.py index 5e8b5fc914b3..21bc39b3affa 100644 --- a/applications/neural_search/recall/in_batch_negative/inference.py +++ b/applications/neural_search/recall/in_batch_negative/inference.py @@ -26,9 +26,10 @@ batch_size = 1 params_path = 'checkpoints/inbatch/model_40/model_state.pdparams' id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} + model_name_or_path = "rocketqa-zh-base-query-encoder" paddle.set_device(device) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, max_seq_length=max_seq_length) @@ -38,7 +39,7 @@ Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # text_segment ): [data for data in fn(samples)] - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(model_name_or_path) model = SemanticIndexBaseStatic(pretrained_model, output_emb_size=output_emb_size) diff --git a/applications/neural_search/recall/in_batch_negative/predict.py b/applications/neural_search/recall/in_batch_negative/predict.py index 14b73443c1e4..9e30ed94e71b 100644 --- a/applications/neural_search/recall/in_batch_negative/predict.py +++ b/applications/neural_search/recall/in_batch_negative/predict.py @@ -37,6 +37,7 @@ help="The path to model parameters to be loaded.") parser.add_argument("--max_seq_length", default=64, type=int, help="The maximum total input sequence length after tokenization. " "Sequences longer than this will be truncated, sequences shorter will be padded.") +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="Select model to train, defaults to rocketqa-zh-base-query-encoder.") parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument("--output_emb_size", default=None, @@ -83,7 +84,7 @@ def predict(model, data_loader): if __name__ == "__main__": paddle.set_device(args.device) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, @@ -107,7 +108,7 @@ def predict(model, data_loader): batchify_fn=batchify_fn, trans_fn=trans_func) - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) model = SemanticIndexBase(pretrained_model, output_emb_size=args.output_emb_size) diff --git a/applications/neural_search/recall/in_batch_negative/recall.py b/applications/neural_search/recall/in_batch_negative/recall.py index 78874fc93eb5..f7c73aabec14 100644 --- a/applications/neural_search/recall/in_batch_negative/recall.py +++ b/applications/neural_search/recall/in_batch_negative/recall.py @@ -55,7 +55,7 @@ type=int, help="output_embedding_size") parser.add_argument("--recall_num", default=10, type=int, help="Recall number for each query from Ann index.") - +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="Select model to train, defaults to rocketqa-zh-base-query-encoder.") parser.add_argument("--hnsw_m", default=100, type=int, help="Recall number for each query from Ann index.") parser.add_argument("--hnsw_ef", default=100, type=int, @@ -74,7 +74,7 @@ if paddle.distributed.get_world_size() > 1: 
paddle.distributed.init_parallel_env() - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, @@ -87,7 +87,7 @@ ), # text_segment ): [data for data in fn(samples)] - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) model = SemanticIndexBase(pretrained_model, output_emb_size=args.output_emb_size) diff --git a/applications/neural_search/recall/in_batch_negative/scripts/export_model.sh b/applications/neural_search/recall/in_batch_negative/scripts/export_model.sh index f59ecefbfbab..99d01c7b5aae 100644 --- a/applications/neural_search/recall/in_batch_negative/scripts/export_model.sh +++ b/applications/neural_search/recall/in_batch_negative/scripts/export_model.sh @@ -1 +1,3 @@ -python export_model.py --params_path checkpoints/inbatch/model_40/model_state.pdparams --output_path=./output \ No newline at end of file +python export_model.py --params_path checkpoints/inbatch/model_40/model_state.pdparams \ + --model_name_or_path rocketqa-zh-base-query-encoder \ + --output_path=./output \ No newline at end of file diff --git a/applications/neural_search/recall/in_batch_negative/scripts/predict.sh b/applications/neural_search/recall/in_batch_negative/scripts/predict.sh index 5a253520ded0..3967bb2c9b5d 100644 --- a/applications/neural_search/recall/in_batch_negative/scripts/predict.sh +++ b/applications/neural_search/recall/in_batch_negative/scripts/predict.sh @@ -1,10 +1,10 @@ # gpu version - root_dir="checkpoints/inbatch" -python -u -m paddle.distributed.launch --gpus "3" \ +python -u -m paddle.distributed.launch --gpus "0" \ predict.py \ --device gpu \ --params_path "${root_dir}/model_40/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --output_emb_size 256 \ --batch_size 128 \ --max_seq_length 64 \ diff --git a/applications/neural_search/recall/in_batch_negative/scripts/run_build_index.sh b/applications/neural_search/recall/in_batch_negative/scripts/run_build_index.sh index a9f400dfb401..857302c334a1 100755 --- a/applications/neural_search/recall/in_batch_negative/scripts/run_build_index.sh +++ b/applications/neural_search/recall/in_batch_negative/scripts/run_build_index.sh @@ -6,6 +6,7 @@ python -u -m paddle.distributed.launch --gpus "3" --log_dir "recall_log/" \ --recall_result_dir "recall_result_dir" \ --recall_result_file "recall_result.txt" \ --params_path "${root_dir}/model_40/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --hnsw_m 100 \ --hnsw_ef 100 \ --batch_size 64 \ diff --git a/applications/neural_search/recall/in_batch_negative/scripts/train_batch_neg.sh b/applications/neural_search/recall/in_batch_negative/scripts/train_batch_neg.sh deleted file mode 100644 index f493b89b5fc3..000000000000 --- a/applications/neural_search/recall/in_batch_negative/scripts/train_batch_neg.sh +++ /dev/null @@ -1,62 +0,0 @@ -# GPU training -root_path=inbatch -python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ - train_batch_neg.py \ - --device gpu \ - --save_dir ./checkpoints/${root_path} \ - --batch_size 64 \ - --learning_rate 5E-5 \ - --epochs 3 \ - --output_emb_size 256 \ - --save_steps 10 \ - --max_seq_length 64 \ - --margin 0.2 \ - --train_set_file recall/train.csv - - -# cpu training -# root_path=inbatch -# python train_batch_neg.py \ -# --device cpu \ -# --save_dir ./checkpoints/${root_path} \ -# 
--batch_size 64 \ -# --learning_rate 5E-5 \ -# --epochs 3 \ -# --output_emb_size 256 \ -# --save_steps 10 \ -# --max_seq_length 64 \ -# --margin 0.2 \ -# --train_set_file recall/train.csv - - - -# 加载simcse训练的模型,模型放在simcse/model_20000 -# python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ -# train_batch_neg.py \ -# --device gpu \ -# --save_dir ./checkpoints/simcse_inbatch_negative \ -# --batch_size 64 \ -# --learning_rate 5E-5 \ -# --epochs 3 \ -# --output_emb_size 256 \ -# --save_steps 10 \ -# --max_seq_length 64 \ -# --margin 0.2 \ -# --evaluate \ -# --train_set_file recall/train.csv \ -# --init_from_ckpt simcse/model_20000/model_state.pdparams - -# 加载post training的模型,模型放在simcse/post_model_10000 -# python -u -m paddle.distributed.launch --gpus "0,1,2,3" \ -# train_batch_neg.py \ -# --device gpu \ -# --save_dir ./checkpoints/post_simcse_inbatch_negative \ -# --batch_size 64 \ -# --learning_rate 5E-5 \ -# --epochs 3 \ -# --output_emb_size 256 \ -# --save_steps 10 \ -# --max_seq_length 64 \ -# --margin 0.2 \ -# --train_set_file recall/train.csv \ -# --init_from_ckpt simcse/post_model_10000/model_state.pdparams diff --git a/applications/neural_search/recall/in_batch_negative/train_batch_neg.py b/applications/neural_search/recall/in_batch_negative/train_batch_neg.py index 222b02d16423..10bead311455 100644 --- a/applications/neural_search/recall/in_batch_negative/train_batch_neg.py +++ b/applications/neural_search/recall/in_batch_negative/train_batch_neg.py @@ -37,6 +37,7 @@ "Sequences longer than this will be truncated, sequences shorter will be padded.") parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.") +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="The pretrained model used for training") parser.add_argument("--output_emb_size", default=256, type=int, help="output_embedding_size") parser.add_argument("--learning_rate", default=5E-5, type=float, @@ -172,9 +173,9 @@ def do_train(): data_path=args.train_set_file, lazy=False) - pretrained_model = AutoModel.from_pretrained('ernie-3.0-medium-zh') + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, diff --git a/applications/neural_search/recall/simcse/README.md b/applications/neural_search/recall/simcse/README.md index 9090dd6fcc1e..e5be579103fa 100644 --- a/applications/neural_search/recall/simcse/README.md +++ b/applications/neural_search/recall/simcse/README.md @@ -50,10 +50,10 @@ SimCSE 模型适合缺乏监督数据,但是又有大量无监督数据的匹 **效果评估** -| 模型 | Recall@1 | Recall@5 |Recall@10 |Recall@20 |Recall@50 |策略简要说明| +| 策略 | 模型| Recall@1 | Recall@5 |Recall@10 |Recall@20 |Recall@50 | | ------------ | ------------ | ------------ |--------- |--------- |--------- |--------- | -| SimCSE | 42.374 | 57.505| 62.641| 67.09|72.331| SimCSE无监督训练| - +| SimCSE | ernie 1.0 |42.374 | 57.505| 62.641| 67.09|72.331| +| SimCSE | rocketqa-zh-base-query-encoder |**50.108** | **64.005**| **68.288**| **72.306**|**77.306**| @@ -151,14 +151,6 @@ simcse/ ## 5. 模型训练 -**语义索引预训练模型下载链接:** - -以下模型结构参数为: `TrasformerLayer:12, Hidden:768, Heads:12, OutputEmbSize: 256` - -|Model|训练参数配置|硬件|MD5| -| ------------ | ------------ | ------------ |-----------| -|[SimCSE](https://bj.bcebos.com/v1/paddlenlp/models/simcse_model.zip)|
epoch:3 lr:5E-5 bs:64 max_len:64|4卡 v100-16g
|7c46d9b15a214292e3897c0eb70d0c9f| - ### 训练环境说明 + NVIDIA Driver Version: 440.64.00 @@ -188,7 +180,8 @@ python -u -m paddle.distributed.launch --gpus '0,1,2,3' \ --dropout 0.2 \ --output_emb_size 256 \ --train_set_file "./recall/train_unsupervised.csv" \ - --test_set_file "./recall/dev.csv" + --test_set_file "./recall/dev.csv" \ + --model_name_or_path "rocketqa-zh-base-query-encoder" ``` 也可以使用bash脚本: @@ -213,6 +206,7 @@ sh scripts/train.sh * `init_from_ckpt`:可选,模型参数路径,热启动模型训练;默认为None。 * `seed`:可选,随机种子,默认为1000. * `device`: 选用什么设备进行训练,可选cpu或gpu。如使用gpu训练则参数gpus指定GPU卡号。 +* `model_name_or_path`: 预训练模型,用于模型和`Tokenizer`的参数初始化。 程序运行时将会自动进行训练,评估。同时训练过程中会自动保存模型在指定的`save_dir`中。 如: @@ -255,7 +249,8 @@ python -u -m paddle.distributed.launch --gpus "6" --log_dir "recall_log/" \ --device gpu \ --recall_result_dir "recall_result_dir" \ --recall_result_file "recall_result.txt" \ - --params_path "checkpoints/model_20000/model_state.pdparams" \ + --params_path "checkpoints/model_12000/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --hnsw_m 100 \ --hnsw_ef 100 \ --batch_size 64 \ @@ -314,7 +309,7 @@ recall@50=74.848 修改 inference.py 文件里面输入文本 id2corpus 和模型路径 params_path: ``` -params_path='checkpoints/model_20000/model_state.pdparams' +params_path='checkpoints/model_12000/model_state.pdparams' id2corpus={0:'国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} ``` 然后运行 @@ -352,7 +347,8 @@ root_dir="checkpoints" python -u -m paddle.distributed.launch --gpus "3" \ predict.py \ --device gpu \ - --params_path "${root_dir}/model_20000/model_state.pdparams" \ + --params_path "${root_dir}/model_12000/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --output_emb_size 256 \ --batch_size 128 \ --max_seq_length 64 \ @@ -362,6 +358,7 @@ python -u -m paddle.distributed.launch --gpus "3" \ 参数含义说明 * `device`: 使用 cpu/gpu 进行训练 * `params_path`: 预训练模型的参数文件名 +* `model_name_or_path`: 预训练模型,用于模型和`Tokenizer`的参数初始化。 * `output_emb_size`: Transformer 顶层输出的文本向量维度 * `text_pair_file`: 由文本 Pair 构成的待预测数据集 @@ -388,7 +385,9 @@ sh scripts/predict.sh 首先把动态图模型转换为静态图: ``` -python export_model.py --params_path checkpoints/model_20000/model_state.pdparams --output_path=./output +python export_model.py --params_path checkpoints/model_12000/model_state.pdparams \ + --model_name_or_path rocketqa-zh-base-query-encoder \ + --output_path=./output ``` 也可以运行下面的bash脚本: diff --git a/applications/neural_search/recall/simcse/deploy/python/predict.py b/applications/neural_search/recall/simcse/deploy/python/predict.py index af5b28f9aa1d..93e1e8106111 100644 --- a/applications/neural_search/recall/simcse/deploy/python/predict.py +++ b/applications/neural_search/recall/simcse/deploy/python/predict.py @@ -40,7 +40,7 @@ help="Batch size per GPU/CPU for training.") parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.") - +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="model name.") parser.add_argument('--use_tensorrt', default=False, type=eval, choices=[True, False], help='Enable to use tensorrt to speed up.') parser.add_argument("--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], @@ -153,22 +153,21 @@ def __init__(self, if args.benchmark: import auto_log pid = os.getpid() - self.autolog = auto_log.AutoLogger(model_name="ernie-3.0-medium-zh", - model_precision=precision, - batch_size=self.batch_size, - data_shape="dynamic", - save_path=args.save_log_path, - 
inference_config=config, - pids=pid, - process_name=None, - gpu_ids=0, - time_keys=[ - 'preprocess_time', - 'inference_time', - 'postprocess_time' - ], - warmup=0, - logger=logger) + self.autolog = auto_log.AutoLogger( + model_name=args.model_name_or_path, + model_precision=precision, + batch_size=self.batch_size, + data_shape="dynamic", + save_path=args.save_log_path, + inference_config=config, + pids=pid, + process_name=None, + gpu_ids=0, + time_keys=[ + 'preprocess_time', 'inference_time', 'postprocess_time' + ], + warmup=0, + logger=logger) def extract_embedding(self, data, tokenizer): """ @@ -276,7 +275,7 @@ def predict(self, data, tokenizer): # ErnieTinyTokenizer is special for ernie-tiny pretained model. output_emb_size = 256 - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} corpus_list = [{idx: text} for idx, text in id2corpus.items()] res = predictor.extract_embedding(corpus_list, tokenizer) diff --git a/applications/neural_search/recall/simcse/evaluate.py b/applications/neural_search/recall/simcse/evaluate.py index d211cbd3cf6d..b0dc868ba876 100644 --- a/applications/neural_search/recall/simcse/evaluate.py +++ b/applications/neural_search/recall/simcse/evaluate.py @@ -84,4 +84,3 @@ def recall(rs, N=10): print('recall@{}={}'.format(key, val)) res.append(str(val)) result.write('\t'.join(res) + '\n') - # print("\t".join(recall_N)) diff --git a/applications/neural_search/recall/simcse/export_model.py b/applications/neural_search/recall/simcse/export_model.py index c4598fa9acfa..8781877c8bec 100644 --- a/applications/neural_search/recall/simcse/export_model.py +++ b/applications/neural_search/recall/simcse/export_model.py @@ -14,7 +14,6 @@ import argparse import os -from functools import partial import numpy as np import paddle @@ -28,6 +27,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--params_path", type=str, required=True, default='./checkpoint/model_900/model_state.pdparams', help="The path to model parameters to be loaded.") parser.add_argument("--output_path", type=str, default='./output', help="The path of model parameter in static graph to be saved.") +parser.add_argument("--model_name_or_path",default='rocketqa-zh-base-query-encoder',type=str,help='The pretrained model used for training') args = parser.parse_args() # yapf: enable @@ -35,9 +35,9 @@ # If you want to use ernie1.0 model, plesace uncomment the following code output_emb_size = 256 - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) model = SimCSE(pretrained_model, output_emb_size=output_emb_size) if args.params_path and os.path.isfile(args.params_path): diff --git a/applications/neural_search/recall/simcse/inference.py b/applications/neural_search/recall/simcse/inference.py index 8788b35cf1cc..0e11c6ad65e4 100644 --- a/applications/neural_search/recall/simcse/inference.py +++ b/applications/neural_search/recall/simcse/inference.py @@ -57,9 +57,10 @@ def convert_example(example, tokenizer, max_seq_length=512, do_evalute=False): batch_size = 1 params_path = 'checkpoints/model_20000/model_state.pdparams' id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} + model_name_or_path = "rocketqa-zh-base-query-encoder" paddle.set_device(device) 
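    # The tokenizer and backbone below are both built from model_name_or_path
    # so that tokenization stays consistent with the fine-tuned weights
    # loaded from params_path.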
- tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, max_seq_length=max_seq_length) @@ -69,7 +70,7 @@ def convert_example(example, tokenizer, max_seq_length=512, do_evalute=False): Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # text_segment ): [data for data in fn(samples)] - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(model_name_or_path) model = SimCSE(pretrained_model, output_emb_size=output_emb_size) diff --git a/applications/neural_search/recall/simcse/predict.py b/applications/neural_search/recall/simcse/predict.py index 02dc6147de70..9f96660a1cc9 100644 --- a/applications/neural_search/recall/simcse/predict.py +++ b/applications/neural_search/recall/simcse/predict.py @@ -40,6 +40,8 @@ parser.add_argument("--margin", default=0.0, type=float, help="Margin beteween pos_sample and neg_samples.") parser.add_argument("--scale", default=20, type=int, help="Scale for pair-wise margin_rank_loss.") parser.add_argument("--output_emb_size", default=0, type=int, help="Output_embedding_size, 0 means use hidden_size as output embedding size.") +parser.add_argument("--model_name_or_path",default='rocketqa-zh-base-query-encoder',type=str,help='The pretrained model used for training') + args = parser.parse_args() # yapf: enable @@ -80,7 +82,7 @@ def predict(model, data_loader): if __name__ == "__main__": paddle.set_device(args.device) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func = partial(convert_example, tokenizer=tokenizer, @@ -104,7 +106,7 @@ def predict(model, data_loader): batchify_fn=batchify_fn, trans_fn=trans_func) - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) model = SimCSE(pretrained_model, margin=args.margin, diff --git a/applications/neural_search/recall/simcse/recall.py b/applications/neural_search/recall/simcse/recall.py index 7d3d5e31b8f2..c784878b9e41 100644 --- a/applications/neural_search/recall/simcse/recall.py +++ b/applications/neural_search/recall/simcse/recall.py @@ -47,11 +47,10 @@ parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument("--output_emb_size", default=None, type=int, help="output_embedding_size") parser.add_argument("--recall_num", default=10, type=int, help="Recall number for each query from Ann index.") - parser.add_argument("--hnsw_m", default=100, type=int, help="Recall number for each query from Ann index.") parser.add_argument("--hnsw_ef", default=100, type=int, help="Recall number for each query from Ann index.") parser.add_argument("--hnsw_max_elements", default=1000000, type=int, help="Recall number for each query from Ann index.") - +parser.add_argument("--model_name_or_path",default='rocketqa-zh-base-query-encoder',type=str,help='The pretrained model used for training') parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to train model, defaults to gpu.") args = parser.parse_args() # yapf: enable @@ -62,7 +61,7 @@ if paddle.distributed.get_world_size() > 1: paddle.distributed.init_parallel_env() - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func 
= partial(convert_example_test, tokenizer=tokenizer, @@ -75,7 +74,7 @@ ), # text_segment ): [data for data in fn(samples)] - pretrained_model = AutoModel.from_pretrained("ernie-3.0-medium-zh") + pretrained_model = AutoModel.from_pretrained(args.model_name_or_path) model = SimCSE(pretrained_model, output_emb_size=args.output_emb_size) model = paddle.DataParallel(model) @@ -107,7 +106,6 @@ final_index = build_index(args, corpus_data_loader, inner_model) text_list, text2similar_text = gen_text_file(args.similar_text_pair_file) - # print(text_list[:5]) query_ds = MapDataset(text_list) diff --git a/applications/neural_search/recall/simcse/scripts/export_model.sh b/applications/neural_search/recall/simcse/scripts/export_model.sh index f011b5fc900b..629440b9b079 100644 --- a/applications/neural_search/recall/simcse/scripts/export_model.sh +++ b/applications/neural_search/recall/simcse/scripts/export_model.sh @@ -1 +1,3 @@ -python export_model.py --params_path checkpoints/model_20000/model_state.pdparams --output_path=./output \ No newline at end of file +python export_model.py --params_path checkpoints/model_12000/model_state.pdparams \ + --model_name_or_path rocketqa-zh-base-query-encoder \ + --output_path=./output \ No newline at end of file diff --git a/applications/neural_search/recall/simcse/scripts/predict.sh b/applications/neural_search/recall/simcse/scripts/predict.sh index 141ea70d1b2d..758e3ecf1696 100644 --- a/applications/neural_search/recall/simcse/scripts/predict.sh +++ b/applications/neural_search/recall/simcse/scripts/predict.sh @@ -3,10 +3,11 @@ root_dir="checkpoints" python -u -m paddle.distributed.launch --gpus "3" \ predict.py \ --device gpu \ - --params_path "${root_dir}/model_20000/model_state.pdparams" \ + --params_path "${root_dir}/model_12000/model_state.pdparams" \ --output_emb_size 256 \ --batch_size 128 \ --max_seq_length 64 \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --text_pair_file "recall/test.csv" # cpu diff --git a/applications/neural_search/recall/simcse/scripts/run_build_index.sh b/applications/neural_search/recall/simcse/scripts/run_build_index.sh index b13fd69ed347..eee1ad359359 100755 --- a/applications/neural_search/recall/simcse/scripts/run_build_index.sh +++ b/applications/neural_search/recall/simcse/scripts/run_build_index.sh @@ -1,10 +1,11 @@ # gpu -python -u -m paddle.distributed.launch --gpus "6" --log_dir "recall_log/" \ +python -u -m paddle.distributed.launch --gpus "0" --log_dir "recall_log/" \ recall.py \ --device gpu \ --recall_result_dir "recall_result_dir" \ --recall_result_file "recall_result.txt" \ - --params_path "checkpoints/model_20000/model_state.pdparams" \ + --params_path "checkpoints/model_12000/model_state.pdparams" \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --hnsw_m 100 \ --hnsw_ef 100 \ --batch_size 64 \ diff --git a/applications/neural_search/recall/simcse/scripts/train.sh b/applications/neural_search/recall/simcse/scripts/train.sh index 79822037063e..60817e0ff7b5 100644 --- a/applications/neural_search/recall/simcse/scripts/train.sh +++ b/applications/neural_search/recall/simcse/scripts/train.sh @@ -1,5 +1,5 @@ # simcse gpu -python -u -m paddle.distributed.launch --gpus '0,1,2,3' \ +python -u -m paddle.distributed.launch --gpus '1,2,3,4' \ train.py \ --device gpu \ --save_dir ./checkpoints/ \ @@ -14,7 +14,7 @@ python -u -m paddle.distributed.launch --gpus '0,1,2,3' \ --output_emb_size 256 \ --train_set_file "./recall/train_unsupervised.csv" \ --test_set_file "./recall/dev.csv" \ - --model_name_or_path 
"ernie-3.0-medium-zh" + --model_name_or_path "rocketqa-zh-base-query-encoder" # simcse cpu # python train.py \ diff --git a/applications/neural_search/recall/simcse/train.py b/applications/neural_search/recall/simcse/train.py index d9d49e189529..c28672a79000 100644 --- a/applications/neural_search/recall/simcse/train.py +++ b/applications/neural_search/recall/simcse/train.py @@ -56,7 +56,7 @@ parser.add_argument("--scale", default=20, type=int, help="Scale for pair-wise margin_rank_loss.") parser.add_argument("--dropout", default=0.1, type=float, help="Dropout for pretrained model encoder.") parser.add_argument("--infer_with_fc_pooler", action='store_true', help="Whether use fc layer after cls embedding or not for when infer.") -parser.add_argument("--model_name_or_path",default='ernie-3.0-medium-zh',type=str,help='pretrained model') +parser.add_argument("--model_name_or_path",default='rocketqa-zh-base-query-encoder',type=str,help='The pretrained model used for training') args = parser.parse_args() @@ -84,7 +84,7 @@ def do_train(): hidden_dropout_prob=args.dropout, attention_probs_dropout_prob=args.dropout) print("loading model from {}".format(args.model_name_or_path)) - tokenizer = AutoTokenizer.from_pretrained('ernie-3.0-medium-zh') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) trans_func = partial( convert_example, diff --git a/pipelines/examples/semantic-search/README.md b/pipelines/examples/semantic-search/README.md index 9ada8926840e..fc5d1076f3e1 100644 --- a/pipelines/examples/semantic-search/README.md +++ b/pipelines/examples/semantic-search/README.md @@ -17,7 +17,7 @@ ## 2. 产品功能介绍 -本项目提供了低成本搭建端到端语义检索系统的能力。用户只需要处理好自己的业务数据,就可以使用本项目预置的语义检索系统模型(召回模型、排序模型)快速搭建一个针对自己业务数据的问答系统,并可以提供 Web 化产品服务。 +本项目提供了低成本搭建端到端语义检索系统的能力。用户只需要处理好自己的业务数据,就可以使用本项目预置的语义检索系统模型(召回模型、排序模型)快速搭建一个针对自己业务数据的问答系统,并可以提供 Web 化产品服务。以下是使用预置模型的教程,如果用户想接入自己训练的模型,可以参考[Neural Search的流程](./Neural_Search.md)。 ### 2.1 系统特色 diff --git a/pipelines/examples/semantic-search/run_neural_search_server.sh b/pipelines/examples/semantic-search/run_neural_search_server.sh new file mode 100644 index 000000000000..3edad2c52818 --- /dev/null +++ b/pipelines/examples/semantic-search/run_neural_search_server.sh @@ -0,0 +1,5 @@ +# 指定语义检索系统的Yaml配置文件 +export CUDA_VISIBLE_DEVICES=0 +export PIPELINE_YAML_PATH=rest_api/pipeline/semantic_search_custom.yaml +# 使用端口号 8891 启动模型服务 +python rest_api/application.py 8891 \ No newline at end of file diff --git a/pipelines/examples/semantic-search/run_search_web.sh b/pipelines/examples/semantic-search/run_search_web.sh index 05530d8779eb..a1273daf018d 100644 --- a/pipelines/examples/semantic-search/run_search_web.sh +++ b/pipelines/examples/semantic-search/run_search_web.sh @@ -1,4 +1,5 @@ unset http_proxy && unset https_proxy +export PYTHONPATH=/wugaosheng/workplace/PaddleNLP/pipelines:$PYTHONPATH # 配置模型服务地址 export API_ENDPOINT=http://127.0.0.1:8891 # 在指定端口 8502 启动 WebUI diff --git a/pipelines/examples/semantic-search/semantic_search_example.py b/pipelines/examples/semantic-search/semantic_search_example.py index c24be521b393..b21b24b7631c 100644 --- a/pipelines/examples/semantic-search/semantic_search_example.py +++ b/pipelines/examples/semantic-search/semantic_search_example.py @@ -13,6 +13,23 @@ parser.add_argument("--max_seq_len_query", default=64, type=int, help="The maximum total length of query after tokenization.") parser.add_argument("--max_seq_len_passage", default=256, type=int, help="The maximum total length of passage after tokenization.") 
parser.add_argument("--retriever_batch_size", default=16, type=int, help="The batch size of retriever to extract passage embedding for building ANN index.") +parser.add_argument("--query_embedding_model", + default="rocketqa-zh-nano-query-encoder", + type=str, + help="The query_embedding_model path") + +parser.add_argument("--passage_embedding_model", + default="rocketqa-zh-nano-para-encoder", + type=str, + help="The passage_embedding_model path") +parser.add_argument("--params_path", + default="checkpoints/model_40/model_state.pdparams", + type=str, + help="The checkpoint path") +parser.add_argument("--embedding_dim", + default=312, + type=int, + help="The embedding_dim of index") args = parser.parse_args() # yapf: enable @@ -25,16 +42,29 @@ def semantic_search_tutorial(): if os.path.exists(args.index_name) and os.path.exists(faiss_document_store): # connect to existed FAISS Index document_store = FAISSDocumentStore.load(args.index_name) - retriever = DensePassageRetriever( - document_store=document_store, - query_embedding_model="rocketqa-zh-dureader-query-encoder", - passage_embedding_model="rocketqa-zh-dureader-query-encoder", - max_seq_len_query=args.max_seq_len_query, - max_seq_len_passage=args.max_seq_len_passage, - batch_size=args.retriever_batch_size, - use_gpu=use_gpu, - embed_title=False, - ) + if (os.path.exists(args.params_path)): + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + params_path=args.params_path, + output_emb_size=args.embedding_dim, + max_seq_len_query=args.max_seq_len_query, + max_seq_len_passage=args.max_seq_len_passage, + batch_size=args.retriever_batch_size, + use_gpu=use_gpu, + embed_title=False, + ) + else: + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + passage_embedding_model=args.passage_embedding_model, + max_seq_len_query=args.max_seq_len_query, + max_seq_len_passage=args.max_seq_len_passage, + batch_size=args.retriever_batch_size, + use_gpu=use_gpu, + embed_title=False, + ) else: doc_dir = "data/dureader_dev" dureader_data = "https://paddlenlp.bj.bcebos.com/applications/dureader_dev.zip" @@ -49,20 +79,33 @@ def semantic_search_tutorial(): if os.path.exists(faiss_document_store): os.remove(faiss_document_store) - document_store = FAISSDocumentStore(embedding_dim=768, + document_store = FAISSDocumentStore(embedding_dim=args.embedding_dim, faiss_index_factory_str="Flat") document_store.write_documents(dicts) - retriever = DensePassageRetriever( - document_store=document_store, - query_embedding_model="rocketqa-zh-dureader-query-encoder", - passage_embedding_model="rocketqa-zh-dureader-query-encoder", - max_seq_len_query=args.max_seq_len_query, - max_seq_len_passage=args.max_seq_len_passage, - batch_size=args.retriever_batch_size, - use_gpu=use_gpu, - embed_title=False, - ) + if (os.path.exists(args.params_path)): + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + params_path=args.params_path, + output_emb_size=args.embedding_dim, + max_seq_len_query=args.max_seq_len_query, + max_seq_len_passage=args.max_seq_len_passage, + batch_size=args.retriever_batch_size, + use_gpu=use_gpu, + embed_title=False, + ) + else: + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + passage_embedding_model=args.passage_embedding_model, + max_seq_len_query=args.max_seq_len_query, + 
max_seq_len_passage=args.max_seq_len_passage, + batch_size=args.retriever_batch_size, + use_gpu=use_gpu, + embed_title=False, + ) # update Embedding document_store.update_embeddings(retriever) diff --git a/pipelines/pipelines/nodes/models/__init__.py b/pipelines/pipelines/nodes/models/__init__.py new file mode 100644 index 000000000000..231705d96e9d --- /dev/null +++ b/pipelines/pipelines/nodes/models/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pipelines.nodes.models.neural_search_model import SemanticIndexBatchNeg \ No newline at end of file diff --git a/pipelines/pipelines/nodes/models/neural_search_model.py b/pipelines/pipelines/nodes/models/neural_search_model.py new file mode 100644 index 000000000000..fd77f2a42ce0 --- /dev/null +++ b/pipelines/pipelines/nodes/models/neural_search_model.py @@ -0,0 +1,151 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
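+# Dual-encoder definitions ported from the Neural Search application:
+# SemanticIndexBase pools the [CLS] embedding and optionally projects it
+# down to output_emb_size; SemanticIndexBatchNeg adds the in-batch
+# negatives loss used at training time.
+#
+# Minimal loading sketch (mirrors how retriever/dense.py uses this class;
+# the checkpoint path is an example):
+#   pretrained = AutoModel.from_pretrained("rocketqa-zh-base-query-encoder")
+#   model = SemanticIndexBatchNeg(pretrained, output_emb_size=256)
+#   model.set_dict(paddle.load("model_state.pdparams"))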
+ +import sys +import abc + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class SemanticIndexBase(nn.Layer): + + def __init__(self, pretrained_model, dropout=None, output_emb_size=None): + super().__init__() + self.ptm = pretrained_model + self.dropout = nn.Dropout(dropout if dropout is not None else 0.1) + + # if output_emb_size is not None, then add Linear layer to reduce embedding_size, + # we recommend set output_emb_size = 256 considering the trade-off beteween + # recall performance and efficiency + + self.output_emb_size = output_emb_size + if output_emb_size > 0: + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.TruncatedNormal(std=0.02)) + self.emb_reduce_linear = paddle.nn.Linear(768, + output_emb_size, + weight_attr=weight_attr) + + def get_pooled_embedding(self, + input_ids, + token_type_ids=None, + position_ids=None, + attention_mask=None): + _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids, + attention_mask) + + if self.output_emb_size > 0: + cls_embedding = self.emb_reduce_linear(cls_embedding) + cls_embedding = self.dropout(cls_embedding) + cls_embedding = F.normalize(cls_embedding, p=2, axis=-1) + + return cls_embedding + + def get_semantic_embedding(self, data_loader): + self.eval() + with paddle.no_grad(): + for batch_data in data_loader: + input_ids, token_type_ids = batch_data + + text_embeddings = self.get_pooled_embedding( + input_ids, token_type_ids=token_type_ids) + + yield text_embeddings + + def cosine_sim(self, + query_input_ids, + title_input_ids, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None): + + query_cls_embedding = self.get_pooled_embedding(query_input_ids, + query_token_type_ids, + query_position_ids, + query_attention_mask) + + title_cls_embedding = self.get_pooled_embedding(title_input_ids, + title_token_type_ids, + title_position_ids, + title_attention_mask) + + cosine_sim = paddle.sum(query_cls_embedding * title_cls_embedding, + axis=-1) + return cosine_sim + + @abc.abstractmethod + def forward(self): + pass + + +class SemanticIndexBatchNeg(SemanticIndexBase): + + def __init__(self, + pretrained_model, + dropout=None, + margin=0.3, + scale=30, + output_emb_size=None): + super().__init__(pretrained_model, dropout, output_emb_size) + + self.margin = margin + # Used scaling cosine similarity to ease converge + self.sacle = scale + + def forward(self, + query_input_ids, + title_input_ids, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None): + + query_cls_embedding = self.get_pooled_embedding(query_input_ids, + query_token_type_ids, + query_position_ids, + query_attention_mask) + + title_cls_embedding = self.get_pooled_embedding(title_input_ids, + title_token_type_ids, + title_position_ids, + title_attention_mask) + + cosine_sim = paddle.matmul(query_cls_embedding, + title_cls_embedding, + transpose_y=True) + + # substract margin from all positive samples cosine_sim() + margin_diag = paddle.full(shape=[query_cls_embedding.shape[0]], + fill_value=self.margin, + dtype=paddle.get_default_dtype()) + + cosine_sim = cosine_sim - paddle.diag(margin_diag) + + # scale cosine to ease training converge + cosine_sim *= self.sacle + + labels = paddle.arange(0, query_cls_embedding.shape[0], dtype='int64') + labels = paddle.reshape(labels, 
shape=[-1, 1]) + + loss = F.cross_entropy(input=cosine_sim, label=labels) + + return loss diff --git a/pipelines/pipelines/nodes/retriever/dense.py b/pipelines/pipelines/nodes/retriever/dense.py index 728e8d400490..2baeac9ee0a0 100644 --- a/pipelines/pipelines/nodes/retriever/dense.py +++ b/pipelines/pipelines/nodes/retriever/dense.py @@ -14,6 +14,7 @@ # limitations under the License. from typing import List, Dict, Union, Optional +import os import logging import numpy as np @@ -22,11 +23,12 @@ import paddle from paddlenlp.data import Stack, Tuple, Pad -from paddlenlp.transformers import ErnieDualEncoder, AutoTokenizer +from paddlenlp.transformers import ErnieDualEncoder, AutoTokenizer, AutoModel from pipelines.schema import Document from pipelines.document_stores import BaseDocumentStore from pipelines.nodes.retriever.base import BaseRetriever +from pipelines.nodes.models import SemanticIndexBatchNeg from pipelines.data_handler.processor import TextSimilarityProcessor from pipelines.utils.common_utils import initialize_device_settings @@ -45,7 +47,9 @@ def __init__( Path, str] = "rocketqa-zh-dureader-query-encoder", passage_embedding_model: Union[ Path, str] = "rocketqa-zh-dureader-para-encoder", + params_path: Optional[str] = None, model_version: Optional[str] = None, + output_emb_size=256, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, @@ -133,12 +137,25 @@ def __init__( "This can be set when initializing the DocumentStore") # Init & Load Encoders - self.ernie_dual_encoder = ErnieDualEncoder(query_embedding_model, - passage_embedding_model) - self.query_tokenizer = AutoTokenizer.from_pretrained( - query_embedding_model) - self.passage_tokenizer = AutoTokenizer.from_pretrained( - passage_embedding_model) + if (os.path.exists(params_path)): + pretrained_model = AutoModel.from_pretrained(query_embedding_model) + self.ernie_dual_encoder = SemanticIndexBatchNeg( + pretrained_model, output_emb_size=output_emb_size) + # Load Custom models + print("Loading Parameters from:{}".format(params_path)) + state_dict = paddle.load(params_path) + self.ernie_dual_encoder.set_dict(state_dict) + self.query_tokenizer = AutoTokenizer.from_pretrained( + query_embedding_model) + self.passage_tokenizer = AutoTokenizer.from_pretrained( + query_embedding_model) + else: + self.ernie_dual_encoder = ErnieDualEncoder(query_embedding_model, + passage_embedding_model) + self.query_tokenizer = AutoTokenizer.from_pretrained( + query_embedding_model) + self.passage_tokenizer = AutoTokenizer.from_pretrained( + passage_embedding_model) self.processor = TextSimilarityProcessor( query_tokenizer=self.query_tokenizer, diff --git a/pipelines/rest_api/pipeline/semantic_search.yaml b/pipelines/rest_api/pipeline/semantic_search.yaml index 855e4811ef3f..3367384da48c 100644 --- a/pipelines/rest_api/pipeline/semantic_search.yaml +++ b/pipelines/rest_api/pipeline/semantic_search.yaml @@ -6,7 +6,7 @@ components: # define all the building-blocks for Pipeline params: host: localhost port: 9200 - index: dureader_robust_query_encoder + index: dureader_robust_nano_encoder embedding_dim: 312 - name: Retriever type: DensePassageRetriever diff --git a/pipelines/rest_api/pipeline/semantic_search_custom.yaml b/pipelines/rest_api/pipeline/semantic_search_custom.yaml new file mode 100644 index 000000000000..b0aab6ac5a99 --- /dev/null +++ b/pipelines/rest_api/pipeline/semantic_search_custom.yaml @@ -0,0 +1,67 @@ +version: '1.1.0' + +components: # define all the building-blocks for Pipeline + - name: DocumentStore + 
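+    # This pipeline loads fine-tuned Neural Search weights via params_path;
+    # embedding_dim here and output_emb_size on the Retriever must both
+    # match the checkpoint's projection size (256).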
type: ElasticsearchDocumentStore # consider using MilvusDocumentStore or WeaviateDocumentStore for scaling to large number of documents + params: + host: localhost + port: 9200 + index: dureader_robust_neural_search + embedding_dim: 256 + - name: Retriever + type: DensePassageRetriever + params: + document_store: DocumentStore # params can reference other components defined in the YAML + top_k: 10 + query_embedding_model: rocketqa-zh-base-query-encoder + params_path: checkpoints/model_40/model_state.pdparams + output_emb_size: 256 + embed_title: False + - name: Ranker # custom-name for the component; helpful for visualization & debugging + type: ErnieRanker # pipelines Class name for the component + params: + model_name_or_path: rocketqa-nano-cross-encoder + top_k: 3 + - name: TextFileConverter + type: TextConverter + - name: ImageFileConverter + type: ImageToTextConverter + - name: PDFFileConverter + type: PDFToTextConverter + - name: DocxFileConverter + type: DocxToTextConverter + - name: Preprocessor + type: PreProcessor + params: + split_by: word + split_length: 1000 + - name: FileTypeClassifier + type: FileTypeClassifier + +pipelines: + - name: query # a sample extractive-qa Pipeline + type: Query + nodes: + - name: Retriever + inputs: [Query] + - name: Ranker + inputs: [Retriever] + - name: indexing + type: Indexing + nodes: + - name: FileTypeClassifier + inputs: [File] + - name: TextFileConverter + inputs: [FileTypeClassifier.output_1] + - name: PDFFileConverter + inputs: [FileTypeClassifier.output_2] + - name: DocxFileConverter + inputs: [FileTypeClassifier.output_4] + - name: ImageFileConverter + inputs: [FileTypeClassifier.output_6] + - name: Preprocessor + inputs: [PDFFileConverter, TextFileConverter, DocxFileConverter, ImageFileConverter] + - name: Retriever + inputs: [Preprocessor] + - name: DocumentStore + inputs: [Retriever] diff --git a/pipelines/utils/offline_ann.py b/pipelines/utils/offline_ann.py index 2241f585fc0d..373f57d5eda2 100644 --- a/pipelines/utils/offline_ann.py +++ b/pipelines/utils/offline_ann.py @@ -1,4 +1,5 @@ import argparse +import os import paddle from pipelines.utils import convert_files_to_dicts, fetch_archive_from_http @@ -51,6 +52,11 @@ type=str, help="The passage_embedding_model path") +parser.add_argument("--params_path", + default="checkpoints/model_40/model_state.pdparams", + type=str, + help="The checkpoint path") + parser.add_argument( '--delete_index', action='store_true', @@ -82,16 +88,30 @@ def offline_ann(index_name, doc_dir): document_store.write_documents(dicts) ### 语义索引模型 - retriever = DensePassageRetriever( - document_store=document_store, - query_embedding_model=args.query_embedding_model, - passage_embedding_model=args.passage_embedding_model, - max_seq_len_query=64, - max_seq_len_passage=256, - batch_size=16, - use_gpu=True, - embed_title=False, - ) + if (os.path.exists(args.params_path)): + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + params_path=args.params_path, + output_emb_size=args.embedding_dim, + max_seq_len_query=64, + max_seq_len_passage=256, + batch_size=16, + use_gpu=True, + embed_title=False, + ) + + else: + retriever = DensePassageRetriever( + document_store=document_store, + query_embedding_model=args.query_embedding_model, + passage_embedding_model=args.passage_embedding_model, + max_seq_len_query=64, + max_seq_len_passage=256, + batch_size=16, + use_gpu=True, + embed_title=False, + ) # 建立索引库 document_store.update_embeddings(retriever) From 
3c8ad6bf9508fddc4286a9e33d5b071701d0be73 Mon Sep 17 00:00:00 2001 From: w5688414 Date: Thu, 1 Sep 2022 03:14:18 +0000 Subject: [PATCH 2/3] Adjust the format --- .../neural_search/recall/in_batch_negative/recall.py | 2 +- applications/neural_search/recall/simcse/README.md | 8 ++++++++ pipelines/examples/semantic-search/run_search_web.sh | 1 - pipelines/rest_api/pipeline/semantic_search.yaml | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/applications/neural_search/recall/in_batch_negative/recall.py b/applications/neural_search/recall/in_batch_negative/recall.py index f7c73aabec14..c0f3c64398bd 100644 --- a/applications/neural_search/recall/in_batch_negative/recall.py +++ b/applications/neural_search/recall/in_batch_negative/recall.py @@ -55,7 +55,7 @@ type=int, help="output_embedding_size") parser.add_argument("--recall_num", default=10, type=int, help="Recall number for each query from Ann index.") -parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="Select model to train, defaults to rocketqa-zh-base-query-encoder.") +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="The pretrained model used for training") parser.add_argument("--hnsw_m", default=100, type=int, help="Recall number for each query from Ann index.") parser.add_argument("--hnsw_ef", default=100, type=int, diff --git a/applications/neural_search/recall/simcse/README.md b/applications/neural_search/recall/simcse/README.md index e5be579103fa..033afd18008f 100644 --- a/applications/neural_search/recall/simcse/README.md +++ b/applications/neural_search/recall/simcse/README.md @@ -151,6 +151,14 @@ simcse/ ## 5. 模型训练 +**语义索引预训练模型下载链接:** + +以下模型结构参数为: `TrasformerLayer:12, Hidden:768, Heads:12, OutputEmbSize: 256` + +|Model|训练参数配置|硬件|MD5| +| ------------ | ------------ | ------------ |-----------| +|[SimCSE](https://bj.bcebos.com/v1/paddlenlp/models/simcse_model.zip)|
ernie 1.0 epoch:3 lr:5E-5 bs:64 max_len:64|4卡 v100-16g
|7c46d9b15a214292e3897c0eb70d0c9f| + ### 训练环境说明 + NVIDIA Driver Version: 440.64.00 diff --git a/pipelines/examples/semantic-search/run_search_web.sh b/pipelines/examples/semantic-search/run_search_web.sh index a1273daf018d..05530d8779eb 100644 --- a/pipelines/examples/semantic-search/run_search_web.sh +++ b/pipelines/examples/semantic-search/run_search_web.sh @@ -1,5 +1,4 @@ unset http_proxy && unset https_proxy -export PYTHONPATH=/wugaosheng/workplace/PaddleNLP/pipelines:$PYTHONPATH # 配置模型服务地址 export API_ENDPOINT=http://127.0.0.1:8891 # 在指定端口 8502 启动 WebUI diff --git a/pipelines/rest_api/pipeline/semantic_search.yaml b/pipelines/rest_api/pipeline/semantic_search.yaml index 3367384da48c..855e4811ef3f 100644 --- a/pipelines/rest_api/pipeline/semantic_search.yaml +++ b/pipelines/rest_api/pipeline/semantic_search.yaml @@ -6,7 +6,7 @@ components: # define all the building-blocks for Pipeline params: host: localhost port: 9200 - index: dureader_robust_nano_encoder + index: dureader_robust_query_encoder embedding_dim: 312 - name: Retriever type: DensePassageRetriever From 73a970e2ae6e1eaa0349a18bbf75886c7509e335 Mon Sep 17 00:00:00 2001 From: w5688414 Date: Sat, 3 Sep 2022 09:30:12 +0000 Subject: [PATCH 3/3] Update Neural Search Recall and Upgrade docx for Pipelines --- .../recall/in_batch_negative/predict.py | 14 +- .../neural_search/recall/milvus/README.md | 1 + .../recall/milvus/feature_extract.py | 3 +- .../neural_search/recall/milvus/inference.py | 5 +- .../recall/milvus/scripts/feature_extract.sh | 1 + .../Install_windows.md | 100 +++++++++++ .../examples/semantic-search/Neural_Search.md | 163 ++++++++++++++++++ .../pipelines/nodes/file_converter/docx.py | 61 ++++--- pipelines/pipelines/nodes/retriever/dense.py | 2 +- 9 files changed, 317 insertions(+), 33 deletions(-) create mode 100644 pipelines/examples/frequently-asked-question/Install_windows.md create mode 100644 pipelines/examples/semantic-search/Neural_Search.md diff --git a/applications/neural_search/recall/in_batch_negative/predict.py b/applications/neural_search/recall/in_batch_negative/predict.py index 9e30ed94e71b..0337b49aa1bd 100644 --- a/applications/neural_search/recall/in_batch_negative/predict.py +++ b/applications/neural_search/recall/in_batch_negative/predict.py @@ -37,7 +37,7 @@ help="The path to model parameters to be loaded.") parser.add_argument("--max_seq_length", default=64, type=int, help="The maximum total input sequence length after tokenization. 
" "Sequences longer than this will be truncated, sequences shorter will be padded.") -parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="Select model to train, defaults to rocketqa-zh-base-query-encoder.") +parser.add_argument('--model_name_or_path', default="rocketqa-zh-base-query-encoder", help="The pretrained model used for training") parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument("--output_emb_size", default=None, @@ -92,10 +92,14 @@ def predict(model, data_loader): pad_to_max_seq_len=args.pad_to_max_seq_len) batchify_fn = lambda samples, fn=Tuple( - Pad(axis=0, pad_val=tokenizer.pad_token_id), # query_input - Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # query_segment - Pad(axis=0, pad_val=tokenizer.pad_token_id), # title_input - Pad(axis=0, pad_val=tokenizer.pad_token_type_id), # tilte_segment + Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64' + ), # query_input + Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int64' + ), # query_segment + Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype='int64' + ), # title_input + Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype='int64' + ), # tilte_segment ): [data for data in fn(samples)] valid_ds = load_dataset(read_text_pair, diff --git a/applications/neural_search/recall/milvus/README.md b/applications/neural_search/recall/milvus/README.md index e0b40a99636b..de3f1666b960 100644 --- a/applications/neural_search/recall/milvus/README.md +++ b/applications/neural_search/recall/milvus/README.md @@ -104,6 +104,7 @@ Milvus 搭建完系统以后就可以插入和检索向量了,首先生成 emb ``` CUDA_VISIBLE_DEVICES=0 python feature_extract.py \ --model_dir=./output \ + --model_name_or_path rocketqa-zh-base-query-encoder \ --corpus_file "data/milvus_data.csv" ``` 其中 output 目录下存放的是召回的 Paddle Inference 静态图模型。 diff --git a/applications/neural_search/recall/milvus/feature_extract.py b/applications/neural_search/recall/milvus/feature_extract.py index d50e8ca25b57..bea355b3af0c 100644 --- a/applications/neural_search/recall/milvus/feature_extract.py +++ b/applications/neural_search/recall/milvus/feature_extract.py @@ -50,6 +50,7 @@ help='Number of threads to predict when using cpu.') parser.add_argument('--enable_mkldnn', default=False, type=eval, choices=[True, False], help='Enable to use mkldnn to speed up when using cpu.') +parser.add_argument("--model_name_or_path",default='rocketqa-zh-base-query-encoder',type=str,help='The pretrained model used for training') args = parser.parse_args() # yapf: enable @@ -173,7 +174,7 @@ def read_text(file_path): args.batch_size, args.use_tensorrt, args.precision, args.cpu_threads, args.enable_mkldnn) - tokenizer = AutoTokenizer.from_pretrained('ernie-1.0') + tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path) id2corpus = read_text(args.corpus_file) corpus_list = [{idx: text} for idx, text in id2corpus.items()] diff --git a/applications/neural_search/recall/milvus/inference.py b/applications/neural_search/recall/milvus/inference.py index 1ce0737982a3..7966ed696bf8 100644 --- a/applications/neural_search/recall/milvus/inference.py +++ b/applications/neural_search/recall/milvus/inference.py @@ -54,8 +54,9 @@ def search_in_milvus(text_embedding): batch_size = 1 params_path = 'checkpoints/model_40/model_state.pdparams' id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} + model_name_or_path = "rocketqa-zh-base-query-encoder" paddle.set_device(device) - tokenizer = 
AutoTokenizer.from_pretrained('ernie-1.0')
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
     trans_func = partial(convert_example,
                          tokenizer=tokenizer,
                          max_seq_length=max_seq_length)
@@ -65,7 +66,7 @@ def search_in_milvus(text_embedding):
         Pad(axis=0, pad_val=tokenizer.pad_token_type_id, dtype="int64"
             ),  # text_segment
     ): [data for data in fn(samples)]
-    pretrained_model = AutoModel.from_pretrained("ernie-1.0")
+    pretrained_model = AutoModel.from_pretrained(model_name_or_path)
     model = SemanticIndexBaseStatic(pretrained_model,
                                     output_emb_size=output_emb_size)
     # Load pretrained semantic model
diff --git a/applications/neural_search/recall/milvus/scripts/feature_extract.sh b/applications/neural_search/recall/milvus/scripts/feature_extract.sh
index eb1cf52f0dfe..7f996ac0600a 100644
--- a/applications/neural_search/recall/milvus/scripts/feature_extract.sh
+++ b/applications/neural_search/recall/milvus/scripts/feature_extract.sh
@@ -1,5 +1,6 @@
 CUDA_VISIBLE_DEVICES=2 python feature_extract.py \
         --model_dir ./output \
+        --model_name_or_path rocketqa-zh-base-query-encoder \
         --batch_size 512 \
         --corpus_file "milvus/milvus_data.csv"
diff --git a/pipelines/examples/frequently-asked-question/Install_windows.md b/pipelines/examples/frequently-asked-question/Install_windows.md
new file mode 100644
index 000000000000..30236378799f
--- /dev/null
+++ b/pipelines/examples/frequently-asked-question/Install_windows.md
@@ -0,0 +1,100 @@
+# WINDOWS环境下搭建端到端FAQ智能问答系统
+以下流程均基于 Anaconda 环境搭建。Anaconda 安装好以后,进入 `Anaconda Powershell Prompt`(由于环境变量设置不兼容的原因,暂不支持使用`cmd`执行下面的命令),然后执行下面的流程。
+
+## 1. 快速开始: 快速搭建FAQ智能问答系统
+
+### 1.1 运行环境和安装说明
+
+a. 依赖安装:
+首先从源码安装 htbuilder,然后安装 pipelines 及相关依赖,执行以下命令:
+```bash
+
+git clone https://github.com/tvst/htbuilder.git
+cd htbuilder/
+python setup.py install
+# 1) 安装 pipelines package
+cd ${HOME}/PaddleNLP/pipelines/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+python setup.py install
+```
+【注意】以下所有流程都只需要在`pipelines`根目录下进行,不需要跳转目录。
+### 1.2 数据说明
+我们预置了基于[ 8000 多条保险行业问答数据](https://github.com/SophonPlus/ChineseNlpCorpus/blob/master/datasets/baoxianzhidao/intro.ipynb)搭建保险FAQ智能问答的代码示例,您可以通过如下命令快速体验智能问答的效果
+
+### 1.3 一键体验FAQ智能问答系统
+
+```bash
+# 我们建议在 GPU 环境下运行本示例,运行速度较快
+# 设置 1 个空闲的 GPU 卡,此处假设 0 卡为空闲 GPU
+$env:CUDA_VISIBLE_DEVICES=0
+python examples/frequently-asked-question/dense_faq_example.py --device gpu
+# 如果只有 CPU 机器,可以通过 --device 参数指定 cpu 即可,运行耗时较长
+Remove-Item Env:CUDA_VISIBLE_DEVICES
+python examples/frequently-asked-question/dense_faq_example.py --device cpu
+```
+
+### 1.4 构建 Web 可视化FAQ系统
+
+整个 Web 可视化FAQ智能问答系统主要包含 3 大组件: 1. 基于 ElasticSearch 的 ANN 服务 2. 基于 RestAPI 构建模型服务 3. 基于 Streamlit 构建 WebUI,接下来我们依次搭建这 3 个服务并最终形成可视化的FAQ智能问答系统。
+
+#### 1.4.1 启动 ANN 服务
+1. 参考官方文档下载安装 [elasticsearch-8.3.2](https://www.elastic.co/cn/downloads/elasticsearch) 并解压。
+2. 启动 ES 服务
+修改 `config/elasticsearch.yml`,把`xpack.security.enabled` 设置成 false,如下:
+```
+xpack.security.enabled: false
+```
+
+然后直接双击bin目录下的elasticsearch.bat即可启动。
+
+3. 
elasticsearch可视化工具Kibana(可选)
+为了更好地对数据进行管理,可以使用Kibana可视化工具进行管理和分析,下载链接为[Kibana](https://www.elastic.co/cn/downloads/kibana),下载完后解压,直接双击运行 `bin\kibana.bat`即可。
+
+#### 1.4.2 文档数据写入 ANN 索引库
+```
+# 以保险行业问答数据集为例建立 ANN 索引库
+python utils/offline_ann.py --index_name insurance --doc_dir data/insurance --split_answers --delete_index
+```
+参数含义说明
+* `index_name`: 索引的名称
+* `doc_dir`: txt文本数据的路径
+* `host`: Elasticsearch的IP地址
+* `port`: Elasticsearch的端口号
+* `delete_index`: 是否删除现有的索引和数据,用于清空es的数据,默认为false
+
+
+运行结束后,可使用Kibana查看数据。
+
+#### 1.4.3 启动 RestAPI 模型服务
+```bash
+# 指定FAQ智能问答系统的Yaml配置文件
+$env:PIPELINE_YAML_PATH='rest_api/pipeline/dense_faq.yaml'
+# 使用端口号 8891 启动模型服务
+python rest_api/application.py 8891
+```
+
+#### 1.4.4 启动 WebUI
+```bash
+# 配置模型服务地址
+$env:API_ENDPOINT='http://127.0.0.1:8891'
+# 在指定端口 8502 启动 WebUI
+python -m streamlit run ui/webapp_faq.py --server.port 8502
+```
+
+到这里您就可以打开浏览器访问 http://127.0.0.1:8502 地址体验FAQ智能问答系统服务了。
+
+#### 1.4.5 数据更新
+
+数据更新的方法有两种:第一种是使用前面的 `utils/offline_ann.py` 进行更新;另一种是使用前端界面的文件上传进行更新,支持txt、pdf、image、word格式。以txt格式的文件为例,每段文本需要使用空行隔开,程序会根据空行进行分段并建立索引,示例数据如下(demo.txt):
+
+```
+兴证策略认为,最恐慌的时候已经过去,未来一个月市场迎来阶段性修复窗口。
+
+从海外市场表现看,
+对俄乌冲突的恐慌情绪已显著释放,
+海外权益市场也从单边下跌转入双向波动。
+
+长期,继续聚焦科技创新的五大方向。1)新能源(新能源汽车、光伏、风电、特高压等),2)新一代信息通信技术(人工智能、大数据、云计算、5G等),3)高端制造(智能数控机床、机器人、先进轨交装备等),4)生物医药(创新药、CXO、医疗器械和诊断设备等),5)军工(导弹设备、军工电子元器件、空间站、航天飞机等)。
+```
+
+如果安装遇到问题,可以查看[FAQ文档](../../FAQ.md)
diff --git a/pipelines/examples/semantic-search/Neural_Search.md b/pipelines/examples/semantic-search/Neural_Search.md
new file mode 100644
index 000000000000..ac68a0c47cca
--- /dev/null
+++ b/pipelines/examples/semantic-search/Neural_Search.md
@@ -0,0 +1,163 @@
+# Neural Search
+
+## 1. 快速开始: 快速搭建语义检索系统
+
+
+### 1.1 运行环境和安装说明
+
+本实验在以下运行环境中完成,详细配置如下,用户也可以在自己的 GPU 硬件环境中运行:
+
+a. 软件环境:
+- python >= 3.7.0
+- paddlenlp >= 2.2.1
+- paddlepaddle-gpu >=2.3
+- CUDA Version: 10.2
+- NVIDIA Driver Version: 440.64.00
+- Ubuntu 16.04.6 LTS (Docker)
+
+b. 硬件环境:
+
+- NVIDIA Tesla V100 16GB x4卡
+- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+
+c. 依赖安装:
+首先需要安装 PaddlePaddle,安装方法请参考[官方安装文档](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html),然后安装下面的依赖:
+```bash
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+# 1) 安装 pipelines package
+cd ${HOME}/PaddleNLP/pipelines/
+python setup.py install
+```
+【注意】以下所有流程都只需要在`pipelines`根目录下进行,不需要跳转目录。
+
+### 1.2 数据说明
+语义检索数据库的数据来自于[DuReader-Robust数据集](https://github.com/baidu/DuReader/tree/master/DuReader-Robust),共包含 46972 个段落文本,并选取了其中验证集1417条段落文本来搭建语义检索系统。
+
+### 1.3 一键体验语义检索系统
+
+#### 1.3.1 快速一键启动
+
+我们预置了基于[DuReader-Robust数据集](https://github.com/baidu/DuReader/tree/master/DuReader-Robust)搭建语义检索系统的代码示例,您可以通过如下命令快速体验语义检索系统的效果
+```bash
+# 我们建议在 GPU 环境下运行本示例,运行速度较快
+# 设置 1 个空闲的 GPU 卡,此处假设 0 卡为空闲 GPU
+export CUDA_VISIBLE_DEVICES=0
+python examples/semantic-search/semantic_search_example.py \
+    --device gpu \
+    --query_embedding_model rocketqa-zh-base-query-encoder \
+    --params_path checkpoints/model_40/model_state.pdparams \
+    --embedding_dim 256
+# 如果只有 CPU 机器,可以通过 --device 参数指定 cpu 即可,运行耗时较长
+unset CUDA_VISIBLE_DEVICES
+python examples/semantic-search/semantic_search_example.py \
+    --device cpu \
+    --query_embedding_model rocketqa-zh-base-query-encoder \
+    --params_path checkpoints/model_40/model_state.pdparams \
+    --embedding_dim 256
+```
+
+### 1.4 构建 Web 可视化语义检索系统
+
+整个 Web 可视化语义检索系统主要包含 3 大组件: 1. 基于 ElasticSearch 的 ANN 服务 2. 基于 RestAPI 构建模型服务 3. 基于 Streamlit 构建 WebUI,接下来我们依次搭建这 3 个服务并最终形成可视化的语义检索系统。
+
+#### 1.4.1 启动 ANN 服务
+1. 参考官方文档下载安装 [elasticsearch-8.3.2](https://www.elastic.co/cn/downloads/elasticsearch) 并解压。
+2. 启动 ES 服务
+首先修改`config/elasticsearch.yml`的配置:
+```
+xpack.security.enabled: false
+```
+然后启动:
+```bash
+./bin/elasticsearch
+```
+3. 检查确保 ES 服务启动成功
+```bash
+curl http://localhost:9200/_aliases?pretty=true
+```
+备注:ES 服务默认开启端口为 9200
+
+#### 1.4.2 文档数据写入 ANN 索引库
+```
+# 以DuReader-Robust 数据集为例建立 ANN 索引库
+python utils/offline_ann.py --index_name dureader_robust_neural_search \
+                            --doc_dir data/dureader_dev \
+                            --query_embedding_model rocketqa-zh-base-query-encoder \
+                            --params_path checkpoints/model_40/model_state.pdparams \
+                            --embedding_dim 256 \
+                            --delete_index
+```
+可以使用下面的命令来查看数据:
+
+```
+# 打印几条数据
+curl http://localhost:9200/dureader_robust_neural_search/_search
+```
+
+参数含义说明
+* `index_name`: 索引的名称
+* `doc_dir`: txt文本数据的路径
+* `host`: Elasticsearch的IP地址
+* `port`: Elasticsearch的端口号
+* `delete_index`: 是否删除现有的索引和数据,用于清空es的数据,默认为false
+
+#### 1.4.3 启动 RestAPI 模型服务
+```bash
+# 指定语义检索系统的Yaml配置文件
+export PIPELINE_YAML_PATH=rest_api/pipeline/semantic_search_custom.yaml
+# 使用端口号 8891 启动模型服务
+python rest_api/application.py 8891
+```
+Linux 用户推荐采用 Shell 脚本来启动服务:
+
+```bash
+sh examples/semantic-search/run_neural_search_server.sh
+```
+启动后可以使用curl命令验证是否成功运行:
+
+```
+curl -X POST -k http://localhost:8891/query -H 'Content-Type: application/json' -d '{"query": "衡量酒水的价格的因素有哪些?","params": {"Retriever": {"top_k": 5}, "Ranker":{"top_k": 5}}}'
+
+```
+#### 1.4.4 启动 WebUI
+```bash
+# 配置模型服务地址
+export API_ENDPOINT=http://127.0.0.1:8891
+# 在指定端口 8502 启动 WebUI
+python -m streamlit run ui/webapp_semantic_search.py --server.port 8502
+```
+Linux 用户推荐采用 Shell 脚本来启动服务:
+
+```bash
+sh examples/semantic-search/run_search_web.sh
+```
+
+到这里您就可以打开浏览器访问 http://127.0.0.1:8502 地址体验语义检索系统服务了。
+
+#### 1.4.5 数据更新
+
+数据更新的方法有两种:第一种是使用前面的 `utils/offline_ann.py` 进行更新;另一种是使用前端界面的文件上传进行更新,支持txt、pdf、image、word格式。以txt格式的文件为例,每段文本需要使用空行隔开,程序会根据空行进行分段并建立索引,示例数据如下(demo.txt):
+
+```
+兴证策略认为,最恐慌的时候已经过去,未来一个月市场迎来阶段性修复窗口。
+
+从海外市场表现看,
+对俄乌冲突的恐慌情绪已显著释放,
+海外权益市场也从单边下跌转入双向波动。
+
+长期,继续聚焦科技创新的五大方向。1)新能源(新能源汽车、光伏、风电、特高压等),2)新一代信息通信技术(人工智能、大数据、云计算、5G等),3)高端制造(智能数控机床、机器人、先进轨交装备等),4)生物医药(创新药、CXO、医疗器械和诊断设备等),5)军工(导弹设备、军工电子元器件、空间站、航天飞机等)。
+```
+如果安装遇到问题,可以查看[FAQ文档](../../FAQ.md)
+
+## Reference
+[1]Y. Sun et al., “[ERNIE 3.0: Large-scale Knowledge Enhanced Pre-training for Language Understanding and Generation](https://arxiv.org/pdf/2107.02137.pdf),” arXiv:2107.02137 [cs], Jul. 2021, Accessed: Jan. 17, 2022. [Online]. Available: http://arxiv.org/abs/2107.02137
+
+[2]Y. Qu et al., “[RocketQA: An Optimized Training Approach to Dense Passage Retrieval for Open-Domain Question Answering](https://arxiv.org/abs/2010.08191),” arXiv:2010.08191 [cs], May 2021, Accessed: Aug. 16, 2021. [Online]. Available: http://arxiv.org/abs/2010.08191
+
+[3]H. Tang, H. Li, J. Liu, Y. Hong, H. Wu, and H. Wang, “[DuReader_robust: A Chinese Dataset Towards Evaluating Robustness and Generalization of Machine Reading Comprehension in Real-World Applications](https://arxiv.org/pdf/2004.11142.pdf).” arXiv, Jul. 21, 2021. Accessed: May 15, 2022. [Online]. 
Available: http://arxiv.org/abs/2004.11142
+
+## Acknowledge
+
+我们借鉴了 Deepset.ai [Haystack](https://github.com/deepset-ai/haystack) 优秀的框架设计,在此对[Haystack](https://github.com/deepset-ai/haystack)作者及其开源社区表示感谢。
+
+We learned from the excellent framework design of Deepset.ai [Haystack](https://github.com/deepset-ai/haystack), and we would like to express our thanks to the authors of Haystack and their open source community.
diff --git a/pipelines/pipelines/nodes/file_converter/docx.py b/pipelines/pipelines/nodes/file_converter/docx.py
index e2adc5a54a4e..3d036a3ada39 100644
--- a/pipelines/pipelines/nodes/file_converter/docx.py
+++ b/pipelines/pipelines/nodes/file_converter/docx.py
@@ -52,7 +52,7 @@ def __init__(
             in garbled text.
         """
 
-        # save init parameters to enable export of component config as YAML
+        # Save init parameters to enable export of component config as YAML
         self.set_config(remove_numeric_tables=remove_numeric_tables,
                         valid_languages=valid_languages)
 
@@ -109,38 +109,51 @@ def convert(
         for i in range(len(file.paragraphs)):
             paragraph = file.paragraphs[i]
             # Extracting text from the paragraph
-            if (paragraph.text):
-                if bool(text_dict):
-                    # The texts and corresponding images will be added into documents
+            # If there is text, add it to text_dict
+            if (paragraph.text != ""):
+                text = paragraph.text
+                if not text_dict:
+                    text_dict = {'text': [text], 'images': []}
+                else:
+                    text_dict['text'].append(text)
+            # Extracting images from the paragraph
+            else:
+                image_list = self.get_image_list(file, paragraph)
+                # If there is neither text nor an image, flush the accumulated text_dict into documents
+                if (image_list is None and bool(text_dict)):
+                    raw_text = ''.join(text_dict['text'])
+                    # Skip if the accumulated text is empty
+                    if (raw_text == ''):
+                        continue
                     meta_data = {}
                     meta_data['name'] = meta['name']
                     meta_data['images'] = text_dict['images']
                     document = {
-                        "content": text_dict['text'],
+                        "content": raw_text,
                         "content_type": "text",
                         "meta": meta_data
                     }
                     documents.append(document)
-                # Storing new paragraph text into a new dict
-                text = paragraph.text
-                text_dict = {'text': text, 'images': []}
-            # Extracting images from the paragraph
-            else:
-                image_list = self.get_image_list(file, paragraph)
-                if (image_list is None):
+
+                    text = paragraph.text
+                    text_dict = {'text': [text], 'images': []}
+                # If there are images, add them to text_dict
+                elif (image_list is not None):
+                    for i, image in enumerate(image_list):
+                        if image:
+                            # File extension & file content
+                            ext, blob = image.ext, image.blob
+                            # Use md5 to generate the image name and save the image into desc_path
+                            md5hash = hashlib.md5(blob)
+                            md5_name = md5hash.hexdigest()
+                            image_name = '{}_{}.{}'.format(md5_name, i, ext)
+                            image_path = os.path.join(self.desc_path,
+                                                      image_name)
+                            Image.open(BytesIO(blob)).save(image_path)
+                            # Add image_name into text_dict as the image for this text
+                            text_dict['images'].append(image_name)
+                        else:
                             continue
-                for i, image in enumerate(image_list):
-                    if image:
-                        # File extension & file content
-                        ext, blob = image.ext, image.blob
-                        # Using md5 to generate image name and save image into desc_path
-                        md5hash = hashlib.md5(blob)
-                        md5_name = md5hash.hexdigest()
-                        image_name = '{}_{}.{}'.format(md5_name, i, ext)
-                        image_path = os.path.join(self.desc_path, image_name)
-                        Image.open(BytesIO(blob)).save(image_path)
-                        # Adding image_name into the text_dict as the image for the text
-                        text_dict['images'].append(image_name)
         return documents
 
     def get_image_list(self, document: Document, paragraph: Paragraph):
diff --git 
a/pipelines/pipelines/nodes/retriever/dense.py b/pipelines/pipelines/nodes/retriever/dense.py index 2baeac9ee0a0..6040938faf29 100644 --- a/pipelines/pipelines/nodes/retriever/dense.py +++ b/pipelines/pipelines/nodes/retriever/dense.py @@ -47,7 +47,7 @@ def __init__( Path, str] = "rocketqa-zh-dureader-query-encoder", passage_embedding_model: Union[ Path, str] = "rocketqa-zh-dureader-para-encoder", - params_path: Optional[str] = None, + params_path: Optional[str] = "", model_version: Optional[str] = None, output_emb_size=256, max_seq_len_query: int = 64,
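
---

Below is a minimal usage sketch, not part of the patch itself, showing how the new `params_path` branch of `DensePassageRetriever` is meant to be exercised from Python. The checkpoint path and index name are illustrative assumptions taken from the examples above, and a local Elasticsearch instance on port 9200 is assumed to be running.

```python
# A minimal sketch: load a fine-tuned Neural Search checkpoint into
# DensePassageRetriever via the new params_path argument.
# The checkpoint path and index name below are assumptions for illustration.
from pipelines.document_stores import ElasticsearchDocumentStore
from pipelines.nodes import DensePassageRetriever

document_store = ElasticsearchDocumentStore(host="localhost",
                                            port=9200,
                                            index="dureader_robust_neural_search",
                                            embedding_dim=256)

retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="rocketqa-zh-base-query-encoder",
    # When params_path points to an existing file, the retriever wraps the
    # pretrained model in SemanticIndexBatchNeg and loads these parameters;
    # otherwise it falls back to the two-tower ErnieDualEncoder branch.
    params_path="checkpoints/model_40/model_state.pdparams",
    output_emb_size=256,
    embed_title=False,
)

# Rebuild the ANN index with embeddings from the custom encoder.
document_store.update_embeddings(retriever)
```

Note the design choice visible in the diff: when a custom checkpoint is supplied, the query encoder and its tokenizer are reused for passages as well, because the fine-tuned Neural Search model is a single shared-parameter tower rather than the two-tower ErnieDualEncoder.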