From 8983ffcf042fb0b02e24e0685d4162273f16d670 Mon Sep 17 00:00:00 2001 From: Yulv-git Date: Mon, 2 May 2022 11:57:30 +0800 Subject: [PATCH 1/2] [cherry-pick] Fix some typos for Jetson, metrics and result from release/2.4-typos2 branch and merge. --- ...50\345\215\225\350\257\206\345\210\253.md" | 2 +- doc/doc_ch/add_new_algorithm.md | 2 +- doc/doc_en/add_new_algorithm_en.md | 2 +- ...347\275\262\345\256\236\346\210\230.ipynb" | 6 +- ...pocrv2_inference_deployment_practice.ipynb | 6 +- ppocr/metrics/det_metric.py | 24 +- ppocr/metrics/e2e_metric.py | 4 +- ppocr/metrics/kie_metric.py | 4 +- ppocr/metrics/vqa_token_ser_metric.py | 4 +- ppstructure/vqa/infer_ser_e2e.py | 156 ++++++++++++ ppstructure/vqa/train_re.py | 229 ++++++++++++++++++ .../jeston_test_train_inference_python.md | 8 +- tools/infer/utility.py | 2 +- tools/infer_e2e.py | 2 +- tools/infer_vqa_token_ser_re.py | 2 +- 15 files changed, 419 insertions(+), 34 deletions(-) create mode 100644 ppstructure/vqa/infer_ser_e2e.py create mode 100644 ppstructure/vqa/train_re.py diff --git "a/applications/\345\244\232\346\250\241\346\200\201\350\241\250\345\215\225\350\257\206\345\210\253.md" "b/applications/\345\244\232\346\250\241\346\200\201\350\241\250\345\215\225\350\257\206\345\210\253.md" index 2143a6da86..e64a22e169 100644 --- "a/applications/\345\244\232\346\250\241\346\200\201\350\241\250\345\215\225\350\257\206\345\210\253.md" +++ "b/applications/\345\244\232\346\250\241\346\200\201\350\241\250\345\215\225\350\257\206\345\210\253.md" @@ -809,7 +809,7 @@ plt.imshow(img) ``` fout.write(img_path + "\t" + json.dumps( { - "ser_resule": result, + "ser_result": result, }, ensure_ascii=False) + "\n") ``` diff --git a/doc/doc_ch/add_new_algorithm.md b/doc/doc_ch/add_new_algorithm.md index 79c29249dd..bb97e00aa6 100644 --- a/doc/doc_ch/add_new_algorithm.md +++ b/doc/doc_ch/add_new_algorithm.md @@ -246,7 +246,7 @@ class MyMetric(object): def get_metric(self): """ - return metircs { + return metrics { 'acc': 0, 'norm_edit_dis': 0, } diff --git a/doc/doc_en/add_new_algorithm_en.md b/doc/doc_en/add_new_algorithm_en.md index db72fe7d4b..a8903b0a20 100644 --- a/doc/doc_en/add_new_algorithm_en.md +++ b/doc/doc_en/add_new_algorithm_en.md @@ -237,7 +237,7 @@ class MyMetric(object): def get_metric(self): """ - return metircs { + return metrics { 'acc': 0, 'norm_edit_dis': 0, } diff --git "a/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb" "b/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb" index c65627acc8..3b8550d339 100644 --- "a/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb" +++ "b/notebook/notebook_ch/5.ppocrv2_inference_deployment/PP-OCRv2\351\242\204\346\265\213\351\203\250\347\275\262\345\256\236\346\210\230.ipynb" @@ -1876,11 +1876,11 @@ " rec_res)\n", " filter_boxes, filter_rec_res = [], []\n", " # 根据识别得分的阈值对结果进行过滤,如果得分小于阈值,就过滤掉\n", - " for box, rec_reuslt in zip(dt_boxes, rec_res):\n", - " text, score = rec_reuslt\n", + " for box, rec_result in zip(dt_boxes, rec_res):\n", + " text, score = rec_result\n", " if score >= self.drop_score:\n", " filter_boxes.append(box)\n", - " filter_rec_res.append(rec_reuslt)\n", + " filter_rec_res.append(rec_result)\n", " return filter_boxes, filter_rec_res\n", "\n", "def sorted_boxes(dt_boxes):\n", diff --git a/notebook/notebook_en/5.ppocrv2_inference_deployment/ppocrv2_inference_deployment_practice.ipynb b/notebook/notebook_en/5.ppocrv2_inference_deployment/ppocrv2_inference_deployment_practice.ipynb index 61cd456151..780f948579 100644 --- a/notebook/notebook_en/5.ppocrv2_inference_deployment/ppocrv2_inference_deployment_practice.ipynb +++ b/notebook/notebook_en/5.ppocrv2_inference_deployment/ppocrv2_inference_deployment_practice.ipynb @@ -1886,11 +1886,11 @@ " rec_res)\n", " filter_boxes, filter_rec_res = [], []\n", " #Filter the results according to the threshold of the recognition score, if the score is less than the threshold, filter out\n", - " for box, rec_reuslt in zip(dt_boxes, rec_res):\n", - " text, score = rec_reuslt\n", + " for box, rec_result in zip(dt_boxes, rec_res):\n", + " text, score = rec_result\n", " if score >= self.drop_score:\n", " filter_boxes.append(box)\n", - " filter_rec_res.append(rec_reuslt)\n", + " filter_rec_res.append(rec_result)\n", " return filter_boxes, filter_rec_res\n", "\n", "def sorted_boxes(dt_boxes):\n", diff --git a/ppocr/metrics/det_metric.py b/ppocr/metrics/det_metric.py index c9ec8dd2e9..dca94c0927 100644 --- a/ppocr/metrics/det_metric.py +++ b/ppocr/metrics/det_metric.py @@ -64,9 +64,9 @@ def get_metric(self): } """ - metircs = self.evaluator.combine_results(self.results) + metrics = self.evaluator.combine_results(self.results) self.reset() - return metircs + return metrics def reset(self): self.results = [] # clear results @@ -127,20 +127,20 @@ def get_metric(self): 'thr 0.9':'precision: 0 recall: 0 hmean: 0', } """ - metircs = {} + metrics = {} hmean = 0 for score_thr in self.results.keys(): - metirc = self.evaluator.combine_results(self.results[score_thr]) - # for key, value in metirc.items(): - # metircs['{}_{}'.format(key, score_thr)] = value - metirc_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format( - metirc['precision'], metirc['recall'], metirc['hmean']) - metircs['thr {}'.format(score_thr)] = metirc_str - hmean = max(hmean, metirc['hmean']) - metircs['hmean'] = hmean + metric = self.evaluator.combine_results(self.results[score_thr]) + # for key, value in metric.items(): + # metrics['{}_{}'.format(key, score_thr)] = value + metric_str = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format( + metric['precision'], metric['recall'], metric['hmean']) + metrics['thr {}'.format(score_thr)] = metric_str + hmean = max(hmean, metric['hmean']) + metrics['hmean'] = hmean self.reset() - return metircs + return metrics def reset(self): self.results = { diff --git a/ppocr/metrics/e2e_metric.py b/ppocr/metrics/e2e_metric.py index 41b7ac2bad..2f8ba3b222 100644 --- a/ppocr/metrics/e2e_metric.py +++ b/ppocr/metrics/e2e_metric.py @@ -78,9 +78,9 @@ def __call__(self, preds, batch, **kwargs): self.results.append(result) def get_metric(self): - metircs = combine_results(self.results) + metrics = combine_results(self.results) self.reset() - return metircs + return metrics def reset(self): self.results = [] # clear results diff --git a/ppocr/metrics/kie_metric.py b/ppocr/metrics/kie_metric.py index f3bce0411d..28ab22b807 100644 --- a/ppocr/metrics/kie_metric.py +++ b/ppocr/metrics/kie_metric.py @@ -61,9 +61,9 @@ def combine_results(self, results): def get_metric(self): - metircs = self.combine_results(self.results) + metrics = self.combine_results(self.results) self.reset() - return metircs + return metrics def reset(self): self.results = [] # clear results diff --git a/ppocr/metrics/vqa_token_ser_metric.py b/ppocr/metrics/vqa_token_ser_metric.py index 92d80d0970..286d8addaf 100644 --- a/ppocr/metrics/vqa_token_ser_metric.py +++ b/ppocr/metrics/vqa_token_ser_metric.py @@ -34,13 +34,13 @@ def __call__(self, preds, batch, **kwargs): def get_metric(self): from seqeval.metrics import f1_score, precision_score, recall_score - metircs = { + metrics = { "precision": precision_score(self.gt_list, self.pred_list), "recall": recall_score(self.gt_list, self.pred_list), "hmean": f1_score(self.gt_list, self.pred_list), } self.reset() - return metircs + return metrics def reset(self): self.pred_list = [] diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py new file mode 100644 index 0000000000..9ff5d0602e --- /dev/null +++ b/ppstructure/vqa/infer_ser_e2e.py @@ -0,0 +1,156 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) + +import json +import cv2 +import numpy as np +from copy import deepcopy +from PIL import Image + +import paddle +from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification +from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification + +# relative reference +from vqa_utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps + +from vqa_utils import pad_sentences, split_page, preprocess, postprocess, merge_preds_list_with_ocr_info + +MODELS = { + 'LayoutXLM': + (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification), + 'LayoutLM': + (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification) +} + + +def trans_poly_to_bbox(poly): + x1 = np.min([p[0] for p in poly]) + x2 = np.max([p[0] for p in poly]) + y1 = np.min([p[1] for p in poly]) + y2 = np.max([p[1] for p in poly]) + return [x1, y1, x2, y2] + + +def parse_ocr_info_for_ser(ocr_result): + ocr_info = [] + for res in ocr_result: + ocr_info.append({ + "text": res[1][0], + "bbox": trans_poly_to_bbox(res[0]), + "poly": res[0], + }) + return ocr_info + + +class SerPredictor(object): + def __init__(self, args): + self.args = args + self.max_seq_length = args.max_seq_length + + # init ser token and model + tokenizer_class, base_model_class, model_class = MODELS[ + args.ser_model_type] + self.tokenizer = tokenizer_class.from_pretrained( + args.model_name_or_path) + self.model = model_class.from_pretrained(args.model_name_or_path) + self.model.eval() + + # init ocr_engine + from paddleocr import PaddleOCR + + self.ocr_engine = PaddleOCR( + rec_model_dir=args.rec_model_dir, + det_model_dir=args.det_model_dir, + use_angle_cls=False, + show_log=False) + # init dict + label2id_map, self.id2label_map = get_bio_label_maps( + args.label_map_path) + self.label2id_map_for_draw = dict() + for key in label2id_map: + if key.startswith("I-"): + self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] + else: + self.label2id_map_for_draw[key] = label2id_map[key] + + def __call__(self, img): + ocr_result = self.ocr_engine.ocr(img, cls=False) + + ocr_info = parse_ocr_info_for_ser(ocr_result) + + inputs = preprocess( + tokenizer=self.tokenizer, + ori_img=img, + ocr_info=ocr_info, + max_seq_len=self.max_seq_length) + + if self.args.ser_model_type == 'LayoutLM': + preds = self.model( + input_ids=inputs["input_ids"], + bbox=inputs["bbox"], + token_type_ids=inputs["token_type_ids"], + attention_mask=inputs["attention_mask"]) + elif self.args.ser_model_type == 'LayoutXLM': + preds = self.model( + input_ids=inputs["input_ids"], + bbox=inputs["bbox"], + image=inputs["image"], + token_type_ids=inputs["token_type_ids"], + attention_mask=inputs["attention_mask"]) + preds = preds[0] + + preds = postprocess(inputs["attention_mask"], preds, self.id2label_map) + ocr_info = merge_preds_list_with_ocr_info( + ocr_info, inputs["segment_offset_id"], preds, + self.label2id_map_for_draw) + return ocr_info, inputs + + +if __name__ == "__main__": + args = parse_args() + os.makedirs(args.output_dir, exist_ok=True) + + # get infer img list + infer_imgs = get_image_file_list(args.infer_imgs) + + # loop for infer + ser_engine = SerPredictor(args) + with open( + os.path.join(args.output_dir, "infer_results.txt"), + "w", + encoding='utf-8') as fout: + for idx, img_path in enumerate(infer_imgs): + save_img_path = os.path.join( + args.output_dir, + os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg") + print("process: [{}/{}], save result to {}".format( + idx, len(infer_imgs), save_img_path)) + + img = cv2.imread(img_path) + + result, _ = ser_engine(img) + fout.write(img_path + "\t" + json.dumps( + { + "ser_result": result, + }, ensure_ascii=False) + "\n") + + img_res = draw_ser_results(img, result) + cv2.imwrite(save_img_path, img_res) diff --git a/ppstructure/vqa/train_re.py b/ppstructure/vqa/train_re.py new file mode 100644 index 0000000000..f4cfee2789 --- /dev/null +++ b/ppstructure/vqa/train_re.py @@ -0,0 +1,229 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) + +import random +import time +import numpy as np +import paddle + +from paddlenlp.transformers import LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForRelationExtraction + +from xfun import XFUNDataset +from vqa_utils import parse_args, get_bio_label_maps, print_arguments, set_seed +from data_collator import DataCollator +from eval_re import evaluate + +from ppocr.utils.logging import get_logger + + +def train(args): + logger = get_logger(log_file=os.path.join(args.output_dir, "train.log")) + rank = paddle.distributed.get_rank() + distributed = paddle.distributed.get_world_size() > 1 + + print_arguments(args, logger) + + # Added here for reproducibility (even between python 2 and 3) + set_seed(args.seed) + + label2id_map, id2label_map = get_bio_label_maps(args.label_map_path) + pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index + + # dist mode + if distributed: + paddle.distributed.init_parallel_env() + + tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path) + if not args.resume: + model = LayoutXLMModel.from_pretrained(args.model_name_or_path) + model = LayoutXLMForRelationExtraction(model, dropout=None) + logger.info('train from scratch') + else: + logger.info('resume from {}'.format(args.model_name_or_path)) + model = LayoutXLMForRelationExtraction.from_pretrained( + args.model_name_or_path) + + # dist mode + if distributed: + model = paddle.DataParallel(model) + + train_dataset = XFUNDataset( + tokenizer, + data_dir=args.train_data_dir, + label_path=args.train_label_path, + label2id_map=label2id_map, + img_size=(224, 224), + max_seq_len=args.max_seq_length, + pad_token_label_id=pad_token_label_id, + contains_re=True, + add_special_ids=False, + return_attention_mask=True, + load_mode='all') + + eval_dataset = XFUNDataset( + tokenizer, + data_dir=args.eval_data_dir, + label_path=args.eval_label_path, + label2id_map=label2id_map, + img_size=(224, 224), + max_seq_len=args.max_seq_length, + pad_token_label_id=pad_token_label_id, + contains_re=True, + add_special_ids=False, + return_attention_mask=True, + load_mode='all') + + train_sampler = paddle.io.DistributedBatchSampler( + train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True) + + train_dataloader = paddle.io.DataLoader( + train_dataset, + batch_sampler=train_sampler, + num_workers=args.num_workers, + use_shared_memory=True, + collate_fn=DataCollator()) + + eval_dataloader = paddle.io.DataLoader( + eval_dataset, + batch_size=args.per_gpu_eval_batch_size, + num_workers=args.num_workers, + shuffle=False, + collate_fn=DataCollator()) + + t_total = len(train_dataloader) * args.num_train_epochs + + # build linear decay with warmup lr sch + lr_scheduler = paddle.optimizer.lr.PolynomialDecay( + learning_rate=args.learning_rate, + decay_steps=t_total, + end_lr=0.0, + power=1.0) + if args.warmup_steps > 0: + lr_scheduler = paddle.optimizer.lr.LinearWarmup( + lr_scheduler, + args.warmup_steps, + start_lr=0, + end_lr=args.learning_rate, ) + grad_clip = paddle.nn.ClipGradByNorm(clip_norm=10) + optimizer = paddle.optimizer.Adam( + learning_rate=args.learning_rate, + parameters=model.parameters(), + epsilon=args.adam_epsilon, + grad_clip=grad_clip, + weight_decay=args.weight_decay) + + # Train! + logger.info("***** Running training *****") + logger.info(" Num examples = {}".format(len(train_dataset))) + logger.info(" Num Epochs = {}".format(args.num_train_epochs)) + logger.info(" Instantaneous batch size per GPU = {}".format( + args.per_gpu_train_batch_size)) + logger.info( + " Total train batch size (w. parallel, distributed & accumulation) = {}". + format(args.per_gpu_train_batch_size * + paddle.distributed.get_world_size())) + logger.info(" Total optimization steps = {}".format(t_total)) + + global_step = 0 + model.clear_gradients() + train_dataloader_len = len(train_dataloader) + best_metric = {'f1': 0} + model.train() + + train_reader_cost = 0.0 + train_run_cost = 0.0 + total_samples = 0 + reader_start = time.time() + + print_step = 1 + + for epoch in range(int(args.num_train_epochs)): + for step, batch in enumerate(train_dataloader): + train_reader_cost += time.time() - reader_start + train_start = time.time() + outputs = model(**batch) + train_run_cost += time.time() - train_start + # model outputs are always tuple in ppnlp (see doc) + loss = outputs['loss'] + loss = loss.mean() + + loss.backward() + optimizer.step() + optimizer.clear_grad() + # lr_scheduler.step() # Update learning rate schedule + + global_step += 1 + total_samples += batch['image'].shape[0] + + if rank == 0 and step % print_step == 0: + logger.info( + "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec". + format(epoch, args.num_train_epochs, step, + train_dataloader_len, global_step, + np.mean(loss.numpy()), + optimizer.get_lr(), train_reader_cost / print_step, ( + train_reader_cost + train_run_cost) / print_step, + total_samples / print_step, total_samples / ( + train_reader_cost + train_run_cost))) + + train_reader_cost = 0.0 + train_run_cost = 0.0 + total_samples = 0 + + if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training: + # Log metrics + # Only evaluate when single GPU otherwise metrics may not average well + results = evaluate(model, eval_dataloader, logger) + if results['f1'] >= best_metric['f1']: + best_metric = results + output_dir = os.path.join(args.output_dir, "best_model") + os.makedirs(output_dir, exist_ok=True) + if distributed: + model._layers.save_pretrained(output_dir) + else: + model.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) + paddle.save(args, + os.path.join(output_dir, "training_args.bin")) + logger.info("Saving model checkpoint to {}".format( + output_dir)) + logger.info("eval results: {}".format(results)) + logger.info("best_metric: {}".format(best_metric)) + reader_start = time.time() + + if rank == 0: + # Save model checkpoint + output_dir = os.path.join(args.output_dir, "latest_model") + os.makedirs(output_dir, exist_ok=True) + if distributed: + model._layers.save_pretrained(output_dir) + else: + model.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) + paddle.save(args, os.path.join(output_dir, "training_args.bin")) + logger.info("Saving model checkpoint to {}".format(output_dir)) + logger.info("best_metric: {}".format(best_metric)) + + +if __name__ == "__main__": + args = parse_args() + os.makedirs(args.output_dir, exist_ok=True) + train(args) diff --git a/test_tipc/docs/jeston_test_train_inference_python.md b/test_tipc/docs/jeston_test_train_inference_python.md index d96505985e..9e9d15fb67 100644 --- a/test_tipc/docs/jeston_test_train_inference_python.md +++ b/test_tipc/docs/jeston_test_train_inference_python.md @@ -1,6 +1,6 @@ -# Jeston端基础训练预测功能测试 +# Jetson端基础训练预测功能测试 -Jeston端基础训练预测功能测试的主程序为`test_inference_inference.sh`,由于Jeston端CPU较差,Jeston只需要测试TIPC关于GPU和TensorRT预测推理的部分即可。 +Jetson端基础训练预测功能测试的主程序为`test_inference_inference.sh`,由于Jetson端CPU较差,Jetson只需要测试TIPC关于GPU和TensorRT预测推理的部分即可。 ## 1. 测试结论汇总 @@ -42,7 +42,7 @@ Jeston端基础训练预测功能测试的主程序为`test_inference_inference. 先运行`prepare.sh`准备数据和模型,然后运行`test_inference_inference.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 -`test_inference_inference.sh`仅有一个模式`whole_infer`,在Jeston端,仅需要测试预测推理的模式即可: +`test_inference_inference.sh`仅有一个模式`whole_infer`,在Jetson端,仅需要测试预测推理的模式即可: ``` - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; @@ -51,7 +51,7 @@ bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_lin # 用法1: bash test_tipc/test_inference_inference.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 -bash test_tipc/test_inference_jeston.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' '1' +bash test_tipc/test_inference_jetson.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' '1' ``` 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`whole_infer`模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: diff --git a/tools/infer/utility.py b/tools/infer/utility.py index c92e8e152a..ce4e2d92c2 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -193,7 +193,7 @@ def create_predictor(args, mode, logger): gpu_id = get_infer_gpuid() if gpu_id is None: logger.warning( - "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jeston." + "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson." ) config.enable_use_gpu(args.gpu_mem, 0) if args.use_tensorrt: diff --git a/tools/infer_e2e.py b/tools/infer_e2e.py index f3d5712fdd..d3e6b28fca 100755 --- a/tools/infer_e2e.py +++ b/tools/infer_e2e.py @@ -104,7 +104,7 @@ def main(): preds = model(images) post_result = post_process_class(preds, shape_list) points, strs = post_result['points'], post_result['texts'] - # write resule + # write result dt_boxes_json = [] for poly, str in zip(points, strs): tmp_json = {"transcription": str} diff --git a/tools/infer_vqa_token_ser_re.py b/tools/infer_vqa_token_ser_re.py index 2c7cb5e425..6210f7f3c2 100755 --- a/tools/infer_vqa_token_ser_re.py +++ b/tools/infer_vqa_token_ser_re.py @@ -193,7 +193,7 @@ def preprocess(): result = result[0] fout.write(img_path + "\t" + json.dumps( { - "ser_resule": result, + "ser_result": result, }, ensure_ascii=False) + "\n") img_res = draw_re_results(img_path, result) cv2.imwrite(save_img_path, img_res) From 0f0ae980c6bb222d6873d510a9f3ba875c9687c3 Mon Sep 17 00:00:00 2001 From: Yulv-git Date: Mon, 2 May 2022 12:16:34 +0800 Subject: [PATCH 2/2] rm infer_ser_e2e.py and train_re.py. --- ppstructure/vqa/infer_ser_e2e.py | 156 --------------------- ppstructure/vqa/train_re.py | 229 ------------------------------- 2 files changed, 385 deletions(-) delete mode 100644 ppstructure/vqa/infer_ser_e2e.py delete mode 100644 ppstructure/vqa/train_re.py diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py deleted file mode 100644 index 9ff5d0602e..0000000000 --- a/ppstructure/vqa/infer_ser_e2e.py +++ /dev/null @@ -1,156 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) - -import json -import cv2 -import numpy as np -from copy import deepcopy -from PIL import Image - -import paddle -from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification -from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification - -# relative reference -from vqa_utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps - -from vqa_utils import pad_sentences, split_page, preprocess, postprocess, merge_preds_list_with_ocr_info - -MODELS = { - 'LayoutXLM': - (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification), - 'LayoutLM': - (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification) -} - - -def trans_poly_to_bbox(poly): - x1 = np.min([p[0] for p in poly]) - x2 = np.max([p[0] for p in poly]) - y1 = np.min([p[1] for p in poly]) - y2 = np.max([p[1] for p in poly]) - return [x1, y1, x2, y2] - - -def parse_ocr_info_for_ser(ocr_result): - ocr_info = [] - for res in ocr_result: - ocr_info.append({ - "text": res[1][0], - "bbox": trans_poly_to_bbox(res[0]), - "poly": res[0], - }) - return ocr_info - - -class SerPredictor(object): - def __init__(self, args): - self.args = args - self.max_seq_length = args.max_seq_length - - # init ser token and model - tokenizer_class, base_model_class, model_class = MODELS[ - args.ser_model_type] - self.tokenizer = tokenizer_class.from_pretrained( - args.model_name_or_path) - self.model = model_class.from_pretrained(args.model_name_or_path) - self.model.eval() - - # init ocr_engine - from paddleocr import PaddleOCR - - self.ocr_engine = PaddleOCR( - rec_model_dir=args.rec_model_dir, - det_model_dir=args.det_model_dir, - use_angle_cls=False, - show_log=False) - # init dict - label2id_map, self.id2label_map = get_bio_label_maps( - args.label_map_path) - self.label2id_map_for_draw = dict() - for key in label2id_map: - if key.startswith("I-"): - self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]] - else: - self.label2id_map_for_draw[key] = label2id_map[key] - - def __call__(self, img): - ocr_result = self.ocr_engine.ocr(img, cls=False) - - ocr_info = parse_ocr_info_for_ser(ocr_result) - - inputs = preprocess( - tokenizer=self.tokenizer, - ori_img=img, - ocr_info=ocr_info, - max_seq_len=self.max_seq_length) - - if self.args.ser_model_type == 'LayoutLM': - preds = self.model( - input_ids=inputs["input_ids"], - bbox=inputs["bbox"], - token_type_ids=inputs["token_type_ids"], - attention_mask=inputs["attention_mask"]) - elif self.args.ser_model_type == 'LayoutXLM': - preds = self.model( - input_ids=inputs["input_ids"], - bbox=inputs["bbox"], - image=inputs["image"], - token_type_ids=inputs["token_type_ids"], - attention_mask=inputs["attention_mask"]) - preds = preds[0] - - preds = postprocess(inputs["attention_mask"], preds, self.id2label_map) - ocr_info = merge_preds_list_with_ocr_info( - ocr_info, inputs["segment_offset_id"], preds, - self.label2id_map_for_draw) - return ocr_info, inputs - - -if __name__ == "__main__": - args = parse_args() - os.makedirs(args.output_dir, exist_ok=True) - - # get infer img list - infer_imgs = get_image_file_list(args.infer_imgs) - - # loop for infer - ser_engine = SerPredictor(args) - with open( - os.path.join(args.output_dir, "infer_results.txt"), - "w", - encoding='utf-8') as fout: - for idx, img_path in enumerate(infer_imgs): - save_img_path = os.path.join( - args.output_dir, - os.path.splitext(os.path.basename(img_path))[0] + "_ser.jpg") - print("process: [{}/{}], save result to {}".format( - idx, len(infer_imgs), save_img_path)) - - img = cv2.imread(img_path) - - result, _ = ser_engine(img) - fout.write(img_path + "\t" + json.dumps( - { - "ser_result": result, - }, ensure_ascii=False) + "\n") - - img_res = draw_ser_results(img, result) - cv2.imwrite(save_img_path, img_res) diff --git a/ppstructure/vqa/train_re.py b/ppstructure/vqa/train_re.py deleted file mode 100644 index f4cfee2789..0000000000 --- a/ppstructure/vqa/train_re.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -__dir__ = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(__dir__) -sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) - -import random -import time -import numpy as np -import paddle - -from paddlenlp.transformers import LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForRelationExtraction - -from xfun import XFUNDataset -from vqa_utils import parse_args, get_bio_label_maps, print_arguments, set_seed -from data_collator import DataCollator -from eval_re import evaluate - -from ppocr.utils.logging import get_logger - - -def train(args): - logger = get_logger(log_file=os.path.join(args.output_dir, "train.log")) - rank = paddle.distributed.get_rank() - distributed = paddle.distributed.get_world_size() > 1 - - print_arguments(args, logger) - - # Added here for reproducibility (even between python 2 and 3) - set_seed(args.seed) - - label2id_map, id2label_map = get_bio_label_maps(args.label_map_path) - pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index - - # dist mode - if distributed: - paddle.distributed.init_parallel_env() - - tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path) - if not args.resume: - model = LayoutXLMModel.from_pretrained(args.model_name_or_path) - model = LayoutXLMForRelationExtraction(model, dropout=None) - logger.info('train from scratch') - else: - logger.info('resume from {}'.format(args.model_name_or_path)) - model = LayoutXLMForRelationExtraction.from_pretrained( - args.model_name_or_path) - - # dist mode - if distributed: - model = paddle.DataParallel(model) - - train_dataset = XFUNDataset( - tokenizer, - data_dir=args.train_data_dir, - label_path=args.train_label_path, - label2id_map=label2id_map, - img_size=(224, 224), - max_seq_len=args.max_seq_length, - pad_token_label_id=pad_token_label_id, - contains_re=True, - add_special_ids=False, - return_attention_mask=True, - load_mode='all') - - eval_dataset = XFUNDataset( - tokenizer, - data_dir=args.eval_data_dir, - label_path=args.eval_label_path, - label2id_map=label2id_map, - img_size=(224, 224), - max_seq_len=args.max_seq_length, - pad_token_label_id=pad_token_label_id, - contains_re=True, - add_special_ids=False, - return_attention_mask=True, - load_mode='all') - - train_sampler = paddle.io.DistributedBatchSampler( - train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True) - - train_dataloader = paddle.io.DataLoader( - train_dataset, - batch_sampler=train_sampler, - num_workers=args.num_workers, - use_shared_memory=True, - collate_fn=DataCollator()) - - eval_dataloader = paddle.io.DataLoader( - eval_dataset, - batch_size=args.per_gpu_eval_batch_size, - num_workers=args.num_workers, - shuffle=False, - collate_fn=DataCollator()) - - t_total = len(train_dataloader) * args.num_train_epochs - - # build linear decay with warmup lr sch - lr_scheduler = paddle.optimizer.lr.PolynomialDecay( - learning_rate=args.learning_rate, - decay_steps=t_total, - end_lr=0.0, - power=1.0) - if args.warmup_steps > 0: - lr_scheduler = paddle.optimizer.lr.LinearWarmup( - lr_scheduler, - args.warmup_steps, - start_lr=0, - end_lr=args.learning_rate, ) - grad_clip = paddle.nn.ClipGradByNorm(clip_norm=10) - optimizer = paddle.optimizer.Adam( - learning_rate=args.learning_rate, - parameters=model.parameters(), - epsilon=args.adam_epsilon, - grad_clip=grad_clip, - weight_decay=args.weight_decay) - - # Train! - logger.info("***** Running training *****") - logger.info(" Num examples = {}".format(len(train_dataset))) - logger.info(" Num Epochs = {}".format(args.num_train_epochs)) - logger.info(" Instantaneous batch size per GPU = {}".format( - args.per_gpu_train_batch_size)) - logger.info( - " Total train batch size (w. parallel, distributed & accumulation) = {}". - format(args.per_gpu_train_batch_size * - paddle.distributed.get_world_size())) - logger.info(" Total optimization steps = {}".format(t_total)) - - global_step = 0 - model.clear_gradients() - train_dataloader_len = len(train_dataloader) - best_metric = {'f1': 0} - model.train() - - train_reader_cost = 0.0 - train_run_cost = 0.0 - total_samples = 0 - reader_start = time.time() - - print_step = 1 - - for epoch in range(int(args.num_train_epochs)): - for step, batch in enumerate(train_dataloader): - train_reader_cost += time.time() - reader_start - train_start = time.time() - outputs = model(**batch) - train_run_cost += time.time() - train_start - # model outputs are always tuple in ppnlp (see doc) - loss = outputs['loss'] - loss = loss.mean() - - loss.backward() - optimizer.step() - optimizer.clear_grad() - # lr_scheduler.step() # Update learning rate schedule - - global_step += 1 - total_samples += batch['image'].shape[0] - - if rank == 0 and step % print_step == 0: - logger.info( - "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec". - format(epoch, args.num_train_epochs, step, - train_dataloader_len, global_step, - np.mean(loss.numpy()), - optimizer.get_lr(), train_reader_cost / print_step, ( - train_reader_cost + train_run_cost) / print_step, - total_samples / print_step, total_samples / ( - train_reader_cost + train_run_cost))) - - train_reader_cost = 0.0 - train_run_cost = 0.0 - total_samples = 0 - - if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training: - # Log metrics - # Only evaluate when single GPU otherwise metrics may not average well - results = evaluate(model, eval_dataloader, logger) - if results['f1'] >= best_metric['f1']: - best_metric = results - output_dir = os.path.join(args.output_dir, "best_model") - os.makedirs(output_dir, exist_ok=True) - if distributed: - model._layers.save_pretrained(output_dir) - else: - model.save_pretrained(output_dir) - tokenizer.save_pretrained(output_dir) - paddle.save(args, - os.path.join(output_dir, "training_args.bin")) - logger.info("Saving model checkpoint to {}".format( - output_dir)) - logger.info("eval results: {}".format(results)) - logger.info("best_metric: {}".format(best_metric)) - reader_start = time.time() - - if rank == 0: - # Save model checkpoint - output_dir = os.path.join(args.output_dir, "latest_model") - os.makedirs(output_dir, exist_ok=True) - if distributed: - model._layers.save_pretrained(output_dir) - else: - model.save_pretrained(output_dir) - tokenizer.save_pretrained(output_dir) - paddle.save(args, os.path.join(output_dir, "training_args.bin")) - logger.info("Saving model checkpoint to {}".format(output_dir)) - logger.info("best_metric: {}".format(best_metric)) - - -if __name__ == "__main__": - args = parse_args() - os.makedirs(args.output_dir, exist_ok=True) - train(args)