Skip to content

Commit

Permalink
update doc for whl
Browse files Browse the repository at this point in the history
  • Loading branch information
andyjiang1116 committed Oct 8, 2022
1 parent 59812f6 commit a5df6c3
Show file tree
Hide file tree
Showing 5 changed files with 351 additions and 74 deletions.
57 changes: 54 additions & 3 deletions doc/doc_ch/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ cd /path/to/ppocr_img
......
```

此外,paddleocr也支持输入pdf文件,并且可以通过指定参数`page_num`来控制推理前面几页,默认为0,表示推理所有页。
```bash
paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
```

- 单独使用检测:设置`--rec``false`

```bash
Expand Down Expand Up @@ -165,12 +170,14 @@ from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
img_path = './imgs/11.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand All @@ -196,6 +203,50 @@ im_show.save('result.jpg')

<a name="3"></a>

如果输入是PDF文件,那么可以参考下面代码进行可视化

```python
from paddleocr import PaddleOCR, draw_ocr

# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
ocr = PaddleOCR(use_angle_cls=True, lang="ch"page_num=2) # need to run only once to download and load model into memory
img_path = './xxx.pdf'
result = ocr.ocr(img_path, cls=True)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
import fitz
from PIL import Image
import cv2
import numpy as np
imgs = []
with fitz.open(img_path) as pdf:
for pg in range(0, pdf.pageCount):
page = pdf[pg]
mat = fitz.Matrix(2, 2)
pm = page.getPixmap(matrix=mat, alpha=False)
# if width or height > 2000 pixels, don't enlarge the image
if pm.width > 2000 or pm.height > 2000:
pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)

img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
for idx in range(len(result)):
res = result[idx]
image = imgs[idx]
boxes = [line[0] for line in res]
txts = [line[1][0] for line in res]
scores = [line[1][1] for line in res]
im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result_page_{}.jpg'.format(idx))
```

## 3. 小结

通过本节内容,相信您已经熟练掌握PaddleOCR whl包的使用方法并获得了初步效果。
Expand Down
126 changes: 100 additions & 26 deletions doc/doc_ch/whl.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs/11.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand Down Expand Up @@ -71,12 +73,14 @@ from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs/11.jpg'
result = ocr.ocr(img_path, cls=False)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand Down Expand Up @@ -109,8 +113,10 @@ from paddleocr import PaddleOCR
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
result = ocr.ocr(img_path, det=False, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)
```
结果是一个list,每个item只包含识别结果和识别置信度
Expand All @@ -127,12 +133,14 @@ from paddleocr import PaddleOCR, draw_ocr
ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs/11.jpg'
result = ocr.ocr(img_path, rec=False)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
Expand Down Expand Up @@ -163,8 +171,10 @@ from paddleocr import PaddleOCR
ocr = PaddleOCR() # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
result = ocr.ocr(img_path, det=False)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)
```
结果是一个list,每个item只包含识别结果和识别置信度
Expand All @@ -181,8 +191,10 @@ from paddleocr import PaddleOCR
ocr = PaddleOCR(use_angle_cls=True) # need to run only once to download and load model into memory
img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
result = ocr.ocr(img_path, det=False, rec=False, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)
```
结果是一个list,每个item只包含分类结果和分类置信度
Expand Down Expand Up @@ -212,6 +224,11 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
......
```
此外,paddleocr也支持输入pdf文件,并且可以通过指定参数`page_num`来控制推理前面几页,默认为0,表示推理所有页。
```bash
paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
```
* 检测+识别
```bash
Expand Down Expand Up @@ -290,12 +307,14 @@ ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_m
use_angle_cls=True)
img_path = 'PaddleOCR/doc/imgs/11.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand Down Expand Up @@ -325,12 +344,14 @@ from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar
ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
download_with_progressbar(img_path, 'tmp.jpg')
image = Image.open('tmp.jpg').convert('RGB')
boxes = [line[0] for line in result]
Expand Down Expand Up @@ -362,12 +383,14 @@ img_path = 'PaddleOCR/doc/imgs/11.jpg'
img = cv2.imread(img_path)
# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图,可以将这句话的注释取消
result = ocr.ocr(img, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
from PIL import Image

result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand All @@ -376,14 +399,65 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
```
## 5 PDF文件作为输入
- 命令行模式
可以通过指定参数`page_num`来控制推理前面几页,默认为0,表示推理所有页。
```bash
paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
```
- 代码使用
```python
from paddleocr import PaddleOCR, draw_ocr

## 5 参数说明
# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=2) # need to run only once to download and load model into memory
img_path = './xxx.pdf'
result = ocr.ocr(img_path, cls=True)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# 显示结果
import fitz
from PIL import Image
import cv2
import numpy as np
imgs = []
with fitz.open(img_path) as pdf:
for pg in range(0, pdf.pageCount):
page = pdf[pg]
mat = fitz.Matrix(2, 2)
pm = page.getPixmap(matrix=mat, alpha=False)
# if width or height > 2000 pixels, don't enlarge the image
if pm.width > 2000 or pm.height > 2000:
pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)

img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
for idx in range(len(result)):
res = result[idx]
image = imgs[idx]
boxes = [line[0] for line in res]
txts = [line[1][0] for line in res]
scores = [line[1][1] for line in res]
im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result_page_{}.jpg'.format(idx))
```
## 6 参数说明
| 字段 | 说明 | 默认值 |
|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
| use_gpu | 是否使用GPU | TRUE |
| gpu_mem | 初始化占用的GPU内存大小 | 8000M |
| image_dir | 通过命令行调用时执行预测的图片或文件夹路径 | |
| image_dir | 通过命令行调用时执行预测的图片或文件夹路径 |
| page_num | 当输入类型为pdf文件时有效,指定预测前面page_num页,默认预测所有页 | 0 |
| det_algorithm | 使用的检测算法类型 | DB |
| det_model_dir | 检测模型所在文件夹。传参方式有两种,1. None: 自动下载内置模型到 `~/.paddleocr/det`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None |
| det_max_side_len | 检测算法前向时图片长边的最大尺寸,当长边超出这个值时会将长边resize到这个大小,短边等比例缩放 | 960 |
Expand Down
57 changes: 55 additions & 2 deletions doc/doc_en/quickstart_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ If you do not use the provided test image, you can replace the following `--imag
......
```

pdf file is also supported, you can infer the first few pages by using the `page_num` parameter, the default is 0, which means infer all pages

```bash
paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
```

* Only detection: set `--rec` to `false`

```bash
Expand Down Expand Up @@ -176,12 +182,15 @@ from paddleocr import PaddleOCR,draw_ocr
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
img_path = './imgs_en/img_12.jpg'
result = ocr.ocr(img_path, cls=True)
for line in result:
print(line)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)


# draw result
from PIL import Image
result = result[0]
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
Expand All @@ -206,6 +215,50 @@ Visualization of results
<img src="../imgs_results/whl/12_det_rec.jpg" width="800">
</div>

If the input is a PDF file, you can refer to the following code for visualization

```python
from paddleocr import PaddleOCR, draw_ocr

# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
# to switch the language model in order.
ocr = PaddleOCR(use_angle_cls=True, lang="ch"page_num=2) # need to run only once to download and load model into memory
img_path = './xxx.pdf'
result = ocr.ocr(img_path, cls=True)
for idx in range(len(result)):
res = result[idx]
for line in res:
print(line)

# draw result
import fitz
from PIL import Image
import cv2
import numpy as np
imgs = []
with fitz.open(img_path) as pdf:
for pg in range(0, pdf.pageCount):
page = pdf[pg]
mat = fitz.Matrix(2, 2)
pm = page.getPixmap(matrix=mat, alpha=False)
# if width or height > 2000 pixels, don't enlarge the image
if pm.width > 2000 or pm.height > 2000:
pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)

img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
for idx in range(len(result)):
res = result[idx]
image = imgs[idx]
boxes = [line[0] for line in res]
txts = [line[1][0] for line in res]
scores = [line[1][1] for line in res]
im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result_page_{}.jpg'.format(idx))
```

<a name="3"></a>

Expand Down
Loading

0 comments on commit a5df6c3

Please sign in to comment.