-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrapidocr_demo.py
68 lines (51 loc) · 2.05 KB
/
rapidocr_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import re
from rapidocr_onnxruntime import RapidOCR
from PIL import Image
from pillow_heif import register_heif_opener
register_heif_opener()
# Sort OCR results by their position in the image
def get_text_position(item, image_height, *, top_divisor=3):
    """Return the sort key ``(group, x)`` for one OCR result entry.

    Entries whose box starts in the top band of the image sort before all
    other entries; inside each group, entries are ordered left to right.

    Args:
        item: One OCR result entry. ``item[0]`` is the text box, a sequence
            of corner points; only the first point ``(x, y)`` is read and it
            is treated as the top-left corner.
        image_height: Height of the source image, in the same units as the
            box coordinates.
        top_divisor: The top band spans ``image_height / top_divisor``.
            The default of 3 means "the top third of the image".

    Returns:
        A tuple ``(group, x)`` where ``group`` is 0 for boxes whose first
        corner lies inside the top band (boundary inclusive) and 1
        otherwise, and ``x`` is the first corner's x coordinate.
    """
    first_corner = item[0][0]
    x = first_corner[0]
    y = first_corner[1]
    # The boundary itself still counts as part of the top band.
    group = 0 if y <= image_height / top_divisor else 1
    return (group, x)
engine = RapidOCR()
img_path = './imgs/test3.heic'
# Alternative sample image:
# img_path = './imgs/WechatIMG29815.jpg'

# The image height is needed by the sort key below. Open the file in a
# context manager so the handle is released as soon as the height is read.
with Image.open(img_path) as img:
    img_height = img.height

# Other call modes kept for reference:
# result, elapse = engine(img_path)
# result, elapse = engine(img_path, use_det=False, use_cls=False, use_rec=True)  # recognition only
result, elapse = engine(img_path, use_det=True, use_cls=False, use_rec=True)  # detection + recognition
# print(result)
# print(elapse)

# NOTE(review): RapidOCR is expected to return None for `result` when no text
# is detected; fall back to an empty list so sorting does not raise TypeError.
sorted_result = sorted(
    result or [],
    key=lambda item: get_text_position(item, img_height),
)
# Each entry is read as (box, text, ...); keep only the recognised text.
texts = [item[1] for item in sorted_result]
# print(texts)
# A well-formed label: "号码:" followed by eight digits. Compiled once
# instead of being rebuilt on every loop iteration.
_NUMBER_RE = re.compile(r'号码:\d{8}')
# "号码" plus similar-looking prefixes; presumably OCR misreads of "号码".
_LABEL_PREFIXES = ('号码', '母码', '务码', '粤码')

for text in texts:
    match = _NUMBER_RE.search(text)
    if match:
        # Clean hit: print the label together with its eight digits.
        number = match.group(0)
        print('1>>>', number)
    elif text.startswith(_LABEL_PREFIXES):
        # Example input: 号码:01819689
        # Show the character right after the two-character prefix. A slice is
        # used so a text consisting of only the prefix does not raise
        # IndexError.
        print('text[2]', text[2:3])
        # Keep only the digits of the text.
        numbers = ''.join(filter(str.isdigit, text))
        print(numbers)
    elif '号码' in text:
        # The label sits in the middle of a longer run, e.g.
        # 代码:135022422881代码:135022322881号码:00213347
        # Print everything after the first "号码:" occurrence.
        _, separator, number = text.partition('号码:')
        if separator:
            print('>>>', number)
# Detect entries in texts that match the "x年x月x日" date format or the 2022-12-12 format
# date_pattern = re.compile(r'^\d{4}年\d{1,2}月\d{1,2}日$|^\d{4}-\d{1,2}-\d{1,2}$')
# for text in texts:
# if date_pattern.match(text):
# print(text)