Skip to content

Commit

Permalink
Support: multi file ocr
Browse files Browse the repository at this point in the history
  • Loading branch information
Chandler Lu committed Dec 24, 2019
1 parent 9e1fc86 commit d5050b8
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 29 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@

[查看更新历史](https://github.com/Chandler-Lu/alfred-ocr/wiki/Update-History)

### 3.7 (2019-12-23 19:51:33)
### 4.0 (2019-12-24 10:16)

- 百度接口更换为含有位置信息的通用 OCR 接口
- 大幅度提高百度 OCR 接口的段落优化能力
- 感谢[Elvis Cai](https://github.com/elviscai)提供的帮助
- 支持多文件批量识别

## 能力

- 离线 OCR (CNOCR)
- 通用 OCR (百度,腾讯优图,Google)
- 二维码识别 (百度)
- 文本翻译 (彩云小译)
- 多文件识别

## 使用(必看!!)

Expand Down Expand Up @@ -137,7 +136,7 @@ pip install requests
- [x] 腾讯优图
- [ ] 有道翻译
- [x] 彩云小译
- [ ] 多文件识别
- [x] 多文件识别
- [ ] 截图翻译
- [x] 二维码识别
- [x] 文本翻译
Expand Down
68 changes: 44 additions & 24 deletions src/ocr_bgt.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
'''
@Description: ocr_baidu_google_tencent
@version: 3.0
@version: 3.1
@Author: Chandler Lu
@Date: 2019-11-26 23:52:36
@LastEditTime : 2019-12-23 19:34:36
@LastEditTime : 2019-12-24 10:13:37
'''
# -*- coding: UTF-8 -*-
import sys
Expand All @@ -24,11 +24,11 @@

OCR_SELECT = sys.argv[1]
PIC_PATH = sys.argv[2]
FOLDER_PATH = '/private/tmp/com.chandler.alfredocr'

# Control
BAIDU_OCR_SPACING_OFFSET = 8
BAIDU_OCR_SPACING_VARIANCE = 15
BAIDU_OCR_LINE_BREAK = 100
BAIDU_OCR_WIDTH_OFFSET = 50

# Key
Expand Down Expand Up @@ -84,22 +84,25 @@ def return_baidu_token():


def baidu_ocr(pic_path):
    '''
    Send one image to BAIDU_OCR_API and print the recognised text.

    pic_path: path of the image file to upload.

    Nothing is returned; every outcome is reported on stdout:
    - file larger than the size cap below -> prints 'Too large!' and
      sends no request;
    - transport error or non-200 status   -> prints 'Request failed!'
      (without a trailing newline);
    - HTTP 200                            -> the decoded JSON body is
      handed to output_baidu_ocr(), which does the printing.
    '''
    # Size cap applied by this script before uploading (4 * 1024 * 1024).
    max_image_bytes = 4194304
    if os.path.getsize(pic_path) > max_image_bytes:
        print('Too large!')
        return

    try:
        response = requests.post(
            url=BAIDU_OCR_API,
            params={
                "access_token": return_baidu_token(),
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
            },
            data={
                "image": convert_image_base64(pic_path),
            },
        )
    except requests.RequestException:
        # A connection-level failure is reported the same way as a bad
        # HTTP status instead of surfacing a traceback to the caller.
        print('Request failed!', end='')
        return

    if response.status_code == 200:
        output_baidu_ocr(response.json())
    else:
        print('Request failed!', end='')


def baidu_ocr_qrcode(pic_path):
Expand Down Expand Up @@ -233,6 +236,17 @@ def google_ocr(pic_path):
print('Request failed!', end='')


def multi_file_ocr():
    '''
    Run baidu_ocr() on every supported image found in FOLDER_PATH.

    A file is processed when its name ends (case-insensitively) with
    .png, .jpg, .jpeg or .bmp and its size is at most 4194304 bytes.
    Each processed image is passed to remove_pic() afterwards. Files
    that fail either check are skipped and left in place.

    Results are separated by print('\\n') between consecutive images;
    nothing is printed after the last one.
    '''
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp')
    max_image_bytes = 4194304

    # Match on the real file extension: the previous unanchored pattern
    # with unescaped dots also accepted names such as 'xpng_notes.txt'.
    # os.listdir() returns entries in arbitrary order, so sort them to
    # keep the output order stable between runs.
    names = sorted(
        name for name in os.listdir(FOLDER_PATH)
        if name.lower().endswith(image_suffixes)
        and os.path.getsize(os.path.join(FOLDER_PATH, name)) <= max_image_bytes
    )

    last_index = len(names) - 1
    for i, name in enumerate(names):
        current_pic_path = os.path.join(FOLDER_PATH, name)
        baidu_ocr(current_pic_path)
        remove_pic(current_pic_path)
        if i != last_index:
            print('\n')


'''
Output
'''
Expand Down Expand Up @@ -324,7 +338,7 @@ def output_baidu_ocr(response_json):
is_num_between_chinese_space = re.finditer(
r'[\u4e00-\u9fa5+][0-9a-zA-Z]+( )+[\u4e00-\u9fa5+]', words) # 测试666 代码
is_num_between_space_chinese = re.finditer(
r'[\u4e00-\u9fa5+]( )+[0-9a-zA-Z]+[\u4e00-\u9fa5+]', words) # 测试 666代码
r'( )+[0-9a-zA-Z]+[\u4e00-\u9fa5+]', words) # 测试 666代码
if is_num_between_chinese != None:
space_insert_offset = 0 # 第一次插入空格后,后续插入点发生偏移
for i in is_num_between_chinese:
Expand All @@ -350,11 +364,12 @@ def output_baidu_ocr(response_json):
i.span()[1] + space_insert_offset - 1, ' ')
space_insert_offset += 1
words = ''.join(list_words)
words = words.replace(",", ",")
words = words.replace(", ", ",").replace(",", ",")
words = words.replace(". ", "。").replace(
".", "。").replace("。 ", "。")
words = words.replace("!", "!")
words = words.replace(";", ";")
words = words.replace(":", ":")
words = words.replace(".", "。")
words = words.replace("(", "(")
words = words.replace(")", ")")
words = words.replace("?", "?")
Expand All @@ -363,17 +378,17 @@ def output_baidu_ocr(response_json):
print(words, end='')
else:
words = words.replace(",", ",")
words = words.replace("。", ".")
words = words.replace("!", "!")
words = words.replace(";", ";")
words = words.replace("。", ".")
words = words.replace("(", "(")
words = words.replace(")", ")")
words = words.replace("?", "?")
words = re.sub(r'( ){2,}', ' ', words)
print(words, end='')
if (is_line_spacing_check is 1) and (index != response_json['words_result_num'] - 1) and (response_json['words_result'][index + 1]['location']['top'] - response_json['words_result'][index]['location']['top'] > top_half + BAIDU_OCR_SPACING_OFFSET):
print()
elif (response_json['words_result'][index]['location']['width'] < width_half - BAIDU_OCR_WIDTH_OFFSET):
elif (is_line_spacing_check is 0) and (index != response_json['words_result_num'] - 1) and (response_json['words_result'][index]['location']['width'] < width_half - BAIDU_OCR_WIDTH_OFFSET):
print()


Expand All @@ -384,13 +399,18 @@ def remove_pic(pic_path):
if __name__ == "__main__":
    # Dispatch on the OCR mode given as the first command-line argument.
    # Every single-image mode removes PIC_PATH exactly once after the
    # recognition call; an unknown mode does nothing.
    if OCR_SELECT == 'baidu':
        baidu_ocr(PIC_PATH)
        remove_pic(PIC_PATH)
    elif OCR_SELECT == 'tencent':
        tencent_youtu_ocr(PIC_PATH)
        remove_pic(PIC_PATH)
    elif OCR_SELECT == 'google':
        google_ocr(PIC_PATH)
        remove_pic(PIC_PATH)
    elif OCR_SELECT == 'baidu_qrcode':
        baidu_ocr_qrcode(PIC_PATH)
        remove_pic(PIC_PATH)
    elif OCR_SELECT == 'file':
        # Batch mode: multi_file_ocr() removes each image it processes,
        # so no clean-up of PIC_PATH happens here.
        multi_file_ocr()

'''
________
Expand Down

0 comments on commit d5050b8

Please sign in to comment.