Skip to content

Commit

Permalink
Update: support cnocr server mode and optimise output
Browse files Browse the repository at this point in the history
  • Loading branch information
Chandler Lu committed Sep 12, 2022
1 parent ae5b47e commit 948cf92
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

## 版本

### 4.9.2

1. 支持 CNOCR 基于 FastAPI 的 HTTP [服务模式](https://github.com/breezedeus/cnocr#http%E6%9C%8D%E5%8A%A1)

### 4.9.1

1. 修复新版 CNOCR 的输出问题(因没有 mac 设备,只更新了源码)。
Expand Down
20 changes: 13 additions & 7 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@
@Description: Capture then OCR - Variable
@Author: Chandler Lu
@Date: 2020-03-09 20:32:15
LastEditTime: 2021-01-07 17:29:41
LastEditTime: 2022-09-12 17:50:31
'''
# -*- coding: UTF-8 -*-
import os

# Key - Alfred
BAIDU_API_KEY = os.environ['baidu_api_key']
BAIDU_SECRET_KEY = os.environ['baidu_secret_key']
if os.environ['baidu_language_type'] == '':
BAIDU_LANGUAGE_TYPE = 'CHN_ENG'
else:
BAIDU_LANGUAGE_TYPE = os.environ['baidu_language_type']
TENCENT_SECRET_ID = os.environ['tencent_secret_id']
TENCENT_SECRET_KEY = os.environ['tencent_secret_key']
GOOGLE_ACCESS_TOKEN = os.environ['google_access_token']
Expand All @@ -24,12 +20,11 @@
MATHPIX_APP_KEY = os.environ['mathpix_app_key']

# Key - Quicker

# BAIDU_API_KEY = 'rmMynojL9KapDOikDTgKlImy'
# BAIDU_SECRET_KEY = '3QKoI1E56u16tEMdwBnpXSPNezdoZWFD'
# TENCENT_SECRET_ID = ''
# TENCENT_SECRET_KEY = ''
# # GOOGLE_ACCESS_TOKEN = ''
# GOOGLE_ACCESS_TOKEN = ''
# GOOGLE_POST_REFERER = ''
# GOOGLE_HTTP_PROXY = ''
# CAIYUN_TRANSLATE_TOKEN = '3975l6lr5pcbvidl6jl2'
Expand All @@ -46,6 +41,10 @@
CAIYUN_TRANSLATE_API = 'http://api.interpreter.caiyunai.com/v1/translator'
MATHPIX_API = 'https://api.mathpix.com/v3/text'

# CNOCR Control
CNOCR_SERVE = 0
CNOCR_API = 'http://127.0.0.1:8501/ocr'

# Tencent Control
TENCENT_SERVICE = 'ocr'
TENCENT_ACTION = 'GeneralFastOCR'
Expand All @@ -54,6 +53,13 @@
TENCENT_CONTENT_TYPE = 'application/json; charset=utf-8'

# Baidu Control
# Language passed to Baidu OCR; falls back to 'CHN_ENG' when the
# 'baidu_language_type' environment variable is unset or empty.
BAIDU_LANGUAGE_TYPE = os.environ.get('baidu_language_type') or 'CHN_ENG'
BAIDU_OCR_SPACING_OFFSET = 8
BAIDU_OCR_SPACING_VARIANCE = 15
BAIDU_OCR_WIDTH_OFFSET = 50
32 changes: 23 additions & 9 deletions src/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
@version: 4.9.1
@Author: Chandler Lu
@Date: 2019-11-26 23:52:36
LastEditTime: 2022-08-24 08:07:18
LastEditTime: 2022-09-12 17:41:48
'''
# -*- coding: UTF-8 -*-
import sys
Expand Down Expand Up @@ -60,12 +60,22 @@ def convert_image_base64(pic_path):


def cnocr_ocr(pic_path):
    """Run CnOCR on the image at ``pic_path`` and print the recognised text.

    ``c.CNOCR_SERVE`` selects the backend: ``0`` runs the ``cnocr`` package
    in-process, ``1`` posts the image to the HTTP service at ``c.CNOCR_API``.
    The ``text`` field of every result is written to stdout with no
    separator and no trailing newline.

    On a request timeout in server mode, a message is printed and the
    process exits with status 0.
    """
    if c.CNOCR_SERVE == 0:
        # Imported only in this branch, so server mode never imports cnocr.
        from cnocr import CnOcr
        ocr = CnOcr(det_model_name='naive_det')
        res = ocr.ocr(pic_path)
    elif c.CNOCR_SERVE == 1:
        try:
            # The context manager closes the image file even if the
            # request raises.
            with open(pic_path, 'rb') as image_file:
                req = requests.post(
                    c.CNOCR_API, timeout=1.5,
                    files={'image': (pic_path, image_file)})
        except requests.Timeout:
            print('Request timeout!', end='')
            sys.exit(0)
        res = req.json()['results']
    else:
        # Without this branch ``res`` would be unbound and the loop below
        # would fail with NameError.
        print('Unsupported CNOCR_SERVE value!', end='')
        return
    for i in res:
        print(i['text'], end='')


'''
Baidu OCR
'''
Expand Down Expand Up @@ -268,8 +278,12 @@ def mathpix_ocr(pic_path):
})
)
if (response.status_code == 200):
response_json = response.json()['latex_styled']
print(response_json, end='')
if 'error' in response.json():
print(response.json()['error'], end='')
elif 'latex_styled' in response.json():
print(response.json()['latex_styled'], end='')
else:
print(response.json()['text'], end='')
else:
print('Request failed!', end='')
except requests.exceptions.ConnectionError:
Expand Down Expand Up @@ -495,9 +509,9 @@ def remove_pic(pic_path):
________
< rabbit >
--------
\ ^__^
\ (oo)\_______
(__)\ )\/\
\\ ^__^
\\ (oo)\\_______
(__)\\ )\\/\
||----w |
|| ||
'''

0 comments on commit 948cf92

Please sign in to comment.