diff --git a/.gitignore b/.gitignore index b2b5d0d..40a5019 100755 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -debug \ No newline at end of file +debug +README-EN.md \ No newline at end of file diff --git a/README.md b/README.md index ff1fa02..d4deab0 100755 --- a/README.md +++ b/README.md @@ -1,14 +1,8 @@ # Alfred - OCR and Translation -## 声明 - -![ocr_abuse][image-7] - -近期发现有用户滥用百度 OCR 的 API,如上图所示,有用户在短时间内进行了上百次调用。请有较高需求的用户自行注册百度AI,这样的滥用行为将导致日调用额度超标以致其他用户无法正常使用。若长期发现这样的行为,将重新考虑是否要内置 Key。谢谢配合! - ## 下载地址 -[国内直链][2] | [Github-Release][3] +[Packal][3] | [国内直链][2] ## OCR Demo @@ -20,15 +14,22 @@ ## 版本 -### 4.8.1 +### 4.9 + +1. 删除腾讯优图接口; +2. 新增腾讯云文字识别接口(正式版); +3. 新版本不再发布至 GitHub Release,将发布至 Packal。 + +### 近期更新 - 百度 OCR 更换接口参数,已修复 -- 支持 Mathpix 公式识别(因学业原因,仅支持识别后输出 Latex 格式文本,更多支持选项待后期开发); +- 支持 Mathpix 公式识别; - 由于可选识别方式过多,CNOCR 的触发方式修改为唯一触发词 `ooc (CNOCR)`。 + ## 能力 - 离线 OCR (CNOCR) -- 通用 OCR (百度 | 腾讯优图 | Google) +- 通用 OCR (百度 | 腾讯 | Google) - 二维码识别 (百度 | ZXing) - 表格文字识别 (百度) - 数学公式识别 (Mathpix) @@ -43,10 +44,6 @@ 4. 您需要申请并将对应接口的配置填入环境变量,部分接口附带我自己的 Key,但严禁滥用。 5. 具体配置方法请移步 [安装方式][4]。 - - ## 依赖 ### 通用 OCR @@ -132,7 +129,7 @@ pip install zxing -### [Tencent Youtu (腾讯优图)][8] +### [Tencent (腾讯)][8] #### 触发 @@ -141,7 +138,7 @@ pip install zxing #### 说明 1. 自带一个测试 Token,不保证可用性,需要稳定可自行申请; -2. 最大支持 1MB 的图片,过大图片会自动交由百度处理(当然百度最大也只支持 4MB)。 +2. 最大支持 3MB 的图片。 ### [Google OCR][9] @@ -219,13 +216,13 @@ pip install zxing 2. 
[LucasZhan](https://github.com/LucasZhan) [1]: https://github.com/Chandler-Lu/alfred-ocr/blob/master/README-EN.md -[2]: https://img.yeslu.cn/github/Capture_then_OCR.zip -[3]: https://github.com/Chandler-Lu/alfred-ocr/releases +[2]: https://t.yeslu.cn/github/Capture_then_OCR.zip +[3]: https://www.packal.org/workflow/alfred-ocr [4]: https://www.yeslu.cn/archives/7fe802d0.html [5]: https://github.com/breezedeus/cnocr [6]: https://github.com/breezedeus/cnocr/blob/master/README.md [7]: https://ai.baidu.com/tech/ocr -[8]: https://ai.qq.com/product/ocr.shtml#common +[8]: https://console.cloud.tencent.com/ocr/overview [9]: https://cloud.google.com/vision/docs/ocr [10]: https://github.com/dlenski/python-zxing [11]: https://github.com/dlenski/python-zxing/blob/master/README.md @@ -238,7 +235,7 @@ pip install zxing [image-1]: img/demo_ocr_cn.gif [image-2]: img/demo_trans.gif [image-3]: img/file_ocr.png -[image-4]: img/first_lang_select.png -[image-5]: img/second_lang_select.png +[image-4]: img/first_lang_selection.png +[image-5]: img/second_lang_selection.png [image-6]: img/jetbrains_logo.png [image-7]: img/ocr_abuse.png diff --git a/img/first_lang_select.png b/img/first_lang_select.png deleted file mode 100755 index b553b11..0000000 Binary files a/img/first_lang_select.png and /dev/null differ diff --git a/img/first_lang_selection.png b/img/first_lang_selection.png new file mode 100644 index 0000000..a563d70 Binary files /dev/null and b/img/first_lang_selection.png differ diff --git a/img/second_lang_select.png b/img/second_lang_select.png deleted file mode 100755 index e82eaa0..0000000 Binary files a/img/second_lang_select.png and /dev/null differ diff --git a/img/second_lang_selection.png b/img/second_lang_selection.png new file mode 100644 index 0000000..b63c66a Binary files /dev/null and b/img/second_lang_selection.png differ diff --git a/src/config.py b/src/config.py index 0815f63..cbf814a 100755 --- a/src/config.py +++ b/src/config.py @@ -2,16 +2,11 @@ @Description: 
Capture than OCR - Variable @Author: Chandler Lu @Date: 2020-03-09 20:32:15 -LastEditTime: 2020-08-14 14:18:47 +LastEditTime: 2021-01-07 17:29:41 ''' # -*- coding: UTF-8 -*- import os -# Control -BAIDU_OCR_SPACING_OFFSET = 8 -BAIDU_OCR_SPACING_VARIANCE = 15 -BAIDU_OCR_WIDTH_OFFSET = 50 - # Key - Alfred BAIDU_API_KEY = os.environ['baidu_api_key'] BAIDU_SECRET_KEY = os.environ['baidu_secret_key'] @@ -19,8 +14,8 @@ BAIDU_LANGUAGE_TYPE = 'CHN_ENG' else: BAIDU_LANGUAGE_TYPE = os.environ['baidu_language_type'] -TENCENT_YOUTU_APPID = os.environ['tencent_youtu_appid'] -TENCENT_YOUTU_APPKEY = os.environ['tencent_youtu_appkey'] +TENCENT_SECRET_ID = os.environ['tencent_secret_id'] +TENCENT_SECRET_KEY = os.environ['tencent_secret_key'] GOOGLE_ACCESS_TOKEN = os.environ['google_access_token'] GOOGLE_POST_REFERER = os.environ['google_post_referer'] GOOGLE_HTTP_PROXY = os.environ['google_http_proxy'] @@ -32,9 +27,9 @@ # BAIDU_API_KEY = 'rmMynojL9KapDOikDTgKlImy' # BAIDU_SECRET_KEY = '3QKoI1E56u16tEMdwBnpXSPNezdoZWFD' -# TENCENT_YOUTU_APPID = '2124810247' -# TENCENT_YOUTU_APPKEY = 'OUd1lpQk4yqp9vSs' -# GOOGLE_ACCESS_TOKEN = '' +# TENCENT_SECRET_ID = '' +# TENCENT_SECRET_KEY = '' +# # GOOGLE_ACCESS_TOKEN = '' # GOOGLE_POST_REFERER = '' # GOOGLE_HTTP_PROXY = '' # CAIYUN_TRANSLATE_TOKEN = '3975l6lr5pcbvidl6jl2' @@ -47,7 +42,18 @@ BAIDU_OCR_API = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general' BAIDU_QRCODE_API = 'https://aip.baidubce.com/rest/2.0/ocr/v1/qrcode' BAIDU_FORM_API = 'https://aip.baidubce.com/rest/2.0/solution/v1/form_ocr/request' -TENCENT_YOUTU_OCR_API = 'https://api.ai.qq.com/fcgi-bin/ocr/ocr_generalocr' GOOGLE_OCR_API = 'https://vision.googleapis.com/v1/images:annotate' CAIYUN_TRANSLATE_API = 'http://api.interpreter.caiyunai.com/v1/translator' MATHPIX_API = 'https://api.mathpix.com/v3/text' + +# Tencent Control +TENCENT_SERVICE = 'ocr' +TENCENT_ACTION = 'GeneralFastOCR' +TENCENT_OCR_HOST = 'ocr.tencentcloudapi.com' +TENCENT_OCR_API = 'https://ocr.tencentcloudapi.com' 
+TENCENT_CONTENT_TYPE = 'application/json; charset=utf-8' + +# Baidu Control +BAIDU_OCR_SPACING_OFFSET = 8 +BAIDU_OCR_SPACING_VARIANCE = 15 +BAIDU_OCR_WIDTH_OFFSET = 50 diff --git a/src/error_declare.py b/src/error_declare.py new file mode 100644 index 0000000..d7e678a --- /dev/null +++ b/src/error_declare.py @@ -0,0 +1,16 @@ +''' +Description: Error Declare +Author: Chandler Lu +Date: 2021-01-07 17:14:36 +LastEditTime: 2021-01-07 17:15:02 +''' +import sys + + +def declare_network_error(): + print('Network connection refused!', end='') + sys.exit(0) + + +def declare_file_error(): + sys.exit(0) diff --git a/src/img_to_b64.py b/src/img_to_b64.py new file mode 100644 index 0000000..2abc4e1 --- /dev/null +++ b/src/img_to_b64.py @@ -0,0 +1,14 @@ +''' +Description: Convert IMG to BASE64 +Author: Chandler Lu +Date: 2021-01-07 16:57:01 +LastEditTime: 2021-01-07 17:09:50 +''' +from base64 import b64encode + + +def convert_image_base64(pic_path): + with open(pic_path, 'rb') as pic_file: + byte_content = pic_file.read() + pic_base64 = b64encode(byte_content).decode('utf-8') + return pic_base64 diff --git a/src/ocr.py b/src/ocr.py index 6771553..f79d68b 100755 --- a/src/ocr.py +++ b/src/ocr.py @@ -3,25 +3,25 @@ @version: 4.8 @Author: Chandler Lu @Date: 2019-11-26 23:52:36 -LastEditTime: 2020-12-27 22:52:10 +LastEditTime: 2021-01-07 17:26:36 ''' # -*- coding: UTF-8 -*- import sys import os import time import statistics - import json import re import requests - import hashlib +import hmac import random import string from base64 import b64encode from urllib import parse import config as c +import tencent_ocr ocr_select = int(sys.argv[1]) @@ -189,48 +189,6 @@ def baidu_ocr_form(pic_path): print('Too large!') -''' -Tencent Youtu OCR -''' - - -def request_tencent_youtu_sign(postdata, pic_path): - # 字典升序排序 - dic = sorted(postdata.items(), key=lambda d: d[0]) - # URL编码 + 拼接app_key - sign_text = parse.urlencode(dic) + '&app_key=' + c.TENCENT_YOUTU_APPKEY - # MD5 + 转换大写 - sign = 
hashlib.md5(sign_text.encode('utf-8')).hexdigest().upper() - return sign - - -def tencent_youtu_ocr(pic_path): - if (1048576 <= os.path.getsize(pic_path) <= 4194304): - baidu_ocr(pic_path) - return - elif (os.path.getsize(pic_path) <= 1048576): - postdata = {'app_id': c.TENCENT_YOUTU_APPID, 'time_stamp': int(time.time()), 'nonce_str': ''.join( - random.choices(string.ascii_letters + string.digits, k=8)), 'image': convert_image_base64(pic_path)} - postdata['sign'] = request_tencent_youtu_sign(postdata, pic_path) - try: - response = requests.post( - url=c.TENCENT_YOUTU_OCR_API, - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }, - data=postdata - ) - if (response.status_code == 200): - response_json = response.json()['data']['item_list'] - output_result(4, response_json) - else: - print('Request failed!', end='') - except requests.exceptions.ConnectionError: - declare_network_error() - else: - print('Too large!') - - ''' Google OCR ''' @@ -429,7 +387,7 @@ def output_baidu_ocr(response_json): if chinese_tag == 1: is_num_between_chinese_space = re.finditer( r'[\u4e00-\u9fa5+][0-9a-zA-Z]', words) # 汉字+数字 - if is_num_between_chinese_space != None: + if is_num_between_chinese_space is not None: space_insert_offset = 0 for i in is_num_between_chinese_space: list_words = list(words) @@ -439,7 +397,7 @@ def output_baidu_ocr(response_json): words = ''.join(list_words) is_num_between_space_chinese = re.finditer( r'[0-9a-zA-Z]+[\u4e00-\u9fa5+]', words) # 数字+汉字 - if is_num_between_space_chinese != None: + if is_num_between_space_chinese is not None: space_insert_offset = 0 for i in is_num_between_space_chinese: list_words = list(words) @@ -467,7 +425,8 @@ def output_baidu_ocr(response_json): words = words.replace("?", "?") words = re.sub(r'( ){2,}', ' ', words) print(words, end='') - if (is_line_spacing_check == 1) and (index != response_json['words_result_num'] - 1) and (response_json['words_result'][index + 1]['location']['top'] - 
response_json['words_result'][index]['location']['top'] > top_half + c.BAIDU_OCR_SPACING_OFFSET): + if (is_line_spacing_check == 1) and (index != response_json['words_result_num'] - 1) and ( + response_json['words_result'][index + 1]['location']['top'] - response_json['words_result'][index]['location']['top'] > top_half + c.BAIDU_OCR_SPACING_OFFSET): print() elif (is_line_spacing_check == 0) and (index != response_json['words_result_num'] - 1) and (response_json['words_result'][index]['location']['width'] < width_half - c.BAIDU_OCR_WIDTH_OFFSET): print() @@ -526,7 +485,7 @@ def remove_pic(pic_path): elif (ocr_select == 3): baidu_ocr_form(pic_path) elif (ocr_select == 4): - tencent_youtu_ocr(pic_path) + tencent_ocr.tencent_ocr(pic_path) elif (ocr_select == 5): google_ocr(pic_path) elif (ocr_select == 6): diff --git a/src/tencent_ocr.py b/src/tencent_ocr.py new file mode 100644 index 0000000..ffa8cc2 --- /dev/null +++ b/src/tencent_ocr.py @@ -0,0 +1,124 @@ +''' +Description: +version: +Author: Chandler Lu +Date: 2021-01-07 16:47:12 +LastEditTime: 2021-01-07 17:25:26 +''' + +import os +import time +from datetime import datetime +import requests +import hashlib +import json +import hmac + +import config as c +import img_to_b64 +import error_declare + + +def tencent_sign(key, msg): + return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() + + +def tencent_ocr(pic_path): + if (os.path.getsize(pic_path) <= 3145728): + # 请求方式 + tencent_http_request_method = 'POST' + tencent_canonical_uri = '/' + tencent_canonical_query_string = '' + tencent_canonical_headers = "content-type:%s\nhost:%s\n" % ( + c.TENCENT_CONTENT_TYPE, c.TENCENT_OCR_HOST) + tencent_signed_headers = 'content-type;host' + + # 签名方式 + tencent_algorithm = 'TC3-HMAC-SHA256' + + # 时间戳 + tencent_request_timestamp = int(time.time()) + tencent_request_date = str(datetime.utcfromtimestamp( + tencent_request_timestamp).strftime("%Y-%m-%d")) + + # 参数准备 + tencent_credential_scope_data = [ + 
tencent_request_date, c.TENCENT_SERVICE, 'tc3_request'] + tencent_credential_scope = '/'.join(tencent_credential_scope_data) + + # 图片数据 + data_img = { + "ImageBase64": img_to_b64.convert_image_base64(pic_path) + } + data_img = json.dumps(data_img).encode('utf-8') + tencent_hashed_request_payload = hashlib.sha256( + data_img).hexdigest().lower() + + # 规范请求串 + tencnet_canonical_request_data = [ + tencent_http_request_method, + tencent_canonical_uri, + tencent_canonical_query_string, + tencent_canonical_headers, + tencent_signed_headers, + tencent_hashed_request_payload] + tencnet_canonical_request = '\n'.join(tencnet_canonical_request_data) + tencent_hashed_canonical_request = hashlib.sha256( + tencnet_canonical_request.encode("utf-8")).hexdigest() + + # 待签名字符串 + tencent_string_to_sign_data = [ + tencent_algorithm, + str(tencent_request_timestamp), + tencent_credential_scope, + tencent_hashed_canonical_request] + tencent_string_to_sign = '\n'.join(tencent_string_to_sign_data) + + # 计算签名 + tencent_secret_date = tencent_sign( + ("TC3" + c.TENCENT_SECRET_KEY).encode("utf-8"), + tencent_request_date) + tencent_secret_service = tencent_sign( + tencent_secret_date, c.TENCENT_SERVICE) + tencent_secret_signing = tencent_sign( + tencent_secret_service, "tc3_request") + tencent_signature = hmac.new( + tencent_secret_signing, + tencent_string_to_sign.encode("utf-8"), + hashlib.sha256).hexdigest() + + tencent_authorization = tencent_algorithm + ' ' + 'Credential=' + c.TENCENT_SECRET_ID + '/' + \ + tencent_credential_scope + ', ' + 'SignedHeaders=' + \ + tencent_signed_headers + ', ' + 'Signature=' + tencent_signature + + try: + response = requests.post( + url=c.TENCENT_OCR_API, + headers={ + "Content-Type": c.TENCENT_CONTENT_TYPE, + "X-TC-Action": c.TENCENT_ACTION, + "X-TC-Region": "ap-shanghai", + "X-TC-Timestamp": str(tencent_request_timestamp), + "X-TC-Version": "2018-11-19", + "Authorization": tencent_authorization, + }, + data=data_img, + ) + if (response.status_code == 
200): + if 'Error' in response.json()['Response']: + print(response.json()['Response']['Error']['Message']) + else: + output(response.json()['Response']['TextDetections']) + else: + print('Request failed!', end='') + except requests.exceptions.ConnectionError: + error_declare.declare_network_error() + else: + print('Too large!') + + +def output(message): + for i in range(len(message)): + print(message[i]['DetectedText'], end='') + if i != len(message) - 1: + print()