59 lines
2.4 KiB
Python
59 lines
2.4 KiB
Python
def _group_blocks_into_lines(blocks):
    """
    Group OCR text blocks into visual text lines, top to bottom.

    A block joins the current line when the distance between its Y coordinate
    and the mean Y of the blocks already in that line is smaller than the mean
    height of those blocks; otherwise it starts a new line.

    :param blocks: list of ``[x, y, height, text]`` entries
    :return: list of lines, each line being a list of blocks (empty if no blocks)
    """
    # Sort by Y ascending (top to bottom). The sort is stable, so blocks that
    # share the same Y keep their incoming order.
    ordered = sorted(blocks, key=lambda b: b[1])

    lines = []
    for block in ordered:
        if lines:
            current = lines[-1]
            mean_y = sum(b[1] for b in current) / len(current)
            mean_height = sum(b[2] for b in current) / len(current)
            if block[1] - mean_y < mean_height:
                current.append(block)
                continue
        # First block overall, or too far below the current line: start a new line.
        lines.append([block])
    return lines


def general_text_recognize(image) -> str:
    """
    Run general text recognition (OCR) on an image through the Shenzhen
    Kuaitong ("szkt") general OCR endpoint and return the recognized text.

    The returned text blocks are arranged in reading order: grouped into
    visual lines from top to bottom, and ordered left to right inside each
    line. Every text block is emitted on its own output line with all
    whitespace characters removed from its content.

    :param image: image record, read by key: "影像件唯一标识" (unique image ID),
        "影像件格式" (image format; leading dots are stripped) and
        "影像件BASE64编码" (Base64-encoded image content)
    :return: recognized text, one text block per line ("" if no block is returned)
    :raises RuntimeError: if the response does not report status 200 and code 0
    """
    url = "https://ai.inspirvision.cn/s/api/ocr/general"
    image_id = image["影像件唯一标识"]
    image_format = image["影像件格式"].lstrip(".")
    image_base64 = image["影像件BASE64编码"]

    # Call the general text recognition endpoint
    response = http_client.post(
        url=url,
        # The unique image ID doubles as the request ID so both parties can trace the call
        headers={"X-RequestId-Header": image_id},
        data={
            "token": authenticator.get_token(servicer="szkt"),  # access token for the "szkt" servicer
            "imgBase64": f"data:image/{image_format};base64,{image_base64}",
        },
        guid=md5((url + image_id).encode("utf-8")).hexdigest().upper(),
    )
    # TODO: route the image to manual processing when the response is not successful
    if not (response.get("status") == 200 and response.get("code") == 0):
        raise RuntimeError("请求深圳快瞳通用文本识别接口发生异常")

    blocks = []
    for item in response["data"]:
        polygon = item["itemPolygon"]
        # noinspection PyTypeChecker
        blocks.append(
            [
                int(polygon["x"]),  # X coordinate of the block's top-left corner
                int(polygon["y"]),  # Y coordinate of the block's top-left corner
                int(polygon["height"]),  # height of the block
                item["value"],  # text content of the block
            ]
        )

    # "Tetris"-style arrangement: stack blocks into lines top to bottom, then
    # read each line left to right (X ascending), stripping all whitespace
    # characters from the text of every block.
    texts = []
    for line in _group_blocks_into_lines(blocks):
        for block in sorted(line, key=lambda b: b[0]):
            texts.append(re.sub(r"\s", "", block[3]))
    return "\n".join(texts)