# -*- coding: utf-8 -*-
# Imports
import hashlib
import hmac
import json
import re
import sqlite3
import threading
import time
from email.parser import BytesParser
from email.policy import default
from email.utils import parsedate_to_datetime
from functools import wraps
from imaplib import IMAP4_SSL
from pathlib import Path
from typing import Any, Dict, Generator, Literal, Optional, Tuple, Union
from urllib.parse import quote_plus
from xml.etree import ElementTree
import pandas
from pydantic import BaseModel, Field, HttpUrl, model_validator
from requests import Response, Session
from requests.adapters import HTTPAdapter
from sqlalchemy import create_engine, text
from urllib3.util.retry import Retry
"""
封装sqlalchemy实现按照SQL查询
类和函数的区别
类作为对象的模板,定义了对象的结构和行为;而函数则作为实现特定功能或操作的代码块,提高了代码的可读性和可维护性。
使用方法:
with MySQLClient(database='data_analysis') as client:
dataframe = client.execute_query(sql='select * from {{}}')')
"""
class MySQLClient:
    """MySQL client"""

    def __init__(
        self,
        database,
        host: str = "cdb-7z9lzx4y.cd.tencentcdb.com",
        port: int = 10039,
        username: str = "root",
        password: str = "Te198752",
    ):  # Defaults to 刘弼仁's Tencent Cloud MySQL instance
        # URL-encode the database password
        password = quote_plus(password)
        # Build the database connection string
        connect_config = f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}?charset=utf8"
        # Create the MySQL engine and connect to the database
        self.engine = create_engine(
            connect_config,
            pool_size=5,
            max_overflow=10,
            pool_recycle=3600,
            pool_pre_ping=True,
        )  # Keep 5 open connections in the pool, allow 10 overflow connections, recycle after 1 hour, and ping before use to verify validity

    def __enter__(self):
        """Support the documented `with MySQLClient(...) as client` usage"""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Dispose of the engine when leaving the context"""
        self.engine.dispose()

    def __del__(self):
        """Dispose of the engine automatically on destruction"""
        if hasattr(self, "engine") and self.engine:
            self.engine.dispose()
    def execute_query(self, sql: str) -> pandas.DataFrame:
        """Run a SQL query and return a DataFrame"""
        if not hasattr(self, "engine") or not self.engine:
            raise ConnectionError("Database connection has not been created")
        try:
            with self.engine.connect() as connection:
                dataframe = pandas.read_sql_query(
                    text(sql), connection, coerce_float=False
                )  # Do not coerce non-integer numerics to float, preserving DECIMAL values
                return dataframe
        except Exception as exception:
            raise RuntimeError("Running the SQL query failed") from exception
"""
封装urllib.request的相关操作实现常用HTTPREQUEST
使用方法:
client = HTTPClient()
response = clinet.post(url)
"""
class TokenBucket:
    def __init__(self, refill_rate, max_tokens):
        """Token bucket used to rate-limit requests via the token-bucket algorithm"""
        # Token refill rate (tokens per second)
        self.refill_rate = refill_rate
        # Maximum number of tokens in the bucket
        self.max_tokens = max_tokens
        # Current number of tokens in the bucket
        self.tokens = max_tokens
        # Timestamp of the last refill (monotonic clock, in seconds)
        self.refill_timestamp = time.monotonic()
        # Lock shared across calls so concurrent acquisitions are serialised
        self.lock = threading.Lock()

    # Acquire a token
    def acquire(self) -> tuple[bool, float]:
        with self.lock:
            # Timestamp of this refill
            refill_timestamp = time.monotonic()
            # Recompute the number of tokens in the bucket
            self.tokens = min(
                self.max_tokens,
                self.tokens
                + self.refill_rate * (refill_timestamp - self.refill_timestamp),
            )
            self.refill_timestamp = refill_timestamp
            # If at least one token is available, consume it
            if self.tokens >= 1:
                self.tokens -= 1
                return True, 0.0
            # Otherwise report failure together with a suggested wait time
            return False, 0.2
# Wrap the token bucket in a decorator that rate-limits the decorated function
def restrict(refill_rate=5, max_tokens=5):
    def decorator(func):
        # Initialise the token bucket
        token_bucket = TokenBucket(refill_rate=refill_rate, max_tokens=max_tokens)

        @wraps(func)
        def wrapper(*args, **kwargs):
            # Retry counter
            retries = 0
            # Keep checking whether the call is allowed until the retry limit is reached
            while retries <= 10:
                success, wait_time = token_bucket.acquire()
                # If allowed, call the wrapped function; otherwise back off and wait
                if success:
                    return func(*args, **kwargs)
                time.sleep(wait_time * 1.5**retries)
                retries += 1
            raise Exception("requests too frequent")

        return wrapper

    return decorator
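"""
A minimal usage sketch for the @restrict decorator (a hedged example; fetch_example is a
hypothetical function, not part of this module). Calls beyond the token-bucket burst size are
delayed with exponential backoff before being retried:
    @restrict(refill_rate=2, max_tokens=2)
    def fetch_example(index):
        return index
    results = [fetch_example(index) for index in range(5)]
"""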
class RequestException(Exception):
    """Request exception"""

    def __init__(
        self, status: int = 400, code: int = 0, message: str = "request failed"
    ):
        """
        :param status: HTTP status code, defaults to 400
        :param code: business error code, defaults to 0
        :param message: error message, defaults to "request failed"
        """
        self.status = status
        self.code = code
        self.message = message
        super().__init__(self.message)

    def __str__(self):
        return f"Request failed ({self.status}, {self.message})"
# Request-argument data model
class Arguments(BaseModel):
    """
    :param url: URL, validated by the HttpUrl validator
    :param params: query parameters
    :param headers: request headers
    :param data: form data
    :param json_data: JSON body  # populated via the "json" alias on input; by_alias=True controls whether the alias is used on output
    :param files: files to upload
    :param stream: whether to enable streaming
    :param guid: globally unique identifier
    """

    # URL
    url: HttpUrl = Field(default=...)
    # Query parameters
    params: Optional[Dict] = Field(default=None)
    # Request headers
    headers: Optional[Dict] = Field(default=None)
    # Form data
    data: Optional[Dict] = Field(default=None)
    # JSON body
    json_data: Optional[Dict] = Field(default=None, alias="json")
    # Files to upload
    files: Optional[
        Dict[
            str,
            Union[
                Tuple[str, bytes], Tuple[str, bytes, str], Tuple[str, bytes, str, dict]
            ],
        ]
    ] = Field(default=None)
    # Whether to enable streaming
    stream: Optional[bool] = Field(default=None)
    # Globally unique identifier
    guid: Optional[str] = Field(default=None)

    # Form data and JSON body are mutually exclusive
    @model_validator(mode="after")
    def validate_data(self):
        if self.data and self.json_data:
            raise ValueError("cannot use both data and json parameters simultaneously")
        return self

    # File upload and streaming are mutually exclusive
    @model_validator(mode="after")
    def validate_files(self):
        if self.files and self.stream:
            raise ValueError(
                "cannot use both files and stream parameters simultaneously"
            )
        return self
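"""
A minimal sketch of how Arguments is populated (a hedged example; the URL is a placeholder).
The "json" alias maps onto the json_data field, and data/json bodies are rejected together:
    arguments = Arguments(url="https://example.com/api", json={"key": "value"})
    arguments.model_dump(exclude_none=True, by_alias=True)  # -> {"url": HttpUrl(...), "json": {"key": "value"}}
"""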
# HTTP client
class HTTPClient:
    def __init__(
        self,
        timeout: int = 60,
        default_headers: Optional[Dict[str, str]] = None,
        total: int = 3,
        backoff_factor: float = 0.5,
        cache_enabled: bool = False,
        cache_ttl: int = 90,
    ):
        """
        :param timeout: timeout in seconds
        :param default_headers: default request headers
        :param total: maximum number of retries
        :param backoff_factor: backoff factor between retries
        :param cache_enabled: whether to use the response cache
        :param cache_ttl: cache time-to-live in days
        """
        # Timeout
        self.timeout = timeout
        # Create the HTTP session and mount the adapters
        self.session = self._create_session(
            default_headers=default_headers, total=total, backoff_factor=backoff_factor
        )
        # Whether to use the response cache
        self.cache_enabled = cache_enabled
        # Cache time-to-live
        self.cache_ttl = cache_ttl
        # If the cache is enabled, initialise the cache database
        if self.cache_enabled:
            self._initialize_cache_database()
    # Create the HTTP session and mount the adapters
    @staticmethod
    def _create_session(
        total: int,
        backoff_factor: float,
        default_headers: Optional[Dict[str, str]] = None,
    ) -> Session:
        """
        :param total: maximum number of retries
        :param backoff_factor: backoff factor between retries
        :param default_headers: default request headers
        """
        # Create the session object
        session = Session()
        # Set the default headers
        if default_headers:
            session.headers.update(default_headers)
        # Configure the retry strategy (honour the server's Retry-After header first, otherwise fall back to exponential backoff)
        strategy_retries = Retry(
            allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "PATCH"],
            status_forcelist=[
                408,
                502,
                503,
                504,
            ],  # 408: request timeout; 502: bad gateway; 503: service unavailable; 504: gateway timeout
            total=total,
            respect_retry_after_header=True,
            backoff_factor=backoff_factor,
        )
        # Create the adapter and bind the retry strategy
        adapter = HTTPAdapter(max_retries=strategy_retries)
        # Apply to HTTP requests
        session.mount("http://", adapter)
        # Apply to HTTPS requests
        session.mount("https://", adapter)
        return session
    def _initialize_cache_database(self):
        """Initialise the cache database"""
        # Lock shared by cache reads and writes
        self._cache_lock = threading.Lock()
        # Create the cache database connection (SQLite)
        self.cache_connection = sqlite3.connect(
            database="SQLite.db", check_same_thread=False
        )
        self.cache_connection.execute(
            """CREATE TABLE IF NOT EXISTS caches (guid TEXT PRIMARY KEY, response TEXT, timestamp REAL)"""
        )
        # Create the timestamp index
        self.cache_connection.execute(
            """CREATE INDEX IF NOT EXISTS index_timestamp ON caches(timestamp)"""
        )
        # Delete expired cache entries
        self.cache_connection.execute(
            "DELETE FROM caches WHERE timestamp < ?",
            (time.time() - self.cache_ttl * 86400,),  # cache TTL converted from days to seconds
        )
        # Commit the transaction
        self.cache_connection.commit()
    # Look up a cached response in the cache database
    def _query_response(self, guid: str) -> Optional[Dict]:
        with self._cache_lock:
            cursor = None
            try:
                # Create a cursor
                cursor = self.cache_connection.cursor()
                # Look up the response by the request's unique identifier
                cursor.execute(
                    "SELECT response FROM caches WHERE guid = ? AND timestamp >= ?",
                    (guid, time.time() - self.cache_ttl * 86400),
                )
                if result := cursor.fetchone():
                    return json.loads(result[0])
                return None
            # Return None on any error
            except Exception:
                self.cache_connection.rollback()
                return None
            finally:
                if cursor:
                    cursor.close()
    # Save a response to the cache database
    def _save_response(self, guid: str, response: Dict):
        with self._cache_lock:
            cursor = None
            try:
                # Create a cursor
                cursor = self.cache_connection.cursor()
                # Insert or overwrite the response
                cursor.execute(
                    "INSERT OR REPLACE INTO caches (guid, response, timestamp) VALUES (?, ?, ?)",
                    (guid, json.dumps(response, ensure_ascii=False), time.time()),
                )
                # Commit the transaction
                self.cache_connection.commit()
            # Roll back on any error
            except Exception:
                self.cache_connection.rollback()
            finally:
                if cursor:
                    cursor.close()
    # GET request
    def get(self, **kwargs) -> Union[Dict, str]:
        return self._request(method="GET", arguments=Arguments(**kwargs))

    # POST request
    def post(self, **kwargs) -> Union[Dict, str]:
        return self._request(method="POST", arguments=Arguments(**kwargs))

    # File download
    def download(
        self, stream=False, chunk_size=1024, **kwargs
    ) -> Union[Dict, str, Generator[bytes, None, None]]:
        response = self._request(
            method="GET", arguments=Arguments(**{"stream": stream, **kwargs})
        )
        # If streaming is disabled, return the processed response directly
        if not stream:
            return response
        # If streaming is enabled, wrap the raw response in a chunk generator
        return self._process_stream_response(response=response, chunk_size=chunk_size)
    def _request(self, method: Literal["GET", "POST"], arguments: Arguments) -> Any:
        """Send a request"""
        # Dump the argument model (by_alias=True renames json_data back to json for requests)
        arguments = arguments.model_dump(exclude_none=True, by_alias=True)
        # Convert the URL from an HttpUrl object to a string
        arguments["url"] = str(arguments["url"])
        # Rebuild the form data, dropping None values
        if arguments.get("data") is not None:
            arguments["data"] = {
                key: value
                for key, value in arguments["data"].items()
                if value is not None
            }
        # Rebuild the JSON body, dropping None values (the dumped key is "json" because of the alias)
        if arguments.get("json") is not None:
            arguments["json"] = {
                key: value
                for key, value in arguments["json"].items()
                if value is not None
            }
        # Rebuild the file data
        if arguments.get("files") is not None:
            files_valid = {}
            # Iterate over the file entries
            for key, value in arguments["files"].items():
                if isinstance(value, (tuple, list)):
                    match len(value):
                        # File name and file content
                        case 2:
                            files_valid[key] = (value[0], value[1], None, None)
                        # File name, file content and content type
                        case 3:
                            files_valid[key] = (value[0], value[1], value[2], None)
                        # File name, file content, content type and headers
                        case 4:
                            files_valid[key] = (value[0], value[1], value[2], value[3])
            arguments.update({"files": files_valid})
        # Globally unique identifier
        guid = arguments.pop("guid", None)
        # If caching is enabled and this request carries a guid, return the cached response first
        if self.cache_enabled and guid is not None:
            # Look up the response in the cache database
            response = self._query_response(guid=guid)
            # Return the cached response if present
            if response is not None:
                return response
        response = None
        try:
            # Send the request
            response = self.session.request(
                method=method, timeout=self.timeout, **arguments
            )
            # Raise on error status codes
            response.raise_for_status()
            # For streaming requests, return the raw Response so the caller can iterate over chunks
            if arguments.get("stream"):
                return response
            # Process the response
            response = self._process_response(response=response)
            # If caching is enabled and the request carries a guid, save the response to the cache database
            if self.cache_enabled and guid is not None:
                self._save_response(guid=guid, response=response)
            return response
        except Exception as exception:
            # Try to parse the status code and error message from the response, otherwise construct them
            try:
                # JSON deserialisation
                response_decoded = response.json()
                # Status code
                status = response_decoded["status"]
                # Error message
                message = response_decoded["message"]
            except Exception:
                status = getattr(
                    getattr(exception, "response", None), "status_code", None
                )
                url = arguments["url"]
                message = str(exception).split("\n")[0]
                # Rebuild the error message
                message = f"{method} {url} failed: {message}"
            raise RequestException(status=status, message=message) from exception
    # Process a response
    @staticmethod
    def _process_response(response: Response) -> Any:
        # Response body
        content = response.content
        # Return None if the body is empty
        if not content:
            return None
        # Normalise the content type
        content_type = (
            response.headers.get("Content-Type", "").split(";")[0].strip().lower()
        )
        # Pick the parsing method based on the content type
        match content_type:
            case "application/json" | "text/json":
                # JSON deserialisation
                return response.json()
            case "application/xml" | "text/xml":
                # Parse into an XML Element object
                return ElementTree.fromstring(text=content)
            case _:
                # If the content type starts with image/, return the image format and the image bytes
                if content_type.startswith("image/"):
                    # Image format
                    image_format = content_type.split(sep="/", maxsplit=1)[1]
                    return image_format, content
                else:
                    return content
    # Process a streaming response
    @staticmethod
    def _process_stream_response(
        response: Response, chunk_size: int
    ) -> Generator[bytes, None, None]:  # the generator does not accept values via send() and returns None when exhausted
        # Validate the chunk size (bool is a subclass of int, so reject it explicitly)
        if not isinstance(chunk_size, int) or isinstance(chunk_size, bool):
            raise ValueError("chunk_size must be an int")
        if chunk_size <= 0:
            raise ValueError("chunk_size must be greater than 0")
        try:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    yield chunk
        finally:
            response.close()
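"""
A minimal usage sketch for HTTPClient (a hedged example; the URLs and the guid value are
placeholders). Responses carrying a guid are cached in SQLite when caching is enabled, and
download(stream=True) yields the body chunk by chunk:
    client = HTTPClient(cache_enabled=True, cache_ttl=7)
    payload = client.get(url="https://httpbin.org/get", params={"key": "value"}, guid="example-guid")
    for chunk in client.download(url="https://httpbin.org/bytes/4096", stream=True, chunk_size=1024):
        pass  # consume each chunk as it arrives, e.g. write it to a file
"""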
class Authenticator:
    def __init__(self):
        """Authenticator (used to obtain access tokens)"""
        # Initialise
        self._initialize()

    def _initialize(self):
        """Initialise the access credentials"""
        # Build the path of the credentials file
        self.certifications_path = (
            Path(__file__).parent.resolve() / "certifications.json"
        )
        # Create the credentials file if it does not exist
        if not self.certifications_path.exists():
            with open(self.certifications_path, "w", encoding="utf-8") as file:
                json.dump(
                    {},
                    file,
                    ensure_ascii=False,
                )
        # Lock shared by token reads and refreshes
        self._token_lock = threading.Lock()
        # Initialise the HTTP client
        self.http_client = HTTPClient()
    def _szkt_get_certification(self) -> tuple[str, float]:
        """Obtain the 深圳快瞳 (Inspirvision) access credential"""
        # Call the 深圳快瞳 access-credential endpoint
        response = self.http_client.get(
            url="https://ai.inspirvision.cn/s/api/getAccessToken?accessKey=APPID_6Gf78H59D3O2Q81u&accessSecret=947b8829d4d5d55890b304d322ac2d0d"
        )
        # Raise if the response is not successful
        if not (response["status"] == 200 and response["code"] == 0):
            raise RuntimeError("Failed to obtain the 深圳快瞳 access credential")
        # Return the token and its expiry timestamp
        return (
            response["data"]["access_token"],
            time.time() + response["data"]["expires_in"],
        )
    def _hlyj_get_certification(self) -> tuple[str, float]:
        """Obtain the 合力亿捷 (7x24cc) access credential"""
        # Enterprise access key id
        access_key_id = "25938f1c190448829dbdb5d344231e42"
        # Signing secret
        secret_access_key = "44dc0299aff84d68ae27712f8784f173"
        # Timestamp (seconds)
        timestamp = int(time.time())
        # Signature: hex HMAC-SHA256 over the concatenated access key id, signing secret and timestamp
        signature = hmac.new(
            secret_access_key.encode("utf-8"),
            f"{access_key_id}{secret_access_key}{timestamp}".encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()
        # Call the 合力亿捷 access-credential endpoint
        response = self.http_client.get(
            url=f"https://kms.7x24cc.com/api/v1/corp/auth/token?access_key_id={access_key_id}&timestamp={timestamp}&signature={signature}"
        )
        # Raise if the response is not successful
        if not response["success"]:
            raise RuntimeError("Failed to obtain the 合力亿捷 access credential")
        # Return the token and its expiry timestamp
        return (
            response["data"],
            time.time() + 3600,  # the access token is valid for 1 hour
        )
    def _feishu_get_certification(self) -> tuple[str, float]:
        """Obtain the Feishu access credential"""
        # Call the Feishu access-credential endpoint
        response = self.http_client.post(
            url="https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
            data={
                "app_id": "cli_a1587980be78500c",
                "app_secret": "vZXGZomwfmyaHXoG8s810d1YYGLsIqCA",
            },
        )
        # Raise if the response is not successful
        if not response["code"] == 0:
            raise RuntimeError("Failed to obtain the Feishu access credential")
        # Return the token and its expiry timestamp
        return (
            response["tenant_access_token"],
            time.time() + response["expire"],
        )
    def get_token(self, servicer: str) -> str | None:
        """
        Obtain an access token.

        :param servicer: service provider identifier, as a string
        """
        with self._token_lock:
            # Initialise the token, its expiry timestamp and the credentials store
            token, expired_timestamp = None, 0
            certifications = {}
            try:
                with open(self.certifications_path, "r", encoding="utf-8") as file:
                    # Read the credentials of all service providers
                    certifications = json.load(file)
                # Get this service provider's credential
                certification = certifications.get(servicer, None)
                # If the credential exists, extract the token and its expiry timestamp
                if certification is not None:
                    # Access token
                    token = certification["token"]
                    # Access-token expiry timestamp
                    expired_timestamp = certification["expired_timestamp"]
            # If JSON deserialisation fails, reset the credentials file
            except json.decoder.JSONDecodeError:
                with open(self.certifications_path, "w", encoding="utf-8") as file:
                    json.dump(
                        {},
                        file,
                        ensure_ascii=False,
                    )
            except Exception as exception:
                raise RuntimeError("Failed to obtain the access token") from exception
            # If the current time is past the expiry timestamp, request a fresh credential from the provider
            if time.time() > expired_timestamp:
                match servicer:
                    case "szkt":
                        # Obtain the 深圳快瞳 access credential
                        token, expired_timestamp = self._szkt_get_certification()
                    case "feishu":
                        token, expired_timestamp = self._feishu_get_certification()
                    case "hlyj":
                        token, expired_timestamp = self._hlyj_get_certification()
                    case _:
                        raise RuntimeError(
                            f"No token-acquisition method configured for servicer ({servicer})"
                        )
                # Update this service provider's credential
                certifications[servicer] = {
                    "token": token,
                    "expired_timestamp": expired_timestamp,
                }
                # Save all credentials back to the local file
                with open(self.certifications_path, "w", encoding="utf-8") as file:
                    json.dump(
                        certifications,
                        file,
                        ensure_ascii=False,
                    )
            return token
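"""
A minimal usage sketch for Authenticator (a hedged example). Tokens are cached in
certifications.json next to this module and refreshed automatically once they expire:
    authenticator = Authenticator()
    feishu_token = authenticator.get_token(servicer="feishu")
"""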
"""
封装飞书客户端,实现获取验证码、操作多维表格等
"""
class FeishuClinet:
    def __init__(self):
        self.authenticator = Authenticator()
        self.http_client = HTTPClient()

    def _headers(self):
        """Request headers"""
        # Attach the Feishu access credential
        return {
            "Authorization": f"Bearer {self.authenticator.get_token(servicer='feishu')}",
        }
    @staticmethod
    def get_verification_code():
        try:
            # Timestamp at which this call started
            execute_timestamp = time.time()
            # Timeout timestamp
            timeout_timestamp = execute_timestamp + 65
            # Open an encrypted IMAP connection
            server = IMAP4_SSL("imap.feishu.cn", 993)
            # Log in
            server.login("mars@liubiren.cloud", "a2SfPUgbKDmrjPV2")
            while True:
                # Keep polling until the timeout timestamp is reached
                if time.time() <= timeout_timestamp:
                    # Wait 10 seconds
                    time.sleep(10)
                    # Select the mailbox folder (邮箱验证码, i.e. "email verification codes")
                    server.select("&kK57sZqMi8F4AQ-")
                    try:
                        # Index of the latest email: server.search() returns (status, [byte string of indices]); split the byte string and take the last index
                        index = server.search(None, "ALL")[1][0].split()[-1]
                        # Fetch and parse the latest email: server.fetch() returns (status, [message data]); parse the raw RFC 822 bytes into a message object
                        contents = BytesParser(policy=default).parsebytes(
                            server.fetch(index, "(RFC822)")[1][0][1]
                        )
                        # Walk the message parts; for plain-text or HTML parts, parse the send time and the verification code
                        for content in contents.walk():
                            if (
                                content.get_content_type() == "text/plain"
                                or content.get_content_type() == "text/html"
                            ):
                                # Timestamp at which the email was sent
                                send_timestamp = parsedate_to_datetime(
                                    content["Date"]
                                ).timestamp()
                                # If the email was sent within the 35 seconds before this call started, parse and return the verification code
                                if (
                                    execute_timestamp
                                    > send_timestamp
                                    >= execute_timestamp - 35
                                ):
                                    # Log out
                                    server.logout()
                                    # Parse the verification code
                                    return re.search(
                                        r"【普康健康】您的验证码是:(\d+)",
                                        content.get_payload(decode=True).decode(),
                                    ).group(1)
                    # If the folder has no matching email yet, keep polling
                    except Exception:
                        pass
                # Log out on timeout
                else:
                    server.logout()
                    return None
        except Exception as exception:
            raise RuntimeError("Failed to fetch the email verification code") from exception
    # Query Bitable records (the API returns at most 500 records per page)
    @restrict(refill_rate=5, max_tokens=5)
    def query_bitable_records(
        self,
        bitable: str,
        table_id: str,
        field_names: Optional[list[str]] = None,
        filter_conditions: Optional[dict] = None,
    ) -> pandas.DataFrame:
        # First fetch the Bitable records, then parse them according to the field definitions
        # Build the search-records URL
        url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{bitable}/tables/{table_id}/records/search?page_size=20"
        response = self.http_client.post(
            url=url,
            headers=self._headers(),
            json={"field_names": field_names, "filter": filter_conditions},
        )
        # A business code of 0 is treated as success
        assert response.get("code") == 0, "Querying Bitable records failed"
        # Bitable records
        records = response.get("data").get("items")
        # While the response indicates another page, keep requesting the next page
        while response.get("data").get("has_more"):
            url_next = url + "&page_token={}".format(
                response.get("data").get("page_token")
            )
            response = self.http_client.post(
                url=url_next,
                headers=self._headers(),
                json={"field_names": field_names, "filter": filter_conditions},
            )
            assert response.get("code") == 0, "Querying Bitable records failed"
            # Merge the records (extend, not append, so the result stays a flat list)
            records.extend(response.get("data").get("items"))
        # Build the list-fields URL
        url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{bitable}/tables/{table_id}/fields?page_size=20"
        response = self.http_client.get(
            url=url,
            headers=self._headers(),
        )
        assert response.get("code") == 0, "Listing Bitable fields failed"
        # Bitable fields
        fields = response.get("data").get("items")
        while response.get("data").get("has_more"):
            url_next = url + "&page_token={}".format(
                response.get("data").get("page_token")
            )
            response = self.http_client.get(
                url=url_next,
                headers=self._headers(),
            )
            assert response.get("code") == 0, "Listing Bitable fields failed"
            fields.extend(response.get("data").get("items"))
        # Field-name to field-type mapping
        field_mappings = {}
        for field in fields:
            # Field name
            field_name = field["field_name"]
            # Map by field type
            match field["type"]:
                case 1005:
                    field_type = "primary key"
                case 1:
                    field_type = "text"
                case 3:
                    field_type = "single select"
                case 2:
                    # Display format of number/formula fields
                    match field["property"]["formatter"]:
                        case "0":
                            field_type = "integer"
                        case _:
                            raise ValueError(
                                "No display format configured for this number/formula field"
                            )
                case _:
                    raise ValueError("No handler configured for this field type")
            field_mappings.update({field_name: field_type})
        # Parsed record rows
        records_data = []
        # Parse the records
        for record in records:
            # Parsed row for a single record
            record_data = {}
            for field_name, content in record["fields"].items():
                match field_mappings[field_name]:
                    case "primary key" | "single select" | "integer":
                        record_data.update({field_name: content})
                    case "text":
                        # Concatenate multi-segment text values
                        fragments_content = ""
                        for fragment_content in content:
                            fragments_content += fragment_content["text"]
                        record_data.update({field_name: fragments_content})
                    case _:
                        raise ValueError("No parser configured for this field type")
            records_data.append(record_data)
        return pandas.DataFrame(records_data)
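"""
A minimal usage sketch for FeishuClinet (a hedged example; the bitable app token, table id and
field name are placeholders, not real identifiers):
    feishu_client = FeishuClinet()
    dataframe = feishu_client.query_bitable_records(
        bitable="bascnXXXXXXXX", table_id="tblXXXXXXXX", field_names=["姓名"]
    )
"""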