Routine update

from mac mini (m1)
This commit is contained in:
liubiren 2025-12-14 02:20:53 +08:00
parent b52473a54a
commit 963ce19609
4 changed files with 484 additions and 469 deletions


@@ -15,7 +15,7 @@ from email.utils import parsedate_to_datetime
 from functools import wraps
 from imaplib import IMAP4_SSL
 from pathlib import Path
-from typing import Any, Dict, Generator, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Generator, Literal, Optional, Tuple, Union
 from urllib.parse import quote_plus
 from xml.etree import ElementTree
@@ -87,192 +87,235 @@ class MySQLClient:
             raise RuntimeError("执行SQL查询并返回DATAFRAME发生其它异常")
-class CacheClient:
-    """缓存客户端"""
-
-    def __init__(self, cache_ttl: int = 360, database: str = "SQLite.db"):
-        """
-        :param cache_ttl: 缓存生存时间,单位为天
-        :param database: 缓存数据库名称
-        """
-        # 初始化缓存数据库连接
-        self.connection: Optional[sqlite3.Connection] = None
-        # 初始化缓存生存时间,单位为天
-        self.cache_ttl = cache_ttl
-        try:
-            # 创建缓存数据库连接
-            self.connection = sqlite3.connect(
-                database=(
-                    Path(__file__).parent.resolve() / database
-                ),  # 当前目录下创建缓存数据库
-                check_same_thread=False,
-                timeout=30,  # 缓存数据库锁超时时间,单位默认为30秒,避免并发锁死
-            )
-            # 创建缓存表和索引、清理过期缓存
-            with self.connection:
-                self.connection.execute(
-                    """CREATE TABLE IF NOT EXISTS caches (
-                        guid TEXT PRIMARY KEY,
-                        cache TEXT NOT NULL,
-                        timestamp REAL NOT NULL
-                    )"""
-                )
-                self.connection.execute(
-                    """CREATE INDEX IF NOT EXISTS idx_timestamp ON caches(timestamp)"""
-                )
-                self.connection.execute(
-                    "DELETE FROM caches WHERE timestamp < ?",
-                    (time.time() - self.cache_ttl * 86400,),
-                )
-        except Exception as exception:
-            self._disconnect()
-            raise f"初始缓存数据库失败:{str(exception)}" from exception
-
-    def _disconnect(self) -> None:
-        """关闭缓存数据库连接"""
-        if self.connection:
-            # noinspection PyBroadException
-            try:
-                self.connection.close()
-            except Exception:
-                pass
-
-    def __enter__(self) -> "CacheClient":
-        """实现上下文管理"""

+class SQLiteClient:
+    """SQLite客户端"""
+
+    def __init__(self, database: Union[str, Path]):
+        """
+        初始化SQLite客户端
+        :param database: 数据库
+        """
+        self.database = database
+        # 初始化本地线程存储
+        self.threads = threading.local()
+
+    def _connect(self):
+        """为当前线程创建数据库连接和游标"""
+        # 检查当前线程有数据库连接,若有则继续,否则创建数据库连接
+        if hasattr(self.threads, "connection") and self.threads.connection is not None:
+            return
+        # 为当前线程关闭数据库连接和游标
+        self._disconnect()
+        # noinspection PyBroadException
+        try:
+            # 为当前线程创建数据库连接
+            self.threads.connection = sqlite3.connect(
+                database=self.database,
+                check_same_thread=True,
+                timeout=30,  # 数据库锁超时时间,单位默认为30秒,避免并发锁死
+            )
+            # 开启行映射,支持按照字段名取值
+            self.threads.connection.row_factory = sqlite3.Row
+            # 为当前线程创建数据库游标
+            self.threads.cursor = self.threads.connection.cursor()
+        except Exception as exception:
+            self.threads.connection = None
+            self.threads.cursor = None
+            raise RuntimeError(
+                f"为当前线程创建数据库连接和游标发生异常,{str(exception)}"
+            ) from exception
+
+    def _disconnect(self) -> None:
+        """为当前线程关闭数据库连接和游标"""
+        # 检查当前线程有数据库游标,若有则关闭数据库游标
+        if hasattr(self.threads, "cursor") and self.threads.cursor is not None:
+            # noinspection PyBroadException
+            try:
+                # 为当前线程关闭数据库游标
+                self.threads.cursor.close()
+                self.threads.cursor = None
+            except Exception as exception:
+                raise RuntimeError(
+                    f"为当前线程关闭数据库游标发生异常,{str(exception)}"
+                ) from exception
+        # 检查当前线程有数据库连接,若有则关闭数据库连接
+        if hasattr(self.threads, "connection") and self.threads.connection is not None:
+            # noinspection PyBroadException
+            try:
+                # 为当前线程提交事务
+                self.threads.connection.commit()
+                # 为当前线程关闭数据库连接
+                self.threads.connection.close()
+                self.threads.connection = None
+            except Exception as exception:
+                raise RuntimeError(
+                    f"为当前线程关闭数据库连接发生异常,{str(exception)}"
+                ) from exception
def _query_one(
self, sql: str, parameters: Tuple[Any, ...] = ()
) -> Optional[Dict[str, Any]]:
"""
为当前线程查询并获取单行数据
:param sql: 查询SQL语句
:param parameters: SQL参数
:return: 单行数据
"""
# noinspection PyBroadException
try:
# 为当前线程创建数据库连接和游标
self._connect()
# 检查当前线程无数据库游标,若无则抛出异常
if not hasattr(self.threads, "cursor") or self.threads.cursor is None:
raise RuntimeError("为当前线程创建数据库游标发生异常")
# 为当前线程执行SQL
self.threads.cursor.execute(sql, parameters)
return (
None
if (result := self.threads.cursor.fetchone()) is None
else dict(result)
)
# 若发生异常则回滚事务并抛出异常
except Exception as exception:
# 检查当前线程有数据库连接,若有则回滚
if (
hasattr(self.threads, "connection")
and self.threads.connection is not None
):
self.threads.connection.rollback()
raise RuntimeError("为当前线程查询并获取单行数据发生异常") from exception
def _execute(self, sql: str, parameters: Tuple[Any, ...] = ()) -> bool:
"""
为当前线程执行SQL
:param sql: 新增删除和修改SQL语句
:param parameters: SQL参数
:return: 执行结果
"""
try:
self._connect()
# 检查当前线程无数据库游标,若无则抛出异常
if not hasattr(self.threads, "cursor") or self.threads.cursor is None:
raise RuntimeError("为当前线程创建数据库游标发生异常")
# 为当前线程执行SQL
self.threads.cursor.execute(sql, parameters)
# 为当前线程提交事务
self.threads.connection.commit()
return True
# 若发生异常则回滚事务并抛出异常
except Exception as exception:
# 检查当前线程有数据库连接,若有则回滚
if (
hasattr(self.threads, "connection")
and self.threads.connection is not None
):
self.threads.connection.rollback()
raise RuntimeError("为当前线程执行SQL发生异常") from exception
+    def __enter__(self):
+        """进入上下文管理时为当前线程创建数据库连接和游标"""
+        self._connect()
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-        """退出时关闭连接"""
+        """退出上下文管理,为当前线程关闭数据库连接和游标"""
         self._disconnect()
         return False

-    def query(self, guid: str) -> Optional[Dict]:
-        """
-        查询缓存
+    def __del__(self):
+        """析构时为当前线程关闭数据库连接和游标"""
+        self._disconnect()
:param guid: 缓存唯一标识
:return: 缓存
"""
with threading.Lock(): # 线程锁,保证并发安全
# noinspection PyBroadException
try:
# 创建游标
cursor = self.connection.cursor()
# 根据缓存唯一标识查询有效缓存
cursor.execute(
"SELECT cache FROM caches WHERE guid = ? AND timestamp >= ?",
(guid, time.time() - self.cache_ttl * 86400),
)
if result := cursor.fetchone():
return json.loads(result[0])
return None
# 若发生异常则回滚事务并返回None
except Exception:
self.connection.rollback()
return None
finally:
# 确保游标关闭(关键:释放资源)
if cursor:
# noinspection PyBroadException
try:
cursor.close()
except Exception:
pass
-    def update(self, guid: str, cache: Dict) -> bool:
-        """
-        更新缓存,存在则覆盖,不存在则新增
-        :param guid: 缓存唯一标识
-        :param cache: 缓存
-        :return: 成功返回True,失败返回False
-        """
-        with threading.Lock():  # 线程锁,保证并发安全
-            # noinspection PyBroadException
-            try:
-                # 创建游标
-                cursor = self.connection.cursor()
-                # 新增或覆盖缓存
-                cursor.execute(
-                    "INSERT OR REPLACE INTO caches (guid, cache, timestamp) VALUES (?, ?, ?)",
-                    (
-                        guid,
-                        json.dumps(cache, ensure_ascii=False),
-                        time.time(),
-                    ),
-                )
-                # 提交事务
-                self.connection.commit()
-                return True
-            # 若发生异常则回滚事务并返回None
-            except Exception:
-                self.connection.rollback()
-                return False
-            finally:
-                # 确保游标关闭(关键:释放资源)
-                if cursor:
-                    # noinspection PyBroadException
-                    try:
-                        cursor.close()
-                    except Exception:
-                        pass
-
-
-class TokenBucket:
-
-    def __init__(self, refill_rate, max_tokens):
-        """令牌桶,基于令牌桶算法限制请求频率"""
-        # 填充令牌速率(个/秒)
-        self.refill_rate = refill_rate
-        # 令牌桶最大令牌数
-        self.max_tokens = max_tokens
-        # 令牌桶当前令牌数
-        self.tokens = max_tokens
-        # 上一次填充令牌时间戳(使用单调递增时间,单位为秒)
-        self.refill_timestamp = time.monotonic()
-
-    # 获取令牌
-    # noinspection PyMissingReturnStatement

+# 基于令牌桶限流算法的装饰器
+def restrict(refill_rate: float = 5.0, max_tokens: int = 5):
+
+    class TokenBucket:
+
+        # noinspection PyShadowingNames
+        def __init__(self, max_tokens: int, refill_rate: float):
+            """
+            初始化令牌桶限流
+            :param refill_rate: 令牌填充速率,单位为个/秒
+            :param max_tokens: 最大令牌数,单位为个
+            """
+            # 初始化最大令牌数
+            self.max_tokens = max_tokens
+            # 初始化当前令牌数
+            self.tokens = self.max_tokens * 0.5
+            # 初始化令牌填充速率
+            self.refill_rate = refill_rate
+            # 初始化上一次填充令牌的时间戳(使用单调时间戳)
+            self.refill_timestamp = time.monotonic()
+            # 初始化线程锁(所有线程共用)
+            self.thread_lock = threading.Lock()
+
+        # 填充令牌
+        def _refill(self) -> None:
+            with self.thread_lock:
+                # 本次填充令牌的时间戳
+                refill_timestamp = time.monotonic()
+                # 重新计算令牌桶中令牌数
+                # noinspection PyTypeChecker
+                self.tokens = min(
+                    self.max_tokens,
+                    max(
+                        0,
+                        self.tokens
+                        + self.refill_rate * (refill_timestamp - self.refill_timestamp),
+                    ),
+                )
+                self.refill_timestamp = refill_timestamp
+
+        # 尝试消耗令牌
+        def consume(self) -> Tuple[bool, float]:
+            # 填充令牌
+            self._refill()
+            with self.thread_lock:
+                if self.tokens >= 1:
+                    self.tokens -= 1
+                    return True, 0
+                # 等待时长
+                # noinspection PyTypeChecker
+                wait_time = min(
+                    1 / self.refill_rate,
+                    max(
+                        0,
+                        1 / self.refill_rate
+                        - (time.monotonic() - self.refill_timestamp),
+                    ),
+                )
+                return False, wait_time
+
+    # 初始化所有被装饰的函数创建令牌桶限流存储
+    buckets = {}
def acquire(self) -> tuple[bool, float]:
with threading.Lock():
# 本次填充令牌时间戳
refill_timestamp = time.monotonic()
# 重新计算令牌桶中令牌数
self.tokens = min(
self.max_tokens,
self.tokens
+ self.refill_rate * (refill_timestamp - self.refill_timestamp),
)
self.refill_timestamp = refill_timestamp
# 若令牌桶当前令牌数大于等于1则减少令牌
if self.tokens >= 1:
self.tokens -= 1
return True, 0.0
# 同时返回等待时间
return False, 0.2
# 将令牌桶以装饰函数封装为请求频率限制方法
def restrict(refill_rate=5, max_tokens=5):
def decorator(func):
# 初始化令牌桶
token_bucket = TokenBucket(refill_rate=refill_rate, max_tokens=max_tokens)
+    def decorator(func: Callable) -> Callable:
         @wraps(func)
         def wrapper(*args, **kwargs):
+            # 若当前被装饰的函数不在所有被装饰的函数创建令牌桶限流存储,则为当前被装饰的函数实例化令牌桶限流
+            if func not in buckets:
+                # 初始化令牌桶限流
+                buckets[func] = TokenBucket(
+                    refill_rate=refill_rate, max_tokens=max_tokens
+                )
+            bucket = buckets[func]
             # 重试次数
             retries = 0
+            # 若重试数小于等于最大重试次数,则循环检查是否允许请求
             while retries <= 10:
-                success, wait_time = token_bucket.acquire()
-                # 若允许请求则返回嵌套函数,若不允许请求则等待
+                # 尝试消耗令牌
+                success, wait_time = bucket.consume()
+                # 若消耗令牌成功则返回被装饰的函数,否则等待
                 if success:
                     return func(*args, **kwargs)
-                time.sleep(wait_time * 1.5**retries)
+                time.sleep(wait_time * 2)
                 retries += 1
             raise Exception("request too frequently")
@@ -281,82 +324,150 @@ def restrict(refill_rate=5, max_tokens=5):
     return decorator
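# --- Illustrative usage sketch, not part of this commit: applying the restrict decorator above.
# The function fetch_quote and its body are hypothetical placeholders. Each call consumes one
# token; the bucket starts half full (max_tokens * 0.5), refills at refill_rate tokens per
# second, and after 10 unsuccessful retries the wrapper raises. ---
@restrict(refill_rate=2.0, max_tokens=4)
def fetch_quote(symbol: str) -> dict:
    return {"symbol": symbol}


for _ in range(10):
    fetch_quote("AAPL")  # later calls sleep inside wrapper until a token becomes available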
class RequestException(Exception):
"""请求异常"""
def __init__(
self, status: int = 400, code: int = 0, message: str = "request failed"
):
"""
:param status: 状态编码默认为0
:param message: 错误信息默认为RequestException
"""
self.status = status
self.code = code
self.message = message
super().__init__(self.message)
def __str__(self):
return f"请求发生异常({self.status}, {self.message})"
# 请求参数数据模型
class Arguments(BaseModel):
"""
:param url: 统一资源定位符基于统一资源定位符校验器进行校验
:param params: 查询参数
:param headers: 请求头
:param data: 表单数据
:param json_data: JSON # 入参时使用别名出参时根据BY_ALIAS=TRUE确定是否使用别名
:param files: 上传文件
:param stream: 是否启用流式传输
:param guid: 全局唯一标识
"""
# 统一资源定位符
url: HttpUrl = Field(default=...)
# 查询参数
params: Optional[Dict] = Field(default=None)
# 请求头
headers: Optional[Dict] = Field(default=None)
# 表单数据
data: Optional[Dict] = Field(default=None)
# JSON
json_data: Optional[Dict] = Field(default=None, alias="json")
# 上传文件
files: Optional[
Dict[
str,
Union[
Tuple[str, bytes], Tuple[str, bytes, str], Tuple[str, bytes, str, dict]
],
]
] = Field(default=None)
# 是否启用流式传输
stream: Optional[bool] = Field(default=None)
# 全局唯一标识
guid: Optional[str] = Field(default=None)
# 表单数据和JSON数据互斥
@model_validator(mode="after")
def validate_data(self):
if self.data and self.json_data:
raise ValueError("cannot use both data and json parameters simultaneously")
return self
# 上传文件和启用流式传输互斥
@model_validator(mode="after")
def validate_files(self):
if self.files and self.stream:
raise ValueError(
"cannot use both files and stream parameters simultaneously"
)
return self
 class HTTPClient:
     """请求客户端"""
class RequestException(Exception):
"""请求异常"""
def __init__(
self,
status: Optional[int] = 400,
code: int = 0,
message: str = "请求发生异常",
):
"""
:param status: 状态码
:param code: 错误码
:param message: 错误信息
"""
self.status = status
self.code = code
self.message = message
super().__init__(self.message)
def __str__(self):
return f"请求发生异常(status={self.status} code={self.code}message={self.message})"
class Parameters(BaseModel):
"""
请求参数模型支持自动校验
"""
url: HttpUrl = Field(
default=..., description="统一资源定位符基于HttpUrl自动校验"
)
params: Optional[Dict[str, Any]] = Field(
default=None, description="统一资源定位符的查询参数"
)
headers: Optional[Dict[str, str]] = Field(default=None, description="请求头")
data: Optional[Dict[str, Any]] = Field(default=None, description="表单数据")
json_data: Optional[Dict[str, Any]] = Field(
default=None, alias="json", description="JSON数据"
)
files: Optional[
Dict[
str,
Union[
Tuple[str, bytes],
Tuple[str, bytes, str],
Tuple[str, bytes, str, Dict[str, str]],
],
]
] = Field(
default=None,
description="上传文件,{字段名: (文件名, 字节数据, 内容类型, 请求头)}",
)
stream_enabled: Optional[bool] = Field(default=None, description="使用流式传输")
guid: Optional[str] = Field(default=None, description="缓存全局唯一标识")
@model_validator(mode="after")
def validate_data(self):
"""校验表单数据和JSON数据互斥"""
if self.data is not None and self.json_data is not None:
raise ValueError("表单数据和JSON数据不能同时使用")
return self
@model_validator(mode="after")
def validate_files(self):
if self.files is not None and self.stream_enabled:
raise ValueError("上传文件和使用流式传输不能同时使用")
return self
class CacheClient(SQLiteClient):
"""缓存客户端"""
def __init__(self, cache_ttl: int):
"""
初始化缓存数据库
:param cache_ttl: 缓存生存时间,单位为秒
"""
# 初始化SQLite客户端
super().__init__(database=Path(__file__).parent.resolve() / "caches.db")
# 初始化缓存生存时间,单位为秒
self.cache_ttl = cache_ttl
# 初始化缓存表、时间戳索引和清理过期缓存
try:
with self:
self._execute(
sql="""CREATE TABLE IF NOT EXISTS caches (guid TEXT PRIMARY KEY, cache TEXT NOT NULL, timestamp REAL NOT NULL)"""
)
self._execute(
sql="""CREATE INDEX IF NOT EXISTS idx_timestamp ON caches(timestamp)"""
)
self._execute(
sql="DELETE FROM caches WHERE timestamp < ?",
parameters=(time.time() - self.cache_ttl,),
)
except Exception as exception:
raise RuntimeError(
f"初始化缓存数据库发生异常:{str(exception)}"
) from exception
# noinspection PyShadowingNames
def query(self, guid: str) -> Optional[Dict[str, Any]]:
"""
查询并获取单条缓存
:param guid: 缓存唯一标识
:return: 缓存
"""
# noinspection PyBroadException
try:
with self:
result = self._query_one(
sql="SELECT cache FROM caches WHERE guid = ? AND timestamp >= ?",
parameters=(guid, time.time() - self.cache_ttl),
)
# 将查询结果JSON反序列化
if result is not None and "cache" in result:
return json.loads(result["cache"])
return None
except Exception as exception:
raise RuntimeError("查询并获取单条缓存发生异常") from exception
# noinspection PyShadowingNames
def update(self, guid: str, cache: Dict) -> Optional[bool]:
"""
新增或更新缓存若无则新增缓存若有则更新缓存
:param guid: 缓存唯一标识
:param cache: 缓存
:return: 成功返回True失败返回False
"""
# noinspection PyBroadException
try:
with self:
return self._execute(
sql="INSERT OR REPLACE INTO caches (guid, cache, timestamp) VALUES (?, ?, ?)",
parameters=(
guid,
json.dumps(cache, ensure_ascii=False),
time.time(),
),
)
except Exception as exception:
raise RuntimeError("新增或更新缓存发生异常") from exception
     def __init__(
         self,
         default_headers: Optional[Dict[str, str]] = None,
@@ -383,9 +494,19 @@ class HTTPClient:
         # 初始化使用缓存
         self.cache_enabled = cache_enabled
         # 初始化缓存生存时间,单位为秒
-        self.cache_ttl = cache_ttl * 24 * 60 * 60
+        self.cache_ttl = cache_ttl * 86400
+        self.cache_client: Optional[HTTPClient.CacheClient] = None
+        # 若使用缓存则实例化缓存客户端
+        if self.cache_enabled:
+            # 初始化缓存客户端
+            self.cache_client = self.CacheClient(cache_ttl=self.cache_ttl)
+
+    def __del__(self):
+        """析构时关闭请求会话"""
+        if hasattr(self, "session") and self.session:
+            self.session.close()
+
+    # 创建请求会话并挂载适配器
     @staticmethod
     def _create_session(
         total: int,
@ -393,183 +514,200 @@ class HTTPClient:
default_headers: Optional[Dict[str, str]] = None, default_headers: Optional[Dict[str, str]] = None,
) -> Session: ) -> Session:
""" """
创建请求会话并挂载适配器
:param default_headers: 默认请求头 :param default_headers: 默认请求头
:param total: 最大重试次数 :param total: 最大重试次数
:param backoff_factor: 重试间隔退避因子 :param backoff_factor: 重试间隔退避因子
:return Session: 会话对象 :return Session: 请求会话实例
""" """
# 创建会话对象 # 实例化请求会话
session = Session() session = Session()
# 设置请求头 # 设置默认请求头
if default_headers: if default_headers:
session.headers.update(default_headers) session.headers.update(default_headers)
# 设置重试策略(优先按照服响应等待时长,若未返回则默认按照退避算法等待) # 设置重试策略并挂载适配器
strategy_retries = Retry( adapter = HTTPAdapter(
allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "PATCH"], max_retries=Retry(
status_forcelist=[ allowed_methods=["HEAD", "GET", "POST", "PUT", "DELETE", "PATCH"],
408, status_forcelist=[
502, 408,
503, 502,
504, 503,
], # 408为请求超时502为网关错误503为服务不可用504为网关超时 504,
total=total, ], # 408为请求超时502为网关错误503为服务不可用504为网关超时
respect_retry_after_header=True, total=total,
backoff_factor=backoff_factor, respect_retry_after_header=True,
backoff_factor=backoff_factor,
)
) )
# 创建适配器并绑定重试策略
adapter = HTTPAdapter(max_retries=strategy_retries)
# 就HTTP请求生效
session.mount("http://", adapter) session.mount("http://", adapter)
# 就HTTPS请求生效
session.mount("https://", adapter) session.mount("https://", adapter)
return session return session
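# --- Illustrative note, not part of this commit: with urllib3's Retry as configured above, the
# wait before the n-th automatic retry is roughly backoff_factor * 2 ** (n - 1) seconds, unless
# the server sends a Retry-After header, which respect_retry_after_header=True honours instead.
# The backoff_factor value below is a hypothetical example. ---
backoff_factor = 0.5
print([backoff_factor * 2 ** (n - 1) for n in range(1, 5)])  # [0.5, 1.0, 2.0, 4.0]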
-    # GET请求
-    def get(self, **kwargs) -> Union[Dict, str]:
-        return self._request(method="GET", arguments=Arguments(**kwargs))
-
-    # POST请求
-    def post(self, **kwargs) -> Union[Dict, str]:
-        return self._request(method="POST", arguments=Arguments(**kwargs))
-
-    # DOWNLOAD请求
-    def download(
-        self, stream=False, chunk_size=1024, **kwargs
-    ) -> Union[Dict, str, Generator[bytes, None, None]]:
-        response = self._request(
-            method="GET", arguments=Arguments(**{"stream": stream, **kwargs})
-        )
-        # 若禁用流式传输,则返回响应
-        if not stream:
-            return response
-        # 若启用流式传输,则处理流式传输响应并返回
-        return self._process_stream_response(response=response, chunk_size=chunk_size)

+    def get(
+        self, **kwargs
+    ) -> Union[str, Tuple[str, bytes], Dict[str, Any], ElementTree.Element, None]:
+        """发送GET请求"""
+        return self._request(method="GET", parameters=self.Parameters(**kwargs))
+
+    def post(
+        self, **kwargs
+    ) -> Union[str, Tuple[str, bytes], Dict[str, Any], ElementTree.Element, None]:
+        """发送POST请求"""
+        return self._request(method="POST", parameters=self.Parameters(**kwargs))
+
+    def download(
+        self, stream_enabled: bool = False, chunk_size: int = 1024, **kwargs
+    ) -> Union[
+        str,
+        Tuple[str, bytes],
+        Dict[str, Any],
+        ElementTree.Element,
+        Generator[bytes, None, None],
+        None,
+    ]:
+        """
+        下载文件
+        :param stream_enabled: 使用流式传输
+        :param chunk_size: 流式传输的分块大小
+        """
+        response = self._request(
+            method="GET",
+            parameters=self.Parameters(**{"stream_enabled": stream_enabled, **kwargs}),
+        )
+        # 若使用流式传输则处理流式传输响应
+        if stream_enabled:
+            return self._process_stream_response(
+                response=response, chunk_size=chunk_size
+            )
+        return response
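# --- Illustrative usage sketch, not part of this commit: calling the get/download helpers above.
# The URLs are placeholders, and the constructor keywords shown (default_headers, cache_enabled,
# cache_ttl) are assumed from __init__ above; remaining parameters are assumed to have defaults. ---
client = HTTPClient(default_headers={"User-Agent": "demo"}, cache_enabled=True, cache_ttl=1)
payload = client.get(url="https://example.com/api", params={"q": "demo"}, guid="demo-guid")
for chunk in client.download(url="https://example.com/file", stream_enabled=True, chunk_size=8192):
    pass  # each chunk is a bytes block yielded by _process_stream_response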
def _request(self, method: Literal["GET", "POST"], arguments: Arguments) -> Any: def _request(
self, method: Literal["GET", "POST"], parameters: Parameters
) -> Union[
str, Tuple[str, bytes], Dict[str, Any], ElementTree.Element, Response, None
]:
"""请求""" """请求"""
# 请求参数模型 # 请求参数模型转为请求参数字典
arguments = arguments.model_dump(exclude_none=True, by_alias=True) parameters = parameters.model_dump(exclude_none=True, by_alias=True)
# URL对象转为字符串 # URL由HttpUrl对象转为字符串
arguments["url"] = str(arguments["url"]) parameters["url"] = str(parameters["url"])
# 重构表单数据 # 过滤表单数据中None值
if arguments.get("data") is not None: if parameters.get("data") is not None:
arguments["data"] = { parameters["data"] = {
k: v for k, v in arguments["data"].items() if v is not None k: v for k, v in parameters["data"].items() if v is not None
} }
# 重构JSON数据 # 过滤JSON数据中None值
if arguments.get("json_data") is not None: if parameters.get("json_data") is not None:
arguments["json_data"] = { parameters["json_data"] = {
k: v for k, v in arguments["json_data"].items() if v is not None k: v for k, v in parameters["json_data"].items() if v is not None
} }
# 重构文件数据 # 使用流式传输
if arguments.get("files") is not None: stream_enabled = parameters.pop("stream_enabled", False)
files = {}
for k, v in arguments["files"].items():
if isinstance(v, (tuple, list)):
match len(v):
# 文件数据包括文件名称和文件内容
case 2:
files[k] = (v[0], v[1], None, None)
# 文件数据包含文件名称、文件内容和内容类型
case 3:
files[k] = (v[0], v[1], v[2], None)
# 文件数据包含文件名称、文件内容、内容类型和请求头
case 4:
files[k] = (v[0], v[1], v[2], v[3])
arguments.update({"files": files})
# 全局唯一标识 # 缓存全局唯一标识
guid = arguments.pop("guid", None) guid = parameters.pop("guid", None)
# 若使用缓存且全局唯一标识非空则查询缓存 # 若使用缓存且缓存全局唯一标识非空则查询并获取单条缓存
if self.cache_enabled and guid is not None: if self.cache_enabled and guid is not None:
with CacheClient(cache_ttl=self.cache_ttl) as cache_client: cache = self.cache_client.query(guid)
cache = cache_client.query(guid)
# 若缓存非空则返回
if cache is not None: if cache is not None:
return cache return cache
# 发送请求并处理响应
# noinspection PyBroadException
try: try:
# 发送请求
response = self.session.request( response = self.session.request(
method=method, timeout=self.timeout, **arguments method=method, timeout=self.timeout, **parameters
) )
# 若返回错误状态码则抛出异常 response.raise_for_status() # 若返回非2??状态码则抛出异常
response.raise_for_status()
# 若使用流式传输则直接返回(不缓存)
if stream_enabled:
return response
# 处理响应 # 处理响应
response = self._process_response(response=response) response = self._process_response(response=response)
# 若使用缓存且全局唯一标识非空则更新缓存 # 若使用缓存且缓存全局唯一标识非空则新增或更新缓存
if self.cache_enabled and guid is not None: if self.cache_enabled and guid is not None:
with CacheClient(cache_ttl=self.cache_ttl) as cache_client: self.cache_client.update(guid, response)
cache_client.update(guid, response)
return response return response
except Exception as exception: except Exception as exception:
# 尝试根据响应解析错误状态码和错误信息,否则进行构造
# noinspection PyBroadException # noinspection PyBroadException
try: try:
# 响应反序列化 response = response or getattr(exception, "response", None)
response_decoded = response.json() status = (
# 错误状态码 response.json().get("status", response.status_code)
status = response_decoded["status"] if response is not None
# 错误信息 else None
message = response_decoded["message"] )
except: message = (
status = getattr(getattr(exception, "response", None), "status", None) response.json().get("message", str(exception).splitlines()[0])
# 重构错误信息 if response is not None
message = f"{method} {arguments["url"]} failed: {str(exception).split("\n")[0]}" else str(exception).splitlines()[0]
raise RequestException(status=status, message=message) )
except Exception:
status = None
message = f"{method} {parameters["url"]} 请求发生异常:{str(exception).split("\n")[0]}"
raise self.RequestException(status=status, message=message) from exception
# 处理响应 # 处理响应
@staticmethod @staticmethod
def _process_response(response: Response) -> Any: def _process_response(
# 响应内容 response: Response,
) -> Union[str, Tuple[str, bytes], Dict[str, Any], ElementTree.Element, None]:
        content = response.content
        # 若响应内容为空则返回None
        if not content:
            return None
# 标准化内容类型 # 响应类型
content_type = ( _type = response.headers.get("Content-Type", "").split(";")[0].strip().lower()
response.headers.get("Content-Type", "").split(";")[0].strip().lower() # 根据响应类型匹配响应内容解析方法并返回
)
# 根据内容类型匹配解析返回内容方法
# noinspection PyUnreachableCode # noinspection PyUnreachableCode
match content_type: match _type:
# JSONJSON反序列化
case "application/json" | "text/json": case "application/json" | "text/json":
# 响应反序列化
return response.json() return response.json()
# XML解析为XML对象Element实例
case "application/xml" | "text/xml": case "application/xml" | "text/xml":
# 解析为XMLELEMENT对象 return ElementTree.fromstring(content)
return ElementTree.fromstring(text=content) # 以image/开头:返回影像件格式和响应内容
case _ if _type.startswith("image/"):
# 影像件格式
image_format = _type.split(sep="/", maxsplit=1)[1]
return image_format, content
# 其它的响应类型先UTF8解码再返回若解码发生异常则直接返回
case _: case _:
# 若内容类型以IMAGE/开头则返回图片格式和图片数据 try:
if content_type.startswith("image/"): return content.decode("utf-8")
# 图片格式 except UnicodeDecodeError:
image_format = content_type.split(sep="/", maxsplit=1)[1]
return f"{image_format}", content
else:
return content return content
# 处理流式传输响应 # 处理流式传输响应
@staticmethod @staticmethod
def _process_stream_response( def _process_stream_response(
response: Response, chunk_size: int response: Response, chunk_size: int
) -> Generator[bytes, None, None]: # 生成器不接受发SEND发送至、结束时返回NONE ) -> Generator[bytes, None, None]:
# 检查数据分块 """
if not isinstance(chunk_size, int) and isinstance(chunk_size, bool): 处理流式响应
raise ValueError("chunk_size must type=int") :param response: requests.Response对象
:param chunk_size: 分块大小
:return: 字节数据块生成器
"""
if not isinstance(chunk_size, int):
raise ValueError("分块大小数据类型必须为整数")
if chunk_size <= 0: if chunk_size <= 0:
raise ValueError("chunk_size must >0") raise ValueError("分块大小必须大于0")
try: try:
for chunk in response.iter_content(chunk_size=chunk_size): for chunk in response.iter_content(chunk_size=chunk_size):

Binary file not shown.


@@ -8,7 +8,6 @@ https://liubiren.feishu.cn/docx/WFjTdBpzroUjQvxxrNIcKvGnneh?from=from_copylink
 import json
 import re
-import sqlite3
 import uuid
 from base64 import b64encode
 from datetime import datetime
@@ -26,137 +25,15 @@ from zen import ZenDecision, ZenEngine
 from utils.client import Authenticator, HTTPClient

 # from utils.ocr import fuzzy_match

-class SQLiteClient:
-    """SQLite客户端"""
-
-    def __init__(self, database: str = "SQLite.db"):
-        """
-        :param database: 数据库名称
-        """
-        # 初始化数据库连接
-        self.connection: Optional[sqlite3.Connection] = None
-        try:

+client = SQLiteClient()
+a = client._query_one("SELECT * FROM institutions")
+print(a)
+
+exit()
# 创建数据库连接
self.connection = sqlite3.connect(
database=(
Path(__file__).parent.resolve() / database
), # 当前目录下创建数据库
check_same_thread=False,
timeout=30, # 数据库锁超时时间单位默认为30秒避免并发锁死
)
# 创建缓存表和索引、清理过期缓存
with self.connection:
self.connection.execute(
"""CREATE TABLE IF NOT EXISTS caches (
guid TEXT PRIMARY KEY,
cache TEXT NOT NULL,
timestamp REAL NOT NULL
)"""
)
self.connection.execute(
"""CREATE INDEX IF NOT EXISTS idx_timestamp ON caches(timestamp)"""
)
self.connection.execute(
"DELETE FROM caches WHERE timestamp < ?",
(time.time() - self.cache_ttl * 86400,),
)
except Exception as exception:
self._disconnect()
raise f"初始缓存数据库失败:{str(exception)}" from exception
def _disconnect(self) -> None:
"""关闭缓存数据库连接"""
if self.connection:
# noinspection PyBroadException
try:
self.connection.close()
except Exception:
pass
def __enter__(self) -> "CacheClient":
"""实现上下文管理"""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""退出时关闭连接"""
self._disconnect()
return False
def query(self, guid: str) -> Optional[Dict]:
"""
查询缓存
:param guid: 缓存唯一标识
:return: 缓存
"""
with threading.Lock(): # 线程锁,保证并发安全
# noinspection PyBroadException
try:
# 创建游标
cursor = self.connection.cursor()
# 根据缓存唯一标识查询有效缓存
cursor.execute(
"SELECT cache FROM caches WHERE guid = ? AND timestamp >= ?",
(guid, time.time() - self.cache_ttl * 86400),
)
if result := cursor.fetchone():
return json.loads(result[0])
return None
# 若发生异常则回滚事务并返回None
except Exception:
self.connection.rollback()
return None
finally:
# 确保游标关闭(关键:释放资源)
if cursor:
# noinspection PyBroadException
try:
cursor.close()
except Exception:
pass
def update(self, guid: str, cache: Dict) -> bool:
"""
更新缓存存在则覆盖不存在则新增
:param guid: 缓存唯一标识
:param cache: 缓存
:return: 成功返回True失败返回False
"""
with threading.Lock(): # 线程锁,保证并发安全
# noinspection PyBroadException
try:
# 创建游标
cursor = self.connection.cursor()
# 新增或覆盖缓存
cursor.execute(
"INSERT OR REPLACE INTO caches (guid, cache, timestamp) VALUES (?, ?, ?)",
(
guid,
json.dumps(cache, ensure_ascii=False),
time.time(),
),
)
# 提交事务
self.connection.commit()
return True
# 若发生异常则回滚事务并返回None
except Exception:
self.connection.rollback()
return False
finally:
# 确保游标关闭(关键:释放资源)
if cursor:
# noinspection PyBroadException
try:
cursor.close()
except Exception:
pass
 def common_extraction(**kwargs) -> dict | None: