# Python/utils/feishu.py

# -*- coding: utf-8 -*-
"""
封装飞书客户端
"""

from base64 import b64encode
from email.parser import BytesParser
from email.policy import default
from email.utils import parsedate_to_datetime
from imaplib import IMAP4_SSL
import re
from time import sleep, time
from typing import Any, Dict, Optional, List
from urllib.parse import urlencode

from authenticator import Authenticator
from request import Request


class Feishu:
    """Feishu client: mailbox verification codes, Bitable records, Drive media.

    ``__init__`` creates two collaborators: ``authenticator`` (supplies the
    bearer token used by ``_get_headers``) and ``http_client`` (performs the
    HTTP requests).
    """

    # IMAP endpoint used by ``get_mail_verification_code``.
    _IMAP_HOST = "imap.feishu.cn"
    _IMAP_PORT = 993
    # SECURITY(review): these credentials are committed in source. They are kept
    # here unchanged for backward compatibility; move them to configuration or
    # a secret store and rotate the password.
    _IMAP_USER = "mars@liubiren.cloud"
    _IMAP_PASSWORD = "aJBZSZzhQN13M11K"

    # Polling parameters, all in seconds.
    _POLL_TIMEOUT = 120  # give up after this long without a matching mail
    _POLL_INTERVAL = 5  # minimum spacing between two mailbox queries
    _MAIL_MAX_AGE = 300  # accept mails sent at most this long before the call

    # Splits a mailbox name into runs of printable ASCII and runs of everything else.
    _MAILBOX_RUNS = re.compile(r"[\x20-\x7e]+|[^\x20-\x7e]+")

    def __init__(self):
        # Authenticator that issues service tokens.
        self.authenticator = Authenticator()
        # HTTP client used for all open-platform requests.
        self.http_client = Request()

    @staticmethod
    def _encode_mailbox_name(folder: str) -> str:
        """
        Encode a mailbox name in IMAP modified UTF-7 (RFC 3501, section 5.1.3).

        Pure-ASCII names are returned unchanged, so callers may still pass a
        name that is already encoded. In names containing non-ASCII
        characters, printable ASCII runs stay literal (with ``&`` written as
        ``&-``) and every other run is UTF-16BE encoded, base64 encoded with
        ``,`` in place of ``/`` and without padding, and wrapped in ``&...-``.

        :param folder: mailbox name as typed by the caller
        :return: mailbox name to send to the IMAP server
        """
        if folder.isascii():
            return folder

        pieces = []
        for run in Feishu._MAILBOX_RUNS.finditer(folder):
            text = run.group()
            if "\x20" <= text[0] <= "\x7e":
                pieces.append(text.replace("&", "&-"))
                continue
            # Only "/" is substituted; "+" is part of the modified base64
            # alphabet and must be kept as-is.
            encoded = b64encode(text.encode("utf-16-be")).decode("ascii")
            pieces.append("&" + encoded.rstrip("=").replace("/", ",") + "-")
        return "".join(pieces)

    @staticmethod
    def _extract_verification_code(message: Any, pattern: Any) -> Optional[str]:
        """
        Search the text parts of a parsed mail for the verification code.

        :param message: parsed mail object exposing ``walk()``
        :param pattern: regular expression (string or compiled pattern)
        :return: capture group 1 of the first match (the whole match when the
            pattern has no capture group), or None when no text part matches
        """
        for part in message.walk():
            if part.get_content_type() not in ("text/plain", "text/html"):
                continue

            payload = part.get_payload(decode=True)
            if isinstance(payload, bytes):
                try:
                    text = payload.decode(
                        encoding=part.get_content_charset() or "utf-8",
                        errors="replace",
                    )
                except LookupError:
                    # The mail declares a charset Python does not know.
                    text = payload.decode(encoding="utf-8", errors="replace")
            elif isinstance(payload, str):
                text = payload
            else:
                continue

            # ``search`` rather than ``match``: the code is rarely the very
            # first thing in the body (greetings, HTML markup, ...).
            found = re.search(pattern, text)
            if found:
                return found.group(1) if found.re.groups else found.group(0)
        return None

    @staticmethod
    def _fetch_latest_code(
        connection: IMAP4_SSL, mailbox: str, pattern: Any, not_before: float
    ) -> Optional[str]:
        """
        Look at the last mail of *mailbox* and try to extract the code from it.

        :param connection: logged-in IMAP connection
        :param mailbox: mailbox name, already encoded for IMAP
        :param pattern: regular expression (string or compiled pattern)
        :param not_before: mails sent before this epoch timestamp are ignored
        :return: the verification code, or None when there is no usable mail yet
        :raises RuntimeError: when the server rejects select, search or fetch
        """
        status, _ = connection.select(mailbox=mailbox)
        if status != "OK":
            raise RuntimeError("选择邮箱文件夹失败")

        # search() returns e.g. ('OK', [b'1 2 3 4 5']).
        status, found = connection.search("utf-8", "ALL")
        if status != "OK":
            raise RuntimeError("查询邮箱文件夹内所有邮件失败")

        sequence_numbers = b" ".join(item for item in found if item).split()
        if not sequence_numbers:
            return None

        status, parts = connection.fetch(
            sequence_numbers[-1].decode("utf-8"), "(RFC822)"
        )
        if status != "OK":
            raise RuntimeError("查询邮箱文件夹内最后一封邮件完整原始内容失败")

        # Literal data arrives as (envelope, payload) tuples; bare bytes items
        # are protocol fragments such as b')' and are not part of the mail.
        raw = b"".join(part[1] for part in parts if isinstance(part, tuple))
        if not raw:
            return None

        message = BytesParser(policy=default).parsebytes(text=raw)
        try:
            sent_at = parsedate_to_datetime(message["Date"]).timestamp()
        except (TypeError, ValueError):
            # Missing or unparsable Date header: freshness cannot be verified.
            return None
        if sent_at < not_before:
            return None

        return Feishu._extract_verification_code(message, pattern)

    @staticmethod
    def _logout_quietly(connection: IMAP4_SSL) -> None:
        """Log out of *connection*; errors during cleanup are ignored."""
        try:
            connection.logout()
        except Exception:
            # Best-effort cleanup: the caller's result or error takes priority.
            pass

    @staticmethod
    def get_mail_verification_code(
        folder: str, regular_expression: str
    ) -> Optional[str]:
        """
        Poll a mailbox folder for a verification code.

        The last mail of the folder is checked every 5 seconds for up to 120
        seconds. A mail is considered only when it was sent no earlier than
        300 seconds before the call started.

        Example::

            feishu = Feishu()
            print(feishu.get_mail_verification_code(folder="邮箱验证码", regular_expression=r"【普康健康】您的验证码是：(\\d+)"))
            # prints e.g. 123456

        :param folder: mailbox folder name (non-ASCII names are encoded for IMAP)
        :param regular_expression: pattern whose first capture group is the code
        :return: the verification code, or None when none arrived before the timeout
        :raises RuntimeError: when the folder name is empty, connecting or
            logging in fails, or the server rejects a mailbox command
        """
        if not folder:
            raise RuntimeError("邮箱文件夹名不能为空")

        mailbox = Feishu._encode_mailbox_name(folder)
        # Compile once up front so an invalid pattern fails before any network I/O.
        pattern = re.compile(regular_expression)

        try:
            connection = IMAP4_SSL(host=Feishu._IMAP_HOST, port=Feishu._IMAP_PORT)
        except Exception as exception:
            raise RuntimeError(f"登录邮箱发生异常：{str(exception)}") from exception

        # From here on the connection is always released, whatever happens.
        try:
            try:
                connection.login(
                    user=Feishu._IMAP_USER, password=Feishu._IMAP_PASSWORD
                )
            except Exception as exception:
                raise RuntimeError(
                    f"登录邮箱发生异常：{str(exception)}"
                ) from exception

            start_timestamp = time()
            deadline = start_timestamp + Feishu._POLL_TIMEOUT
            not_before = start_timestamp - Feishu._MAIL_MAX_AGE

            while True:
                poll_started = time()
                if poll_started > deadline:
                    return None

                code = Feishu._fetch_latest_code(
                    connection, mailbox, pattern, not_before
                )
                if code is not None:
                    return code

                # Sleep until the next poll is due (never past the deadline)
                # instead of spinning on the clock.
                pause = min(poll_started + Feishu._POLL_INTERVAL, deadline) - time()
                if pause > 0:
                    sleep(pause)
        finally:
            Feishu._logout_quietly(connection)

    def _get_headers(self) -> Dict[str, Any]:
        """
        Build the authorization header for open-platform requests.

        :return: header dict containing the bearer token for the ``feishu`` servicer
        """
        return {
            "Authorization": f"Bearer {self.authenticator.get_token(servicer='feishu')}",
        }

    def get_bitable_records(
        self,
        app_token: str,
        table_id: str,
    ) -> List[Dict[str, Any]]:
        """
        Fetch all records of a Bitable table, following pagination.

        :param app_token: Bitable app token
        :param table_id: Bitable table id
        :return: all records collected from every page
        :raises RuntimeError: when a response carries a non-zero ``code``
        """
        # https://open.feishu.cn/document/docs/bitable-v1/app-table-record/search
        # (default page size is 20)
        url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{app_token}/tables/{table_id}/records/search"

        headers = self._get_headers()
        headers["Content-Type"] = "application/json; charset=utf-8"

        records: List[Dict[str, Any]] = []
        page_token = None
        while True:
            # Percent-encode the token so characters such as "+" or "="
            # survive the query string.
            page_url = (
                f"{url}?{urlencode({'page_token': page_token})}" if page_token else url
            )
            response = self.http_client.post(url=page_url, headers=headers)

            if response["code"] != 0:
                # NOTE(review): the Feishu API documents the field as ``msg``;
                # ``message`` is kept first in case the HTTP client renames it.
                detail = response.get("message", response.get("msg"))
                raise RuntimeError(
                    f"请求多维表格查询记录失败：{response['code']} {detail}"
                )

            data = response["data"]
            # ``items`` may be absent or null for an empty page.
            records.extend(data.get("items") or [])

            if not data["has_more"]:
                break

            page_token = data["page_token"]

        return records

    def download_material(self, file_token: str, stream_enabled: bool = False) -> str:
        """
        Download a Drive media file and return it base64 encoded.

        :param file_token: media file token
        :param stream_enabled: ask the HTTP client to stream the download, default False
        :return: file content as a base64 encoded ASCII string
        :raises RuntimeError: when the downloaded content cannot be read as bytes
        """
        # https://open.feishu.cn/document/server-docs/docs/drive-v1/media/download
        url = f"https://open.feishu.cn/open-apis/drive/v1/medias/{file_token}/download"

        headers = self._get_headers()
        headers["Content-Type"] = "application/json; charset=utf-8"

        response = self.http_client.download(
            url=url,
            headers=headers,
            stream_enabled=stream_enabled,
        )

        # NOTE(review): the previous implementation only inspected
        # ``response[1]``, so the payload is assumed to be the second element
        # of the download result — confirm against Request.download.
        content = response[1]
        if not isinstance(content, (bytes, bytearray)):
            # Presumably an iterable of byte chunks when streaming — TODO confirm.
            try:
                content = b"".join(content)
            except TypeError as exception:
                raise RuntimeError(
                    f"下载素材失败：无法识别的响应内容类型 {type(content).__name__}"
                ) from exception

        return b64encode(content).decode("ascii")


# Manual smoke test. Guarded so that importing this module no longer creates a
# client and performs a network download as a side effect.
if __name__ == "__main__":
    client = Feishu()

    # Prints whatever download_material returns for the sample file token.
    print(
        client.download_material(
            file_token="DsG4bY3iKo0n6Bx6O5fcKAbnnCh",
        )
    )