Python/剪映脚本生成自动化/edgetts.py

# -*- coding: utf-8 -*-

import asyncio
import uuid
from pathlib import Path
from typing import Optional, Tuple

import edge_tts
from mutagen.mp3 import MP3


class EdgeTTS:
    """
    Online speech synthesis built on the ``edge_tts`` package.

    Each call to ``generate_audio`` synthesizes one piece of text into an MP3
    file inside the materials folder and reports the file name together with
    its duration.
    """

    # Display name -> voice identifier passed to ``edge_tts`` (Chinese voices).
    TIMBRES = {
        "女声-晓晓": "zh-CN-XiaoxiaoNeural",
        "女声-晓辰": "zh-CN-XiaochenNeural",
        "女声-晓倩": "zh-CN-XiaoqianNeural",
    }

    # Timbre used when the caller passes ``None``; same value as the default
    # of ``generate_audio``'s ``timbre`` parameter.
    DEFAULT_TIMBRE = "女声-晓晓"

    def __init__(
        self,
        materials_path: Path,
    ):
        """
        Initialize the synthesizer.

        :param materials_path: folder in which generated audio files are
            saved; a plain string path is also accepted and converted
        """
        # Coerce so the ``/`` join used when building the audio path also
        # works when a caller hands in a plain string.
        self.materials_path = Path(materials_path)

    def generate_audio(
        self,
        content: str,
        timbre: Optional[str] = "女声-晓晓",
        rate: str = "+0%",
        volume: str = "+0%",
    ) -> Tuple[str, int]:
        """
        Synthesize speech for the given text and save it as an audio material.

        This is a synchronous facade over the asynchronous ``edge_tts`` API:
        it drives the coroutine with ``asyncio.run``, so it must not be called
        from code that is already running inside an event loop.

        :param content: text to synthesize
        :param timbre: timbre display name (a key of ``TIMBRES``), e.g.
            ``女声-晓晓``; ``None`` selects ``DEFAULT_TIMBRE``
        :param rate: speaking-rate adjustment forwarded to ``edge_tts``
        :param volume: volume adjustment forwarded to ``edge_tts``
        :return: ``(file name, duration)`` where the file name is relative to
            the materials folder and the duration is in microseconds
        :raises RuntimeError: if the timbre is unknown or synthesis, saving or
            duration probing fails; the original error is attached as
            ``__cause__``
        """
        # noinspection PyBroadException
        try:
            # Validate the timbre first so a bad name fails fast with a clear
            # message instead of a bare KeyError from inside the coroutine.
            voice = self._resolve_voice(timbre)
            # Run the async pipeline to completion; callers see a sync API.
            return asyncio.run(self._synthesize(content, voice, rate, volume))
        except Exception as exception:
            # Single exception type for callers; chain the cause for debugging.
            raise RuntimeError(
                f"根据文本内容合成语音并保存为音频素材发声异常：{str(exception)}"
            ) from exception

    @classmethod
    def _resolve_voice(cls, timbre: Optional[str]) -> str:
        """
        Map a timbre display name to its ``edge_tts`` voice identifier.

        :param timbre: timbre display name, or ``None`` for the default
        :return: voice identifier taken from ``TIMBRES``
        :raises ValueError: if the name is not a key of ``TIMBRES``
        """
        name = cls.DEFAULT_TIMBRE if timbre is None else timbre
        try:
            return cls.TIMBRES[name]
        except KeyError:
            raise ValueError(
                f"未知音色：{name}，可选音色：{'、'.join(cls.TIMBRES)}"
            ) from None

    async def _synthesize(
        self,
        content: str,
        voice: str,
        rate: str,
        volume: str,
    ) -> Tuple[str, int]:
        """
        Synthesize ``content`` with ``edge_tts`` and save it as an MP3 file.

        :param content: text to synthesize
        :param voice: ``edge_tts`` voice identifier
        :param rate: speaking-rate adjustment
        :param volume: volume adjustment
        :return: ``(file name, duration in microseconds)``
        """
        communicator = edge_tts.Communicate(
            text=content,
            voice=voice,
            rate=rate,
            volume=volume,
        )
        # File name: last 16 hex digits of a random UUID, upper-cased.
        name = f"{uuid.uuid4().hex[-16:].upper()}.mp3"
        # Forward-slash string form of the target path, used for both the
        # save call and the duration probe below.
        audio_path = (self.materials_path / name).as_posix()
        await communicator.save(audio_path)
        # ``info.length`` is multiplied by 1,000,000 and rounded to an
        # integer: seconds -> microseconds.
        duration = int(round(MP3(audio_path).info.length * 1000000))
        return name, duration