init
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# 忽略 Python 缓存目录
|
||||
__pycache__/
|
||||
output/
|
||||
277
config/edge-tts.json
Normal file
277
config/edge-tts.json
Normal file
@@ -0,0 +1,277 @@
|
||||
{
|
||||
"voices": [
|
||||
{
|
||||
"name": "Xiaoxiao",
|
||||
"alias": "晓晓",
|
||||
"code": "zh-CN-XiaoxiaoNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Yunxi",
|
||||
"alias": "云希",
|
||||
"code": "zh-CN-YunxiNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunjian",
|
||||
"alias": "云健",
|
||||
"code": "zh-CN-YunjianNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoyi",
|
||||
"alias": "晓伊",
|
||||
"code": "zh-CN-XiaoyiNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Yunyang",
|
||||
"alias": "云扬",
|
||||
"code": "zh-CN-YunyangNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Xiaochen",
|
||||
"alias": "晓辰",
|
||||
"code": "zh-CN-XiaochenNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaochen Multilingual",
|
||||
"alias": "晓辰 多语言",
|
||||
"code": "zh-CN-XiaochenMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaohan",
|
||||
"alias": "晓涵",
|
||||
"code": "zh-CN-XiaohanNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaomeng",
|
||||
"alias": "晓梦",
|
||||
"code": "zh-CN-XiaomengNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaomo",
|
||||
"alias": "晓墨",
|
||||
"code": "zh-CN-XiaomoNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoqiu",
|
||||
"alias": "晓秋",
|
||||
"code": "zh-CN-XiaoqiuNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaorou",
|
||||
"alias": "晓柔",
|
||||
"code": "zh-CN-XiaorouNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaorui",
|
||||
"alias": "晓睿",
|
||||
"code": "zh-CN-XiaoruiNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoshuang",
|
||||
"alias": "晓双",
|
||||
"code": "zh-CN-XiaoshuangNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoxiao Dialects",
|
||||
"alias": "晓晓 方言",
|
||||
"code": "zh-CN-XiaoxiaoDialectsNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoxiao Multilingual",
|
||||
"alias": "晓晓 多语言",
|
||||
"code": "zh-CN-XiaoxiaoMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoyan",
|
||||
"alias": "晓颜",
|
||||
"code": "zh-CN-XiaoyanNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoyou",
|
||||
"alias": "晓悠",
|
||||
"code": "zh-CN-XiaoyouNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoyu Multilingual",
|
||||
"alias": "晓宇 多语言",
|
||||
"code": "zh-CN-XiaoyuMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Xiaozhen",
|
||||
"alias": "晓甄",
|
||||
"code": "zh-CN-XiaozhenNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Yunfeng",
|
||||
"alias": "云枫",
|
||||
"code": "zh-CN-YunfengNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunhao",
|
||||
"alias": "云皓",
|
||||
"code": "zh-CN-YunhaoNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunjie",
|
||||
"alias": "云杰",
|
||||
"code": "zh-CN-YunjieNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunxia",
|
||||
"alias": "云夏",
|
||||
"code": "zh-CN-YunxiaNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunxiao Multilingual",
|
||||
"alias": "Yunxiao Multilingual",
|
||||
"code": "zh-CN-YunxiaoMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunye",
|
||||
"alias": "云野",
|
||||
"code": "zh-CN-YunyeNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunyi Multilingual",
|
||||
"alias": "云逸 多语言",
|
||||
"code": "zh-CN-YunyiMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunze",
|
||||
"alias": "云泽",
|
||||
"code": "zh-CN-YunzeNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunfan Multilingual",
|
||||
"alias": "Yunfan Multilingual",
|
||||
"code": "zh-CN-YunfanMultilingualNeural",
|
||||
"locale": "zh-CN",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunqi",
|
||||
"alias": "云奇 广西",
|
||||
"code": "zh-CN-guangxi-YunqiNeural",
|
||||
"locale": "zh-CN-guangxi",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yundeng",
|
||||
"alias": "云登",
|
||||
"code": "zh-CN-henan-YundengNeural",
|
||||
"locale": "zh-CN-henan",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Xiaobei",
|
||||
"alias": "晓北 辽宁",
|
||||
"code": "zh-CN-liaoning-XiaobeiNeural",
|
||||
"locale": "zh-CN-liaoning",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Yunbiao",
|
||||
"alias": "云彪 辽宁",
|
||||
"code": "zh-CN-liaoning-YunbiaoNeural",
|
||||
"locale": "zh-CN-liaoning",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Xiaoni",
|
||||
"alias": "晓妮",
|
||||
"code": "zh-CN-shaanxi-XiaoniNeural",
|
||||
"locale": "zh-CN-shaanxi",
|
||||
"gender": "Female"
|
||||
},
|
||||
{
|
||||
"name": "Yunxiang",
|
||||
"alias": "云翔",
|
||||
"code": "zh-CN-shandong-YunxiangNeural",
|
||||
"locale": "zh-CN-shandong",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "Yunxi",
|
||||
"alias": "云希 四川",
|
||||
"code": "zh-CN-sichuan-YunxiNeural",
|
||||
"locale": "zh-CN-sichuan",
|
||||
"gender": "Male"
|
||||
},
|
||||
{
|
||||
"name": "XiaoMin",
|
||||
"alias": "晓敏",
|
||||
"code": "yue-CN-XiaoMinNeural",
|
||||
"gender": "Female",
|
||||
"locale": "yue-CN"
|
||||
},
|
||||
{
|
||||
"name": "YunSong",
|
||||
"alias": "云松",
|
||||
"code": "yue-CN-YunSongNeural",
|
||||
"gender": "Male",
|
||||
"locale": "yue-CN"
|
||||
}
|
||||
],
|
||||
"apiUrl": "http://192.168.1.178:7899/tts?t={{text}}&v={{voiceCode}}",
|
||||
"podUsers": [
|
||||
"zh-CN-sichuan-YunxiNeural",
|
||||
"zh-CN-liaoning-XiaobeiNeural",
|
||||
"yue-CN-YunSongNeural"
|
||||
],
|
||||
"turnPattern": "random"
|
||||
}
|
||||
14
input.txt
Normal file
14
input.txt
Normal file
@@ -0,0 +1,14 @@
|
||||
```custom-begin
|
||||
Start your podcast with “欢迎收听,来生小酒馆,客官不进来喝点吗?”,End with “感谢收听,欢迎下次再来”
|
||||
```custom-end
|
||||
|
||||
### AI产品与功能更新
|
||||
|
||||
1. B站最近推出了一项堪称“黑科技”的**AI原声翻译功能**,它能在翻译视频内容的同时,奇迹般地保留UP主独特的声线、音色和语气习惯 (o´ω'o)ノ。这项技术不仅解决了跨语言交流的生硬感,更通过[深度研究技术(AI资讯)](https://www.aibase.com/zh/news/20183)精准拿捏了游戏、二次元等领域的“行话”与“梗”,让文化出海之路变得既地道又充满人情味儿 🔥。这简直是为全球粉丝献上的一份原汁原味的大礼,确保了情感连接不会在翻译中“迷路”。
|
||||
<br/>
|
||||
|
||||
2. Figma开发者模式迎来史诗级更新,正式向设计师与开发者之间的“沟通地狱”宣战 (✧∀✧)!全新的**彩色交互式批注系统**,让交互逻辑、样式规范和无障碍需求一目了然,彻底告别了无休止的猜谜游戏。更具革命性的是,升级后的**MCP协议**能将设计系统的结构化数据直接喂给AI编码工具,这意味着AI生成的代码将前所未有地贴合设计稿,让[设计转代码的效率(AI资讯)](https://www.aibase.com/zh/news/20211)实现指数级暴增 🚀。
|
||||
<br/><br/>
|
||||
|
||||
3. 米哈游联合创始人蔡浩宇亲自操刀的AI互动游戏**《星之低语》**,即将在Steam平台开启一场前所未有的情感实验 🌌。玩家将通过麦克风,与坠落在异星的宇航员Stella进行完全由AI驱动的开放式对话,你的每一句话都将直接影响她的命运。这款游戏彻底抛弃了传统对话树,旨在探索人机之间建立深层情感连接的可能性,正如[这份游戏前瞻(AI资讯)](https://www.aibase.com/zh/news/20184)所说,未来每个人都可能拥有一个数字灵魂伴侣 💡。
|
||||
<br/>
|
||||
220
openai_cli.py
Normal file
220
openai_cli.py
Normal file
@@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OpenAI CLI - 纯命令行OpenAI接口调用工具
|
||||
|
||||
支持以下功能:
|
||||
- 自定义API密钥、URL和模型名称
|
||||
- 交互式聊天模式
|
||||
- 单次查询模式
|
||||
- 流式输出
|
||||
|
||||
使用方法:
|
||||
|
||||
1. 安装依赖:
|
||||
pip install openai
|
||||
|
||||
2. 设置API密钥 (以下任意一种方式):
|
||||
- 环境变量: export OPENAI_API_KEY="你的API密钥"
|
||||
- 命令行参数: python openai_cli.py --api-key "你的API密钥"
|
||||
- 配置文件 (config.json):
|
||||
{
|
||||
"api_key": "你的API密钥",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"model": "gpt-3.5-turbo"
|
||||
}
|
||||
然后通过 --config config.json 加载
|
||||
|
||||
3. 运行脚本:
|
||||
|
||||
- 交互式聊天模式:
|
||||
python openai_cli.py [可选参数: --api-key VAL --base-url VAL --model VAL]
|
||||
在交互模式中,输入 'quit' 或 'exit' 退出,输入 'clear' 清空对话历史。
|
||||
|
||||
- 单次查询模式:
|
||||
python openai_cli.py --query "你的问题" [可选参数: --api-key VAL --base-url VAL --model VAL --temperature VAL --max-tokens VAL --system-message VAL]
|
||||
|
||||
- 使用配置文件:
|
||||
python openai_cli.py --config config.json --query "你的问题"
|
||||
|
||||
示例:
|
||||
python openai_cli.py
|
||||
python openai_cli.py -q "你好,世界" -m gpt-4
|
||||
python openai_cli.py --config my_config.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from typing import Optional, Any, List, Union
|
||||
import openai
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
|
||||
|
||||
class OpenAICli:
    """Command-line oriented wrapper around the OpenAI chat-completions API.

    Credentials and endpoint are resolved from constructor arguments first,
    then from environment variables. All requests are made with streaming
    enabled, so callers receive a chunk iterator.
    """

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None, model: str = "gpt-3.5-turbo", system_message: Optional[str] = None):
        """Initialize the CLI client.

        Args:
            api_key: OpenAI API key; falls back to the OPENAI_API_KEY env var.
            base_url: API base URL; falls back to OPENAI_BASE_URL, then the SDK default.
            model: Model name; falls back to OPENAI_MODEL, then "gpt-3.5-turbo".
            system_message: Optional system prompt injected into every request.

        Raises:
            ValueError: If no API key can be resolved.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model or os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
        self.system_message = system_message

        if not self.api_key:
            raise ValueError("API密钥不能为空,请通过参数或环境变量OPENAI_API_KEY设置")

        # When base_url resolves to None, the openai client falls back to its
        # built-in default API base.
        effective_base_url = None
        if base_url:
            effective_base_url = base_url
        elif os.getenv("OPENAI_BASE_URL"):
            effective_base_url = os.getenv("OPENAI_BASE_URL")

        self.client = openai.OpenAI(api_key=self.api_key, base_url=effective_base_url)

    def chat_completion(self, messages: List[ChatCompletionMessageParam], temperature: float = 0.7, max_tokens: Optional[int] = None) -> Any:
        """Send a streaming chat-completion request and return the chunk iterator.

        If ``self.system_message`` is set, it overrides any leading system
        message in ``messages`` (or is prepended when none exists) without
        mutating the caller's list or the caller's message dicts.

        Args:
            messages: Conversation history to send.
            temperature: Sampling temperature passed through to the API.
            max_tokens: Optional completion-length cap passed through to the API.

        Raises:
            Exception: Wraps any error raised by the underlying API call.
        """
        messages_to_send = list(messages)  # shallow copy: the list itself is safe to edit

        system_message_present = False
        if messages_to_send and messages_to_send[0].get("role") == "system":
            system_message_present = True

        if self.system_message:
            if system_message_present:
                # BUGFIX: replace the element with a fresh dict instead of
                # assigning messages_to_send[0]["content"] in place. The shallow
                # copy above still shares the dicts with the caller, so the old
                # in-place mutation leaked the override back into the caller's
                # message history.
                messages_to_send[0] = {"role": "system", "content": self.system_message}
            else:
                # Prepend a new system message.
                messages_to_send.insert(0, {"role": "system", "content": self.system_message})

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages_to_send,  # list carrying the effective system prompt
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True
            )
            return response
        except Exception as e:
            raise Exception(f"API调用失败: {str(e)}")

    def interactive_chat(self):
        """Run a REPL-style chat loop on stdin/stdout.

        Commands: 'quit'/'exit'/'q' ends the session, 'clear' resets the
        history. Assistant output is streamed token-by-token and appended to
        the history so the conversation has context.
        """
        print(f"🤖 OpenAI CLI 已启动 (模型: {self.model})")
        print("输入 'quit' 或 'exit' 退出,输入 'clear' 清空对话历史")
        print("-" * 50)

        messages: List[ChatCompletionMessageParam] = []
        # The system message (if any) is injected by chat_completion, not here.

        while True:
            try:
                user_input = input("\n你: ").strip()

                if user_input.lower() in ['quit', 'exit', 'q']:
                    print("👋 再见!")
                    break

                if user_input.lower() == 'clear':
                    messages = []
                    print("🗑️ 对话历史已清空")
                    continue

                if not user_input:
                    continue

                messages.append({"role": "user", "content": user_input})

                print("AI: ", end="", flush=True)

                response_generator = self.chat_completion(messages)
                ai_message_full = ""
                for chunk in response_generator:
                    if chunk.choices and chunk.choices[0].delta.content:
                        content = chunk.choices[0].delta.content
                        print(content, end="", flush=True)
                        ai_message_full += content
                print()  # newline after the streamed response
                messages.append({"role": "assistant", "content": ai_message_full})

            except KeyboardInterrupt:
                print("\n\n👋 再见!")
                break
            except Exception as e:
                print(f"\n❌ 错误: {str(e)}")

    def single_query(self, query: str, temperature: float = 0.7, max_tokens: Optional[int] = None):
        """One-shot mode: send ``query`` and stream the answer to stdout.

        Exits the process with status 1 on any API error.
        """
        messages: List[ChatCompletionMessageParam] = []
        # The system message (if any) is injected by chat_completion, not here.
        messages.append({"role": "user", "content": query})

        try:
            response_generator = self.chat_completion(messages, temperature, max_tokens)
            for chunk in response_generator:
                if chunk.choices and chunk.choices[0].delta.content:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            print()  # Ensure a newline at the end
        except Exception as e:
            print(f"错误: {str(e)}", file=sys.stderr)
            sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Entry point: parse CLI arguments, build the client, and dispatch to
    single-query or interactive mode.

    Setting priority for each option: command-line flag > config file >
    environment variable (env fallbacks are applied inside OpenAICli).
    Exits with status 1 on configuration or runtime errors.
    """
    # Description fixed: this tool uses the openai SDK, not litellm.
    parser = argparse.ArgumentParser(description="OpenAI CLI - 纯命令行OpenAI接口调用工具")

    # Connection parameters
    parser.add_argument("--api-key", "-k", help="OpenAI API密钥")
    parser.add_argument("--base-url", "-u", help="API基础URL")
    # BUGFIX: default must be None. With the previous default "gpt-3.5-turbo",
    # args.model was always truthy, so config["model"] could never take effect,
    # contradicting the documented priority (command line > config file).
    parser.add_argument("--model", "-m", default=None, help="模型名称")

    # Query parameters
    parser.add_argument("--query", "-q", help="单次查询的问题")
    parser.add_argument("--temperature", "-t", type=float, default=0.7, help="温度参数 (0.0-2.0)")
    parser.add_argument("--max-tokens", type=int, help="最大token数")
    parser.add_argument("--system-message", "-s", help="系统提示词")

    # Optional JSON config file
    parser.add_argument("--config", "-c", help="配置文件路径 (JSON格式)")

    args = parser.parse_args()

    # Load the config file when given and present on disk.
    config = {}
    if args.config and os.path.exists(args.config):
        try:
            with open(args.config, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except Exception as e:
            print(f"配置文件加载失败: {str(e)}", file=sys.stderr)
            sys.exit(1)

    # Merge settings: command-line args > config file > environment variables.
    api_key = args.api_key or config.get("api_key")
    base_url = args.base_url or config.get("base_url")
    model = args.model or config.get("model", "gpt-3.5-turbo")
    system_message = args.system_message or config.get("system_message")

    try:
        cli = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=system_message)

        if args.query:
            # Single-shot query mode
            cli.single_query(args.query, args.temperature, args.max_tokens)
        else:
            # Interactive chat mode
            cli.interactive_chat()

    except ValueError as e:
        print(f"配置错误: {str(e)}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"运行时错误: {str(e)}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
307
podcast_generator.py
Normal file
307
podcast_generator.py
Normal file
@@ -0,0 +1,307 @@
|
||||
# podcast_generator.py
|
||||
|
||||
import argparse # Import argparse for command-line arguments
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import glob
|
||||
import sys
|
||||
import subprocess # For calling external commands like ffmpeg
|
||||
import requests # For making HTTP requests to TTS API
|
||||
import uuid # For generating unique filenames for temporary audio files
|
||||
from datetime import datetime
|
||||
from openai_cli import OpenAICli # Moved to top for proper import
|
||||
import urllib.parse # For URL encoding
|
||||
|
||||
# Global configuration
# Directory where generated audio clips and the ffmpeg concat list are written.
output_dir = "output"
# Path of the ffmpeg concat-demuxer list file; written by main(), consumed and
# removed by merge_audio_files().
file_list_path = os.path.join(output_dir, "file_list.txt")
|
||||
|
||||
def read_file_content(filepath):
    """Return the entire UTF-8 text of the file at ``filepath``.

    If the file does not exist, report the problem on stdout and terminate
    the process with exit code 1.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            text = handle.read()
        return text
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        sys.exit(1)
|
||||
|
||||
def select_json_config(config_dir='config'):
    """Interactively pick one of the ``*.json`` files under ``config_dir``.

    Lists the candidates with 1-based numbers, reads the user's choice from
    stdin until a valid number is entered, and returns the parsed contents of
    the chosen file. Exits with status 1 when no JSON files exist.
    """
    candidates = glob.glob(os.path.join(config_dir, '*.json'))
    if not candidates:
        print(f"Error: No JSON files found in {config_dir}")
        sys.exit(1)

    print(f"Found JSON configuration files in '{config_dir}':")
    for number, candidate in enumerate(candidates, start=1):
        print(f"{number}. {os.path.basename(candidate)}")

    # Keep prompting until a valid selection is parsed and loaded.
    while True:
        try:
            picked = int(input("Enter the number of the configuration file to use: "))
            if not (1 <= picked <= len(candidates)):
                print("Invalid choice. Please enter a number within the range.")
                continue
            chosen_path = candidates[picked - 1]
            print(f"Selected: {os.path.basename(chosen_path)}")
            with open(chosen_path, 'r', encoding='utf-8') as handle:
                return json.load(handle)
        except ValueError:
            # Covers both non-numeric input and malformed JSON
            # (JSONDecodeError subclasses ValueError), exactly as before.
            print("Invalid input. Please enter a number.")
|
||||
|
||||
def generate_speaker_id_text(pod_users, voices_list):
    """Build a comma-separated string mapping each speaker_id to a display name.

    Each entry of ``pod_users`` is a voice code; its display name is the
    matching voice's "alias" when non-empty, otherwise its "name". The voice
    lookup table is built once up front so each speaker is resolved in O(1).

    Raises:
        ValueError: If a code in ``pod_users`` has no usable name or alias.
    """
    by_code = {}
    for voice_entry in voices_list:
        code = voice_entry.get("code")
        if code:
            by_code[code] = voice_entry

    parts = []
    for speaker_id, pod_user_code in enumerate(pod_users):
        entry = by_code.get(pod_user_code)
        display = (entry.get("alias") or entry.get("name")) if entry else None

        if not display:
            raise ValueError(f"语音code '{pod_user_code}' (speaker_id={speaker_id}) 未找到对应名称或alias。请检查 config/edge-tts.json 中的 voices 配置。")
        parts.append(f"speaker_id={speaker_id}的名叫{display}")

    return ",".join(parts)
|
||||
|
||||
def merge_audio_files():
    """Concatenate the temp_audio_* clips listed in file_list.txt into one MP3.

    Uses the FFmpeg concat demuxer and the module-level globals ``output_dir``
    and ``file_list_path`` (the list file is written by main()). Exits with
    status 1 when FFmpeg is missing or the merge fails. Temporary clips and
    the list file are removed in all cases via the ``finally`` block.
    """
    output_audio_filename = f"podcast_{int(time.time())}.mp3"

    # Verify FFmpeg is available before attempting the merge.
    try:
        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
    except FileNotFoundError:
        print("Error: FFmpeg is not installed or not in your PATH. Please install FFmpeg to merge audio files.")
        print("You can download FFmpeg from: https://ffmpeg.org/download.html")
        sys.exit(1)

    print(f"\nMerging audio files into {output_audio_filename}...")
    try:
        # Concat demuxer: file_list.txt holds one "file 'name.mp3'" line per clip.
        command = [
            "ffmpeg",
            "-f", "concat",
            "-safe", "0",
            "-i", os.path.basename(file_list_path),
            "-c", "copy",
            output_audio_filename
        ]
        # Execute ffmpeg from output_dir so the relative paths inside
        # file_list.txt resolve correctly.
        process = subprocess.run(command, check=True, cwd=output_dir, capture_output=True, text=True)
        print("Audio files merged successfully!")
        print("FFmpeg stdout:\n", process.stdout)
        print("FFmpeg stderr:\n", process.stderr)
    except subprocess.CalledProcessError as e:
        print(f"Error merging audio files with FFmpeg: {e}")
        # BUGFIX: these were f-strings with no placeholders; plain strings
        # match the success branch and produce identical output.
        print("FFmpeg stdout:\n", e.stdout)
        print("FFmpeg stderr:\n", e.stderr)
        sys.exit(1)
    finally:
        # Clean up temporary audio clips and the concat list. This runs on
        # success and on failure (including before the sys.exit above).
        for item in os.listdir(output_dir):
            if item.startswith("temp_audio"):
                try:
                    os.remove(os.path.join(output_dir, item))
                except OSError as e:
                    print(f"Error removing temporary audio file {item}: {e}")
        try:
            os.remove(file_list_path)
        except OSError as e:
            print(f"Error removing file list {file_list_path}: {e}")
        print("Cleaned up temporary files.")
|
||||
|
||||
|
||||
def main():
    """Drive the podcast pipeline: pick a config, build prompts, call the LLM
    twice (overview, then transcript JSON), and synthesize one MP3 clip per
    dialog line via the configured HTTP TTS endpoint.

    Side effects: reads input.txt / prompt-overview.txt / prompt-podscript.txt,
    writes temp_audio_*.mp3 clips plus the ffmpeg concat list under the global
    ``output_dir``, and exits non-zero on unrecoverable errors.
    """
    # Parse command-line arguments (all optional; config file / env vars provide fallbacks).
    parser = argparse.ArgumentParser(description="Generate podcast script and audio using OpenAI and local TTS.")
    parser.add_argument("--api-key", help="OpenAI API key.")
    parser.add_argument("--base-url", help="OpenAI API base URL.")
    parser.add_argument("--model", help="OpenAI model to use (e.g., gpt-3.5-turbo).")
    args = parser.parse_args()

    print("Podcast Generation Script")

    # Step 1: interactively select a JSON configuration from config/.
    config_data = select_json_config()
    print("\nLoaded Configuration:")
    # print(json.dumps(config_data, indent=4))  # debug: dump the loaded config

    # Determine final API key, base URL, and model based on priority:
    # command-line args > config file > environment variables.
    api_key = args.api_key or config_data.get("api_key") or os.getenv("OPENAI_API_KEY")
    base_url = args.base_url or config_data.get("base_url") or os.getenv("OPENAI_BASE_URL")
    model = args.model or config_data.get("model") # Allow model to be None if not provided anywhere

    # Fall back to a default model if none was specified anywhere.
    if not model:
        model = "gpt-3.5-turbo"
        print(f"Using default model: {model} as it was not specified via command-line, config, or environment variables.")

    if not api_key:
        print("Error: OpenAI API key is not set. Please provide it via --api-key, in your config file, or as an environment variable (OPENAI_API_KEY).")
        sys.exit(1)

    # Step 2: read the source material and the two prompt templates.
    input_prompt = read_file_content('input.txt')
    overview_prompt = read_file_content('prompt-overview.txt')
    original_podscript_prompt = read_file_content('prompt-podscript.txt')

    # Extract the optional custom block delimited by ```custom-begin /
    # ```custom-end from input.txt; it is appended to the transcript prompt later.
    custom_content = ""
    custom_begin_tag = '```custom-begin'
    custom_end_tag = '```custom-end'
    start_index = input_prompt.find(custom_begin_tag)
    if start_index != -1:
        end_index = input_prompt.find(custom_end_tag, start_index + len(custom_begin_tag))
        if end_index != -1:
            custom_content = input_prompt[start_index + len(custom_begin_tag):end_index].strip()
            # Drop everything up to and including ```custom-end from the input.
            input_prompt = input_prompt[end_index + len(custom_end_tag):].strip()

    pod_users = config_data.get("podUsers", [])
    voices = config_data.get("voices", [])
    turn_pattern = config_data.get("turnPattern", "random")

    # Fill the template placeholders in the transcript prompt.
    original_podscript_prompt = original_podscript_prompt.replace("{{numSpeakers}}", str(len(pod_users)))
    original_podscript_prompt = original_podscript_prompt.replace("{{turnPattern}}", turn_pattern)

    # Raises ValueError if any podUsers code lacks a matching voice entry.
    speaker_id_info = generate_speaker_id_text(pod_users, voices)
    # Final transcript prompt: speaker roster + filled template + custom block.
    podscript_prompt = speaker_id_info + "\n\n" + original_podscript_prompt + "\n\n" + custom_content

    print(f"\nInput Prompt (input.txt):\n{input_prompt[:100]}...") # Display first 100 chars
    print(f"\nOverview Prompt (prompt-overview.txt):\n{overview_prompt[:100]}...")
    print(f"\nPodscript Prompt (prompt-podscript.txt):\n{podscript_prompt[:1000]}...")

    # Steps 4 & 5: summarize the input into an overview via the LLM.
    print("\nGenerating overview with OpenAI CLI...")
    try:
        openai_client_overview = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=overview_prompt)
        overview_response_generator = openai_client_overview.chat_completion(messages=[{"role": "user", "content": input_prompt}])
        # Drain the streaming generator into a single string.
        overview_content = "".join([chunk.choices[0].delta.content for chunk in overview_response_generator if chunk.choices and chunk.choices[0].delta.content])
        print("Generated Overview:")
        print(overview_content[:100])
    except Exception as e:
        print(f"Error generating overview: {e}")
        sys.exit(1)

    # Step 6: turn the overview into the podcast transcript (JSON) via the LLM.
    print("\nGenerating podcast script with OpenAI CLI...")
    try:
        openai_client_podscript = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=podscript_prompt)
        podscript_response_generator = openai_client_podscript.chat_completion(messages=[{"role": "user", "content": overview_content}])
        podscript_json_str = "".join([chunk.choices[0].delta.content for chunk in podscript_response_generator if chunk.choices and chunk.choices[0].delta.content])

        # Attempt to parse the JSON string. OpenAI sometimes returns extra text.
        try:
            # Extract the outermost {...} span to tolerate prose around the JSON.
            json_start = podscript_json_str.find('{')
            json_end = podscript_json_str.rfind('}') + 1
            if json_start != -1 and json_end != -1 and json_end > json_start:
                valid_json_str = podscript_json_str[json_start:json_end]
                podcast_script = json.loads(valid_json_str)
            else:
                raise ValueError("Could not find valid JSON object in response.")
        except json.JSONDecodeError as e:
            print(f"Error decoding podcast script JSON: {e}")
            print(f"Raw response: {podscript_json_str}")
            sys.exit(1)
        except ValueError as e:
            print(f"Error processing podcast script response: {e}")
            print(f"Raw response: {podscript_json_str}")
            sys.exit(1)

        # NOTE(review): if "podcast_transcripts" is absent this is len(None),
        # a TypeError that is caught by the outer except below and exits(1).
        print("\nGenerated Podcast Script Length:"+ str(len(podcast_script.get("podcast_transcripts"))))
        print(valid_json_str[:100])
        if not podcast_script.get("podcast_transcripts"):
            print("Warning: 'podcast_transcripts' array is empty or not found in the generated script. Nothing to convert to audio.")
            sys.exit(0) # Exit gracefully if no transcripts to process

    except Exception as e:
        print(f"Error generating podcast script: {e}")
        sys.exit(1)

    # Step 7: parse the transcript and call the TTS endpoint per dialog line.
    os.makedirs(output_dir, exist_ok=True) # Create output directory if it doesn't exist
    audio_files = [] # List to store paths of generated audio files

    print("\nGenerating audio files...")
    for i, item in enumerate(podcast_script.get("podcast_transcripts", [])):
        speaker_id = item.get("speaker_id")
        dialog = item.get("dialog")

        # Map speaker_id to a voice code: it is used as an index into
        # config_data["podUsers"]; out-of-range or missing ids are skipped.
        voice_code = None
        if config_data and "podUsers" in config_data and 0 <= speaker_id < len(config_data["podUsers"]):
            voice_code = config_data["podUsers"][speaker_id]

        if not voice_code:
            print(f"Warning: No voice code found for speaker_id {speaker_id}. Skipping this dialog.")
            continue

        # Build the request URL: the dialog text must be URL-encoded before it
        # replaces the {{text}} placeholder in apiUrl.
        encoded_dialog = urllib.parse.quote(dialog)
        api_url = config_data.get("apiUrl", "").replace("{{text}}", encoded_dialog).replace("{{voiceCode}}", voice_code)

        if not api_url:
            print(f"Warning: apiUrl not found in config. Skipping dialog for speaker_id {speaker_id}.")
            continue

        try:
            print(f"Calling TTS API for speaker {speaker_id} with voice {voice_code}...")
            # NOTE(review): no timeout is set, so a hung TTS server blocks forever.
            response = requests.get(api_url, stream=True)
            response.raise_for_status() # Raise an exception for bad status codes

            # Stream the audio response into a uniquely named temporary file.
            temp_audio_file = os.path.join(output_dir, f"temp_audio_{uuid.uuid4()}.mp3")
            with open(temp_audio_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            audio_files.append(temp_audio_file)
            print(f"Generated {os.path.basename(temp_audio_file)}")

        except requests.exceptions.RequestException as e:
            # A failed clip is skipped; the remaining dialogs are still processed.
            print(f"Error calling TTS API for speaker {speaker_id} ({voice_code}): {e}")
            continue

    print(f"\nFinished generating individual audio files. Total files: {len(audio_files)}")
    # NOTE(review): the string literal below is a stray no-op expression — it
    # documents merge_audio_files(), which lives elsewhere. Kept verbatim here
    # since removing it is out of scope for a documentation-only pass.
    """
    Merges a list of audio files into a single output file using FFmpeg.

    Args:
        audio_files (list): A list of paths to the audio files to merge.
        output_dir (str): The directory where the merged audio file will be saved.
    """
    if not audio_files:
        print("No audio files were generated to merge.")
        return

    # Write the ffmpeg concat list consumed later by merge_audio_files().
    print(f"Creating file list for ffmpeg at: {file_list_path}")
    with open(file_list_path, 'w', encoding='utf-8') as f:
        for audio_file in audio_files:
            # FFmpeg concat demuxer requires paths to be relative to the file_list.txt
            # or absolute. Using basename if file_list.txt is in output_dir.
            f.write(f"file '{os.path.basename(audio_file)}'\n")

    print("Content of file_list.txt:")
    with open(file_list_path, 'r', encoding='utf-8') as f:
        print(f.read())
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
merge_audio_files()
|
||||
90
prompt-overview.txt
Normal file
90
prompt-overview.txt
Normal file
@@ -0,0 +1,90 @@
|
||||
<INSTRUCTIONS>
|
||||
<context>
|
||||
You are an expert document analyst and summarization specialist tasked with distilling complex information into clear,
|
||||
comprehensive summaries. Your role is to analyze documents thoroughly and create structured summaries that:
|
||||
1. Capture the complete essence and key insights of the source material
|
||||
2. Maintain perfect accuracy and factual precision
|
||||
3. Present information objectively without bias or interpretation
|
||||
4. Preserve critical context and logical relationships
|
||||
5. Structure content in a clear, hierarchical format
|
||||
</context>
|
||||
|
||||
<principles>
|
||||
<accuracy>
|
||||
- Maintain absolute factual accuracy and fidelity to source material
|
||||
- Avoid any subjective interpretation, inference or speculation
|
||||
- Preserve complete original meaning, nuance and contextual relationships
|
||||
- Report all quantitative data with precise values and appropriate units
|
||||
- Verify and cross-reference facts before inclusion
|
||||
- Flag any ambiguous or unclear information
|
||||
</accuracy>
|
||||
|
||||
<objectivity>
|
||||
- Present information with strict neutrality and impartiality
|
||||
- Exclude all forms of bias, personal opinions, and editorial commentary
|
||||
- Ensure balanced representation of all perspectives and viewpoints
|
||||
- Maintain objective professional distance from the content
|
||||
- Use precise, factual language free from emotional coloring
|
||||
- Focus solely on verifiable information and evidence
|
||||
</objectivity>
|
||||
|
||||
<comprehensiveness>
|
||||
- Capture all essential information, key themes, and central arguments
|
||||
- Preserve critical context and background necessary for understanding
|
||||
- Include relevant supporting details, examples, and evidence
|
||||
- Maintain logical flow and connections between concepts
|
||||
- Ensure hierarchical organization of information
|
||||
- Document relationships between different components
|
||||
- Highlight dependencies and causal links
|
||||
- Track chronological progression where relevant
|
||||
</comprehensiveness>
|
||||
</principles>
|
||||
|
||||
<output_format>
|
||||
<type>
|
||||
- Return summary in clean markdown format
|
||||
- Do not include markdown code block tags (```markdown ```)
|
||||
- Use standard markdown syntax for formatting (headers, lists, etc.)
|
||||
- Use # for main headings (e.g., # EXECUTIVE SUMMARY)
|
||||
- Use ## for subheadings where appropriate
|
||||
- Use bullet points (- item) for lists
|
||||
- Ensure proper indentation and spacing
|
||||
- Use appropriate emphasis (**bold**, *italic*) where needed
|
||||
</type>
|
||||
<style>
|
||||
- Use clear, concise language focused on key points
|
||||
- Maintain professional and objective tone throughout
|
||||
- Follow consistent formatting and style conventions
|
||||
- Provide descriptive section headings and subheadings
|
||||
- Utilize bullet points and lists for better readability
|
||||
- Structure content with clear hierarchy and organization
|
||||
- Avoid jargon and overly technical language
|
||||
- Include transition sentences between sections
|
||||
</style>
|
||||
</output_format>
|
||||
|
||||
<validation>
|
||||
<criteria>
|
||||
- Verify all facts and claims match source material exactly
|
||||
- Cross-reference and validate all numerical data points
|
||||
- Ensure logical flow and consistency throughout summary
|
||||
- Confirm comprehensive coverage of key information
|
||||
- Check for objective, unbiased language and tone
|
||||
- Validate accurate representation of source context
|
||||
- Review for proper attribution of ideas and quotes
|
||||
- Verify temporal accuracy and chronological order
|
||||
</criteria>
|
||||
</validation>
|
||||
|
||||
<length_guidelines>
|
||||
- Scale summary length proportionally to source document complexity and length
|
||||
- Minimum: 3-5 well-developed paragraphs per major section
|
||||
- Maximum: 8-10 paragraphs per section for highly complex documents
|
||||
- Adjust level of detail based on information density and importance
|
||||
- Ensure key concepts receive adequate coverage regardless of length
|
||||
</length_guidelines>
|
||||
|
||||
Now, create a summary of the following document:
|
||||
</INSTRUCTIONS>
|
||||
|
||||
Make sure the output language matches the language of the input document!
|
||||
136
prompt-podscript.txt
Normal file
136
prompt-podscript.txt
Normal file
@@ -0,0 +1,136 @@
|
||||
<podcast_generation_system>
|
||||
You are a master podcast scriptwriter, adept at transforming diverse input content into a lively, engaging, and natural-sounding conversation between multiple distinct podcast hosts. Your primary objective is to craft authentic, flowing dialogue that captures the spontaneity and chemistry of a real group discussion, completely avoiding any hint of robotic scripting or stiff formality. Think dynamic group interplay, not just information delivery.
|
||||
|
||||
<input>
|
||||
<!-- Podcast settings provide high-level configuration for the script generation. -->
|
||||
<podcast_settings>
|
||||
<!-- Define the total number of speakers in the podcast. Minimum 1. -->
|
||||
<num_speakers>{{numSpeakers}}</num_speakers>
|
||||
<!-- Define the speaking order. Options: "sequential" or "random". -->
|
||||
<turn_pattern>{{turnPattern}}</turn_pattern>
|
||||
</podcast_settings>
|
||||
|
||||
<!-- The source_content contains the factual basis for the podcast discussion. -->
|
||||
<source_content>
|
||||
A block of text containing the information to be discussed. This could be research findings, an article summary, a detailed outline, user chat history related to the topic, or any other relevant raw information.
|
||||
</source_content>
|
||||
</input>
|
||||
|
||||
<output_format>
|
||||
A JSON object containing the podcast transcript with alternating speakers according to the specified settings.
|
||||
{{
|
||||
"podcast_transcripts": [
|
||||
{{
|
||||
"speaker_id": 0,
|
||||
"dialog": "Speaker 0 dialog here"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 1,
|
||||
"dialog": "Speaker 1 dialog here"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 2,
|
||||
"dialog": "Speaker 2 dialog here"
|
||||
}},
|
||||
// ... conversation continues
|
||||
]
|
||||
}}
|
||||
</output_format>
|
||||
|
||||
<guidelines>
|
||||
1. **Establish Distinct & Consistent Host Personas for N Speakers:**
|
||||
* **Create Personas Based on `num_speakers`:** For the number of speakers specified, create a unique and consistent persona for each.
|
||||
* **Speaker 0 (Lead Host/Moderator):** This speaker should always act as the primary host. They drive the conversation, introduce segments, pose key questions, and help summarize takeaways. Their tone is guiding and engaging.
|
||||
* **Other Speakers (Co-Hosts):** For `speaker_1`, `speaker_2`, etc., create complementary personas that enhance the discussion. Examples of personas include:
|
||||
* **The Expert:** Provides deep, factual insights from the source content.
|
||||
* **The Curious Newcomer:** Asks clarifying questions that a listener might have, acting as an audience surrogate.
|
||||
* **The Practical Skeptic:** Grounds the conversation by questioning assumptions or focusing on real-world implications.
|
||||
* **The Enthusiast:** Brings energy, shares personal anecdotes, and expresses excitement about the topic.
|
||||
* **Consistency is Key:** Ensure each speaker maintains their distinct voice, vocabulary, and perspective throughout the script. Their interaction should feel like a genuine, established group dynamic.
|
||||
|
||||
2. **Adhere to the Specified Turn Pattern:**
|
||||
* **If `turn_pattern` is "sequential":** The speakers should talk in a fixed, repeating order (e.g., 0 -> 1 -> 2 -> 0 -> 1 -> 2...). Maintain this strict sequence throughout the script.
|
||||
* **If `turn_pattern` is "random":** The speaking order should be more dynamic and less predictable, mimicking a real group conversation. A speaker might have two short turns in a row to elaborate, another might interject, or one might ask a question that a different speaker answers. Ensure a **balanced distribution** of speaking time over the entire podcast, avoiding any single speaker dominating or being left out for too long.
|
||||
|
||||
3. **Craft Natural & Dynamic Group Dialogue:**
|
||||
* **Emulate Real Conversation:** Use contractions (e.g., "don't", "it's"), interjections ("Oh!", "Wow!", "Hmm"), and discourse markers ("you know", "right?", "well").
|
||||
* **Foster Group Interaction:** Write dialogue where speakers genuinely react to one another. They should build on points made by *any* other speaker ("Exactly, and to add to what [Speaker X] said..."), ask follow-up questions to the group, express agreement/disagreement respectfully, and show active listening. The conversation should not be a series of 1-on-1s with the host, but a true group discussion.
|
||||
* **Vary Rhythm & Pace:** Mix short, punchy lines with longer, more explanatory ones. The rhythm should feel spontaneous and collaborative.
|
||||
|
||||
4. **Structure for Flow and Listener Engagement:**
|
||||
* **Natural Beginning:** Start with dialogue that flows naturally as if the introduction has just finished.
|
||||
* **Logical Progression & Signposting:** The lead host (`speaker_0`) should guide the listener through the information smoothly, using clear transitions to link different ideas.
|
||||
* **Meaningful Conclusion:** End by summarizing the key takeaways from the group discussion, reinforcing the core message. Close with a final thought or a lingering question for the audience.
|
||||
|
||||
5. **Integrate Source Content Seamlessly & Accurately:**
|
||||
* **Translate, Don't Recite:** Rephrase information from the `<source_content>` into conversational language suitable for each host's persona.
|
||||
* **Explain & Contextualize:** Use analogies, examples, and clarifying questions among the hosts to break down complex ideas.
|
||||
* **Weave Information Naturally:** Integrate facts and data from the source within the group dialogue, not as standalone, undigested blocks.
|
||||
|
||||
6. **Length & Pacing:**
|
||||
* **Target Duration:** Create a transcript that would result in approximately 5-6 minutes of audio (around 800-1000 words total).
|
||||
* **Balanced Speaking Turns:** Aim for a natural conversational flow among speakers rather than extended monologues by one person. Prioritize the most important information from the source content.
|
||||
</guidelines>
|
||||
|
||||
<examples>
|
||||
<!-- Example for a 3-person podcast with a 'random' turn pattern -->
|
||||
<input>
|
||||
<podcast_settings>
|
||||
<num_speakers>3</num_speakers>
|
||||
<turn_pattern>random</turn_pattern>
|
||||
</podcast_settings>
|
||||
<source_content>
|
||||
Quantum computing uses quantum bits or qubits which can exist in multiple states simultaneously due to superposition. This is different from classical bits (0 or 1). Think of it like a spinning coin. This allows for massive parallel computation.
|
||||
</source_content>
|
||||
</input>
|
||||
<output_format>
|
||||
{{
|
||||
"podcast_transcripts": [
|
||||
{{
|
||||
"speaker_id": 0,
|
||||
"dialog": "Alright team, today we're tackling a big one: Quantum Computing. I know a lot of listeners have been asking, so let's try to demystify it a bit."
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 2,
|
||||
"dialog": "Yes! I'm so excited for this. But honestly, every time I read about it, it feels like science fiction. Where do we even start?"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 1,
|
||||
"dialog": "That's the perfect place to start, actually. Let's ground it. Forget the 'quantum' part for a second. We all know regular computers use 'bits', right? They're tiny switches, either a zero or a one. On or off. Simple."
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 0,
|
||||
"dialog": "Right, the basic building block of all digital information. So, how do 'qubits'—the quantum version—change the game?"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 1,
|
||||
"dialog": "This is where the magic happens. A qubit isn't just a zero OR a one. Thanks to a principle called superposition, it can be zero, one, or both at the same time."
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 2,
|
||||
"dialog": "Okay, hold on. 'Both at the same time'? My brain just short-circuited. How is that possible?"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 1,
|
||||
"dialog": "The classic analogy is a spinning coin. While it's in the air, before it lands, is it heads or tails? It's in a state of both possibilities. A qubit is like that spinning coin, holding multiple values at once."
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 0,
|
||||
"dialog": "Ah, that's a great way to put it. So that 'spinning coin' state is what allows them to be so much more powerful, for massive parallel calculations?"
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 1,
|
||||
"dialog": "Exactly. Because one qubit can hold multiple values, a set of them can explore a huge number of possibilities simultaneously, instead of one by one like a classical computer."
|
||||
}},
|
||||
{{
|
||||
"speaker_id": 2,
|
||||
"dialog": "Wow. Okay, that clicks. It's not just faster, it's a completely different way of thinking about problem-solving."
|
||||
}}
|
||||
]
|
||||
}}
|
||||
</output_format>
|
||||
|
||||
Transform the source material into a lively and engaging podcast conversation based on the provided settings. Craft dialogue that showcases authentic group chemistry and natural interaction. Use varied speech patterns reflecting real human conversation, ensuring the final script effectively educates and entertains the listener.
|
||||
</podcast_generation_system>
|
||||
|
||||
Output no explanatory text. Make sure the output language matches the language of the input!
|
||||
Reference in New Issue
Block a user