feat(config): 添加webvoice配置支持多TTS提供商和优化播客生成流程

新增webvoice.json配置文件，包含大量语音选项，更新TTS适配器以支持多提供商配置，改进播客生成流程中的错误处理和重试机制，优化UI组件以支持新的语音选择功能
2025-10-19 17:16:36 +08:00
parent ff88049afd
commit 321e3cded4
11 changed files with 2736 additions and 368 deletions
--- a/config/webvoice.json
+++ b/config/webvoice.json
--- a/server/main.py
+++ b/server/main.py
@@ -86,6 +86,7 @@ audio_file_mapping: Dict[str, Dict] = {}
 SECRET_KEY = os.getenv("PODCAST_API_SECRET_KEY", "your-super-secret-key") # 在生产环境中请务必修改!
 # 定义从 tts_provider 名称到其配置文件路径的映射
 tts_provider_map = {
+    "webvoice": "../config/webvoice.json",
    "index-tts": "../config/index-tts.json",
    "doubao-tts": "../config/doubao-tts.json",
    "edge-tts": "../config/edge-tts.json",
--- a/server/podcast_generator.py
+++ b/server/podcast_generator.py
@@ -21,6 +21,19 @@ output_dir = "output"
 # file_list_path is now generated uniquely for each merge operation
 tts_providers_config_path = '../config/tts_providers.json'

+# Global cache of loaded TTS provider configurations, keyed by provider name
+tts_provider_configs_cache = {}
+
+# Maps each TTS provider name to the path of its JSON config file. NOTE(review): the map in server/main.py also lists "webvoice" - confirm the omission here is intended
+tts_provider_map = {
+    "index-tts": "../config/index-tts.json",
+    "doubao-tts": "../config/doubao-tts.json",
+    "edge-tts": "../config/edge-tts.json",
+    "fish-audio": "../config/fish-audio.json",
+    "gemini-tts": "../config/gemini-tts.json",
+    "minimax": "../config/minimax.json",
+}
+
 def read_file_content(filepath):
    """Reads content from a given file path."""
    try:
@@ -360,18 +373,36 @@ def _load_configuration():
    print("\nLoaded Configuration: " + tts_provider)
    return config_data

-def _load_configuration_path(config_path: str) -> dict:
-    """Loads JSON configuration from a specified path and infers tts_provider from the file name."""
+def _load_configuration_path(config_path: str, pod_users: Optional[list] = None) -> dict:
+    """Load the JSON config at config_path and set config_data["tts_provider"]: the comma-joined distinct podUsers owners (pod_users overrides the config's own list), else the file-name provider when it is a known one, else "edge-tts"."""
    config_data = _load_json_config(config_path)
    
-    # 从文件名中提取 tts_provider
+    # Derive the candidate provider from the file name first
    file_name = os.path.basename(config_path)
-    tts_provider = os.path.splitext(file_name)[0] # 移除 .json 扩展名
+    default_tts_provider = os.path.splitext(file_name)[0] # strip the .json extension
+    
+    # An explicit pod_users argument wins; otherwise read podUsers from the config
+    if pod_users is None:
+        pod_users = config_data.get("podUsers", [])
+    
+    # Collect the distinct owner values in first-seen order so the joined string is deterministic
+    owners = []
+    if pod_users:  # guards against an empty or null podUsers value
+        owners = list(dict.fromkeys(user.get("owner") for user in pod_users if user.get("owner")))
+    
+    # When owners exist, the comma-separated owners become tts_provider
+    if owners:
+        tts_provider = ",".join(owners)
+        print(f"Found multiple owners in podUsers: {owners}. Using comma-separated tts_provider: {tts_provider}")
+    else:
+        # Legacy configs without owners: keep the file-name provider if it is known, otherwise fall back to edge-tts
+        tts_provider = default_tts_provider if default_tts_provider in tts_provider_map else "edge-tts"
+        print(f"No owners found in podUsers. Using default tts_provider: {tts_provider}")
    
    config_data["tts_provider"] = tts_provider # 将 tts_provider 添加到配置数据中
    
    print(f"\nLoaded Configuration: {tts_provider} from {config_path}")
-    return config_data    
+    return config_data

 def _prepare_openai_settings(args, config_data):
    """Determines final OpenAI API key, base URL, and model based on priority."""
@@ -428,86 +459,180 @@ def _prepare_podcast_prompts(config_data, original_podscript_prompt, custom_cont
    podscript_prompt = speaker_id_info  + "\n\n" + custom_content + "\n\n" + original_podscript_prompt
    return podscript_prompt, pod_users, voices, turn_pattern # Return voices for potential future use or consistency

+def _is_content_quality_acceptable(content: str, title: str, tags: str, content_type: str = "overview") -> bool:
+    """Return True when generated LLM output passes basic sanity checks.
+
+    "overview": content needs >= 20 stripped characters, title >= 2 and tags >= 1.
+    "script": content must be JSON for an object whose non-empty "podcast_transcripts"
+    list holds objects with a "speaker_id" key and a non-blank string "dialog".
+    Any other content_type is rejected; malformed script JSON yields False.
+    """
+    if content_type == "overview":
+        if not content or len(content.strip()) < 20:
+            return False
+        if not title or len(title.strip()) < 2:
+            return False
+        return bool(tags and tags.strip())
+    if content_type != "script":
+        return False
+    try:
+        podcast_script = json.loads(content)
+    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError subclass
+        return False
+    # The JSON comes from a model, so check every shape before calling dict/str methods on it.
+    transcripts = podcast_script.get("podcast_transcripts") if isinstance(podcast_script, dict) else None
+    if not isinstance(transcripts, list) or not transcripts:
+        return False
+    for transcript in transcripts:
+        if not isinstance(transcript, dict) or "speaker_id" not in transcript:
+            return False
+        dialog = transcript.get("dialog")
+        if not isinstance(dialog, str) or not dialog.strip():
+            return False
+    return True
+
+
 def _generate_overview_content(api_key, base_url, model, overview_prompt, input_prompt, output_language: Optional[str] = None) -> Tuple[str, str, str]:
    """Generates overview content using OpenAI CLI, and extracts title and tags."""
    print(f"\nGenerating overview with OpenAI CLI (Output Language: {output_language})...")
-    try:
-        # Replace the placeholder with the actual output language
-        formatted_overview_prompt = overview_prompt.replace("{{outlang}}", output_language if output_language is not None else "Make sure the input language is set as the output language")
-        
-        openai_client_overview = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=formatted_overview_prompt)
-        overview_response_generator = openai_client_overview.chat_completion(messages=[{"role": "user", "content": input_prompt}])
-        overview_content = "".join([chunk.choices[0].delta.content for chunk in overview_response_generator if chunk.choices and chunk.choices[0].delta.content])

-        # Extract title (first line) and tags (second line)
-        lines = overview_content.strip().split('\n')
-        title = lines[0].strip() if len(lines) > 0 else ""
-        tags = ""
-        # 重复判断3次是否有非空值，没有值就取下一行
-        for i in range(1, min(len(lines), 4)): # 检查第2到第4行 (索引1到3)
-            current_tags = lines[i].strip()
-            if current_tags:
-                tags = current_tags
-                # 保留取到tags的索引行，从下一行开始截取到最后一行，保存数据到overview_content
-                overview_content = "\n".join(lines[i+1:]).strip()
-                break
-        else: # 如果循环结束没有找到非空tags，则从第二行开始截取
-            overview_content = "\n".join(lines[1:]).strip()
+    max_retries = 3  # total number of attempts, not extra retries on top of the first call
+    attempt = 0

-        print(f"Extracted Title: {title}")
-        print(f"Extracted Tags: {tags}")
-        print("Generated Overview:")
-        print(overview_content[:100])
-        
-        return overview_content, title, tags
-    except Exception as e:
-        raise RuntimeError(f"Error generating overview: {e}")
+    while attempt < max_retries:
+        try:
+            # Replace the placeholder with the actual output language
+            formatted_overview_prompt = overview_prompt.replace("{{outlang}}", output_language if output_language is not None else "Make sure the input language is set as the output language")
+
+            openai_client_overview = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=formatted_overview_prompt)
+            overview_response_generator = openai_client_overview.chat_completion(messages=[{"role": "user", "content": input_prompt}])
+            overview_content = "".join([chunk.choices[0].delta.content for chunk in overview_response_generator if chunk.choices and chunk.choices[0].delta.content])
+
+            # Extract title (first line) and tags (first non-empty line among lines 2-4)
+            lines = overview_content.strip().split('\n')
+            title = lines[0].strip() if len(lines) > 0 else ""
+            tags = ""
+            # Probe up to three lines for a non-empty value, moving on to the next line when one is blank
+            for i in range(1, min(len(lines), 4)): # checks lines 2 to 4 (indices 1 to 3)
+                current_tags = lines[i].strip()
+                if current_tags:
+                    tags = current_tags
+                    # Everything after the line that supplied the tags becomes overview_content
+                    overview_content = "\n".join(lines[i+1:]).strip()
+                    break
+            else: # no non-empty tags line was found: overview_content is everything after the first line
+                overview_content = "\n".join(lines[1:]).strip()
+
+            # Check if the generated content meets quality standards
+            if _is_content_quality_acceptable(overview_content, title, tags, "overview"):
+                print(f"Generated overview content meets quality standards on attempt {attempt + 1}")
+                print(f"Extracted Title: {title}")
+                print(f"Extracted Tags: {tags}")
+                print("Generated Overview:")
+                print(overview_content[:100])
+
+                return overview_content, title, tags
+            else:
+                print(f"Generated overview content did not meet quality standards, attempt {attempt + 1}/{max_retries}")
+                attempt += 1
+                if attempt >= max_retries:  # the raise below is inside the try, so the except handler catches and re-wraps it
+                    raise RuntimeError(f"Failed to generate acceptable overview content after {max_retries} attempts. Content may be too short or missing required elements.")
+                else:
+                    print(f"Retrying overview generation...")
+                    continue
+        except Exception as e:
+            attempt += 1
+            if attempt >= max_retries:
+                raise RuntimeError(f"Error generating overview after {max_retries} attempts: {e}")
+            else:
+                print(f"Attempt {attempt}/{max_retries} failed: {e}. Retrying...")
+                time.sleep(1 * attempt)  # Linear backoff: the delay grows by one second per failed attempt

 def _generate_podcast_script(api_key, base_url, model, podscript_prompt, overview_content):
    """Generates and parses podcast script JSON using OpenAI CLI."""
    print("\nGenerating podcast script with OpenAI CLI...")
-    # Initialize podscript_json_str outside try block to ensure it's always defined
-    podscript_json_str = ""
-    try:
-        openai_client_podscript = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=podscript_prompt)
-        # Generate the response string first
-        podscript_json_str = "".join([chunk.choices[0].delta.content for chunk in openai_client_podscript.chat_completion(messages=[{"role": "user", "content": overview_content}]) if chunk.choices and chunk.choices[0].delta.content])

-        podcast_script = None
-        decoder = json.JSONDecoder()
-        idx = 0
-        valid_json_str = ""
+    max_retries = 3  # total number of attempts, not extra retries on top of the first call
+    attempt = 0

-        while idx < len(podscript_json_str):
-            try:
-                obj, end = decoder.raw_decode(podscript_json_str[idx:])
-                if isinstance(obj, dict) and "podcast_transcripts" in obj:
-                    podcast_script = obj
-                    valid_json_str = podscript_json_str[idx : idx + end]
-                    break
-                idx += end
-            except json.JSONDecodeError:
-                idx += 1
-                next_brace = podscript_json_str.find('{', idx)
-                if next_brace != -1:
-                    idx = next_brace
+    while attempt < max_retries:
+        # Reset per attempt, before the try, so the except handlers can always reference it
+        podscript_json_str = ""
+        try:
+            openai_client_podscript = OpenAICli(api_key=api_key, base_url=base_url, model=model, system_message=podscript_prompt)
+            # Generate the response string first
+            podscript_json_str = "".join([chunk.choices[0].delta.content for chunk in openai_client_podscript.chat_completion(messages=[{"role": "user", "content": overview_content}]) if chunk.choices and chunk.choices[0].delta.content])
+
+            podcast_script = None
+            decoder = json.JSONDecoder()
+            idx = 0
+            valid_json_str = ""
+
+            while idx < len(podscript_json_str):  # scan the response for the first JSON object that has a "podcast_transcripts" key
+                try:
+                    obj, end = decoder.raw_decode(podscript_json_str[idx:])
+                    if isinstance(obj, dict) and "podcast_transcripts" in obj:
+                        podcast_script = obj
+                        valid_json_str = podscript_json_str[idx : idx + end]
+                        break
+                    idx += end
+                except json.JSONDecodeError:
+                    idx += 1
+                    next_brace = podscript_json_str.find('{', idx)
+                    if next_brace != -1:
+                        idx = next_brace
+                    else:
+                        break
+
+            if podcast_script is None:
+                print(f"Could not find a valid podcast script JSON object with 'podcast_transcripts' key in response, attempt {attempt + 1}/{max_retries}")
+                attempt += 1
+                if attempt >= max_retries:  # the raise below is inside the try, so the generic except handler catches and re-wraps it
+                    raise ValueError(f"Error: Could not find a valid podcast script JSON object with 'podcast_transcripts' key in response. Raw response: {podscript_json_str}")
                else:
-                    break
+                    print(f"Retrying podcast script generation...")
+                    continue

-        if podcast_script is None:
-            raise ValueError(f"Error: Could not find a valid podcast script JSON object with 'podcast_transcripts' key in response. Raw response: {podscript_json_str}")
+            print("\nGenerated Podcast Script Length:"+ str(len(podcast_script.get("podcast_transcripts") or [])))
+            print(valid_json_str[:100] + "...")

-        print("\nGenerated Podcast Script Length:"+ str(len(podcast_script.get("podcast_transcripts") or [])))
-        print(valid_json_str[:100] + "...")
-        if not podcast_script.get("podcast_transcripts"):
-            raise ValueError("Error: 'podcast_transcripts' array is empty or not found in the generated script. Nothing to convert to audio.")
-        return podcast_script
-    except json.JSONDecodeError as e:
-        raise ValueError(f"Error decoding JSON from podcast script response: {e}. Raw response: {podscript_json_str}")
-    except Exception as e:
-        raise RuntimeError(f"Error generating podcast script: {e}")
+            if not podcast_script.get("podcast_transcripts"):
+                print(f"'podcast_transcripts' array is empty or not found in the generated script, attempt {attempt + 1}/{max_retries}")
+                attempt += 1
+                if attempt >= max_retries:
+                    raise ValueError("Error: 'podcast_transcripts' array is empty or not found in the generated script. Nothing to convert to audio.")
+                else:
+                    print(f"Retrying podcast script generation...")
+                    continue

-def generate_audio_for_item(item, config_data, tts_adapter: TTSAdapter, max_retries: int = 3):
+            # Check if the generated script meets quality standards
+            if _is_content_quality_acceptable(valid_json_str, "", "", "script"):
+                print(f"Generated podcast script meets quality standards on attempt {attempt + 1}")
+                return podcast_script
+            else:
+                print(f"Generated podcast script did not meet quality standards, attempt {attempt + 1}/{max_retries}")
+                attempt += 1
+                if attempt >= max_retries:
+                    raise ValueError(f"Failed to generate acceptable podcast script after {max_retries} attempts. Script may be missing required elements.")
+                else:
+                    print(f"Retrying podcast script generation...")
+                    continue
+        except json.JSONDecodeError as e:  # NOTE(review): raw_decode errors are handled by the inner try; presumably only reachable if the client call raises it - confirm
+            attempt += 1
+            if attempt >= max_retries:
+                raise ValueError(f"Error decoding JSON from podcast script response: {e}. Raw response: {podscript_json_str}")
+            else:
+                print(f"JSON decode error on attempt {attempt}: {e}. Retrying...")
+                time.sleep(1 * attempt)  # Linear backoff: the delay grows by one second per failed attempt
+        except Exception as e:
+            attempt += 1
+            if attempt >= max_retries:
+                raise RuntimeError(f"Error generating podcast script after {max_retries} attempts: {e}")
+            else:
+                print(f"Attempt {attempt}/{max_retries} failed: {e}. Retrying...")
+                time.sleep(1 * attempt)  # Linear backoff: the delay grows by one second per failed attempt
+
+def generate_audio_for_item(item, config_data, tts_adapter, max_retries: int = 3):
    """Generate audio for a single podcast transcript item using the provided TTS adapter."""
    speaker_id = item.get("speaker_id")
    dialog = item.get("dialog")
@@ -515,11 +640,14 @@ def generate_audio_for_item(item, config_data, tts_adapter: TTSAdapter, max_retr
    voice_code = None
    volume_adjustment = 0.0 # 默认值为 0.0
    speed_adjustment = 0.0 # 默认值为 0.0
+    voice_tts_provider = None # 默认使用主要的 TTS 提供商


    if config_data and "podUsers" in config_data and 0 <= speaker_id < len(config_data["podUsers"]):
        pod_user_entry = config_data["podUsers"][speaker_id]
        voice_code = pod_user_entry.get("code")
+        voice_tts_provider = pod_user_entry.get("owner") # 获取特定于该说话者的 TTS 提供商
+        
        # 从 voices 列表中获取对应的 volume_adjustment
        voice_map = {voice.get("code"): voice for voice in config_data.get("voices", []) if voice.get("code")}
        volume_adjustment = voice_map.get(voice_code, {}).get("volume_adjustment", 0.0)
@@ -527,15 +655,17 @@ def generate_audio_for_item(item, config_data, tts_adapter: TTSAdapter, max_retr

    if not voice_code:
        raise ValueError(f"No voice code found for speaker_id {speaker_id}. Cannot generate audio for this dialog.")
- 
+    
+    # 如果 tts_adapter 是映射对象，则根据 voice_tts_provider 选择对应的适配器
+    selected_adapter = tts_adapter[voice_tts_provider]
    # print(f"dialog-before: {dialog}")
    dialog = re.sub(r'[^\w\s\-,，.。?？!！\u4e00-\u9fa5]', '', dialog)
    print(f"dialog: {dialog}")
    
    for attempt in range(max_retries):
        try:
-            print(f"Calling TTS API for speaker {speaker_id} ({voice_code}) (Attempt {attempt + 1}/{max_retries})...")
-            temp_audio_file = tts_adapter.generate_audio(
+            print(f"Calling TTS API for speaker {speaker_id} ({voice_code}) with adapter (Attempt {attempt + 1}/{max_retries})...")
+            temp_audio_file = selected_adapter.generate_audio(
                text=dialog,
                voice_code=voice_code,
                output_dir=output_dir,
@@ -554,7 +684,7 @@ def generate_audio_for_item(item, config_data, tts_adapter: TTSAdapter, max_retr
        except Exception as e: # Catch other unexpected errors
            raise RuntimeError(f"An unexpected error occurred for speaker {speaker_id} ({voice_code}) on attempt {attempt + 1}: {e}")

-def _generate_all_audio_files(podcast_script, config_data, tts_adapter: TTSAdapter, threads):
+def _generate_all_audio_files(podcast_script, config_data, tts_adapter, threads):
    """Orchestrates the generation of individual audio files."""
    os.makedirs(output_dir, exist_ok=True)
    print("\nGenerating audio files...")
@@ -634,15 +764,41 @@ def _create_ffmpeg_file_list(audio_files, expected_count: int):

 from typing import cast # Add import for cast

-def _initialize_tts_adapter(config_data: dict, tts_providers_config_content: Optional[str] = None) -> TTSAdapter:
-
+def initialize_tts_provider_configs():
    """
-    根据配置数据初始化并返回相应的 TTS 适配器。
+    Initialize and cache the configuration of every TTS provider listed in tts_provider_map.
+    """
+    global tts_provider_configs_cache
+    global tts_provider_map
+    
+    # Drop whatever was cached before
+    tts_provider_configs_cache = {}
+    
+    # Load each config file named in the predefined map; a failed load is logged and skipped
+    for provider, config_path in tts_provider_map.items():
+        try:
+            config_data = _load_json_config(config_path)
+            tts_provider_configs_cache[provider] = config_data  # keyed by the full provider name, e.g. 'doubao-tts'
+        except FileNotFoundError:
+            print(f"Warning: Configuration file not found for {provider}: {config_path}")
+        except json.JSONDecodeError as e:
+            print(f"Warning: Invalid JSON in configuration file for {provider}: {config_path}, Error: {e}")
+        except Exception as e:
+            print(f"Warning: Could not load configuration for {provider}: {config_path}, Error: {e}")
+
+def _initialize_tts_adapter(config_data: dict, tts_providers_config_content: Optional[str] = None) -> dict:
+    """
+    根据配置数据初始化并返回相应的 TTS 适配器映射对象。
+    支持逗号分隔的 tts_provider 值，返回每个 provider 对应的适配器映射对象
    """
    tts_provider = config_data.get("tts_provider")
    if not tts_provider:
        raise ValueError("TTS provider is not specified in the configuration.")

+    # 如果缓存为空，则初始化缓存
+    if not tts_provider_configs_cache:
+        initialize_tts_provider_configs()
+
    tts_providers_config = {}
    try:
        if tts_providers_config_content:
@@ -653,50 +809,64 @@ def _initialize_tts_adapter(config_data: dict, tts_providers_config_content: Opt
    except Exception as e:
        print(f"Warning: Could not load tts_providers.json: {e}")
    
-    # 获取当前 tts_provider 的额外参数
-    current_tts_extra_params = tts_providers_config.get(tts_provider.split('-')[0], {}) # 例如 'doubao-tts' -> 'doubao'
+    # 支持逗号分隔的 tts_provider
+    providers = [provider.strip() for provider in tts_provider.split(',')] 
+    
+    adapters_map = {}
+    for provider in providers:
+        # 从缓存中获取当前 tts_provider 的额外参数
+        current_tts_config_params = tts_provider_configs_cache.get(provider, {})
+        current_tts_extra_params = tts_providers_config.get(provider.split('-')[0], {}) # 例如 'doubao-tts' -> 'doubao'

-    if tts_provider == "index-tts":
-        api_url = config_data.get("apiUrl")
-        if not api_url:
-            raise ValueError("IndexTTS apiUrl is not configured.")
-        return IndexTTSAdapter(api_url_template=cast(str, api_url), tts_extra_params=cast(dict, current_tts_extra_params))
-    elif tts_provider == "edge-tts":
-        api_url = config_data.get("apiUrl")
-        if not api_url:
-            raise ValueError("EdgeTTS apiUrl is not configured.")
-        return EdgeTTSAdapter(api_url_template=cast(str, api_url), tts_extra_params=cast(dict, current_tts_extra_params))
+        if provider == "index-tts":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            if not api_url:
+                raise ValueError("IndexTTS apiUrl is not configured.")
+            adapters_map[provider] = IndexTTSAdapter(api_url_template=cast(str, api_url), tts_extra_params=cast(dict, current_tts_extra_params))
+        elif provider == "edge-tts":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            if not api_url:
+                raise ValueError("EdgeTTS apiUrl is not configured.")
+            adapters_map[provider] = EdgeTTSAdapter(api_url_template=cast(str, api_url), tts_extra_params=cast(dict, current_tts_extra_params))

-    elif tts_provider == "fish-audio":
-        api_url = config_data.get("apiUrl")
-        headers = config_data.get("headers")
-        request_payload = config_data.get("request_payload")
-        if not all([api_url, headers, request_payload]):
-            raise ValueError("FishAudio requires apiUrl, headers, and request_payload configuration.")
-        return FishAudioAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
-    elif tts_provider == "minimax":
-        api_url = config_data.get("apiUrl")
-        headers = config_data.get("headers")
-        request_payload = config_data.get("request_payload")
-        if not all([api_url, headers, request_payload]):
-            raise ValueError("Minimax requires apiUrl, headers, and request_payload configuration.")
-        return MinimaxAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
-    elif tts_provider == "doubao-tts":
-        api_url = config_data.get("apiUrl")
-        headers = config_data.get("headers")
-        request_payload = config_data.get("request_payload")
-        if not all([api_url, headers, request_payload]):
-            raise ValueError("DoubaoTTS requires apiUrl, headers, and request_payload configuration.")
-        return DoubaoTTSAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
-    elif tts_provider == "gemini-tts":
-        api_url = config_data.get("apiUrl")
-        headers = config_data.get("headers")
-        request_payload = config_data.get("request_payload")
-        if not all([api_url, headers, request_payload]):
-            raise ValueError("GeminiTTS requires apiUrl, headers, and request_payload configuration.")
-        return GeminiTTSAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
-    else:
-        raise ValueError(f"Unsupported TTS provider: {tts_provider}")
+        elif provider == "fish-audio":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            headers = config_data.get("headers") or current_tts_config_params.get("headers")
+            request_payload = config_data.get("request_payload") or current_tts_config_params.get("request_payload")
+            if not all([api_url, headers, request_payload]):
+                raise ValueError("FishAudio requires apiUrl, headers, and request_payload configuration.")
+            adapters_map[provider] = FishAudioAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
+        elif provider == "minimax":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            headers = config_data.get("headers") or current_tts_config_params.get("headers")
+            request_payload = config_data.get("request_payload") or current_tts_config_params.get("request_payload")
+            if not all([api_url, headers, request_payload]):
+                raise ValueError("Minimax requires apiUrl, headers, and request_payload configuration.")
+            adapters_map[provider] = MinimaxAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
+        elif provider == "doubao-tts":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            headers = config_data.get("headers") or current_tts_config_params.get("headers")
+            request_payload = config_data.get("request_payload") or current_tts_config_params.get("request_payload")
+            if not all([api_url, headers, request_payload]):
+                raise ValueError("DoubaoTTS requires apiUrl, headers, and request_payload configuration.")
+            adapters_map[provider] = DoubaoTTSAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
+        elif provider == "gemini-tts":
+            # 优先从 config_data 获取，如果没有则从缓存中获取
+            api_url = config_data.get("apiUrl") or current_tts_config_params.get("apiUrl")
+            headers = config_data.get("headers") or current_tts_config_params.get("headers")
+            request_payload = config_data.get("request_payload") or current_tts_config_params.get("request_payload")
+            if not all([api_url, headers, request_payload]):
+                raise ValueError("GeminiTTS requires apiUrl, headers, and request_payload configuration.")
+            adapters_map[provider] = GeminiTTSAdapter(api_url=cast(str, api_url), headers=cast(dict, headers), request_payload_template=cast(dict, request_payload), tts_extra_params=cast(dict, current_tts_extra_params))
+        else:
+            raise ValueError(f"Unsupported TTS provider: {provider}")
+    
+    return adapters_map

 def generate_podcast_audio():
    args = _parse_arguments()
@@ -714,7 +884,7 @@ def generate_podcast_audio():
    overview_content, title, tags = _generate_overview_content(api_key, base_url, model, overview_prompt, input_prompt, args.output_language)
    podcast_script = _generate_podcast_script(api_key, base_url, model, podscript_prompt, overview_content)

-    tts_adapter = _initialize_tts_adapter(config_data) # 初始化 TTS 适配器
+    tts_adapter = _initialize_tts_adapter(config_data) # 初始化 TTS 适配器，现在返回适配器映射

    audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads)
    file_list_path_created = _create_ffmpeg_file_list(audio_files, len(podcast_script.get("podcast_transcripts", [])))
@@ -744,8 +914,8 @@ def generate_podcast_audio_api(args, config_path: str, input_txt_content: str, t
        str: The path to the generated audio file.
    """
    print("Starting podcast audio generation...")
-    config_data = _load_configuration_path(config_path)
    podUsers = json.loads(podUsers_json_content)
+    config_data = _load_configuration_path(config_path, podUsers)
    config_data["podUsers"] = podUsers

    final_api_key, final_base_url, final_model = _prepare_openai_settings(args, config_data)
@@ -761,7 +931,7 @@ def generate_podcast_audio_api(args, config_path: str, input_txt_content: str, t
    overview_content, title, tags = _generate_overview_content(final_api_key, final_base_url, final_model, overview_prompt, input_prompt, args.output_language)
    podcast_script = _generate_podcast_script(final_api_key, final_base_url, final_model, podscript_prompt, overview_content)
    
-    tts_adapter = _initialize_tts_adapter(config_data, tts_providers_config_content) # 初始化 TTS 适配器
+    tts_adapter = _initialize_tts_adapter(config_data, tts_providers_config_content) # 初始化 TTS 适配器，现在返回适配器映射

    audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads)
    file_list_path_created = _create_ffmpeg_file_list(audio_files, len(podcast_script.get("podcast_transcripts", [])))
@@ -787,6 +957,9 @@ def generate_podcast_audio_api(args, config_path: str, input_txt_content: str, t


 if __name__ == "__main__":
+    # Initialize TTS provider configs cache at startup
+    initialize_tts_provider_configs()
+    
    start_time = time.time()
    try:
        generate_podcast_audio()
--- a/web/src/app/[lang]/page.tsx
+++ b/web/src/app/[lang]/page.tsx
@@ -56,7 +56,8 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
  const { lang } = use(params);
  const { t } = useTranslation(lang, 'home');
  const { toasts, success, error, warning, info, removeToast } = useToast();
-  const { executeOnce } = usePreventDuplicateCall();
+  const { executeOnce: executeOncePodcasts } = usePreventDuplicateCall();
+  const { executeOnce: executeOnceCredits } = usePreventDuplicateCall();
  const router = useRouter(); // Initialize useRouter

  // 辅助函数：将 API 响应映射为 PodcastItem 数组
@@ -72,7 +73,7 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
      },
      audio_duration: task.audio_duration || '00:00',
      playCount: 0,
-      createdAt: task.timestamp ? new Date(task.timestamp * 1000).toISOString() : new Date().toISOString(),
+      createdAt: task.timestamp ? new Date(task.timestamp * 1000).toISOString() : '', // 使用空字符串而不是当前时间，避免水合错误
      audioUrl: task.audioUrl ? task.audioUrl : '',
      tags: task.tags ? task.tags.split('#').map((tag: string) => tag.trim()).filter((tag: string) => !!tag) : task.status === 'failed' ? [task.error] : [t('podcastTagsPlaceholder')],
      status: task.status,
@@ -105,18 +106,11 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
  // 播客详情页状态

  // 从后端获取积分数据和初始化数据加载
-  const initialized = React.useRef(false); // 使用 useRef 追踪是否已初始化
-
  useEffect(() => {
-    // 确保只在组件首次挂载时执行一次
-    if (!initialized.current) {
-      initialized.current = true;
-
-      // 首次加载时获取播客列表和积分/用户信息
-      fetchRecentPodcasts();
-      // fetchCreditsAndUserInfo(); // 在fetchRecentPodcasts中调用
-
-    }
+    console.log('HomePage mounted: 初始化数据加载');
+    // 首次加载时获取播客列表和积分/用户信息
+    fetchRecentPodcasts();
+    // fetchCreditsAndUserInfo(); // 在fetchRecentPodcasts中调用
    
    // 设置定时器每20秒刷新一次
    // const interval = setInterval(() => {
@@ -124,7 +118,10 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
    // }, 20000);

    // // 清理定时器
-    // return () => clearInterval(interval);
+    // return () => {
+    //   clearInterval(interval);
+    //   console.log('HomePage unmounted: 清理定时器');
+    // };
  }, []); // 空依赖数组，只在组件挂载时执行一次

  // 加载设置
@@ -267,7 +264,7 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>

  // 获取最近播客列表 - 使用防重复调用机制
  const fetchRecentPodcasts = async () => {
-    const result = await executeOnce(async () => {
+    const result = await executeOncePodcasts(async () => {
      const response = await trackedFetch('/api/podcast-status', {
        method: 'GET',
        headers: {
@@ -282,6 +279,7 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
    });

    if (!result) {
+      console.log('fetchRecentPodcasts: 重复调用已跳过');
      return; // 如果是重复调用，直接返回
    }

@@ -298,61 +296,83 @@ export default function HomePage({ params }: { params: Promise<{ lang: string }>
      error(t('error.dataProcessing'), err instanceof Error ? err.message : t('error.cantProcessPodcastList'));
    }

-    fetchCreditsAndUserInfo();
+    // 调用积分和用户信息获取（也有防重复机制）
+    await fetchCreditsAndUserInfo();
  };

-  // 新增辅助函数：获取积分和用户信息
+  // 新增辅助函数：获取积分和用户信息 - 使用防重复调用机制
  const fetchCreditsAndUserInfo = async () => {
+    const result = await executeOnceCredits(async () => {
+      const results = {
+        credits: 0,
+        transactions: [] as any[],
+        user: null as any,
+      };
+
+      // 获取积分
      try {
-          const pointsResponse = await fetch('/api/points', {
-            method: 'GET',
-            headers: {
-              'x-next-locale': lang,
-            },
-          });
-          if (pointsResponse.ok) {
-              const data = await pointsResponse.json();
-              if (data.success) {
-                  setCredits(data.points);
-              } else {
-                  console.error('Failed to fetch credits:', data.error);
-                  setCredits(0); // 获取失败则设置为0
-              }
+        const pointsResponse = await trackedFetch('/api/points', {
+          method: 'GET',
+          headers: {
+            'x-next-locale': lang,
+          },
+        });
+        if (pointsResponse.ok) {
+          const data = await pointsResponse.json();
+          if (data.success) {
+            results.credits = data.points;
          } else {
-              console.error('Failed to fetch credits with status:', pointsResponse.status);
-              setCredits(0); // 获取失败则设置为0
+            console.error('Failed to fetch credits:', data.error);
          }
+        } else {
+          console.error('Failed to fetch credits with status:', pointsResponse.status);
+        }
      } catch (error) {
-          console.error('Error fetching credits:', error);
-          setCredits(0); // 发生错误则设置为0
+        console.error('Error fetching credits:', error);
      }

+      // 获取积分历史
      try {
-          const transactionsResponse = await fetch('/api/points/transactions', {
-            method: 'GET',
-            headers: {
-              'x-next-locale': lang,
-            },
-          });
-          if (transactionsResponse.ok) {
-              const data = await transactionsResponse.json();
-              if (data.success) {
-                  setPointHistory(data.transactions);
-              } else {
-                  console.error('Failed to fetch point transactions:', data.error);
-                  setPointHistory([]);
-              }
+        const transactionsResponse = await trackedFetch('/api/points/transactions', {
+          method: 'GET',
+          headers: {
+            'x-next-locale': lang,
+          },
+        });
+        if (transactionsResponse.ok) {
+          const data = await transactionsResponse.json();
+          if (data.success) {
+            results.transactions = data.transactions;
          } else {
-              console.error('Failed to fetch point transactions with status:', transactionsResponse.status);
-              setPointHistory([]);
+            console.error('Failed to fetch point transactions:', data.error);
          }
+        } else {
+          console.error('Failed to fetch point transactions with status:', transactionsResponse.status);
+        }
      } catch (error) {
-          console.error('Error fetching point transactions:', error);
-          setPointHistory([]);
+        console.error('Error fetching point transactions:', error);
      }

-      const { session, user } = await getSessionData();
-      setUser(user); // 设置用户信息
+      // 获取用户信息
+      try {
+        const { session, user } = await getSessionData();
+        results.user = user;
+      } catch (error) {
+        console.error('Error fetching session data:', error);
+      }
+
+      return results;
+    });
+
+    if (!result) {
+      console.log('fetchCreditsAndUserInfo: 重复调用已跳过');
+      return; // 如果是重复调用，直接返回
+    }
+
+    // 更新状态
+    setCredits(result.credits);
+    setPointHistory(result.transactions);
+    setUser(result.user);
  };

  const renderMainContent = () => {
--- a/web/src/app/api/config/route.ts
+++ b/web/src/app/api/config/route.ts
@@ -33,6 +33,7 @@ const TTS_PROVIDER_ORDER = [
  'fish-audio',
  'gemini-tts',
  'index-tts',
+  'webvoice',
 ];

 // 获取配置文件列表
--- a/web/src/app/api/newuser/route.ts
+++ b/web/src/app/api/newuser/route.ts
@@ -6,18 +6,17 @@ import { fallbackLng } from '@/i18n/settings';

 export async function GET(request: NextRequest) {
  const sessionData = await getSessionData();
-  let baseUrl = process.env.NEXT_PUBLIC_BASE_URL || "/";
-  const pathname = request.nextUrl.searchParams.get('pathname');
-  if(!!pathname){
-    baseUrl += pathname.replace('/','');
-  }
+  const pathname = request.nextUrl.searchParams.get('pathname') || '';

-  // 如果没有获取到 session，直接重定向到根目录
+  // 如果没有获取到 session，直接重定向
  if (!sessionData?.user) {
-    const url = new URL(baseUrl, request.url);
+    const url = new URL(request.url);
+    url.pathname = pathname || '/';
+    url.search = '';
    return NextResponse.redirect(url);
  }

+
  const lng = !pathname ? fallbackLng : pathname.replace('/','');
  const { t } = await getTranslation(lng, 'components');
  const userId = sessionData.user.id; // 获取 userId
@@ -41,8 +40,11 @@ export async function GET(request: NextRequest) {
    console.log(t('newUser.pointsAccountExists', { userId }));
  }

-  // 创建一个 URL 对象，指向要重定向到的根目录
-  const url = new URL(baseUrl, request.url);
+  // 构建重定向 URL
+  const url = new URL(request.url);
+  url.pathname = pathname ? `${pathname}/` : '/';
+  url.search = '';
+  
  // 返回重定向响应
  return NextResponse.redirect(url);
 }
--- a/web/src/components/ConfigSelector.tsx
+++ b/web/src/components/ConfigSelector.tsx
@@ -26,7 +26,7 @@ const ConfigSelector: React.FC<ConfigSelectorProps> = ({
 }) => {
  const { t } = useTranslation(lang, 'components'); // 初始化 useTranslation 并指定命名空间
  const [configFiles, setConfigFiles] = useState<ConfigFile[]>([]);
-  const [selectedConfig, setSelectedConfig] = useState<string>('');
+  const [selectedConfig, setSelectedConfig] = useState<string>();
  const [currentConfig, setCurrentConfig] = useState<TTSConfig | null>(null);
  const [voices, setVoices] = useState<Voice[]>([]); // 新增 voices 状态
  const [isOpen, setIsOpen] = useState(false);
@@ -52,6 +52,9 @@ const ConfigSelector: React.FC<ConfigSelectorProps> = ({
        return !!(settings.minimax?.group_id && settings.minimax?.api_key);
      case 'gemini':
        return !!(settings.gemini?.api_key);
+      case 'webvoice':
+        // webvoice 使用浏览器内置的 Web Speech API，无需额外配置
+        return true;
      default:
        return false;
    }
@@ -95,36 +98,43 @@ const ConfigSelector: React.FC<ConfigSelectorProps> = ({
    loadConfigFilesCalled.current = true;

    try {
-      const response = await fetch('/api/config', {
-        method: 'GET',
-        headers: {
-          'x-next-locale': lang,
-        },
-      });
-      const result = await response.json();
+      // const response = await fetch('/api/config', {
+      //   method: 'GET',
+      //   headers: {
+      //     'x-next-locale': lang,
+      //   },
+      // });
+      // const result = await response.json();
      
-      if (result.success && Array.isArray(result.data)) {
+      // if (result.success && Array.isArray(result.data)) {
        // 过滤出已配置的TTS选项
-        const settings = await getTTSProviders(lang);
-        const availableConfigs = result.data.filter((config: ConfigFile) =>
-          isTTSConfigured(config.name, settings)
-        );
-        
+        // const settings = await getTTSProviders(lang);
+        // const availableConfigs = result.data.filter((config: ConfigFile) =>
+        //   isTTSConfigured(config.name, settings)
+        // );
+
+        const availableConfigs = [
+          {
+            name: 'webvoice.json',
+            displayName: 'webvoice',
+            path: 'webvoice.json',
+          },
+        ];
        setConfigFiles(availableConfigs);
        // 默认选择第一个可用配置
        if (availableConfigs.length > 0 && !selectedConfig) {
-          setSelectedConfig(availableConfigs[0].name);
-          loadConfig(availableConfigs[0].name);
+            setSelectedConfig(availableConfigs[0].name);
+            loadConfig(availableConfigs[0].name);
        } else if (availableConfigs.length === 0) {
          // 如果没有可用配置，清空当前选择
          setSelectedConfig('');
          setCurrentConfig(null);
          onConfigChange?.(null as any, '', []); // 传递空数组作为 voices
        }
-      } else {
-        console.error('Invalid config files data:', result);
-        setConfigFiles([]);
-      }
+      // } else {
+      //   console.error('Invalid config files data:', result);
+      //   setConfigFiles([]);
+      // }
    } catch (error) {
      console.error('Failed to process config files:', error);
      setConfigFiles([]);
@@ -163,56 +173,61 @@ const ConfigSelector: React.FC<ConfigSelectorProps> = ({

  return (
      <div className={className}>
-        {/* 配置选择器 */}
-          <button
-            onClick={() => setIsOpen(!isOpen)}
-            className="px-4 py-2 rounded-lg text-sm btn-secondary w-full"
-            disabled={isLoading}
-          >
-            {/* <Settings className="w-4 h-4 text-neutral-500" /> */}
-            <span className="flex-1 text-left text-sm">
-              {isLoading ? t('configSelector.loading') : selectedConfigFile?.displayName || (configFiles.length === 0 ? t('configSelector.pleaseConfigTTS') : t('configSelector.selectTTSConfig'))}
-            </span>
-            {/* <ChevronDown className={cn(
-              "w-4 h-4 text-neutral-400 transition-transform",
-              isOpen && "rotate-180"
-            )} /> */}
-          </button>
+        {/* 隐藏TTS选择按钮 */}
+        {false && (
+          <>
+            {/* 配置选择器 */}
+              <button
+                onClick={() => setIsOpen(!isOpen)}
+                className="px-4 py-2 rounded-lg text-sm btn-secondary w-full"
+                disabled={isLoading}
+              >
+                {/* <Settings className="w-4 h-4 text-neutral-500" /> */}
+                <span className="flex-1 text-left text-sm">
+                  {isLoading ? t('configSelector.loading') : selectedConfigFile?.displayName || (configFiles.length === 0 ? t('configSelector.pleaseConfigTTS') : t('configSelector.selectTTSConfig'))}
+                </span>
+                {/* <ChevronDown className={cn(
+                  "w-4 h-4 text-neutral-400 transition-transform",
+                  isOpen && "rotate-180"
+                )} /> */}
+              </button>

-          {/* 下拉菜单 */}
-          {isOpen && (
-            <div className="absolute top-full left-0 right-0 mb-1 bg-white border border-neutral-200 rounded-lg shadow-large z-50 max-h-60 overflow-y-auto">
-              {Array.isArray(configFiles) && configFiles.length > 0 ? configFiles.map((config) => (
-                <button
-                  key={config.name}
-                  onClick={() => handleConfigSelect(config.name)}
-                  className="flex items-center gap-3 w-full px-4 py-3 text-left hover:bg-neutral-50 transition-colors"
-                >
-                  <div className="flex-1">
-                    <div className="font-medium text-sm text-black">
-                      {config.displayName}
+              {/* 下拉菜单 */}
+              {isOpen && (
+                <div className="absolute top-full left-0 right-0 mb-1 bg-white border border-neutral-200 rounded-lg shadow-large z-50 max-h-60 overflow-y-auto">
+                  {Array.isArray(configFiles) && configFiles.length > 0 ? configFiles.map((config) => (
+                    <button
+                      key={config.name}
+                      onClick={() => handleConfigSelect(config.name)}
+                      className="flex items-center gap-3 w-full px-4 py-3 text-left hover:bg-neutral-50 transition-colors"
+                    >
+                      <div className="flex-1">
+                        <div className="font-medium text-sm text-black">
+                          {config.displayName}
+                        </div>
+                      </div>
+                      {selectedConfig === config.name && (
+                        <AiOutlineCheck className="w-4 h-4 text-green-500" />
+                      )}
+                    </button>
+                  )) : (
+                    <div className="px-4 py-3 text-sm text-neutral-500 text-center">
+                      <div className="mb-1">{t('configSelector.noAvailableTTSConfig')}</div>
+                      <div className="text-xs">{t('configSelector.pleaseConfigTTS')}</div>
                    </div>
-                  </div>
-                  {selectedConfig === config.name && (
-                    <AiOutlineCheck className="w-4 h-4 text-green-500" />
                  )}
-                </button>
-              )) : (
-                <div className="px-4 py-3 text-sm text-neutral-500 text-center">
-                  <div className="mb-1">{t('configSelector.noAvailableTTSConfig')}</div>
-                  <div className="text-xs">{t('configSelector.pleaseConfigTTS')}</div>
-                </div>
-              )}
            </div>
-          )}
+              )}

-      
-        {/* 点击外部关闭下拉菜单 */}
-        {isOpen && (
-          <div
-            className="fixed inset-0 z-40"
-            onClick={() => setIsOpen(false)}
-          />
+           
+            {/* 点击外部关闭下拉菜单 */}
+            {isOpen && (
+              <div
+                className="fixed inset-0 z-40"
+                onClick={() => setIsOpen(false)}
+              />
+            )}
+          </>
        )}
      </div>
  );
--- a/web/src/components/ConfirmModal.tsx
+++ b/web/src/components/ConfirmModal.tsx
@@ -96,7 +96,7 @@ const ConfirmModal: FC<ConfirmModalProps> = ({
          
          <button
            onClick={handleConfirm}
-            className="px-4 py-2 border border-transparent rounded-md shadow-sm font-medium text-white bg-gradient-to-r from-brand-purple to-brand-pink hover:from-brand-purple-hover hover:to-brand-pink focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-brand-purple transition-all"
+            className="px-4 py-2 border-transparent rounded-md shadow-sm font-medium text-white bg-gradient-to-r from-brand-purple to-brand-pink hover:from-brand-purple-hover hover:to-brand-pink focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-brand-purple transition-all"
          >
            {confirmText || t('podcastCreator.confirm')}
          </button>
--- a/web/src/components/NotificationBanner.tsx
+++ b/web/src/components/NotificationBanner.tsx
@@ -17,17 +17,24 @@ const NotificationBanner: React.FC<NotificationBannerProps> = ({
  lang
 }) => {
  const { t } = useTranslation(lang, 'components');
-  const [isVisible, setIsVisible] = useState(true);
+  const [isVisible, setIsVisible] = useState(false); // 初始为 false 避免水合错误
  const [isClosing, setIsClosing] = useState(false);
+  const [isMounted, setIsMounted] = useState(false);

-  // 从本地存储获取通知状态，避免重复显示
+  // 组件挂载后再检查 localStorage
  useEffect(() => {
+    setIsMounted(true);
    const hasClosed = localStorage.getItem('notificationBannerClosed');
-    if (hasClosed) {
-      setIsVisible(false);
+    if (!hasClosed) {
+      setIsVisible(true);
    }
  }, []);

+  // 在挂载前不渲染任何内容，避免水合不匹配
+  if (!isMounted) {
+    return null;
+  }
+
  const handleClose = () => {
    setIsClosing(true);
    // 添加关闭动画
--- a/web/src/components/PodcastCreator.tsx
+++ b/web/src/components/PodcastCreator.tsx
@@ -128,11 +128,17 @@ const PodcastCreator: React.FC<PodcastCreatorProps> = ({
   const [showLoginModal, setShowLoginModal] = useState(false); // 控制登录模态框的显示
   const [showConfirmModal, setShowConfirmModal] = useState(false); // 控制确认模态框的显示
   const [voices, setVoices] = useState<Voice[]>([]); // 从 ConfigSelector 获取 voices
-   const [selectedPodcastVoices, setSelectedPodcastVoices] = useState<{[key: string]: Voice[]}>(() => {
-     // 从 localStorage 读取缓存的说话人配置
+   const [selectedPodcastVoices, setSelectedPodcastVoices] = useState<{[key: string]: Voice[]}>({}); // 初始为空对象，避免水合错误
+   const [isVoicesLoaded, setIsVoicesLoaded] = useState(false);
+
+   // 组件挂载后从 localStorage 加载说话人配置
+   useEffect(() => {
     const cachedVoices = getItem<{[key: string]: Voice[]}>('podcast-selected-voices');
-     return cachedVoices || {};
-   }); // 新增：单独存储选中的说话人
+     if (cachedVoices) {
+       setSelectedPodcastVoices(cachedVoices);
+     }
+     setIsVoicesLoaded(true);
+   }, []);
   const [selectedConfig, setSelectedConfig] = useState<TTSConfig | null>(null);
   const [selectedConfigName, setSelectedConfigName] = useState<string>(''); // 新增状态来存储配置文件的名称
   const fileInputRef = useRef<HTMLInputElement>(null);
@@ -367,158 +373,158 @@ const PodcastCreator: React.FC<PodcastCreatorProps> = ({
          </div>

          {/* 工具栏 */}
-          <div className="flex flex-col sm:flex-row items-start sm:items-center justify-start sm:justify-between px-4 sm:px-6 py-3 border-t border-neutral-100 bg-neutral-50 gap-y-4 sm:gap-x-2">
-            {/* 左侧配置选项 */}
-          <div className="grid grid-cols-2 sm:grid-cols-4 gap-2 sm:gap-4 w-full sm:max-w-[500px]">
-            {/* TTS配置选择 */}
-            <div className='relative w-full'>
-            <ConfigSelector
-                onConfigChange={(config, name, newVoices) => { // 接收新的 voices 参数
+          <div className="flex flex-col lg:flex-row items-stretch lg:items-center justify-between px-4 sm:px-6 py-4 border-t border-neutral-100 bg-gradient-to-br from-neutral-50 to-white gap-4">
+            {/* 隐藏的 TTS 配置选择器 */}
+            <div className="hidden">
+              <ConfigSelector
+                onConfigChange={(config, name, newVoices) => {
                  setSelectedConfig(config);
-                  setSelectedConfigName(name); // 更新配置名称状态
-                  setVoices(newVoices); // 更新 voices 状态
+                  setSelectedConfigName(name);
+                  setVoices(newVoices);
                }}
                className="w-full"
-                lang={lang} // 传递 lang
-            /></div>
+                lang={lang}
+              />
+            </div>

-            {/* 说话人按钮 */}
-            <div className='relative w-full'>
-            <button
+            {/* 左侧配置选项 */}
+            <div className="flex flex-wrap gap-2 lg:gap-3 justify-center lg:justify-start items-center">
+              {/* 说话人按钮 */}
+              <button
                onClick={() => setShowVoicesModal(true)}
                className={cn(
-                  "px-4 py-2 rounded-lg text-sm",
+                  "w-[120px] px-4 py-2 rounded-lg text-sm font-medium transition-all duration-200 shadow-sm hover:shadow-md",
                  selectedPodcastVoices[selectedConfigName] && selectedPodcastVoices[selectedConfigName].length > 0
-                    ? "w-full bg-black text-white"
-                    : "btn-secondary w-full"
+                    ? "bg-gradient-to-r from-purple-600 to-pink-600 text-white hover:from-purple-700 hover:to-pink-700"
+                    : "bg-white border border-neutral-200 text-neutral-700 hover:border-neutral-300 hover:bg-neutral-50",
+                  (isGenerating || !selectedConfig) && "opacity-50 cursor-not-allowed"
                )}
                disabled={isGenerating || !selectedConfig}
-            >
+              >
                {t('podcastCreator.speaker')}
-            </button></div>
+              </button>

-            {/* 语言选择 */}
-            <div className="relative w-full">
-              <select
-                value={language}
-                onChange={(e) => setLanguage(e.target.value)}
-                className="appearance-none bg-white border border-neutral-200 rounded-lg px-3 py-2 sm:px-3 sm:py-2 pr-6 sm:pr-8 text-sm focus:outline-none focus:ring-2 focus:ring-black w-full text-center"
-                disabled={isGenerating}
-              >
-                {languageOptions.map(option => (
-                  <option key={option.value} value={option.value}>
-                    {option.label}
-                  </option>
-                ))}
-              </select>
-              <AiOutlineDown className="absolute right-1 sm:right-2 top-1/2 transform -translate-y-1/2 w-3 h-3 sm:w-4 sm:h-4 text-neutral-400 pointer-events-none" />
-            </div>
+              {/* 语言选择 */}
+              <div className="relative w-[120px]">
+                <select
+                  value={language}
+                  onChange={(e) => setLanguage(e.target.value)}
+                  className="appearance-none w-full bg-white border border-neutral-200 rounded-lg px-3 py-2 pr-8 text-sm font-medium text-neutral-700 focus:outline-none focus:ring-2 focus:ring-purple-500 focus:border-transparent transition-all duration-200 shadow-sm hover:shadow-md hover:border-neutral-300 disabled:opacity-50 disabled:cursor-not-allowed"
+                  disabled={isGenerating}
+                >
+                  {languageOptions.map(option => (
+                    <option key={option.value} value={option.value}>
+                      {option.label}
+                    </option>
+                  ))}
+                </select>
+                <AiOutlineDown className="absolute right-2 top-1/2 transform -translate-y-1/2 w-4 h-4 text-neutral-400 pointer-events-none" />
+              </div>

-            {/* 时长选择 */}
-            <div className="relative w-full">
-              <select
-                value={duration}
-                onChange={(e) => setDuration(e.target.value as any)}
-                className="appearance-none bg-white border border-neutral-200 rounded-lg px-3 py-2 sm:px-3 sm:py-2 pr-6 sm:pr-8 text-sm focus:outline-none focus:ring-2 focus:ring-black w-full text-center"
-                disabled={isGenerating}
-              >
-                {durationOptions.map(option => (
-                  <option key={option.value} value={option.value}>
-                    {option.label}
-                  </option>
-                ))}
-              </select>
-              <AiOutlineDown className="absolute right-1 sm:right-2 top-1/2 transform -translate-y-1/2 w-3 h-3 sm:w-4 sm:h-4 text-neutral-400 pointer-events-none" />
-            </div>
-          </div>
+              {/* 时长选择 */}
+              <div className="relative w-[120px]">
+                <select
+                  value={duration}
+                  onChange={(e) => setDuration(e.target.value as any)}
+                  className="appearance-none w-full bg-white border border-neutral-200 rounded-lg px-3 py-2 pr-8 text-sm font-medium text-neutral-700 focus:outline-none focus:ring-2 focus:ring-purple-500 focus:border-transparent transition-all duration-200 shadow-sm hover:shadow-md hover:border-neutral-300 disabled:opacity-50 disabled:cursor-not-allowed"
+                  disabled={isGenerating}
+                >
+                  {durationOptions.map(option => (
+                    <option key={option.value} value={option.value}>
+                      {option.label}
+                    </option>
+                  ))}
+                </select>
+                <AiOutlineDown className="absolute right-2 top-1/2 transform -translate-y-1/2 w-4 h-4 text-neutral-400 pointer-events-none" />
+              </div>

-          {/* 右侧操作按钮 todo */}
-          <div className="flex items-center gap-6 sm:gap-1 flex-wrap justify-center sm:justify-right w-full sm:w-auto">
-            {/* 文件上传 */}
-            {/* <button
-              onClick={() => fileInputRef.current?.click()}
-              className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
-              title={t('podcastCreator.fileUpload')}
-              disabled={isGenerating}
-            >
-              <AiOutlineUpload className="w-4 h-4 sm:w-5 sm:h-5" />
-            </button>
-            <input
-              ref={fileInputRef}
-              type="file"
-              accept=".txt,.md,.doc,.docx"
-              onChange={handleFileUpload}
-              className="hidden"
-            /> */}
-
-            {/* 粘贴链接 */}
-            {/* <button
-              onClick={handlePaste}
-              className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
-              title={t('podcastCreator.pasteContent')}
-              disabled={isGenerating}
-            >
-              <AiOutlineLink className="w-4 h-4 sm:w-5 sm:h-5" />
-            </button> */}
-
-            {/* 复制 */}
-            {/* <button
-              onClick={() => navigator.clipboard.writeText(topic)}
-              className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
-              title={t('podcastCreator.copyContent')}
-              disabled={isGenerating || !topic}
-            >
-              <AiOutlineCopy className="w-4 h-4 sm:w-5 sm:h-5" />
-            </button> */}
-            
-            {/* 积分显示 */}
-              <div className="flex items-center justify-center gap-1 text-xs text-neutral-500 w-20 flex-shrink-0">
-                <svg xmlns="http://www.w3.org/2000/svg" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" className="lucide lucide-gem flex-shrink-0">
+              {/* 积分显示 */}
+              <div className="w-[120px] flex items-center justify-center gap-1.5 px-3 py-2 bg-white border border-neutral-200 rounded-lg shadow-sm">
+                <svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" className="text-purple-600 flex-shrink-0">
                  <path d="M6 3v18l6-4 6 4V3z"/>
                  <path d="M12 3L20 9L12 15L4 9L12 3Z"/>
                </svg>
-                <span className="truncate">{credits}</span>
+                <span className="text-sm font-semibold text-neutral-700">{credits}</span>
              </div>
+            </div>

+            {/* 右侧操作按钮 */}
+            <div className="flex items-center justify-center lg:justify-end gap-2 lg:gap-3 flex-shrink-0">
+              {/* 文件上传 */}
+              {/* <button
+                onClick={() => fileInputRef.current?.click()}
+                className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
+                title={t('podcastCreator.fileUpload')}
+                disabled={isGenerating}
+              >
+                <AiOutlineUpload className="w-4 h-4 sm:w-5 sm:h-5" />
+              </button>
+              <input
+                ref={fileInputRef}
+                type="file"
+                accept=".txt,.md,.doc,.docx"
+                onChange={handleFileUpload}
+                className="hidden"
+              /> */}
+
+              {/* 粘贴链接 */}
+              {/* <button
+                onClick={handlePaste}
+                className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
+                title={t('podcastCreator.pasteContent')}
+                disabled={isGenerating}
+              >
+                <AiOutlineLink className="w-4 h-4 sm:w-5 sm:h-5" />
+              </button> */}
+
+              {/* 复制 */}
+              {/* <button
+                onClick={() => navigator.clipboard.writeText(topic)}
+                className="p-1 sm:p-2 text-neutral-500 hover:text-black transition-colors"
+                title={t('podcastCreator.copyContent')}
+                disabled={isGenerating || !topic}
+              >
+                <AiOutlineCopy className="w-4 h-4 sm:w-5 sm:h-5" />
+              </button> */}
              {/* 签到按钮 */}
+              
              <button
                onClick={handleSignIn}
                disabled={isGenerating}
                className={cn(
-                  "btn-secondary flex items-center gap-1 text-sm px-3 py-2 sm:px-4 sm:py-2",
+                  "flex items-center gap-1.5 px-4 py-2 rounded-lg text-sm font-medium transition-all duration-200 shadow-sm hover:shadow-md",
+                  "bg-white border border-neutral-200 text-neutral-700 hover:border-neutral-300 hover:bg-neutral-50",
                  isGenerating && "opacity-50 cursor-not-allowed"
                )}
              >
-              {t('podcastCreator.checkIn')}
+                {t('podcastCreator.checkIn')}
              </button>

-            <div className="flex flex-col items-center gap-1">
              {/* 创作按钮 */}
              <button
                onClick={handleSubmit}
                disabled={!topic.trim() || isGenerating}
                className={cn(
-                  "btn-primary flex items-center gap-1 text-sm px-3 py-2 sm:px-4 sm:py-2",
+                  "flex items-center gap-1.5 px-5 py-2 rounded-lg text-sm font-medium transition-all duration-200 shadow-md hover:shadow-lg",
+                  "bg-gradient-to-r from-purple-600 to-pink-600 text-white hover:from-purple-700 hover:to-pink-700",
                  (!topic.trim() || isGenerating) && "opacity-50 cursor-not-allowed"
                )}
              >
                {isGenerating ? (
                  <>
-                    <AiOutlineLoading3Quarters className="w-3 h-3 sm:w-4 sm:h-4 animate-spin" />
-                    <span className=" xs:inline">{t('podcastCreator.biu')}</span>
+                    <AiOutlineLoading3Quarters className="w-4 h-4 animate-spin" />
+                    <span>{t('podcastCreator.biu')}</span>
                  </>
                ) : (
                  <>
-                    <Wand2 className="w-3 h-3 sm:w-4 sm:h-4" />
-                    <span className=" xs:inline">{t('podcastCreator.create')}</span>
+                    <Wand2 className="w-4 h-4" />
+                    <span>{t('podcastCreator.create')}</span>
                  </>
                )}
              </button>
-              
            </div>
          </div>
        </div>
-      </div>

      {/* Voices Modal */}
      {selectedConfig && (
--- a/web/src/components/VoicesModal.tsx
+++ b/web/src/components/VoicesModal.tsx
@@ -198,11 +198,16 @@ const VoicesModal: React.FC<VoicesModalProps> = ({ isOpen, onClose, voices, onSe
                              audio.pause();
                              setPlayingVoiceId(null);
                            } else {
+                              // First pause whichever other audio is currently playing
                              if (playingVoiceId && audioRefs.current.has(playingVoiceId)) {
                                audioRefs.current.get(playingVoiceId)?.pause();
                              }
-                              audio.play();
-                              setPlayingVoiceId(voice.code!);
+                              // Try to start playback; a rejected play() promise is logged and the playing marker is cleared
+                              audio.play().catch((error) => {
+                                console.error('音频播放失败:', error);
+                                setPlayingVoiceId(null);
+                              });
+                              // Note: the playing state is set by the audio element's onPlay handler, not here
                            }
                          }
                        }}
@@ -216,8 +221,18 @@ const VoicesModal: React.FC<VoicesModalProps> = ({ isOpen, onClose, voices, onSe
                          else audioRefs.current.delete(voice.code!);
                        }}
                        src={voice.audio}
+                        onPlay={() => setPlayingVoiceId(voice.code!)}
                        onEnded={() => setPlayingVoiceId(null)}
-                        onPause={() => setPlayingVoiceId(null)}
+                        onPause={(e) => {
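+                          // Clear the marker only if this voice is the one recorded as playing. NOTE(review): playingVoiceId is the render-time value, so this check alone may not distinguish a real pause from a switch to another voice — confirm.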
+                          if (playingVoiceId === voice.code) {
+                            setPlayingVoiceId(null);
+                          }
+                        }}
+                        onError={() => {
+                          console.error('音频加载失败:', voice.audio);
+                          setPlayingVoiceId(null);
+                        }}
                        preload="none"
                        className="hidden"
                      />