feat(podcast): add immersive story mode with multilingual podcast generation

Add an immersive story generation mode with verbatim narration and intelligent segmentation:
- Server: add a generate_podcast_with_story_api function and a dedicated API endpoint
- Add story-mode prompt templates (prompt-story-overview.txt and prompt-story-podscript.txt)
- Frontend: add a mode-switch UI offering both AI podcast and immersive story modes
- Immersive story mode costs a flat 30 credits and takes no language or duration parameters
- Improve silence trimming to keep 200 ms of leading/trailing silence for more natural audio
- Fix session management and error handling for better stability
- Add multilingual copy (Chinese/English/Japanese) for the mode-switch UI
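
For reference, a minimal client sketch for the new endpoint. The host, payload values, and auth handling are hypothetical (the real caller identity is resolved server-side by get_auth_id); the form field names come from the endpoint signature in the diff below.

import httpx

# Submit a story-mode generation task (hypothetical host and values).
resp = httpx.post(
    "http://localhost:8000/generate-podcast-with-story",  # assumed host/port
    data={
        "api_key": "sk-...",                              # OpenAI-compatible key
        "base_url": "https://api.openai.com/v1",
        "model": "gpt-3.5-turbo",
        "input_txt_content": "Text to narrate verbatim...",
        "tts_providers_config_content": "{}",             # JSON string
        "podUsers_json_content": "[]",                    # JSON string
        "threads": 2,
        "tts_provider": "index-tts",
        "callback_url": "https://example.com/cb",         # optional
    },
)
print(resp.json())  # {"message": "Podcast generation with story started.", "task_id": "..."}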
hex2077
2025-10-19 22:09:13 +08:00
parent 321e3cded4
commit dd2a1b536f
18 changed files with 672 additions and 116 deletions

View File

@@ -22,7 +22,7 @@ import httpx  # import the httpx library
from io import BytesIO  # import BytesIO
import base64  # import base64
from podcast_generator import generate_podcast_audio_api
from podcast_generator import generate_podcast_audio_api, generate_podcast_with_story_api
class TaskStatus(str, Enum):
PENDING = "pending"
@@ -214,6 +214,7 @@ async def _generate_podcast_task(
output_language: Optional[str] = None,
usetime: Optional[str] = None,
lang: Optional[str] = None,
use_story_mode: bool = False,  # new parameter: whether to use story mode
):
task_results[auth_id][task_id]["status"] = TaskStatus.RUNNING
try:
@@ -230,14 +231,25 @@ async def _generate_podcast_task(
if not actual_config_path:
raise ValueError(f"Invalid tts_provider: {tts_provider}.")
podcast_generation_results = await asyncio.to_thread(
generate_podcast_audio_api,
args=args,
config_path=actual_config_path,
input_txt_content=input_txt_content.strip(),
tts_providers_config_content=tts_providers_config_content.strip(),
podUsers_json_content=podUsers_json_content.strip()
)
# Dispatch to the story-mode or standard generator based on use_story_mode
if use_story_mode:
podcast_generation_results = await asyncio.to_thread(
generate_podcast_with_story_api,
args=args,
config_path=actual_config_path,
input_txt_content=input_txt_content.strip(),
tts_providers_config_content=tts_providers_config_content.strip(),
podUsers_json_content=podUsers_json_content.strip()
)
else:
podcast_generation_results = await asyncio.to_thread(
generate_podcast_audio_api,
args=args,
config_path=actual_config_path,
input_txt_content=input_txt_content.strip(),
tts_providers_config_content=tts_providers_config_content.strip(),
podUsers_json_content=podUsers_json_content.strip()
)
task_results[auth_id][task_id]["status"] = TaskStatus.COMPLETED
task_results[auth_id][task_id].update(podcast_generation_results)
print(f"\nPodcast generation completed for task {task_id}. Output file: {podcast_generation_results.get('output_audio_filepath')}")
@@ -266,10 +278,11 @@ async def _generate_podcast_task(
"task_id": str(task_id),
"auth_id": auth_id,
"task_results": task_results[auth_id][task_id],
"timestamp": int(time.time()),
"timestamp": int(time.time()),
"status": task_results[auth_id][task_id]["status"],
"usetime": usetime,
"lang": lang,
"mode": "ai-story" if use_story_mode else "normal",
}
MAX_RETRIES = 3  # maximum number of callback retry attempts
@@ -293,7 +306,34 @@ async def _generate_podcast_task(
print(f"Retrying callback for task {task_id} in {RETRY_DELAY} seconds...")
await asyncio.sleep(RETRY_DELAY)
else:
print(f"Callback failed for task {task_id} after {MAX_RETRIES} attempts.")
print(f"Callback failed for task {task_id} after {MAX_RETRIES} attempts.")
async def _generate_podcast_with_story_task(
task_id: UUID,
auth_id: str,
api_key: str,
base_url: str,
model: str,
input_txt_content: str,
tts_providers_config_content: str,
podUsers_json_content: str,
threads: int,
tts_provider: str,
callback_url: Optional[str] = None,  # callback URL for task completion notification
output_language: Optional[str] = None,
usetime: Optional[str] = None,
lang: Optional[str] = None,
):
"""
Delegates to the standard podcast generation task with story mode enabled.
"""
return await _generate_podcast_task(
task_id, auth_id, api_key, base_url, model, input_txt_content,
tts_providers_config_content, podUsers_json_content, threads,
tts_provider, callback_url, output_language, usetime, lang,
use_story_mode=True
)
# @app.post("/generate-podcast", dependencies=[Depends(verify_signature)])
@app.post("/generate-podcast")
@@ -354,6 +394,65 @@ async def generate_podcast_submission(
return {"message": "Podcast generation started.", "task_id": task_id}
@app.post("/generate-podcast-with-story")
async def generate_podcast_with_story_submission(
background_tasks: BackgroundTasks,
auth_id: str = Depends(get_auth_id),
api_key: str = Form("OpenAI API key."),  # note: the placeholder string is the literal default; callers must supply a real key
base_url: str = Form("https://api.openai.com/v1"),
model: str = Form("gpt-3.5-turbo"),
input_txt_content: str = Form(...),
tts_providers_config_content: str = Form(...),
podUsers_json_content: str = Form(...),
threads: int = Form(1),
tts_provider: str = Form("index-tts"),
callback_url: Optional[str] = Form(None),
output_language: Optional[str] = Form(None),
usetime: Optional[str] = Form(None),
lang: Optional[str] = Form(None),
):
# 1. Validate tts_provider
if tts_provider not in tts_provider_map:
raise HTTPException(status_code=400, detail=f"Invalid tts_provider: {tts_provider}.")
# 2. Check whether this auth_id already has a running task
if auth_id in task_results:
for existing_task_id, existing_task_info in task_results[auth_id].items():
if existing_task_info["status"] in (TaskStatus.RUNNING, TaskStatus.PENDING):
raise HTTPException(status_code=409, detail=f"There is already a running task (ID: {existing_task_id}) for this auth_id. Please wait for it to complete.")
task_id = uuid.uuid4()
if auth_id not in task_results:
task_results[auth_id] = {}
task_results[auth_id][task_id] = {
"status": TaskStatus.PENDING,
"result": None,
"timestamp": time.time(),
"callback_url": callback_url, # 存储回调地址
"auth_id": auth_id, # 存储 auth_id
}
background_tasks.add_task(
_generate_podcast_with_story_task,
task_id,
auth_id,
api_key,
base_url,
model,
input_txt_content,
tts_providers_config_content,
podUsers_json_content,
threads,
tts_provider,
callback_url,
output_language,
usetime,
lang,
)
return {"message": "Podcast generation with story started.", "task_id": task_id}
# @app.get("/podcast-status", dependencies=[Depends(verify_signature)])
@app.get("/podcast-status")
async def get_podcast_status(

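For completeness, a sketch of a receiver for the callback payload assembled in _generate_podcast_task. The route path is hypothetical, and this assumes the server POSTs the payload as JSON; the keys (task_id, status, mode, usetime, lang) match the diff above.

from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/cb")  # hypothetical callback route
async def podcast_callback(request: Request):
    payload = await request.json()  # assumes the payload arrives as JSON
    # "mode" is "ai-story" for story-mode tasks and "normal" otherwise
    print(payload["task_id"], payload["status"], payload.get("mode"))
    return {"ok": True}
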
View File

@@ -19,7 +19,7 @@ from tts_adapters import TTSAdapter, IndexTTSAdapter, EdgeTTSAdapter, FishAudioA
# Global configuration
output_dir = "output"
# file_list_path is now generated uniquely for each merge operation
tts_providers_config_path = '../config/tts_providers.json'
tts_providers_config_path = '../config/tts_providers-local.json'
# Global cache for TTS provider configurations
tts_provider_configs_cache = {}
@@ -250,7 +250,7 @@ def get_audio_duration(filepath: str) -> Optional[float]:
print(f"An unexpected error occurred while getting audio duration for {filepath}: {e}")
return None
def trim_audio_silence(input_filepath: str, output_filepath: str, silence_threshold_db: float = -60, min_silence_duration: float = 0.5):
def trim_audio_silence(input_filepath: str, output_filepath: str, silence_threshold_db: float = -60, min_silence_duration: float = 0.5, enable_trim: bool = True):
"""
Removes leading and trailing silence from an audio file using ffmpeg.
@@ -259,7 +259,17 @@ def trim_audio_silence(input_filepath: str, output_filepath: str, silence_thresh
output_filepath (str): Path where the trimmed audio file will be saved.
silence_threshold_db (float): Silence threshold in dB. Audio below this level is considered silence.
min_silence_duration (float): Minimum duration of silence to detect, in seconds.
enable_trim (bool): Whether to enable silence trimming. If False, just copy the file.
"""
# If trimming is disabled, copy the file through unchanged
if not enable_trim:
try:
subprocess.run(["ffmpeg", "-i", input_filepath, "-c", "copy", output_filepath], check=True, capture_output=True)
print(f"Silence trimming disabled. Copied {input_filepath} to {output_filepath}")
return
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Error copying audio file: {e}")
try:
# Check if ffmpeg is available
subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
@@ -307,16 +317,21 @@ def trim_audio_silence(input_filepath: str, output_filepath: str, silence_thresh
start_trim_val = 0.0 # Initialize start_trim_val
end_trim_val = current_audio_duration # Initialize end_trim_val with the full duration
# Keep 200 ms of silence at each end
padding_ms = 0.2  # 200 ms = 0.2 seconds (value is in seconds despite the name)
if silence_starts and silence_ends:
# Determine leading silence
if silence_starts[0] == 0.0: # Silence at the very beginning
start_trim_val = silence_ends[0]
# Back off 200 ms from the end of the leading silence
start_trim_val = max(0.0, silence_ends[0] - padding_ms)
# Determine trailing silence
# Only consider trimming from the end if there's silence close to the end
if silence_ends[-1] >= (end_trim_val - min_silence_duration):
end_trim_val = silence_starts[-1]
# Extend 200 ms past the start of the trailing silence
end_trim_val = min(current_audio_duration, silence_starts[-1] + padding_ms)
# If after trimming, the duration becomes too short or negative, skip trimming
if (end_trim_val - start_trim_val) <= 0.01: # Add a small epsilon to avoid issues with very short audios
@@ -421,9 +436,13 @@ def _prepare_openai_settings(args, config_data):
def _read_prompt_files():
"""Reads content from input, overview, and podcast script prompt files."""
input_prompt = read_file_content('input.txt')
overview_prompt = read_file_content('prompt/prompt-overview.txt')
original_podscript_prompt = read_file_content('prompt/prompt-podscript.txt')
return input_prompt, overview_prompt, original_podscript_prompt
story_overview_prompt = read_file_content('prompt/prompt-story-overview.txt')
story_podscript_prompt = read_file_content('prompt/prompt-story-podscript.txt')
return input_prompt, overview_prompt, original_podscript_prompt, story_overview_prompt, story_podscript_prompt
def _extract_custom_content(input_prompt_content):
"""Extracts custom content from the input prompt."""
@@ -684,8 +703,17 @@ def generate_audio_for_item(item, config_data, tts_adapter, max_retries: int = 3
except Exception as e: # Catch other unexpected errors
raise RuntimeError(f"An unexpected error occurred for speaker {speaker_id} ({voice_code}) on attempt {attempt + 1}: {e}")
def _generate_all_audio_files(podcast_script, config_data, tts_adapter, threads):
"""Orchestrates the generation of individual audio files."""
def _generate_all_audio_files(podcast_script, config_data, tts_adapter, threads, enable_trim_silence: bool = True):
"""
Orchestrates the generation of individual audio files.
Args:
podcast_script: The podcast script containing transcripts.
config_data: Configuration data.
tts_adapter: TTS adapter for audio generation.
threads: Number of threads for parallel processing.
enable_trim_silence: Whether to enable silence trimming for audio files. Default is True.
"""
os.makedirs(output_dir, exist_ok=True)
print("\nGenerating audio files...")
# test script
@@ -712,7 +740,7 @@ def _generate_all_audio_files(podcast_script, config_data, tts_adapter, threads)
if original_audio_file:
# Define a path for the trimmed audio file
trimmed_audio_file = os.path.join(output_dir, f"trimmed_{os.path.basename(original_audio_file)}")
trim_audio_silence(original_audio_file, trimmed_audio_file)
trim_audio_silence(original_audio_file, trimmed_audio_file, enable_trim=enable_trim_silence)
# Use the trimmed file for the final merge
audio_files_dict[index] = trimmed_audio_file
# Clean up the original untrimmed file
@@ -873,7 +901,7 @@ def generate_podcast_audio():
config_data = _load_configuration()
api_key, base_url, model = _prepare_openai_settings(args, config_data)
input_prompt_content, overview_prompt, original_podscript_prompt = _read_prompt_files()
input_prompt_content, overview_prompt, original_podscript_prompt, story_overview_prompt, story_podscript_prompt = _read_prompt_files()
custom_content, input_prompt = _extract_custom_content(input_prompt_content)
podscript_prompt, pod_users, voices, turn_pattern = _prepare_podcast_prompts(config_data, original_podscript_prompt, custom_content, args.usetime, args.output_language)
@@ -886,7 +914,7 @@ def generate_podcast_audio():
tts_adapter = _initialize_tts_adapter(config_data)  # initialize the TTS adapter; now returns an adapter map
audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads)
audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads, enable_trim_silence=True)
file_list_path_created = _create_ffmpeg_file_list(audio_files, len(podcast_script.get("podcast_transcripts", [])))
output_audio_filepath = merge_audio_files(file_list_path_created)
return {
@@ -919,7 +947,7 @@ def generate_podcast_audio_api(args, config_path: str, input_txt_content: str, t
config_data["podUsers"] = podUsers
final_api_key, final_base_url, final_model = _prepare_openai_settings(args, config_data)
input_prompt, overview_prompt, original_podscript_prompt = _read_prompt_files()
input_prompt, overview_prompt, original_podscript_prompt, story_overview_prompt, story_podscript_prompt = _read_prompt_files()
custom_content, input_prompt = _extract_custom_content(input_txt_content)
# Assuming `output_language` is passed directly to the function
podscript_prompt, pod_users, voices, turn_pattern = _prepare_podcast_prompts(config_data, original_podscript_prompt, custom_content, args.usetime, args.output_language)
@@ -933,7 +961,7 @@ def generate_podcast_audio_api(args, config_path: str, input_txt_content: str, t
tts_adapter = _initialize_tts_adapter(config_data, tts_providers_config_content)  # initialize the TTS adapter; now returns an adapter map
audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads)
audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads, enable_trim_silence=True)
file_list_path_created = _create_ffmpeg_file_list(audio_files, len(podcast_script.get("podcast_transcripts", [])))
output_audio_filepath = merge_audio_files(file_list_path_created)
@@ -969,4 +997,62 @@ if __name__ == "__main__":
finally:
end_time = time.time()
execution_time = end_time - start_time
print(f"\nTotal execution time: {execution_time:.2f} seconds")
print(f"\nTotal execution time: {execution_time:.2f} seconds")
def generate_podcast_with_story_api(args, config_path: str, input_txt_content: str, tts_providers_config_content: str, podUsers_json_content: str) -> dict:
"""
Generates a podcast audio file based on the provided parameters.
Args:
api_key (str): OpenAI API key.
base_url (str): OpenAI API base URL.
model (str): OpenAI model to use.
threads (int): Number of threads for audio generation.
config_path (str): Path to the configuration JSON file.
input_txt_content (str): Content of the input prompt.
output_language (str): Language for the podcast overview and script (default: Chinese).
Returns:
str: The path to the generated audio file.
"""
print("Starting podcast audio generation...")
podUsers = json.loads(podUsers_json_content)
config_data = _load_configuration_path(config_path, podUsers)
config_data["podUsers"] = podUsers
final_api_key, final_base_url, final_model = _prepare_openai_settings(args, config_data)
input_prompt, overview_prompt, original_podscript_prompt, story_overview_prompt, story_podscript_prompt = _read_prompt_files()
custom_content, input_prompt = _extract_custom_content(input_txt_content)
# Assuming `output_language` is passed directly to the function
podscript_prompt, pod_users, voices, turn_pattern = _prepare_podcast_prompts(config_data, story_podscript_prompt, custom_content, args.usetime, args.output_language)
print(f"\nInput Prompt (from provided content):\n{input_prompt[:100]}...")
print(f"\nOverview Prompt (prompt-overview.txt):\n{story_overview_prompt[:100]}...")
print(f"\nPodscript Prompt (prompt-podscript.txt):\n{podscript_prompt[:1000]}...")
overview_content, title, tags = _generate_overview_content(final_api_key, final_base_url, final_model, story_overview_prompt, input_prompt, args.output_language)
podcast_script = _generate_podcast_script(final_api_key, final_base_url, final_model, podscript_prompt, input_prompt)
tts_adapter = _initialize_tts_adapter(config_data, tts_providers_config_content)  # initialize the TTS adapter; now returns an adapter map
audio_files = _generate_all_audio_files(podcast_script, config_data, tts_adapter, args.threads, enable_trim_silence=True)
file_list_path_created = _create_ffmpeg_file_list(audio_files, len(podcast_script.get("podcast_transcripts", [])))
output_audio_filepath = merge_audio_files(file_list_path_created)
audio_duration_seconds = get_audio_duration(os.path.join(output_dir, output_audio_filepath))
formatted_duration = "00:00"
if audio_duration_seconds is not None:
minutes = int(audio_duration_seconds // 60)
seconds = int(audio_duration_seconds % 60)
formatted_duration = f"{minutes:02}:{seconds:02}"
task_results = {
"output_audio_filepath": output_audio_filepath,
"podcast_script": podcast_script,
"podUsers": podUsers,
"audio_duration": formatted_duration,
"title": title,
"tags": tags,
}
return task_results
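
To make the new trimming behavior concrete, here is a toy walkthrough of the 200 ms padding arithmetic; the numbers are invented, since the real values come from ffmpeg silencedetect output.

padding_ms = 0.2              # keep 200 ms of silence at each end (seconds)
current_audio_duration = 12.5
silence_ends = [0.9]          # leading silence spans 0.0 to 0.9 s
silence_starts = [11.8]       # trailing silence starts at 11.8 s

start_trim_val = max(0.0, silence_ends[0] - padding_ms)                      # 0.7
end_trim_val = min(current_audio_duration, silence_starts[-1] + padding_ms)  # 12.0
assert (end_trim_val - start_trim_val) > 0.01  # guard against degenerate trims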

View File

@@ -76,10 +76,6 @@ You are a master podcast scriptwriter, adept at transforming diverse input conte
* **Debate & Contrasting Views:** Use the host personas to create discussions from different perspectives, compelling other hosts to provide more detailed defenses and explanations.
* **Restatement & Summary:** The host (`speaker_0`) should provide restatements and summaries during pauses in the discussion and at the end of topics.
8. **Copy & Replacement:**
If a hyphen connects English letters and numbers or letters on both sides, replace it with a space.
Replace four-digit Arabic numerals with their Chinese character equivalents, one-to-one.
</guidelines>
<examples>
@@ -90,7 +86,7 @@ You are a master podcast scriptwriter, adept at transforming diverse input conte
<turn_pattern>random</turn_pattern>
</podcast_settings>
<source_content>
Quantum computing uses quantum bits or qubits which can exist in multiple states simultaneously due to superposition. This is different from classical bits (0 or 1). Think of it like a spinning coin. This allows for massive parallel computation.
{{input_content}}
</source_content>
</input>
<output_format>
@@ -139,6 +135,8 @@ You are a master podcast scriptwriter, adept at transforming diverse input conte
]
}}
</output_format>
</examples>
<final>
Transform the source material into a lively and engaging podcast conversation based on the provided settings. Craft dialogue that showcases authentic group chemistry and natural interaction. Use varied speech patterns reflecting real human conversation, ensuring the final script effectively educates and entertains the listener.
The final output is a JSON string without code blocks.

View File

@@ -0,0 +1,21 @@
**1. Metadata Generation**
* **Step 1: Intermediate Core Summary Generation (Internal Step)**
* **Task**: First, generate a core idea summary of approximately 150 characters based *only* on the **[body content]** of the document (ignoring titles and subtitles).
* **Position**: As the **fourth line** of the final output.
* **Step 2: Title Generation**
* **Source**: Must be refined from the "core summary" generated in the previous step.
* **Length**: Strictly limited to 15-20 characters.
* **Format**: Adopt a "Main Title: Subtitle" structure, using a full-width colon ":" for separation. For example: "Brevity and Precision: Practical Engineering for AI Context".
* **Position**: As the **first line** of the final output.
* **Step 3: Tag Generation**
* **Source**: Extract from the **[body content]** of the document (ignoring titles and subtitles).
* **Quantity**: 3 to 5.
* **Format**: Keywords separated by the "#" symbol (e.g., #Keyword1#Keyword2).
* **Position**: As the **second line** of the final output.
**2. Output Language**
* **Make sure the language of the output content is the original input language.**
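
A hypothetical output following this layout, with the title on line 1, tags on line 2, and the summary on line 4 (the blank third line is an assumption):

Brevity and Precision: Practical Engineering for AI Context
#ContextEngineering#PromptDesign#LLMApplications

A core-idea summary of roughly 150 characters, distilled from the body content alone while ignoring titles and subtitles, appears here as the fourth line.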

View File

@@ -0,0 +1,119 @@
* **Output Format:** No explanatory text! The final output is a JSON string without code blocks. Make sure the language of the output content is the same as the source content.
* **End Format:** Do not add any summary or concluding remarks. The output must be only the JSON object.
<podcast_generation_system>
You are an intelligent text-processing system. Your task is to take the input content, segment it into complete sentences, assign speaker IDs according to the rules, and output the result as a raw JSON string, preserving the original text.
<input>
<!-- Podcast settings provide high-level configuration for the script generation. -->
<podcast_settings>
<!-- Define the total number of speakers. Minimum 1. Every speaker must be assigned at least one statement. -->
<num_speakers>{{numSpeakers}}</num_speakers>
</podcast_settings>
<!-- The source_content contains the text to be processed. -->
<source_content>
{{input_content}}
</source_content>
</input>
<guidelines>
1. **Primary Goal & Output Format:**
* Your only task is to convert the `<source_content>` into a JSON string.
* The output must be a single JSON object with one key: `"podcast_transcripts"`.
* The value of `"podcast_transcripts"` must be an array of objects, where each object has two keys: `"speaker_id"` (an integer) and `"dialog"` (a string).
* **Strictly output only the JSON string.** Do not include any explanations, comments, or code block formatting (like ```json).
2. **Text Segmentation:**
* Analyze the `<source_content>` and break it down into logical, complete sentences or statements.
* Segmentation should occur at natural punctuation marks (e.g., periods, question marks, exclamation points) or logical breaks in the flow of a single speaker's thought.
* **Crucially, you must not alter, summarize, or rewrite the original text.** The content of the `"dialog"` field must be an exact segment from the source.
* The output language must be identical to the input language.
3. **Speaker ID Assignment Logic (Roles):**
* **If Source Content Contains Speaker Roles:** If the `source_content` explicitly identifies speakers (e.g., "主持人:", "嘉宾A:", "Speaker 1:", "角色A"), you must map these roles to unique, consistent `speaker_id` integers (starting from 0). For example, "主持人" is always `speaker_id: 0`, "嘉宾A" is always `speaker_id: 1`, etc. Remove the role identifier (e.g., "主持人:") from the beginning of the `"dialog"` string.
* **If Source Content Has No Roles:** Proceed to Guideline 4 for automatic assignment.
4. **Speaker Assignment & Distribution Logic (Automatic):**
* **Rule 1 (Highest Priority): Logical Grouping.** This is the most important rule. Analyze the flow of the `<source_content>`. If multiple consecutive sentences form a single coherent thought, argument, or detailed explanation, they **must be assigned to the same `speaker_id`**. This is to ensure that a single speaker can fully develop a point before another speaker takes over. It is perfectly acceptable and encouraged for one speaker to have several consecutive dialogue blocks.
* **Rule 2: Speaker Variation.** After applying the logical grouping rule, distribute the resulting sentences or logical blocks among the different speakers to create a varied conversation. Switch speakers at logical transition points in the text, where the topic or perspective shifts.
* **Rule 3: Mandatory Speaker Inclusion.** You **must** ensure that every speaker, from `speaker_id: 0` to `speaker_id: num_speakers - 1`, is assigned at least one line of dialogue. Before finalizing the output, verify that all speakers have participated.
5. **Content Integrity:**
* The entire `<source_content>` must be processed and included in the final JSON output. No part of the original text should be omitted.
* The sum of all `"dialog"` strings in the output should reconstruct the original `<source_content>` (excluding any speaker role prefixes).
</guidelines>
<examples>
<!-- Example 1: Input with no speaker roles, demonstrating logical grouping -->
<input>
<podcast_settings>
<num_speakers>2</num_speakers>
</podcast_settings>
<source_content>
人工智能的发展进入了一个新阶段。其核心驱动力是大型语言模型的突破。这些模型能够理解和生成极其自然的文本,应用前景广阔。然而,我们也必须关注其伦理风险和潜在的滥用问题。
</source_content>
</input>
<output_format>
{{
"podcast_transcripts": [
{{
"speaker_id": 0,
"dialog": "人工智能的发展进入了一个新阶段。"
}},
{{
"speaker_id": 0,
"dialog": "其核心驱动力是大型语言模型的突破。"
}},
{{
"speaker_id": 0,
"dialog": "这些模型能够理解和生成极其自然的文本,应用前景广阔。"
}},
{{
"speaker_id": 1,
"dialog": "然而,我们也必须关注其伦理风险和潜在的滥用问题。"
}}
]
}}
</output_format>
<!-- Example 2: Input with explicit speaker roles -->
<input>
<podcast_settings>
<num_speakers>2</num_speakers>
</podcast_settings>
<source_content>
主持人: 大家好,欢迎收听。今天我们来聊聊人工智能。
嘉宾: 是的,主持人。人工智能最近发展很快,特别是在大模型领域。
</source_content>
</input>
<output_format>
{{
"podcast_transcripts": [
{{
"speaker_id": 0,
"dialog": "大家好,欢迎收听。"
}},
{{
"speaker_id": 0,
"dialog": "今天我们来聊聊人工智能。"
}},
{{
"speaker_id": 1,
"dialog": "是的,主持人。"
}},
{{
"speaker_id": 1,
"dialog": "人工智能最近发展很快,特别是在大模型领域。"
}}
]
}}
</output_format>
</examples>
<final>
Adhering strictly to all guidelines, process the input `<source_content>` and generate only the final JSON string. The output must be perfectly formatted JSON and nothing else.
</final>
</podcast_generation_system>
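
A hedged sketch of how a caller might validate a model response against these guidelines; the function name and checks are illustrative, not part of the repo.

import json

def validate_transcripts(raw: str, num_speakers: int) -> dict:
    data = json.loads(raw)  # must parse as bare JSON, with no code fences
    items = data["podcast_transcripts"]
    # Rule 3: every speaker_id from 0 to num_speakers - 1 must appear
    seen = {item["speaker_id"] for item in items}
    missing = set(range(num_speakers)) - seen
    if missing:
        raise ValueError(f"speakers never assigned a line: {missing}")
    # Each entry carries an integer speaker_id and a non-empty dialog string
    for item in items:
        assert isinstance(item["speaker_id"], int)
        assert isinstance(item["dialog"], str) and item["dialog"]
    return data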