From 69327f63c5bf4d667bf03a1af17056fb92132a8e Mon Sep 17 00:00:00 2001
From: zihanjian
Date: Mon, 1 Dec 2025 20:51:55 +0800
Subject: [PATCH] feat(language): add multilingual support across frontend and
 backend

---
 backend/src/agent/graph.py                   | 29 ++++++--
 backend/src/agent/prompts.py                 |  9 ++-
 backend/src/agent/state.py                   |  1 +
 backend/src/agent/utils.py                   |  2 +
 frontend/src/App.tsx                         |  9 ++-
 frontend/src/components/ChatMessagesView.tsx | 54 ++++++++++++--
 frontend/src/components/InputForm.tsx        | 78 ++++++++++++++++----
 frontend/src/components/WelcomeScreen.tsx    |  3 +-
 8 files changed, 152 insertions(+), 33 deletions(-)

diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py
index 0f19c3f..5e2e10a 100644
--- a/backend/src/agent/graph.py
+++ b/backend/src/agent/graph.py
@@ -55,6 +55,7 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
         Dictionary with state update, including search_query key containing the generated queries
     """
     configurable = Configuration.from_runnable_config(config)
+    language = state.get("language") or "English"

     # check for custom initial search query count
     if state.get("initial_search_query_count") is None:
@@ -75,6 +76,7 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
         current_date=current_date,
         research_topic=get_research_topic(state["messages"]),
         number_queries=state["initial_search_query_count"],
+        language=language,
     )
     # Generate the search queries
     result = structured_llm.invoke(formatted_prompt)
@@ -106,9 +108,11 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
     """
     # Configure
     configurable = Configuration.from_runnable_config(config)
+    language = state.get("language") or "English"
     formatted_prompt = web_searcher_instructions.format(
         current_date=get_current_date(),
         research_topic=state["search_query"],
+        language=language,
     )

     # Uses the google genai client as the langchain client doesn't return grounding metadata
@@ -121,12 +125,15 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
         },
     )
     # resolve the urls to short urls for saving tokens and time
-    resolved_urls = resolve_urls(
-        response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
-    )
+    candidate = response.candidates[0] if response and response.candidates else None
+    grounding_chunks = None
+    if candidate and getattr(candidate, "grounding_metadata", None):
+        grounding_chunks = getattr(candidate.grounding_metadata, "grounding_chunks", None)
+    resolved_urls = resolve_urls(grounding_chunks, state["id"])
     # Gets the citations and adds them to the generated text
     citations = get_citations(response, resolved_urls)
-    modified_text = insert_citation_markers(response.text, citations)
+    base_text = response.text or ""
+    modified_text = insert_citation_markers(base_text, citations)
     sources_gathered = [item for citation in citations for item in citation["segments"]]

     return {
@@ -151,6 +158,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
         Dictionary with state update, including search_query key containing the generated follow-up query
     """
     configurable = Configuration.from_runnable_config(config)
+    language = state.get("language") or "English"
     # Increment the research loop count and get the reasoning model
     state["research_loop_count"] = state.get("research_loop_count", 0) + 1
     reasoning_model = state.get("reasoning_model", configurable.reflection_model)
@@ -161,6 +169,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
         current_date=current_date,
         research_topic=get_research_topic(state["messages"]),
         summaries="\n\n---\n\n".join(state["web_research_result"]),
+        language=language,
     )
     # init Reasoning Model
     llm = ChatGoogleGenerativeAI(
@@ -232,13 +241,21 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
     """
     configurable = Configuration.from_runnable_config(config)
    reasoning_model = state.get("reasoning_model") or configurable.answer_model
+    language = state.get("language") or "English"

     # Format the prompt
     current_date = get_current_date()
+    # Escape braces in user content to avoid str.format KeyErrors when summaries contain JSON-like text
+    safe_topic = get_research_topic(state["messages"]).replace("{", "{{").replace(
+        "}", "}}"
+    )
+    summaries_text = "\n---\n\n".join(state["web_research_result"])
+    safe_summaries = summaries_text.replace("{", "{{").replace("}", "}}")
     formatted_prompt = answer_instructions.format(
         current_date=current_date,
-        research_topic=get_research_topic(state["messages"]),
-        summaries="\n---\n\n".join(state["web_research_result"]),
+        research_topic=safe_topic,
+        summaries=safe_summaries,
+        language=language,
     )

     # init Reasoning Model, default to Gemini 2.5 Flash
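A note on the hardened grounding handling in web_research: when Google Search grounding returns nothing, `response.candidates` can be empty and `grounding_metadata` can be absent, which the old chained attribute access turned into an IndexError or AttributeError. A minimal standalone sketch of the same guard; the namespace objects below are simplified stand-ins for the google-genai response types, not the real SDK classes:

```python
from types import SimpleNamespace


def extract_grounding_chunks(response):
    """Return grounding chunks, or None when any link in the chain is missing."""
    candidate = response.candidates[0] if response and response.candidates else None
    if candidate and getattr(candidate, "grounding_metadata", None):
        return getattr(candidate.grounding_metadata, "grounding_chunks", None)
    return None


# A response with no candidates no longer crashes the node:
assert extract_grounding_chunks(SimpleNamespace(candidates=[])) is None

# A grounded response still yields its chunks:
grounded = SimpleNamespace(
    candidates=[
        SimpleNamespace(grounding_metadata=SimpleNamespace(grounding_chunks=["chunk"]))
    ]
)
assert extract_grounding_chunks(grounded) == ["chunk"]
```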
diff --git a/backend/src/agent/prompts.py b/backend/src/agent/prompts.py
index afd7650..556491c 100644
--- a/backend/src/agent/prompts.py
+++ b/backend/src/agent/prompts.py
@@ -16,6 +16,7 @@ Instructions:
 - Queries should be diverse, if the topic is broad, generate more than 1 query.
 - Don't generate multiple similar queries, 1 is enough.
 - Query should ensure that the most current information is gathered. The current date is {current_date}.
+- Always respond in {language}.

 Format:
 - Format your response as a JSON object with ALL two of these exact keys:
@@ -43,6 +44,7 @@ Instructions:
 - Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
 - The output should be concise research notes oriented toward comic creation, not a narrative report. Capture factual details that help draw scenes and characters.
 - Only include the information found in the search results, don't make up any information.
+- Always respond in {language}.

 Research Topic:
 {research_topic}
@@ -55,6 +57,7 @@ Instructions:
 - If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
 - If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
 - Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
+- Always respond in {language}.

 Requirements:
 - Ensure the follow-up query is self-contained and includes necessary context for web search.
@@ -88,12 +91,11 @@ Strict Requirements:
 - "id": integer, the 1-based page identifier (e.g., 1, 2, 3, ...)
 - "detail": string, a thorough page description that fine-grains every panel: characters' actions, attire, environment, camera/framing, dialogue with tone, props, transitions.
 - Do NOT invent facts. Ground all details in the provided summaries.
-- Include sources inline inside the "detail" string using markdown links (e.g., [apnews](https://vertexaisearch.cloud.google.com/id/1-0)).

 Example JSON (structure only):
 [
-  { "id": 1, "detail": "..." },
-  { "id": 2, "detail": "..." }
+  {{ "id": 1, "detail": "..." }},
+  {{ "id": 2, "detail": "..." }}
 ]

 Instructions:
@@ -102,6 +104,7 @@ Instructions:
 - Use the user's request and all research summaries to build the storyboard.
 - If the topic includes people, capture personality, visual appearance (hair, clothing, accessories), and speech style. If it includes objects, explain what they are and notable visual traits. If it includes locations or events, capture time period, atmosphere, and visual cues.
 - Output must be a page-by-page JSON where each page is an object with "id" and a single "detail" string that thoroughly covers all panels and specifics.
+- Always respond in {language}.

 User Context:
 - {research_topic}
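The doubled braces in the example JSON above are load-bearing: these prompt strings are rendered with Python's str.format, which treats any single `{...}` in the template as a replacement field. A minimal repro using a hypothetical template, not the repo's full prompt:

```python
# Single braces in the *template* are parsed as format fields and blow up:
broken = 'Example JSON:\n[ { "id": 1, "detail": "..." } ]\nTopic: {research_topic}'
try:
    broken.format(research_topic="solar power")
except (KeyError, IndexError, ValueError) as exc:
    print(f"format failed: {exc!r}")

# Doubling the braces renders them literally, leaving only the real placeholder:
fixed = 'Example JSON:\n[ {{ "id": 1, "detail": "..." }} ]\nTopic: {research_topic}'
print(fixed.format(research_topic="solar power"))
```

The `safe_topic`/`safe_summaries` escaping in finalize_answer applies the same doubling to user-derived text before it is formatted into the prompt.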
diff --git a/backend/src/agent/state.py b/backend/src/agent/state.py
index d5ad4dc..63466ad 100644
--- a/backend/src/agent/state.py
+++ b/backend/src/agent/state.py
@@ -19,6 +19,7 @@ class OverallState(TypedDict):
     max_research_loops: int
     research_loop_count: int
     reasoning_model: str
+    language: str


 class ReflectionState(TypedDict):
diff --git a/backend/src/agent/utils.py b/backend/src/agent/utils.py
index d02c8d9..44b133d 100644
--- a/backend/src/agent/utils.py
+++ b/backend/src/agent/utils.py
@@ -24,6 +24,8 @@ def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
     Create a map of the vertex ai search urls (very long) to a short url with a unique id for each url.
     Ensures each original URL gets a consistent shortened form while maintaining uniqueness.
     """
+    if not urls_to_resolve:
+        return {}
     prefix = f"https://vertexaisearch.cloud.google.com/id/"
     urls = [site.web.uri for site in urls_to_resolve]

diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index d06d402..3eee790 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -21,6 +21,7 @@ export default function App() {
     initial_search_query_count: number;
     max_research_loops: number;
     reasoning_model: string;
+    language: string;
   }>({
     apiUrl: import.meta.env.DEV
       ? "http://localhost:2024"
@@ -100,7 +101,12 @@ export default function App() {
   }, [thread.messages, thread.isLoading, processedEventsTimeline]);

   const handleSubmit = useCallback(
-    (submittedInputValue: string, effort: string, model: string) => {
+    (
+      submittedInputValue: string,
+      effort: string,
+      model: string,
+      language: string
+    ) => {
       if (!submittedInputValue.trim()) return;
       setProcessedEventsTimeline([]);
       hasFinalizeEventOccurredRef.current = false;
@@ -139,6 +145,7 @@ export default function App() {
         initial_search_query_count: initial_search_query_count,
         max_research_loops: max_research_loops,
         reasoning_model: model,
+        language,
       });
     },
     [thread]
   );
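With the state field and the frontend plumbing above in place, `language` flows from the submit payload into OverallState and from there into every prompt. A hedged sketch of the equivalent direct invocation from Python; the `agent.graph` import path and the model id are assumptions based on the repo layout, not part of this patch:

```python
from agent.graph import graph  # assumed import path for the compiled graph
from langchain_core.messages import HumanMessage

state = graph.invoke(
    {
        "messages": [HumanMessage(content="History of the Eiffel Tower")],
        "initial_search_query_count": 3,
        "max_research_loops": 2,
        "reasoning_model": "gemini-2.5-flash",  # assumed model id
        "language": "French",  # new field; nodes fall back to "English" when unset
    }
)
print(state["messages"][-1].content)
```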
diff --git a/frontend/src/components/ChatMessagesView.tsx b/frontend/src/components/ChatMessagesView.tsx
index 1a245d8..77c21d5 100644
--- a/frontend/src/components/ChatMessagesView.tsx
+++ b/frontend/src/components/ChatMessagesView.tsx
@@ -181,6 +181,30 @@ const AiMessageBubble: React.FC<AiMessageBubbleProps> = ({
   handleCopy,
   copiedMessageId,
 }) => {
+  const parsedPages = (() => {
+    if (typeof message.content !== "string") return null;
+    try {
+      const data = JSON.parse(message.content);
+      if (
+        Array.isArray(data) &&
+        data.every(
+          (p) =>
+            p &&
+            typeof p === "object" &&
+            "id" in p &&
+            "detail" in p &&
+            typeof p.id === "number" &&
+            typeof p.detail === "string"
+        )
+      ) {
+        return data as { id: number; detail: string }[];
+      }
+    } catch (_e) {
+      return null;
+    }
+    return null;
+  })();
+
   // Determine which activity events to show and if it's for a live loading message
   const activityForThisBubble =
     isLastMessage && isOverallLoading ? liveActivity : historicalActivity;
@@ -196,11 +220,29 @@
           />
         </div>
       )}
-      <ReactMarkdown components={mdComponents}>
-        {typeof message.content === "string"
-          ? message.content
-          : JSON.stringify(message.content)}
-      </ReactMarkdown>
+      {parsedPages ? (
+        <div className="flex flex-col gap-4">
+          {parsedPages.map((page) => (
+            <div
+              key={page.id}
+              className="flex flex-col gap-2"
+            >
+              <div className="text-xs font-semibold text-neutral-400">
+                Page {page.id}
+              </div>
+              <ReactMarkdown components={mdComponents}>
+                {page.detail}
+              </ReactMarkdown>
+            </div>
+          ))}
+        </div>
+      ) : (
+        <ReactMarkdown components={mdComponents}>
+          {typeof message.content === "string"
+            ? message.content
+            : JSON.stringify(message.content)}
+        </ReactMarkdown>
+      )}
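The `parsedPages` guard only switches to the page-by-page layout when the message content parses as a JSON array of `{id, detail}` objects, so ordinary prose answers still render through the markdown fallback. A hedged Python mirror of that shape check, which may be handy for asserting finalize_answer output in backend tests; the helper name is illustrative and not part of the patch:

```python
import json


def is_storyboard_payload(content: str) -> bool:
    """Mirror of the frontend check: a JSON list of {"id": int, "detail": str}."""
    try:
        data = json.loads(content)
    except (TypeError, ValueError):
        return False
    return isinstance(data, list) and all(
        isinstance(page, dict)
        and isinstance(page.get("id"), int)
        and isinstance(page.get("detail"), str)
        for page in data
    )


assert is_storyboard_payload('[{"id": 1, "detail": "Panel 1: ..."}]')
assert not is_storyboard_payload("plain prose answer")
assert not is_storyboard_payload('[{"id": "1", "detail": "..."}]')
```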