feat(language): add multilingual support across frontend and backend

This commit is contained in:
zihanjian
2025-12-01 20:51:55 +08:00
parent 788210f9d5
commit 69327f63c5
8 changed files with 152 additions and 33 deletions

View File

@@ -55,6 +55,7 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
Dictionary with state update, including search_query key containing the generated queries
"""
configurable = Configuration.from_runnable_config(config)
language = state.get("language") or "English"
# check for custom initial search query count
if state.get("initial_search_query_count") is None:
@@ -75,6 +76,7 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
current_date=current_date,
research_topic=get_research_topic(state["messages"]),
number_queries=state["initial_search_query_count"],
language=language,
)
# Generate the search queries
result = structured_llm.invoke(formatted_prompt)
@@ -106,9 +108,11 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
"""
# Configure
configurable = Configuration.from_runnable_config(config)
language = state.get("language") or "English"
formatted_prompt = web_searcher_instructions.format(
current_date=get_current_date(),
research_topic=state["search_query"],
language=language,
)
# Uses the google genai client as the langchain client doesn't return grounding metadata
@@ -121,12 +125,15 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
},
)
# resolve the urls to short urls for saving tokens and time
resolved_urls = resolve_urls(
response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
)
candidate = response.candidates[0] if response and response.candidates else None
grounding_chunks = None
if candidate and getattr(candidate, "grounding_metadata", None):
grounding_chunks = getattr(candidate.grounding_metadata, "grounding_chunks", None)
resolved_urls = resolve_urls(grounding_chunks, state["id"])
# Gets the citations and adds them to the generated text
citations = get_citations(response, resolved_urls)
modified_text = insert_citation_markers(response.text, citations)
base_text = response.text or ""
modified_text = insert_citation_markers(base_text, citations)
sources_gathered = [item for citation in citations for item in citation["segments"]]
return {
@@ -151,6 +158,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
Dictionary with state update, including search_query key containing the generated follow-up query
"""
configurable = Configuration.from_runnable_config(config)
language = state.get("language") or "English"
# Increment the research loop count and get the reasoning model
state["research_loop_count"] = state.get("research_loop_count", 0) + 1
reasoning_model = state.get("reasoning_model", configurable.reflection_model)
@@ -161,6 +169,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
current_date=current_date,
research_topic=get_research_topic(state["messages"]),
summaries="\n\n---\n\n".join(state["web_research_result"]),
language=language,
)
# init Reasoning Model
llm = ChatGoogleGenerativeAI(
@@ -232,13 +241,21 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
"""
configurable = Configuration.from_runnable_config(config)
reasoning_model = state.get("reasoning_model") or configurable.answer_model
language = state.get("language") or "English"
# Format the prompt
current_date = get_current_date()
# Escape braces in user content to avoid str.format KeyErrors when summaries contain JSON-like text
safe_topic = get_research_topic(state["messages"]).replace("{", "{{").replace(
"}", "}}"
)
summaries_text = "\n---\n\n".join(state["web_research_result"])
safe_summaries = summaries_text.replace("{", "{{").replace("}", "}}")
formatted_prompt = answer_instructions.format(
current_date=current_date,
research_topic=get_research_topic(state["messages"]),
summaries="\n---\n\n".join(state["web_research_result"]),
research_topic=safe_topic,
summaries=safe_summaries,
language=language,
)
# init Reasoning Model, default to Gemini 2.5 Flash

View File

@@ -16,6 +16,7 @@ Instructions:
- Queries should be diverse, if the topic is broad, generate more than 1 query.
- Don't generate multiple similar queries, 1 is enough.
- Query should ensure that the most current information is gathered. The current date is {current_date}.
- Always respond in {language}.
Format:
- Format your response as a JSON object with BOTH of these exact keys:
@@ -43,6 +44,7 @@ Instructions:
- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
- The output should be concise research notes oriented toward comic creation, not a narrative report. Capture factual details that help draw scenes and characters.
- Only include the information found in the search results, don't make up any information.
- Always respond in {language}.
Research Topic:
{research_topic}
@@ -55,6 +57,7 @@ Instructions:
- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
- If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
- Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
- Always respond in {language}.
Requirements:
- Ensure the follow-up query is self-contained and includes necessary context for web search.
@@ -88,12 +91,11 @@ Strict Requirements:
- "id": integer, the 1-based page identifier (e.g., 1, 2, 3, ...)
- "detail": string, a thorough page description that fine-grains every panel: characters' actions, attire, environment, camera/framing, dialogue with tone, props, transitions.
- Do NOT invent facts. Ground all details in the provided summaries.
- Include sources inline inside the "detail" string using markdown links (e.g., [apnews](https://vertexaisearch.cloud.google.com/id/1-0)).
Example JSON (structure only):
[
{ "id": 1, "detail": "..." },
{ "id": 2, "detail": "..." }
{{ "id": 1, "detail": "..." }},
{{ "id": 2, "detail": "..." }}
]
Instructions:
@@ -102,6 +104,7 @@ Instructions:
- Use the user's request and all research summaries to build the storyboard.
- If the topic includes people, capture personality, visual appearance (hair, clothing, accessories), and speech style. If it includes objects, explain what they are and notable visual traits. If it includes locations or events, capture time period, atmosphere, and visual cues.
- Output must be a page-by-page JSON where each page is an object with "id" and a single "detail" string that thoroughly covers all panels and specifics.
- Always respond in {language}.
User Context:
- {research_topic}

View File

@@ -19,6 +19,7 @@ class OverallState(TypedDict):
max_research_loops: int
research_loop_count: int
reasoning_model: str
language: str
class ReflectionState(TypedDict):

View File

@@ -24,6 +24,8 @@ def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
Create a map of the vertex ai search urls (very long) to a short url with a unique id for each url.
Ensures each original URL gets a consistent shortened form while maintaining uniqueness.
"""
if not urls_to_resolve:
return {}
prefix = f"https://vertexaisearch.cloud.google.com/id/"
urls = [site.web.uri for site in urls_to_resolve]