diff --git a/README.md b/README.md index def695c..9646507 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ bash <(curl -sS https://cdn.link-ai.tech/code/cow/run.sh) 项目支持国内外主流厂商的模型接口,可选模型及配置说明参考:[模型说明](#模型说明)。 -> 注:Agent模式下推荐使用以下模型,可根据效果及成本综合选择:MiniMax-M2.5、glm-5、kimi-k2.5、qwen3-max、claude-sonnet-4-5、gemini-3-flash-preview +> 注:Agent模式下推荐使用以下模型,可根据效果及成本综合选择:MiniMax-M2.5、glm-5、kimi-k2.5、qwen3.5-plus、claude-sonnet-4-5、gemini-3-flash-preview 同时支持使用 **LinkAI平台** 接口,可灵活切换 OpenAI、Claude、Gemini、DeepSeek、Qwen、Kimi 等多种常用模型,并支持知识库、工作流、插件等Agent能力,参考 [接口文档](https://docs.link-ai.tech/platform/api)。 @@ -175,7 +175,7 @@ pip3 install -r requirements-optional.txt
2. 其他配置 -+ `model`: 模型名称,Agent模式下推荐使用 `MiniMax-M2.5`、`glm-5`、`kimi-k2.5`、`qwen3-max`、`claude-sonnet-4-5`、`gemini-3-flash-preview`,全部模型名称参考[common/const.py](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/common/const.py)文件 ++ `model`: 模型名称,Agent模式下推荐使用 `MiniMax-M2.5`、`glm-5`、`kimi-k2.5`、`qwen3.5-plus`、`claude-sonnet-4-5`、`gemini-3-flash-preview`,全部模型名称参考[common/const.py](https://github.com/zhayujie/chatgpt-on-wechat/blob/master/common/const.py)文件 + `character_desc`:普通对话模式下的机器人系统提示词。在Agent模式下该配置不生效,由工作空间中的文件内容构成。 + `subscribe_msg`:订阅消息,公众号和企业微信channel中请填写,当被订阅时会自动回复, 可使用特殊占位符。目前支持的占位符有{trigger_prefix},在程序中它会自动替换成bot的触发词。
@@ -369,18 +369,18 @@ volumes: ```json { - "model": "qwen3-max", + "model": "qwen3.5-plus", "dashscope_api_key": "sk-qVxxxxG" } ``` - - `model`: 可填写 `qwen3-max、qwen-max、qwen-plus、qwen-turbo、qwen-long、qwq-plus` 等 + - `model`: 可填写 `qwen3.5-plus、qwen3-max、qwen-max、qwen-plus、qwen-turbo、qwen-long、qwq-plus` 等 - `dashscope_api_key`: 通义千问的 API-KEY,参考 [官方文档](https://bailian.console.aliyun.com/?tab=api#/api) ,在 [控制台](https://bailian.console.aliyun.com/?tab=model#/api-key) 创建 方式二:OpenAI兼容方式接入,配置如下: ```json { "bot_type": "chatGPT", - "model": "qwen3-max", + "model": "qwen3.5-plus", "open_ai_api_base": "https://dashscope.aliyuncs.com/compatible-mode/v1", "open_ai_api_key": "sk-qVxxxxG" } diff --git a/common/const.py b/common/const.py index b44a081..ee0ebb4 100644 --- a/common/const.py +++ b/common/const.py @@ -80,6 +80,7 @@ QWEN_PLUS = "qwen-plus" QWEN_MAX = "qwen-max" QWEN_LONG = "qwen-long" QWEN3_MAX = "qwen3-max" # Qwen3 Max - Agent推荐模型 +QWEN35_PLUS = "qwen3.5-plus" # Qwen3.5 Plus - Omni model (MultiModalConversation) QWQ_PLUS = "qwq-plus" # MiniMax @@ -153,7 +154,7 @@ MODEL_LIST = [ DEEPSEEK_CHAT, DEEPSEEK_REASONER, # Qwen - QWEN, QWEN_TURBO, QWEN_PLUS, QWEN_MAX, QWEN_LONG, QWEN3_MAX, + QWEN, QWEN_TURBO, QWEN_PLUS, QWEN_MAX, QWEN_LONG, QWEN3_MAX, QWEN35_PLUS, # MiniMax MiniMax, MINIMAX_M2_5, MINIMAX_M2_1, MINIMAX_M2_1_LIGHTNING, MINIMAX_M2, MINIMAX_ABAB6_5, diff --git a/docs/agent.md b/docs/agent.md index 2e4e68f..71aba68 100644 --- a/docs/agent.md +++ b/docs/agent.md @@ -141,7 +141,7 @@ Agent模式推荐使用以下模型,可根据效果及成本综合选择: - **GLM**: `glm-5` - **Kimi**: `kimi-k2.5` - **Doubao**: `doubao-seed-2-0-code-preview-260215` -- **Qwen**: `qwen3-max` +- **Qwen**: `qwen3.5-plus` - **Claude**: `claude-sonnet-4-5` - **Gemini**: `gemini-3-flash-preview` diff --git a/models/dashscope/dashscope_bot.py b/models/dashscope/dashscope_bot.py index 2ce21d2..26cf7db 100644 --- a/models/dashscope/dashscope_bot.py +++ b/models/dashscope/dashscope_bot.py @@ -10,6 +10,7 @@ from config import conf, 
@staticmethod
def _is_multimodal_model(model_name: str) -> bool:
    """Return True when *model_name* must use the MultiModalConversation API.

    A model qualifies when its name starts with one of the prefixes listed in
    the module-level ``MULTIMODAL_MODEL_PREFIXES`` tuple; every other model
    keeps using the Generation API.

    Args:
        model_name: The configured model name. ``None`` or any other
            non-string value is treated as "not multimodal" instead of
            raising ``AttributeError`` on ``.startswith``.

    Returns:
        True if the name matches a multimodal prefix, otherwise False.
    """
    # Guard first: an unset/invalid model name must not crash the dispatch
    # decision; the Generation path will report the bad model itself.
    if not isinstance(model_name, str):
        return False
    # str.startswith accepts a tuple, so one call covers every prefix.
    return model_name.startswith(MULTIMODAL_MODEL_PREFIXES)
if isinstance(item, dict) + ) + usage = resp_dict.get("usage", {}) return { - "total_tokens": response.usage["total_tokens"], - "completion_tokens": response.usage["output_tokens"], + "total_tokens": usage.get("total_tokens", 0), + "completion_tokens": usage.get("output_tokens", 0), "content": content, } else: @@ -227,36 +255,54 @@ class DashscopeBot(Bot): try: # Set API key before calling dashscope.api_key = self.api_key - - response = dashscope.Generation.call( - model=dashscope_models.get(model_name, model_name), - messages=messages, - **parameters - ) - + model = dashscope_models.get(model_name, model_name) + + if self._is_multimodal_model(model_name): + messages = self._prepare_messages_for_multimodal(messages) + response = MultiModalConversation.call( + model=model, + messages=messages, + **parameters + ) + else: + response = dashscope.Generation.call( + model=model, + messages=messages, + **parameters + ) + if response.status_code == HTTPStatus.OK: - # Convert DashScope response to OpenAI-compatible format - choice = response.output.choices[0] + # Convert response to dict to avoid DashScope object KeyError issues + resp_dict = self._response_to_dict(response) + choice = resp_dict["output"]["choices"][0] + message = choice.get("message", {}) + content = message.get("content", "") + # Multimodal models may return content as a list of blocks + if isinstance(content, list): + content = "".join( + item.get("text", "") for item in content if isinstance(item, dict) + ) + usage = resp_dict.get("usage", {}) return { - "id": response.request_id, + "id": resp_dict.get("request_id"), "object": "chat.completion", "created": 0, "model": model_name, "choices": [{ "index": 0, "message": { - "role": choice.message.role, - "content": choice.message.content, + "role": message.get("role", "assistant"), + "content": content, "tool_calls": self._convert_tool_calls_to_openai_format( - choice.message.get("tool_calls") + message.get("tool_calls") ) }, - "finish_reason": 
choice.finish_reason + "finish_reason": choice.get("finish_reason") }], "usage": { - "prompt_tokens": response.usage.input_tokens, - "completion_tokens": response.usage.output_tokens, - "total_tokens": response.usage.total_tokens + "prompt_tokens": usage.get("input_tokens", 0), + "completion_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0) } } else: @@ -266,7 +312,7 @@ class DashscopeBot(Bot): "message": response.message, "status_code": response.status_code } - + except Exception as e: logger.error(f"[DASHSCOPE] sync response error: {e}") return { @@ -280,48 +326,52 @@ class DashscopeBot(Bot): try: # Set API key before calling dashscope.api_key = self.api_key - - responses = dashscope.Generation.call( - model=dashscope_models.get(model_name, model_name), - messages=messages, - stream=True, - **parameters - ) + model = dashscope_models.get(model_name, model_name) + + if self._is_multimodal_model(model_name): + messages = self._prepare_messages_for_multimodal(messages) + responses = MultiModalConversation.call( + model=model, + messages=messages, + stream=True, + **parameters + ) + else: + responses = dashscope.Generation.call( + model=model, + messages=messages, + stream=True, + **parameters + ) # Stream chunks to caller, converting to OpenAI format for response in responses: - if response.status_code != HTTPStatus.OK: - logger.error(f"[DASHSCOPE] Stream error: {response.code} - {response.message}") + # Convert to dict first to avoid DashScope proxy object KeyError + resp_dict = self._response_to_dict(response) + status_code = resp_dict.get("status_code", 200) + + if status_code != HTTPStatus.OK: + err_code = resp_dict.get("code", "") + err_msg = resp_dict.get("message", "Unknown error") + logger.error(f"[DASHSCOPE] Stream error: {err_code} - {err_msg}") yield { "error": True, - "message": response.message, - "status_code": response.status_code + "message": err_msg, + "status_code": status_code } continue - - # Get choice - use 
try-except because DashScope raises KeyError on hasattr() - try: - if isinstance(response.output, dict): - choice = response.output['choices'][0] - else: - choice = response.output.choices[0] - except (KeyError, AttributeError, IndexError) as e: - logger.warning(f"[DASHSCOPE] Cannot get choice: {e}") + + choices = resp_dict.get("output", {}).get("choices", []) + if not choices: continue - - # Get finish_reason safely - finish_reason = None - try: - if isinstance(choice, dict): - finish_reason = choice.get('finish_reason') - else: - finish_reason = choice.finish_reason - except (KeyError, AttributeError): - pass - + + choice = choices[0] + finish_reason = choice.get("finish_reason") + message = choice.get("message", {}) + # Convert to OpenAI-compatible format openai_chunk = { - "id": response.request_id, + "id": resp_dict.get("request_id"), "object": "chat.completion.chunk", "created": 0, "model": model_name, @@ -331,66 +381,90 @@ class DashscopeBot(Bot): "finish_reason": finish_reason }] } - - # Get message safely - use try-except - message = {} - try: - if isinstance(choice, dict): - message = choice.get('message', {}) - else: - message = choice.message - except (KeyError, AttributeError): - pass - - # Add role if present - role = None - try: - if isinstance(message, dict): - role = message.get('role') - else: - role = message.role - except (KeyError, AttributeError): - pass + + # Add role + role = message.get("role") if role: openai_chunk["choices"][0]["delta"]["role"] = role - - # Add content if present - content = None - try: - if isinstance(message, dict): - content = message.get('content') - else: - content = message.content - except (KeyError, AttributeError): - pass + + # Add reasoning_content (thinking process from models like qwen3.5) + reasoning_content = message.get("reasoning_content") + if reasoning_content: + openai_chunk["choices"][0]["delta"]["reasoning_content"] = reasoning_content + + # Add content (multimodal models may return list of blocks) + 
@staticmethod
def _response_to_dict(response) -> dict:
    """Convert a DashScope response object into a plain ``dict``.

    The DashScope SDK wraps responses in proxy objects whose ``__getattr__``
    delegates to ``__getitem__``, so a missing attribute raises ``KeyError``
    rather than ``AttributeError``. ``hasattr`` / ``getattr(obj, name, default)``
    only swallow ``AttributeError``, which is why every lookup here goes
    through an explicit try/except.

    Args:
        response: A DashScope response (attribute- and/or item-accessible),
            a plain dict, or ``None``.

    Returns:
        A dict holding whichever of ``request_id``, ``status_code``, ``code``,
        ``message``, ``output`` and ``usage`` could be read, with nested
        dict-like objects converted to plain dicts and tuples to lists.
        ``output`` and ``usage`` are omitted when their value is ``None`` so
        that ``result.get("usage", {})`` style lookups fall back to their
        default instead of yielding ``None``.
    """
    # Unique marker that distinguishes "attribute absent" from a real None.
    missing = object()

    def read_attr(obj, name):
        """getattr that also absorbs the KeyError raised by proxy objects."""
        try:
            return getattr(obj, name)
        except (AttributeError, KeyError, TypeError):
            return missing

    def to_plain(obj):
        """Recursively turn dict-like / sequence values into plain containers."""
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, dict):
            return {key: to_plain(value) for key, value in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [to_plain(item) for item in obj]
        # Response objects behave like dicts: they expose a keys() method
        # and support item access.
        keys = read_attr(obj, "keys")
        if callable(keys):
            try:
                return {key: to_plain(obj[key]) for key in keys()}
            except Exception:
                # Deliberate best effort: an object that merely looks
                # dict-like is handed back untouched rather than failing
                # the whole conversion.
                pass
        return obj

    result = {}
    for field in ("request_id", "status_code", "code", "message", "output", "usage"):
        value = read_attr(response, field)
        if value is missing:
            # Fall back to item access for plain dicts / item-only objects.
            try:
                value = response[field]
            except (KeyError, TypeError, IndexError):
                continue
        # A None container would defeat callers' .get(field, {}) defaults
        # and make the chained .get(...) raise AttributeError.
        if value is None and field in ("output", "usage"):
            continue
        result[field] = to_plain(value)
    return result
@staticmethod
def _prepare_messages_for_multimodal(messages: list) -> list:
    """Return copies of *messages* shaped for the MultiModalConversation API.

    MultiModalConversation iterates ``message["content"]`` of every message,
    so each message must carry a ``content`` key whose value is a list of
    blocks in the form ``[{"text": "..."}, ...]`` rather than a plain string.

    Roles are left untouched: ``role='tool'`` messages have to keep following
    the assistant ``tool_calls`` they answer, so they are not rewritten to
    ``role='user'``; only their ``content`` is normalized.

    Args:
        messages: Chat messages as dicts. The input list and its dicts are
            not modified.

    Returns:
        A new list of shallow-copied messages where ``content`` is:
        ``[{"text": ""}]`` if it was missing or ``None``;
        ``[{"text": <str>}]`` if it was a string (including ``""``);
        ``[<dict>]`` if it was a single block dict;
        unchanged (same object) for lists and any other type.
    """
    prepared = []
    for original in messages:
        msg = dict(original)  # shallow copy so the caller's dict is untouched
        content = msg.get("content")

        if content is None:
            msg["content"] = [{"text": ""}]
        elif isinstance(content, str):
            # Covers the empty string too: "" becomes [{"text": ""}].
            msg["content"] = [{"text": content}]
        elif isinstance(content, dict):
            # A lone block must still be wrapped, otherwise iterating the
            # content would walk the dict's keys instead of its blocks.
            msg["content"] = [content]
        # Lists are assumed to already be in multimodal block format.

        prepared.append(msg)
    return prepared