初始提交

This commit is contained in:
Zylan
2025-04-23 13:30:10 +08:00
commit db26c07bb3
49 changed files with 40973 additions and 0 deletions

30903
function/chengyu.csv Normal file

File diff suppressed because it is too large Load Diff

89
function/func_chengyu.py Normal file
View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
import os
import random
import logging
import pandas as pd
# 获取模块级 logger
logger = logging.getLogger(__name__)
class Chengyu(object):
    """Idiom (chengyu) table used for lookups and the idiom-chain game.

    The data is read from the tab-separated ``chengyu.csv`` next to this
    module.  The code relies on the columns ``chengyu``, ``pingyin``,
    ``jieshi``, ``chuchu`` and ``lizi``.

    Attributes:
        df:   DataFrame loaded from the CSV.
        cys:  idiom -> last pinyin syllable of that idiom.
        zis:  first character -> set of idioms starting with that character.
        yins: first pinyin syllable -> set of idioms starting with it.
    """

    def __init__(self) -> None:
        """Load ``chengyu.csv`` from this module's directory and build the indexes."""
        root = os.path.dirname(os.path.abspath(__file__))
        self.df = pd.read_csv(f"{root}/chengyu.csv", delimiter="\t")
        self.cys, self.zis, self.yins = self._build_data()

    def _build_data(self):
        """Derive the lookup tables ``(cys, zis, yins)`` from ``self.df``."""
        df = self.df.copy()
        df["shouzi"] = df["chengyu"].apply(lambda x: x[0])
        df["mozi"] = df["chengyu"].apply(lambda x: x[-1])
        # Pinyin syllables are separated by single spaces.
        df["shouyin"] = df["pingyin"].apply(lambda x: x.split(" ")[0])
        df["moyin"] = df["pingyin"].apply(lambda x: x.split(" ")[-1])

        cys = dict(zip(df["chengyu"], df["moyin"]))
        zis = df.groupby("shouzi").agg({"chengyu": set})["chengyu"].to_dict()
        yins = df.groupby("shouyin").agg({"chengyu": set})["chengyu"].to_dict()
        return cys, zis, yins

    @staticmethod
    def _pick(candidates, exclude):
        """Return a random candidate other than ``exclude``, or None if none is left."""
        pool = [item for item in candidates if item != exclude]
        return random.choice(pool) if pool else None

    @staticmethod
    def _has_text(value) -> bool:
        """Tell whether a CSV cell holds real text.

        pandas represents an empty cell as NaN (a float), which is truthy, so a
        plain truthiness test is not enough before concatenating strings.
        """
        return isinstance(value, str) and value != ""

    def isChengyu(self, cy: str) -> bool:
        """Return True if ``cy`` is an idiom present in the table."""
        return self.cys.get(cy) is not None

    def getNext(self, cy: str, tongyin: bool = True) -> str:
        """Pick a random idiom that can follow ``cy`` in an idiom chain.

        Args:
            cy: The current idiom.
            tongyin: When no idiom starts with the last character of ``cy``,
                also accept idioms whose first syllable equals the last
                syllable of ``cy``.

        Returns:
            The next idiom (never ``cy`` itself), or None when ``cy`` is empty
            or nothing matches.
        """
        if not cy:
            return None

        # First choice: an idiom starting with the same character.
        same_char = self._pick(self.zis.get(cy[-1], ()), cy)
        if same_char is not None:
            return same_char

        # Fallback: an idiom starting with the same sound.  Unknown idioms have
        # no recorded syllable, so the lookup simply finds nothing.
        if tongyin:
            yin = self.cys.get(cy)
            return self._pick(self.yins.get(yin, ()), cy)
        return None

    def getMeaning(self, cy: str) -> str:
        """Return a multi-line description of ``cy``.

        The text contains the idiom, its pinyin and its explanation, followed
        by the source and an example sentence when the table has them.

        Returns:
            The description, or None if ``cy`` is not in the table.
        """
        ress = self.df[self.df["chengyu"] == cy].to_dict(orient="records")
        if not ress:
            return None

        res = ress[0]
        rsp = res["chengyu"] + "\n" + res["pingyin"] + "\n" + res["jieshi"]
        # Source and example are optional columns: skip empty / NaN cells.
        if self._has_text(res["chuchu"]):
            rsp += "\n出处:" + res["chuchu"]
        if self._has_text(res["lizi"]):
            rsp += "\n例子:" + res["lizi"]
        return rsp
# Module-level singleton: importing this module loads the idiom table once.
cy = Chengyu()

if __name__ == "__main__":
    # Manual smoke test: set up console logging, then log one chained idiom.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        level=logging.INFO,
    )
    answer = cy.getNext("便宜行事")
    logger.info(answer)

1546
function/func_duel.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,102 @@
import random
from typing import TYPE_CHECKING, Callable, Any
from wcferry import WxMsg
from function.func_duel import DuelRankSystem
if TYPE_CHECKING:
from logging import Logger
from wcferry import Wcf
from typing import Dict
class GoblinGiftManager:
    """Random "Gringotts goblin gift" event that awards duel points in group chats."""

    def __init__(self, config: Any, wcf: 'Wcf', log: 'Logger', send_text_msg: Callable):
        """Keep references to the collaborators used by ``try_trigger``.

        Args:
            config: Configuration object; its optional ``GOBLIN_GIFT`` mapping drives the event.
            wcf: Wcf instance, used to look up the sender's alias in the chat room.
            log: Logger used for info and error records.
            send_text_msg: Callable used to post the notification; it is called
                as ``send_text_msg(text, roomid, sender_wxid)``.
        """
        self.config, self.wcf = config, wcf
        self.LOG, self.sendTextMsg = log, send_text_msg

    def try_trigger(self, msg: WxMsg) -> None:
        """Possibly award the sender of ``msg`` a random amount of duel points.

        Nothing happens unless ``GOBLIN_GIFT`` exists, is enabled, the room is
        in ``allowed_groups``, the message comes from a group and the random
        roll is below ``probability`` (default 0.01).  Any error raised while
        awarding the gift is logged and swallowed.

        Args:
            msg: The incoming WeChat message.
        """
        # --- guard clauses: configuration present, enabled, room whitelisted ---
        if not hasattr(self.config, 'GOBLIN_GIFT'):
            return
        settings = self.config.GOBLIN_GIFT
        if not settings.get('enable', False):
            return
        whitelist = settings.get('allowed_groups', [])
        if not (whitelist and msg.roomid in whitelist):
            return
        # Only group messages can trigger the event.
        if not msg.from_group():
            return

        # --- roll the dice ---
        chance = settings.get('probability', 0.01)
        if not random.random() < chance:
            return

        try:
            # Prefer the in-room alias; fall back to the raw wxid.
            player_name = self.wcf.get_alias_in_chatroom(msg.sender, msg.roomid) or msg.sender

            # Each room has its own ranking store.
            rank_system = DuelRankSystem(group_id=msg.roomid)

            # Award a random amount inside the configured range (default 10-100).
            lowest = settings.get('min_points', 10)
            highest = settings.get('max_points', 100)
            points_added = random.randint(lowest, highest)

            rank_system.get_player_data(player_name)['score'] += points_added
            rank_system._save_ranks()

            # Flavour texts; one of them is deliberately in another language.
            gift_sources = (
                f"✨ 一只迷路的家养小精灵往 {player_name} 口袋里塞了什么东西!",
                f"💰 古灵阁的妖精似乎格外青睐 {player_name},留下了一袋金加隆(折合积分)!",
                f"🦉 一只送信的猫头鹰丢错了包裹,{player_name} 意外发现了一笔“意外之财”!",
                f"🍀 {player_name} 踩到了一株幸运四叶草,好运带来了额外的积分!",
                f"🍄 在禁林的边缘,{player_name} 发现了一簇闪闪发光的魔法蘑菇!",
                f"{player_name} 捡到了一个有求必应屋掉出来的神秘物品!",
                f"🔮 временами удача улыбается {player_name}!",
                f"🎉 费尔奇打瞌睡时掉了一小袋没收来的积分,刚好被 {player_name} 捡到!",
                f"📜 一张古老的藏宝图碎片指引 {player_name} 找到了一些失落的积分!",
                f"🧙‍♂️ 邓布利多教授对 {player_name} 的行为表示赞赏,特批“为学院加分”!",
                f"🧪 {player_name} 的魔药课作业获得了斯拉格霍恩教授的额外加分!",
                f"🌟 一颗流星划过霍格沃茨上空,{player_name} 许下的愿望成真了!",
            )
            headline = random.choice(gift_sources)
            final_message = f"{headline}\n获得积分: +{points_added} 分!"

            # Announce in the room, mentioning the lucky sender.
            self.sendTextMsg(final_message, msg.roomid, msg.sender)
            self.LOG.info(f"古灵阁馈赠触发: 群 {msg.roomid}, 用户 {player_name}, 获得 {points_added} 积分")
        except Exception as e:
            self.LOG.error(f"触发古灵阁馈赠时出错: {e}")

164
function/func_insult.py Normal file
View File

@@ -0,0 +1,164 @@
import random
import re
from wcferry import Wcf
from typing import Callable, Optional
class InsultGenerator:
    """Produces Tieba-style taunts aimed at a named target."""

    # Taunt templates; ``{target}`` is replaced with the target's name.
    INSULT_TEMPLATES = [
        "{target},你这想法属实有点抽象,建议回炉重造。",
        "不是吧,{target},这都能说出来?大脑是用来思考的,不是用来长个儿的。",
        "乐,{target} 你成功逗笑了我,就像看猴戏一样。",
        "我说 {target} 啊,网上吵架没赢过,现实打架没输过是吧?",
        "{target},听君一席话,浪费十分钟。",
        "给你个梯子,{target},下个台阶吧,别搁这丢人现眼了。",
        "就这?{target},就这?我还以为多大事呢。",
        "{target},你是不是网线直连马桶的?味儿有点冲。",
        "讲道理,{target},你这发言水平,在贴吧都活不过三楼。",
        "{target},建议你去买两斤猪脑子煲汤喝,补补智商。",
        "说真的,{target},你这智商要是放在好声音能把那四把椅子都转回来。",
        "{target},放着好端端的智商不用,非得秀下限是吧?",
        "我看你是典型的脑子搭错弦,{target},说话一套一套的。",
        "{target},别整天搁这儿水经验了,你这水平也就适合到幼儿园门口卖糖水。",
        "你这句话水平跟你智商一样,{target},都在地平线以下。",
        "就你这个水平,{target},看王者荣耀的视频都能让你买错装备。",
        "{target},整天叫唤啥呢?我没看《西游记》的时候真不知道猴子能说人话。",
        "我听懂了,{target},你说的都对,可是能不能先把脑子装回去再说话?",
        "{target}鼓个掌,成功把我逗乐了,这么多年的乐子人,今天是栽你手里了。",
        "{target},我看你是孔子放屁——闻(文)所未闻(闻)啊。",
        "收敛点吧,{target},你这智商余额明显不足了。",
        "{target},你要是没话说可以咬个打火机,大家爱看那个。",
        "{target},知道你急,但你先别急,喝口水慢慢说。",
        "{target},你这发言跟你长相一样,突出一个随心所欲。",
        "不是,{target},你这脑回路是盘山公路吗?九曲十八弯啊?",
        "{target},太平洋没加盖,觉得委屈可以跳下去。",
        "搁这儿装啥大尾巴狼呢 {target}?尾巴都快摇断了吧?",
        "{target},我看你不是脑子进水,是脑子被驴踢了吧?",
        "给你脸了是吧 {target}?真以为自己是个人物了?",
        "{target},少在这里狺狺狂吠,影响市容。",
        "你这智商,{target},二维码扫出来都得是付款码。",
        "乐死我了,{target},哪来的自信在这里指点江山?",
        "{target},回去多读两年书吧,省得出来丢人现眼。",
        "赶紧爬吧 {target},别在这污染空气了。",
        "我看你是没挨过打,{target},这么嚣张。",
        "给你个键盘,{target},你能敲出一部《圣经》来是吧?",
        "脑子是个好东西,{target},希望你也有一个。",
        "{target},少在这里秀你的智商下限。",
        "就这?{target}?我还以为多牛逼呢,原来是个憨批。",
        "{target},你这理解能力,怕不是胎教没做好。",
        "{target},我看你像个小丑,上蹿下跳的。",
        "你这逻辑,{target},体育老师教的吧?",
        "你这发言,{target},堪称当代迷惑行为大赏。",
        "{target},你这狗叫声能不能小点?",
        "你是猴子请来的救兵吗?{target}",
        "{target},你这脑容量,怕是连条草履虫都不如。",
        "给你个杆子你就往上爬是吧?{target}",
        "{target},你这嘴跟开了光似的,叭叭个没完。",
        "省省吧 {target},你的智商税已经交得够多了。",
        "{target},你这发言如同老太太的裹脚布,又臭又长。",
        "{target},我看你是真的皮痒了。",
        "少在这里妖言惑众,{target},滚回你的老鼠洞去。",
        "{target},你就像个苍蝇一样,嗡嗡嗡烦死人。"
    ]

    @staticmethod
    def generate_insult(target_name: str) -> str:
        """Return one randomly chosen taunt with the target's name filled in.

        Args:
            target_name: Name of the person being taunted.  A missing or blank
                name is replaced by a generic placeholder.

        Returns:
            The formatted taunt.
        """
        # Fall back to a placeholder so the sentence never has a hole in it.
        name = target_name if target_name and target_name.strip() else "那个谁"
        chosen = random.choice(InsultGenerator.INSULT_TEMPLATES)
        return chosen.format(target=name)
def generate_random_insult(target_name: str) -> str:
    """Module-level shortcut for ``InsultGenerator.generate_insult``.

    Args:
        target_name: Name of the person being taunted.

    Returns:
        A randomly chosen Tieba-style taunt addressed to ``target_name``.
    """
    insult = InsultGenerator.generate_insult(target_name)
    return insult
def handle_insult_request(
    wcf: Wcf,
    logger,
    bot_wxid: str,
    send_text_func: Callable[[str, str, Optional[str]], None],
    trigger_goblin_gift_func: Callable[[object], None],
    msg,
    target_mention_name: str
) -> bool:
    """Handle an "insult someone" request made in a group chat.

    The mentioned name is resolved against the room's member list: first by
    exact match, then by substring match (ignoring the bot itself).  If the
    resolved member is the bot, a refusal is sent instead of a taunt.

    Args:
        wcf: Wcf instance used to list the chat-room members.
        logger: Logger for progress and error records.
        bot_wxid: The bot's own wxid.
        send_text_func: Function that sends text, called as ``(content, receiver)``.
        trigger_goblin_gift_func: Optional hook called with ``msg`` after a taunt was sent.
        msg: Original message object; its ``roomid`` attribute is used.
        target_mention_name: Name of the mentioned user as extracted from the message.

    Returns:
        Always True: the request is considered handled whether or not sending succeeded.
    """
    logger.info(f"群聊 {msg.roomid} 中处理骂人指令,提及目标:{target_mention_name}")

    actual_target_name = target_mention_name
    target_wxid = None

    # A blank mention must not be looked up: the empty string is a substring of
    # every name, so the partial match would pick an arbitrary member.
    if target_mention_name and target_mention_name.strip():
        try:
            room_members = wcf.get_chatroom_members(msg.roomid)
            for wxid, name in room_members.items():
                if name == target_mention_name:
                    target_wxid, actual_target_name = wxid, name
                    break
            else:
                # No exact hit: accept the first member whose name contains the mention.
                for wxid, name in room_members.items():
                    if wxid != bot_wxid and target_mention_name in name:
                        target_wxid, actual_target_name = wxid, name
                        logger.info(f"部分匹配到用户: {name} ({wxid})")
                        break
        except Exception as e:
            # Best effort: without member info the raw mention is used as the name.
            logger.error(f"查找群成员信息时出错: {e}")

    if target_wxid and target_wxid == bot_wxid:
        send_text_func("😅 不行,我不能骂我自己。", msg.roomid)
        return True

    try:
        insult_text = generate_random_insult(actual_target_name)
        send_text_func(insult_text, msg.roomid)
        logger.info(f"已发送骂人消息至群 {msg.roomid},目标: {actual_target_name}")
    except Exception as e:
        logger.error(f"生成或发送骂人消息时出错: {e}")
        send_text_func("呃,我想骂但出错了...", msg.roomid)
        return True

    # The gift hook runs only after a successful send and is isolated so that
    # its failure is not reported to the room as a failed taunt.
    if trigger_goblin_gift_func:
        try:
            trigger_goblin_gift_func(msg)
        except Exception as e:
            logger.error(f"触发古灵阁馈赠时出错: {e}")
    return True

76
function/func_news.py Normal file
View File

@@ -0,0 +1,76 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
import logging
import time
from datetime import datetime
import requests
from lxml import etree
class News(object):
    """Fetches the "overnight global headlines" digest from cls.cn."""

    # Seconds to wait for the HTTP response; without a timeout a stalled
    # connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self) -> None:
        """Prepare the logger, the weekday labels and the HTTP headers."""
        self.LOG = logging.getLogger(__name__)
        # Keys follow the Monday=0 convention of ``struct_time.tm_wday``.
        self.week = {0: "周一", 1: "周二", 2: "周三", 3: "周四", 4: "周五", 5: "周六", 6: "周日"}
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}

    def get_important_news(self):
        """Fetch and format the most recent digest.

        Returns:
            tuple: ``(is_today, news_content)``.  ``is_today`` is True only
            when the digest was published on the current local date.
            ``news_content`` is the formatted text; on any failure the result
            is ``(False, "")``.
        """
        url = "https://www.cls.cn/api/sw?app=CailianpressWeb&os=web&sv=7.7.5"
        payload = {"type": "telegram", "keyword": "你需要知道的隔夜全球要闻", "page": 0,
                   "rn": 1, "os": "web", "sv": "7.7.5", "app": "CailianpressWeb"}
        try:
            rsp = requests.post(url=url, headers=self.headers, data=payload,
                                timeout=self.REQUEST_TIMEOUT)
            item = json.loads(rsp.text)["data"]["telegram"]["data"][0]
            ts = time.localtime(item["time"])

            # Header line: date followed by the weekday label.
            fmt_time = time.strftime("%Y年%m月%d日", ts)
            # Put every numbered headline ("1、", "2、", ...) on its own line.
            news = re.sub(r"(\d{1,2}、)", r"\n\1", item["descr"])
            # Strip HTML markup, keeping only the text nodes.
            fmt_news = "".join(etree.HTML(news).xpath(" // text()"))
            # Drop the boilerplate "<weekday>你需要知道的" prefix of the title.
            fmt_news = re.sub(r"周[一二三四五六日]你需要知道的", r"", fmt_news)
            formatted_news = f"{fmt_time} {self.week[ts.tm_wday]}\n{fmt_news}"

            # Compare full dates, not weekdays, to decide whether it is today's digest.
            is_today = time.strftime("%Y%m%d", ts) == time.strftime("%Y%m%d", time.localtime())
            if not is_today:
                self.LOG.info(f"获取到的是旧闻 (发布于 {fmt_time})")
            return (is_today, formatted_news)
        except Exception as e:
            # Network, JSON-shape and parsing problems all end up here.
            self.LOG.error(e)
            return (False, "")
if __name__ == "__main__":
    # Manual smoke test: set up console logging, fetch once and log the result.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        level=logging.INFO,
    )
    logger = logging.getLogger(__name__)

    news = News()
    is_today, content = news.get_important_news()
    logger.info(f"Is Today: {is_today}")
    logger.info(content)

398
function/func_reminder.py Normal file
View File

@@ -0,0 +1,398 @@
# -*- coding: utf-8 -*-
import sqlite3
import uuid
import time
import schedule
from datetime import datetime, timedelta
import logging
import threading
from typing import Optional, Dict, Tuple # 添加类型提示导入
# Module-level logger shared by every ReminderManager instance.
logger = logging.getLogger("ReminderManager")


class ReminderManager:
    """Stores reminders in SQLite and delivers them when they fall due.

    Three reminder types exist: ``once`` (``YYYY-MM-DD HH:MM``), ``daily``
    (``HH:MM``) and ``weekly`` (``HH:MM`` plus ``weekday`` 0-6).  Due reminders
    are delivered through ``robot.sendTextMsg``.

    Every database operation holds the class-wide ``_db_lock``, opens its own
    connection and closes it before returning.
    """

    # Class-level lock serializing all database work done through this class.
    _db_lock = threading.Lock()

    # Canonical storage formats.  Due checks compare these strings
    # lexicographically, so stored values must always be zero-padded.
    _ONCE_FORMAT = "%Y-%m-%d %H:%M"
    _CLOCK_FORMAT = "%H:%M"

    def __init__(self, robot, db_path: str, check_interval_minutes=1):
        """Create the table if needed and register the periodic check.

        :param robot: Robot instance used to send messages.
        :param db_path: Path of the SQLite database file.
        :param check_interval_minutes: How often (in minutes) due reminders are checked.
        """
        self.robot = robot
        self.db_path = db_path
        self._create_table()
        # The caller's scheduler loop is expected to run the registered job.
        schedule.every(check_interval_minutes).minutes.do(self.check_and_trigger_reminders)
        logger.info(f"提醒管理器已初始化,连接到数据库 '{db_path}',每 {check_interval_minutes} 分钟检查一次。")

    def _get_db_conn(self) -> sqlite3.Connection:
        """Open a new connection whose rows can be indexed by column name.

        The caller owns the connection and must close it.

        :raises sqlite3.Error: if the database cannot be opened.
        """
        try:
            # 10 s busy timeout; check_same_thread=False because access is
            # guarded by ``_db_lock`` instead.
            conn = sqlite3.connect(self.db_path, timeout=10, check_same_thread=False)
            conn.row_factory = sqlite3.Row
            return conn
        except sqlite3.Error as e:
            logger.error(f"无法连接到 SQLite 数据库 '{self.db_path}': {e}", exc_info=True)
            raise

    def _create_table(self):
        """Create the ``reminders`` table, migrate missing columns and add indexes.

        Database errors are logged, not raised.
        """
        sql = """
        CREATE TABLE IF NOT EXISTS reminders (
            id TEXT PRIMARY KEY,
            wxid TEXT NOT NULL,
            type TEXT NOT NULL CHECK(type IN ('once', 'daily', 'weekly')),
            time_str TEXT NOT NULL,
            content TEXT NOT NULL,
            created_at TEXT NOT NULL,
            last_triggered_at TEXT,
            weekday INTEGER,
            roomid TEXT
        );
        """
        # Columns added after the first release: (name, DDL that adds it).
        migrations = (
            ("weekday", "ALTER TABLE reminders ADD COLUMN weekday INTEGER;"),
            ("roomid", "ALTER TABLE reminders ADD COLUMN roomid TEXT;"),
        )
        index_statements = (
            "CREATE INDEX IF NOT EXISTS idx_reminders_wxid ON reminders (wxid);",
            "CREATE INDEX IF NOT EXISTS idx_reminders_type ON reminders (type);",
            "CREATE INDEX IF NOT EXISTS idx_reminders_roomid ON reminders (roomid);",
        )
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    # ``with conn`` only manages the transaction; closing is
                    # done explicitly in ``finally``.
                    with conn:
                        cursor = conn.cursor()
                        cursor.execute(sql)
                        try:
                            cursor.execute("PRAGMA table_info(reminders);")
                            columns = [col['name'] for col in cursor.fetchall()]
                            for column, ddl in migrations:
                                if column not in columns:
                                    cursor.execute(ddl)
                                    logger.info(f"成功添加 '{column}' 列到 'reminders' 表。")
                        except sqlite3.OperationalError as e:
                            logger.warning(f"尝试添加列时发生错误: {e}")
                        for statement in index_statements:
                            cursor.execute(statement)
                        conn.commit()
                finally:
                    conn.close()
            logger.info("数据库表 'reminders' 检查/创建 完成。")
        except sqlite3.Error as e:
            logger.error(f"创建/检查数据库表 'reminders' 失败: {e}", exc_info=True)

    # --- public interface ---

    def add_reminder(self, wxid: str, data: dict, roomid: Optional[str] = None) -> Tuple[bool, str]:
        """Validate a parsed reminder and store it.

        The time is stored in zero-padded canonical form (``9:05`` becomes
        ``09:05``) because due checks compare time strings lexicographically.

        :param wxid: wxid of the user who owns the reminder.
        :param data: Dict with ``type``, ``time``, ``content`` and, for weekly
            reminders, ``weekday`` (int, 0-6).
        :param roomid: Room id when the reminder was created in a group chat.
        :return: ``(True, reminder_id)`` on success, ``(False, error_message)`` otherwise.
        """
        reminder_id = str(uuid.uuid4())
        created_at_iso = datetime.now().isoformat()

        required_keys = {"type", "time", "content"}
        if not required_keys.issubset(data.keys()):
            return False, "AI 返回的 JSON 缺少必要字段 (type, time, content)"
        if data["type"] not in ["once", "daily", "weekly"]:
            return False, f"不支持的提醒类型: {data['type']}"

        weekday_val = None
        try:
            if data["type"] == "once":
                trigger_dt = datetime.strptime(data["time"], self._ONCE_FORMAT)
                if trigger_dt <= datetime.now():
                    return False, f"一次性提醒时间 ({data['time']}) 必须是未来的时间"
                time_str = trigger_dt.strftime(self._ONCE_FORMAT)
            else:
                clock = datetime.strptime(data["time"], self._CLOCK_FORMAT)
                time_str = clock.strftime(self._CLOCK_FORMAT)
                if data["type"] == "weekly":
                    if "weekday" not in data or not isinstance(data["weekday"], int) or not (0 <= data["weekday"] <= 6):
                        return False, "每周提醒必须提供有效的 weekday 字段 (0-6)"
                    weekday_val = data["weekday"]
        except (ValueError, TypeError) as e:
            # TypeError covers a non-string ``time`` value.
            return False, f"时间格式错误 ({data['time']}),需要 'YYYY-MM-DD HH:MM' (once) 或 'HH:MM' (daily/weekly): {e}"

        sql = """
        INSERT INTO reminders (id, wxid, type, time_str, content, created_at, last_triggered_at, weekday, roomid)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
        params = (
            reminder_id,
            wxid,
            data["type"],
            time_str,
            data["content"],
            created_at_iso,
            None,  # last_triggered_at starts as NULL
            weekday_val,
            roomid,
        )
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    with conn:
                        conn.cursor().execute(sql, params)
                        conn.commit()
                finally:
                    conn.close()
            log_target = f"用户 {wxid}" + (f" 在群聊 {roomid}" if roomid else "")
            logger.info(f"成功添加提醒 {reminder_id} for {log_target} 到数据库。")
            return True, reminder_id
        except sqlite3.IntegrityError as e:
            logger.error(f"添加提醒失败 (数据冲突): {e}", exc_info=True)
            return False, f"添加提醒失败 (数据冲突): {e}"
        except sqlite3.Error as e:
            logger.error(f"添加提醒到数据库失败: {e}", exc_info=True)
            return False, f"数据库错误: {e}"

    # --- core check logic ---

    @classmethod
    def _pending_today(cls, reminder, now: datetime, kind: str) -> bool:
        """Tell whether today's occurrence of a recurring reminder still has to fire.

        :param reminder: Row with ``id``, ``time_str`` and ``last_triggered_at``.
        :param now: The time of the current check.
        :param kind: ``"daily"`` or ``"weekly"``; used in log messages only.
        :return: True if the reminder never fired or last fired before today's
            trigger time.  A row whose ``time_str`` cannot be parsed is skipped.
        """
        try:
            clock = datetime.strptime(reminder["time_str"], cls._CLOCK_FORMAT).time()
        except ValueError:
            # One malformed row must not abort the whole pass.
            logger.warning(f"跳过 {kind} 提醒 {reminder['id']}: 无法解析 time_str: {reminder['time_str']}")
            return False
        today_trigger_dt = now.replace(hour=clock.hour, minute=clock.minute, second=0, microsecond=0)

        raw_last = reminder["last_triggered_at"]
        if not raw_last:
            return True
        try:
            last_triggered_dt = datetime.fromisoformat(raw_last)
        except ValueError:
            # An unreadable timestamp is treated like "never triggered".
            logger.warning(f"无法解析 {kind} 提醒 {reminder['id']} 的 last_triggered_at: {raw_last}")
            return True
        return last_triggered_dt < today_trigger_dt

    def check_and_trigger_reminders(self):
        """Deliver every due reminder; called periodically by ``schedule``.

        Due rows are read first, then sent, then persisted: one-off reminders
        are deleted and recurring ones get ``last_triggered_at`` updated.
        Errors are logged, never raised.
        """
        now = datetime.now()
        now_iso = now.isoformat()
        current_weekday = now.weekday()  # Monday == 0
        current_hm = now.strftime(self._CLOCK_FORMAT)

        sql_once = """
        SELECT id, wxid, content, roomid FROM reminders
        WHERE type = 'once' AND time_str <= ?
        """
        sql_daily = """
        SELECT id, wxid, content, time_str, last_triggered_at, roomid FROM reminders
        WHERE type = 'daily' AND time_str <= ?
        """
        sql_weekly = """
        SELECT id, wxid, content, time_str, last_triggered_at, roomid FROM reminders
        WHERE type = 'weekly' AND weekday = ? AND time_str <= ?
        """
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    with conn:
                        cursor = conn.cursor()

                        # 1. Collect everything that is due before sending anything.
                        due_once = cursor.execute(
                            sql_once, (now.strftime(self._ONCE_FORMAT),)).fetchall()
                        due_daily = [
                            row for row in cursor.execute(sql_daily, (current_hm,)).fetchall()
                            if self._pending_today(row, now, "daily")
                        ]
                        due_weekly = [
                            row for row in cursor.execute(
                                sql_weekly, (current_weekday, current_hm)).fetchall()
                            if self._pending_today(row, now, "weekly")
                        ]

                        # 2. Send.  ``_send_reminder`` logs its own failures.
                        reminders_to_delete = []
                        for reminder in due_once:
                            self._send_reminder(reminder["wxid"], reminder["content"], reminder["id"], reminder["roomid"])
                            reminders_to_delete.append(reminder["id"])
                            logger.info(f"一次性提醒 {reminder['id']} 已触发并标记删除。")

                        reminders_to_update = []
                        for reminder in due_daily:
                            self._send_reminder(reminder["wxid"], reminder["content"], reminder["id"], reminder["roomid"])
                            reminders_to_update.append(reminder["id"])
                            logger.info(f"每日提醒 {reminder['id']} 已触发并标记更新触发时间。")
                        for reminder in due_weekly:
                            self._send_reminder(reminder["wxid"], reminder["content"], reminder["id"], reminder["roomid"])
                            reminders_to_update.append(reminder["id"])
                            logger.info(f"每周提醒 {reminder['id']} (周{current_weekday+1}) 已触发并标记更新触发时间。")

                        # 3. Persist the outcome in one transaction.
                        if reminders_to_delete:
                            sql_delete = "DELETE FROM reminders WHERE id = ?"
                            cursor.executemany(sql_delete, [(rid,) for rid in reminders_to_delete])
                            logger.info(f"从数据库删除了 {len(reminders_to_delete)} 条一次性提醒。")
                        if reminders_to_update:
                            sql_update = "UPDATE reminders SET last_triggered_at = ? WHERE id = ?"
                            cursor.executemany(sql_update, [(now_iso, rid) for rid in reminders_to_update])
                            logger.info(f"更新了 {len(reminders_to_update)} 条提醒的最后触发时间。")
                        if reminders_to_delete or reminders_to_update:
                            conn.commit()
                finally:
                    conn.close()
        except sqlite3.Error as e:
            logger.error(f"检查并触发提醒时数据库出错: {e}", exc_info=True)
        except Exception as e:
            logger.error(f"检查并触发提醒时发生意外错误: {e}", exc_info=True)

    def _send_reminder(self, wxid: str, content: str, reminder_id: str, roomid: Optional[str] = None):
        """Send one reminder; failures are logged, never raised.

        With a ``roomid`` the message goes to that room and the user's wxid is
        passed as the third argument of ``sendTextMsg``; otherwise it is sent
        to the user directly.
        """
        try:
            message = f"⏰ 提醒:{content}"
            if roomid:
                self.robot.sendTextMsg(message, roomid, wxid)
                logger.info(f"已尝试发送群聊提醒 {reminder_id} 到群 {roomid} @ 用户 {wxid}")
            else:
                self.robot.sendTextMsg(message, wxid)
                logger.info(f"已尝试发送私聊提醒 {reminder_id} 给用户 {wxid}")
        except Exception as e:
            target = f"{roomid} @ 用户 {wxid}" if roomid else f"用户 {wxid}"
            logger.error(f"发送提醒 {reminder_id}{target} 失败: {e}", exc_info=True)

    # --- listing and deleting ---

    def list_reminders(self, wxid: str) -> list:
        """Return all reminders of ``wxid`` as dicts.

        Rows are ordered once -> daily -> weekly, then by time.  An empty list
        is returned on database errors.
        """
        sql = """
        SELECT id, type, time_str, content, created_at, last_triggered_at, weekday, roomid
        FROM reminders
        WHERE wxid = ?
        ORDER BY
            CASE type
                WHEN 'once' THEN 1
                WHEN 'daily' THEN 2
                WHEN 'weekly' THEN 3
                ELSE 4 END ASC,
            time_str ASC
        """
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    with conn:
                        # Convert sqlite3.Row objects into plain dicts.
                        reminders = [dict(row) for row in conn.cursor().execute(sql, (wxid,)).fetchall()]
                finally:
                    conn.close()
            logger.info(f"为用户 {wxid} 查询到 {len(reminders)} 条提醒。")
            return reminders
        except sqlite3.Error as e:
            logger.error(f"为用户 {wxid} 列出提醒时数据库出错: {e}", exc_info=True)
            return []

    def delete_reminder(self, wxid: str, reminder_id: str) -> Tuple[bool, str]:
        """Delete one reminder owned by ``wxid``.

        :return: ``(success, message)``.  Deleting a reminder that does not
            exist or belongs to somebody else fails.
        """
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    with conn:
                        cursor = conn.cursor()
                        # The wxid filter ensures users can only delete their own reminders.
                        cursor.execute("SELECT roomid FROM reminders WHERE id = ? AND wxid = ?",
                                       (reminder_id, wxid))
                        row = cursor.fetchone()
                        if row is None:
                            logger.warning(f"用户 {wxid} 尝试删除不存在或不属于自己的提醒 {reminder_id}")
                            return False, f"未找到 ID 为 {reminder_id[:6]}... 的提醒,或该提醒不属于您。"

                        roomid = row["roomid"]  # only used for the log line below
                        cursor.execute("DELETE FROM reminders WHERE id = ? AND wxid = ?",
                                       (reminder_id, wxid))
                        conn.commit()
                finally:
                    conn.close()
            location_info = f"在群聊 {roomid}" if roomid else "在私聊"
            logger.info(f"用户 {wxid} 成功删除了{location_info}设置的提醒 {reminder_id}")
            return True, f"已成功删除提醒 (ID: {reminder_id[:6]}...)"
        except sqlite3.Error as e:
            logger.error(f"用户 {wxid} 删除提醒 {reminder_id} 时数据库出错: {e}", exc_info=True)
            return False, f"删除提醒时发生数据库错误: {e}"
        except Exception as e:
            logger.error(f"用户 {wxid} 删除提醒 {reminder_id} 时发生意外错误: {e}", exc_info=True)
            return False, f"删除提醒时发生未知错误: {e}"

    def delete_all_reminders(self, wxid: str) -> Tuple[bool, str, int]:
        """Delete every reminder owned by ``wxid``.

        :param wxid: wxid of the user.
        :return: ``(success, message, deleted_count)``.  Having no reminders
            counts as a failure with count 0.
        """
        try:
            with self._db_lock:
                conn = self._get_db_conn()
                try:
                    with conn:
                        cursor = conn.cursor()
                        cursor.execute("SELECT COUNT(*) FROM reminders WHERE wxid = ?", (wxid,))
                        count = cursor.fetchone()[0]
                        if count == 0:
                            return False, "您当前没有任何提醒。", 0
                        cursor.execute("DELETE FROM reminders WHERE wxid = ?", (wxid,))
                        conn.commit()
                finally:
                    conn.close()
            logger.info(f"用户 {wxid} 删除了其所有 {count} 条提醒")
            return True, f"已成功删除您的所有提醒(共 {count} 条)。", count
        except sqlite3.Error as e:
            logger.error(f"用户 {wxid} 删除所有提醒时数据库出错: {e}", exc_info=True)
            return False, f"删除提醒时发生数据库错误: {e}", 0
        except Exception as e:
            logger.error(f"用户 {wxid} 删除所有提醒时发生意外错误: {e}", exc_info=True)
            return False, f"删除提醒时发生未知错误: {e}", 0

View File

@@ -0,0 +1,62 @@
import calendar
import datetime
from chinese_calendar import is_workday
from robot import Robot
class ReportReminder:
    """Sends daily / weekly / monthly report reminders on working days."""

    @staticmethod
    def remind(robot: Robot) -> None:
        """Send the report reminders that apply today to every configured receiver.

        Receivers come from ``robot.config.REPORT_REMINDERS``; when that is
        empty the reminders go to ``filehelper``.  Nothing is sent on
        non-working days.
        """
        receivers = robot.config.REPORT_REMINDERS or ["filehelper"]

        for receiver in receivers:
            today = datetime.datetime.now().date()
            # Rest days: no reminder at all.
            if not is_workday(today):
                continue

            robot.sendTextMsg("该发日报啦", receiver)
            # Weekly report on the last working day of this week.
            if today == ReportReminder.last_work_day_of_week(today):
                robot.sendTextMsg("该发周报啦", receiver)
            # Monthly report on the working day at or before the month's last Friday.
            if today == ReportReminder.last_work_friday_of_month(today):
                robot.sendTextMsg("该发月报啦", receiver)

    @staticmethod
    def last_work_friday_of_month(d: datetime.date) -> datetime.date:
        """Return the last Friday of ``d``'s month, moved back to a working day.

        If that Friday is not a working day, the closest earlier working day is
        returned instead.
        """
        last_day_number = calendar.monthrange(d.year, d.month)[1]
        month_end = datetime.date(d.year, d.month, last_day_number)
        # Days to go back from the month's end to reach a Friday (weekday 4).
        days_after_friday = (calendar.weekday(d.year, d.month, last_day_number) - 4) % 7
        candidate = month_end - datetime.timedelta(days=days_after_friday)

        one_day = datetime.timedelta(days=1)
        while not is_workday(candidate):
            candidate = candidate - one_day
        return candidate

    @staticmethod
    def last_work_day_of_week(d: datetime.date) -> datetime.date:
        """Return the last working day of the week containing ``d``.

        The search starts at that week's Sunday and walks backwards until a
        working day is found.
        """
        days_until_sunday = 6 - calendar.weekday(d.year, d.month, d.day)
        candidate = datetime.date(d.year, d.month, d.day) + datetime.timedelta(days=days_until_sunday)

        one_day = datetime.timedelta(days=1)
        while not is_workday(candidate):
            candidate = candidate - one_day
        return candidate

462
function/func_summary.py Normal file
View File

@@ -0,0 +1,462 @@
# -*- coding: utf-8 -*-
import logging
import time
import re
from collections import deque
# from threading import Lock # 不再需要锁使用SQLite的事务机制
import sqlite3 # 添加sqlite3模块
import os # 用于处理文件路径
from function.func_xml_process import XmlProcessor # 导入XmlProcessor
class MessageSummary:
"""消息总结功能类 (使用SQLite持久化)
用于记录、管理和生成聊天历史消息的总结
"""
def __init__(self, max_history=300, db_path="data/message_history.db"):
    """Open (or create) the message database and make sure its schema exists.

    Args:
        max_history: Maximum number of messages kept per chat.
        db_path: Path of the SQLite database file.

    Raises:
        ConnectionError: If the database cannot be opened or initialised.
        OSError: If the directory for the database file cannot be created.
    """
    self.LOG = logging.getLogger("MessageSummary")
    self.max_history = max_history
    self.db_path = db_path

    # XML processor used to extract quoted-message details.
    self.xml_processor = XmlProcessor(self.LOG)

    # Schema: ``id`` is an alias of rowid, ``timestamp_float`` is used for
    # ordering and pruning, ``timestamp_str`` for display.  The index speeds
    # up per-chat queries ordered by time.
    schema_statements = (
        """
        CREATE TABLE IF NOT EXISTS messages (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            chat_id TEXT NOT NULL,
            sender TEXT NOT NULL,
            content TEXT NOT NULL,
            timestamp_float REAL NOT NULL,
            timestamp_str TEXT NOT NULL
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_chat_time ON messages (chat_id, timestamp_float)
        """,
    )

    try:
        # Make sure the directory that will hold the database file exists.
        parent_dir = os.path.dirname(self.db_path)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
            self.LOG.info(f"创建数据库目录: {parent_dir}")

        # check_same_thread=False lets other threads use this connection.
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.cursor = self.conn.cursor()
        self.LOG.info(f"已连接到 SQLite 数据库: {self.db_path}")

        for statement in schema_statements:
            self.cursor.execute(statement)
        self.conn.commit()
        self.LOG.info("消息表已准备就绪")
    except sqlite3.Error as e:
        self.LOG.error(f"数据库初始化失败: {e}")
        raise ConnectionError(f"无法连接或初始化数据库: {e}") from e
    except OSError as e:
        self.LOG.error(f"创建数据库目录失败: {e}")
        raise OSError(f"无法创建数据库目录: {e}") from e
def close_db(self):
    """Commit pending changes and close the database connection, if there is one."""
    connection = getattr(self, 'conn', None)
    if not connection:
        return
    try:
        # Flush anything still pending before the connection goes away.
        connection.commit()
        connection.close()
        self.LOG.info("数据库连接已关闭")
    except sqlite3.Error as e:
        self.LOG.error(f"关闭数据库连接时出错: {e}")
def record_message(self, chat_id, sender_name, content, timestamp=None):
    """Store one message and prune the chat's history to ``max_history`` rows.

    Database errors are logged and the transaction is rolled back; nothing is
    raised to the caller.

    Args:
        chat_id: Chat identifier (room id or user id).
        sender_name: Display name of the sender.
        content: Message text.
        timestamp: Display time string; defaults to the current local ``HH:MM``.
    """
    try:
        # The float timestamp is always "now"; it is what ordering relies on.
        current_time_float = time.time()
        if not timestamp:
            timestamp_str = time.strftime("%H:%M", time.localtime(current_time_float))
        else:
            timestamp_str = timestamp

        self.cursor.execute("""
            INSERT INTO messages (chat_id, sender, content, timestamp_float, timestamp_str)
            VALUES (?, ?, ?, ?, ?)
        """, (chat_id, sender_name, content, current_time_float, timestamp_str))

        # Keep only the newest ``max_history`` rows of this chat.  ``id`` breaks
        # ties between rows that share the same timestamp, so pruning is
        # deterministic even when the clock resolution is coarse.
        self.cursor.execute("""
            DELETE FROM messages
            WHERE chat_id = ? AND id NOT IN (
                SELECT id
                FROM messages
                WHERE chat_id = ?
                ORDER BY timestamp_float DESC, id DESC
                LIMIT ?
            )
        """, (chat_id, chat_id, self.max_history))

        self.conn.commit()
    except sqlite3.Error as e:
        self.LOG.error(f"记录消息到数据库时出错: {e}")
        try:
            self.conn.rollback()
        except sqlite3.Error as rollback_error:
            # Nothing more can be done here; record it instead of hiding it.
            self.LOG.error(f"回滚事务时出错: {rollback_error}")
def clear_message_history(self, chat_id):
    """Delete every stored message of one chat.

    Args:
        chat_id: Chat identifier (room id or user id).

    Returns:
        bool: True when the delete ran (even if it removed nothing), False on
        a database error.
    """
    try:
        self.cursor.execute("DELETE FROM messages WHERE chat_id = ?", (chat_id,))
        removed = self.cursor.rowcount
        self.conn.commit()
    except sqlite3.Error as e:
        self.LOG.error(f"清除消息历史时出错 (chat_id={chat_id}): {e}")
        return False
    else:
        self.LOG.info(f"为 chat_id={chat_id} 清除了 {removed} 条历史消息")
        return True
def get_message_count(self, chat_id):
    """Return how many messages are stored for one chat.

    Args:
        chat_id: Chat identifier (room id or user id).

    Returns:
        int: The number of stored messages; 0 on a database error.
    """
    try:
        self.cursor.execute("SELECT COUNT(*) FROM messages WHERE chat_id = ?", (chat_id,))
        row = self.cursor.fetchone()  # a one-element tuple such as (5,)
    except sqlite3.Error as e:
        self.LOG.error(f"获取消息数量时出错 (chat_id={chat_id}): {e}")
        return 0
    if not row:
        return 0
    return row[0]
def get_messages(self, chat_id):
    """Return the most recent messages of one chat, oldest first.

    At most ``max_history`` messages are returned.  When more rows are stored
    (for example after ``max_history`` was lowered) the newest ones are kept.

    Args:
        chat_id: Chat identifier (room id or user id).

    Returns:
        list: ``[{"sender": ..., "content": ..., "time": ...}]``; an empty
        list on a database error.
    """
    messages = []
    try:
        # Inner query: pick the newest rows.  Outer query: put them back into
        # chronological order.  ``id`` breaks ties between equal timestamps.
        self.cursor.execute("""
            SELECT sender, content, timestamp_str
            FROM (
                SELECT id, sender, content, timestamp_str, timestamp_float
                FROM messages
                WHERE chat_id = ?
                ORDER BY timestamp_float DESC, id DESC
                LIMIT ?
            )
            ORDER BY timestamp_float ASC, id ASC
        """, (chat_id, self.max_history))

        for sender, content, time_label in self.cursor.fetchall():
            messages.append({
                "sender": sender,
                "content": content,
                "time": time_label,  # the stored display string
            })
    except sqlite3.Error as e:
        self.LOG.error(f"获取消息列表时出错 (chat_id={chat_id}): {e}")

    return messages
def _basic_summarize(self, messages):
"""基本的消息总结逻辑不使用AI
Args:
messages: 消息列表
Returns:
str: 消息总结
"""
if not messages:
return "没有可以总结的历史消息。"
# 构建总结
res = ["以下是近期聊天记录摘要:\n"]
for msg in messages:
res.append(f"[{msg['time']}]{msg['sender']}: {msg['content']}")
return "\n".join(res)
def _ai_summarize(self, messages, chat_model, chat_id):
"""使用AI模型生成消息总结
Args:
messages: 消息列表
chat_model: AI聊天模型对象
chat_id: 聊天ID
Returns:
str: 消息总结
"""
if not messages:
return "没有可以总结的历史消息。"
# 构建用于AI总结的消息格式
formatted_msgs = []
for msg in messages:
formatted_msgs.append(f"[{msg['time']}]{msg['sender']}: {msg['content']}")
# 构建提示词 - 更加客观、中立
prompt = (
"请仔细阅读并分析以下聊天记录,生成一简要的、结构清晰且抓住重点的摘要。\n\n"
"摘要格式要求:\n"
"1. 使用数字编号列表 (例如 1., 2., 3.) 来组织内容每个编号代表一个独立的主要讨论主题不要超过3个主题。\n"
"2. 在每个编号的主题下,写成一段不带格式的文字,每个主题单独成段并空行,需包含以下内容:\n"
" - 这个讨论的核心的简要描述。\n"
" - 该讨论的关键成员 (用括号 [用户名] 格式) 和他们的关键发言内容、成员之间的关键互动。\n"
" - 该讨论的讨论结果。\n"
"3. 总结需客观、精炼、简短精悍,直接呈现最核心且精简的事实,尽量不要添加额外的评论或分析。\n"
"4. 不要暴露出格式不要说核心是xxx参与者是xxx结果是xxx自然一点。\n\n"
"聊天记录如下:\n" + "\n".join(formatted_msgs)
)
# 使用AI模型生成总结 - 创建一个临时的聊天会话ID避免污染正常对话上下文
try:
# 对于支持新会话参数的模型,使用特殊标记告知这是独立的总结请求
if hasattr(chat_model, 'get_answer_with_context') and callable(getattr(chat_model, 'get_answer_with_context')):
# 使用带上下文参数的方法
summary = chat_model.get_answer_with_context(prompt, "summary_" + chat_id, clear_context=True)
else:
# 普通方法使用特殊会话ID
summary = chat_model.get_answer(prompt, "summary_" + chat_id)
if not summary:
return self._basic_summarize(messages)
return summary
except Exception as e:
self.LOG.error(f"使用AI生成总结失败: {e}")
return self._basic_summarize(messages)
def summarize_messages(self, chat_id, chat_model=None):
    """Summarize the stored history of one chat.

    Args:
        chat_id: Chat identifier (room id or user id).
        chat_model: Optional AI chat model; without it the basic transcript
            summary is produced.

    Returns:
        str: The summary, or a fixed notice when there is no history.
    """
    history = self.get_messages(chat_id)
    if not history:
        return "没有可以总结的历史消息。"

    # Without a model, fall back to the plain transcript summary.
    if not chat_model:
        return self._basic_summarize(history)
    return self._ai_summarize(history, chat_model, chat_id)
def process_message_from_wxmsg(self, msg, wcf, all_contacts, bot_wxid=None):
    """Record the summarizable text of an incoming group message.

    The message is passed through ``self.xml_processor.extract_quoted_message``
    and the user's new text (or, for a shared card, its title/description)
    is stored via ``self.record_message``. Nothing is recorded for
    non-group messages, messages sent by this account, message types other
    than text (1) / app message (49), messages that mention the bot, or
    messages from which no text could be extracted.

    Args:
        msg: WeChat message object (WxMsg).
        wcf: WeChat interface object; ``get_alias_in_chatroom`` is used to
            resolve display names inside the group.
        all_contacts: Mapping of wxid to contact name, used as a fallback
            when no group alias is available.
        bot_wxid: The bot's own wxid; when given, messages containing
            ``@<bot name>`` are skipped.
    """
    # 1. Basic filter: group chat only, text/app messages only, not our own.
    if not msg.from_group():
        return
    if msg.type not in (0x01, 49):
        return
    if msg.from_self():
        return

    chat_id = msg.roomid
    original_content = msg.content

    # 2. Skip messages that mention the bot anywhere in the raw content.
    if bot_wxid:
        bot_name_in_group = wcf.get_alias_in_chatroom(bot_wxid, chat_id)
        if not bot_name_in_group:
            # No group alias: fall back to the contact name or a default.
            bot_name_in_group = all_contacts.get(bot_wxid, "泡泡")
        # A plain substring test already covers every case the former regex
        # check (name followed by \u2005, whitespace or end) could match, so
        # that second check was unreachable and has been removed.
        if f"@{bot_name_in_group}" in original_content:
            self.LOG.debug(f"跳过包含@机器人的消息: {original_content[:30]}...")
            return

    # 3. Extract structured details; on failure, conservatively record nothing.
    try:
        extracted_data = self.xml_processor.extract_quoted_message(msg)
    except Exception as e:
        self.LOG.error(f"使用XmlProcessor提取消息内容时出错 (msg.id={msg.id}): {e}")
        return

    def card_label(card_type):
        """Map a card type name onto the coarse label used in the record."""
        for keywords, label in (
            (("链接", "消息"), "链接"),
            (("视频", "音乐"), "媒体"),
            (("位置",), "位置"),
            (("图片",), "图片"),
            (("文件",), "文件"),
        ):
            if any(keyword in card_type for keyword in keywords):
                return label
        return "卡片"

    # 4. Decide what to record.
    content_to_record = ""
    source_info = "未知来源"
    new_content = extracted_data.get("new_content", "").strip()
    card_title = extracted_data.get("card_title", "").strip()

    if new_content:
        # Preferred: the text the user actually typed (reply, text or title).
        content_to_record = new_content
        source_info = "来自 new_content (回复/文本/标题)"

        if extracted_data.get("has_quote", False):
            quoted_sender = extracted_data.get("quoted_sender", "")
            quoted_content = extracted_data.get("quoted_content", "")

            if quoted_content:
                # Keep only a short excerpt of the quoted text.
                max_quote_length = 30
                if len(quoted_content) > max_quote_length:
                    quoted_content = quoted_content[:max_quote_length] + "..."

                # A quoted card is shown in the standard bracketed card form.
                if extracted_data.get("quoted_is_card", False):
                    quoted_card_title = extracted_data.get("quoted_card_title", "")
                    quoted_card_type = extracted_data.get("quoted_card_type", "")
                    quoted_content = f"【{card_label(quoted_card_type)}: {quoted_card_title}】"

                # New text first, then the bracketed reply reference.
                if quoted_sender:
                    content_to_record = f"{content_to_record} 【回复 {quoted_sender}:{quoted_content}】"
                else:
                    content_to_record = f"{content_to_record} 【回复:{quoted_content}】"

    elif extracted_data.get("is_card") and card_title:
        # No new text, but a shared card with a title: record the card.
        card_description = extracted_data.get("card_description", "").strip()
        card_type = extracted_data.get("card_type", "")
        card_source = extracted_data.get("card_appname") or extracted_data.get("card_sourcedisplayname", "")

        card_content = f"{card_label(card_type)}: {card_title}"

        if card_description:
            # Keep only a short excerpt of long descriptions.
            max_desc_length = 50
            if len(card_description) > max_desc_length:
                card_description = card_description[:max_desc_length] + "..."
            card_content += f" - {card_description}"

        if card_source:
            card_content += f" (来自:{card_source})"

        # The whole card is wrapped in 【】.
        content_to_record = f"【{card_content}】"
        source_info = "来自 卡片(标题+描述)"

    elif msg.type == 0x01 and not ("<" in original_content and ">" in original_content):
        # Last resort for plain text that does not look like XML.
        content_to_record = original_content.strip()
        source_info = "来自 纯文本消息"

    # 5. Nothing usable was extracted.
    if not content_to_record:
        self.LOG.debug(f"XmlProcessor未能提取到有效文本内容跳过记录 (msg.id={msg.id}) - Quote: {extracted_data.get('has_quote', False)}, IsCard: {extracted_data.get('is_card', False)}")
        return

    # 6. Resolve the sender's display name: group alias, contact name, wxid.
    sender_name = wcf.get_alias_in_chatroom(msg.sender, msg.roomid)
    if not sender_name:
        sender_name = all_contacts.get(msg.sender) or msg.sender

    # 7. Timestamp the record with the local wall-clock time (HH:MM).
    current_time_str = time.strftime("%H:%M", time.localtime())

    # 8. Store the record.
    self.LOG.debug(f"记录消息 (来源: {source_info}): '[{current_time_str}]{sender_name}: {content_to_record}' (来自 msg.id={msg.id})")
    self.record_message(chat_id, sender_name, content_to_record, current_time_str)

105
function/func_weather.py Normal file
View File

@@ -0,0 +1,105 @@
import requests, json
import logging
import re # 导入正则表达式模块,用于提取数字
class Weather:
    """Fetch weather data for one city from the sojson API and format it as text."""

    def __init__(self, city_code: str, timeout: float = 10.0) -> None:
        """Store the city to query.

        Args:
            city_code: City code appended to the API URL.
            timeout: Seconds to wait for the HTTP request before giving up.
        """
        self.city_code = city_code
        self.timeout = timeout
        self.LOG = logging.getLogger("Weather")

    def _extract_temp(self, temp_str: str) -> str:
        """Return the numeric part of a high/low temperature string.

        A leading minus sign is kept so sub-zero temperatures are reported
        correctly. Returns "" when the string is empty or has no number.
        """
        if not temp_str:
            return ""
        match = re.search(r"(-?\d+(?:\.\d+)?)", temp_str)
        return match.group(1) if match else ""

    def _temp_range(self, day: dict) -> str:
        """Format one forecast entry's temperatures as "low~high", or "N/A"."""
        low_temp = self._extract_temp(day.get('low', ''))
        high_temp = self._extract_temp(day.get('high', ''))
        return f"{low_temp}~{high_temp}" if low_temp and high_temp else "N/A"

    def get_weather(self, include_forecast: bool = False) -> str:
        """Query the API and return a human-readable weather report.

        Args:
            include_forecast: When True, append the next (up to) four days
                of the forecast to today's report.

        Returns:
            str: The report, or a failure message when the request, the
            response status, the JSON payload or its content is unusable.
        """
        url = 'http://t.weather.sojson.com/api/weather/city/' + str(self.city_code)
        self.LOG.info(f"获取天气: {url}")

        try:
            # Without a timeout a stalled server would block the caller forever.
            response = requests.get(url, timeout=self.timeout)
        except Exception as e:
            self.LOG.error(f"获取天气失败: {str(e)}")
            return "由于网络原因,获取天气失败"

        self.LOG.info(f"获取天气成功: 状态码={response.status_code}")
        if response.status_code != 200:
            self.LOG.error(f"API返回非200状态码: {response.status_code}")
            return f"获取天气失败: 服务器返回状态码 {response.status_code}"

        try:
            d = response.json()
        except ValueError as e:
            # Every JSON decode error raised by requests derives from ValueError.
            self.LOG.error(f"解析JSON失败: {str(e)}")
            return "获取天气失败: 返回数据格式错误"

        # The payload carries its own status field; only 200 means success.
        if not isinstance(d, dict) or d.get('status') != 200:
            return "获取天气失败"

        city_info = d.get('cityInfo') or {}
        data = d.get('data') or {}
        forecast = data.get('forecast') or []

        if not forecast:
            self.LOG.warning("API返回的数据中没有forecast字段")
            return "获取天气失败: 数据不完整"

        today = forecast[0]

        # Today's basic information.
        result = [
            f"城市:{city_info.get('parent', '')}/{city_info.get('city', '')}",
            f"时间:{d.get('time', '')} {today.get('week', '')}",
            f"温度:{self._temp_range(today)}",
            f"天气:{today.get('type', '')}"
        ]

        if include_forecast and len(forecast) > 1:
            result.append("\n📅 天气预报:")
            # Entries 1..4 are the next four days.
            for day in forecast[1:5]:
                # Only the last character of the weekday name is shown.
                week_day = day.get('week', '')
                week_char = week_day[-1] if week_day else ''
                weather_type = day.get('type', '未知')
                result.append(f"- 周{week_char} {self._temp_range(day)} {weather_type}")

        return "\n".join(result)
if __name__ == "__main__":
    # Manual smoke test: configure logging, then print Beijing's weather
    # once without and once with the multi-day forecast.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        level=logging.INFO,
    )
    logger = logging.getLogger(__name__)

    w = Weather("101010100")  # Beijing
    for with_forecast in (False, True):
        logger.info(w.get_weather(include_forecast=with_forecast))

View File

@@ -0,0 +1,856 @@
import logging
import re
import html
import time
import xml.etree.ElementTree as ET
from wcferry import WxMsg
class XmlProcessor:
"""处理微信消息XML解析的工具类"""
def __init__(self, logger=None):
    """Create the processor and bind the logger it reports through.

    Args:
        logger: Logger to use; when omitted (or falsy) the logger named
            "XmlProcessor" is used instead.
    """
    if not logger:
        logger = logging.getLogger("XmlProcessor")
    self.logger = logger
def extract_quoted_message(self, msg: "WxMsg") -> dict:
    """Extract new text, quoted text and card details from a group message.

    Only text messages (type 1) and app messages (type 49) are processed;
    any other type yields the default result unchanged. Errors are logged
    and the result collected so far is returned.

    Args:
        msg: WeChat message object; ``type``, ``content`` and ``sender``
            are read.

    Returns:
        dict with the keys:
            new_content: text newly written by the sender.
            quoted_content / quoted_sender: text and sender of the quoted
                message, when the message is a quote.
            media_type: label of the message kind (default "文本").
            has_quote: whether the message quotes another one.
            is_card, card_type, card_title, card_description, card_url,
                card_appname, card_sourcedisplayname: details of a card
                shared directly in this message.
            quoted_is_card, quoted_card_*: the same details for a card
                that is being quoted.
    """
    result = {
        "new_content": "",
        "quoted_content": "",
        "quoted_sender": "",
        "media_type": "文本",
        "has_quote": False,
        "is_card": False,
        "card_type": "",
        "card_title": "",
        "card_description": "",
        "card_url": "",
        "card_appname": "",
        "card_sourcedisplayname": "",
        "quoted_is_card": False,
        "quoted_card_type": "",
        "quoted_card_title": "",
        "quoted_card_description": "",
        "quoted_card_url": "",
        "quoted_card_appname": "",
        "quoted_card_sourcedisplayname": ""
    }

    try:
        if msg.type not in (0x01, 49):
            return result

        self.logger.info(f"处理群聊消息: 类型={msg.type}, 发送者={msg.sender}")
        content = msg.content

        # A quote is signalled by appmsg type 57 or by a <refermsg> element.
        appmsg_type_match = re.search(r'<appmsg.*?type="(\d+)"', content, re.DOTALL)
        is_quote_msg = bool(appmsg_type_match) and appmsg_type_match.group(1) == "57"
        if is_quote_msg:
            self.logger.info("检测到引用类型消息 (type 57)")
        is_referring = is_quote_msg or "<refermsg>" in content

        if msg.type == 49:
            if not is_referring:
                # A directly shared card. For quotes the outer appmsg is
                # only the quote container, so no card info is taken from it.
                card_details = self.extract_card_details(content)
                result.update(card_details)
                if card_details["is_card"] and card_details["card_type"]:
                    result["media_type"] = card_details["card_type"]

            # The first <title> carries the user's new text. DOTALL keeps
            # multi-line text; entities are decoded so the text is readable
            # and comparable with the already-decoded card title.
            title_match = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
            if title_match:
                title_text = html.unescape(title_match.group(1).strip())
                if is_referring:
                    result["new_content"] = title_text
                    self.logger.info(f"引用消息中的新内容: {result['new_content']}")
                elif not (result["is_card"] and result["card_title"] == title_text):
                    # Do not repeat a card's own title as new content.
                    result["new_content"] = title_text
                    self.logger.info(f"从title标签提取到用户新消息: {result['new_content']}")

        elif msg.type == 0x01:
            # Text without anything tag-like is an ordinary message.
            if not ("<" in content and ">" in content):
                result["new_content"] = content
                return result

        if is_referring:
            result["has_quote"] = True

            refer_data = self.extract_refermsg(content)
            result["quoted_sender"] = refer_data.get("sender", "")
            result["quoted_content"] = refer_data.get("content", "")

            # The decoded quoted payload may itself be a card.
            raw_content = refer_data.get("raw_content", "")
            if raw_content and "<appmsg" in raw_content:
                quoted_card_details = self.extract_card_details(raw_content)
                for key in (
                    "is_card", "card_type", "card_title", "card_description",
                    "card_url", "card_appname", "card_sourcedisplayname",
                ):
                    result["quoted_" + key] = quoted_card_details[key]

                # Fall back to the card title when no quoted text was found.
                if not result["quoted_content"] and quoted_card_details["card_title"]:
                    result["quoted_content"] = quoted_card_details["card_title"]

                self.logger.info(f"成功从引用内容中提取卡片信息: {quoted_card_details['card_type']}")
            elif not result["quoted_content"]:
                # Not a card and nothing extracted yet: try the regex fallback.
                fallback_content = self.extract_quoted_fallback(content)
                if fallback_content:
                    if fallback_content.startswith(("引用内容:", "相关内容:")):
                        result["quoted_content"] = fallback_content.split(":", 1)[1].strip()
                    else:
                        result["quoted_content"] = fallback_content

        # Final media type.
        if result["is_card"] and result["card_type"]:
            result["media_type"] = result["card_type"]
        elif is_referring and result["quoted_is_card"]:
            result["media_type"] = "引用消息"
        else:
            result["media_type"] = self.identify_message_type(content)

        return result

    except Exception as e:
        self.logger.error(f"处理群聊引用消息时出错: {e}")
        return result
def extract_private_quoted_message(self, msg: "WxMsg") -> dict:
    """Extract new text, quoted text and card details from a private message.

    Only text messages (type 1) and app messages (type 49) are processed;
    any other type yields the default result unchanged. Errors are logged
    and the result collected so far is returned.

    Args:
        msg: WeChat message object; ``type``, ``content`` and ``sender``
            are read.

    Returns:
        dict with the keys:
            new_content: text newly written by the sender.
            quoted_content / quoted_sender: text and sender of the quoted
                message, when the message is a quote.
            media_type: label of the message kind (default "文本").
            has_quote: whether the message quotes another one.
            is_card, card_type, card_title, card_description, card_url,
                card_appname, card_sourcedisplayname: details of a card
                shared directly in this message.
            quoted_is_card, quoted_card_*: the same details for a card
                that is being quoted.
    """
    result = {
        "new_content": "",
        "quoted_content": "",
        "quoted_sender": "",
        "media_type": "文本",
        "has_quote": False,
        "is_card": False,
        "card_type": "",
        "card_title": "",
        "card_description": "",
        "card_url": "",
        "card_appname": "",
        "card_sourcedisplayname": "",
        "quoted_is_card": False,
        "quoted_card_type": "",
        "quoted_card_title": "",
        "quoted_card_description": "",
        "quoted_card_url": "",
        "quoted_card_appname": "",
        "quoted_card_sourcedisplayname": ""
    }

    try:
        if msg.type not in (0x01, 49):
            return result

        self.logger.info(f"处理私聊消息: 类型={msg.type}, 发送者={msg.sender}")
        content = msg.content

        # A quote is signalled by appmsg type 57 or by a <refermsg> element.
        appmsg_type_match = re.search(r'<appmsg.*?type="(\d+)"', content, re.DOTALL)
        is_quote_msg = bool(appmsg_type_match) and appmsg_type_match.group(1) == "57"
        if is_quote_msg:
            self.logger.info("检测到引用类型消息 (type 57)")
        is_referring = is_quote_msg or "<refermsg>" in content

        if msg.type == 49:
            if not is_referring:
                # A directly shared card. For quotes the outer appmsg is
                # only the quote container, so no card info is taken from it.
                card_details = self.extract_card_details(content)
                result.update(card_details)
                if card_details["is_card"] and card_details["card_type"]:
                    result["media_type"] = card_details["card_type"]

            # The first <title> carries the user's new text. DOTALL keeps
            # multi-line text; entities are decoded so the text is readable
            # and comparable with the already-decoded card title.
            title_match = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
            if title_match:
                title_text = html.unescape(title_match.group(1).strip())
                if is_referring:
                    result["new_content"] = title_text
                    self.logger.info(f"引用消息中的新内容: {result['new_content']}")
                elif not (result["is_card"] and result["card_title"] == title_text):
                    # Do not repeat a card's own title as new content.
                    result["new_content"] = title_text
                    self.logger.info(f"从title标签提取到用户新消息: {result['new_content']}")

        elif msg.type == 0x01:
            # Text without anything tag-like is an ordinary message.
            if not ("<" in content and ">" in content):
                result["new_content"] = content
                return result

        if is_referring:
            result["has_quote"] = True

            refer_data = self.extract_private_refermsg(content)
            result["quoted_sender"] = refer_data.get("sender", "")
            result["quoted_content"] = refer_data.get("content", "")

            # The decoded quoted payload may itself be a card.
            raw_content = refer_data.get("raw_content", "")
            if raw_content and "<appmsg" in raw_content:
                quoted_card_details = self.extract_card_details(raw_content)
                for key in (
                    "is_card", "card_type", "card_title", "card_description",
                    "card_url", "card_appname", "card_sourcedisplayname",
                ):
                    result["quoted_" + key] = quoted_card_details[key]

                # Fall back to the card title when no quoted text was found.
                if not result["quoted_content"] and quoted_card_details["card_title"]:
                    result["quoted_content"] = quoted_card_details["card_title"]

                self.logger.info(f"成功从引用内容中提取卡片信息: {quoted_card_details['card_type']}")
            elif not result["quoted_content"]:
                # Not a card and nothing extracted yet: try the regex fallback.
                fallback_content = self.extract_quoted_fallback(content)
                if fallback_content:
                    if fallback_content.startswith(("引用内容:", "相关内容:")):
                        result["quoted_content"] = fallback_content.split(":", 1)[1].strip()
                    else:
                        result["quoted_content"] = fallback_content

        # Final media type.
        if result["is_card"] and result["card_type"]:
            result["media_type"] = result["card_type"]
        elif is_referring and result["quoted_is_card"]:
            result["media_type"] = "引用消息"
        else:
            result["media_type"] = self.identify_private_message_type(content)

        return result

    except Exception as e:
        self.logger.error(f"处理私聊引用消息时出错: {e}")
        return result
def extract_refermsg(self, content: str) -> dict:
    """Pull sender and quoted content out of a group message's <refermsg> node.

    Args:
        content: Raw message content.

    Returns:
        dict: ``sender`` (stripped <displayname> text), ``content`` (quoted
        text with literal tags removed, whitespace collapsed and HTML
        entities decoded) and ``raw_content`` (the <content> text with HTML
        entities decoded, kept for later parsing). Fields that cannot be
        found stay empty; errors are logged and the partial result returned.
    """
    parsed = {"sender": "", "content": "", "raw_content": ""}

    try:
        # Regex extraction avoids parsing the whole message as XML.
        refer_match = re.search(r'<refermsg>(.*?)</refermsg>', content, re.DOTALL)
        if refer_match is not None:
            refer_body = refer_match.group(1)

            name_match = re.search(r'<displayname>(.*?)</displayname>', refer_body, re.DOTALL)
            if name_match is not None:
                parsed["sender"] = name_match.group(1).strip()

            body_match = re.search(r'<content>(.*?)</content>', refer_body, re.DOTALL)
            if body_match is not None:
                encoded = body_match.group(1)
                parsed["raw_content"] = html.unescape(encoded)
                # NOTE(review): tags are stripped before entities are
                # decoded, so entity-encoded markup survives into "content".
                flattened = re.sub(r'\s+', ' ', re.sub(r'<.*?>', '', encoded)).strip()
                parsed["content"] = html.unescape(flattened)
    except Exception as e:
        self.logger.error(f"提取群聊refermsg内容时出错: {e}")

    return parsed
def extract_private_refermsg(self, content: str) -> dict:
    """Pull sender and quoted content out of a private message's <refermsg> node.

    Args:
        content: Raw message content.

    Returns:
        dict: ``sender`` (stripped <displayname> text), ``content`` (quoted
        text with literal tags removed, whitespace collapsed and HTML
        entities decoded) and ``raw_content`` (the <content> text with HTML
        entities decoded, kept for later parsing). Fields that cannot be
        found stay empty; errors are logged and the partial result returned.
    """
    info = {"sender": "", "content": "", "raw_content": ""}

    def first_group(pattern, text):
        """Return group 1 of the first match of ``pattern``, or None."""
        found = re.search(pattern, text, re.DOTALL)
        return None if found is None else found.group(1)

    try:
        # Regex extraction avoids parsing the whole message as XML.
        refer_body = first_group(r'<refermsg>(.*?)</refermsg>', content)
        if refer_body is None:
            return info

        display_name = first_group(r'<displayname>(.*?)</displayname>', refer_body)
        if display_name is not None:
            info["sender"] = display_name.strip()

        quoted = first_group(r'<content>(.*?)</content>', refer_body)
        if quoted is not None:
            # Decoded payload, possibly XML, kept for later parsing.
            info["raw_content"] = html.unescape(quoted)
            # Display text: drop literal tags, collapse whitespace, decode.
            text = re.sub(r'<.*?>', '', quoted)
            text = re.sub(r'\s+', ' ', text).strip()
            info["content"] = html.unescape(text)

        return info
    except Exception as e:
        self.logger.error(f"提取私聊refermsg内容时出错: {e}")
        return info
def identify_message_type(self, content: str) -> str:
    """Label the media type of a group message from its <appmsg> start tag.

    Only content containing the literal text ``<appmsg type="N"`` is
    recognized; everything else is labelled "文本".

    Args:
        content: Raw message content.

    Returns:
        str: Media type label.
    """
    # Checked in this order; the first marker found in the content wins.
    # NOTE(review): the labels for types 4/5/6 differ from those in
    # get_card_type_name (视频/链接/文件) — confirm which mapping is intended.
    markers = (
        ("<appmsg type=\"2\"", "图片"),
        ("<appmsg type=\"5\"", "文件"),
        ("<appmsg type=\"4\"", "链接分享"),
        ("<appmsg type=\"3\"", "音频"),
        ("<appmsg type=\"6\"", "视频"),
        ("<appmsg type=\"8\"", "动画表情"),
        ("<appmsg type=\"1\"", "文本卡片"),
        ("<appmsg type=\"7\"", "位置分享"),
        ("<appmsg type=\"17\"", "实时位置分享"),
        ("<appmsg type=\"19\"", "频道消息"),
        ("<appmsg type=\"33\"", "小程序"),
        ("<appmsg type=\"57\"", "引用消息"),
    )
    try:
        for marker, label in markers:
            if marker in content:
                return label
        return "文本"
    except Exception as e:
        self.logger.error(f"识别消息类型时出错: {e}")
        return "文本"
def identify_private_message_type(self, content: str) -> str:
    """Label the media type of a private message.

    Private and group messages are classified by exactly the same rules,
    so this delegates to ``identify_message_type`` instead of keeping a
    second copy of the table in sync.

    Args:
        content: Raw message content.

    Returns:
        str: Media type label ("文本" when nothing more specific matches
        or an error occurs).
    """
    return self.identify_message_type(content)
def extract_quoted_fallback(self, content: str) -> str:
    """Regex-based fallback for extracting quoted text from raw content.

    Args:
        content: Raw message content.

    Returns:
        str: The text of the first <content> element if there is one;
        otherwise the text following the first "引用"/"回复" keyword and
        its colon; otherwise "". Tags are removed, whitespace is collapsed
        and HTML entities are decoded. Errors are logged and yield "".
    """
    def clean(text):
        """Drop literal tags, collapse whitespace, decode HTML entities."""
        text = re.sub(r'<.*?>', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return html.unescape(text)

    try:
        # 1. Prefer an explicit <content> element. (A former second branch
        #    combining <displayname> and <content> could never run because
        #    this branch returns whenever <content> is present.)
        content_match = re.search(r'<content>(.*?)</content>', content, re.DOTALL)
        if content_match:
            return clean(content_match.group(1))

        # 2. Otherwise take what follows a quote/reply keyword and a colon,
        #    up to the next tag or the end. The keywords are matched as
        #    whole words; a character class would match any single one of
        #    their characters (or "|").
        keyword_match = re.search(r'(?:引用|回复).*?:(.*?)(?:<|$)', content, re.DOTALL)
        if keyword_match:
            return clean(keyword_match.group(1))

        return ""
    except Exception as e:
        self.logger.error(f"后备提取引用内容时出错: {e}")
        return ""
def extract_card_details(self, content: str) -> dict:
"""Extract card details from message content (parsed with ElementTree).

Args:
content: Message content (XML string).

Returns:
dict: ``is_card`` plus ``card_type``, ``card_title``,
``card_description``, ``card_url``, ``card_appname`` and
``card_sourcedisplayname``; fields that cannot be extracted stay empty.
"""
result = {
"is_card": False,
"card_type": "",
"card_title": "",
"card_description": "",
"card_url": "",
"card_appname": "",
"card_sourcedisplayname": ""
}
try:
# 1. Locate the <appmsg> element.
# A regex isolates the <appmsg>...</appmsg> part so the rest of the message body is never parsed.
appmsg_match = re.search(r'<appmsg.*?>(.*?)</appmsg>', content, re.DOTALL | re.IGNORECASE)
if not appmsg_match:
# No closing tag found: look for a lone (self-closing or non-standard) <appmsg ...> start tag.
appmsg_match_simple = re.search(r'(<appmsg[^>]*>)', content, re.IGNORECASE)
if not appmsg_match_simple:
# Last attempt: parse what is inside <msg> and search it for an <appmsg> node.
# NOTE(review): any <appmsg ...> tag would already have matched the regex above, so this path looks unreachable in practice — confirm.
msg_match = re.search(r'<msg>(.*?)</msg>', content, re.DOTALL | re.IGNORECASE)
if msg_match:
inner_content = msg_match.group(1)
try:
# Parse the inner content of <msg> and look for appmsg inside it.
# A synthetic <root> wrapper is added so several sibling elements still parse.
root = ET.fromstring(f"<root>{inner_content}</root>")
appmsg_node = root.find('.//appmsg')
if appmsg_node is None:
self.logger.debug("在 <msg> 内未找到 <appmsg> 标签")
return result # no <appmsg> found: not a standard card
# Serialize the node back to a string so the shared
# parsing step below (ET.XML) can handle every path
# in the same way.
appmsg_xml_str = ET.tostring(appmsg_node, encoding='unicode', method='xml')
except ET.ParseError as parse_error:
self.logger.debug(f"解析 <msg> 内容时出错: {parse_error}")
return result # parsing failed
else:
self.logger.debug("未找到 <appmsg> 标签")
return result # no <appmsg> found: not a standard card
else:
# Only a start tag such as <appmsg ... /> was found: inner tags may not be extractable, but it is still flagged as a card.
appmsg_xml_str = appmsg_match_simple.group(1)
result["is_card"] = True # flagged as a card even though details may be unavailable
else:
# Capture the <appmsg ...> start tag itself so its attributes can be parsed.
appmsg_outer_match = re.search(r'(<appmsg[^>]*>).*?</appmsg>', content, re.DOTALL | re.IGNORECASE)
if not appmsg_outer_match:
# If the pattern above fails, match just the start tag.
appmsg_outer_match = re.search(r'(<appmsg[^>]*>)', content, re.IGNORECASE)
if appmsg_outer_match:
appmsg_tag_start = appmsg_outer_match.group(1)
appmsg_inner_content = appmsg_match.group(1)
appmsg_xml_str = f"{appmsg_tag_start}{appmsg_inner_content}</appmsg>"
else:
self.logger.warning("无法提取完整的 <appmsg> 标签结构")
return result # structure incomplete
# 2. Parse the <appmsg> XML with ElementTree.
try:
# Parse the extracted <appmsg> XML string.
# NOTE(review): the standard library documents ET.XML and ET.fromstring as equivalent, so neither is more lenient than the other.
appmsg_root = ET.XML(appmsg_xml_str)
result["is_card"] = True # parsed successfully: this is a card
# 3. Card type: the type attribute of <appmsg>, if present.
card_type_num = appmsg_root.get('type', '') # attribute lookup with a default
if card_type_num:
result["card_type"] = self.get_card_type_name(card_type_num)
else:
# Otherwise read the inner <type> element.
type_node = appmsg_root.find('./type')
if type_node is not None and type_node.text:
result["card_type"] = self.get_card_type_name(type_node.text.strip())
# 4. Title (<title>).
title = appmsg_root.findtext('./title', default='').strip()
if title:
result["card_title"] = html.unescape(title)
# 5. Description (<des>).
description = appmsg_root.findtext('./des', default='').strip()
if description:
cleaned_desc = re.sub(r'<.*?>', '', description) # strip markup tags
result["card_description"] = html.unescape(cleaned_desc)
# 6. Link (<url>).
url = appmsg_root.findtext('./url', default='').strip()
if url:
result["card_url"] = html.unescape(url)
# 7. Application name (<appinfo/appname> or <sourcedisplayname>).
# Try <appinfo><appname> first.
appname_node = appmsg_root.find('./appinfo/appname')
if appname_node is not None and appname_node.text:
appname = appname_node.text.strip()
result["card_appname"] = html.unescape(appname)
# Then read <sourcedisplayname>.
sourcedisplayname_node = appmsg_root.find('./sourcedisplayname')
if sourcedisplayname_node is not None and sourcedisplayname_node.text:
sourcedisplayname = sourcedisplayname_node.text.strip()
result["card_sourcedisplayname"] = html.unescape(sourcedisplayname)
# When appname is empty, use sourcedisplayname as appname.
if not result["card_appname"]:
result["card_appname"] = result["card_sourcedisplayname"]
# Also accept an <appname> placed directly under <appmsg>.
if not result["card_appname"]:
appname_direct = appmsg_root.findtext('./appname', default='').strip()
if appname_direct:
result["card_appname"] = html.unescape(appname_direct)
# Log the extraction result for debugging.
self.logger.debug(f"ElementTree 解析结果: type={result['card_type']}, title={result['card_title']}, desc_len={len(result['card_description'])}, url_len={len(result['card_url'])}, app={result['card_appname']}, source={result['card_sourcedisplayname']}")
except ET.ParseError as e:
self.logger.error(f"使用 ElementTree 解析 <appmsg> 时出错: {e}\nXML 内容片段: {appmsg_xml_str[:500]}...", exc_info=True)
# Even though parsing <appmsg> failed, content containing <appmsg or <msg> is still flagged as a card.
if result["is_card"] == False and ('<appmsg' in content or '<msg>' in content):
result["is_card"] = True # judged to be a card, but detail extraction failed
# Regex fallback for the basic fields.
type_match_fallback = re.search(r'<type>(\d+)</type>', content)
title_match_fallback = re.search(r'<title>(.*?)</title>', content, re.DOTALL)
if type_match_fallback:
result["card_type"] = self.get_card_type_name(type_match_fallback.group(1))
if title_match_fallback:
result["card_title"] = html.unescape(title_match_fallback.group(1).strip())
self.logger.warning("ElementTree 解析失败,已尝试正则后备提取基础信息")
except Exception as e:
self.logger.error(f"提取卡片详情时发生意外错误: {e}", exc_info=True)
# Still make a best-effort guess whether this is a card.
if not result["is_card"] and ('<appmsg' in content or '<msg>' in content):
result["is_card"] = True
return result
def get_card_type_name(self, type_num: str) -> str:
    """Translate a card type number into its display name.

    Args:
        type_num: Type number as a string, e.g. "5".

    Returns:
        str: The matching type name, or "未知类型(<type_num>)" when the
        number is not in the table.
    """
    known_types = dict((
        ("1", "文本卡片"),
        ("2", "图片"),
        ("3", "音频"),
        ("4", "视频"),
        ("5", "链接"),
        ("6", "文件"),
        ("7", "位置"),
        ("8", "表情动画"),
        ("17", "实时位置"),
        ("19", "频道消息"),
        ("33", "小程序"),
        ("36", "转账"),
        ("50", "视频号"),
        ("51", "直播间"),
        ("57", "引用消息"),
        ("62", "视频号直播"),
        ("63", "视频号商品"),
        ("87", "群收款"),
        ("88", "语音通话"),
    ))
    try:
        return known_types[type_num]
    except KeyError:
        return f"未知类型({type_num})"
def format_message_for_ai(self, msg_data: dict, sender_name: str) -> str:
    """Render extracted message data as the text that is sent to the AI.

    Args:
        msg_data: Extracted message data (the dict shape produced by the
            quoted-message extractors of this class).
        sender_name: Display name of the sender.

    Returns:
        str: Up to three sections joined by blank lines — the sender's new
        text, the details of a directly shared card, and the quoted
        message. When none applies, a one-line notice naming the media
        type (or a generic notice) is returned.
    """
    def clip(text, limit):
        """Shorten ``text`` to ``limit`` characters, ending in "..."."""
        return text if len(text) <= limit else text[:limit - 3] + "..."

    def card_lines(prefix):
        """Collect the non-empty card fields stored under ``prefix``."""
        lines = []
        if msg_data[prefix + "card_type"]:
            lines.append(f"类型: {msg_data[prefix + 'card_type']}")
        if msg_data[prefix + "card_title"]:
            lines.append(f"标题: {msg_data[prefix + 'card_title']}")
        if msg_data[prefix + "card_description"]:
            lines.append(f"描述: {clip(msg_data[prefix + 'card_description'], 100)}")
        source = msg_data[prefix + "card_appname"] or msg_data[prefix + "card_sourcedisplayname"]
        if source:
            lines.append(f"来源: {source}")
        if msg_data[prefix + "card_url"]:
            lines.append(f"链接: {clip(msg_data[prefix + 'card_url'], 80)}")
        return lines

    sections = []
    now = time.strftime("%H:%M", time.localtime())

    # The sender's own new text.
    if msg_data["new_content"]:
        sections.append(f"[{now}] {sender_name}: {msg_data['new_content']}")

    # A card shared directly (not inside a quote); only shown when at
    # least one field beyond the header line is available.
    if msg_data["is_card"] and not msg_data["has_quote"]:
        details = card_lines("")
        if details:
            sections.append("\n".join(["[卡片信息]", *details]))

    # The quoted message, either as card details or as plain text.
    if msg_data["has_quote"]:
        header = "[用户引用]"
        if msg_data["quoted_sender"]:
            header = f"{header} {msg_data['quoted_sender']}"

        if msg_data["quoted_is_card"]:
            sections.append("\n".join([header, *card_lines("quoted_")]))
        elif msg_data["quoted_content"]:
            sections.append(f"{header}: {msg_data['quoted_content']}")

    # Nothing to show: fall back to a notice, naming the media type if known.
    if not sections:
        media_type = msg_data["media_type"]
        if media_type and media_type != "文本":
            sections.append(f"[{now}] {sender_name} 发送了 [{media_type}]")
        else:
            sections.append(f"[{now}] {sender_name} 发送了消息")

    return "\n\n".join(sections)

448
function/main_city.json Normal file
View File

@@ -0,0 +1,448 @@
{
"七台河": "101051002",
"万宁": "101310215",
"万州天城": "101041200",
"万州龙宝": "101041300",
"万盛": "101040600",
"三亚": "101310201",
"三明": "101230801",
"三门峡": "101181701",
"上海": "101020100",
"上饶": "101240301",
"东丽": "101030400",
"东方": "101310202",
"东莞": "101281601",
"东营": "101121201",
"中卫": "101170501",
"中山": "101281701",
"丰台": "101010900",
"丰都": "101043000",
"临夏": "101161101",
"临汾": "101100701",
"临沂": "101120901",
"临沧": "101291101",
"临河": "101080801",
"临高": "101310203",
"丹东": "101070601",
"丽水": "101210801",
"丽江": "101291401",
"乌兰浩特": "101081101",
"乌海": "101080301",
"乌鲁木齐": "101130101",
"乐东": "101310221",
"乐山": "101271401",
"九江": "101240201",
"云浮": "101281401",
"云阳": "101041700",
"五指山": "101310222",
"亳州": "101220901",
"仙桃": "101201601",
"伊宁": "101131001",
"伊春": "101050801",
"佛山": "101280800",
"佛爷顶": "101011700",
"佳木斯": "101050401",
"保亭": "101310214",
"保定": "101090201",
"保山": "101290501",
"信阳": "101180601",
"儋州": "101310205",
"克拉玛依": "101130201",
"八达岭": "101011600",
"六安": "101221501",
"六盘水": "101260801",
"兰州": "101160101",
"兴义": "101260906",
"内江": "101271201",
"凉山": "101271601",
"凯里": "101260501",
"包头": "101080201",
"北京": "101010100",
"北京城区": "101012200",
"北海": "101301301",
"北碚": "101040800",
"北辰": "101030600",
"十堰": "101201101",
"南京": "101190101",
"南充": "101270501",
"南宁": "101300101",
"南川": "101040400",
"南平": "101230901",
"南昌": "101240101",
"南汇": "101020600",
"南沙岛": "101310220",
"南通": "101190501",
"南阳": "101180701",
"博乐": "101131601",
"厦门": "101230201",
"双鸭山": "101051301",
"台中": "101340401",
"台北县": "101340101",
"台州": "101210601",
"合作": "101161201",
"合川": "101040300",
"合肥": "101220101",
"吉安": "101240601",
"吉林": "101060201",
"吉首": "101251501",
"吐鲁番": "101130501",
"吕梁": "101101100",
"吴忠": "101170301",
"周口": "101181401",
"呼伦贝尔": "101081000",
"呼和浩特": "101080101",
"和田": "101131301",
"咸宁": "101200701",
"咸阳": "101110200",
"哈密": "101131201",
"哈尔滨": "101050101",
"唐山": "101090501",
"商丘": "101181001",
"商洛": "101110601",
"喀什": "101130901",
"嘉兴": "101210301",
"嘉定": "101020500",
"嘉峪关": "101161401",
"四平": "101060401",
"固原": "101170401",
"垫江": "101042200",
"城口": "101041600",
"塔城": "101131101",
"塘沽": "101031100",
"大兴": "101011100",
"大兴安岭": "101050701",
"大同": "101100201",
"大庆": "101050901",
"大港": "101031200",
"大理": "101290201",
"大足": "101042600",
"大连": "101070201",
"天水": "101160901",
"天津": "101030100",
"天门": "101201501",
"太原": "101100101",
"奉节": "101041900",
"奉贤": "101021000",
"威海": "101121301",
"娄底": "101250801",
"孝感": "101200401",
"宁德": "101230301",
"宁河": "101030700",
"宁波": "101210401",
"安庆": "101220601",
"安康": "101110701",
"安阳": "101180201",
"安顺": "101260301",
"定安": "101310209",
"定西": "101160201",
"宜宾": "101271101",
"宜昌": "101200901",
"宜春": "101240501",
"宝坻": "101030300",
"宝山": "101020300",
"宝鸡": "101110901",
"宣城": "101221401",
"宿州": "101220701",
"宿迁": "101191301",
"密云": "101011300",
"密云上甸子": "101011900",
"屯昌": "101310210",
"山南": "101140301",
"岳阳": "101251001",
"崇左": "101300201",
"崇明": "101021100",
"巢湖": "101221601",
"巫山": "101042000",
"巫溪": "101041800",
"巴中": "101270901",
"巴南": "101040900",
"常州": "101191101",
"常德": "101250601",
"平凉": "101160301",
"平谷": "101011500",
"平顶山": "101180501",
"广元": "101272101",
"广安": "101270801",
"广州": "101280101",
"庆阳": "101160401",
"库尔勒": "101130601",
"廊坊": "101090601",
"延吉": "101060301",
"延安": "101110300",
"延庆": "101010800",
"开县": "101041500",
"开封": "101180801",
"张家口": "101090301",
"张家界": "101251101",
"张掖": "101160701",
"彭水": "101043200",
"徐家汇": "101021200",
"徐州": "101190801",
"德宏": "101291501",
"德州": "101120401",
"德阳": "101272001",
"忠县": "101042400",
"忻州": "101101001",
"怀化": "101251201",
"怀柔": "101010500",
"怒江": "101291201",
"恩施": "101201001",
"惠州": "101280301",
"成都": "101270101",
"房山": "101011200",
"扬州": "101190601",
"承德": "101090402",
"抚州": "101240401",
"抚顺": "101070401",
"拉萨": "101140101",
"揭阳": "101281901",
"攀枝花": "101270201",
"文山": "101290601",
"文昌": "101310212",
"斋堂": "101012000",
"新乡": "101180301",
"新余": "101241001",
"无锡": "101190201",
"日喀则": "101140201",
"日照": "101121501",
"昆明": "101290101",
"昌吉": "101130401",
"昌平": "101010700",
"昌江": "101310206",
"昌都": "101140501",
"昭通": "101291001",
"晋中": "101100401",
"晋城": "101100601",
"晋江": "101230509",
"普洱": "101290901",
"景德镇": "101240801",
"景洪": "101291601",
"曲靖": "101290401",
"朔州": "101100901",
"朝阳": "101071201",
"本溪": "101070501",
"来宾": "101300401",
"杭州": "101210101",
"松原": "101060801",
"松江": "101020900",
"林芝": "101140401",
"果洛": "101150501",
"枣庄": "101121401",
"柳州": "101300301",
"株洲": "101250301",
"桂林": "101300501",
"梁平": "101042300",
"梅州": "101280401",
"梧州": "101300601",
"楚雄": "101290801",
"榆林": "101110401",
"武威": "101160501",
"武汉": "101200101",
"武清": "101030200",
"武都": "101161001",
"武隆": "101043100",
"毕节": "101260701",
"永川": "101040200",
"永州": "101251401",
"汉中": "101110801",
"汉沽": "101030800",
"汕头": "101280501",
"汕尾": "101282101",
"江津": "101040500",
"江门": "101281101",
"池州": "101221701",
"汤河口": "101011800",
"沈阳": "101070101",
"沙坪坝": "101043700",
"沧州": "101090701",
"河池": "101301201",
"河源": "101281201",
"泉州": "101230501",
"泰安": "101120801",
"泰州": "101191201",
"泸州": "101271001",
"洛阳": "101180901",
"津南": "101031000",
"济南": "101120101",
"济宁": "101120701",
"济源": "101181801",
"浦东": "101021300",
"海东": "101150201",
"海北": "101150801",
"海南": "101150401",
"海口": "101310101",
"海淀": "101010200",
"海西": "101150701",
"涪陵": "101041400",
"淄博": "101120301",
"淮北": "101221201",
"淮南": "101220401",
"淮安": "101190901",
"深圳": "101280601",
"清远": "101281301",
"渝北": "101040700",
"温州": "101210701",
"渭南": "101110501",
"湖州": "101210201",
"湘潭": "101250201",
"湛江": "101281001",
"滁州": "101221101",
"滨州": "101121101",
"漯河": "101181501",
"漳州": "101230601",
"潍坊": "101120601",
"潜江": "101201701",
"潮州": "101281501",
"潼南": "101042100",
"澄迈": "101310204",
"濮阳": "101181301",
"烟台": "101120501",
"焦作": "101181101",
"牡丹江": "101050301",
"玉林": "101300901",
"玉树": "101150601",
"玉溪": "101290701",
"珠海": "101280701",
"琼中": "101310208",
"琼山": "101310102",
"琼海": "101310211",
"璧山": "101042900",
"甘孜": "101271801",
"白城": "101060601",
"白山": "101060901",
"白沙": "101310207",
"白银": "101161301",
"百色": "101301001",
"益阳": "101250700",
"盐城": "101190701",
"盘锦": "101071301",
"眉山": "101271501",
"石嘴山": "101170201",
"石家庄": "101090101",
"石景山": "101011000",
"石柱": "101042500",
"石河子": "101130301",
"神农架": "101201201",
"福州": "101230101",
"秀山": "101043600",
"秦皇岛": "101091101",
"綦江": "101043300",
"红河": "101290301",
"绍兴": "101210501",
"绥化": "101050501",
"绵阳": "101270401",
"聊城": "101121701",
"肇庆": "101280901",
"自贡": "101270301",
"舟山": "101211101",
"芜湖": "101220301",
"苏州": "101190401",
"茂名": "101282001",
"荆州": "101200801",
"荆门": "101201401",
"荣昌": "101042700",
"莆田": "101230401",
"莱芜": "101121601",
"菏泽": "101121001",
"萍乡": "101240901",
"营口": "101070801",
"葫芦岛": "101071401",
"蓟县": "101031400",
"蚌埠": "101220201",
"衡水": "101090801",
"衡阳": "101250401",
"衢州": "101211001",
"襄樊": "101200201",
"西宁": "101150101",
"西安": "101110101",
"西沙": "101310217",
"西青": "101030500",
"许昌": "101180401",
"贵港": "101300801",
"贵阳": "101260101",
"贺州": "101300701",
"资阳": "101271301",
"赣州": "101240701",
"赤峰": "101080601",
"辽源": "101060701",
"辽阳": "101071001",
"达州": "101270601",
"运城": "101100801",
"连云港": "101191001",
"通化": "101060501",
"通州": "101010600",
"通辽": "101080501",
"遂宁": "101270701",
"遵义": "101260201",
"邢台": "101090901",
"那曲": "101140601",
"邯郸": "101091001",
"邵阳": "101250901",
"郑州": "101180101",
"郴州": "101250501",
"都匀": "101260401",
"鄂尔多斯": "101080701",
"鄂州": "101200301",
"酉阳": "101043400",
"酒泉": "101160801",
"重庆": "101040100",
"金华": "101210901",
"金山": "101020700",
"金昌": "101160601",
"钦州": "101301101",
"铁岭": "101071101",
"铜仁": "101260601",
"铜川": "101111001",
"铜梁": "101042800",
"铜陵": "101221301",
"银川": "101170101",
"锡林浩特": "101080901",
"锦州": "101070701",
"镇江": "101190301",
"长寿": "101041000",
"长春": "101060101",
"长沙": "101250101",
"长治": "101100501",
"门头沟": "101011400",
"闵行": "101020200",
"阜新": "101070901",
"阜阳": "101220801",
"防城港": "101301401",
"阳江": "101281801",
"阳泉": "101100301",
"阿克苏": "101130801",
"阿勒泰": "101131401",
"阿图什": "101131501",
"阿坝": "101271901",
"阿拉善左旗": "101081201",
"阿拉尔": "101130701",
"阿里": "101140701",
"陵水": "101310216",
"随州": "101201301",
"雅安": "101271701",
"集宁": "101080401",
"霞云岭": "101012100",
"青岛": "101120201",
"青浦": "101020800",
"静海": "101030900",
"鞍山": "101070301",
"韶关": "101280201",
"顺义": "101010400",
"香格里拉": "101291301",
"马鞍山": "101220501",
"驻马店": "101181601",
"高雄": "101340201",
"鸡西": "101051101",
"鹤壁": "101181201",
"鹤岗": "101051201",
"鹰潭": "101241101",
"黄冈": "101200501",
"黄南": "101150301",
"黄山": "101221001",
"黄石": "101200601",
"黑河": "101050601",
"黔江": "101041100",
"黔阳": "101251301",
"齐齐哈尔": "101050201",
"龙岩": "101230701"
}