From 8374c32ae595c116a8532250ca5c0026656dfc41 Mon Sep 17 00:00:00 2001 From: justlovemaki <274166795@qq.com> Date: Tue, 15 Jul 2025 17:56:40 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E6=95=B0=E6=8D=AE=E6=BA=90):=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0Reddit=E6=95=B0=E6=8D=AE=E6=BA=90=E5=B9=B6=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E7=8E=B0=E6=9C=89=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增Reddit数据源支持,包括获取、翻译和展示功能 优化Twitter数据源显示逻辑 添加fetch请求超时处理 调整数据源配置和分类 更新页脚链接和图片地址 --- src/chatapi.js | 39 ++++- src/dataFetchers.js | 7 +- src/dataSources/reddit.js | 200 ++++++++++++++++++++++ src/dataSources/twitter.js | 2 +- src/foot.js | 4 +- src/handlers/commitToGitHub.js | 2 +- src/handlers/writeRssData.js | 65 ++++++- src/prompt/summarizationSimplifyPrompt.js | 7 + wrangler.toml | 7 +- 9 files changed, 315 insertions(+), 18 deletions(-) create mode 100644 src/dataSources/reddit.js create mode 100644 src/prompt/summarizationSimplifyPrompt.js diff --git a/src/chatapi.js b/src/chatapi.js index 6d780a8..0855a3e 100644 --- a/src/chatapi.js +++ b/src/chatapi.js @@ -32,7 +32,7 @@ async function callGeminiChatAPI(env, promptText, systemPromptText = null) { } try { - const response = await fetch(url, { + const response = await fetchWithTimeout(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) @@ -137,7 +137,7 @@ async function* callGeminiChatAPIStream(env, promptText, systemPromptText = null let response; try { - response = await fetch(url, { + response = await fetchWithTimeout(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) @@ -334,7 +334,7 @@ async function callOpenAIChatAPI(env, promptText, systemPromptText = null) { }; try { - const response = await fetch(url, { + const response = await fetchWithTimeout(url, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -413,7 +413,7 @@ async function* callOpenAIChatAPIStream(env, promptText, systemPromptText = null let response; try { - response = await fetch(url, { + response = await fetchWithTimeout(url, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -565,3 +565,34 @@ export async function* callChatAPIStream(env, promptText, systemPromptText = nul yield* callGeminiChatAPIStream(env, promptText, systemPromptText); } } + + +/** + * 带有超时功能的 fetch 封装 + * @param {string} resource fetch 的请求 URL + * @param {object} options fetch 的配置对象 + * @param {number} timeout 超时时间,单位毫秒 + * @returns {Promise} + */ +async function fetchWithTimeout(resource, options = {}, timeout = 60000) { + const controller = new AbortController(); + const id = setTimeout(() => controller.abort(), timeout); + + try { + const response = await fetch(resource, { + ...options, + signal: controller.signal // 关联 AbortController + }); + return response; + } catch (error) { + // 当 abort() 被调用时,fetch 会抛出一个 AbortError + if (error.name === 'AbortError') { + throw new Error('Request timed out'); + } + // 其他网络错误等 + throw error; + } finally { + // 清除计时器,防止内存泄漏 + clearTimeout(id); + } +} \ No newline at end of file diff --git a/src/dataFetchers.js b/src/dataFetchers.js index f5bf958..8e65ae8 100644 --- a/src/dataFetchers.js +++ b/src/dataFetchers.js @@ -7,13 +7,14 @@ import QBitDataSource from './dataSources/qbit.js'; import JiqizhixinDataSource from './dataSources/jiqizhixin.js'; import XiaohuDataSource from './dataSources/xiaohu.js'; import TwitterDataSource from './dataSources/twitter.js'; +import RedditDataSource from './dataSources/reddit.js'; // Register data sources as arrays to support multiple sources per type export const dataSources = { - news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource] }, + news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource, QBitDataSource, XinZhiYuanDataSource] }, project: { name: '项目', sources: [GithubTrendingDataSource] }, - paper: { name: '论文', sources: [HuggingfacePapersDataSource, XinZhiYuanDataSource, QBitDataSource, JiqizhixinDataSource] }, - socialMedia: { name: '社交平台', sources: [TwitterDataSource] }, + paper: { name: '论文', sources: [HuggingfacePapersDataSource, JiqizhixinDataSource] }, + socialMedia: { name: '社交平台', sources: [TwitterDataSource, RedditDataSource] }, // Add new data sources here as arrays, e.g., // newType: { name: '新类型', sources: [NewTypeDataSource1, NewTypeDataSource2] }, }; diff --git a/src/dataSources/reddit.js b/src/dataSources/reddit.js new file mode 100644 index 0000000..c229802 --- /dev/null +++ b/src/dataSources/reddit.js @@ -0,0 +1,200 @@ +import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml} from '../helpers'; +import { callChatAPI } from '../chatapi.js'; +import { removeMarkdownCodeBlock } from '../helpers.js'; + +const RedditDataSource = { + async fetch(env, foloCookie) { + const listId = env.REDDIT_LIST_ID; + const fetchPages = parseInt(env.REDDIT_FETCH_PAGES || '3', 10); + const allRedditItems = []; + const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10); + + if (!listId) { + console.error('REDDIT_LIST_ID is not set in environment variables.'); + return { + version: "https://jsonfeed.org/version/1.1", + title: "Reddit Feeds", + home_page_url: "https://www.reddit.com/", + description: "Aggregated Reddit feeds from various subreddits/users", + language: "zh-cn", + items: [] + }; + } + + let publishedAfter = null; + for (let i = 0; i < fetchPages; i++) { + const userAgent = getRandomUserAgent(); + const headers = { + 'User-Agent': userAgent, + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'accept-language': 'zh-CN,zh;q=0.9', + 'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1', + 'origin': 'https://app.follow.is', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', + 'sec-ch-ua-mobile': '?1', + 'sec-ch-ua-platform': '"Android"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'x-app-name': 'Folo Web', + 'x-app-version': '0.4.9', + }; + + if (foloCookie) { + headers['Cookie'] = foloCookie; + } + + const body = { + listId: listId, + view: 1, + withContent: true, + }; + + if (publishedAfter) { + body.publishedAfter = publishedAfter; + } + + try { + console.log(`Fetching Reddit data, page ${i + 1}...`); + const response = await fetch(env.FOLO_DATA_API, { + method: 'POST', + headers: headers, + body: JSON.stringify(body), + }); + + if (!response.ok) { + console.error(`Failed to fetch Reddit data, page ${i + 1}: ${response.statusText}`); + break; + } + const data = await response.json(); + if (data && data.data && data.data.length > 0) { + const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays)); + allRedditItems.push(...filteredItems.map(entry => ({ + id: entry.entries.id, + url: entry.entries.url, + title: entry.entries.title, + content_html: entry.entries.content, + date_published: entry.entries.publishedAt, + authors: [{ name: entry.entries.author }], + source: `${entry.feeds.title}` , + }))); + publishedAfter = data.data[data.data.length - 1].entries.publishedAt; + } else { + console.log(`No more data for Reddit, page ${i + 1}.`); + break; + } + } catch (error) { + console.error(`Error fetching Reddit data, page ${i + 1}:`, error); + break; + } + + await sleep(Math.random() * 5000); + } + + const redditData = { + version: "https://jsonfeed.org/version/1.1", + title: "Reddit Feeds", + home_page_url: "https://www.reddit.com/", + description: "Aggregated Reddit feeds from various subreddits/users", + language: "zh-cn", + items: allRedditItems + }; + + if (redditData.items.length === 0) { + console.log("No reddit posts found for today or after filtering."); + return redditData; + } + + if (!env.OPEN_TRANSLATE === "true") { + console.warn("Skipping reddit translations."); + redditData.items = redditData.items.map(item => ({ + ...item, + title_zh: item.title || "" + })); + return redditData; + } + + const itemsToTranslate = redditData.items.map((item, index) => ({ + id: index, + original_title: item.title || "" + })); + + const hasContentToTranslate = itemsToTranslate.some(item => item.original_title.trim() !== ""); + if (!hasContentToTranslate) { + console.log("No non-empty reddit titles to translate for today's posts."); + redditData.items = redditData.items.map(item => ({ ...item, title_zh: item.title || "" })); + return redditData; + } + + const promptText = `You will be given a JSON array of reddit data objects. Each object has an "id" and "original_title". +Translate "original_title" into Chinese. +Return a JSON array of objects. Each output object MUST have: +- "id": The same id from the input. +- "title_zh": Chinese translation of "original_title". Empty if original is empty. +Input: ${JSON.stringify(itemsToTranslate)} +Respond ONLY with the JSON array.`; + + let translatedItemsMap = new Map(); + try { + console.log(`Requesting translation for ${itemsToTranslate.length} reddit titles for today.`); + const chatResponse = await callChatAPI(env, promptText); + const parsedTranslations = JSON.parse(removeMarkdownCodeBlock(chatResponse)); + + if (parsedTranslations) { + parsedTranslations.forEach(translatedItem => { + if (translatedItem && typeof translatedItem.id === 'number' && + typeof translatedItem.title_zh === 'string') { + translatedItemsMap.set(translatedItem.id, translatedItem); + } + }); + } + } catch (translationError) { + console.error("Failed to translate reddit titles in batch:", translationError.message); + } + + redditData.items = redditData.items.map((originalItem, index) => { + const translatedData = translatedItemsMap.get(index); + return { + ...originalItem, + title_zh: translatedData ? translatedData.title_zh : (originalItem.title || "") + }; + }); + + return redditData; + }, + + transform(rawData, sourceType) { + if (!rawData || !rawData.items) { + return []; + } + + return rawData.items.map(item => ({ + id: item.id, + type: sourceType, + url: item.url, + title: item.title_zh || item.title, // Use translated title if available + description: stripHtml(item.content_html || ""), + published_date: item.date_published, + authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown', + source: item.source || 'reddit', + details: { + content_html: item.content_html || "" + } + })); + }, + + generateHtml: (item) => { + return ` + ${escapeHtml(item.title)}
+ 来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)} +
+ ${item.details.content_html || '无内容。'} +
+ 查看 Reddit 帖子 + `; + } +}; + +export default RedditDataSource; diff --git a/src/dataSources/twitter.js b/src/dataSources/twitter.js index 806f380..479e08c 100644 --- a/src/dataSources/twitter.js +++ b/src/dataSources/twitter.js @@ -77,7 +77,7 @@ const TwitterDataSource = { content_html: entry.entries.content, date_published: entry.entries.publishedAt, authors: [{ name: entry.entries.author }], - source: entry.feeds.title && entry.feeds.title.includes('即刻圈子') ? `${entry.feeds.title} - ${entry.entries.author}` : `twitter-${entry.entries.author}`, + source: entry.feeds.title && entry.feeds.title.startsWith('Twitter') ? `twitter-${entry.entries.author}` : `${entry.feeds.title} - ${entry.entries.author}` , }))); publishedAfter = data.data[data.data.length - 1].entries.publishedAt; } else { diff --git a/src/foot.js b/src/foot.js index b010820..3ee01c3 100644 --- a/src/foot.js +++ b/src/foot.js @@ -7,8 +7,8 @@ export function insertFoot() { | 🎙️ **小宇宙** | 📹 **抖音** | | --- | --- | -| [来生小酒馆](https://www.xiaoyuzhoufm.com/podcast/683c62b7c1ca9cf575a5030e) | [来生情报站](https://www.douyin.com/user/MS4wLjABAAAAwpwqPQlu38sO38VyWgw9ZjDEnN4bMR5j8x111UxpseHR9DpB6-CveI5KRXOWuFwG)| -| ![小酒馆](https://s1.imagehub.cc/images/2025/06/24/f959f7984e9163fc50d3941d79a7f262.md.png) | ![情报站](https://s1.imagehub.cc/images/2025/06/24/7fc30805eeb831e1e2baa3a240683ca3.md.png) | +| [来生小酒馆](https://www.xiaoyuzhoufm.com/podcast/683c62b7c1ca9cf575a5030e) | [自媒体账号](https://www.douyin.com/user/MS4wLjABAAAAwpwqPQlu38sO38VyWgw9ZjDEnN4bMR5j8x111UxpseHR9DpB6-CveI5KRXOWuFwG)| +| ![小酒馆](https://cdn.jsdmirror.com/gh/justlovemaki/imagehub@main/logo/f959f7984e9163fc50d3941d79a7f262.md.png) | ![情报站](https://cdn.jsdmirror.com/gh/justlovemaki/imagehub@main/logo/7fc30805eeb831e1e2baa3a240683ca3.md.png) | `; } diff --git a/src/handlers/commitToGitHub.js b/src/handlers/commitToGitHub.js index c231df7..03c23c5 100644 --- a/src/handlers/commitToGitHub.js +++ b/src/handlers/commitToGitHub.js @@ -28,7 +28,7 @@ export async function handleCommitToGitHub(request, env) { if (dailyMd) { filesToCommit.push({ path: `daily/${dateStr}.md`, content: formatMarkdownText(dailyMd), description: "Daily Summary File" }); - report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env.IMG_PROXY, dailyMd))); + report.content_html = marked.parse(formatMarkdownText(env.IMG_PROXY, dailyMd)); storeInKV(env.DATA_KV, `${dateStr}-report`, report); } if (podcastMd) { diff --git a/src/handlers/writeRssData.js b/src/handlers/writeRssData.js index 5678847..27df630 100644 --- a/src/handlers/writeRssData.js +++ b/src/handlers/writeRssData.js @@ -2,22 +2,31 @@ import { replaceImageProxy, formatMarkdownText, formatDateToGMT12WithTime } from import { getDailyReportContent } from '../github.js'; import { storeInKV } from '../kv.js'; import { marked } from '../marked.esm.js'; +import { callChatAPI } from '../chatapi.js'; // 导入 callChatAPI +import { getSummarizationSimplifyPrompt } from "../prompt/summarizationSimplifyPrompt"; export async function handleWriteRssData(request, env) { const url = new URL(request.url); const dateStr = url.searchParams.get('date'); + console.log(`[writeRssData] Received request for date: ${dateStr}`); if (!dateStr) { + console.error('[writeRssData] Missing date parameter'); return new Response('Missing date parameter', { status: 400 }); } try { const path = `daily/${dateStr}.md`; - const content = await getDailyReportContent(env, path); + console.log(`[writeRssData] Attempting to get content from GitHub path: ${path}`); + let content = await getDailyReportContent(env, path); + if (!content) { + console.warn(`[writeRssData] No content found for ${path}. Returning 404.`); return new Response(`No content found for ${path}`, { status: 404 }); } + console.log(`[writeRssData] Successfully retrieved content for ${path}. Content length: ${content.length}`); + //content = extractContentFromSecondHash(content); // 从 "YYYY-MM-DD" 格式的 dateStr 中提取 "YYYY-MM" const yearMonth = dateStr.substring(0, 7); const report = { @@ -28,15 +37,61 @@ export async function handleWriteRssData(request, env) { // 可以添加其他相關欄位,例如作者、來源等 published_date: formatDateToGMT12WithTime(new Date()) // 記錄保存時間 } - report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env.IMG_PROXY, content))); - storeInKV(env.DATA_KV, `${dateStr}-report`, report); + report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env, content))); + + const kvKey = `${dateStr}-report`; + console.log(`[writeRssData] Preparing to store report in KV. Key: ${kvKey}, Report object:`, JSON.stringify(report).substring(0, 200) + '...'); // Log first 200 chars + await storeInKV(env.DATA_KV, kvKey, report); + console.log(`[writeRssData] Successfully stored report in KV with key: ${kvKey}`); return new Response(JSON.stringify(report), { headers: { 'Content-Type': 'application/json' }, status: 200 }); } catch (error) { - console.error('Error handling daily report:', error.message); + console.error('[writeRssData] Error handling daily report:', error.message, error.stack); return new Response(`Error handling daily report: ${error.message}`, { status: 500 }); } -} \ No newline at end of file +} + +/** + * 从第二个 ### 开始截取内容,包括 ###。 + * + * @param {string} content - 原始文本内容。 + * @returns {string} 截取后的内容。 + */ +export function extractContentFromSecondHash(content) { + const parts = content.split('###'); + if (parts.length > 2) { + // 原始逻辑:重新组合从第二个 ### 开始的所有部分 + const newcontent = '###' + parts.slice(2).join('###'); + const lastHashIndex = newcontent.lastIndexOf('###'); + if (lastHashIndex !== -1) { + return newcontent.substring(0, lastHashIndex); + } + } + return content; // 如果没有找到 ### 或不符合上述条件,则返回原始内容 +} + +/** + * 调用 Gemini 或 OpenAI 模型生成指定提示词的内容。 + * 此方法可供外部调用。 + * + * @param {object} env - 环境对象,包含 AI 模型相关的配置。 + * @param {string} promptText - 用户提示词。 + * @returns {Promise} AI 模型生成的内容。 + * @throws {Error} 如果 API 调用失败或返回空内容。 + */ +export async function generateAIContent(env, promptText) { + console.log(`[generateAIContent] Calling AI model with prompt: ${promptText.substring(0, 100)}...`); + try { + let result = await callChatAPI(env, promptText, getSummarizationSimplifyPrompt()); + console.log(`[generateAIContent] AI model returned content. Length: ${result.length}`); + + result += "\n\n
"+env.INSERT_APP_URL; + return result; + } catch (error) { + console.error('[generateAIContent] Error calling AI model:', error.message, error.stack); + throw new Error(`Failed to generate AI content: ${error.message}`); + } +} diff --git a/src/prompt/summarizationSimplifyPrompt.js b/src/prompt/summarizationSimplifyPrompt.js new file mode 100644 index 0000000..53b54b7 --- /dev/null +++ b/src/prompt/summarizationSimplifyPrompt.js @@ -0,0 +1,7 @@ +// Add new data sources +export function getSummarizationSimplifyPrompt() { + return ` +简化每一段的文字为一句话描述,每句话不超过30个字,将所有的句子过渡词和连接词替换为最基础、最常用的词语。尽量使用简单、直接的表达方式,避免使用复杂或生僻的词汇。确保句子之间的逻辑关系清晰。 +可以合并同类的输出信息,保持原有的小标题,为生成后的每一段内容从1开始排序. + `; +} diff --git a/wrangler.toml b/wrangler.toml index 96c7fb4..d576edb 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -34,7 +34,9 @@ QBIT_FETCH_PAGES = "1" XINZHIYUAN_FEED_ID = "60901577013168128" XINZHIYUAN_FETCH_PAGES = "1" TWITTER_LIST_ID = "153028784690326528" -TWITTER_FETCH_PAGES = "5" +TWITTER_FETCH_PAGES = "2" +REDDIT_LIST_ID = "167576006499975168" +REDDIT_FETCH_PAGES = "2" PROJECTS_API_URL = "https://git-trending.justlikemaki.vip/topone/?since=daily" GITHUB_TOKEN = "github_pat_xxxxxx" GITHUB_REPO_OWNER = "justlovemaki" @@ -48,4 +50,5 @@ PODCAST_TITLE = "来生小酒馆" PODCAST_BEGIN = "嘿,亲爱的V,欢迎收听新一期的来生情报站,我是你们的老朋友,何夕2077" PODCAST_END = "今天的情报就到这里,注意隐蔽,赶紧撤离" BOOK_LINK = "" -INSERT_FOOT = "false" \ No newline at end of file +INSERT_FOOT = "false" +INSERT_APP_URL = "

[查看完整版AI日报↗️ https://ai.hubtoday.app/](https://ai.hubtoday.app/)

" \ No newline at end of file