feat(数据源): 添加Reddit数据源并优化现有功能
新增Reddit数据源支持,包括获取、翻译和展示功能;优化Twitter数据源显示逻辑;添加fetch请求超时处理;调整数据源配置和分类;更新页脚链接和图片地址。
This commit is contained in:
@@ -32,7 +32,7 @@ async function callGeminiChatAPI(env, promptText, systemPromptText = null) {
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
const response = await fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(payload)
|
||||
@@ -137,7 +137,7 @@ async function* callGeminiChatAPIStream(env, promptText, systemPromptText = null
|
||||
|
||||
let response;
|
||||
try {
|
||||
response = await fetch(url, {
|
||||
response = await fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(payload)
|
||||
@@ -334,7 +334,7 @@ async function callOpenAIChatAPI(env, promptText, systemPromptText = null) {
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
const response = await fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -413,7 +413,7 @@ async function* callOpenAIChatAPIStream(env, promptText, systemPromptText = null
|
||||
|
||||
let response;
|
||||
try {
|
||||
response = await fetch(url, {
|
||||
response = await fetchWithTimeout(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -565,3 +565,34 @@ export async function* callChatAPIStream(env, promptText, systemPromptText = nul
|
||||
yield* callGeminiChatAPIStream(env, promptText, systemPromptText);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * `fetch` wrapper that aborts the request when it has not settled within
 * `timeout` milliseconds.
 *
 * A caller-supplied `options.signal` is honoured: aborting it aborts the
 * underlying request as well, and that abort is reported as-is rather than
 * being mislabelled as a timeout.
 *
 * Note: the timer is cleared as soon as `fetch` settles, so the timeout does
 * not bound how long the caller later spends reading the response body.
 *
 * @param {string} resource Request URL passed to `fetch`.
 * @param {object} [options={}] `fetch` init options.
 * @param {number} [timeout=60000] Timeout in milliseconds.
 * @returns {Promise<Response>} The response returned by `fetch`.
 * @throws {Error} `Request timed out` (with the abort error as `cause`) when
 *   the timer fires; any other `fetch` rejection is rethrown unchanged.
 */
async function fetchWithTimeout(resource, options = {}, timeout = 60000) {
  const controller = new AbortController();
  const callerSignal = options.signal;
  let timedOut = false;

  // Chain the caller's signal onto ours instead of silently discarding it.
  const forwardAbort = () => controller.abort(callerSignal.reason);
  if (callerSignal) {
    if (callerSignal.aborted) {
      forwardAbort();
    } else {
      callerSignal.addEventListener('abort', forwardAbort, { once: true });
    }
  }

  const timerId = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, timeout);

  try {
    return await fetch(resource, {
      ...options,
      signal: controller.signal, // our controller covers both timeout and caller aborts
    });
  } catch (error) {
    // Only an abort triggered by our own timer counts as a timeout.
    if (timedOut) {
      throw new Error('Request timed out', { cause: error });
    }
    // Caller aborts, network errors, etc.
    throw error;
  } finally {
    // Release the timer and the listener so nothing outlives the request.
    clearTimeout(timerId);
    if (callerSignal) {
      callerSignal.removeEventListener('abort', forwardAbort);
    }
  }
}
|
||||
@@ -7,13 +7,14 @@ import QBitDataSource from './dataSources/qbit.js';
|
||||
import JiqizhixinDataSource from './dataSources/jiqizhixin.js';
|
||||
import XiaohuDataSource from './dataSources/xiaohu.js';
|
||||
import TwitterDataSource from './dataSources/twitter.js';
|
||||
import RedditDataSource from './dataSources/reddit.js';
|
||||
|
||||
// Registry of data sources, keyed by category. Each category carries a display
// name and an array of source modules so several sources can feed one category.
// Every key appears exactly once: a duplicated key in an object literal is
// silently overridden by the last occurrence, which hides stale entries.
export const dataSources = {
  news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource, QBitDataSource, XinZhiYuanDataSource] },
  project: { name: '项目', sources: [GithubTrendingDataSource] },
  paper: { name: '论文', sources: [HuggingfacePapersDataSource, JiqizhixinDataSource] },
  socialMedia: { name: '社交平台', sources: [TwitterDataSource, RedditDataSource] },
  // Add new data sources here as arrays, e.g.,
  // newType: { name: '新类型', sources: [NewTypeDataSource1, NewTypeDataSource2] },
};
|
||||
|
||||
200
src/dataSources/reddit.js
Normal file
200
src/dataSources/reddit.js
Normal file
@@ -0,0 +1,200 @@
|
||||
import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml} from '../helpers';
|
||||
import { callChatAPI } from '../chatapi.js';
|
||||
import { removeMarkdownCodeBlock } from '../helpers.js';
|
||||
|
||||
/**
 * Wrap a list of items in the JSON Feed 1.1 envelope used for Reddit data.
 *
 * @param {Array<object>} items Feed items to expose.
 * @returns {object} JSON Feed object.
 */
const buildRedditFeed = (items) => ({
  version: "https://jsonfeed.org/version/1.1",
  title: "Reddit Feeds",
  home_page_url: "https://www.reddit.com/",
  description: "Aggregated Reddit feeds from various subreddits/users",
  language: "zh-cn",
  items,
});

/**
 * Give every item a `title_zh` that simply mirrors its original title.
 *
 * @param {Array<object>} items Feed items.
 * @returns {Array<object>} New item objects with `title_zh` set.
 */
const withOriginalTitles = (items) =>
  items.map((item) => ({ ...item, title_zh: item.title || "" }));

/**
 * Reddit data source: pulls entries for the list `env.REDDIT_LIST_ID` by
 * POSTing to `env.FOLO_DATA_API`, optionally translates the titles to Chinese
 * through `callChatAPI`, and renders the items as HTML.
 */
const RedditDataSource = {
  /**
   * Fetch up to `env.REDDIT_FETCH_PAGES` pages (default 3), keep entries that
   * `isDateWithinLastDays` accepts for `env.FOLO_FILTER_DAYS` (default 3), and
   * attach a `title_zh` to every item.
   *
   * Translation only runs when `env.OPEN_TRANSLATE` is exactly the string
   * "true"; otherwise, or when translation fails, `title_zh` is the original
   * title.
   *
   * @param {object} env Environment bindings / configuration.
   * @param {string} [foloCookie] Cookie header value sent with each request, if any.
   * @returns {Promise<object>} JSON Feed object; `items` is empty when the list
   *   id is missing or nothing passed the date filter.
   */
  async fetch(env, foloCookie) {
    const listId = env.REDDIT_LIST_ID;
    const fetchPages = parseInt(env.REDDIT_FETCH_PAGES || '3', 10);
    const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10);
    const allRedditItems = [];

    if (!listId) {
      console.error('REDDIT_LIST_ID is not set in environment variables.');
      return buildRedditFeed([]);
    }

    // Pagination cursor: publish time of the last entry of the previous page.
    let publishedAfter = null;
    for (let i = 0; i < fetchPages; i++) {
      const userAgent = getRandomUserAgent();
      const headers = {
        'User-Agent': userAgent,
        'Content-Type': 'application/json',
        'accept': 'application/json',
        'accept-language': 'zh-CN,zh;q=0.9',
        'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1',
        'origin': 'https://app.follow.is',
        'priority': 'u=1, i',
        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'x-app-name': 'Folo Web',
        'x-app-version': '0.4.9',
      };

      if (foloCookie) {
        headers['Cookie'] = foloCookie;
      }

      const body = {
        listId: listId,
        view: 1,
        withContent: true,
      };

      if (publishedAfter) {
        body.publishedAfter = publishedAfter;
      }

      try {
        console.log(`Fetching Reddit data, page ${i + 1}...`);
        const response = await fetch(env.FOLO_DATA_API, {
          method: 'POST',
          headers: headers,
          body: JSON.stringify(body),
        });

        if (!response.ok) {
          console.error(`Failed to fetch Reddit data, page ${i + 1}: ${response.statusText}`);
          break;
        }
        const data = await response.json();
        if (data && data.data && data.data.length > 0) {
          const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays));
          allRedditItems.push(...filteredItems.map(entry => ({
            id: entry.entries.id,
            url: entry.entries.url,
            title: entry.entries.title,
            content_html: entry.entries.content,
            date_published: entry.entries.publishedAt,
            authors: [{ name: entry.entries.author }],
            source: `${entry.feeds.title}`,
          })));
          // The cursor advances from the unfiltered page so paging continues
          // even when every entry on this page was filtered out.
          publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
        } else {
          console.log(`No more data for Reddit, page ${i + 1}.`);
          break;
        }
      } catch (error) {
        // Best effort: keep what was collected so far and stop paging.
        console.error(`Error fetching Reddit data, page ${i + 1}:`, error);
        break;
      }

      // Random pause between pages; pointless once the last page is done.
      if (i < fetchPages - 1) {
        await sleep(Math.random() * 5000);
      }
    }

    const redditData = buildRedditFeed(allRedditItems);

    if (redditData.items.length === 0) {
      console.log("No reddit posts found for today or after filtering.");
      return redditData;
    }

    // Was `!env.OPEN_TRANSLATE === "true"`: `!` binds tighter than `===`, so a
    // boolean was compared with a string and this branch could never run.
    if (env.OPEN_TRANSLATE !== "true") {
      console.warn("Skipping reddit translations.");
      redditData.items = withOriginalTitles(redditData.items);
      return redditData;
    }

    // The array index doubles as a stable id to match translations back.
    const itemsToTranslate = redditData.items.map((item, index) => ({
      id: index,
      original_title: item.title || ""
    }));

    const hasContentToTranslate = itemsToTranslate.some(item => item.original_title.trim() !== "");
    if (!hasContentToTranslate) {
      console.log("No non-empty reddit titles to translate for today's posts.");
      redditData.items = withOriginalTitles(redditData.items);
      return redditData;
    }

    const promptText = `You will be given a JSON array of reddit data objects. Each object has an "id" and "original_title".
Translate "original_title" into Chinese.
Return a JSON array of objects. Each output object MUST have:
- "id": The same id from the input.
- "title_zh": Chinese translation of "original_title". Empty if original is empty.
Input: ${JSON.stringify(itemsToTranslate)}
Respond ONLY with the JSON array.`;

    const translatedItemsMap = new Map();
    try {
      console.log(`Requesting translation for ${itemsToTranslate.length} reddit titles for today.`);
      const chatResponse = await callChatAPI(env, promptText);
      const parsedTranslations = JSON.parse(removeMarkdownCodeBlock(chatResponse));

      // The model may answer with something other than an array; ignore it then.
      if (Array.isArray(parsedTranslations)) {
        parsedTranslations.forEach(translatedItem => {
          if (translatedItem && typeof translatedItem.id === 'number' &&
              typeof translatedItem.title_zh === 'string') {
            translatedItemsMap.set(translatedItem.id, translatedItem);
          }
        });
      }
    } catch (translationError) {
      // Translation is optional: fall through and use the original titles.
      console.error("Failed to translate reddit titles in batch:", translationError.message);
    }

    redditData.items = redditData.items.map((originalItem, index) => {
      const translatedData = translatedItemsMap.get(index);
      return {
        ...originalItem,
        title_zh: translatedData ? translatedData.title_zh : (originalItem.title || "")
      };
    });

    return redditData;
  },

  /**
   * Convert the feed returned by `fetch` into the flat item shape returned to
   * the caller.
   *
   * @param {object} rawData Feed object with an `items` array.
   * @param {string} sourceType Value copied into each item's `type`.
   * @returns {Array<object>} Normalised items; empty when `rawData` has no items.
   */
  transform(rawData, sourceType) {
    if (!rawData || !rawData.items) {
      return [];
    }

    return rawData.items.map(item => ({
      id: item.id,
      type: sourceType,
      url: item.url,
      title: item.title_zh || item.title, // Use translated title if available
      description: stripHtml(item.content_html || ""),
      published_date: item.date_published,
      authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown',
      source: item.source || 'reddit',
      details: {
        content_html: item.content_html || ""
      }
    }));
  },

  /**
   * Render one transformed item as an HTML fragment.
   *
   * NOTE(review): `details.content_html` is inserted without escaping, so the
   * upstream HTML is rendered as-is; confirm the feed content is trusted or
   * sanitised before display.
   *
   * @param {object} item Item produced by `transform`.
   * @returns {string} HTML fragment.
   */
  generateHtml: (item) => {
    return `
      <strong>${escapeHtml(item.title)}</strong><br>
      <small>来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)}</small>
      <div class="content-html">
        ${item.details.content_html || '无内容。'}
      </div>
      <a href="${escapeHtml(item.url)}" target="_blank" rel="noopener noreferrer">查看 Reddit 帖子</a>
    `;
  }
};
|
||||
|
||||
export default RedditDataSource;
|
||||
@@ -77,7 +77,7 @@ const TwitterDataSource = {
|
||||
content_html: entry.entries.content,
|
||||
date_published: entry.entries.publishedAt,
|
||||
authors: [{ name: entry.entries.author }],
|
||||
source: entry.feeds.title && entry.feeds.title.includes('即刻圈子') ? `${entry.feeds.title} - ${entry.entries.author}` : `twitter-${entry.entries.author}`,
|
||||
source: entry.feeds.title && entry.feeds.title.startsWith('Twitter') ? `twitter-${entry.entries.author}` : `${entry.feeds.title} - ${entry.entries.author}` ,
|
||||
})));
|
||||
publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
|
||||
} else {
|
||||
|
||||
@@ -7,8 +7,8 @@ export function insertFoot() {
|
||||
|
||||
| 🎙️ **小宇宙** | 📹 **抖音** |
|
||||
| --- | --- |
|
||||
| [来生小酒馆](https://www.xiaoyuzhoufm.com/podcast/683c62b7c1ca9cf575a5030e) | [来生情报站](https://www.douyin.com/user/MS4wLjABAAAAwpwqPQlu38sO38VyWgw9ZjDEnN4bMR5j8x111UxpseHR9DpB6-CveI5KRXOWuFwG)|
|
||||
|  |  |
|
||||
| [来生小酒馆](https://www.xiaoyuzhoufm.com/podcast/683c62b7c1ca9cf575a5030e) | [自媒体账号](https://www.douyin.com/user/MS4wLjABAAAAwpwqPQlu38sO38VyWgw9ZjDEnN4bMR5j8x111UxpseHR9DpB6-CveI5KRXOWuFwG)|
|
||||
|  |  |
|
||||
|
||||
`;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ export async function handleCommitToGitHub(request, env) {
|
||||
|
||||
if (dailyMd) {
|
||||
filesToCommit.push({ path: `daily/${dateStr}.md`, content: formatMarkdownText(dailyMd), description: "Daily Summary File" });
|
||||
report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env.IMG_PROXY, dailyMd)));
|
||||
report.content_html = marked.parse(formatMarkdownText(env.IMG_PROXY, dailyMd));
|
||||
storeInKV(env.DATA_KV, `${dateStr}-report`, report);
|
||||
}
|
||||
if (podcastMd) {
|
||||
|
||||
@@ -2,22 +2,31 @@ import { replaceImageProxy, formatMarkdownText, formatDateToGMT12WithTime } from
|
||||
import { getDailyReportContent } from '../github.js';
|
||||
import { storeInKV } from '../kv.js';
|
||||
import { marked } from '../marked.esm.js';
|
||||
import { callChatAPI } from '../chatapi.js'; // 导入 callChatAPI
|
||||
import { getSummarizationSimplifyPrompt } from "../prompt/summarizationSimplifyPrompt";
|
||||
|
||||
export async function handleWriteRssData(request, env) {
|
||||
const url = new URL(request.url);
|
||||
const dateStr = url.searchParams.get('date');
|
||||
console.log(`[writeRssData] Received request for date: ${dateStr}`);
|
||||
|
||||
if (!dateStr) {
|
||||
console.error('[writeRssData] Missing date parameter');
|
||||
return new Response('Missing date parameter', { status: 400 });
|
||||
}
|
||||
|
||||
try {
|
||||
const path = `daily/${dateStr}.md`;
|
||||
const content = await getDailyReportContent(env, path);
|
||||
console.log(`[writeRssData] Attempting to get content from GitHub path: ${path}`);
|
||||
let content = await getDailyReportContent(env, path);
|
||||
|
||||
if (!content) {
|
||||
console.warn(`[writeRssData] No content found for ${path}. Returning 404.`);
|
||||
return new Response(`No content found for ${path}`, { status: 404 });
|
||||
}
|
||||
console.log(`[writeRssData] Successfully retrieved content for ${path}. Content length: ${content.length}`);
|
||||
|
||||
//content = extractContentFromSecondHash(content);
|
||||
// 从 "YYYY-MM-DD" 格式的 dateStr 中提取 "YYYY-MM"
|
||||
const yearMonth = dateStr.substring(0, 7);
|
||||
const report = {
|
||||
@@ -28,15 +37,61 @@ export async function handleWriteRssData(request, env) {
|
||||
// 可以添加其他相關欄位,例如作者、來源等
|
||||
published_date: formatDateToGMT12WithTime(new Date()) // 記錄保存時間
|
||||
}
|
||||
report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env.IMG_PROXY, content)));
|
||||
storeInKV(env.DATA_KV, `${dateStr}-report`, report);
|
||||
report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env, content)));
|
||||
|
||||
const kvKey = `${dateStr}-report`;
|
||||
console.log(`[writeRssData] Preparing to store report in KV. Key: ${kvKey}, Report object:`, JSON.stringify(report).substring(0, 200) + '...'); // Log first 200 chars
|
||||
await storeInKV(env.DATA_KV, kvKey, report);
|
||||
console.log(`[writeRssData] Successfully stored report in KV with key: ${kvKey}`);
|
||||
|
||||
return new Response(JSON.stringify(report), {
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
status: 200
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error handling daily report:', error.message);
|
||||
console.error('[writeRssData] Error handling daily report:', error.message, error.stack);
|
||||
return new Response(`Error handling daily report: ${error.message}`, { status: 500 });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Extract the part of `content` that starts at the second `###` marker
 * (marker included) and drop the final `###` section that follows it.
 *
 * The content is returned unchanged when it is not a string or contains fewer
 * than two `###` markers. When the second marker is also the last one there
 * is no trailing section to drop, so everything from that marker onwards is
 * returned instead of an empty string.
 *
 * @param {string} content - Original text.
 * @returns {string} The extracted text, or `content` itself when nothing can be extracted.
 */
export function extractContentFromSecondHash(content) {
  if (typeof content !== 'string') {
    return content;
  }

  const marker = '###';
  const parts = content.split(marker);
  if (parts.length <= 2) {
    // Fewer than two markers: nothing to cut.
    return content;
  }

  // Re-join everything from the second marker onwards.
  const fromSecond = marker + parts.slice(2).join(marker);

  // `fromSecond` always starts with the marker, so lastIndexOf never returns
  // -1; index 0 means there is no later section to trim.
  const lastMarkerIndex = fromSecond.lastIndexOf(marker);
  return lastMarkerIndex > 0 ? fromSecond.substring(0, lastMarkerIndex) : fromSecond;
}
|
||||
|
||||
/**
 * Run `promptText` through `callChatAPI`, using the prompt from
 * `getSummarizationSimplifyPrompt()` as the system prompt, and append
 * `env.INSERT_APP_URL` to the result when it is configured.
 * Exported so other modules can call it.
 *
 * @param {object} env - Environment object holding the AI model configuration
 *   and the optional `INSERT_APP_URL` footer.
 * @param {string} promptText - User prompt.
 * @returns {Promise<string>} Generated content, followed by the footer when one is configured.
 * @throws {Error} `Failed to generate AI content: …` when the chat API call
 *   fails; the underlying error is available as `cause`.
 */
export async function generateAIContent(env, promptText) {
  console.log(`[generateAIContent] Calling AI model with prompt: ${promptText.substring(0, 100)}...`);
  try {
    const aiText = await callChatAPI(env, promptText, getSummarizationSimplifyPrompt());
    console.log(`[generateAIContent] AI model returned content. Length: ${aiText.length}`);

    // Without this guard an unset INSERT_APP_URL would append the literal
    // text "undefined" to the generated content.
    if (!env.INSERT_APP_URL) {
      return aiText;
    }
    return `${aiText}\n\n </br>${env.INSERT_APP_URL}`;
  } catch (error) {
    console.error('[generateAIContent] Error calling AI model:', error.message, error.stack);
    throw new Error(`Failed to generate AI content: ${error.message}`, { cause: error });
  }
}
|
||||
|
||||
7
src/prompt/summarizationSimplifyPrompt.js
Normal file
7
src/prompt/summarizationSimplifyPrompt.js
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
 * Build the system prompt that asks the model to condense each paragraph into
 * one short, plainly worded sentence while keeping the sub-headings and
 * numbering the resulting paragraphs from 1.
 *
 * @returns {string} The prompt text, wrapped in a leading and a trailing newline.
 */
export function getSummarizationSimplifyPrompt() {
  const instructionLines = [
    '简化每一段的文字为一句话描述,每句话不超过30个字,将所有的句子过渡词和连接词替换为最基础、最常用的词语。尽量使用简单、直接的表达方式,避免使用复杂或生僻的词汇。确保句子之间的逻辑关系清晰。',
    '可以合并同类的输出信息,保持原有的小标题,为生成后的每一段内容从1开始排序.',
  ];
  return `\n${instructionLines.join('\n')}\n`;
}
|
||||
@@ -34,7 +34,9 @@ QBIT_FETCH_PAGES = "1"
|
||||
XINZHIYUAN_FEED_ID = "60901577013168128"
|
||||
XINZHIYUAN_FETCH_PAGES = "1"
|
||||
TWITTER_LIST_ID = "153028784690326528"
|
||||
TWITTER_FETCH_PAGES = "5"
|
||||
TWITTER_FETCH_PAGES = "2"
|
||||
REDDIT_LIST_ID = "167576006499975168"
|
||||
REDDIT_FETCH_PAGES = "2"
|
||||
PROJECTS_API_URL = "https://git-trending.justlikemaki.vip/topone/?since=daily"
|
||||
GITHUB_TOKEN = "github_pat_xxxxxx"
|
||||
GITHUB_REPO_OWNER = "justlovemaki"
|
||||
@@ -48,4 +50,5 @@ PODCAST_TITLE = "来生小酒馆"
|
||||
PODCAST_BEGIN = "嘿,亲爱的V,欢迎收听新一期的来生情报站,我是你们的老朋友,何夕2077"
|
||||
PODCAST_END = "今天的情报就到这里,注意隐蔽,赶紧撤离"
|
||||
BOOK_LINK = ""
|
||||
INSERT_FOOT = "false"
|
||||
INSERT_FOOT = "false"
|
||||
INSERT_APP_URL = "<h3>[查看完整版AI日报↗️ https://ai.hubtoday.app/](https://ai.hubtoday.app/)</h3>"
|
||||
Reference in New Issue
Block a user