feat: 重构数据源配置并优化日报生成流程

重构数据源配置,合并多个新闻源为聚合源,简化配置参数
新增广告插入功能和日报页面直接生成功能
优化时区处理为东八区并改进摘要生成提示词
移除不必要的翻译功能并更新相关依赖项
This commit is contained in:
justlovemaki
2025-08-07 16:55:00 +08:00
parent 2000d43058
commit 2834d7886c
16 changed files with 465 additions and 157 deletions

19
src/ad.js Normal file
View File

@@ -0,0 +1,19 @@
// Lines of the promotional Markdown section, kept verbatim. The empty first and
// last entries reproduce the leading and trailing newline of the rendered block.
const AD_MARKDOWN_LINES = [
    ``,
    `---`,
    `## **AI产品自荐: [AIClient2API ↗️](https://github.com/justlovemaki/AIClient-2-API)**`,
    `厌倦了在各种AI模型间来回切换被烦人的API额度限制束缚手脚现在你有了一个终极解决方案🎉 'AIClient-2-API' 不仅仅是一个普通的API代理它是一个能将 Gemini CLI 和 Kiro 客户端等工具“点石成金”,变为强大 OpenAI 兼容 API 的魔法盒子。`,
    `这个项目的核心魅力在于它的“逆向思维”和强大功能:`,
    `✨ **客户端变API解锁新姿势**:我们巧妙地利用 Gemini CLI 的 OAuth 登录,让你轻松**突破官方免费API的速率和额度限制**。更令人兴奋的是,通过封装 Kiro 客户端的接口,我们成功**破解其API让你能免费丝滑地调用强大的 Claude 模型**!这为你提供了 **“使用免费Claude API加 Claude Code开发编程的经济实用方案”**。`,
    `🔧 **系统提示词,由你掌控**想让AI更听话我们提供了强大的系统提示词System Prompt管理功能。你可以轻松**提取、替换('overwrite')或追加('append'**任何请求中的系统提示词在服务端精细地调整AI的行为而无需修改客户端代码。`,
    `💡 **顶级体验,平民成本**:想象一下,**在你的编辑器里用 Kilo 代码助手,加上 Cursor 的高效提示词,再配上任意顶级大模型——用 Cursor又何必是 Cursor** 本项目让你能以极低的成本组合出媲美付费工具的开发体验。同时支持MCP协议和图片、文档等多模态输入让你的创意不再受限。`,
    `告别繁琐配置和昂贵账单拥抱这个集免费、强大、灵活于一身的AI开发新范式吧`,
    ``,
];

/**
 * Returns the static Markdown advertisement section (horizontal rule, heading
 * and promotional paragraphs) as a single string.
 *
 * @returns {string} The Markdown block, starting and ending with a newline.
 */
export function insertAd() {
    return AD_MARKDOWN_LINES.join('\n');
}

View File

@@ -32,7 +32,7 @@ async function callGeminiChatAPI(env, promptText, systemPromptText = null) {
}
try {
const response = await fetchWithTimeout(url, {
const response = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload)
@@ -126,6 +126,10 @@ async function* callGeminiChatAPIStream(env, promptText, systemPromptText = null
contents: [{
parts: [{ text: promptText }]
}],
generationConfig: {
temperature: 1,
topP: 0.95
}
};
if (systemPromptText && typeof systemPromptText === 'string' && systemPromptText.trim() !== '') {
@@ -137,7 +141,7 @@ async function* callGeminiChatAPIStream(env, promptText, systemPromptText = null
let response;
try {
response = await fetchWithTimeout(url, {
response = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload)
@@ -334,7 +338,7 @@ async function callOpenAIChatAPI(env, promptText, systemPromptText = null) {
};
try {
const response = await fetchWithTimeout(url, {
const response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -413,7 +417,7 @@ async function* callOpenAIChatAPIStream(env, promptText, systemPromptText = null
let response;
try {
response = await fetchWithTimeout(url, {
response = await fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -574,7 +578,7 @@ export async function* callChatAPIStream(env, promptText, systemPromptText = nul
* @param {number} timeout 超时时间,单位毫秒
* @returns {Promise<Response>}
*/
async function fetchWithTimeout(resource, options = {}, timeout = 60000) {
async function fetchWithTimeout(resource, options = {}, timeout = 180000) {
const controller = new AbortController();
const id = setTimeout(() => controller.abort(), timeout);

View File

@@ -1,19 +1,16 @@
// src/dataFetchers.js
import AibaseDataSource from './dataSources/aibase.js';
import NewsAggregatorDataSource from './dataSources/newsAggregator.js';
import GithubTrendingDataSource from './dataSources/github-trending.js';
import HuggingfacePapersDataSource from './dataSources/huggingface-papers.js';
import XinZhiYuanDataSource from './dataSources/xinzhiyuan.js';
import QBitDataSource from './dataSources/qbit.js';
import JiqizhixinDataSource from './dataSources/jiqizhixin.js';
import XiaohuDataSource from './dataSources/xiaohu.js';
import PapersDataSource from './dataSources/papers.js';
import TwitterDataSource from './dataSources/twitter.js';
import RedditDataSource from './dataSources/reddit.js';
// Register data sources as arrays to support multiple sources per type
export const dataSources = {
news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource, QBitDataSource, XinZhiYuanDataSource] },
news: { name: '新闻', sources: [NewsAggregatorDataSource] },
project: { name: '项目', sources: [GithubTrendingDataSource] },
paper: { name: '论文', sources: [HuggingfacePapersDataSource, JiqizhixinDataSource] },
paper: { name: '论文', sources: [PapersDataSource] },
socialMedia: { name: '社交平台', sources: [TwitterDataSource, RedditDataSource] },
// Add new data sources here as arrays, e.g.,
// newType: { name: '新类型', sources: [NewTypeDataSource1, NewTypeDataSource2] },

View File

@@ -0,0 +1,135 @@
import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml } from '../helpers';
/**
 * News data source backed by one aggregated list served by the Folo data API
 * (`env.FOLO_DATA_API`). Exposes the three hooks used by the data-source
 * registry: `fetch`, `transform` and `generateHtml`.
 */
const NewsAggregatorDataSource = {
    type: 'news-aggregator',

    /**
     * Downloads up to `NEWS_AGGREGATOR_FETCH_PAGES` pages of the list identified
     * by `NEWS_AGGREGATOR_LIST_ID` and keeps the entries accepted by
     * `isDateWithinLastDays(publishedAt, FOLO_FILTER_DAYS)`.
     *
     * Network/HTTP/parse failures are logged and stop pagination; whatever was
     * collected up to that point is still returned (best effort, never throws
     * for those cases).
     *
     * @param {object} env - Worker environment; reads NEWS_AGGREGATOR_LIST_ID,
     *   NEWS_AGGREGATOR_FETCH_PAGES (default 1), FOLO_FILTER_DAYS (default 3)
     *   and FOLO_DATA_API.
     * @param {string} [foloCookie] - Optional Cookie header value sent with each request.
     * @returns {Promise<object>} A JSON Feed 1.1 shaped object whose `items` holds the kept entries.
     */
    async fetch(env, foloCookie) {
        const listId = env.NEWS_AGGREGATOR_LIST_ID;

        // A non-numeric env value parses to NaN, which would silently skip the
        // loop (pages) or be passed to the date filter (days); use the defaults.
        const parsedPages = parseInt(env.NEWS_AGGREGATOR_FETCH_PAGES || '1', 10);
        const fetchPages = Number.isNaN(parsedPages) ? 1 : parsedPages;
        const parsedDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10);
        const filterDays = Number.isNaN(parsedDays) ? 3 : parsedDays;

        // Single place that defines the feed envelope for both return paths.
        const buildFeed = (items) => ({
            version: "https://jsonfeed.org/version/1.1",
            title: "Aggregated News",
            home_page_url: "https://example.com/news",
            description: "Aggregated news from various sources",
            language: "zh-cn",
            items,
        });

        if (!listId) {
            console.warn('NEWS_AGGREGATOR_LIST_ID is not set in environment variables. Skipping news aggregator fetch.');
            return buildFeed([]);
        }

        const allNewsItems = [];
        // Pagination cursor: publishedAt of the last entry of the previous page.
        let publishedAfter = null;

        for (let i = 0; i < fetchPages; i++) {
            const userAgent = getRandomUserAgent();
            // Browser-like headers copied from the Folo web client.
            const headers = {
                'User-Agent': userAgent,
                'Content-Type': 'application/json',
                'accept': 'application/json',
                'accept-language': 'zh-CN,zh;q=0.9',
                'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1',
                'origin': 'https://app.follow.is',
                'priority': 'u=1, i',
                'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
                'sec-ch-ua-mobile': '?1',
                'sec-ch-ua-platform': '"Android"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-site',
                'x-app-name': 'Folo Web',
                'x-app-version': '0.4.9',
            };
            if (foloCookie) {
                headers['Cookie'] = foloCookie;
            }

            const body = {
                listId: listId,
                view: 1,
                withContent: true,
            };
            if (publishedAfter) {
                body.publishedAfter = publishedAfter;
            }

            try {
                console.log(`Fetching News Aggregator data, page ${i + 1}...`);
                const response = await fetch(env.FOLO_DATA_API, {
                    method: 'POST',
                    headers: headers,
                    body: JSON.stringify(body),
                });
                if (!response.ok) {
                    console.error(`Failed to fetch News Aggregator data, page ${i + 1}: ${response.statusText}`);
                    break;
                }

                const data = await response.json();
                if (data && data.data && data.data.length > 0) {
                    const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays));
                    allNewsItems.push(...filteredItems.map(entry => ({
                        id: entry.entries.id,
                        url: entry.entries.url,
                        title: entry.entries.title,
                        content_html: entry.entries.content,
                        date_published: entry.entries.publishedAt,
                        authors: [{ name: entry.entries.author }],
                        source: entry.entries.author ? `${entry.feeds.title} - ${entry.entries.author}` : entry.feeds.title,
                    })));
                    // The cursor advances from the unfiltered page so that
                    // pagination continues even when entries were filtered out.
                    publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
                } else {
                    console.log(`No more data for News Aggregator, page ${i + 1}.`);
                    break;
                }
            } catch (error) {
                console.error(`Error fetching News Aggregator data, page ${i + 1}:`, error);
                break;
            }

            // Random pause between consecutive requests only; there is nothing
            // to throttle after the final page, so do not delay the return.
            if (i < fetchPages - 1) {
                await sleep(Math.random() * 5000);
            }
        }

        return buildFeed(allNewsItems);
    },

    /**
     * Converts the feed object produced by `fetch` into the unified item shape.
     *
     * @param {object} rawData - Feed object with an `items` array.
     * @param {string} sourceType - Value stored in each item's `type` field.
     * @returns {object[]} Unified items; an empty array when `rawData` or `rawData.items` is missing.
     */
    transform(rawData, sourceType) {
        if (!rawData || !rawData.items) {
            return [];
        }
        return rawData.items.map(item => ({
            id: item.id,
            type: sourceType,
            url: item.url,
            title: item.title,
            description: stripHtml(item.content_html || ""),
            published_date: item.date_published,
            authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown',
            source: item.source || 'Aggregated News',
            details: {
                content_html: item.content_html || ""
            }
        }));
    },

    /**
     * Renders one unified item as an HTML fragment (title, source/date line,
     * content and a "read more" link).
     *
     * @param {object} item - Unified item as returned by `transform`.
     * @returns {string} HTML fragment, starting and ending with a newline.
     */
    generateHtml: (item) => {
        // NOTE(review): content_html is inserted without escaping — presumably
        // it is trusted/sanitized upstream; confirm before rendering untrusted feeds.
        return [
            '',
            `<strong>${escapeHtml(item.title)}</strong><br>`,
            `<small>来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)}</small>`,
            `<div class="content-html">${item.details.content_html || '无内容。'}</div>`,
            `<a href="${escapeHtml(item.url)}" target="_blank" rel="noopener noreferrer">阅读更多</a>`,
            '',
        ].join('\n');
    }
};
export default NewsAggregatorDataSource;

137
src/dataSources/papers.js Normal file
View File

@@ -0,0 +1,137 @@
import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml } from '../helpers';
/**
 * Papers data source backed by one aggregated list served by the Folo data API
 * (`env.FOLO_DATA_API`). Exposes the three hooks used by the data-source
 * registry: `fetch`, `transform` and `generateHtml`.
 */
const PapersDataSource = {
    type: 'papers',

    /**
     * Downloads up to `HGPAPERS_FETCH_PAGES` pages of the list identified by
     * `HGPAPERS_LIST_ID` and keeps the entries accepted by
     * `isDateWithinLastDays(publishedAt, FOLO_FILTER_DAYS)`.
     *
     * Network/HTTP/parse failures are logged and stop pagination; whatever was
     * collected up to that point is still returned (best effort, never throws
     * for those cases).
     *
     * @param {object} env - Worker environment; reads HGPAPERS_LIST_ID,
     *   HGPAPERS_FETCH_PAGES (default 1), FOLO_FILTER_DAYS (default 3) and
     *   FOLO_DATA_API.
     * @param {string} [foloCookie] - Optional Cookie header value sent with each request.
     * @returns {Promise<object>} A JSON Feed 1.1 shaped object whose `items` holds the kept entries.
     */
    async fetch(env, foloCookie) {
        const hgPapersListId = env.HGPAPERS_LIST_ID;

        // A non-numeric env value parses to NaN, which would silently skip the
        // loop (pages) or be passed to the date filter (days); use the defaults.
        const parsedPages = parseInt(env.HGPAPERS_FETCH_PAGES || '1', 10);
        const fetchPages = Number.isNaN(parsedPages) ? 1 : parsedPages;
        const parsedDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10);
        const filterDays = Number.isNaN(parsedDays) ? 3 : parsedDays;

        // Single place that defines the feed envelope for both return paths.
        const buildFeed = (items) => ({
            version: "https://jsonfeed.org/version/1.1",
            title: "Aggregated Papers",
            home_page_url: "https://example.com/papers",
            description: "Aggregated papers from various sources",
            language: "zh-cn",
            items,
        });

        if (!hgPapersListId) {
            console.warn('HGPAPERS_LIST_ID is not set in environment variables. Skipping papers fetch.');
            return buildFeed([]);
        }

        const allPaperItems = [];
        // Pagination cursor: publishedAt of the last entry of the previous page.
        let publishedAfter = null;

        for (let i = 0; i < fetchPages; i++) {
            const userAgent = getRandomUserAgent();
            // Browser-like headers copied from the Folo web client.
            const headers = {
                'User-Agent': userAgent,
                'Content-Type': 'application/json',
                'accept': 'application/json',
                'accept-language': 'zh-CN,zh;q=0.9',
                'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1',
                'origin': 'https://app.follow.is',
                'priority': 'u=1, i',
                'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
                'sec-ch-ua-mobile': '?1',
                'sec-ch-ua-platform': '"Android"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-site',
                'x-app-name': 'Folo Web',
                'x-app-version': '0.4.9',
            };
            if (foloCookie) {
                headers['Cookie'] = foloCookie;
            }

            const body = {
                listId: hgPapersListId,
                view: 1,
                withContent: true,
            };
            if (publishedAfter) {
                body.publishedAfter = publishedAfter;
            }

            try {
                console.log(`Fetching Papers data, page ${i + 1}...`);
                const response = await fetch(env.FOLO_DATA_API, {
                    method: 'POST',
                    headers: headers,
                    body: JSON.stringify(body),
                });
                if (!response.ok) {
                    console.error(`Failed to fetch Papers data, page ${i + 1}: ${response.statusText}`);
                    break;
                }

                const data = await response.json();
                if (data && data.data && data.data.length > 0) {
                    const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays));
                    allPaperItems.push(...filteredItems.map(entry => ({
                        id: entry.entries.id,
                        url: entry.entries.url,
                        title: entry.entries.title,
                        content_html: entry.entries.content,
                        date_published: entry.entries.publishedAt,
                        authors: [{ name: entry.entries.author }],
                        source: entry.feeds.title,
                    })));
                    // The cursor advances from the unfiltered page so that
                    // pagination continues even when entries were filtered out.
                    publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
                } else {
                    console.log(`No more data for Papers, page ${i + 1}.`);
                    break;
                }
            } catch (error) {
                console.error(`Error fetching Papers data, page ${i + 1}:`, error);
                break;
            }

            // Random pause between consecutive requests only; there is nothing
            // to throttle after the final page, so do not delay the return.
            if (i < fetchPages - 1) {
                await sleep(Math.random() * 5000);
            }
        }

        return buildFeed(allPaperItems);
    },

    /**
     * Converts the feed object produced by `fetch` into the unified item shape.
     *
     * @param {object} rawData - Feed object with an `items` array.
     * @param {string} sourceType - Value stored in each item's `type` field.
     * @returns {object[]} Unified items; an empty array when `rawData` or `rawData.items` is missing.
     */
    transform(rawData, sourceType) {
        if (!rawData || !rawData.items) {
            return [];
        }
        return rawData.items.map(item => ({
            id: item.id,
            type: sourceType,
            url: item.url,
            title: item.title,
            description: stripHtml(item.content_html || ""),
            published_date: item.date_published,
            authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown',
            source: item.source || 'Aggregated Papers',
            details: {
                content_html: item.content_html || ""
            }
        }));
    },

    /**
     * Renders one unified item as an HTML fragment (title, source/date line,
     * content followed by a rule, and a link to the original source).
     *
     * @param {object} item - Unified item as returned by `transform`.
     * @returns {string} HTML fragment, starting and ending with a newline.
     */
    generateHtml: (item) => {
        // NOTE(review): content_html is inserted without escaping — presumably
        // it is trusted/sanitized upstream; confirm before rendering untrusted feeds.
        return [
            '',
            `<strong>${escapeHtml(item.title)}</strong><br>`,
            `<small>来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)}</small>`,
            `<div class="content-html">`,
            `${item.details.content_html || '无内容。'}<hr>`,
            `</div>`,
            `<a href="${escapeHtml(item.url)}" target="_blank" rel="noopener noreferrer">在 ArXiv/来源 阅读</a>`,
            '',
        ].join('\n');
    }
};
export default PapersDataSource;

View File

@@ -1,6 +1,4 @@
import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml} from '../helpers';
import { callChatAPI } from '../chatapi.js';
import { removeMarkdownCodeBlock } from '../helpers.js';
const RedditDataSource = {
async fetch(env, foloCookie) {
@@ -107,61 +105,11 @@ const RedditDataSource = {
return redditData;
}
if (!env.OPEN_TRANSLATE === "true") {
console.warn("Skipping reddit translations.");
redditData.items = redditData.items.map(item => ({
...item,
title_zh: item.title || ""
}));
return redditData;
}
const itemsToTranslate = redditData.items.map((item, index) => ({
id: index,
original_title: item.title || ""
redditData.items = redditData.items.map(item => ({
...item,
title_zh: item.title || ""
}));
const hasContentToTranslate = itemsToTranslate.some(item => item.original_title.trim() !== "");
if (!hasContentToTranslate) {
console.log("No non-empty reddit titles to translate for today's posts.");
redditData.items = redditData.items.map(item => ({ ...item, title_zh: item.title || "" }));
return redditData;
}
const promptText = `You will be given a JSON array of reddit data objects. Each object has an "id" and "original_title".
Translate "original_title" into Chinese.
Return a JSON array of objects. Each output object MUST have:
- "id": The same id from the input.
- "title_zh": Chinese translation of "original_title". Empty if original is empty.
Input: ${JSON.stringify(itemsToTranslate)}
Respond ONLY with the JSON array.`;
let translatedItemsMap = new Map();
try {
console.log(`Requesting translation for ${itemsToTranslate.length} reddit titles for today.`);
const chatResponse = await callChatAPI(env, promptText);
const parsedTranslations = JSON.parse(removeMarkdownCodeBlock(chatResponse));
if (parsedTranslations) {
parsedTranslations.forEach(translatedItem => {
if (translatedItem && typeof translatedItem.id === 'number' &&
typeof translatedItem.title_zh === 'string') {
translatedItemsMap.set(translatedItem.id, translatedItem);
}
});
}
} catch (translationError) {
console.error("Failed to translate reddit titles in batch:", translationError.message);
}
redditData.items = redditData.items.map((originalItem, index) => {
const translatedData = translatedItemsMap.get(index);
return {
...originalItem,
title_zh: translatedData ? translatedData.title_zh : (originalItem.title || "")
};
});
return redditData;
},
@@ -174,7 +122,7 @@ Respond ONLY with the JSON array.`;
id: item.id,
type: sourceType,
url: item.url,
title: item.title_zh || item.title, // Use translated title if available
title: item.title,
description: stripHtml(item.content_html || ""),
published_date: item.date_published,
authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown',

View File

@@ -3,7 +3,7 @@ export function insertFoot() {
---
## **收听语音版AI日报**
## **AI资讯日报语音版**
| 🎙️ **小宇宙** | 📹 **抖音** |
| --- | --- |

View File

@@ -1,5 +1,5 @@
// src/handlers/commitToGitHub.js
import { getISODate, formatMarkdownText, replaceImageProxy,formatDateToGMT12WithTime } from '../helpers.js';
import { getISODate, formatMarkdownText } from '../helpers.js';
import { getGitHubFileSha, createOrUpdateGitHubFile } from '../github.js';
import { storeInKV } from '../kv.js';
import { marked } from '../marked.esm.js';
@@ -13,23 +13,12 @@ export async function handleCommitToGitHub(request, env) {
const dateStr = formData.get('date') || getISODate();
const dailyMd = formData.get('daily_summary_markdown');
const podcastMd = formData.get('podcast_script_markdown');
// 从 "YYYY-MM-DD" 格式的 dateStr 中提取 "YYYY-MM"
const yearMonth = dateStr.substring(0, 7);
const report = {
report_date: dateStr,
title: dateStr+'日刊',
link: '/'+yearMonth+'/'+dateStr+'/',
content_html: null,
// 可以添加其他相關欄位,例如作者、來源等
published_date: formatDateToGMT12WithTime(new Date()) // 記錄保存時間
}
const filesToCommit = [];
if (dailyMd) {
filesToCommit.push({ path: `daily/${dateStr}.md`, content: formatMarkdownText(dailyMd), description: "Daily Summary File" });
report.content_html = marked.parse(formatMarkdownText(env.IMG_PROXY, dailyMd));
storeInKV(env.DATA_KV, `${dateStr}-report`, report);
}
if (podcastMd) {
filesToCommit.push({ path: `podcast/${dateStr}.md`, content: podcastMd, description: "Podcast Script File" });

View File

@@ -4,12 +4,14 @@ import { getFromKV } from '../kv.js';
import { callChatAPIStream } from '../chatapi.js';
import { generateGenAiPageHtml } from '../htmlGenerators.js';
import { dataSources } from '../dataFetchers.js'; // Import dataSources
import { getSystemPromptSummarizationStepOne } from "../prompt/summarizationPromptStepOne";
import { getSystemPromptSummarizationStepOne } from "../prompt/summarizationPromptStepZero";
import { getSystemPromptSummarizationStepTwo } from "../prompt/summarizationPromptStepTwo";
import { getSystemPromptSummarizationStepThree } from "../prompt/summarizationPromptStepThree";
import { getSystemPromptPodcastFormatting, getSystemPromptShortPodcastFormatting } from '../prompt/podcastFormattingPrompt.js';
import { getSystemPromptDailyAnalysis } from '../prompt/dailyAnalysisPrompt.js'; // Import new prompt
import { insertFoot } from '../foot.js';
import { insertAd } from '../ad.js';
import { getDailyReportContent } from '../github.js'; // 导入 getDailyReportContent
export async function handleGenAIPodcastScript(request, env) {
let dateStr;
@@ -26,7 +28,25 @@ export async function handleGenAIPodcastScript(request, env) {
formData = await request.formData();
dateStr = formData.get('date');
selectedItemsParams = formData.getAll('selectedItems');
outputOfCall1 = formData.get('summarizedContent'); // Get summarized content from form data
const readGithub = formData.get('readGithub') === 'true';
if (readGithub) {
const filePath = `daily/${dateStr}.md`;
console.log(`从 GitHub 读取文件: ${filePath}`);
try {
outputOfCall1 = await getDailyReportContent(env, filePath);
if (!outputOfCall1) {
throw new Error(`从 GitHub 读取文件 ${filePath} 失败或内容为空。`);
}
console.log(`成功从 GitHub 读取文件,内容长度: ${outputOfCall1.length}`);
} catch (error) {
console.error(`读取 GitHub 文件出错: ${error}`);
const errorHtml = generateGenAiPageHtml(env, '生成AI播客脚本出错', `<p><strong>从 GitHub 读取文件失败:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, null, null, null, null, null, null, outputOfCall1, null);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
} else {
outputOfCall1 = formData.get('summarizedContent'); // Get summarized content from form data
}
if (!outputOfCall1) {
const errorHtml = generateGenAiPageHtml(env, '生成AI播客脚本出错', '<p><strong>Summarized content is missing.</strong> Please go back and generate AI content first.</p>', dateStr, true, null, null, null, null, null, null, outputOfCall1, null);
@@ -156,7 +176,7 @@ export async function handleGenAIContent(request, env) {
// Add new data sources
switch (item.type) {
case 'news':
itemText = `News Title: ${item.title}\nPublished: ${item.published_date}\nContent Summary: ${stripHtml(item.details.content_html)}`;
itemText = `News Title: ${item.title}\nPublished: ${item.published_date}\nUrl: ${item.url}\nContent Summary: ${stripHtml(item.details.content_html)}`;
break;
case 'project':
itemText = `Project Name: ${item.title}\nPublished: ${item.published_date}\nUrl: ${item.url}\nDescription: ${item.description}\nStars: ${item.details.totalStars}`;
@@ -191,43 +211,43 @@ export async function handleGenAIContent(request, env) {
}
//提示词内不能有英文引号,否则会存储数据缺失。
fullPromptForCall1_System = getSystemPromptSummarizationStepOne();
fullPromptForCall1_User = '\n\n------\n\n'+selectedContentItems.join('\n\n------\n\n')+'\n\n------\n\n'; // Keep this for logging/error reporting if needed
// fullPromptForCall1_System = getSystemPromptSummarizationStepOne();
// fullPromptForCall1_User = '\n\n------\n\n'+selectedContentItems.join('\n\n------\n\n')+'\n\n------\n\n'; // Keep this for logging/error reporting if needed
console.log("Call 1 to Chat (Summarization): User prompt length:", fullPromptForCall1_User.length);
try {
const chunkSize = 3;
const summaryPromises = [];
// console.log("Call 1 to Chat (Summarization): User prompt length:", fullPromptForCall1_User.length);
// try {
// const chunkSize = 3;
// const summaryPromises = [];
for (let i = 0; i < selectedContentItems.length; i += chunkSize) {
const chunk = selectedContentItems.slice(i, i + chunkSize);
const chunkPrompt = chunk.join('\n\n---\n\n'); // Join selected items with the separator
// for (let i = 0; i < selectedContentItems.length; i += chunkSize) {
// const chunk = selectedContentItems.slice(i, i + chunkSize);
// const chunkPrompt = chunk.join('\n\n---\n\n'); // Join selected items with the separator
summaryPromises.push((async () => {
let summarizedChunks = [];
for await (const streamChunk of callChatAPIStream(env, chunkPrompt, fullPromptForCall1_System)) {
summarizedChunks.push(streamChunk);
}
return summarizedChunks.join('');
})());
}
// summaryPromises.push((async () => {
// let summarizedChunks = [];
// for await (const streamChunk of callChatAPIStream(env, chunkPrompt, fullPromptForCall1_System)) {
// summarizedChunks.push(streamChunk);
// }
// return summarizedChunks.join('');
// })());
// }
const allSummarizedResults = await Promise.all(summaryPromises);
outputOfCall1 = allSummarizedResults.join('\n\n'); // Join all summarized parts
// const allSummarizedResults = await Promise.all(summaryPromises);
// outputOfCall1 = allSummarizedResults.join('\n\n'); // Join all summarized parts
if (!outputOfCall1 || outputOfCall1.trim() === "") throw new Error("Chat summarization call returned empty content.");
outputOfCall1 = removeMarkdownCodeBlock(outputOfCall1); // Clean the output
console.log("Call 1 (Summarization) successful. Output length:", outputOfCall1.length);
} catch (error) {
console.error("Error in Chat API Call 1 (Summarization):", error);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(分段处理)', `<p><strong>Failed during summarization:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
// if (!outputOfCall1 || outputOfCall1.trim() === "") throw new Error("Chat summarization call returned empty content.");
// outputOfCall1 = removeMarkdownCodeBlock(outputOfCall1); // Clean the output
// console.log("Call 1 (Summarization) successful. Output length:", outputOfCall1.length);
// } catch (error) {
// console.error("Error in Chat API Call 1 (Summarization):", error);
// const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(分段处理)', `<p><strong>Failed during summarization:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User);
// return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
// }
// Call 2: Process outputOfCall1
let outputOfCall2 = null;
let fullPromptForCall2_System = getSystemPromptSummarizationStepTwo(); // Re-using summarization prompt for now
let fullPromptForCall2_User = outputOfCall1; // Input for Call 2 is output of Call 1
let fullPromptForCall2_System = getSystemPromptSummarizationStepOne(); // Re-using summarization prompt for now
let fullPromptForCall2_User = '\n\n------\n\n'+selectedContentItems.join('\n\n------\n\n')+'\n\n------\n\n'; // Input for Call 2 is output of Call 1
console.log("Call 2 to Chat (Processing Call 1 Output): User prompt length:", fullPromptForCall2_User.length);
try {
@@ -241,25 +261,28 @@ export async function handleGenAIContent(request, env) {
console.log("Call 2 (Processing Call 1 Output) successful. Output length:", outputOfCall2.length);
} catch (error) {
console.error("Error in Chat API Call 2 (Processing Call 1 Output):", error);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(格式化)', `<p><strong>Failed during processing of summarized content:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User, fullPromptForCall2_System, fullPromptForCall2_User);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(格式化)', `<p><strong>Failed during processing of summarized content:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall2_System, fullPromptForCall2_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
let promptsMarkdownContent = `# Prompts for ${dateStr}\n\n`;
promptsMarkdownContent += `## Call 1: Content Summarization\n\n`;
if (fullPromptForCall1_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall1_System}\n\`\`\`\n\n`;
if (fullPromptForCall1_User) promptsMarkdownContent += `### User Input\n\`\`\`\n${fullPromptForCall1_User}\n\`\`\`\n\n`;
// promptsMarkdownContent += `## Call 1: Content Summarization\n\n`;
// if (fullPromptForCall1_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall1_System}\n\`\`\`\n\n`;
// if (fullPromptForCall1_User) promptsMarkdownContent += `### User Input\n\`\`\`\n${fullPromptForCall1_User}\n\`\`\`\n\n`;
promptsMarkdownContent += `## Call 2: Summarized Content Format\n\n`;
if (fullPromptForCall2_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall2_System}\n\`\`\`\n\n`;
if (fullPromptForCall2_User) promptsMarkdownContent += `### User Input (Output of Call 1)\n\`\`\`\n${fullPromptForCall2_User}\n\`\`\`\n\n`;
let dailySummaryMarkdownContent = `## ${env.DAILY_TITLE} ${formatDateToChinese(dateStr)}` + '\n\n';
dailySummaryMarkdownContent += '> '+ env.DAILY_TITLE_MIN + '\n\n';
let fullPromptForCall3_System = getSystemPromptSummarizationStepThree(); // Re-using summarization prompt for now
let fullPromptForCall3_User = outputOfCall2; // Input for Call 2 is output of Call 1
let outputOfCall3 = null;
console.log("Call 3 to Chat (Processing Call 2 Output): User prompt length:", outputOfCall2.length);
console.log("Call 3 to Chat (Processing Call 1 Output): User prompt length:", fullPromptForCall3_User.length);
try {
let processedChunks = [];
for await (const chunk of callChatAPIStream(env, outputOfCall2, getSystemPromptSummarizationStepThree())) {
for await (const chunk of callChatAPIStream(env, fullPromptForCall3_User, fullPromptForCall3_System)) {
processedChunks.push(chunk);
}
outputOfCall3 = processedChunks.join('');
@@ -268,12 +291,13 @@ export async function handleGenAIContent(request, env) {
console.log("Call 3 (Processing Call 2 Output) successful. Output length:", outputOfCall3.length);
} catch (error) {
console.error("Error in Chat API Call 3 (Processing Call 2 Output):", error);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(摘要)', `<p><strong>Failed during processing of summarized content:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User, fullPromptForCall2_System, fullPromptForCall2_User);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(摘要)', `<p><strong>Failed during processing of summarized content:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall3_System, fullPromptForCall3_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
dailySummaryMarkdownContent += '\n\n### **AI内容摘要**\n\n```\n' + outputOfCall3 + '\n```\n\n';
dailySummaryMarkdownContent += '\n\n### **今日摘要**\n\n```\n' + outputOfCall3 + '\n```\n\n';
dailySummaryMarkdownContent += `\n\n${removeMarkdownCodeBlock(outputOfCall2)}`;
if (env.INSERT_AD=='true') dailySummaryMarkdownContent += insertAd() +`\n`;
if (env.INSERT_FOOT=='true') dailySummaryMarkdownContent += insertFoot() +`\n\n`;
const successHtml = generateGenAiPageHtml(
@@ -281,12 +305,11 @@ export async function handleGenAIContent(request, env) {
'AI日报', // Title for Call 1 page
escapeHtml(dailySummaryMarkdownContent),
dateStr, false, selectedItemsParams,
fullPromptForCall1_System, fullPromptForCall1_User,
fullPromptForCall2_System, fullPromptForCall2_User,
null, null, // Pass Call 2 prompts
convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)),
convertEnglishQuotesToChinese(dailySummaryMarkdownContent),
null, // No podcast script for this page
outputOfCall1 // Pass summarized content for the next step (original outputOfCall1)
);
return new Response(successHtml, { headers: { 'Content-Type': 'text/html; charset=utf-8' } });
@@ -294,7 +317,7 @@ export async function handleGenAIContent(request, env) {
console.error("Error in /genAIContent (outer try-catch):", error);
const pageDateForError = dateStr || getISODate();
const itemsForActionOnError = Array.isArray(selectedItemsParams) ? selectedItemsParams : [];
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错', `<p><strong>Unexpected error:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, pageDateForError, true, itemsForActionOnError, fullPromptForCall1_System, fullPromptForCall1_User, fullPromptForCall2_System, fullPromptForCall2_User);
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错', `<p><strong>Unexpected error:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, pageDateForError, true, itemsForActionOnError, fullPromptForCall2_System, fullPromptForCall2_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
}

View File

@@ -0,0 +1,42 @@
import { getISODate, escapeHtml, formatDateToChinese, convertEnglishQuotesToChinese} from '../helpers.js';
import { generateGenAiPageHtml } from '../htmlGenerators.js';
import { insertFoot } from '../foot.js';
import { insertAd } from '../ad.js';
/**
 * Serves the daily-report editing page pre-filled with a skeleton report
 * (title, subtitle, placeholder summary, optional ad and footer) instead of
 * AI-generated content.
 *
 * @param {Request} request - Incoming request; the optional `date` query
 *   parameter selects the report date, otherwise `getISODate()` is used.
 * @param {object} env - Worker environment; reads DAILY_TITLE, DAILY_TITLE_MIN,
 *   INSERT_AD and INSERT_FOOT.
 * @returns {Promise<Response>} The rendered HTML page, or a 500 error page when
 *   anything throws while building it.
 */
export async function handleGenAIDailyPage(request, env) {
    const htmlHeaders = { 'Content-Type': 'text/html; charset=utf-8' };
    // Declared outside the try so the error page can reuse an already-resolved date.
    let dateStr;

    try {
        const requestedDate = new URL(request.url).searchParams.get('date');
        dateStr = requestedDate || getISODate();

        // Assemble the Markdown skeleton section by section.
        const sections = [];
        sections.push(`## ${env.DAILY_TITLE} ${formatDateToChinese(dateStr)}` + '\n\n');
        sections.push('> ' + env.DAILY_TITLE_MIN + '\n\n');
        sections.push('\n\n### **今日摘要**\n\n```\n' + '这里输入内容摘要' + '\n```\n\n');
        if (env.INSERT_AD == 'true') {
            sections.push(insertAd() + `\n`);
        }
        if (env.INSERT_FOOT == 'true') {
            sections.push(insertFoot() + `\n\n`);
        }
        const dailySummaryMarkdownContent = sections.join('');

        const pageBody = escapeHtml(dailySummaryMarkdownContent);
        const markdownForForm = convertEnglishQuotesToChinese(dailySummaryMarkdownContent);
        const pageHtml = generateGenAiPageHtml(
            env,
            'AI日报',          // page title
            pageBody,
            dateStr,
            false,             // not an error page
            [],                // no selected items on this page
            null, null,        // call 1 prompts: not applicable
            null, null,        // call 2 prompts: not applicable
            'webbuild',        // passed in the prompts-markdown slot
            markdownForForm,   // daily summary markdown
            null,              // no podcast script
            true,              // readGithub
        );
        return new Response(pageHtml, { headers: htmlHeaders });
    } catch (error) {
        console.error("Error in /genAIDailyPage:", error);

        const pageDateForError = dateStr || getISODate();
        let errorBody = `<p><strong>Unexpected error:</strong> ${escapeHtml(error.message)}</p>`;
        if (error.stack) {
            errorBody += `<pre>${escapeHtml(error.stack)}</pre>`;
        }
        const errorHtml = generateGenAiPageHtml(env, '生成AI日报页面出错', errorBody, pageDateForError, true, []);
        return new Response(errorHtml, { status: 500, headers: htmlHeaders });
    }
}

View File

@@ -1,4 +1,4 @@
import { replaceImageProxy, formatMarkdownText, formatDateToGMT12WithTime } from '../helpers.js';
import { replaceImageProxy, formatMarkdownText, formatDateToGMT8WithTime, removeMarkdownCodeBlock } from '../helpers.js';
import { getDailyReportContent } from '../github.js';
import { storeInKV } from '../kv.js';
import { marked } from '../marked.esm.js';
@@ -26,7 +26,7 @@ export async function handleWriteRssData(request, env) {
}
console.log(`[writeRssData] Successfully retrieved content for ${path}. Content length: ${content.length}`);
//content = extractContentFromSecondHash(content);
// content = extractContentFromSecondHash(content);
// 从 "YYYY-MM-DD" 格式的 dateStr 中提取 "YYYY-MM"
const yearMonth = dateStr.substring(0, 7);
const report = {
@@ -35,9 +35,11 @@ export async function handleWriteRssData(request, env) {
link: '/'+yearMonth+'/'+dateStr+'/',
content_html: null,
// 可以添加其他相關欄位,例如作者、來源等
published_date: formatDateToGMT12WithTime(new Date()) // 記錄保存時間
published_date: formatDateToGMT8WithTime(new Date()) // 記錄保存時間
}
report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env, content)));
//report.content_html = marked.parse(formatMarkdownText(await generateAIContent(env, content)));
const kvKey = `${dateStr}-report`;
console.log(`[writeRssData] Preparing to store report in KV. Key: ${kvKey}, Report object:`, JSON.stringify(report).substring(0, 200) + '...'); // Log first 200 chars
@@ -64,11 +66,12 @@ export function extractContentFromSecondHash(content) {
const parts = content.split('###');
if (parts.length > 2) {
// 原始逻辑:重新组合从第二个 ### 开始的所有部分
const newcontent = '###' + parts.slice(2).join('###');
const lastHashIndex = newcontent.lastIndexOf('###');
let newcontent = '###' + parts.slice(2).join('###');
const lastHashIndex = newcontent.lastIndexOf('AI资讯日报语音版');
if (lastHashIndex !== -1) {
return newcontent.substring(0, lastHashIndex);
newcontent = newcontent.substring(0, lastHashIndex-10);
}
return newcontent;
}
return content; // 如果没有找到 ### 或不符合上述条件,则返回原始内容
}
@@ -87,8 +90,8 @@ export async function generateAIContent(env, promptText) {
try {
let result = await callChatAPI(env, promptText, getSummarizationSimplifyPrompt());
console.log(`[generateAIContent] AI model returned content. Length: ${result.length}`);
result += "\n\n </br>"+env.INSERT_APP_URL;
result = removeMarkdownCodeBlock(result);
result += "\n\n</br>"+env.INSERT_APP_URL;
return result;
} catch (error) {
console.error('[generateAIContent] Error calling AI model:', error.message, error.stack);

View File

@@ -257,7 +257,7 @@ export function formatRssDate(date) {
return new Intl.DateTimeFormat('zh-CN', options).format(date);
}
export function formatDateToGMT12WithTime(isoDateString) {
export function formatDateToGMT8WithTime(isoDateString) {
if (!isoDateString) return '';
const date = new Date(isoDateString);
const options = {
@@ -268,7 +268,7 @@ export function formatRssDate(date) {
minute: '2-digit',
second: '2-digit',
hour12: false, // 使用24小时制
timeZone: 'Asia/Kamchatka'// 指定东12时区
timeZone: 'Asia/Shanghai'// 指定东8时区
};
// 使用 'zh-CN' 语言环境以确保中文格式
return new Intl.DateTimeFormat('zh-CN', options).format(date);

View File

@@ -282,7 +282,8 @@ function generatePromptSectionHtmlForGenAI(systemPrompt, userPrompt, promptTitle
export function generateGenAiPageHtml(env, title, bodyContent, pageDate, isErrorPage = false, selectedItemsForAction = null,
systemP1 = null, userP1 = null, systemP2 = null, userP2 = null,
promptsMd = null, dailyMd = null, podcastMd = null) {
promptsMd = null, dailyMd = null, podcastMd = null, readGithub = null) {
let actionButtonHtml = '';
// Regenerate button for AI Content Summary page
@@ -333,6 +334,7 @@ export function generateGenAiPageHtml(env, title, bodyContent, pageDate, isError
generatePodcastButtonHtml = `
<form action="/genAIPodcastScript" method="POST" style="display: inline-block; margin-left: 0.5rem;">
<input type="hidden" name="date" value="${escapeHtml(pageDate)}">
<input type="hidden" name="readGithub" value="${readGithub}">
${selectedItemsForAction.map(item => `<input type="hidden" name="selectedItems" value="${escapeHtml(item)}">`).join('')}
<input type="hidden" name="summarizedContent" value="${escapeHtml(convertEnglishQuotesToChinese(bodyContent))}">
<button type="submit" class="button-link">生成播客脚本</button>
@@ -347,7 +349,7 @@ export function generateGenAiPageHtml(env, title, bodyContent, pageDate, isError
}
let promptDisplayHtml = '';
if (title === 'AI日报') {
if (title === 'AI日报' || title.includes('生成AI日报出错(')) {
if (systemP1 || userP1) {
promptDisplayHtml = `
<div style="margin-top: 1.5rem;">

View File

@@ -2,7 +2,8 @@
import { handleWriteData } from './handlers/writeData.js';
import { handleGetContent } from './handlers/getContent.js';
import { handleGetContentHtml } from './handlers/getContentHtml.js';
import { handleGenAIContent, handleGenAIPodcastScript, handleGenAIDailyAnalysis } from './handlers/genAIContent.js'; // Import handleGenAIPodcastScript and handleGenAIDailyAnalysis
import { handleGenAIContent, handleGenAIPodcastScript, handleGenAIDailyAnalysis } from './handlers/genAIContent.js';
import { handleGenAIDailyPage } from './handlers/genAIDailyPage.js'; // Import handleGenAIDailyPage
import { handleCommitToGitHub } from './handlers/commitToGitHub.js';
import { handleRss } from './handlers/getRss.js';
import { handleWriteRssData } from './handlers/writeRssData.js';
@@ -18,9 +19,6 @@ export default {
'LOGIN_USERNAME', 'LOGIN_PASSWORD',
'PODCAST_TITLE','PODCAST_BEGIN','PODCAST_END',
'FOLO_COOKIE_KV_KEY','FOLO_DATA_API','FOLO_FILTER_DAYS',
'AIBASE_FEED_ID', 'XIAOHU_FEED_ID', 'HGPAPERS_FEED_ID', 'TWITTER_LIST_ID',
'AIBASE_FETCH_PAGES', 'XIAOHU_FETCH_PAGES', 'HGPAPERS_FETCH_PAGES', 'TWITTER_FETCH_PAGES',
//'AIBASE_API_URL', 'XIAOHU_API_URL','PROJECTS_API_URL','HGPAPERS_API_URL', 'TWITTER_API_URL', 'TWITTER_USERNAMES',
];
console.log(env);
const missingVars = requiredEnvVars.filter(varName => !env[varName]);
@@ -79,6 +77,8 @@ export default {
response = await handleGenAIPodcastScript(request, env);
} else if (path === '/genAIDailyAnalysis' && request.method === 'POST') { // New route for AI Daily Analysis
response = await handleGenAIDailyAnalysis(request, env);
} else if (path === '/genAIDailyPage' && request.method === 'GET') { // New route for AI Daily Page
response = await handleGenAIDailyPage(request, env);
} else if (path === '/commitToGitHub' && request.method === 'POST') {
response = await handleCommitToGitHub(request, env);
} else {

View File

@@ -0,0 +1,16 @@
/**
 * Returns the system prompt used for the first summarization step.
 *
 * The prompt text is written in Simplified Chinese and is returned verbatim,
 * with one leading and one trailing newline.
 *
 * @returns {string} The system prompt text.
 */
export function getSystemPromptSummarizationStepOne() {
    // One array entry per prompt line; the empty first and last entries produce
    // the leading and trailing newline once the lines are joined.
    const promptLines = [
        '',
        '你是一名专业的文本摘要助理。你的任务是根据收到的文本类型(或其包含的多种内容类型)执行特定类型的摘要。',
        '重要通用原则:所有摘要内容必须严格来源于原文。不得捏造、歪曲或添加原文未提及的信息。',
        '**最终输出要求:**',
        '* 参照以上条件优化文本内容,按内容自动分段,段落数量要和原始一样。',
        '* 仅输出最终生成的摘要。不要包含任何关于你如何分析文本、确定其类型、分割文本或应用规则的解释性文字。如果合并了来自多个片段的摘要,请确保合并后的文本流畅自然。',
        '* 输出语言与格式:内容必须为简体中文,并严格采用 Markdown 格式进行排版。',
        '* 关键词高亮:请在内容中自动识别并对核心关键词或重要概念进行加黑加粗处理,以增强可读性和重点突出。',
        '* 给最终内容加上标题,前置标题为“### **今日AI资讯**”。',
        '* 段落序列化在每个独立段落的开头必须添加以“1.”开头的阿拉伯数字序列确保数字正确递增例如1.、2.、3.、...)。',
        '',
    ];
    return promptLines.join('\n');
}

View File

@@ -20,23 +20,15 @@ OPENAI_API_URL = "https://api.deepseek.com" # Or your OpenAI compatible API URL
DEFAULT_OPEN_MODEL = "deepseek-chat"
FOLO_COOKIE_KV_KEY = "folo_auth_cookie"
FOLO_DATA_API = "https://api.follow.is/entries"
FOLO_FILTER_DAYS = 3
AIBASE_FEED_ID = "69533603812632576"
AIBASE_FETCH_PAGES = "3"
XIAOHU_FEED_ID = "151846580097413120"
XIAOHU_FETCH_PAGES = "2"
HGPAPERS_FEED_ID = "41359648680482832"
HGPAPERS_FETCH_PAGES = "2"
JIQIZHIXIN_FEED_ID = "41459996870678583"
JIQIZHIXIN_FETCH_PAGES = "1"
QBIT_FEED_ID = "58864180026527744"
QBIT_FETCH_PAGES = "1"
XINZHIYUAN_FEED_ID = "60901577013168128"
XINZHIYUAN_FETCH_PAGES = "1"
FOLO_FILTER_DAYS = 1
NEWS_AGGREGATOR_LIST_ID = "158437828119024640"
NEWS_AGGREGATOR_FETCH_PAGES = "1"
HGPAPERS_LIST_ID = "158437917409783808"
HGPAPERS_FETCH_PAGES = "1"
TWITTER_LIST_ID = "153028784690326528"
TWITTER_FETCH_PAGES = "2"
TWITTER_FETCH_PAGES = "1"
REDDIT_LIST_ID = "167576006499975168"
REDDIT_FETCH_PAGES = "2"
REDDIT_FETCH_PAGES = "1"
PROJECTS_API_URL = "https://git-trending.justlikemaki.vip/topone/?since=daily"
GITHUB_TOKEN = "github_pat_xxxxxx"
GITHUB_REPO_OWNER = "justlovemaki"
@@ -51,4 +43,5 @@ PODCAST_BEGIN = "嘿亲爱的V欢迎收听新一期的来生情报站
PODCAST_END = "今天的情报就到这里,注意隐蔽,赶紧撤离"
BOOK_LINK = ""
INSERT_FOOT = "false"
INSERT_AD = "false"
INSERT_APP_URL = "<h3>[查看完整版AI日报↗ https://ai.hubtoday.app/](https://ai.hubtoday.app/)</h3>"