refactor(handlers): 拆分RSS工作流为独立的生成与存储阶段

引入两阶段RSS处理架构以支持订阅源摘要预览功能:

第一阶段 - 内容生成(/generateRssContent):
  * 读取daily目录原始markdown
  * 调用AI生成精简摘要
  * 持久化至GitHub rss目录

第二阶段 - 数据同步(/writeRssData):
  * 消费rss目录预生成内容
  * 转换为HTML并同步至KV存储

其他调整:
  * 创建官网引流模板(appUrl.js)
  * 实现智能断行的文本裁剪工具
  * 优化日报页面广告展示顺序
  * 修正getDailyReportContent异常日志输出
This commit is contained in:
luofeng
2026-01-12 18:03:53 +08:00
parent 5698bb0394
commit cd81280bcb
5 changed files with 165 additions and 39 deletions

27
src/appUrl.js Normal file
View File

@@ -0,0 +1,27 @@
/**
 * Returns the static Markdown/HTML notice that explains the RSS "summary mode"
 * and invites readers to open the official site for the full article.
 *
 * The text is a fixed template with no interpolation; it begins and ends with
 * a newline so it can be appended directly after other Markdown content.
 *
 * @returns {string} The notice block as Markdown with inline HTML.
 */
export function getAppUrl() {
  // One array entry per output line; the empty first/last entries produce the
  // leading and trailing newline of the block.
  const noticeLines = [
    '',
    '---',
    '**📢 关于 AI日报 的一次小调整**',
    '>',
    '坦白说,想要长久地把**AI日报**做下去,单靠“为爱发电”确实面临现实压力。为了更有热情的**投入精力**,我在网站接入了少量 Google 广告。',
    '>',
    '由于 RSS 无法展示广告带来收入,即日起 RSS 将**试运行“摘要模式”一段时间**。',
    '>',
    '**💡 您的每一次点击,都是对我最大的支持**',
    '诚挚邀请您移步官网阅读全文。那里不仅有更舒适的**排版**和清晰的**代码高亮**,还能在评论区与大家交流。',
    '>',
    '感谢您的理解与陪伴,让我们一起走得更远!',
    '>',
    '👇 **点击下方链接,阅读今日完整资讯**',
    '### [🚀 前往官网查看完整版 (ai.hubtoday.app)](https://ai.hubtoday.app/)',
    '>',
    '<small>如有建议,欢迎随时邮件沟通:[justlikemaki@foxmail.com](mailto:justlikemaki@foxmail.com)</small>',
    '<br/>',
    '<small>或直接扫码进群提供建议:</small>',
    '<br/>',
    '![进群-何夕2077AI日报问题反馈](https://source.hubtoday.app/logo/wechat-qun-ex2.jpg)',
    '',
  ];
  return noticeLines.join('\n');
}

2
src/github.js Normal file → Executable file
View File

@@ -106,7 +106,7 @@ export async function getDailyReportContent(env, filePath) {
const data = await callGitHubApi(env, `/contents/${filePath}?ref=${GITHUB_BRANCH}`);
return b64DecodeUnicode(data.content);
} catch (error) {
console.error(`Error fetching daily report content from ${rawUrl}:`, error);
console.error(`Error fetching daily report content from ${filePath}:`, error);
throw error;
}
}

34
src/handlers/genAIContent.js Normal file → Executable file
View File

@@ -77,7 +77,7 @@ export async function handleGenAIPodcastScript(request, env) {
let promptsMarkdownContent = `# Prompts for ${dateStr}\n\n`;
promptsMarkdownContent += `## Call 3: Podcast Formatting\n\n`;
if (fullPromptForCall3_System) promptsMarkdownContent += `### System One Instruction\n\`\`\`\n${fullPromptForCall3_System}\n\`\`\`\n\n`;
let fullPromptForCall4_System = getSystemPromptShortPodcastFormatting(env);
console.log("Call 4 to Chat (Podcast Formatting): User prompt length:", userPromptPodcastFormattingData.length);
@@ -105,13 +105,13 @@ export async function handleGenAIPodcastScript(request, env) {
let podcastScriptMarkdownContent = `# ${env.PODCAST_TITLE} ${formatDateToChinese(dateStr)}\n\n${removeMarkdownCodeBlock(finalAiResponseOut)}`;
const successHtml = generateGenAiPageHtml(
env,
env,
'AI播客脚本',
escapeHtml(finalAiResponseOut),
escapeHtml(finalAiResponseOut),
dateStr, false, selectedItemsParams,
null, null, // No Call 1 prompts for this page
fullPromptForCallSystem, fullPromptForCall3_User,
convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)),
convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)),
outputOfCall1, // No daily summary for this page
convertEnglishQuotesToChinese(podcastScriptMarkdownContent)
);
@@ -119,7 +119,7 @@ export async function handleGenAIPodcastScript(request, env) {
} catch (error) {
console.error("Error in /genAIPodcastScript (outer try-catch):", error);
const pageDateForError = dateStr || getISODate();
const pageDateForError = dateStr || getISODate();
const itemsForActionOnError = Array.isArray(selectedItemsParams) ? selectedItemsParams : [];
const errorHtml = generateGenAiPageHtml(env, '生成AI播客脚本出错', `<p><strong>Unexpected error:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, pageDateForError, true, itemsForActionOnError, null, null, fullPromptForCall3_System, fullPromptForCall3_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
@@ -146,7 +146,7 @@ export async function handleGenAIContent(request, env) {
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错未选生成条目', '<p><strong>No items were selected.</strong> Please go back and select at least one item.</p>', dateStr, true, null);
return new Response(errorHtml, { status: 400, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
console.log(`Generating AI content for ${selectedItemsParams.length} selected item references from date ${dateStr}`);
const allFetchedData = {};
@@ -195,7 +195,7 @@ export async function handleGenAIContent(request, env) {
if (item.details && item.details.content_html) itemText += `\nContent: ${stripHtml(item.details.content_html)}`;
break;
}
if (itemText) {
selectedContentItems.push(itemText);
validItemsProcessedCount++;
@@ -209,7 +209,7 @@ export async function handleGenAIContent(request, env) {
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错可生成条目为空', '<p><strong>Selected items could not be retrieved or resulted in no content.</strong> Please check the data or try different selections.</p>', dateStr, true, selectedItemsParams);
return new Response(errorHtml, { status: 404, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
//提示词内不能有英文引号,否则会存储数据缺失。
// fullPromptForCall1_System = getSystemPromptSummarizationStepOne();
// fullPromptForCall1_User = '\n\n------\n\n'+selectedContentItems.join('\n\n------\n\n')+'\n\n------\n\n'; // Keep this for logging/error reporting if needed
@@ -218,11 +218,11 @@ export async function handleGenAIContent(request, env) {
// try {
// const chunkSize = 3;
// const summaryPromises = [];
// for (let i = 0; i < selectedContentItems.length; i += chunkSize) {
// const chunk = selectedContentItems.slice(i, i + chunkSize);
// const chunkPrompt = chunk.join('\n\n---\n\n'); // Join selected items with the separator
// summaryPromises.push((async () => {
// let summarizedChunks = [];
// for await (const streamChunk of callChatAPIStream(env, chunkPrompt, fullPromptForCall1_System)) {
@@ -264,7 +264,7 @@ export async function handleGenAIContent(request, env) {
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错(格式化)', `<p><strong>Failed during processing of summarized content:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall2_System, fullPromptForCall2_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
let promptsMarkdownContent = `# Prompts for ${dateStr}\n\n`;
// promptsMarkdownContent += `## Call 1: Content Summarization\n\n`;
// if (fullPromptForCall1_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall1_System}\n\`\`\`\n\n`;
@@ -295,27 +295,27 @@ export async function handleGenAIContent(request, env) {
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
dailySummaryMarkdownContent += '\n\n### **今日摘要**\n\n```\n' + outputOfCall3 + '\n```\n\n';
if (env.INSERT_AD=='true') dailySummaryMarkdownContent += insertAd() +`\n`;
dailySummaryMarkdownContent += `\n\n${removeMarkdownCodeBlock(outputOfCall2)}`;
if (env.INSERT_AD=='true') dailySummaryMarkdownContent += insertAd() +`\n`;
if (env.INSERT_FOOT=='true') dailySummaryMarkdownContent += insertFoot() +`\n\n`;
const successHtml = generateGenAiPageHtml(
env,
env,
'AI日报', // Title for Call 1 page
escapeHtml(dailySummaryMarkdownContent),
escapeHtml(dailySummaryMarkdownContent),
dateStr, false, selectedItemsParams,
fullPromptForCall2_System, fullPromptForCall2_User,
null, null, // Pass Call 2 prompts
convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)),
convertEnglishQuotesToChinese(dailySummaryMarkdownContent),
convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)),
convertEnglishQuotesToChinese(dailySummaryMarkdownContent),
null, // No podcast script for this page
);
return new Response(successHtml, { headers: { 'Content-Type': 'text/html; charset=utf-8' } });
} catch (error) {
console.error("Error in /genAIContent (outer try-catch):", error);
const pageDateForError = dateStr || getISODate();
const pageDateForError = dateStr || getISODate();
const itemsForActionOnError = Array.isArray(selectedItemsParams) ? selectedItemsParams : [];
const errorHtml = generateGenAiPageHtml(env, '生成AI日报出错', `<p><strong>Unexpected error:</strong> ${escapeHtml(error.message)}</p>${error.stack ? `<pre>${escapeHtml(error.stack)}</pre>` : ''}`, pageDateForError, true, itemsForActionOnError, fullPromptForCall2_System, fullPromptForCall2_User);
return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } });

135
src/handlers/writeRssData.js Normal file → Executable file
View File

@@ -1,10 +1,80 @@
import { replaceImageProxy, formatMarkdownText, formatDateToGMT8WithTime, removeMarkdownCodeBlock } from '../helpers.js';
import { getDailyReportContent } from '../github.js';
import { getDailyReportContent, getGitHubFileSha, createOrUpdateGitHubFile } from '../github.js';
import { storeInKV } from '../kv.js';
import { marked } from '../marked.esm.js';
import { callChatAPI } from '../chatapi.js'; // 导入 callChatAPI
import { getSummarizationSimplifyPrompt } from "../prompt/summarizationSimplifyPrompt";
import { getAppUrl } from '../appUrl.js';
/**
 * Handles the generate-RSS-content request: reads the daily report from
 * `daily/<date>.md`, generates the AI summary from it, and writes the result
 * to `rss/<date>.md` on GitHub.
 *
 * Responses:
 *  - 400 when the `date` query parameter is missing or not in YYYY-MM-DD form
 *  - 404 when the daily file yields no content
 *  - 500 when any step throws
 *  - 200 with a JSON description of the generated content otherwise
 *
 * @param {Request} request - Incoming request; only `request.url` is read.
 * @param {object} env - Environment object passed through to the GitHub and AI helpers.
 * @returns {Promise<Response>} Response containing the generated content or an error message.
 */
export async function handleGenerateRssContent(request, env) {
  const url = new URL(request.url);
  const dateStr = url.searchParams.get('date');
  console.log(`[generateRssContent] Received request for date: ${dateStr}`);

  if (!dateStr) {
    console.error('[generateRssContent] Missing date parameter');
    return new Response('Missing date parameter', { status: 400 });
  }

  // dateStr comes straight from the query string and is interpolated into the
  // repository paths below; accept only YYYY-MM-DD so values such as
  // "../../README" cannot address files outside daily/ and rss/.
  if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
    console.error(`[generateRssContent] Invalid date parameter: ${dateStr}`);
    return new Response('Invalid date parameter, expected YYYY-MM-DD', { status: 400 });
  }

  try {
    // Read the original report from the daily directory.
    const dailyPath = `daily/${dateStr}.md`;
    console.log(`[generateRssContent] Attempting to get content from GitHub path: ${dailyPath}`);
    let content = await getDailyReportContent(env, dailyPath);
    if (!content) {
      console.warn(`[generateRssContent] No content found for ${dailyPath}. Returning 404.`);
      return new Response(`No content found for ${dailyPath}`, { status: 404 });
    }
    console.log(`[generateRssContent] Successfully retrieved content for ${dailyPath}. Content length: ${content.length}`);

    content = extractContentFromSecondHash(content);

    // Generate the AI content; generateAIContent applies the truncation itself.
    const aiContent = await generateAIContent(env, content);

    // Write the generated content to the rss directory, updating the file if it already exists.
    const rssPath = `rss/${dateStr}.md`;
    const existingSha = await getGitHubFileSha(env, rssPath);
    const commitMessage = `${existingSha ? 'Update' : 'Create'} RSS content for ${dateStr}`;
    await createOrUpdateGitHubFile(env, rssPath, aiContent, commitMessage, existingSha);
    console.log(`[generateRssContent] Successfully wrote AI content to GitHub: ${rssPath}`);

    // Extract "YYYY-MM" from the "YYYY-MM-DD" date string for the link path.
    const yearMonth = dateStr.substring(0, 7);
    const result = {
      report_date: dateStr,
      title: dateStr + '日刊',
      link: '/' + yearMonth + '/' + dateStr + '/',
      content_markdown: aiContent,
      github_path: rssPath,
      published_date: formatDateToGMT8WithTime(new Date())
    };

    console.log(`[generateRssContent] Successfully generated and saved content for ${dateStr}. Content length: ${aiContent.length}`);
    return new Response(JSON.stringify(result), {
      headers: { 'Content-Type': 'application/json' },
      status: 200
    });
  } catch (error) {
    console.error('[generateRssContent] Error generating content:', error.message, error.stack);
    return new Response(`Error generating content: ${error.message}`, { status: 500 });
  }
}
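/**
 * Handles the write-RSS-data request: reads the already generated content from the rss directory and writes it to KV.
 * @param {Request} request - The request object.
 * @param {object} env - The environment object.
 * @returns {Promise<Response>} Response containing the result of the write.
 */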
export async function handleWriteRssData(request, env) {
const url = new URL(request.url);
const dateStr = url.searchParams.get('date');
@@ -16,31 +86,28 @@ export async function handleWriteRssData(request, env) {
}
try {
const path = `daily/${dateStr}.md`;
console.log(`[writeRssData] Attempting to get content from GitHub path: ${path}`);
let content = await getDailyReportContent(env, path);
if (!content) {
console.warn(`[writeRssData] No content found for ${path}. Returning 404.`);
return new Response(`No content found for ${path}`, { status: 404 });
}
console.log(`[writeRssData] Successfully retrieved content for ${path}. Content length: ${content.length}`);
// 从rss目录读取已生成的AI内容
const rssPath = `rss/${dateStr}.md`;
console.log(`[writeRssData] Attempting to get content from GitHub path: ${rssPath}`);
let content = await getDailyReportContent(env, rssPath);
if (!content) {
console.warn(`[writeRssData] No content found for ${rssPath}. Returning 404.`);
return new Response(`No content found for ${rssPath}. Please run /generateRssContent first.`, { status: 404 });
}
console.log(`[writeRssData] Successfully retrieved content for ${rssPath}. Content length: ${content.length}`);
// content = extractContentFromSecondHash(content);
// 从 "YYYY-MM-DD" 格式的 dateStr 中提取 "YYYY-MM"
const yearMonth = dateStr.substring(0, 7);
const report = {
report_date: dateStr,
title: dateStr+'日刊',
link: '/'+yearMonth+'/'+dateStr+'/',
content_html: null,
title: dateStr + '日刊',
link: '/' + yearMonth + '/' + dateStr + '/',
content_html: marked.parse(formatMarkdownText(content)),
// 可以添加其他相關欄位,例如作者、來源等
published_date: formatDateToGMT8WithTime(new Date()) // 記錄保存時間
}
report.content_html = marked.parse(formatMarkdownText(replaceImageProxy(env, content)));
//report.content_html = marked.parse(formatMarkdownText(await generateAIContent(env, content)));
};
const kvKey = `${dateStr}-report`;
console.log(`[writeRssData] Preparing to store report in KV. Key: ${kvKey}, Report object:`, JSON.stringify(report).substring(0, 200) + '...'); // Log first 200 chars
await storeInKV(env.DATA_KV, kvKey, report);
@@ -76,6 +143,34 @@ export function extractContentFromSecondHash(content) {
return content; // 如果没有找到 ### 或不符合上述条件,则返回原始内容
}
/**
 * Truncates content to at most `maxLength` UTF-16 code units and appends an
 * "omitted" marker.
 *
 * Content that is empty/falsy or already within the limit is returned
 * unchanged. Otherwise the text is cut at `maxLength`; if the last newline of
 * the cut text lies beyond the halfway point, the cut moves back to that
 * newline so the text ends on a line boundary. The cut never lands between
 * the two halves of a surrogate pair.
 *
 * @param {string} content - Original content.
 * @param {number} maxLength - Maximum length in UTF-16 code units, default 150.
 * @returns {string} The original content, or the truncated content followed by the omission marker.
 */
export function truncateContent(content, maxLength = 150) {
  if (!content || content.length <= maxLength) {
    return content;
  }

  // Cutting between a high and a low surrogate would leave a lone surrogate
  // (a broken character, e.g. half an emoji); step back one unit instead.
  let cutIndex = maxLength;
  if (cutIndex > 0) {
    const before = content.charCodeAt(cutIndex - 1);
    const after = content.charCodeAt(cutIndex);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) {
      cutIndex -= 1;
    }
  }

  // Cut to the requested length.
  let truncated = content.substring(0, cutIndex);

  // Prefer ending at the last newline, but only when that keeps more than half of the allowed length.
  const lastNewlineEnd = truncated.lastIndexOf('\n');
  if (lastNewlineEnd > maxLength / 2) {
    truncated = content.substring(0, lastNewlineEnd);
  }

  // Append the omission marker.
  truncated += '\n\n......\n\n*[剩余内容已省略]*';

  return truncated;
}
/**
* 调用 Gemini 或 OpenAI 模型生成指定提示词的内容。
* 此方法可供外部调用。
@@ -91,7 +186,9 @@ export async function generateAIContent(env, promptText) {
let result = await callChatAPI(env, promptText, getSummarizationSimplifyPrompt());
console.log(`[generateAIContent] AI model returned content. Length: ${result.length}`);
result = removeMarkdownCodeBlock(result);
result += "\n\n</br>"+env.INSERT_APP_URL;
// 截断内容到360字并添加省略样式
result = truncateContent(result, 360);
result += "\n\n</br>" + getAppUrl();
return result;
} catch (error) {
console.error('[generateAIContent] Error calling AI model:', error.message, error.stack);

6
src/index.js Normal file → Executable file
View File

@@ -6,7 +6,7 @@ import { handleGenAIContent, handleGenAIPodcastScript, handleGenAIDailyAnalysis
import { handleGenAIDailyPage } from './handlers/genAIDailyPage.js'; // Import handleGenAIDailyPage
import { handleCommitToGitHub } from './handlers/commitToGitHub.js';
import { handleRss } from './handlers/getRss.js';
import { handleWriteRssData } from './handlers/writeRssData.js';
import { handleWriteRssData, handleGenerateRssContent } from './handlers/writeRssData.js';
import { dataSources } from './dataFetchers.js';
import { handleLogin, isAuthenticated, handleLogout } from './auth.js';
@@ -32,7 +32,7 @@ export default {
<p>Please contact the administrator.</p></body></html>`;
return new Response(errorPage, { status: 503, headers: { 'Content-Type': 'text/html; charset=utf-8' } });
}
const url = new URL(request.url);
const path = url.pathname;
console.log(`Request received: ${request.method} ${path}`);
@@ -48,6 +48,8 @@ export default {
return await handleRss(request, env);
} else if (path === '/writeRssData' && request.method === 'GET') {
return await handleWriteRssData(request, env);
} else if (path === '/generateRssContent' && request.method === 'GET') {
return await handleGenerateRssContent(request, env);
}
// Authentication check for all other paths