add rss & crawl & webhook
This commit is contained in:
9
Makefile
9
Makefile
@@ -4,7 +4,7 @@ REGISTRY ?= glidea
|
||||
FULL_IMAGE_NAME = $(REGISTRY)/$(IMAGE_NAME)
|
||||
|
||||
|
||||
.PHONY: test push build-installer
|
||||
.PHONY: test push dev-push
|
||||
|
||||
test:
|
||||
go test -race -v -coverprofile=coverage.out -coverpkg=./... ./...
|
||||
@@ -16,3 +16,10 @@ push:
|
||||
-t $(FULL_IMAGE_NAME):$(VERSION) \
|
||||
-t $(FULL_IMAGE_NAME):latest \
|
||||
--push .
|
||||
|
||||
dev-push:
|
||||
docker buildx create --use --name multi-platform-builder || true
|
||||
docker buildx build --platform linux/amd64,linux/arm64 \
|
||||
--build-arg VERSION=$(VERSION) \
|
||||
-t $(FULL_IMAGE_NAME):$(VERSION) \
|
||||
--push .
|
||||
|
||||
@@ -73,7 +73,7 @@ Just for the exquisite email styles, install and use it now!
|
||||
|
||||
### 1. Installation
|
||||
|
||||
By default, uses SiliconFlow's Qwen/Qwen2.5-7B-Instruct (free) and Pro/BAAI/bge-m3. If you don't have a SiliconFlow account yet, use this [invitation link](https://cloud.siliconflow.cn/i/U2VS0Q5A) to get a ¥14 credit.
|
||||
By default, zenfeed uses SiliconFlow's Qwen/Qwen3-8B (free) and Pro/BAAI/bge-m3 models. If you don't have a SiliconFlow account yet, use this [invitation link](https://cloud.siliconflow.cn/i/U2VS0Q5A) to get a ¥14 credit.
|
||||
|
||||
Support for other vendors or models is available; follow the instructions below.
|
||||
|
||||
@@ -84,7 +84,7 @@ curl -L -O https://raw.githubusercontent.com/glidea/zenfeed/main/docker-compose.
|
||||
|
||||
# If you need to customize more configuration parameters, directly edit docker-compose.yml#configs.zenfeed_config.content BEFORE running the command below.
|
||||
# Configuration Docs: https://github.com/glidea/zenfeed/blob/main/docs/config.md
|
||||
API_KEY=your_apikey TZ=your_local_IANA LANG=English docker-compose -p zenfeed up -d
|
||||
API_KEY=your_apikey TZ=your_local_IANA LANGUAGE=English docker-compose -p zenfeed up -d
|
||||
```
|
||||
|
||||
#### Windows
|
||||
@@ -94,7 +94,7 @@ Invoke-WebRequest -Uri "https://raw.githubusercontent.com/glidea/zenfeed/main/do
|
||||
|
||||
# If you need to customize more configuration parameters, directly edit docker-compose.yml#configs.zenfeed_config.content BEFORE running the command below.
|
||||
# Configuration Docs: https://github.com/glidea/zenfeed/blob/main/docs/config.md
|
||||
$env:API_KEY = "your_apikey"; $env:TZ = "your_local_IANA"; $env:LANG = "English"; docker-compose -p zenfeed up -d
|
||||
$env:API_KEY = "your_apikey"; $env:TZ = "your_local_IANA"; $env:LANGUAGE = "English"; docker-compose -p zenfeed up -d
|
||||
```
|
||||
|
||||
### 2. Using the Web UI
|
||||
|
||||
19
README.md
19
README.md
@@ -6,16 +6,19 @@
|
||||
|
||||
**1. AI 版 RSS 阅读器**
|
||||
|
||||
* 在线服务
|
||||
* https://zenfeed.xyz
|
||||
* 或 Folo 搜索 zenfeed
|
||||
|
||||
**2. 实时 “新闻” 知识库**
|
||||
|
||||
**3. 帮你时刻关注 “指定事件” 的秘书(如 “关税政策变化”,“xx 股票波动”)**,并支持整理研究报告
|
||||
|
||||
开箱即用的公共服务站:https://zenfeed.xyz (集成 Hacker News,Github Trending,V2EX 热榜等常见公开信源)
|
||||
|
||||
每日研究报告(包含播客)(实验性质)
|
||||
每日研究报告(包含播客)(实验性质) -- 已暂停更新
|
||||
* [V2EX](https://v2ex.analysis.zenfeed.xyz/)
|
||||
* [LinuxDO](https://linuxdo.analysis.zenfeed.xyz/)
|
||||
|
||||
---
|
||||
技术说明文档见:[HLD](docs/tech/hld-zh.md)
|
||||
|
||||
## 前言
|
||||
@@ -98,7 +101,7 @@ zenfeed 是你的智能信息助手。它自动收集、筛选并总结关注的
|
||||
### 1. 安装
|
||||
> 最快 1min 拉起
|
||||
|
||||
默认使用硅基流动的 Qwen/Qwen2.5-7B-Instruct(免费) 和 Pro/BAAI/bge-m3。如果你还没有硅基账号,使用 [邀请链接](https://cloud.siliconflow.cn/i/U2VS0Q5A) 得 14 元额度
|
||||
默认使用硅基流动的 Qwen/Qwen3-8B (免费) 和 Pro/BAAI/bge-m3。如果你还没有硅基账号,使用 [邀请链接](https://cloud.siliconflow.cn/i/U2VS0Q5A) 得 14 元额度
|
||||
|
||||
如果需要使用其他厂商或模型,或自定义部署:请编辑下方 **docker-compose.yml**#configs.zenfeed_config.content.
|
||||
参考 [配置文档](https://github.com/glidea/zenfeed/blob/main/docs/config-zh.md)
|
||||
@@ -142,6 +145,14 @@ $env:API_KEY = "硅基流动apikey"; docker-compose -p zenfeed up -d
|
||||
以 Cherry Studio 为例,配置 MCP 并连接到 Zenfeed,见 [Cherry Studio MCP](docs/cherry-studio-mcp.md)
|
||||
> 默认地址 http://localhost:1301/sse
|
||||
|
||||
### 后续
|
||||
|
||||
zenfeed 提供了超多的自定义配置,还有很多玩法等待你挖掘。详细请查阅[文档](/docs/)
|
||||
|
||||
### Roadmap
|
||||
|
||||
[Roadmap](/docs/roadmap-zh.md)
|
||||
|
||||
## 欢迎加群讨论
|
||||
> 使用问题请提 Issue,谢绝微信私聊,这样也能帮助有类似问题的朋友
|
||||
|
||||
|
||||
@@ -1,19 +1,22 @@
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :--------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | :------------- |
|
||||
| `timezone` | `string` | 应用的时区。例如 `Asia/Shanghai`。 | 服务器本地时区 | 否 |
|
||||
| `log` | `object` | 日志配置。详见下方的 **日志配置** 部分。 | (见具体字段) | 否 |
|
||||
| `api` | `object` | API 配置。详见下方的 **API 配置** 部分。 | (见具体字段) | 否 |
|
||||
| `llms` | `列表` | 大语言模型 (LLM) 配置。会被其他配置部分引用。详见下方的 **LLM 配置** 部分。 | `[]` | 是 (至少 1 个) |
|
||||
| `scrape` | `object` | 抓取配置。详见下方的 **抓取配置** 部分。 | (见具体字段) | 否 |
|
||||
| `storage` | `object` | 存储配置。详见下方的 **存储配置** 部分。 | (见具体字段) | 否 |
|
||||
| `scheduls` | `object` | 用于监控 Feed 的调度配置 (也称为监控规则)。详见下方的 **调度配置** 部分。 | (见具体字段) | 否 |
|
||||
| `notify` | `object` | 通知配置。它接收来自调度模块的结果,通过路由配置进行分组,并通过通知渠道发送给通知接收者。详见下方的 **通知配置**, **通知路由**, **通知接收者**, **通知渠道** 部分。 | (见具体字段) | 是 |
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :---------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------- | :------------- |
|
||||
| `timezone` | `string` | 应用的时区。例如 `Asia/Shanghai`。 | 服务器本地时区 | 否 |
|
||||
| `telemetry` | `object` | Telemetry 配置。详见下方的 **Telemetry 配置** 部分。 | (见具体字段) | 否 |
|
||||
| `api` | `object` | API 配置。详见下方的 **API 配置** 部分。 | (见具体字段) | 否 |
|
||||
| `llms` | `列表` | 大语言模型 (LLM) 配置。会被其他配置部分引用。详见下方的 **LLM 配置** 部分。 | `[]` | 是 (至少 1 个) |
|
||||
| `jina` | `object` | Jina AI 配置。详见下方的 **Jina AI 配置** 部分。 | (见具体字段) | 否 |
|
||||
| `scrape` | `object` | 抓取配置。详见下方的 **抓取配置** 部分。 | (见具体字段) | 否 |
|
||||
| `storage` | `object` | 存储配置。详见下方的 **存储配置** 部分。 | (见具体字段) | 否 |
|
||||
| `scheduls` | `object` | 用于监控 Feed 的调度配置 (也称为监控规则)。详见下方的 **调度配置** 部分。 | (见具体字段) | 否 |
|
||||
| `notify` | `object` | 通知配置。它接收来自调度模块的结果,通过路由配置进行分组,并通过通知渠道发送给通知接收者。详见下方的 **通知配置**, **通知路由**, **通知接收者**, **通知渠道** 部分。 | (见具体字段) | 是 |
|
||||
|
||||
### 日志配置 (`log`)
|
||||
### Telemetry 配置 (`telemetry`)
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :---------- | :------- | :--------------------------------------------------------- | :----- | :------- |
|
||||
| `log.level` | `string` | 日志级别, 可选值为 `debug`, `info`, `warn`, `error` 之一。 | `info` | 否 |
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :-------------------- | :------- | :----------------------------------------------------------------------------- | :----------- | :------- |
|
||||
| `telemetry.address` | `string` | 暴露 Prometheus 指标 & pprof。 | | 否 |
|
||||
| `telemetry.log` | `object` | Telemetry 相关的日志配置。 | (见具体字段) | 否 |
|
||||
| `telemetry.log.level` | `string` | Telemetry 相关消息的日志级别, 可选值为 `debug`, `info`, `warn`, `error` 之一。 | `info` | 否 |
|
||||
|
||||
### API 配置 (`api`)
|
||||
|
||||
@@ -40,6 +43,14 @@
|
||||
| `llms[].embedding_model` | `string` | LLM 的 Embedding 模型。例如 `text-embedding-3-small`。如果用于 Embedding,则不能为空。如果此 LLM 被使用,则不能与 `model` 同时为空。**注意:** 初次使用后请勿直接修改,应添加新的 LLM 配置。 | | 条件性必需 |
|
||||
| `llms[].temperature` | `float32` | LLM 的温度 (0-2)。 | `0.0` | 否 |
|
||||
|
||||
### Jina AI 配置 (`jina`)
|
||||
|
||||
此部分用于配置 Jina AI Reader API 的相关参数,主要供重写规则中的 `crawl_by_jina` 类型使用。
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :----------- | :------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----- | :------- |
|
||||
| `jina.token` | `string` | Jina AI 的 API Token。从 [Jina AI API Dashboard](https://jina.ai/api-dashboard/) 获取。提供 Token 可以获得更高的服务速率限制。如果留空,将以匿名用户身份请求,速率限制较低。 | | 否 |
|
||||
|
||||
### 抓取配置 (`scrape`)
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
@@ -88,15 +99,16 @@
|
||||
|
||||
定义在存储前处理 Feed 的规则。规则按顺序应用。
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :--------------------------------------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------- | :--------------------------------------------- |
|
||||
| `...rewrites[].source_label` | `string` | 用作转换源文本的 Feed 标签。默认标签包括: `type`, `source`, `title`, `link`, `pub_time`, `content`。 | `content` | 否 |
|
||||
| `...rewrites[].skip_too_short_threshold` | `*int` | 如果设置,`source_label` 文本长度低于此阈值的 Feed 将被此规则跳过 (处理将继续进行下一条规则,如果没有更多规则则进行 Feed 存储)。有助于过滤掉过短/信息量不足的 Feed。 | `300` | 否 |
|
||||
| `...rewrites[].transform` | `object` | 配置如何转换 `source_label` 文本。详见下方的 **重写规则转换配置**。如果未设置,则直接使用 `source_label` 文本进行匹配。 | `nil` | 否 |
|
||||
| `...rewrites[].match` | `string` | 用于匹配 (转换后) 文本的简单字符串。不能与 `match_re` 同时设置。 | | 否 (使用 `match` 或 `match_re`) |
|
||||
| `...rewrites[].match_re` | `string` | 用于匹配 (转换后) 文本的正则表达式。 | `.*` (匹配所有) | 否 (使用 `match` 或 `match_re`) |
|
||||
| `...rewrites[].action` | `string` | 匹配时执行的操作: `create_or_update_label` (使用匹配/转换后的文本添加/更新标签), `drop_feed` (完全丢弃该 Feed)。 | `create_or_update_label` | 否 |
|
||||
| `...rewrites[].label` | `string` | 要创建或更新的 Feed 标签名称。 | | 是 (如果 `action` 是 `create_or_update_label`) |
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :--------------------------------------- | :----------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------- | :--------------------------------------------- |
|
||||
| `...rewrites[].if` | `字符串列表` | 用于匹配 Feed 的条件配置。如果未设置,则表示匹配所有 Feed。类似于标签过滤器,例如 `["source=github", "title!=xxx"]`。如果条件不满足,则跳过此规则。 | `[]` (匹配所有) | 否 |
|
||||
| `...rewrites[].source_label` | `string` | 用作转换源文本的 Feed 标签。默认标签包括: `type`, `source`, `title`, `link`, `pub_time`, `content`。 | `content` | 否 |
|
||||
| `...rewrites[].skip_too_short_threshold` | `*int` | 如果设置,`source_label` 文本长度低于此阈值的 Feed 将被此规则跳过 (处理将继续进行下一条规则,如果没有更多规则则进行 Feed 存储)。有助于过滤掉过短/信息量不足的 Feed。 | `300` | 否 |
|
||||
| `...rewrites[].transform` | `object` | 配置如何转换 `source_label` 文本。详见下方的 **重写规则转换配置**。如果未设置,则直接使用 `source_label` 文本进行匹配。 | `nil` | 否 |
|
||||
| `...rewrites[].match` | `string` | 用于匹配 (转换后) 文本的简单字符串。不能与 `match_re` 同时设置。 | | 否 (使用 `match` 或 `match_re`) |
|
||||
| `...rewrites[].match_re` | `string` | 用于匹配 (转换后) 文本的正则表达式。 | `.*` (匹配所有) | 否 (使用 `match` 或 `match_re`) |
|
||||
| `...rewrites[].action` | `string` | 匹配时执行的操作: `create_or_update_label` (使用匹配/转换后的文本添加/更新标签), `drop_feed` (完全丢弃该 Feed)。 | `create_or_update_label` | 否 |
|
||||
| `...rewrites[].label` | `string` | 要创建或更新的 Feed 标签名称。 | | 是 (如果 `action` 是 `create_or_update_label`) |
|
||||
|
||||
### 重写规则转换配置 (`storage.feed.rewrites[].transform`)
|
||||
|
||||
@@ -106,10 +118,13 @@
|
||||
|
||||
### 重写规则转换为文本配置 (`storage.feed.rewrites[].transform.to_text`)
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :------------------ | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---------------------- | :------- |
|
||||
| `...to_text.llm` | `string` | 用于转换的 LLM 名称 (来自 `llms` 部分)。 | `llms` 部分中的默认 LLM | 否 |
|
||||
| `...to_text.prompt` | `string` | 用于转换的 Prompt。源文本将被注入。可以使用 Go 模板语法引用内置 Prompt: `{{ .summary }}`, `{{ .category }}`, `{{ .tags }}`, `{{ .score }}`, `{{ .comment_confucius }}`, `{{ .summary_html_snippet }}`。 | | 是 |
|
||||
此配置定义了如何将 `source_label` 的文本进行转换。
|
||||
|
||||
| 字段 | 类型 | 描述 | 默认值 | 是否必需 |
|
||||
| :------------------ | :------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------- | :--------------------------- |
|
||||
| `...to_text.type` | `string` | 转换的类型。可选值:<ul><li>`prompt` (默认): 使用 LLM 和指定的 Prompt 转换源文本。</li><li>`crawl`: 将源文本视为 URL,直接抓取该 URL 指向的网页内容,并将其转换为 Markdown 格式。此方式为本地抓取,会尝试遵循 `robots.txt`。</li><li>`crawl_by_jina`: 将源文本视为 URL,通过 [Jina AI Reader API](https://jina.ai/reader/) 抓取和处理网页内容,并返回 Markdown。功能可能更强大,例如处理动态页面,但依赖 Jina AI 服务。</li></ul> | `prompt` | 否 |
|
||||
| `...to_text.llm` | `string` | **仅当 `type` 为 `prompt` 时有效。** 用于转换的 LLM 名称 (来自 `llms` 部分)。如果未指定,将使用在 `llms` 部分中标记为 `default: true` 的 LLM。 | `llms` 部分中的默认 LLM | 否 |
|
||||
| `...to_text.prompt` | `string` | **仅当 `type` 为 `prompt` 时有效。** 用于转换的 Prompt。源文本将被注入。可以使用 Go 模板语法引用内置 Prompt: `{{ .summary }}`, `{{ .category }}`, `{{ .tags }}`, `{{ .score }}`, `{{ .comment_confucius }}`, `{{ .summary_html_snippet }}`。 | | 是 (如果 `type` 是 `prompt`) |
|
||||
|
||||
### 调度配置 (`scheduls`)
|
||||
|
||||
|
||||
267
docs/config.md
267
docs/config.md
@@ -1,181 +1,196 @@
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :------- | :----- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------- | :-------- |
|
||||
| timezone | string | The timezone of the app. e.g. `Asia/Shanghai`. | server's local timezone | No |
|
||||
| log | object | The log config. See **Log Configuration** section below. | (see fields) | No |
|
||||
| api | object | The API config. See **API Configuration** section below. | (see fields) | No |
|
||||
| llms | list | The LLMs config. Refered by other config sections. See **LLM Configuration** section below. | `[]` | Yes (>=1) |
|
||||
| scrape | object | The scrape config. See **Scrape Configuration** section below. | (see fields) | No |
|
||||
| storage | object | The storage config. See **Storage Configuration** section below. | (see fields) | No |
|
||||
| scheduls | object | The scheduls config for monitoring feeds (aka monitoring rules). See **Scheduls Configuration** section below. | (see fields) | No |
|
||||
| notify | object | The notify config. It receives results from scheduls, groups them via route config, and sends to receivers via channels. See **Notify Configuration**, **Notify Route**, **Notify Receiver**, **Notify Channels** sections below. | (see fields) | Yes |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :---------- | :------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------- | :--------------- |
|
||||
| `timezone` | `string` | The application's timezone. E.g., `Asia/Shanghai`. | Server's local timezone | No |
|
||||
| `telemetry` | `object` | Telemetry configuration. See the **Telemetry Configuration** section below. | (See specific fields) | No |
|
||||
| `api` | `object` | API configuration. See the **API Configuration** section below. | (See specific fields) | No |
|
||||
| `llms` | `list` | Large Language Model (LLM) configuration. Referenced by other configuration sections. See the **LLM Configuration** section below. | `[]` | Yes (at least 1) |
|
||||
| `jina` | `object` | Jina AI configuration. See the **Jina AI Configuration** section below. | (See specific fields) | No |
|
||||
| `scrape` | `object` | Scrape configuration. See the **Scrape Configuration** section below. | (See specific fields) | No |
|
||||
| `storage` | `object` | Storage configuration. See the **Storage Configuration** section below. | (See specific fields) | No |
|
||||
| `scheduls` | `object` | Scheduling configuration for monitoring feeds (also known as monitoring rules). See the **Scheduling Configuration** section below. | (See specific fields) | No |
|
||||
| `notify` | `object` | Notification configuration. It receives results from the scheduling module, groups them via routing configuration, and sends them to notification receivers via notification channels. See the **Notification Configuration**, **Notification Routing**, **Notification Receivers**, **Notification Channels** sections below. | (See specific fields) | Yes |
|
||||
|
||||
### Log Configuration (`log`)
|
||||
### Telemetry Configuration (`telemetry`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :---------- | :----- | :-------------------------------------------------- | :------ | :------- |
|
||||
| `log.level` | string | Log level, one of `debug`, `info`, `warn`, `error`. | `info` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :-------------------- | :------- | :--------------------------------------------------------------------------------- | :-------------------- | :------- |
|
||||
| `telemetry.address` | `string` | Address on which Prometheus metrics & pprof are exposed. | | No |
|
||||
| `telemetry.log` | `object` | Log configuration related to telemetry. | (See specific fields) | No |
|
||||
| `telemetry.log.level` | `string` | Log level for telemetry-related messages, one of `debug`, `info`, `warn`, `error`. | `info` | No |
|
||||
|
||||
**API Configuration (`api`)**
|
||||
### API Configuration (`api`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :----------------- | :----- | :------------------------------------------------------------------------------------------------------------------ | :---------------------------- | :------------------------------------- |
|
||||
| `api.http` | object | The HTTP API config. | (see fields) | No |
|
||||
| `api.http.address` | string | The address (`[host]:port`) of the HTTP API. e.g. `0.0.0.0:1300`. Cannot be changed after the app is running. | `:1300` | No |
|
||||
| `api.mcp` | object | The MCP API config. | (see fields) | No |
|
||||
| `api.mcp.address` | string | The address (`[host]:port`) of the MCP API. e.g. `0.0.0.0:1301`. Cannot be changed after the app is running. | `:1301` | No |
|
||||
| `api.llm` | string | The LLM name for summarizing feeds. e.g. `my-favorite-gemini-king`. Refers to an LLM defined in the `llms` section. | default LLM in `llms` section | Yes (if summarization feature is used) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :----------------- | :------- | :--------------------------------------------------------------------------------------------------------------------------- | :---------------------------- | :--------------------- |
|
||||
| `api.http` | `object` | HTTP API configuration. | (See specific fields) | No |
|
||||
| `api.http.address` | `string` | Address for the HTTP API (`[host]:port`). E.g., `0.0.0.0:1300`. Cannot be changed after the application starts. | `:1300` | No |
|
||||
| `api.mcp` | `object` | MCP API configuration. | (See specific fields) | No |
|
||||
| `api.mcp.address` | `string` | Address for the MCP API (`[host]:port`). E.g., `0.0.0.0:1301`. Cannot be changed after the application starts. | `:1301` | No |
|
||||
| `api.llm` | `string` | Name of the LLM used for summarizing feeds. E.g., `my-favorite-gemini-king`. Refers to an LLM defined in the `llms` section. | Default LLM in `llms` section | Yes (if using summary) |
|
||||
|
||||
### LLM Configuration (`llms[]`)
|
||||
|
||||
This section defines a list of available Large Language Models. At least one LLM configuration is required.
|
||||
This section defines the list of available Large Language Models. At least one LLM configuration is required.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :----------------------- | :------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------- | :------------------------------------------------------------- |
|
||||
| `llms[].name` | string | The name (or 'id') of the LLM. e.g. `my-favorite-gemini-king`. Used to refer to this LLM in other sections (`api.llm`, `storage.feed.embedding_llm`, etc.). | | Yes |
|
||||
| `llms[].default` | bool | Whether this LLM is the default LLM. Only one LLM can be the default. | `false` | No (but one must be `true` if default behavior is relied upon) |
|
||||
| `llms[].provider` | string | The provider of the LLM, one of `openai`, `openrouter`, `deepseek`, `gemini`, `volc`, `siliconflow`. e.g. `openai`. | | Yes |
|
||||
| `llms[].endpoint` | string | The custom endpoint of the LLM. e.g. `https://api.openai.com/v1`. | (provider specific default) | No |
|
||||
| `llms[].api_key` | string | The API key of the LLM. | | Yes |
|
||||
| `llms[].model` | string | The model of the LLM. e.g. `gpt-4o-mini`. Cannot be empty if used for generation tasks (like summarization). Cannot be empty with `embedding_model` at same time if this LLM is used. | | Conditionally Yes |
|
||||
| `llms[].embedding_model` | string | The embedding model of the LLM. e.g. `text-embedding-3-small`. Cannot be empty if used for embedding. Cannot be empty with `model` at same time if this LLM is used. **NOTE:** Do not modify after initial use; add a new LLM config instead. | | Conditionally Yes |
|
||||
| `llms[].temperature` | float32 | The temperature (0-2) of the LLM. | `0.0` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :----------------------- | :-------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------------------------- | :--------------------------------------------------------- |
|
||||
| `llms[].name` | `string` | Name (or 'id') of the LLM. E.g., `my-favorite-gemini-king`. Used to refer to this LLM in other configuration sections (e.g., `api.llm`, `storage.feed.embedding_llm`). | | Yes |
|
||||
| `llms[].default` | `bool` | Whether this LLM is the default LLM. Only one LLM can be the default. | `false` | No (but one must be `true` if relying on default behavior) |
|
||||
| `llms[].provider` | `string` | Provider of the LLM, one of `openai`, `openrouter`, `deepseek`, `gemini`, `volc`, `siliconflow`. E.g., `openai`. | | Yes |
|
||||
| `llms[].endpoint` | `string` | Custom endpoint for the LLM. E.g., `https://api.openai.com/v1`. | (Provider-specific default) | No |
|
||||
| `llms[].api_key` | `string` | API key for the LLM. | | Yes |
|
||||
| `llms[].model` | `string` | Model of the LLM. E.g., `gpt-4o-mini`. Cannot be empty if used for generation tasks (e.g., summarization). If this LLM is used, `model` and `embedding_model` cannot both be empty. | | Conditionally Required |
|
||||
| `llms[].embedding_model` | `string` | Embedding model of the LLM. E.g., `text-embedding-3-small`. Cannot be empty if used for embedding. If this LLM is used, `model` and `embedding_model` cannot both be empty. **Note:** Do not modify directly after initial use; add a new LLM configuration instead. | | Conditionally Required |
|
||||
| `llms[].temperature` | `float32` | Temperature of the LLM (0-2). | `0.0` | No |
|
||||
|
||||
### Jina AI Configuration (`jina`)
|
||||
|
||||
This section configures parameters related to the Jina AI Reader API, primarily used by the `crawl_by_jina` type in rewrite rules.
|
||||
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :----------- | :------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | :------- |
|
||||
| `jina.token` | `string` | API Token for Jina AI. Obtain from [Jina AI API Dashboard](https://jina.ai/api-dashboard/). Providing a token grants higher service rate limits. If left empty, requests will be made as an anonymous user with lower rate limits. | | No |
|
||||
|
||||
### Scrape Configuration (`scrape`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :----------------------- | :-------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | :-------------------------------- |
|
||||
| `scrape.past` | duration | The lookback time window for scraping feeds. e.g. `1h` means only scrape feeds in the past 1 hour. | `24h` | No |
|
||||
| `scrape.interval` | duration | How often to scrape each source (global default). e.g. `1h`. | `1h` | No |
|
||||
| `scrape.rsshub_endpoint` | string | The endpoint of the RSSHub. You can deploy your own or use a public one (see [RSSHub Docs](https://docs.rsshub.app/guide/instances)). e.g. `https://rsshub.app`. | | Yes (if `rsshub_route_path` used) |
|
||||
| `scrape.sources` | list of objects | The sources for scraping feeds. See **Scrape Source Configuration** below. | `[]` | Yes (at least one) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :----------------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | :----------------------------------- |
|
||||
| `scrape.past` | `time.Duration` | Time window to look back when scraping feeds. E.g., `1h` means only scrape feeds from the past 1 hour. | `24h` | No |
|
||||
| `scrape.interval` | `time.Duration` | Frequency to scrape each source (global default). E.g., `1h`. | `1h` | No |
|
||||
| `scrape.rsshub_endpoint` | `string` | Endpoint for RSSHub. You can deploy your own RSSHub server or use a public instance (see [RSSHub Documentation](https://docs.rsshub.app/guide/instances)). E.g., `https://rsshub.app`. | | Yes (if `rsshub_route_path` is used) |
|
||||
| `scrape.sources` | `list of objects` | List of sources to scrape feeds from. See **Scrape Source Configuration** below. | `[]` | Yes (at least one) |
|
||||
|
||||
### Scrape Source Configuration (`scrape.sources[]`)
|
||||
|
||||
Describes each source to be scraped.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :-------------------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------- | :-------------- | :-------------------------- |
|
||||
| `scrape.sources[].interval` | duration | How often to scrape this specific source. Overrides the global `scrape.interval`. | global interval | No |
|
||||
| `scrape.sources[].name` | string | The name of the source. Used for labeling feeds. | | Yes |
|
||||
| `scrape.sources[].labels` | map[string]string | Additional key-value labels to add to feeds from this source. | `{}` | No |
|
||||
| `scrape.sources[].rss` | object | The RSS config for this source. See **Scrape Source RSS Configuration** below. Only one source type (e.g., RSS) can be set per source. | `nil` | Yes (if source type is RSS) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :-------------------------- | :------------------ | :------------------------------------------------------------------------------------------------------------------------------------ | :---------------- | :-------------------------- |
|
||||
| `scrape.sources[].interval` | `time.Duration` | Frequency to scrape this specific source. Overrides global `scrape.interval`. | Global `interval` | No |
|
||||
| `scrape.sources[].name` | `string` | Name of the source. Used to tag feeds. | | Yes |
|
||||
| `scrape.sources[].labels` | `map[string]string` | Additional key-value labels to attach to feeds from this source. | `{}` | No |
|
||||
| `scrape.sources[].rss` | `object` | RSS configuration for this source. See **Scrape Source RSS Configuration** below. Each source can only have one type set (e.g., RSS). | `nil` | Yes (if source type is RSS) |
|
||||
|
||||
### Scrape Source RSS Configuration (`scrape.sources[].rss`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :--------------------------------------- | :----- | :------------------------------------------------------------------------------------------------------------------------------------ | :------ | :---------------------------------------------------- |
|
||||
| `scrape.sources[].rss.url` | string | The full URL of the RSS feed. e.g. `http://localhost:1200/github/trending/daily/any`. Cannot be set if `rsshub_route_path` is set. | | Yes (unless `rsshub_route_path` is set) |
|
||||
| `scrape.sources[].rss.rsshub_route_path` | string | The RSSHub route path. e.g. `github/trending/daily/any`. Will be joined with `scrape.rsshub_endpoint`. Cannot be set if `url` is set. | | Yes (unless `url` is set, requires `rsshub_endpoint`) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :--------------------------------------- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | :-------------------------------------------------------- |
|
||||
| `scrape.sources[].rss.url` | `string` | Full URL of the RSS feed. E.g., `http://localhost:1200/github/trending/daily/any`. Cannot be set if `rsshub_route_path` is set. | | Yes (unless `rsshub_route_path` is set) |
|
||||
| `scrape.sources[].rss.rsshub_route_path` | `string` | RSSHub route path. E.g., `github/trending/daily/any`. Will be concatenated with `scrape.rsshub_endpoint` to form the final URL. Cannot be set if `url` is set. | | Yes (unless `url` is set, and requires `rsshub_endpoint`) |
|
||||
|
||||
### Storage Configuration (`storage`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :------------- | :----- | :------------------------------------------------------------------------------- | :----------- | :------- |
|
||||
| `storage.dir` | string | The base directory for all storages. Cannot be changed after the app is running. | `./data` | No |
|
||||
| `storage.feed` | object | The feed storage config. See **Feed Storage Configuration** below. | (see fields) | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :------------- | :------- | :------------------------------------------------------------------------------ | :-------------------- | :------- |
|
||||
| `storage.dir` | `string` | Base directory for all storage. Cannot be changed after the application starts. | `./data` | No |
|
||||
| `storage.feed` | `object` | Feed storage configuration. See **Feed Storage Configuration** below. | (See specific fields) | No |
|
||||
|
||||
### Feed Storage Configuration (`storage.feed`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :---------------------------- | :-------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------- | :------- |
|
||||
| `storage.feed.rewrites` | list of objects | How to process each feed before storing it. Inspired by Prometheus relabeling. See **Rewrite Rule Configuration** below. | `[]` | No |
|
||||
| `storage.feed.flush_interval` | duration | How often to flush feed storage to the database. Higher value risks data loss but improves performance. | `200ms` | No |
|
||||
| `storage.feed.embedding_llm` | string | The name of the LLM (from `llms` section) used for embedding feeds. Affects semantic search accuracy. **NOTE:** If changing, keep the old LLM config defined as past data relies on it. | default LLM in `llms` section | No |
|
||||
| `storage.feed.retention` | duration | How long to keep a feed. | `8d` | No |
|
||||
| `storage.feed.block_duration` | duration | How long to keep each time-based feed storage block (similar to Prometheus TSDB Block). | `25h` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :---------------------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---------------------------- | :------- |
|
||||
| `storage.feed.rewrites` | `list of objects` | How to process each feed before storing it. Inspired by Prometheus relabeling. See **Rewrite Rule Configuration** below. | `[]` | No |
|
||||
| `storage.feed.flush_interval` | `time.Duration` | Frequency to flush feed storage to the database. Higher values risk more data loss but reduce disk operations and improve performance. | `200ms` | No |
|
||||
| `storage.feed.embedding_llm` | `string` | Name of the LLM used for feed embedding (from `llms` section). Significantly impacts semantic search accuracy. **Note:** If switching, keep the old LLM configuration as past data is implicitly associated with it, otherwise past data cannot be semantically searched. | Default LLM in `llms` section | No |
|
||||
| `storage.feed.retention` | `time.Duration` | Retention duration for feeds. | `8d` | No |
|
||||
| `storage.feed.block_duration` | `time.Duration` | Retention duration for each time-based feed storage block (similar to Prometheus TSDB Block). | `25h` | No |
|
||||
|
||||
### Rewrite Rule Configuration (`storage.feed.rewrites[]`)
|
||||
|
||||
Defines rules to process feeds before storage. Rules are applied in order.
|
||||
Defines rules to process feeds before storage. Rules are applied sequentially.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :--------------------------------------- | :----- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :----------------------- | :-------------------------------------------- |
|
||||
| `...rewrites[].source_label` | string | The feed label to use as the source text for transformation. Default labels: `type`, `source`, `title`, `link`, `pub_time`, `content`. | `content` | No |
|
||||
| `...rewrites[].skip_too_short_threshold` | *int | If set, feeds where the `source_label` text length is below this threshold are skipped by this rule (processing continues with the next rule or feed storage if no more rules). Helps filter short/uninformative feeds. | `300` | No |
|
||||
| `...rewrites[].transform` | object | Configures how to transform the `source_label` text. See **Rewrite Rule Transform Configuration** below. If unset, the `source_label` text is used directly for matching. | `nil` | No |
|
||||
| `...rewrites[].match` | string | A simple string to match against the (transformed) text. Cannot be set with `match_re`. | | No (use `match` or `match_re`) |
|
||||
| `...rewrites[].match_re` | string | A regular expression to match against the (transformed) text. | `.*` (matches all) | No (use `match` or `match_re`) |
|
||||
| `...rewrites[].action` | string | Action to perform if matched: `create_or_update_label` (adds/updates a label with the matched/transformed text), `drop_feed` (discards the feed entirely). | `create_or_update_label` | No |
|
||||
| `...rewrites[].label` | string | The feed label name to create or update. | | Yes (if `action` is `create_or_update_label`) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :--------------------------------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :----------------------- | :-------------------------------------------- |
|
||||
| `...rewrites[].if` | `list of strings` | Conditions to match feeds. If not set, matches all feeds. Similar to label filters, e.g., `["source=github", "title!=xxx"]`. If conditions are not met, this rule is skipped. | `[]` (matches all) | No |
|
||||
| `...rewrites[].source_label` | `string` | Feed label used as the source text for transformation. Default labels include: `type`, `source`, `title`, `link`, `pub_time`, `content`. | `content` | No |
|
||||
| `...rewrites[].skip_too_short_threshold` | `*int` | If set, feeds where the `source_label` text length is below this threshold will be skipped by this rule (processing continues to the next rule, or feed storage if no more rules). Helps filter out feeds that are too short/uninformative. | `300` | No |
|
||||
| `...rewrites[].transform` | `object` | Configures how to transform the `source_label` text. See **Rewrite Rule Transform Configuration** below. If not set, the `source_label` text is used directly for matching. | `nil` | No |
|
||||
| `...rewrites[].match` | `string` | Simple string to match against the (transformed) text. Cannot be set with `match_re`. | | No (use `match` or `match_re`) |
|
||||
| `...rewrites[].match_re` | `string` | Regular expression to match against the (transformed) text. | `.*` (matches all) | No (use `match` or `match_re`) |
|
||||
| `...rewrites[].action` | `string` | Action to perform on match: `create_or_update_label` (adds/updates a label with the matched/transformed text), `drop_feed` (discards the feed entirely). | `create_or_update_label` | No |
|
||||
| `...rewrites[].label` | `string` | Name of the feed label to create or update. | | Yes (if `action` is `create_or_update_label`) |
|
||||
|
||||
### Rewrite Rule Transform Configuration (`storage.feed.rewrites[].transform`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :--------------------- | :----- | :---------------------------------------------------------------------------------------------------------- | :------ | :------- |
|
||||
| `...transform.to_text` | object | Transform the source text to text using an LLM. See **Rewrite Rule Transform To Text Configuration** below. | `nil` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :--------------------- | :------- | :--------------------------------------------------------------------------------------------- | :------------ | :------- |
|
||||
| `...transform.to_text` | `object` | Transforms source text to text using an LLM. See **Rewrite Rule To Text Configuration** below. | `nil` | No |
|
||||
|
||||
### Rewrite Rule Transform To Text Configuration (`storage.feed.rewrites[].transform.to_text`)
|
||||
### Rewrite Rule To Text Configuration (`storage.feed.rewrites[].transform.to_text`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :------------------ | :----- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | :---------------------------- | :------- |
|
||||
| `...to_text.llm` | string | The name of the LLM (from `llms` section) to use for transformation. | default LLM in `llms` section | No |
|
||||
| `...to_text.prompt` | string | The prompt used for transformation. The source text is injected. Go template syntax can refer to built-in prompts: `{{ .summary }}`, `{{ .category }}`, `{{ .tags }}`, `{{ .score }}`, `{{ .comment_confucius }}`, `{{ .summary_html_snippet }}`. | | Yes |
|
||||
This configuration defines how to transform the text from `source_label`.
|
||||
|
||||
### Scheduls Configuration (`scheduls`)
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :------------------ | :------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------- | :-------------------------- |
|
||||
| `...to_text.type` | `string` | Type of transformation. Options: <ul><li>`prompt` (default): Uses an LLM and a specified prompt to transform the source text.</li><li>`crawl`: Treats the source text as a URL, directly crawls the web page content pointed to by the URL, and converts it to Markdown format. This method performs local crawling and attempts to follow `robots.txt`.</li><li>`crawl_by_jina`: Treats the source text as a URL, crawls and processes web page content via the [Jina AI Reader API](https://jina.ai/reader/), and returns Markdown. Potentially more powerful, e.g., for handling dynamic pages, but relies on the Jina AI service.</li></ul> | `prompt` | No |
|
||||
| `...to_text.llm` | `string` | **Only valid if `type` is `prompt`.** Name of the LLM used for transformation (from `llms` section). If not specified, the LLM marked as `default: true` in the `llms` section will be used. | Default LLM in `llms` section | No |
|
||||
| `...to_text.prompt` | `string` | **Only valid if `type` is `prompt`.** Prompt used for transformation. The source text will be injected. You can use Go template syntax to reference built-in prompts: `{{ .summary }}`, `{{ .category }}`, `{{ .tags }}`, `{{ .score }}`, `{{ .comment_confucius }}`, `{{ .summary_html_snippet }}`. | | Yes (if `type` is `prompt`) |
|
||||
|
||||
### Scheduling Configuration (`scheduls`)
|
||||
|
||||
Defines rules for querying and monitoring feeds.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :--------------- | :-------------- | :------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | :------- |
|
||||
| `scheduls.rules` | list of objects | The rules for scheduling feeds. Each rule's result (matched feeds) is sent to the notify route. See **Scheduls Rule Configuration** section below. | `[]` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :--------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | :------- |
|
||||
| `scheduls.rules` | `list of objects` | List of rules for scheduling feeds. The results of each rule (matched feeds) will be sent to notification routes. See **Scheduling Rule Configuration** below. | `[]` | No |
|
||||
|
||||
### Scheduls Rule Configuration (`scheduls.rules[]`)
|
||||
### Scheduling Rule Configuration (`scheduls.rules[]`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :-------------------------------- | :-------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | :--------------------------------------- |
|
||||
| `scheduls.rules[].name` | string | The name of the rule. | | Yes |
|
||||
| `scheduls.rules[].query` | string | The semantic query to find relevant feeds. Optional. | | No |
|
||||
| `scheduls.rules[].threshold` | float32 | Relevance score threshold (0-1) to filter semantic query results. Only works if `query` is set. | `0.6` | No |
|
||||
| `scheduls.rules[].label_filters` | list of strings | Filters based on feed labels (exact match or non-match). e.g. `["category=tech", "source!=github"]`. | `[]` | No |
|
||||
| `scheduls.rules[].every_day` | string | Query range relative to the end of each day. Format: `start~end` (HH:MM). e.g., `00:00~23:59` (today), `-22:00~07:00` (yesterday 22:00 to today 07:00). Cannot be set with `watch_interval`. | | No (use `every_day` or `watch_interval`) |
|
||||
| `scheduls.rules[].watch_interval` | duration | How often to run the query. e.g. `10m`. Cannot be set with `every_day`. | `10m` | No (use `every_day` or `watch_interval`) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :-------------------------------- | :---------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------ | :--------------------------------------- |
|
||||
| `scheduls.rules[].name` | `string` | Name of the rule. | | Yes |
|
||||
| `scheduls.rules[].query` | `string` | Semantic query to find relevant feeds. Optional. | | No |
|
||||
| `scheduls.rules[].threshold` | `float32` | Relevance score threshold (0-1) for filtering semantic query results. Only effective if `query` is set. | `0.6` | No |
|
||||
| `scheduls.rules[].label_filters` | `list of strings` | Filters based on feed labels (equals or not equals). E.g., `["category=tech", "source!=github"]`. | `[]` | No |
|
||||
| `scheduls.rules[].every_day` | `string` | Query range relative to the end of each day. Format: `start~end` (HH:MM). E.g., `00:00~23:59` (today), `-22:00~07:00` (yesterday 22:00 to today 07:00). Cannot be set with `watch_interval`. | | No (use `every_day` or `watch_interval`) |
|
||||
| `scheduls.rules[].watch_interval` | `time.Duration` | Frequency to run the query. E.g., `10m`. Cannot be set with `every_day`. | `10m` | No (use `every_day` or `watch_interval`) |
|
||||
|
||||
### Notify Configuration (`notify`)
|
||||
### Notification Configuration (`notify`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :----------------- | :-------------- | :------------------------------------------------------------------------------------------------------------- | :----------- | :---------------------- |
|
||||
| `notify.route` | object | The main notify routing configuration. See **Notify Route Configuration** below. | (see fields) | Yes |
|
||||
| `notify.receivers` | list of objects | Defines the notification receivers (e.g., email addresses). See **Notify Receiver Configuration** below. | `[]` | Yes (at least one) |
|
||||
| `notify.channels` | object | Configures the notification channels (e.g., email SMTP settings). See **Notify Channels Configuration** below. | (see fields) | Yes (if using channels) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :----------------- | :---------------- | :-------------------------------------------------------------------------------------------------------------- | :-------------------- | :---------------------- |
|
||||
| `notify.route` | `object` | Main notification routing configuration. See **Notification Routing Configuration** below. | (See specific fields) | Yes |
|
||||
| `notify.receivers` | `list of objects` | Defines notification receivers (e.g., email addresses). See **Notification Receiver Configuration** below. | `[]` | Yes (at least one) |
|
||||
| `notify.channels` | `object` | Configures notification channels (e.g., email SMTP settings). See **Notification Channel Configuration** below. | (See specific fields) | Yes (if using channels) |
|
||||
|
||||
### Notify Route Configuration (`notify.route` and `notify.route.sub_routes[]`)
|
||||
### Notification Routing Configuration (`notify.route` and `notify.route.sub_routes[]`)
|
||||
|
||||
This structure can be nested using `sub_routes`. A feed is matched against sub-routes first; if no sub-route matches, the parent route's configuration applies.
|
||||
This structure can be nested using `sub_routes`. Feeds will first try to match sub-routes; if no sub-route matches, the parent route's configuration is applied.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :--------------------------------- | :-------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------- | :------------------- |
|
||||
| `...matchers` (only in sub-routes) | list of strings | Label matchers to determine if a feed belongs to this sub-route. e.g. `["category=tech", "source!=github"]`. | `[]` | Yes (for sub-routes) |
|
||||
| `...receivers` | list of strings | Names of the receivers (defined in `notify.receivers`) to send notifications for feeds matching this route. | `[]` | Yes (at least one) |
|
||||
| `...group_by` | list of strings | Labels to group feeds by before sending notifications. Each group results in a separate notification. e.g., `["source", "category"]`. | `[]` | Yes (at least one) |
|
||||
| `...source_label` | string | The source label from which to extract each feed's content for summarization. Defaults to all labels. It is strongly recommended to set it to 'summary' to reduce context length. | all labels | No |
|
||||
| `...summary_prompt` | string | The prompt to summarize the feeds of each group. | | No |
|
||||
| `...llm` | string | The LLM name to use. Default is the default LLM in `llms` section. A large context length LLM is recommended. | default LLM in `llms` section | No |
|
||||
| `...compress_by_related_threshold` | *float32 | If set, compresses highly similar feeds (based on semantic relatedness) within a group, sending only one representative. Threshold (0-1). Higher means more similar. | `0.85` | No |
|
||||
| `...sub_routes` | list of objects | Nested routes. Allows defining more specific routing rules. Each object follows the **Notify Route Configuration**. | `[]` | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :--------------------------------- | :---------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------- | :-------------------- |
|
||||
| `...matchers` (sub-routes only) | `list of strings` | Label matchers to determine if a feed belongs to this sub-route. E.g., `["category=tech", "source!=github"]`. | `[]` | Yes (sub-routes only) |
|
||||
| `...receivers` | `list of strings` | List of receiver names (defined in `notify.receivers`) to send notifications for feeds matching this route. | `[]` | Yes (at least one) |
|
||||
| `...group_by` | `list of strings` | List of labels to group feeds by before sending notifications. Each group results in a separate notification. E.g., `["source", "category"]`. | `[]` | Yes (at least one) |
|
||||
| `...source_label` | `string` | Source label to extract content from each feed for summarization. Defaults to all labels. Strongly recommended to set to 'summary' to reduce context length. | All labels | No |
|
||||
| `...summary_prompt` | `string` | Prompt to summarize feeds for each group. | | No |
|
||||
| `...llm` | `string` | Name of the LLM to use. Defaults to the default LLM in the `llms` section. Recommended to use an LLM with a large context length. | Default LLM in `llms` section | No |
|
||||
| `...compress_by_related_threshold` | `*float32` | If set, compresses highly similar feeds within a group based on semantic relatedness, sending only one representative. Threshold (0-1), higher means more similar. | `0.85` | No |
|
||||
| `...sub_routes` | `list of objects` | List of nested routes. Allows defining more specific routing rules. Each object follows **Notification Routing Configuration**. | `[]` | No |
|
||||
|
||||
### Notify Receiver Configuration (`notify.receivers[]`)
|
||||
### Notification Receiver Configuration (`notify.receivers[]`)
|
||||
|
||||
Defines *who* receives notifications.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :------------------------- | :----- | :----------------------------------------------- | :------ | :------------------- |
|
||||
| `notify.receivers[].name` | string | The unique name of the receiver. Used in routes. | | Yes |
|
||||
| `notify.receivers[].email` | string | The email address of the receiver. | | Yes (if using email) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :------------------------- | :------- | :------------------------------------------- | :------------ | :------------------- |
|
||||
| `notify.receivers[].name` | `string` | Unique name of the receiver. Used in routes. | | Yes |
|
||||
| `notify.receivers[].email` | `string` | Email address of the receiver. | | Yes (if using Email) |
|
||||
|
||||
### Notify Channels Configuration (`notify.channels`)
|
||||
### Notification Channel Configuration (`notify.channels`)
|
||||
|
||||
Configures *how* notifications are sent.
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :---------------------- | :----- | :--------------------------------------------------------------------------------- | :------ | :------------------- |
|
||||
| `notify.channels.email` | object | The global email channel config. See **Notify Channel Email Configuration** below. | `nil` | Yes (if using email) |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :---------------------- | :------- | :------------------------------------------------------------------------------------------ | :------------ | :------------------- |
|
||||
| `notify.channels.email` | `object` | Global Email channel configuration. See **Notification Channel Email Configuration** below. | `nil` | Yes (if using Email) |
|
||||
|
||||
### Notify Channel Email Configuration (`notify.channels.email`)
|
||||
### Notification Channel Email Configuration (`notify.channels.email`)
|
||||
|
||||
| Field | Type | Description | Default | Required |
|
||||
| :------------------------------------ | :----- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------- | :------- |
|
||||
| `...email.smtp_endpoint` | string | The SMTP server endpoint. e.g. `smtp.gmail.com:587`. | | Yes |
|
||||
| `...email.from` | string | The sender email address. | | Yes |
|
||||
| `...email.password` | string | The application password for the sender email. (For Gmail, see [Google App Passwords](https://support.google.com/mail/answer/185833)). | | Yes |
|
||||
| `...email.feed_markdown_template` | string | Markdown template for formatting each feed in the email body. Default renders the feed content. Cannot be set with `feed_html_snippet_template`. Available template variables depend on feed labels. | `{{ .content }}` | No |
|
||||
| `...email.feed_html_snippet_template` | string | HTML snippet template for formatting each feed. Cannot be set with `feed_markdown_template`. Available template variables depend on feed labels. | | No |
|
||||
| Field | Type | Description | Default Value | Required |
|
||||
| :------------------------------------ | :------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------- | :------- |
|
||||
| `...email.smtp_endpoint` | `string` | SMTP server endpoint. E.g., `smtp.gmail.com:587`. | | Yes |
|
||||
| `...email.from` | `string` | Sender's email address. | | Yes |
|
||||
| `...email.password` | `string` | App-specific password for the sender's email. (For Gmail, see [Google App Passwords](https://support.google.com/mail/answer/185833)). | | Yes |
|
||||
| `...email.feed_markdown_template` | `string` | Markdown template for formatting each feed in the email body. Renders feed content by default. Cannot be set with `feed_html_snippet_template`. Available template variables depend on feed labels. | `{{ .content }}` | No |
|
||||
| `...email.feed_html_snippet_template` | `string` | HTML snippet template for formatting each feed. Cannot be set with `feed_markdown_template`. Available template variables depend on feed labels. | | No |
|
||||
|
||||
88
docs/crawl-zh.md
Normal file
88
docs/crawl-zh.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# 使用 Zenfeed 爬虫功能
|
||||
|
||||
Zenfeed 提供了将网页内容抓取并转换为 Markdown 格式的功能。这主要通过重写规则 (`rewrites` rule) 中的 `transform.to_text.type` 配置项实现。
|
||||
|
||||
## 如何使用
|
||||
|
||||
在你的配置文件中,找到 `storage.feed.rewrites` 部分。当你定义一条重写规则时,可以通过 `transform` 字段来启用爬虫功能。
|
||||
|
||||
具体配置如下:
|
||||
|
||||
```yaml
|
||||
storage:
|
||||
feed:
|
||||
rewrites:
|
||||
- if: ["source=xxx", ...]
|
||||
source_label: "link" # 指定包含 URL 的标签,例如 feed 中的 'link' 标签
|
||||
transform:
|
||||
to_text:
|
||||
type: "crawl" # 或 "crawl_by_jina"
|
||||
# llm: "your-llm-name" # crawl 类型不需要 llm
|
||||
# prompt: "your-prompt" # crawl 类型不需要 prompt
|
||||
# match: ".*" # 可选:对抓取到的 Markdown 内容进行匹配
|
||||
action: "create_or_update_label" # 对抓取到的内容执行的动作
|
||||
label: "crawled_content" # 将抓取到的 Markdown 存储到这个新标签
|
||||
# ... 其他配置 ...
|
||||
jina: # 如果使用 crawl_by_jina,并且需要更高的速率限制(匿名ip: 20 RPM),请配置 Jina API Token
|
||||
token: "YOUR_JINA_AI_TOKEN" # 从 https://jina.ai/api-dashboard/ 获取
|
||||
```
|
||||
|
||||
### 转换类型 (`transform.to_text.type`)
|
||||
|
||||
你有以下几种选择:
|
||||
|
||||
1. **`crawl`**:
|
||||
* Zenfeed 将使用内置的本地爬虫尝试抓取 `source_label` 中指定的 URL。
|
||||
* 它会尝试遵循目标网站的 `robots.txt` 协议。
|
||||
* 适用于静态网页或结构相对简单的网站。
|
||||
|
||||
2. **`crawl_by_jina`**:
|
||||
* Zenfeed 将通过 [Jina AI Reader API](https://jina.ai/reader/) 来抓取和处理 `source_label` 中指定的 URL。
|
||||
* Jina AI 可能能更好地处理动态内容和复杂网站结构。
|
||||
* 同样遵循目标网站的 `robots.txt` 协议。
|
||||
* **依赖 Jina AI 服务**:
|
||||
* 建议在配置文件的顶层添加 `jina.token` (如上示例) 来提供你的 Jina AI API Token,以获得更高的服务速率限制。
|
||||
* 如果未提供 Token,将以匿名用户身份请求,速率限制较低。
|
||||
* 请查阅 Jina AI 的服务条款和隐私政策。
|
||||
|
||||
### 关键配置说明
|
||||
|
||||
* `source_label`: 此标签的值**必须是一个有效的 URL**。例如,如果你的 RSS Feed 中的 `link` 标签指向的是一篇包含完整文章的网页,你可以将 `source_label` 设置为 `link`。
|
||||
* `action`: 通常设置为 `create_or_update_label`,将抓取并转换后的 Markdown 内容存入一个新的标签中(由 `label` 字段指定)。
|
||||
* `label`: 指定存储抓取到的 Markdown 内容的新标签名称。
|
||||
|
||||
## 使用场景
|
||||
|
||||
**全文内容提取**:
|
||||
很多 RSS 源只提供文章摘要和原文链接。使用爬虫功能可以将原文完整内容抓取下来,转换为 Markdown 格式,方便后续的 AI 处理(如总结、打标签、分类等)或直接阅读。
|
||||
|
||||
## 免责声明
|
||||
|
||||
**在使用 Zenfeed 的爬虫功能(包括 `crawl` 和 `crawl_by_jina` 类型)前,请仔细阅读并理解以下声明。您的使用行为即表示您已接受本声明的所有条款。**
|
||||
|
||||
1. **用户责任与授权**:
|
||||
* 您将对使用爬虫功能的所有行为承担全部责任。
|
||||
* 您必须确保拥有访问、抓取和处理所提供 URL 内容的合法权利。
|
||||
* 请严格遵守目标网站的 `robots.txt` 协议、服务条款 (ToS)、版权政策以及所有相关的法律法规。
|
||||
* 不得使用本功能处理、存储或分发任何非法、侵权、诽谤、淫秽或其他令人反感的内容。
|
||||
|
||||
2. **内容准确性与完整性**:
|
||||
* 网页抓取和 Markdown 转换过程的结果可能不准确、不完整或存在偏差。这可能受到目标网站结构、反爬虫机制、动态内容渲染、网络问题等多种因素的影响。
|
||||
* Zenfeed 项目作者和贡献者不对抓取内容的准确性、完整性、及时性或质量作任何保证。
|
||||
|
||||
3. **第三方服务依赖 (`crawl_by_jina`)**:
|
||||
* `crawl_by_jina` 功能依赖于 Jina AI 提供的第三方服务。
|
||||
* Jina AI 服务的可用性、性能、数据处理政策、服务条款以及可能的费用(超出免费额度后)均由 Jina AI 自行决定。
|
||||
* 项目作者和贡献者不对 Jina AI 服务的任何方面负责。请在使用前查阅 [Jina AI 的相关条款](https://jina.ai/terms/) 和 [隐私政策](https://jina.ai/privacy/)。
|
||||
|
||||
4. **无间接或后果性损害赔偿**:
|
||||
* 在任何情况下,无论基于何种法律理论,项目作者和贡献者均不对因使用或无法使用爬虫功能而导致的任何直接、间接、偶然、特殊、惩戒性或后果性损害负责,包括但不限于利润损失、数据丢失、商誉损失或业务中断。
|
||||
|
||||
5. **法律与合规风险**:
|
||||
* 未经授权抓取、复制、存储、处理或传播受版权保护的内容,或违反网站服务条款的行为,可能违反相关法律法规,并可能导致法律纠纷或处罚。
|
||||
* 用户需自行承担因使用爬虫功能而产生的所有法律风险和责任。
|
||||
|
||||
6. **"按原样"提供**:
|
||||
* 爬虫功能按"现状"和"可用"的基础提供,不附带任何形式的明示或默示担保。
|
||||
|
||||
**强烈建议您在启用和配置爬虫功能前,仔细评估相关风险,并确保您的使用行为完全合法合规。对于任何因用户滥用或不当使用本软件(包括爬虫功能)而引起的法律纠纷、损失或损害,Zenfeed 项目作者和贡献者不承担任何责任。**
|
||||
BIN
docs/images/folo-html.png
Normal file
BIN
docs/images/folo-html.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 157 KiB |
12
docs/model-selection-zh.md
Normal file
12
docs/model-selection-zh.md
Normal file
@@ -0,0 +1,12 @@
|
||||
如果无需使用 HTML 总结,模型可以随便选择
|
||||
|
||||
## 背景 & 原则
|
||||
* Token 使用会很多,你可以想象每篇 RSS 都总结一遍会有多少消耗。所以优先选择免费模型,或者按次计费
|
||||
* HTML 生成对模型有较高要求。所以你现在知道了为什么自部署的默认总结效果比不上 https://zenfeed.xyz
|
||||
* 那为什么不支持 Markdown 呢?web 还没精力支持,你可以先用邮件日报替代
|
||||
* 总结都是后台任务,且支持有状态重试,对模型速率限制 & 稳定性没有要求
|
||||
* 所以 “1. 质量”,“2. 低价”,“3. 稳定”。首选 1,兼顾 2,无需 3
|
||||
|
||||
## 如果你对默认的模型效果不满意,首选推荐
|
||||
* **不缺钱 or “有路子”**:Gemini 2.5 Pro
|
||||
* **再便宜点的**:Gemini 2.5 Flash
|
||||
19
docs/roadmap-zh.md
Normal file
19
docs/roadmap-zh.md
Normal file
@@ -0,0 +1,19 @@
|
||||
## 短期
|
||||
* 播客
|
||||
* NotebookLM 的播客效果让人惊艳
|
||||
* 技术上复刻一个并不难,难的是没有又便宜效果又好的 TTS API(只用得起小帅的声音😭)
|
||||
* TTS 音色进步也只是近几年的事情,长期需要等成本下降
|
||||
* 短期因为我个人很喜欢播客总结(应该也很适合大家通勤),会先本地部署模型,提供给 https://zenfeed.xyz 使用
|
||||
|
||||
* epub2rss
|
||||
* 见过 rss2epub,但你绝没见过反着来的
|
||||
* 严格上这并不属于 zenfeed,顶多算生态项目吧
|
||||
* 抛开时效性,书比新闻更有价值。但当你立下 “坚持阅读” 的 flag,然后呢?
|
||||
* 这个子项目旨在实现:每日更新一章,作为 rss 暴露。在阅读新闻 RSS 时,“顺便” 把书给看了
|
||||
* 这里遵循《掌控习惯》的几个原理
|
||||
* 让它显而易见:在你的新闻阅读器里
|
||||
* 让它简便易行:配合 zenfeed 总结,更轻松地阅读要点(进一步了解原文逃不掉,但这时你已经被勾住了,相信这事已经没那么困难了)
|
||||
* 让你感觉到爽:zenfeed 阅读完后的木鱼声,嗯这算一个,确信
|
||||
|
||||
## 中长期
|
||||
* WIP
|
||||
59
docs/rss-api-zh.md
Normal file
59
docs/rss-api-zh.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# 托管源
|
||||
|
||||
## Folo
|
||||
|
||||
直接搜索 zenfeed
|
||||
|
||||
## Other
|
||||
|
||||
```bash
|
||||
https://zenfeed.xyz/rss?.... 参数用法见下方《自部署》
|
||||
|
||||
https://zenfeed.xyz/rss?label_filter=source=知乎热榜 # 你在 zenfeed.xyz 中看到的源名称
|
||||
|
||||
https://zenfeed.xyz/rss?query=AI # 语义搜索。请不要滥用,成本 cover 不住可能随时下线
|
||||
```
|
||||
|
||||
# 自部署
|
||||
|
||||
## 1. 配置(可选)
|
||||
|
||||
```yaml
|
||||
api:
|
||||
rss:
|
||||
content_html_template: | # 可自由排版搭配(go template 语法);需要确保渲染后的内容是正确的 HTML
|
||||
{{ .summary_html_snippet }} # 默认值
|
||||
```
|
||||
|
||||
## 2. Enjoy your RSS address!
|
||||
|
||||
```bash
|
||||
your_zenfeed_address/rss?label_filter=label1=value1&label_filter=label2!=value2&query=xxx
|
||||
|
||||
# e.g.
|
||||
|
||||
## Past 24h rss feed for GithubTrending
|
||||
http://localhost:1302/rss?label_filter=source=GithubTrending
|
||||
|
||||
## Past 24h rss feed for Tech category
|
||||
http://localhost:1302/rss?label_filter=category=Tech
|
||||
|
||||
## Past 24h rss feed for dynamic query
|
||||
http://localhost:1302/rss?query=特朗普最新消息
|
||||
```
|
||||
|
||||
# FAQ
|
||||
|
||||
## 添加失败怎么办?
|
||||
|
||||
部分 RSS 阅读器通过服务端间接访问 RSS 地址,如果 zenfeed 部署到本地,将无法访问
|
||||
|
||||
你需要通过内网穿透,或者 VPS 暴露到公网上,注意仅暴露 1302 端口
|
||||
|
||||
## Folo 看起来只有纯文本?
|
||||
|
||||

|
||||
|
||||
## 暗黑模式显示有问题?
|
||||
|
||||
嗯就是有问题,请使用白底背景,否则样式渲染会出现问题
|
||||
@@ -5,11 +5,13 @@
|
||||
## 1. 设计理念与哲学
|
||||
|
||||
* **Prometheus 的 `relabel_config`**: 借鉴其强大的标签重写能力。在 Prometheus 中,`relabel_config` 允许用户在采集指标前后动态地修改标签集,实现服务发现、指标过滤和路由等高级功能。`rewrite` 组件将此思想应用于信息流处理,将每一条信息(如一篇文章、一个帖子)视为一个标签集,通过规则来操作这些标签。
|
||||
* **管道 (Pipeline) 处理模式**: 信息的处理过程被设计成一个可配置的管道。每个规则是管道中的一个处理阶段,信息流经这些规则,逐步被转换和打标。这种模式使得复杂的处理逻辑可以被分解为一系列简单、独立的步骤,易于理解和维护。
|
||||
* **管道 (Pipeline) 处理模式**: 信息的处理过程被设计成一个可配置的 ETL 管道。每个规则是管道中的一个处理阶段,信息流经这些规则,逐步被转换和打标。这种模式使得复杂的处理逻辑可以被分解为一系列简单、独立的步骤,易于理解和维护。
|
||||
* **AI 能力的模块化与按需应用**: 大型语言模型 (LLM) 被视为一种强大的"转换函数"。用户可以根据需求,在规则中指定使用哪个 LLM、配合什么样的提示词 (Prompt) 来处理特定的文本内容(例如,从文章正文生成摘要、分类、评分等)。这种设计使得 AI 能力可以灵活地嵌入到信息处理的任意环节。
|
||||
* **内容即标签 (Content as Labels)**: 这是 zenfeed 的一个核心抽象。原始信息(如标题、正文、链接、发布时间)和经过 AI 或规则处理后产生的衍生信息(如类别、标签、评分、摘要)都被统一表示为键值对形式的"标签"。这种统一表示简化了后续的查询、过滤、路由和展示逻辑。
|
||||
* **声明式配置优于命令式代码**: 用户通过 YAML 配置文件定义重写规则,而不是编写代码来实现处理逻辑。这降低了使用门槛,使得非程序员也能方便地定制自己的信息处理流程,同时也使得配置更易于管理和版本控制。
|
||||
|
||||
> 简单说这是一条专门针对 Feed 处理的可配置工作流
|
||||
|
||||
## 2. 业务流程
|
||||
|
||||
内容重写组件的核心工作流程是接收一个代表信息单元的标签集 (`model.Labels`),然后按顺序应用预定义的重写规则 (`Rule`),最终输出一个经过修改的标签集,或者指示该信息单元应被丢弃。
|
||||
|
||||
148
docs/wehook-zh.md
Normal file
148
docs/wehook-zh.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# Zenfeed Webhook 通知对接指南
|
||||
|
||||
Zenfeed 支持通过 Webhook 将分组和总结后的 Feed 通知推送到您指定的 HTTP(S) 端点。这允许您将 Zenfeed 的通知集成到自定义的应用或工作流程中。
|
||||
|
||||
## 1. 配置方法
|
||||
|
||||
要在 Zenfeed 中配置 Webhook 通知,您需要在配置文件的 `notify.receivers` 部分定义一个或多个接收者,并为每个 Webhook 接收者指定其唯一的 `name` 和 `webhook` 配置块。
|
||||
|
||||
**示例配置 (`config.yaml`):**
|
||||
|
||||
```yaml
|
||||
notify:
|
||||
# ... 其他通知配置 ...
|
||||
|
||||
receivers:
|
||||
- name: my_awesome_webhook # 接收者的唯一名称,将在路由规则中引用
|
||||
webhook:
|
||||
url: "https://your-service.com/webhook-endpoint" # 您的 Webhook 接收端点 URL
|
||||
|
||||
# 示例:路由规则中如何使用此接收者
|
||||
route: # or sub_routes..
|
||||
receivers:
|
||||
- my_awesome_webhook # 引用上面定义的接收者名称
|
||||
# ... 其他路由配置 ...
|
||||
```
|
||||
|
||||
在上述示例中:
|
||||
- 我们定义了一个名为 `my_awesome_webhook` 的接收者。
|
||||
- `webhook.url` 字段指定了当有匹配此接收者的通知时,Zenfeed 将向哪个 URL 发送 POST 请求。
|
||||
|
||||
## 2. 数据格式详解
|
||||
|
||||
当 Zenfeed 向您的 Webhook 端点发送通知时,它会发送一个 `POST` 请求,请求体为 JSON 格式。
|
||||
|
||||
请求体结构如下:
|
||||
|
||||
```json
|
||||
{
|
||||
"group": "string",
|
||||
"labels": {
|
||||
"label_key1": "label_value1",
|
||||
"label_key2": "label_value2"
|
||||
},
|
||||
"summary": "string",
|
||||
"feeds": [
|
||||
{
|
||||
"labels": {
|
||||
"title": "Feed Title 1",
|
||||
"link": "http://example.com/feed1",
|
||||
"content": "Feed content snippet 1...",
|
||||
"source": "example_source",
|
||||
"pub_time": "2024-07-30T10:00:00Z"
|
||||
// ... 其他自定义或标准标签
|
||||
},
|
||||
"time": "2024-07-30T10:00:00Z",
|
||||
"related": [
|
||||
// 可选:与此 Feed 相关的其他 Feed 对象,结构同父 Feed
|
||||
]
|
||||
}
|
||||
// ...更多 Feed 对象
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**字段说明:**
|
||||
|
||||
- `group` (`string`):
|
||||
当前通知所属的组名。这个名称是根据通知路由配置中 `group_by` 定义的标签值组合而成的。例如,如果 `group_by: ["source", "category"]`,且一个 Feed 组的 `source` 是 `github_trending`,`category` 是 `golang`,那么 `group` 可能类似于 `"github_trending/golang"`。
|
||||
|
||||
- `labels` (`object`):
|
||||
一个键值对对象,表示当前通知组的标签。这些标签是根据通知路由配置中 `group_by` 所指定的标签及其对应的值。
|
||||
例如,如果 `group_by: ["source"]` 且当前组的 `source` 标签值为 `rsshub`,则 `labels` 会是 `{"source": "rsshub"}`。
|
||||
|
||||
- `summary` (`string`):
|
||||
由大语言模型 (LLM) 为当前这一组 Feed 生成的摘要文本。如果通知路由中没有配置 LLM 总结,此字段可能为空字符串或省略 (取决于具体的实现细节,但通常会是空字符串)。
|
||||
|
||||
- `feeds` (`array` of `object`):
|
||||
一个数组,包含了属于当前通知组的所有 Feed 对象。每个 Feed 对象包含以下字段:
|
||||
* `labels` (`object`): Feed 的元数据。这是一个键值对对象,包含了该 Feed 的所有标签,例如:
|
||||
* `title` (`string`): Feed 的标题。
|
||||
* `link` (`string`): Feed 的原始链接。
|
||||
* `content` (`string`): Feed 的内容摘要或全文 (取决于抓取和重写规则)。
|
||||
* `source` (`string`): Feed 的来源标识。
|
||||
* `pub_time` (`string`): Feed 的发布时间 (RFC3339 格式的字符串,例如 `2025-01-01T00:00:00Z`)。
|
||||
* ...以及其他在抓取或重写过程中添加的自定义标签。
|
||||
* `time` (`string`): Feed 的时间戳,通常是其发布时间,采用 RFC3339 格式 (例如 `2025-01-01T00:00:00Z`)。此字段与 `labels.pub_time` 通常一致,但 `time` 是系统内部用于时间序列处理的主要时间字段。
|
||||
* `related` (`array` of `object`, 可选):
|
||||
一个数组,包含了与当前 Feed 语义相关的其他 Feed 对象。这通常在通知路由中启用了 `compress_by_related_threshold` 选项时填充。每个相关的 Feed 对象结构与父 Feed 对象完全相同。如果未启用相关性压缩或没有相关的 Feed,此字段可能为空数组或不存在。
|
||||
|
||||
## 3. 请求示例
|
||||
|
||||
以下是一个发送到您的 Webhook 端点的 JSON 请求体示例:
|
||||
|
||||
```json
|
||||
{
|
||||
"group": "my_favorite_blogs",
|
||||
"labels": {
|
||||
    "category": "tech_updates"
|
||||
},
|
||||
"summary": "今天有多篇关于最新 AI 技术进展的文章,重点关注了大型语言模型在代码生成方面的应用,以及其对未来软件开发模式的潜在影响。",
|
||||
"feeds": [
|
||||
{
|
||||
"labels": {
|
||||
"content": "AlphaCode X 展示了惊人的代码理解和生成能力,在多个编程竞赛中超越了人类平均水平...",
|
||||
"link": "https://example.blog/alphacode-x-details",
|
||||
"pub_time": "2024-07-30T14:35:10Z",
|
||||
"source": "Example Tech Blog",
|
||||
"title": "AlphaCode X: 下一代 AI 编码助手",
|
||||
"type": "blog_post"
|
||||
},
|
||||
"time": "2024-07-30T14:35:10Z",
|
||||
"related": []
|
||||
},
|
||||
{
|
||||
"labels": {
|
||||
"content": "讨论了当前 LLM 在实际软件工程项目中落地所面临的挑战,包括成本、可控性和安全性问题。",
|
||||
"link": "https://another.blog/llm-in-swe-challenges",
|
||||
"pub_time": "2024-07-30T11:15:00Z",
|
||||
"source": "Another Tech Review",
|
||||
"title": "LLM 在软件工程中的应用:机遇与挑战",
|
||||
"type": "rss"
|
||||
},
|
||||
"time": "2024-07-30T11:15:00Z",
|
||||
"related": [
|
||||
{
|
||||
"labels": {
|
||||
"content": "一篇关于如何更经济有效地部署和微调大型语言模型的指南。",
|
||||
"link": "https://some.other.blog/cost-effective-llm",
|
||||
"pub_time": "2024-07-30T09:00:00Z",
|
||||
"source": "AI Infra Weekly",
|
||||
"title": "经济高效的 LLM 部署策略",
|
||||
"type": "rss"
|
||||
},
|
||||
"time": "2024-07-30T09:00:00Z",
|
||||
"related": []
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## 4. 响应要求
|
||||
|
||||
Zenfeed 期望您的 Webhook 端点在成功接收并处理通知后,返回 HTTP `200 OK` 状态码。
|
||||
如果 Zenfeed 收到任何非 `200` 的状态码,它会将该次通知尝试标记为失败,并可能根据重试策略进行重试 (具体重试行为取决于 Zenfeed 的内部实现)。
|
||||
|
||||
请确保您的端点能够及时响应,以避免超时。
|
||||
4
go.mod
4
go.mod
@@ -7,13 +7,14 @@ require (
|
||||
github.com/benbjohnson/clock v1.3.5
|
||||
github.com/chewxy/math32 v1.10.1
|
||||
github.com/edsrzf/mmap-go v1.2.0
|
||||
github.com/gorilla/feeds v1.2.0
|
||||
github.com/mark3labs/mcp-go v0.17.0
|
||||
github.com/mmcdole/gofeed v1.3.0
|
||||
github.com/nutsdb/nutsdb v1.0.4
|
||||
github.com/onsi/gomega v1.36.1
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/prometheus/client_golang v1.21.1
|
||||
github.com/sashabaranov/go-openai v1.36.1
|
||||
github.com/sashabaranov/go-openai v1.40.1
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/veqryn/slog-dedup v0.5.0
|
||||
github.com/yuin/goldmark v1.7.8
|
||||
@@ -45,6 +46,7 @@ require (
|
||||
github.com/prometheus/common v0.62.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/stretchr/objx v0.5.2 // indirect
|
||||
github.com/temoto/robotstxt v1.1.2
|
||||
github.com/tidwall/btree v1.6.0 // indirect
|
||||
github.com/xujiajun/mmap-go v1.0.1 // indirect
|
||||
github.com/xujiajun/utils v0.0.0-20220904132955-5f7c5b914235 // indirect
|
||||
|
||||
11
go.sum
11
go.sum
@@ -36,6 +36,8 @@ github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 h1:5iH8iuqE5apketRbSF
|
||||
github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/feeds v1.2.0 h1:O6pBiXJ5JHhPvqy53NsjKOThq+dNFm8+DFrxBEdzSCc=
|
||||
github.com/gorilla/feeds v1.2.0/go.mod h1:WMib8uJP3BbY+X8Szd1rA5Pzhdfh+HCCAYT2z7Fza6Y=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
|
||||
@@ -44,8 +46,9 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/mark3labs/mcp-go v0.17.0 h1:5Ps6T7qXr7De/2QTqs9h6BKeZ/qdeUeGrgM5lPzi930=
|
||||
@@ -84,8 +87,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6O
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/sashabaranov/go-openai v1.36.1 h1:EVfRXwIlW2rUzpx6vR+aeIKCK/xylSrVYAx1TMTSX3g=
|
||||
github.com/sashabaranov/go-openai v1.36.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.40.1 h1:bJ08Iwct5mHBVkuvG6FEcb9MDTfsXdTYPGjYLRdeTEU=
|
||||
github.com/sashabaranov/go-openai v1.40.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y=
|
||||
github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI=
|
||||
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
|
||||
@@ -99,6 +102,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
|
||||
github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
||||
github.com/tidwall/btree v1.6.0 h1:LDZfKfQIBHGHWSwckhXI0RPSXzlo+KYdjK7FWSqOzzg=
|
||||
github.com/tidwall/btree v1.6.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY=
|
||||
github.com/veqryn/slog-dedup v0.5.0 h1:2pc4va3q8p7Tor1SjVvi1ZbVK/oKNPgsqG15XFEt0iM=
|
||||
|
||||
43
main.go
43
main.go
@@ -28,6 +28,7 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
"github.com/glidea/zenfeed/pkg/api/http"
|
||||
"github.com/glidea/zenfeed/pkg/api/mcp"
|
||||
"github.com/glidea/zenfeed/pkg/api/rss"
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/llm"
|
||||
@@ -47,6 +48,7 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index/vector"
|
||||
"github.com/glidea/zenfeed/pkg/storage/kv"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetryserver "github.com/glidea/zenfeed/pkg/telemetry/server"
|
||||
timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
)
|
||||
|
||||
@@ -118,6 +120,7 @@ type App struct {
|
||||
configPath string
|
||||
configMgr config.Manager
|
||||
conf *config.App
|
||||
telemetry telemetryserver.Server
|
||||
|
||||
kvStorage kv.Storage
|
||||
llmFactory llm.Factory
|
||||
@@ -126,6 +129,7 @@ type App struct {
|
||||
api api.API
|
||||
http http.Server
|
||||
mcp mcp.Server
|
||||
rss rss.Server
|
||||
scraperMgr scrape.Manager
|
||||
scheduler schedule.Scheduler
|
||||
notifier notify.Notifier
|
||||
@@ -153,6 +157,10 @@ func (a *App) setup() error {
|
||||
return a.applyGlobals(newConf)
|
||||
}))
|
||||
|
||||
if err := a.setupTelemetryServer(); err != nil {
|
||||
return errors.Wrap(err, "setup telemetry server")
|
||||
}
|
||||
|
||||
if err := a.setupKVStorage(); err != nil {
|
||||
return errors.Wrap(err, "setup kv storage")
|
||||
}
|
||||
@@ -174,6 +182,9 @@ func (a *App) setup() error {
|
||||
if err := a.setupMCPServer(); err != nil {
|
||||
return errors.Wrap(err, "setup mcp server")
|
||||
}
|
||||
if err := a.setupRSSServer(); err != nil {
|
||||
return errors.Wrap(err, "setup rss server")
|
||||
}
|
||||
if err := a.setupScraper(); err != nil {
|
||||
return errors.Wrap(err, "setup scraper")
|
||||
}
|
||||
@@ -209,8 +220,8 @@ func (a *App) applyGlobals(conf *config.App) error {
|
||||
if err := timeutil.SetLocation(conf.Timezone); err != nil {
|
||||
return errors.Wrapf(err, "set timezone to %s", conf.Timezone)
|
||||
}
|
||||
if err := log.SetLevel(log.Level(conf.Log.Level)); err != nil {
|
||||
return errors.Wrapf(err, "set log level to %s", conf.Log.Level)
|
||||
if err := log.SetLevel(log.Level(conf.Telemetry.Log.Level)); err != nil {
|
||||
return errors.Wrapf(err, "set log level to %s", conf.Telemetry.Log.Level)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -271,6 +282,16 @@ func (a *App) setupFeedStorage() (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupTelemetryServer initializes the Telemetry server.
|
||||
func (a *App) setupTelemetryServer() (err error) {
|
||||
a.telemetry, err = telemetryserver.NewFactory().New(component.Global, a.conf, telemetryserver.Dependencies{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupAPI initializes the API service.
|
||||
func (a *App) setupAPI() (err error) {
|
||||
a.api, err = api.NewFactory().New(component.Global, a.conf, api.Dependencies{
|
||||
@@ -315,6 +336,20 @@ func (a *App) setupMCPServer() (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupRSSServer initializes the RSS server.
|
||||
func (a *App) setupRSSServer() (err error) {
|
||||
a.rss, err = rss.NewFactory().New(component.Global, a.conf, rss.Dependencies{
|
||||
API: a.api,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
a.configMgr.Subscribe(a.rss)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupScraper initializes the Scraper manager.
|
||||
func (a *App) setupScraper() (err error) {
|
||||
a.scraperMgr, err = scrape.NewFactory().New(component.Global, a.conf, scrape.Dependencies{
|
||||
@@ -384,12 +419,12 @@ func (a *App) run(ctx context.Context) error {
|
||||
log.Info(ctx, "starting application components...")
|
||||
if err := component.Run(ctx,
|
||||
component.Group{a.configMgr},
|
||||
component.Group{a.llmFactory},
|
||||
component.Group{a.llmFactory, a.telemetry},
|
||||
component.Group{a.rewriter},
|
||||
component.Group{a.feedStorage},
|
||||
component.Group{a.kvStorage},
|
||||
component.Group{a.notifier, a.api},
|
||||
component.Group{a.http, a.mcp, a.scraperMgr, a.scheduler},
|
||||
component.Group{a.http, a.mcp, a.rss, a.scraperMgr, a.scheduler},
|
||||
); err != nil && !errors.Is(err, context.Canceled) {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -37,7 +37,6 @@ import (
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
jsonschema "github.com/glidea/zenfeed/pkg/util/json_schema"
|
||||
"github.com/glidea/zenfeed/pkg/util/rpc"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
@@ -161,11 +160,11 @@ type QueryRequest struct {
|
||||
}
|
||||
|
||||
func (r *QueryRequest) Validate() error { //nolint:cyclop
|
||||
if r.Query != "" && utf8.RuneCountInString(r.Query) < 5 {
|
||||
return errors.New("query must be at least 5 characters")
|
||||
if r.Query != "" && utf8.RuneCountInString(r.Query) > 64 {
|
||||
return errors.New("query must be at most 64 characters")
|
||||
}
|
||||
if r.Threshold == 0 {
|
||||
r.Threshold = 0.55
|
||||
r.Threshold = 0.5
|
||||
}
|
||||
if r.Threshold < 0 || r.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
@@ -200,6 +199,28 @@ type QueryResponse struct {
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
type Error struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
func (e Error) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
|
||||
func newError(code int, err error) Error {
|
||||
return Error{
|
||||
Code: code,
|
||||
Message: err.Error(),
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
ErrBadRequest = func(err error) Error { return newError(http.StatusBadRequest, err) }
|
||||
ErrNotFound = func(err error) Error { return newError(http.StatusNotFound, err) }
|
||||
ErrInternal = func(err error) Error { return newError(http.StatusInternalServerError, err) }
|
||||
)
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[API, config.App, Dependencies]
|
||||
|
||||
@@ -262,7 +283,7 @@ func (a *api) QueryAppConfigSchema(
|
||||
) (resp *QueryAppConfigSchemaResponse, err error) {
|
||||
schema, err := jsonschema.ForType(reflect.TypeOf(config.App{}))
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query app config schema"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query app config schema"))
|
||||
}
|
||||
|
||||
return (*QueryAppConfigSchemaResponse)(&schema), nil
|
||||
@@ -282,7 +303,7 @@ func (a *api) ApplyAppConfig(
|
||||
req *ApplyAppConfigRequest,
|
||||
) (resp *ApplyAppConfigResponse, err error) {
|
||||
if err := a.Dependencies().ConfigManager.SaveAppConfig(&req.App); err != nil {
|
||||
return nil, rpc.ErrBadRequest(errors.Wrap(err, "save app config"))
|
||||
return nil, ErrBadRequest(errors.Wrap(err, "save app config"))
|
||||
}
|
||||
|
||||
return &ApplyAppConfigResponse{}, nil
|
||||
@@ -297,20 +318,20 @@ func (a *api) QueryRSSHubCategories(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub websites"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub websites"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
var forwardResp map[string]RSSHubWebsite
|
||||
if err := json.NewDecoder(forwardRespIO.Body).Decode(&forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -333,7 +354,7 @@ func (a *api) QueryRSSHubWebsites(
|
||||
ctx context.Context, req *QueryRSSHubWebsitesRequest,
|
||||
) (resp *QueryRSSHubWebsitesResponse, err error) {
|
||||
if req.Category == "" {
|
||||
return nil, rpc.ErrBadRequest(errors.New("category is required"))
|
||||
return nil, ErrBadRequest(errors.New("category is required"))
|
||||
}
|
||||
|
||||
url := a.Config().RSSHubEndpoint + "/api/category/" + req.Category
|
||||
@@ -341,29 +362,29 @@ func (a *api) QueryRSSHubWebsites(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
body, err := io.ReadAll(forwardRespIO.Body)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "read response"))
|
||||
}
|
||||
if len(body) == 0 {
|
||||
// Hack for RSSHub...
|
||||
// Consider cache category ids for validate by self to remove this shit code.
|
||||
return nil, rpc.ErrBadRequest(errors.New("category id is invalid"))
|
||||
return nil, ErrBadRequest(errors.New("category id is invalid"))
|
||||
}
|
||||
var forwardResp map[string]RSSHubWebsite
|
||||
if err := json.Unmarshal(body, &forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -383,7 +404,7 @@ func (a *api) QueryRSSHubRoutes(
|
||||
req *QueryRSSHubRoutesRequest,
|
||||
) (resp *QueryRSSHubRoutesResponse, err error) {
|
||||
if req.WebsiteID == "" {
|
||||
return nil, rpc.ErrBadRequest(errors.New("website id is required"))
|
||||
return nil, ErrBadRequest(errors.New("website id is required"))
|
||||
}
|
||||
|
||||
url := a.Config().RSSHubEndpoint + "/api/namespace/" + req.WebsiteID
|
||||
@@ -391,30 +412,30 @@ func (a *api) QueryRSSHubRoutes(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
body, err := io.ReadAll(forwardRespIO.Body)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "read response"))
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return nil, rpc.ErrBadRequest(errors.New("website id is invalid"))
|
||||
return nil, ErrBadRequest(errors.New("website id is invalid"))
|
||||
}
|
||||
|
||||
var forwardResp struct {
|
||||
Routes map[string]RSSHubRoute `json:"routes"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -435,7 +456,7 @@ func (a *api) Write(ctx context.Context, req *WriteRequest) (resp *WriteResponse
|
||||
feed.Labels.Put(model.LabelType, "api", false)
|
||||
}
|
||||
if err := a.Dependencies().FeedStorage.Append(ctx, req.Feeds...); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "append"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "append"))
|
||||
}
|
||||
|
||||
return &WriteResponse{}, nil
|
||||
@@ -447,7 +468,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
|
||||
|
||||
// Validate request.
|
||||
if err := req.Validate(); err != nil {
|
||||
return nil, rpc.ErrBadRequest(errors.Wrap(err, "validate"))
|
||||
return nil, ErrBadRequest(errors.Wrap(err, "validate"))
|
||||
}
|
||||
|
||||
// Forward to storage.
|
||||
@@ -460,7 +481,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
|
||||
End: req.End,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query"))
|
||||
}
|
||||
if len(feeds) == 0 {
|
||||
return &QueryResponse{Feeds: []*block.FeedVO{}}, nil
|
||||
|
||||
@@ -26,9 +26,8 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/metric"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/rpc"
|
||||
"github.com/glidea/zenfeed/pkg/util/jsonrpc"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
@@ -89,18 +88,14 @@ func new(instance string, app *config.App, dependencies Dependencies) (Server, e
|
||||
|
||||
router := http.NewServeMux()
|
||||
api := dependencies.API
|
||||
router.Handle("/metrics", metric.Handler())
|
||||
router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
}))
|
||||
router.Handle("/write", rpc.API(api.Write))
|
||||
router.Handle("/query_config", rpc.API(api.QueryAppConfig))
|
||||
router.Handle("/apply_config", rpc.API(api.ApplyAppConfig))
|
||||
router.Handle("/query_config_schema", rpc.API(api.QueryAppConfigSchema))
|
||||
router.Handle("/query_rsshub_categories", rpc.API(api.QueryRSSHubCategories))
|
||||
router.Handle("/query_rsshub_websites", rpc.API(api.QueryRSSHubWebsites))
|
||||
router.Handle("/query_rsshub_routes", rpc.API(api.QueryRSSHubRoutes))
|
||||
router.Handle("/query", rpc.API(api.Query))
|
||||
router.Handle("/write", jsonrpc.API(api.Write))
|
||||
router.Handle("/query_config", jsonrpc.API(api.QueryAppConfig))
|
||||
router.Handle("/apply_config", jsonrpc.API(api.ApplyAppConfig))
|
||||
router.Handle("/query_config_schema", jsonrpc.API(api.QueryAppConfigSchema))
|
||||
router.Handle("/query_rsshub_categories", jsonrpc.API(api.QueryRSSHubCategories))
|
||||
router.Handle("/query_rsshub_websites", jsonrpc.API(api.QueryRSSHubWebsites))
|
||||
router.Handle("/query_rsshub_routes", jsonrpc.API(api.QueryRSSHubRoutes))
|
||||
router.Handle("/query", jsonrpc.API(api.Query))
|
||||
httpServer := &http.Server{Addr: config.Address, Handler: router}
|
||||
|
||||
return &server{
|
||||
|
||||
231
pkg/api/rss/rss.go
Normal file
231
pkg/api/rss/rss.go
Normal file
@@ -0,0 +1,231 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
"github.com/gorilla/feeds"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
)
|
||||
|
||||
var clk = clock.New()
|
||||
|
||||
// --- Interface code block ---
|
||||
type Server interface {
|
||||
component.Component
|
||||
config.Watcher
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Address string
|
||||
ContentHTMLTemplate string
|
||||
contentHTMLTemplate *template.Template
|
||||
}
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if c.Address == "" {
|
||||
c.Address = ":1302"
|
||||
}
|
||||
if _, _, err := net.SplitHostPort(c.Address); err != nil {
|
||||
return errors.Wrap(err, "invalid address")
|
||||
}
|
||||
|
||||
if c.ContentHTMLTemplate == "" {
|
||||
c.ContentHTMLTemplate = "{{ .summary_html_snippet }}"
|
||||
}
|
||||
t, err := template.New("").Parse(c.ContentHTMLTemplate)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parse rss content template")
|
||||
}
|
||||
c.contentHTMLTemplate = t
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) From(app *config.App) *Config {
|
||||
c.Address = app.API.RSS.Address
|
||||
c.ContentHTMLTemplate = app.API.RSS.ContentHTMLTemplate
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
type Dependencies struct {
|
||||
API api.API
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Server, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](
|
||||
func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
|
||||
m := &mockServer{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
|
||||
config := &Config{}
|
||||
config.From(app)
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrap(err, "validate config")
|
||||
}
|
||||
|
||||
s := &server{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "RSSServer",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
}
|
||||
|
||||
router := http.NewServeMux()
|
||||
router.Handle("/", http.HandlerFunc(s.rss))
|
||||
|
||||
s.http = &http.Server{Addr: config.Address, Handler: router}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
type server struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
http *http.Server
|
||||
}
|
||||
|
||||
func (s *server) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
serverErr := make(chan error, 1)
|
||||
go func() {
|
||||
serverErr <- s.http.ListenAndServe()
|
||||
}()
|
||||
|
||||
s.MarkReady()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info(ctx, "shutting down")
|
||||
|
||||
return s.http.Shutdown(ctx)
|
||||
case err := <-serverErr:
|
||||
return errors.Wrap(err, "listen and serve")
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) Reload(app *config.App) error {
|
||||
newConfig := &Config{}
|
||||
newConfig.From(app)
|
||||
if err := newConfig.Validate(); err != nil {
|
||||
return errors.Wrap(err, "validate config")
|
||||
}
|
||||
if s.Config().Address != newConfig.Address {
|
||||
return errors.New("address cannot be reloaded")
|
||||
}
|
||||
|
||||
s.SetConfig(newConfig)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) rss(w http.ResponseWriter, r *http.Request) {
|
||||
var err error
|
||||
ctx := telemetry.StartWith(r.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "rss")...)
|
||||
defer telemetry.End(ctx, err)
|
||||
|
||||
// Extract parameters.
|
||||
ps := r.URL.Query()
|
||||
labelFilters := ps["label_filter"]
|
||||
query := ps.Get("query")
|
||||
|
||||
// Forward query request to API.
|
||||
now := clk.Now()
|
||||
queryResult, err := s.Dependencies().API.Query(ctx, &api.QueryRequest{
|
||||
Query: query,
|
||||
LabelFilters: labelFilters,
|
||||
Start: now.Add(-24 * time.Hour),
|
||||
End: now,
|
||||
Limit: 100,
|
||||
})
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest) // TODO: standardize error handling.
|
||||
return
|
||||
}
|
||||
|
||||
// Render and convert to RSS.
|
||||
rssObj := &feeds.Feed{
|
||||
Title: fmt.Sprintf("Zenfeed RSS - %s", ps.Encode()),
|
||||
Description: "Powered by Github Zenfeed - https://github.com/glidea/zenfeed. If you use Folo, please enable 'Appearance - Content - Render inline styles'",
|
||||
Items: make([]*feeds.Item, 0, len(queryResult.Feeds)),
|
||||
}
|
||||
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
|
||||
for _, feed := range queryResult.Feeds {
|
||||
buf.Reset()
|
||||
|
||||
if err = s.Config().contentHTMLTemplate.Execute(buf, feed.Labels.Map()); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
item := &feeds.Item{
|
||||
Title: feed.Labels.Get(model.LabelTitle),
|
||||
Link: &feeds.Link{Href: feed.Labels.Get(model.LabelLink)},
|
||||
Created: feed.Time, // NOTE: scrape time, not pub time.
|
||||
Content: buf.String(),
|
||||
}
|
||||
|
||||
rssObj.Items = append(rssObj.Items, item)
|
||||
}
|
||||
|
||||
if err = rssObj.WriteRss(w); err != nil {
|
||||
log.Error(ctx, errors.Wrap(err, "write rss response"))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
type mockServer struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockServer) Reload(app *config.App) error {
|
||||
return m.Called(app).Error(0)
|
||||
}
|
||||
@@ -46,10 +46,13 @@ type Config struct {
|
||||
}
|
||||
|
||||
type App struct {
|
||||
Timezone string `yaml:"timezone,omitempty" json:"timezone,omitempty" desc:"The timezone of the app. e.g. Asia/Shanghai. Default: server's local timezone"`
|
||||
Log struct {
|
||||
Level string `yaml:"level,omitempty" json:"level,omitempty" desc:"Log level, one of debug, info, warn, error. Default: info"`
|
||||
} `yaml:"log,omitempty" json:"log,omitempty" desc:"The log config."`
|
||||
Timezone string `yaml:"timezone,omitempty" json:"timezone,omitempty" desc:"The timezone of the app. e.g. Asia/Shanghai. Default: server's local timezone"`
|
||||
Telemetry struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the telemetry server. e.g. 0.0.0.0:9090. Default: :9090. It can not be changed after the app is running."`
|
||||
Log struct {
|
||||
Level string `yaml:"level,omitempty" json:"level,omitempty" desc:"Log level, one of debug, info, warn, error. Default: info"`
|
||||
} `yaml:"log,omitempty" json:"log,omitempty" desc:"The log config."`
|
||||
} `yaml:"telemetry,omitempty" json:"telemetry,omitempty" desc:"The telemetry config."`
|
||||
API struct {
|
||||
HTTP struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the HTTP API. e.g. 0.0.0.0:1300. Default: :1300. It can not be changed after the app is running."`
|
||||
@@ -57,9 +60,16 @@ type App struct {
|
||||
MCP struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the MCP API. e.g. 0.0.0.0:1300. Default: :1301. It can not be changed after the app is running."`
|
||||
} `yaml:"mcp,omitempty" json:"mcp,omitempty" desc:"The MCP API config."`
|
||||
RSS struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the RSS API. e.g. 0.0.0.0:1300. Default: :1302. It can not be changed after the app is running."`
|
||||
ContentHTMLTemplate string `yaml:"content_html_template,omitempty" json:"content_html_template,omitempty" desc:"The template to render the RSS content for each item. Default is {{ .summary_html_snippet }}."`
|
||||
} `yaml:"rss,omitempty" json:"rss,omitempty" desc:"The RSS config."`
|
||||
LLM string `yaml:"llm,omitempty" json:"llm,omitempty" desc:"The LLM name for summarizing feeds. e.g. my-favorite-gemini-king. Default is the default LLM in llms section."`
|
||||
} `yaml:"api,omitempty" json:"api,omitempty" desc:"The API config."`
|
||||
LLMs []LLM `yaml:"llms,omitempty" json:"llms,omitempty" desc:"The LLMs config. It is required, at least one LLM is needed, refered by other config sections."`
|
||||
LLMs []LLM `yaml:"llms,omitempty" json:"llms,omitempty" desc:"The LLMs config. It is required, at least one LLM is needed, refered by other config sections."`
|
||||
Jina struct {
|
||||
Token string `yaml:"token,omitempty" json:"token,omitempty" desc:"The token of the Jina server."`
|
||||
} `yaml:"jina,omitempty" json:"jina,omitempty" desc:"The Jina config."`
|
||||
Scrape Scrape `yaml:"scrape,omitempty" json:"scrape,omitempty" desc:"The scrape config."`
|
||||
Storage Storage `yaml:"storage,omitempty" json:"storage,omitempty" desc:"The storage config."`
|
||||
Scheduls struct {
|
||||
@@ -116,6 +126,7 @@ type ScrapeSourceRSS struct {
|
||||
}
|
||||
|
||||
type RewriteRule struct {
|
||||
If []string `yaml:"if,omitempty" json:"if,omitempty" desc:"The condition config to match the feed. If not set, that means match all feeds. Like label filters, e.g. [source=github, title!=xxx]"`
|
||||
SourceLabel string `yaml:"source_label,omitempty" json:"source_label,omitempty" desc:"The feed label of the source text to transform. Default is the 'content' label. The feed is essentially a label set (similar to Prometheus metric data). The default labels are type (rss, email (in future), etc), source (the source name), title (feed title), link (feed link), pub_time (feed publish time), and content (feed content)."`
|
||||
SkipTooShortThreshold *int `yaml:"skip_too_short_threshold,omitempty" json:"skip_too_short_threshold,omitempty" desc:"The threshold of the source text length to skip. Default is 300. It helps we to filter out some short feeds."`
|
||||
Transform *RewriteRuleTransform `yaml:"transform,omitempty" json:"transform,omitempty" desc:"The transform config to transform the source text. If not set, that means transform nothing, so the source text is the transformed text."`
|
||||
@@ -130,6 +141,7 @@ type RewriteRuleTransform struct {
|
||||
}
|
||||
|
||||
type RewriteRuleTransformToText struct {
|
||||
Type string `yaml:"type,omitempty" json:"type,omitempty" desc:"The type of the transform. It can be one of prompt, crawl, crawl_by_jina. Default is prompt. For crawl, the source text will be as the url to crawl the page, and the page will be converted to markdown. crawl vs crawl_by_jina: crawl is local, more stable; crawl_by_jina is powered by https://jina.ai, more powerful."`
|
||||
LLM string `yaml:"llm,omitempty" json:"llm,omitempty" desc:"The LLM name to use. Default is the default LLM in llms section."`
|
||||
Prompt string `yaml:"prompt,omitempty" json:"prompt,omitempty" desc:"The prompt to transform the source text. The source text will be injected into the prompt above. And you can use go template syntax to refer some built-in prompts, like {{ .summary }}. Available built-in prompts: category, tags, score, comment_confucius, summary, summary_html_snippet."`
|
||||
}
|
||||
@@ -166,15 +178,14 @@ type NotifySubRoute struct {
|
||||
}
|
||||
|
||||
type NotifyReceiver struct {
|
||||
Name string `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the receiver. It is required."`
|
||||
Email string `yaml:"email,omitempty" json:"email,omitempty" desc:"The email of the receiver."`
|
||||
// TODO: to reduce copyright risk, we do not support webhook receiver now.
|
||||
// Webhook *NotifyReceiverWebhook `yaml:"webhook" json:"webhook" desc:"The webhook of the receiver."`
|
||||
Name string `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the receiver. It is required."`
|
||||
Email string `yaml:"email,omitempty" json:"email,omitempty" desc:"The email of the receiver."`
|
||||
Webhook *NotifyReceiverWebhook `yaml:"webhook" json:"webhook" desc:"The webhook of the receiver."`
|
||||
}
|
||||
|
||||
// type NotifyReceiverWebhook struct {
|
||||
// URL string `yaml:"url"`
|
||||
// }
|
||||
type NotifyReceiverWebhook struct {
|
||||
URL string `yaml:"url"`
|
||||
}
|
||||
|
||||
type NotifyChannels struct {
|
||||
Email *NotifyChannelEmail `yaml:"email,omitempty" json:"email,omitempty" desc:"The global email channel config."`
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"reflect"
|
||||
"strconv"
|
||||
@@ -33,6 +34,8 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/storage/kv"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
binaryutil "github.com/glidea/zenfeed/pkg/util/binary"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
"github.com/glidea/zenfeed/pkg/util/hash"
|
||||
)
|
||||
|
||||
@@ -373,24 +376,94 @@ func newCached(llm LLM, kvStorage kv.Storage) LLM {
|
||||
|
||||
func (c *cached) String(ctx context.Context, messages []string) (string, error) {
|
||||
key := hash.Sum64s(messages)
|
||||
keyStr := strconv.FormatUint(key, 10)
|
||||
keyStr := strconv.FormatUint(key, 10) // for human readable & compatible.
|
||||
|
||||
value, err := c.kvStorage.Get(ctx, keyStr)
|
||||
valueBs, err := c.kvStorage.Get(ctx, []byte(keyStr))
|
||||
switch {
|
||||
case err == nil:
|
||||
return value, nil
|
||||
return string(valueBs), nil
|
||||
case errors.Is(err, kv.ErrNotFound):
|
||||
break
|
||||
default:
|
||||
return "", errors.Wrap(err, "get from kv storage")
|
||||
}
|
||||
|
||||
value, err = c.LLM.String(ctx, messages)
|
||||
value, err := c.LLM.String(ctx, messages)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err = c.kvStorage.Set(ctx, keyStr, value, 65*time.Minute); err != nil {
|
||||
// TODO: reduce copies.
|
||||
if err = c.kvStorage.Set(ctx, []byte(keyStr), []byte(value), 65*time.Minute); err != nil {
|
||||
log.Error(ctx, err, "set to kv storage")
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
var (
|
||||
toBytes = func(v []float32) ([]byte, error) {
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
|
||||
for _, fVal := range v {
|
||||
if err := binaryutil.WriteFloat32(buf, fVal); err != nil {
|
||||
return nil, errors.Wrap(err, "write float32")
|
||||
}
|
||||
}
|
||||
|
||||
// Must copy data, as the buffer will be reused.
|
||||
bs := make([]byte, buf.Len())
|
||||
copy(bs, buf.Bytes())
|
||||
|
||||
return bs, nil
|
||||
}
|
||||
|
||||
toF32s = func(bs []byte) ([]float32, error) {
|
||||
if len(bs)%4 != 0 {
|
||||
return nil, errors.New("embedding data is corrupted, length not multiple of 4")
|
||||
}
|
||||
|
||||
r := bytes.NewReader(bs)
|
||||
floats := make([]float32, len(bs)/4)
|
||||
|
||||
for i := range floats {
|
||||
f, err := binaryutil.ReadFloat32(r)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "deserialize float32")
|
||||
}
|
||||
floats[i] = f
|
||||
}
|
||||
|
||||
return floats, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (c *cached) Embedding(ctx context.Context, text string) ([]float32, error) {
|
||||
key := hash.Sum64(text)
|
||||
keyStr := strconv.FormatUint(key, 10)
|
||||
|
||||
valueBs, err := c.kvStorage.Get(ctx, []byte(keyStr))
|
||||
switch {
|
||||
case err == nil:
|
||||
return toF32s(valueBs)
|
||||
case errors.Is(err, kv.ErrNotFound):
|
||||
break
|
||||
default:
|
||||
return nil, errors.Wrap(err, "get from kv storage")
|
||||
}
|
||||
|
||||
value, err := c.LLM.Embedding(ctx, text)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
valueBs, err = toBytes(value)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "serialize embedding")
|
||||
}
|
||||
|
||||
if err = c.kvStorage.Set(ctx, []byte(keyStr), valueBs, 65*time.Minute); err != nil {
|
||||
log.Error(ctx, err, "set to kv storage")
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ package llm
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
oai "github.com/sashabaranov/go-openai"
|
||||
@@ -40,7 +41,7 @@ func newOpenAI(c *Config) LLM {
|
||||
config := oai.DefaultConfig(c.APIKey)
|
||||
config.BaseURL = c.Endpoint
|
||||
client := oai.NewClientWithConfig(config)
|
||||
embeddingSpliter := newEmbeddingSpliter(2048, 64)
|
||||
embeddingSpliter := newEmbeddingSpliter(1536, 64)
|
||||
|
||||
return &openai{
|
||||
Base: component.New(&component.BaseConfig[Config, struct{}]{
|
||||
@@ -61,9 +62,9 @@ func (o *openai) String(ctx context.Context, messages []string) (value string, e
|
||||
if config.Model == "" {
|
||||
return "", errors.New("model is not set")
|
||||
}
|
||||
msg := make([]oai.ChatCompletionMessage, 0, len(messages))
|
||||
msgs := make([]oai.ChatCompletionMessage, 0, len(messages))
|
||||
for _, m := range messages {
|
||||
msg = append(msg, oai.ChatCompletionMessage{
|
||||
msgs = append(msgs, oai.ChatCompletionMessage{
|
||||
Role: oai.ChatMessageRoleUser,
|
||||
Content: m,
|
||||
})
|
||||
@@ -71,7 +72,7 @@ func (o *openai) String(ctx context.Context, messages []string) (value string, e
|
||||
|
||||
req := oai.ChatCompletionRequest{
|
||||
Model: config.Model,
|
||||
Messages: msg,
|
||||
Messages: msgs,
|
||||
Temperature: config.Temperature,
|
||||
}
|
||||
|
||||
@@ -131,6 +132,7 @@ func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err
|
||||
EncodingFormat: oai.EmbeddingEncodingFormatFloat,
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Println(s)
|
||||
return nil, errors.Wrap(err, "create embeddings")
|
||||
}
|
||||
if len(vec.Data) == 0 {
|
||||
@@ -141,6 +143,6 @@ func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err
|
||||
promptTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.PromptTokens))
|
||||
completionTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.CompletionTokens))
|
||||
totalTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.TotalTokens))
|
||||
|
||||
|
||||
return vec.Data[0].Embedding, nil
|
||||
}
|
||||
|
||||
156
pkg/llm/prompt/prompt.go
Normal file
156
pkg/llm/prompt/prompt.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package prompt
|
||||
|
||||
// Builtin maps the name of a built-in prompt to its prompt text.
//
// Available names: category, tags, score, comment_confucius, summary,
// summary_html_snippet. The texts are sent to the LLM as-is, so every
// character inside the raw strings below is runtime behavior.
var Builtin = map[string]string{
	"category": `
Analyze the content and categorize it into exactly one of these categories:
Technology, Development, Entertainment, Finance, Health, Politics, Other

Classification requirements:
- Choose the SINGLE most appropriate category based on:
* Primary topic and main focus of the content
* Key terminology and concepts used
* Target audience and purpose
* Technical depth and complexity level
- For content that could fit multiple categories:
* Identify the dominant theme
* Consider the most specific applicable category
* Use the primary intended purpose
- If content appears ambiguous:
* Focus on the most prominent aspects
* Consider the practical application
* Choose the category that best serves user needs

Output format:
Return ONLY the category name, no other text or explanation.
Must be one of the provided categories exactly as written.
`,

	"tags": `
Analyze the content and add appropriate tags based on:
- Main topics and themes
- Key concepts and terminology
- Target audience and purpose
- Technical depth and domain
- 2-4 tags are enough
Output format:
Return a list of tags, separated by commas, no other text or explanation.
e.g. "AI, Technology, Innovation, Future"
`,

	"score": `
Please give a score between 0 and 10 based on the following content.
Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
Output format:
Return the score (0-10), no other text or explanation.
E.g. "8", "5", "3", etc.
`,

	"comment_confucius": `
Please act as Confucius and write a 100-word comment on the article.
Content needs to be in line with the Chinese mainland's regulations.
Output format:
Return the comment only, no other text or explanation.
Reply short and concise, 100 words is enough.
`,

	"summary": `
Please read the article carefully and summarize its core content in the format of [Choice: Key Point List / Concise Paragraph]. The summary should clearly cover:

1. What is the main topic/theme of the article?
2. What key arguments/main information did the author put forward?
3. (Optional, if the article contains) What important data, cases, or examples are there?
4. What main conclusions did the article reach or what core information did it ultimately convey?

Strive for comprehensive, accurate, and concise.
`,

	// NOTE: the "Communicate concisely" bullet previously opened a bold span
	// that was only closed at the start of the next bullet ("* ** Have a
	// strong aversion..."); the span is now closed on the bullet it opens.
	"summary_html_snippet": `
You are to act as a professional Content Designer. Your task is to convert the provided article into **visually modern HTML email snippets** that render well in modern email clients like Gmail and QQ Mail.

**Core Requirements:**

* **Highlighting and Layout Techniques (Based on the article content, you must actually use the HTML structure templates provided below to generate the content):**

A. **Stylish Quote Block** (for highlighting important points or direct quotes from the original text):
<div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
Insert the key point or finding to be highlighted here.
</p>
</div>

B. **Information Card** (for highlighting key data/metrics):
<div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
</div>

C. **Key Points List** (for organizing multiple core points):
<ul style="margin:20px 0; padding-left:0; list-style-type:none;">
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
Description of the first key point
</li>
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
Description of the second key point
</li>
</ul>

D. **Emphasized Text** (for highlighting keywords or phrases):
<span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to be emphasized</span>

E. **Comparison Table** (suitable for comparing different solutions or viewpoints):
<div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
<table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
<thead>
<tr>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
</tr>
</thead>
<tbody>
<tr>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
</tr>
<tr>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
</tr>
</tbody>
</table>
</div>

* **Output Requirements:**
* The design should be **aesthetically pleasing and elegant, with harmonious color schemes**, ensuring sufficient **whitespace and contrast**.
* All article snippets must maintain a **consistent visual style**.
* You **must use multiple visual elements** and avoid mere text listings. **Use at least 2-3 different visual elements** to enhance readability and intuitive understanding.
* **Appropriately quote important original text snippets** to support explanations.
* **Strive to use highlighting styles to mark key points**.
* **Where appropriate, embed original images from the article to aid explanation.** Pay attention to the referrer policy: use referrerpolicy="no-referrer" on the <img> HTML element to ensure images display correctly.
* **Ensure overall reading flow is smooth and natural!!!** Guide the reader's thought process appropriately, minimizing abrupt jumps in logic.
* **Output only the HTML code snippet.** Do not include the full HTML document structure (i.e., no <html>, <head>, or <body> tags).
* **Do not add any explanatory text, extra comments, Markdown formatting, or HTML backticks.** Output the raw HTML code directly.
* **Do not add article titles or sources;** these will be automatically injected by the user later.
* **Do not use any opening remarks or pleasantries** (e.g., "Hi," "Let's talk about..."). Directly present the processed HTML content.
* **Do not refer to "this article," "this piece," "the current text," etc.** The user is aware of this context.
* **Only use inline styles, do not use global styles.** Remember to only generate HTML snippets.
* Do not explain anything, just output the HTML code snippet.
* Use above HTML components & its styles to generate the HTML code snippet, do not customize by yourself, else you will be fired.

* **Your Personality and Expression Preferences:**
* Focus on the most valuable information, not on every detail. The content should be readable within 3 minutes.
* Communicate **concisely and get straight to the point.**
* Have a strong aversion to jargon, bureaucratic language, redundant embellishments, and grand narratives. Believe that plain, simple language can best convey truth.
* Be fluent, plain, concise, and not verbose.
* Be **plain, direct, clear, and easy to understand:** Use basic vocabulary and simple sentence structures. Avoid "sophisticated" complex sentences or unnecessary embellishments that increase reading burden.
* Enable readers to quickly grasp: "What is this? What is it generally about? What is its relevance/real-world significance to me (an ordinary person)?" Focus on providing an **overview**, not an accumulation of details.
* Be well-versed in cognitive science; understand how to phrase information so that someone without prior background can quickly understand the core content.
* **Extract key information and core insights,** rather than directly copying the original text. Do not omit crucial information and viewpoints. For example, for forum posts, the main points from comments are also very important!
* Avoid large blocks of text, strive for a combination of pictures and text.
`,
}
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
|
||||
const (
|
||||
AppName = "zenfeed"
|
||||
Module = "github.com/glidea/zenfeed"
|
||||
)
|
||||
|
||||
// LabelXXX is the metadata label for the feed.
|
||||
@@ -233,6 +234,76 @@ type Label struct {
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
// Operators accepted in a label filter expression.
const (
	LabelFilterEqual    = "="
	LabelFilterNotEqual = "!="
)

// LabelFilter is one parsed label condition: the label named Label must be
// equal to Value (Equal == true) or different from Value (Equal == false).
type LabelFilter struct {
	Label string
	Equal bool
	Value string
}

// NewLabelFilter parses a filter of the form "label=value" or "label!=value".
//
// The expression is split at the FIRST "=". Everything after it is the value,
// so values may themselves contain "=" or "!=" (for example
// "link=https://example.com/?a=b"); such filters used to be rejected as
// invalid. If the part before the first "=" ends with "!", the operator is
// "!=" and the "!" is not part of the label.
//
// It returns an error when the filter contains no "=" at all.
func NewLabelFilter(filter string) (LabelFilter, error) {
	label, value, found := strings.Cut(filter, LabelFilterEqual)
	if !found {
		return LabelFilter{}, errors.New("invalid label filter")
	}

	// "!=" is "!" immediately followed by "=", so after cutting at the first
	// "=" a not-equal filter shows up as a label with a trailing "!".
	const notMark = "!"
	if strings.HasSuffix(label, notMark) {
		return LabelFilter{Label: strings.TrimSuffix(label, notMark), Value: value, Equal: false}, nil
	}

	return LabelFilter{Label: label, Value: value, Equal: true}, nil
}
|
||||
|
||||
func (f LabelFilter) Match(labels Labels) bool {
|
||||
lv := labels.Get(f.Label)
|
||||
if lv == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
if f.Equal && lv == f.Value {
|
||||
return true
|
||||
}
|
||||
if !f.Equal && lv != f.Value {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type LabelFilters []LabelFilter
|
||||
|
||||
func (ls LabelFilters) Match(labels Labels) bool {
|
||||
if len(ls) == 0 {
|
||||
return true // No filters, always match.
|
||||
}
|
||||
|
||||
for _, l := range ls {
|
||||
if !l.Match(labels) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func NewLabelFilters(filters []string) (LabelFilters, error) {
|
||||
ls := make(LabelFilters, len(filters))
|
||||
for i, f := range filters {
|
||||
lf, err := NewLabelFilter(f)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "new label filter %q", f)
|
||||
}
|
||||
ls[i] = lf
|
||||
}
|
||||
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// readExpectedDelim reads the next token and checks if it's the expected delimiter.
|
||||
func readExpectedDelim(dec *json.Decoder, expected json.Delim) error {
|
||||
t, err := dec.Token()
|
||||
|
||||
@@ -124,10 +124,9 @@ func (c *aggrChannel) Send(ctx context.Context, receiver Receiver, group *route.
|
||||
if receiver.Email != "" && c.email != nil {
|
||||
return c.send(ctx, receiver, group, c.email, "email")
|
||||
}
|
||||
// if receiver.Webhook != nil && c.webhook != nil {
|
||||
// TODO: temporarily disable webhook to reduce copyright risks.
|
||||
// return c.send(ctx, receiver, group, c.webhook, "webhook")
|
||||
// }
|
||||
if receiver.Webhook != nil && c.webhook != nil {
|
||||
return c.send(ctx, receiver, group, c.webhook, "webhook")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -134,53 +134,53 @@ func (e *email) buildEmail(receiver Receiver, group *route.FeedGroup) (*gomail.M
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "build email body HTML")
|
||||
}
|
||||
m.SetBody("text/html", string(body))
|
||||
m.SetBody("text/html", body)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (e *email) buildBodyHTML(group *route.FeedGroup) ([]byte, error) {
|
||||
func (e *email) buildBodyHTML(group *route.FeedGroup) (string, error) {
|
||||
bodyBuf := buffer.Get()
|
||||
defer buffer.Put(bodyBuf)
|
||||
|
||||
// Write HTML header.
|
||||
if err := e.writeHTMLHeader(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write HTML header")
|
||||
return "", errors.Wrap(err, "write HTML header")
|
||||
}
|
||||
|
||||
// Write summary.
|
||||
if err := e.writeSummary(bodyBuf, group.Summary); err != nil {
|
||||
return nil, errors.Wrap(err, "write summary")
|
||||
return "", errors.Wrap(err, "write summary")
|
||||
}
|
||||
|
||||
// Write each feed content.
|
||||
if _, err := bodyBuf.WriteString(`
|
||||
<div style="margin-top:20px; padding-top:15px; border-top:1px solid #f1f3f4;">
|
||||
<p style="font-size:32px; font-weight:500; margin:0 0 10px 0;">Feeds</p>`); err != nil {
|
||||
return nil, errors.Wrap(err, "write feeds header")
|
||||
return "", errors.Wrap(err, "write feeds header")
|
||||
}
|
||||
for i, feed := range group.Feeds {
|
||||
if err := e.writeFeedContent(bodyBuf, feed); err != nil {
|
||||
return nil, errors.Wrap(err, "write feed content")
|
||||
return "", errors.Wrap(err, "write feed content")
|
||||
}
|
||||
|
||||
// Add separator (except the last feed).
|
||||
if i < len(group.Feeds)-1 {
|
||||
if err := e.writeSeparator(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write separator")
|
||||
return "", errors.Wrap(err, "write separator")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write disclaimer and HTML footer.
|
||||
if err := e.writeDisclaimer(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write disclaimer")
|
||||
return "", errors.Wrap(err, "write disclaimer")
|
||||
}
|
||||
if err := e.writeHTMLFooter(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write HTML footer")
|
||||
return "", errors.Wrap(err, "write HTML footer")
|
||||
}
|
||||
|
||||
return bodyBuf.Bytes(), nil
|
||||
return bodyBuf.String(), nil
|
||||
}
|
||||
|
||||
func (e *email) writeHTMLHeader(buf *buffer.Bytes) error {
|
||||
|
||||
@@ -41,9 +41,10 @@ func (r *WebhookReceiver) Validate() error {
|
||||
}
|
||||
|
||||
type webhookBody struct {
|
||||
Group string `json:"group"`
|
||||
Labels model.Labels `json:"labels"`
|
||||
Feeds []*route.Feed `json:"feeds"`
|
||||
Group string `json:"group"`
|
||||
Labels model.Labels `json:"labels"`
|
||||
Summary string `json:"summary"`
|
||||
Feeds []*route.Feed `json:"feeds"`
|
||||
}
|
||||
|
||||
func newWebhook() sender {
|
||||
@@ -59,9 +60,10 @@ type webhook struct {
|
||||
func (w *webhook) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error {
|
||||
// Prepare request.
|
||||
body := &webhookBody{
|
||||
Group: group.Name,
|
||||
Labels: group.Labels,
|
||||
Feeds: group.Feeds,
|
||||
Group: group.Name,
|
||||
Labels: group.Labels,
|
||||
Summary: group.Summary,
|
||||
Feeds: group.Feeds,
|
||||
}
|
||||
b := runtimeutil.Must1(json.Marshal(body))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, receiver.Webhook.URL, bytes.NewReader(b))
|
||||
|
||||
@@ -86,9 +86,9 @@ func (c *Config) From(app *config.App) *Config {
|
||||
if app.Notify.Receivers[i].Email != "" {
|
||||
c.Receivers[i].Email = app.Notify.Receivers[i].Email
|
||||
}
|
||||
// if app.Notify.Receivers[i].Webhook != nil {
|
||||
// c.Receivers[i].Webhook = &channel.WebhookReceiver{URL: app.Notify.Receivers[i].Webhook.URL}
|
||||
// }
|
||||
if app.Notify.Receivers[i].Webhook != nil {
|
||||
c.Receivers[i].Webhook = &channel.WebhookReceiver{URL: app.Notify.Receivers[i].Webhook.URL}
|
||||
}
|
||||
}
|
||||
|
||||
c.Channels = channel.Config{}
|
||||
@@ -438,8 +438,8 @@ func (n *notifier) send(ctx context.Context, work sendWork) error {
|
||||
return channel.Send(ctx, work.receiver.Receiver, work.group)
|
||||
}
|
||||
|
||||
var nlogKey = func(group *route.FeedGroup, receiver Receiver) string {
|
||||
return fmt.Sprintf("notifier.group.%s.receiver.%s.%d", group.Name, receiver.Name, group.Time.Unix())
|
||||
var nlogKey = func(group *route.FeedGroup, receiver Receiver) []byte {
|
||||
return fmt.Appendf(nil, "notifier.group.%s.receiver.%s.%d", group.Name, receiver.Name, group.Time.Unix())
|
||||
}
|
||||
|
||||
func (n *notifier) isSent(ctx context.Context, group *route.FeedGroup, receiver Receiver) bool {
|
||||
@@ -457,7 +457,7 @@ func (n *notifier) isSent(ctx context.Context, group *route.FeedGroup, receiver
|
||||
}
|
||||
|
||||
func (n *notifier) markSent(ctx context.Context, group *route.FeedGroup, receiver Receiver) error {
|
||||
return n.Dependencies().KVStorage.Set(ctx, nlogKey(group, receiver), timeutil.Format(time.Now()), timeutil.Day)
|
||||
return n.Dependencies().KVStorage.Set(ctx, nlogKey(group, receiver), []byte(timeutil.Format(time.Now())), timeutil.Day)
|
||||
}
|
||||
|
||||
type sendWork struct {
|
||||
|
||||
@@ -72,56 +72,25 @@ func (s SubRoutes) Match(feed *block.FeedVO) *SubRoute {
|
||||
type SubRoute struct {
|
||||
Route
|
||||
Matchers []string
|
||||
matchers []matcher
|
||||
matchers model.LabelFilters
|
||||
}
|
||||
|
||||
func (r *SubRoute) Match(feed *block.FeedVO) *SubRoute {
|
||||
// Match sub routes.
|
||||
for _, subRoute := range r.SubRoutes {
|
||||
if matched := subRoute.Match(feed); matched != nil {
|
||||
return matched
|
||||
}
|
||||
}
|
||||
for _, m := range r.matchers {
|
||||
fv := feed.Labels.Get(m.key)
|
||||
switch m.equal {
|
||||
case true:
|
||||
if fv != m.value {
|
||||
return nil
|
||||
}
|
||||
default:
|
||||
if fv == m.value {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Match self.
|
||||
if !r.matchers.Match(feed.Labels) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// matcher is one parsed label condition. When equal is true the label named
// key must have exactly value; when equal is false it must have any other
// value.
type matcher struct {
	key, value string
	equal      bool
}
|
||||
|
||||
var (
|
||||
matcherEqual = "="
|
||||
matcherNotEqual = "!="
|
||||
parseMatcher = func(filter string) (matcher, error) {
|
||||
eq := false
|
||||
parts := strings.Split(filter, matcherNotEqual)
|
||||
if len(parts) != 2 {
|
||||
parts = strings.Split(filter, matcherEqual)
|
||||
eq = true
|
||||
}
|
||||
if len(parts) != 2 {
|
||||
return matcher{}, errors.New("invalid matcher")
|
||||
}
|
||||
|
||||
return matcher{key: parts[0], value: parts[1], equal: eq}, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (r *SubRoute) Validate() error {
|
||||
if len(r.GroupBy) == 0 {
|
||||
r.GroupBy = []string{model.LabelSource}
|
||||
@@ -129,17 +98,16 @@ func (r *SubRoute) Validate() error {
|
||||
if r.CompressByRelatedThreshold == nil {
|
||||
r.CompressByRelatedThreshold = ptr.To(float32(0.85))
|
||||
}
|
||||
|
||||
if len(r.Matchers) == 0 {
|
||||
return errors.New("matchers is required")
|
||||
}
|
||||
r.matchers = make([]matcher, len(r.Matchers))
|
||||
for i, matcher := range r.Matchers {
|
||||
m, err := parseMatcher(matcher)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "invalid matcher")
|
||||
}
|
||||
r.matchers[i] = m
|
||||
matchers, err := model.NewLabelFilters(r.Matchers)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "invalid matchers")
|
||||
}
|
||||
r.matchers = matchers
|
||||
|
||||
for _, subRoute := range r.SubRoutes {
|
||||
if err := subRoute.Validate(); err != nil {
|
||||
return errors.Wrap(err, "invalid sub_route")
|
||||
@@ -151,7 +119,7 @@ func (r *SubRoute) Validate() error {
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if len(c.GroupBy) == 0 {
|
||||
c.GroupBy = []string{model.LabelSource}
|
||||
c.GroupBy = []string{model.LabelType}
|
||||
}
|
||||
if c.CompressByRelatedThreshold == nil {
|
||||
c.CompressByRelatedThreshold = ptr.To(float32(0.85))
|
||||
@@ -179,8 +147,8 @@ type FeedGroup struct {
|
||||
Name string
|
||||
Time time.Time
|
||||
Labels model.Labels
|
||||
Feeds []*Feed
|
||||
Summary string
|
||||
Feeds []*Feed
|
||||
}
|
||||
|
||||
func (g *FeedGroup) ID() string {
|
||||
|
||||
@@ -19,8 +19,8 @@ import (
|
||||
"context"
|
||||
"html/template"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"k8s.io/utils/ptr"
|
||||
@@ -28,14 +28,15 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/llm"
|
||||
"github.com/glidea/zenfeed/pkg/llm/prompt"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
"github.com/glidea/zenfeed/pkg/util/crawl"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
|
||||
type Rewriter interface {
|
||||
component.Component
|
||||
config.Watcher
|
||||
@@ -71,6 +72,11 @@ type Dependencies struct {
|
||||
}
|
||||
|
||||
type Rule struct {
|
||||
// If is the condition to check before applying the rule.
|
||||
// If not set, the rule will be applied.
|
||||
If []string
|
||||
if_ model.LabelFilters
|
||||
|
||||
// SourceLabel specifies which label's value to use as source text.
|
||||
// Default is model.LabelContent.
|
||||
SourceLabel string
|
||||
@@ -96,29 +102,51 @@ type Rule struct {
|
||||
}
|
||||
|
||||
func (r *Rule) Validate() error { //nolint:cyclop
|
||||
// If.
|
||||
if len(r.If) > 0 {
|
||||
if_, err := model.NewLabelFilters(r.If)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "invalid if %q", r.If)
|
||||
}
|
||||
r.if_ = if_
|
||||
}
|
||||
|
||||
// Source label.
|
||||
if r.SourceLabel == "" {
|
||||
r.SourceLabel = model.LabelContent
|
||||
}
|
||||
if r.SkipTooShortThreshold == nil {
|
||||
r.SkipTooShortThreshold = ptr.To(300)
|
||||
r.SkipTooShortThreshold = ptr.To(0)
|
||||
}
|
||||
|
||||
// Transform.
|
||||
if r.Transform != nil {
|
||||
if r.Transform.ToText.Prompt == "" {
|
||||
return errors.New("to text prompt is required")
|
||||
if r.Transform.ToText == nil {
|
||||
return errors.New("to_text is required when transform is set")
|
||||
}
|
||||
tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
|
||||
|
||||
switch r.Transform.ToText.Type {
|
||||
case ToTextTypePrompt:
|
||||
if r.Transform.ToText.Prompt == "" {
|
||||
return errors.New("to text prompt is required for prompt type")
|
||||
}
|
||||
tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
if err := tmpl.Execute(buf, prompt.Builtin); err != nil {
|
||||
return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
r.Transform.ToText.promptRendered = buf.String()
|
||||
|
||||
case ToTextTypeCrawl, ToTextTypeCrawlByJina:
|
||||
// No specific validation for crawl type here, as the source text itself is the URL.
|
||||
default:
|
||||
return errors.Errorf("unknown transform type: %s", r.Transform.ToText.Type)
|
||||
}
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
if err := tmpl.Execute(buf, promptTemplates); err != nil {
|
||||
return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
r.Transform.ToText.promptRendered = buf.String()
|
||||
}
|
||||
|
||||
// Match.
|
||||
@@ -148,15 +176,21 @@ func (r *Rule) Validate() error { //nolint:cyclop
|
||||
}
|
||||
|
||||
func (r *Rule) From(c *config.RewriteRule) {
|
||||
r.If = c.If
|
||||
r.SourceLabel = c.SourceLabel
|
||||
r.SkipTooShortThreshold = c.SkipTooShortThreshold
|
||||
if c.Transform != nil {
|
||||
t := &Transform{}
|
||||
if c.Transform.ToText != nil {
|
||||
t.ToText = &ToText{
|
||||
toText := &ToText{
|
||||
LLM: c.Transform.ToText.LLM,
|
||||
Prompt: c.Transform.ToText.Prompt,
|
||||
}
|
||||
toText.Type = ToTextType(c.Transform.ToText.Type)
|
||||
if toText.Type == "" {
|
||||
toText.Type = ToTextTypePrompt // Default to prompt if not specified.
|
||||
}
|
||||
t.ToText = toText
|
||||
}
|
||||
r.Transform = t
|
||||
}
|
||||
@@ -173,15 +207,27 @@ type Transform struct {
|
||||
}
|
||||
|
||||
type ToText struct {
|
||||
Type ToTextType
|
||||
|
||||
// LLM is the name of the LLM to use.
|
||||
// Only used when Type is ToTextTypePrompt.
|
||||
LLM string
|
||||
|
||||
// Prompt is the prompt for LLM completion.
|
||||
// The source text will automatically be injected into the prompt.
|
||||
// Only used when Type is ToTextTypePrompt.
|
||||
Prompt string
|
||||
promptRendered string
|
||||
}
|
||||
|
||||
// ToTextType names the strategy a ToText transform uses.
type ToTextType string

const (
	// ToTextTypePrompt sends the source text to an LLM with the rendered prompt.
	ToTextTypePrompt ToTextType = "prompt"
	// ToTextTypeCrawl treats the source text as a URL and crawls it with the
	// rewriter's local crawler.
	ToTextTypeCrawl ToTextType = "crawl"
	// ToTextTypeCrawlByJina treats the source text as a URL and crawls it with
	// the rewriter's Jina crawler.
	ToTextTypeCrawlByJina ToTextType = "crawl_by_jina"
)
|
||||
|
||||
type Action string
|
||||
|
||||
const (
|
||||
@@ -189,233 +235,7 @@ const (
|
||||
ActionCreateOrUpdateLabel Action = "create_or_update_label"
|
||||
)
|
||||
|
||||
var promptTemplates = map[string]string{
|
||||
"category": `
|
||||
Analyze the content and categorize it into exactly one of these categories:
|
||||
Technology, Development, Entertainment, Finance, Health, Politics, Other
|
||||
|
||||
Classification requirements:
|
||||
- Choose the SINGLE most appropriate category based on:
|
||||
* Primary topic and main focus of the content
|
||||
* Key terminology and concepts used
|
||||
* Target audience and purpose
|
||||
* Technical depth and complexity level
|
||||
- For content that could fit multiple categories:
|
||||
* Identify the dominant theme
|
||||
* Consider the most specific applicable category
|
||||
* Use the primary intended purpose
|
||||
- If content appears ambiguous:
|
||||
* Focus on the most prominent aspects
|
||||
* Consider the practical application
|
||||
* Choose the category that best serves user needs
|
||||
|
||||
Output format:
|
||||
Return ONLY the category name, no other text or explanation.
|
||||
Must be one of the provided categories exactly as written.
|
||||
`,
|
||||
|
||||
"tags": `
|
||||
Analyze the content and add appropriate tags based on:
|
||||
- Main topics and themes
|
||||
- Key concepts and terminology
|
||||
- Target audience and purpose
|
||||
- Technical depth and domain
|
||||
- 2-4 tags are enough
|
||||
Output format:
|
||||
Return a list of tags, separated by commas, no other text or explanation.
|
||||
e.g. "AI, Technology, Innovation, Future"
|
||||
`,
|
||||
|
||||
"score": `
|
||||
Please give a score between 0 and 10 based on the following content.
|
||||
Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
|
||||
Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
|
||||
Output format:
|
||||
Return the score (0-10), no other text or explanation.
|
||||
E.g. "8", "5", "3", etc.
|
||||
`,
|
||||
|
||||
"comment_confucius": `
|
||||
Please act as Confucius and write a 100-word comment on the article.
|
||||
Content needs to be in line with the Chinese mainland's regulations.
|
||||
Output format:
|
||||
Return the comment only, no other text or explanation.
|
||||
Reply short and concise, 100 words is enough.
|
||||
`,
|
||||
|
||||
"summary": `
|
||||
Summarize the article in 100-200 words.
|
||||
`,
|
||||
|
||||
"summary_html_snippet": `
|
||||
# Task: Create Visually Appealing Information Summary Emails
|
||||
|
||||
You are a professional content designer. Please convert the provided articles into **visually modern HTML email segments**, focusing on display effects in modern clients like Gmail and QQ Mail.
|
||||
|
||||
## Key Requirements:
|
||||
|
||||
1. **Output Format**:
|
||||
- Only output HTML code snippets, **no need for complete HTML document structure**
|
||||
- Only generate HTML code for a single article, so users can combine multiple pieces into a complete email
|
||||
- No explanations, additional comments, or markups
|
||||
- **No need to add titles and sources**, users will inject them automatically
|
||||
- No use html backticks, output raw html code directly
|
||||
- Output directly, no explanation, no comments, no markups
|
||||
|
||||
2. **Content Processing**:
|
||||
- **Don't directly copy the original text**, but extract key information and core insights from each article
|
||||
- **Each article summary should be 100-200 words**, don't force word count, adjust the word count based on the actual length of the article
|
||||
- Summarize points in relaxed, natural language, as if chatting with friends, while maintaining depth
|
||||
- Maintain the original language of the article (e.g., Chinese summary for Chinese articles)
|
||||
|
||||
3. **Visual Design**:
|
||||
- Design should be aesthetically pleasing with coordinated colors
|
||||
- Use sufficient whitespace and contrast
|
||||
- Maintain a consistent visual style across all articles
|
||||
- **Must use multiple visual elements** (charts, cards, quote blocks, etc.), avoid pure text presentation
|
||||
- Each article should use at least 2-3 different visual elements to make content more intuitive and readable
|
||||
|
||||
4. **Highlight Techniques**:
|
||||
|
||||
A. **Beautiful Quote Blocks** (for highlighting important viewpoints):
|
||||
<div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
|
||||
Here is the key viewpoint or finding that needs to be highlighted.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
B. **Information Cards** (for highlighting key data):
|
||||
<div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
|
||||
</div>
|
||||
|
||||
C. **Key Points List** (for highlighting multiple points):
|
||||
<ul style="margin:20px 0; padding-left:0; list-style-type:none;">
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
|
||||
First point description
|
||||
</li>
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
|
||||
Second point description
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
D. **Emphasis Text** (for highlighting key words or phrases):
|
||||
<span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to emphasize</span>
|
||||
|
||||
5. **Timeline Design** (suitable for event sequences or news developments):
|
||||
<div style="margin:25px 0; padding:5px 0;">
|
||||
<h3 style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:18px; color:#333; margin-bottom:15px;">Event Development Timeline</h3>
|
||||
|
||||
<div style="position:relative; margin-left:30px; padding-left:30px; border-left:2px solid #e0e0e0;">
|
||||
<!-- Time Point 1 -->
|
||||
<div style="position:relative; margin-bottom:25px;">
|
||||
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 1, 2023</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
|
||||
</div>
|
||||
|
||||
<!-- Time Point 2 -->
|
||||
<div style="position:relative; margin-bottom:25px;">
|
||||
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 15, 2023</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
6. **Comparison Table** (for comparing different options or viewpoints):
|
||||
<div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
|
||||
<table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
7. **Chart Data Processing**:
|
||||
- Bar Chart/Horizontal Bars:
|
||||
<div style="margin:20px 0; padding:15px; background-color:#f8f9fa; border-radius:8px;">
|
||||
<p style="margin:0 0 15px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Data Comparison</p>
|
||||
|
||||
<!-- Item 1 -->
|
||||
<div style="margin-bottom:12px;">
|
||||
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project A</span>
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">65%</span>
|
||||
</div>
|
||||
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
|
||||
<div style="height:100%; width:65%; background:linear-gradient(to right, #4285f4, #5e97f6); border-radius:5px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Item 2 -->
|
||||
<div style="margin-bottom:12px;">
|
||||
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project B</span>
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">42%</span>
|
||||
</div>
|
||||
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
|
||||
<div style="height:100%; width:42%; background:linear-gradient(to right, #ea4335, #f07575); border-radius:5px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
8. **Highlight Box** (for displaying tips or reminders):
|
||||
<div style="margin:25px 0; padding:20px; background-color:#fffde7; border-radius:8px; border-left:4px solid #fdd835; box-shadow:0 1px 5px rgba(0,0,0,0.05);">
|
||||
<div style="display:flex; align-items:flex-start;">
|
||||
<div style="flex-shrink:0; margin-right:15px; width:24px; height:24px; background-color:#fdd835; border-radius:50%; display:flex; align-items:center; justify-content:center;">
|
||||
<span style="color:#fff; font-weight:bold; font-size:16px;">!</span>
|
||||
</div>
|
||||
<div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Tip</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#555;">
|
||||
Here are some additional tips or suggestions to help readers better understand or apply the article content.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
9. **Summary Box**:
|
||||
<div style="margin:25px 0; padding:20px; background-color:#f2f7fd; border-radius:8px; box-shadow:0 1px 5px rgba(66,133,244,0.1);">
|
||||
<p style="margin:0 0 10px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#1a73e8;">In Simple Terms</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#333;">
|
||||
This is a concise summary of the entire content, highlighting the most critical findings and conclusions.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
## Notes:
|
||||
1. **Only generate content for a single article**, not including title and source, and not including HTML head and tail structure
|
||||
2. Content should be **200-300 words**, don't force word count
|
||||
3. **Must use multiple visual elements** (at least 2-3 types), avoid monotonous pure text presentation
|
||||
4. Use relaxed, natural language, as if chatting with friends
|
||||
5. Create visual charts for important data, rather than just describing with text
|
||||
6. Use quote blocks to highlight important viewpoints, and lists to organize multiple points
|
||||
7. Appropriately use emojis and conversational expressions to increase friendliness
|
||||
8. Note that the article content has been provided in the previous message, please reply directly, no explanation, no comments, no markups
|
||||
`,
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
|
||||
// Factory is component.Factory instantiated for Rewriter components, with
// config.App as the configuration type and Dependencies as the dependency set.
type Factory component.Factory[Rewriter, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
@@ -445,6 +265,8 @@ func new(instance string, app *config.App, dependencies Dependencies) (Rewriter,
|
||||
Config: c,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
crawler: crawl.NewLocal(),
|
||||
jinaCrawler: crawl.NewJina(app.Jina.Token),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -452,6 +274,9 @@ func new(instance string, app *config.App, dependencies Dependencies) (Rewriter,
|
||||
|
||||
type rewriter struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
|
||||
crawler crawl.Crawler
|
||||
jinaCrawler crawl.Crawler
|
||||
}
|
||||
|
||||
func (r *rewriter) Reload(app *config.App) error {
|
||||
@@ -462,6 +287,8 @@ func (r *rewriter) Reload(app *config.App) error {
|
||||
}
|
||||
r.SetConfig(newConfig)
|
||||
|
||||
r.jinaCrawler = crawl.NewJina(app.Jina.Token)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -471,6 +298,11 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
|
||||
rules := *r.Config()
|
||||
for _, rule := range rules {
|
||||
// If.
|
||||
if !rule.if_.Match(labels) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get source text based on source label.
|
||||
sourceText := labels.Get(rule.SourceLabel)
|
||||
if utf8.RuneCountInString(sourceText) < *rule.SkipTooShortThreshold {
|
||||
@@ -479,7 +311,7 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
|
||||
// Transform text if configured.
|
||||
text := sourceText
|
||||
if rule.Transform != nil {
|
||||
if rule.Transform != nil && rule.Transform.ToText != nil {
|
||||
transformed, err := r.transformText(ctx, rule.Transform, sourceText)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "transform text")
|
||||
@@ -506,15 +338,37 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
return labels, nil
|
||||
}
|
||||
|
||||
// transformText transforms text using configured LLM.
|
||||
// transformText transforms text using configured LLM or by crawling a URL.
|
||||
func (r *rewriter) transformText(ctx context.Context, transform *Transform, text string) (string, error) {
|
||||
switch transform.ToText.Type {
|
||||
case ToTextTypeCrawl:
|
||||
return r.transformTextCrawl(ctx, r.crawler, text)
|
||||
case ToTextTypeCrawlByJina:
|
||||
return r.transformTextCrawl(ctx, r.jinaCrawler, text)
|
||||
|
||||
case ToTextTypePrompt:
|
||||
return r.transformTextPrompt(ctx, transform, text)
|
||||
default:
|
||||
return r.transformTextPrompt(ctx, transform, text)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rewriter) transformTextCrawl(ctx context.Context, crawler crawl.Crawler, url string) (string, error) {
|
||||
mdBytes, err := crawler.Markdown(ctx, url)
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "crawl %s", url)
|
||||
}
|
||||
return string(mdBytes), nil
|
||||
}
|
||||
|
||||
// transformTextPrompt transforms text using configured LLM.
|
||||
func (r *rewriter) transformTextPrompt(ctx context.Context, transform *Transform, text string) (string, error) {
|
||||
// Get LLM instance.
|
||||
llm := r.Dependencies().LLMFactory.Get(transform.ToText.LLM)
|
||||
|
||||
// Call completion.
|
||||
result, err := llm.String(ctx, []string{
|
||||
transform.ToText.promptRendered,
|
||||
"The content to be processed is below, and the processing requirements are as above",
|
||||
text, // TODO: may place to first line to hit the model cache in different rewrite rules.
|
||||
})
|
||||
if err != nil {
|
||||
@@ -525,32 +379,11 @@ func (r *rewriter) transformText(ctx context.Context, transform *Transform, text
|
||||
}
|
||||
|
||||
func (r *rewriter) transformTextHack(text string) string {
|
||||
bytes := unsafe.Slice(unsafe.StringData(text), len(text))
|
||||
start := 0
|
||||
end := len(bytes)
|
||||
|
||||
// Remove the last line if it's empty.
|
||||
// This is a hack to avoid the model output a empty line.
|
||||
// E.g. category: tech\n
|
||||
if end > 0 && bytes[end-1] == '\n' {
|
||||
end--
|
||||
}
|
||||
|
||||
// Remove the html backticks.
|
||||
if end-start >= 7 && string(bytes[start:start+7]) == "```html" {
|
||||
start += 7
|
||||
}
|
||||
if end-start >= 3 && string(bytes[end-3:end]) == "```" {
|
||||
end -= 3
|
||||
}
|
||||
|
||||
// If no changes, return the original string.
|
||||
if start == 0 && end == len(bytes) {
|
||||
return text
|
||||
}
|
||||
|
||||
// Only copy one time.
|
||||
return string(bytes[start:end])
|
||||
// TODO: optimize this.
|
||||
text = strings.ReplaceAll(text, "```html", "")
|
||||
text = strings.ReplaceAll(text, "```markdown", "")
|
||||
text = strings.ReplaceAll(text, "```", "")
|
||||
return text
|
||||
}
|
||||
|
||||
type mockRewriter struct {
|
||||
|
||||
@@ -44,6 +44,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}", // Using a simple template for testing
|
||||
},
|
||||
@@ -79,6 +80,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
},
|
||||
@@ -148,6 +150,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
promptRendered: "Analyze the content and categorize it...",
|
||||
@@ -186,6 +189,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
promptRendered: "Analyze the content and categorize it...",
|
||||
|
||||
@@ -55,7 +55,7 @@ func (r *periodic) Run() (err error) {
|
||||
end := time.Date(today.Year(), today.Month(), today.Day(),
|
||||
config.end.Hour(), config.end.Minute(), 0, 0, today.Location())
|
||||
|
||||
buffer := 20 * time.Minute
|
||||
buffer := 30 * time.Minute
|
||||
endPlusBuffer := end.Add(buffer)
|
||||
if now.Before(end) || now.After(endPlusBuffer) {
|
||||
return
|
||||
|
||||
@@ -18,7 +18,6 @@ package rule
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
@@ -58,11 +57,8 @@ func (c *Config) Validate() error { //nolint:cyclop,gocognit
|
||||
if c.Name == "" {
|
||||
return errors.New("name is required")
|
||||
}
|
||||
if c.Query != "" && utf8.RuneCountInString(c.Query) < 5 {
|
||||
return errors.New("query must be at least 5 characters")
|
||||
}
|
||||
if c.Threshold == 0 {
|
||||
c.Threshold = 0.6
|
||||
c.Threshold = 0.5
|
||||
}
|
||||
if c.Threshold < 0 || c.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
|
||||
@@ -65,7 +65,6 @@ func newRSSReader(config *ScrapeSourceRSS) (reader, error) {
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
|
||||
type rssReader struct {
|
||||
config *ScrapeSourceRSS
|
||||
client client
|
||||
|
||||
@@ -227,7 +227,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
appendToResult := func(feed *model.Feed) {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
value := timeutil.Format(feed.Time)
|
||||
if err := s.Dependencies().KVStorage.Set(ctx, key, value, ttl); err != nil {
|
||||
if err := s.Dependencies().KVStorage.Set(ctx, []byte(key), []byte(value), ttl); err != nil {
|
||||
log.Error(ctx, err, "set last try store time")
|
||||
}
|
||||
filtered = append(filtered, feed)
|
||||
@@ -236,7 +236,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
for _, feed := range feeds {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
|
||||
lastTryStored, err := s.Dependencies().KVStorage.Get(ctx, key)
|
||||
lastTryStored, err := s.Dependencies().KVStorage.Get(ctx, []byte(key))
|
||||
switch {
|
||||
default:
|
||||
log.Error(ctx, err, "get last stored time, fallback to continue writing")
|
||||
@@ -246,7 +246,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
appendToResult(feed)
|
||||
|
||||
case err == nil:
|
||||
t, err := timeutil.Parse(lastTryStored)
|
||||
t, err := timeutil.Parse(string(lastTryStored))
|
||||
if err != nil {
|
||||
log.Error(ctx, err, "parse last try stored time, fallback to continue writing")
|
||||
appendToResult(feed)
|
||||
|
||||
@@ -26,7 +26,6 @@ import (
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -277,47 +276,20 @@ type QueryOptions struct {
|
||||
Query string
|
||||
Threshold float32
|
||||
LabelFilters []string
|
||||
labelFilters []LabelFilter
|
||||
labelFilters model.LabelFilters
|
||||
Limit int
|
||||
Start, End time.Time
|
||||
}
|
||||
|
||||
var (
|
||||
LabelFilterEqual = "="
|
||||
LabelFilterNotEqual = "!="
|
||||
|
||||
NewLabelFilter = func(key, value string, eq bool) string {
|
||||
if eq {
|
||||
return fmt.Sprintf("%s%s%s", key, LabelFilterEqual, value)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s%s%s", key, LabelFilterNotEqual, value)
|
||||
}
|
||||
|
||||
ParseLabelFilter = func(filter string) (LabelFilter, error) {
|
||||
eq := false
|
||||
parts := strings.Split(filter, LabelFilterNotEqual)
|
||||
if len(parts) != 2 {
|
||||
parts = strings.Split(filter, LabelFilterEqual)
|
||||
eq = true
|
||||
}
|
||||
if len(parts) != 2 {
|
||||
return LabelFilter{}, errors.New("invalid label filter")
|
||||
}
|
||||
|
||||
return LabelFilter{Label: parts[0], Value: parts[1], Equal: eq}, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (q *QueryOptions) Validate() error { //nolint:cyclop
|
||||
if q.Threshold < 0 || q.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
}
|
||||
for _, labelFilter := range q.LabelFilters {
|
||||
if labelFilter == "" {
|
||||
for _, s := range q.LabelFilters {
|
||||
if s == "" {
|
||||
return errors.New("label filter is required")
|
||||
}
|
||||
filter, err := ParseLabelFilter(labelFilter)
|
||||
filter, err := model.NewLabelFilter(s)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parse label filter")
|
||||
}
|
||||
@@ -368,13 +340,6 @@ func (q *QueryOptions) HitTimeRangeCondition(b Block) bool {
|
||||
return queryAsBase || blockAsBase
|
||||
}
|
||||
|
||||
// LabelFilter defines the matcher for an item.
|
||||
type LabelFilter struct {
|
||||
Label string
|
||||
Equal bool
|
||||
Value string
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Block, Config, Dependencies]
|
||||
|
||||
@@ -1228,14 +1193,14 @@ func (b *block) applyFilters(ctx context.Context, query *QueryOptions) (res filt
|
||||
return b.mergeFilterResults(labelsResult, vectorsResult), nil
|
||||
}
|
||||
|
||||
func (b *block) applyLabelFilters(ctx context.Context, filters []LabelFilter) filterResult {
|
||||
func (b *block) applyLabelFilters(ctx context.Context, filters model.LabelFilters) filterResult {
|
||||
if len(filters) == 0 {
|
||||
return matchedAllFilterResult
|
||||
}
|
||||
|
||||
var allIDs map[uint64]struct{}
|
||||
for _, filter := range filters {
|
||||
ids := b.invertedIndex.Search(ctx, filter.Label, filter.Equal, filter.Value)
|
||||
ids := b.invertedIndex.Search(ctx, filter)
|
||||
if len(ids) == 0 {
|
||||
return matchedNothingFilterResult
|
||||
}
|
||||
@@ -1317,7 +1282,7 @@ func (b *block) mergeFilterResults(x, y filterResult) filterResult {
|
||||
}
|
||||
|
||||
func (b *block) fillEmbedding(ctx context.Context, feeds []*model.Feed) ([]*chunk.Feed, error) {
|
||||
embedded := make([]*chunk.Feed, len(feeds))
|
||||
embedded := make([]*chunk.Feed, 0, len(feeds))
|
||||
llm := b.Dependencies().LLMFactory.Get(b.Config().embeddingLLM)
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
@@ -1336,16 +1301,21 @@ func (b *block) fillEmbedding(ctx context.Context, feeds []*model.Feed) ([]*chun
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
embedded[i] = &chunk.Feed{
|
||||
embedded = append(embedded, &chunk.Feed{
|
||||
Feed: feed,
|
||||
Vectors: vectors,
|
||||
}
|
||||
})
|
||||
mu.Unlock()
|
||||
}(i, feed)
|
||||
}
|
||||
wg.Wait()
|
||||
if len(errs) > 0 {
|
||||
return nil, errs[0]
|
||||
|
||||
switch len(errs) {
|
||||
case 0:
|
||||
case len(feeds):
|
||||
return nil, errs[0] // All failed.
|
||||
default:
|
||||
log.Error(ctx, errors.Wrap(errs[0], "fill embedding"), "error_count", len(errs))
|
||||
}
|
||||
|
||||
return embedded, nil
|
||||
|
||||
@@ -24,7 +24,7 @@ type Index interface {
|
||||
index.Codec
|
||||
|
||||
// Search returns item IDs matching the given label and value.
|
||||
Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{})
|
||||
Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{})
|
||||
// Add adds item to the index.
|
||||
// If label or value in labels is empty, it will be ignored.
|
||||
// If value is too long, it will be ignored,
|
||||
@@ -88,17 +88,17 @@ type idx struct {
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
func (idx *idx) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
|
||||
func (idx *idx) Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{}) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Search")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
if value == "" {
|
||||
return idx.searchEmptyValue(label, eq)
|
||||
if matcher.Value == "" {
|
||||
return idx.searchEmptyValue(matcher.Label, matcher.Equal)
|
||||
}
|
||||
|
||||
return idx.searchNonEmptyValue(label, eq, value)
|
||||
return idx.searchNonEmptyValue(matcher)
|
||||
}
|
||||
|
||||
func (idx *idx) Add(ctx context.Context, id uint64, labels model.Labels) {
|
||||
@@ -198,16 +198,16 @@ func (idx *idx) searchEmptyValue(label string, eq bool) map[uint64]struct{} {
|
||||
// searchNonEmptyValue handles the search logic when the target value is not empty.
|
||||
// If eq is true, it returns IDs that have the exact label-value pair.
|
||||
// If eq is false, it returns IDs that *do not* have the exact label-value pair.
|
||||
func (idx *idx) searchNonEmptyValue(label string, eq bool, value string) map[uint64]struct{} {
|
||||
func (idx *idx) searchNonEmptyValue(matcher model.LabelFilter) map[uint64]struct{} {
|
||||
// Get the map of values for the given label.
|
||||
values, labelExists := idx.m[label]
|
||||
values, labelExists := idx.m[matcher.Label]
|
||||
|
||||
// If equal (eq), find the exact match.
|
||||
if eq {
|
||||
if matcher.Equal {
|
||||
if !labelExists {
|
||||
return make(map[uint64]struct{}) // Label doesn't exist.
|
||||
}
|
||||
ids, valueExists := values[value]
|
||||
ids, valueExists := values[matcher.Value]
|
||||
if !valueExists {
|
||||
return make(map[uint64]struct{}) // Value doesn't exist for this label.
|
||||
}
|
||||
@@ -221,7 +221,7 @@ func (idx *idx) searchNonEmptyValue(label string, eq bool, value string) map[uin
|
||||
resultIDs := maps.Clone(idx.ids)
|
||||
if labelExists {
|
||||
// If the specific label-value pair exists, remove its associated IDs.
|
||||
if matchingIDs, valueExists := values[value]; valueExists {
|
||||
if matchingIDs, valueExists := values[matcher.Value]; valueExists {
|
||||
for id := range matchingIDs {
|
||||
delete(resultIDs, id)
|
||||
}
|
||||
@@ -413,8 +413,8 @@ type mockIndex struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockIndex) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
|
||||
args := m.Called(ctx, label, eq, value)
|
||||
func (m *mockIndex) Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{}) {
|
||||
args := m.Called(ctx, matcher)
|
||||
|
||||
return args.Get(0).(map[uint64]struct{})
|
||||
}
|
||||
|
||||
@@ -118,9 +118,7 @@ func TestSearch(t *testing.T) {
|
||||
setupLabels map[uint64]model.Labels
|
||||
}
|
||||
type whenDetail struct {
|
||||
searchLabel string
|
||||
eq bool
|
||||
searchValue string
|
||||
matcher model.LabelFilter
|
||||
}
|
||||
type thenExpected struct {
|
||||
want []uint64
|
||||
@@ -140,9 +138,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "tech",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "tech",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{1, 2},
|
||||
@@ -159,9 +159,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "invalid",
|
||||
searchValue: "value",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "invalid",
|
||||
Value: "value",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: nil,
|
||||
@@ -178,9 +180,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "invalid",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "invalid",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: nil,
|
||||
@@ -200,9 +204,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "tech",
|
||||
eq: false,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "tech",
|
||||
Equal: false,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{2},
|
||||
@@ -220,9 +226,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "invalid",
|
||||
searchValue: "value",
|
||||
eq: false,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "invalid",
|
||||
Value: "value",
|
||||
Equal: false,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{1, 2},
|
||||
@@ -240,7 +248,7 @@ func TestSearch(t *testing.T) {
|
||||
}
|
||||
|
||||
// When.
|
||||
result := idx.Search(context.Background(), tt.WhenDetail.searchLabel, tt.WhenDetail.eq, tt.WhenDetail.searchValue)
|
||||
result := idx.Search(context.Background(), tt.WhenDetail.matcher)
|
||||
|
||||
// Then.
|
||||
if tt.ThenExpected.want == nil {
|
||||
|
||||
@@ -32,8 +32,8 @@ import (
|
||||
// --- Interface code block ---
|
||||
type Storage interface {
|
||||
component.Component
|
||||
Get(ctx context.Context, key string) (string, error)
|
||||
Set(ctx context.Context, key string, value string, ttl time.Duration) error
|
||||
Get(ctx context.Context, key []byte) ([]byte, error)
|
||||
Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error
|
||||
}
|
||||
|
||||
var ErrNotFound = errors.New("not found")
|
||||
@@ -137,7 +137,7 @@ func (k *kv) Close() error {
|
||||
|
||||
const bucket = "0"
|
||||
|
||||
func (k *kv) Get(ctx context.Context, key string) (value string, err error) {
|
||||
func (k *kv) Get(ctx context.Context, key []byte) (value []byte, err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Get")...)
|
||||
defer func() {
|
||||
telemetry.End(ctx, func() error {
|
||||
@@ -157,22 +157,22 @@ func (k *kv) Get(ctx context.Context, key string) (value string, err error) {
|
||||
})
|
||||
switch {
|
||||
case err == nil:
|
||||
return string(b), nil
|
||||
return b, nil
|
||||
case errors.Is(err, nutsdb.ErrNotFoundKey):
|
||||
return "", ErrNotFound
|
||||
return nil, ErrNotFound
|
||||
case strings.Contains(err.Error(), "key not found"):
|
||||
return "", ErrNotFound
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (k *kv) Set(ctx context.Context, key string, value string, ttl time.Duration) (err error) {
|
||||
func (k *kv) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Set")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
return k.db.Update(func(tx *nutsdb.Tx) error {
|
||||
return tx.Put(bucket, []byte(key), []byte(value), uint32(ttl.Seconds()))
|
||||
return tx.Put(bucket, key, value, uint32(ttl.Seconds()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -180,13 +180,13 @@ type mockKV struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockKV) Get(ctx context.Context, key string) (string, error) {
|
||||
func (m *mockKV) Get(ctx context.Context, key []byte) ([]byte, error) {
|
||||
args := m.Called(ctx, key)
|
||||
|
||||
return args.String(0), args.Error(1)
|
||||
return args.Get(0).([]byte), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockKV) Set(ctx context.Context, key string, value string, ttl time.Duration) error {
|
||||
func (m *mockKV) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error {
|
||||
args := m.Called(ctx, key, value, ttl)
|
||||
|
||||
return args.Error(0)
|
||||
|
||||
@@ -27,6 +27,8 @@ import (
|
||||
|
||||
"github.com/pkg/errors"
|
||||
slogdedup "github.com/veqryn/slog-dedup"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
)
|
||||
|
||||
type Level string
|
||||
@@ -187,7 +189,8 @@ func getStack(skip, depth int) string {
|
||||
}
|
||||
first = false
|
||||
|
||||
b.WriteString(frame.Function)
|
||||
fn := strings.TrimPrefix(frame.Function, model.Module) // no module prefix for zenfeed self.
|
||||
b.WriteString(fn)
|
||||
b.WriteByte(':')
|
||||
b.WriteString(strconv.Itoa(frame.Line))
|
||||
}
|
||||
|
||||
137
pkg/telemetry/server/server.go
Normal file
137
pkg/telemetry/server/server.go
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package http
|
||||
|
||||
import (
	"context"
	"net"
	"net/http"
	"net/http/pprof"
	"time"

	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/component"
	"github.com/glidea/zenfeed/pkg/config"
	telemetry "github.com/glidea/zenfeed/pkg/telemetry"
	"github.com/glidea/zenfeed/pkg/telemetry/log"
	"github.com/glidea/zenfeed/pkg/telemetry/metric"
	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
)
|
||||
|
||||
// --- Interface code block ---
|
||||
// Server is the telemetry HTTP server component. It declares no methods of
// its own beyond the embedded component.Component.
type Server interface {
|
||||
component.Component
|
||||
}
|
||||
|
||||
// Config holds the settings of the telemetry server.
type Config struct {
|
||||
// Address is the listen address in host:port form. Validate replaces an
// empty value with ":9090".
Address string
|
||||
}
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if c.Address == "" {
|
||||
c.Address = ":9090"
|
||||
}
|
||||
if _, _, err := net.SplitHostPort(c.Address); err != nil {
|
||||
return errors.Wrap(err, "invalid address")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) From(app *config.App) *Config {
|
||||
c.Address = app.Telemetry.Address
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Dependencies lists the components the telemetry server relies on.
// It currently has no fields.
type Dependencies struct {
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
// Factory creates Server instances from the application config and Dependencies.
type Factory component.Factory[Server, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](
|
||||
func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
|
||||
m := &mockServer{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
|
||||
config := &Config{}
|
||||
config.From(app)
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrap(err, "validate config")
|
||||
}
|
||||
|
||||
router := http.NewServeMux()
|
||||
router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
}))
|
||||
router.Handle("/metrics", metric.Handler())
|
||||
router.HandleFunc("/pprof", pprof.Index)
|
||||
router.HandleFunc("/pprof/cmdline", pprof.Cmdline)
|
||||
router.HandleFunc("/pprof/profile", pprof.Profile)
|
||||
router.HandleFunc("/pprof/symbol", pprof.Symbol)
|
||||
router.HandleFunc("/pprof/trace", pprof.Trace)
|
||||
|
||||
return &server{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "TelemetryServer",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
http: &http.Server{Addr: config.Address, Handler: router},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
// server is the Server implementation: the shared component base plus the
// net/http server that new configures.
type server struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
// http serves the mux built in new on the configured address.
http *http.Server
|
||||
}
|
||||
|
||||
func (s *server) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
serverErr := make(chan error, 1)
|
||||
go func() {
|
||||
serverErr <- s.http.ListenAndServe()
|
||||
}()
|
||||
|
||||
s.MarkReady()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info(ctx, "shutting down")
|
||||
|
||||
return s.http.Shutdown(ctx)
|
||||
case err := <-serverErr:
|
||||
return errors.Wrap(err, "listen and serve")
|
||||
}
|
||||
}
|
||||
|
||||
// mockServer is the Server stand-in that NewFactory produces when mock
// options are supplied.
type mockServer struct {
|
||||
component.Mock
|
||||
}
|
||||
176
pkg/util/crawl/crawl.go
Normal file
176
pkg/util/crawl/crawl.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package crawl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/temoto/robotstxt"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/util/text_convert"
|
||||
)
|
||||
|
||||
// Crawler fetches a web page and returns its content as Markdown.
type Crawler interface {
|
||||
// Markdown retrieves the page at URL u and returns it as Markdown bytes.
Markdown(ctx context.Context, u string) ([]byte, error)
|
||||
}
|
||||
|
||||
// local is a Crawler that downloads pages itself and converts the HTML to
// Markdown in-process.
type local struct {
|
||||
// hc performs both the page requests and the robots.txt requests.
hc *http.Client
|
||||
|
||||
// robotsDataCache maps a host to its parsed *robotstxt.RobotsData.
robotsDataCache sync.Map
|
||||
}
|
||||
|
||||
func NewLocal() Crawler {
|
||||
return &local{
|
||||
hc: &http.Client{},
|
||||
}
|
||||
}
|
||||
|
||||
func (c *local) Markdown(ctx context.Context, u string) ([]byte, error) {
|
||||
// Check if the page is allowed.
|
||||
if err := c.checkAllowed(ctx, u); err != nil {
|
||||
return nil, errors.Wrapf(err, "check robots.txt for %s", u)
|
||||
}
|
||||
|
||||
// Prepare the request.
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", u)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Send the request.
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", u)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Parse the response.
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, u)
|
||||
}
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "read body from %s", u)
|
||||
}
|
||||
|
||||
// Convert the body to markdown.
|
||||
mdBytes, err := textconvert.HTMLToMarkdown(bodyBytes)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "convert html to markdown")
|
||||
}
|
||||
|
||||
return mdBytes, nil
|
||||
}
|
||||
|
||||
// userAgent is sent as the User-Agent header on direct requests and is the
// agent name checked against robots.txt rules.
const userAgent = "ZenFeed"
|
||||
|
||||
func (c *local) checkAllowed(ctx context.Context, u string) error {
|
||||
parsedURL, err := url.Parse(u)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse url %s", u)
|
||||
}
|
||||
|
||||
d, err := c.getRobotsData(ctx, parsedURL.Host)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "check robots.txt for %s", parsedURL.Host)
|
||||
}
|
||||
if !d.TestAgent(parsedURL.Path, userAgent) {
|
||||
return errors.Errorf("disallowed by robots.txt for %s", u)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getRobotsData fetches and parses robots.txt for a given host.
|
||||
func (c *local) getRobotsData(ctx context.Context, host string) (*robotstxt.RobotsData, error) {
|
||||
// Check the cache.
|
||||
if data, found := c.robotsDataCache.Load(host); found {
|
||||
return data.(*robotstxt.RobotsData), nil
|
||||
}
|
||||
|
||||
// Prepare the request.
|
||||
robotsURL := fmt.Sprintf("https://%s/robots.txt", host)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, robotsURL, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", robotsURL)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Send the request.
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", robotsURL)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Parse the response.
|
||||
switch resp.StatusCode {
|
||||
case http.StatusOK:
|
||||
data, err := robotstxt.FromResponse(resp)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "parse robots.txt from %s", robotsURL)
|
||||
}
|
||||
c.robotsDataCache.Store(host, data)
|
||||
return data, nil
|
||||
case http.StatusNotFound:
|
||||
data := &robotstxt.RobotsData{}
|
||||
c.robotsDataCache.Store(host, data)
|
||||
return data, nil
|
||||
case http.StatusUnauthorized, http.StatusForbidden:
|
||||
return nil, errors.Errorf("access to %s denied (status %d)", robotsURL, resp.StatusCode)
|
||||
default:
|
||||
return nil, errors.Errorf("unexpected status %d fetching %s", resp.StatusCode, robotsURL)
|
||||
}
|
||||
}
|
||||
|
||||
// jina is a Crawler that delegates fetching and Markdown conversion to the
// remote r.jina.ai endpoint.
type jina struct {
|
||||
// hc performs the requests to r.jina.ai.
hc *http.Client
|
||||
// token, when non-empty, is sent as a Bearer token in the Authorization header.
token string
|
||||
}
|
||||
|
||||
func NewJina(token string) Crawler {
|
||||
return &jina{
|
||||
hc: &http.Client{},
|
||||
|
||||
// If token is empty, will not affect to use, but rate limit will be lower.
|
||||
// See https://jina.ai/api-dashboard/rate-limit.
|
||||
token: token,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *jina) Markdown(ctx context.Context, u string) ([]byte, error) {
|
||||
proxyURL := fmt.Sprintf("https://r.jina.ai/%s", u)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, proxyURL, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", u)
|
||||
}
|
||||
|
||||
req.Header.Set("X-Engine", "browser")
|
||||
req.Header.Set("X-Robots-Txt", userAgent)
|
||||
if c.token != "" {
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.token))
|
||||
}
|
||||
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", proxyURL)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, proxyURL)
|
||||
}
|
||||
|
||||
mdBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "read body from %s", proxyURL)
|
||||
}
|
||||
|
||||
return mdBytes, nil
|
||||
}
|
||||
@@ -13,39 +13,19 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rpc
|
||||
package jsonrpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
)
|
||||
|
||||
type Handler[Request any, Response any] func(ctx context.Context, req *Request) (*Response, error)
|
||||
|
||||
var (
|
||||
ErrBadRequest = func(err error) Error { return newError(http.StatusBadRequest, err) }
|
||||
ErrNotFound = func(err error) Error { return newError(http.StatusNotFound, err) }
|
||||
ErrInternal = func(err error) Error { return newError(http.StatusInternalServerError, err) }
|
||||
)
|
||||
|
||||
type Error struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
func (e Error) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
|
||||
func newError(code int, err error) Error {
|
||||
return Error{
|
||||
Code: code,
|
||||
Message: err.Error(),
|
||||
}
|
||||
}
|
||||
|
||||
func API[Request any, Response any](handler Handler[Request, Response]) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
allowCORS(w)
|
||||
@@ -65,11 +45,11 @@ func API[Request any, Response any](handler Handler[Request, Response]) http.Han
|
||||
|
||||
resp, err := handler(r.Context(), &req)
|
||||
if err != nil {
|
||||
var rpcErr Error
|
||||
if errors.As(err, &rpcErr) {
|
||||
var apiErr api.Error
|
||||
if errors.As(err, &apiErr) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(rpcErr.Code)
|
||||
_ = json.NewEncoder(w).Encode(rpcErr)
|
||||
w.WriteHeader(apiErr.Code)
|
||||
_ = json.NewEncoder(w).Encode(apiErr)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -13,7 +13,7 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rpc
|
||||
package jsonrpc
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
@@ -58,15 +59,15 @@ func TestAPI(t *testing.T) {
|
||||
}
|
||||
|
||||
badRequestHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrBadRequest(errors.New("invalid request"))
|
||||
return nil, api.ErrBadRequest(errors.New("invalid request"))
|
||||
}
|
||||
|
||||
notFoundHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrNotFound(errors.New("resource not found"))
|
||||
return nil, api.ErrNotFound(errors.New("resource not found"))
|
||||
}
|
||||
|
||||
internalErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrInternal(errors.New("server error"))
|
||||
return nil, api.ErrInternal(errors.New("server error"))
|
||||
}
|
||||
|
||||
genericErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
Reference in New Issue
Block a user