diff --git a/docs/config-zh.md b/docs/config-zh.md index 4b7387d..30277e2 100644 --- a/docs/config-zh.md +++ b/docs/config-zh.md @@ -59,6 +59,7 @@ | `scrape.past` | `time.Duration` | 抓取 Feed 的回溯时间窗口。例如 `1h` 表示只抓取过去 1 小时的 Feed。 | `24h` | 否 | | `scrape.interval` | `time.Duration` | 抓取每个源的频率 (全局默认值)。例如 `1h`。 | `1h` | 否 | | `scrape.rsshub_endpoint` | `string` | RSSHub 的端点。你可以部署自己的 RSSHub 服务器或使用公共实例 (参见 [RSSHub 文档](https://docs.rsshub.app/guide/instances))。例如 `https://rsshub.app`。 | | 是 (如果使用了 `rsshub_route_path`) | +| `scrape.rsshub_access_key` | `string` | RSSHub 的访问密钥,用于访问控制。设置后会作为 `key` 查询参数附加到每个尚未包含 `key=` 的 RSS 源 URL (参见 [RSSHub 文档:访问控制](https://docs.rsshub.app/deploy/config#access-control-configurations))。 | | 否 | | `scrape.sources` | `对象列表` | 用于抓取 Feed 的源列表。详见下方的 **抓取源配置**。 | `[]` | 是 (至少一个) | ### 抓取源配置 (`scrape.sources[]`) diff --git a/docs/config.md b/docs/config.md index 78fd800..5230c54 100644 --- a/docs/config.md +++ b/docs/config.md @@ -59,6 +59,7 @@ This section configures parameters related to the Jina AI Reader API, primarily | `scrape.past` | `time.Duration` | Time window to look back when scraping feeds. E.g., `1h` means only scrape feeds from the past 1 hour. | `24h` | No | | `scrape.interval` | `time.Duration` | Frequency to scrape each source (global default). E.g., `1h`. | `1h` | No | | `scrape.rsshub_endpoint` | `string` | Endpoint for RSSHub. You can deploy your own RSSHub server or use a public instance (see [RSSHub Documentation](https://docs.rsshub.app/guide/instances)). E.g., `https://rsshub.app`. | | Yes (if `rsshub_route_path` is used) | +| `scrape.rsshub_access_key` | `string` | The access key for RSSHub, used for access control. When set, it is appended as the `key` query parameter to every RSS source URL that does not already contain `key=` (see [RSSHub access control configuration](https://docs.rsshub.app/deploy/config#access-control-configurations)). | | No | | `scrape.sources` | `list of objects` | List of sources to scrape feeds from. See **Scrape Source Configuration** below. 
| `[]` | Yes (at least one) | ### Scrape Source Configuration (`scrape.sources[]`) diff --git a/pkg/config/config.go b/pkg/config/config.go index 668d910..cd14d06 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -95,10 +95,11 @@ type LLM struct { } type Scrape struct { - Past timeutil.Duration `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 3d"` - Interval timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"` - RSSHubEndpoint string `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."` - Sources []ScrapeSource `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."` + Past timeutil.Duration `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 3d"` + Interval timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"` + RSSHubEndpoint string `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."` + RSSHubAccessKey string `yaml:"rsshub_access_key,omitempty" json:"rsshub_access_key,omitempty" desc:"The access key for RSSHub. Used for access control. 
(see [RSSHub config](https://docs.rsshub.app/deploy/config#access-control-configurations))"` + Sources []ScrapeSource `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."` } type Storage struct { @@ -133,6 +134,7 @@ type ScrapeSource struct { type ScrapeSourceRSS struct { URL string `yaml:"url,omitempty" json:"url,omitempty" desc:"The URL of the RSS feed. e.g. http://localhost:1200/github/trending/daily/any. You can not set it when rsshub_route_path is set."` RSSHubRoutePath string `yaml:"rsshub_route_path,omitempty" json:"rsshub_route_path,omitempty" desc:"The RSSHub route path of the RSS feed. e.g. github/trending/daily/any. It will be joined with the rsshub_endpoint as the final URL."` + // RSSHubAccessKey string `yaml:"rsshub_access_key,omitempty" json:"rsshub_access_key,omitempty" desc:"The access key for accessing RSSHub routes. If set, it will be appended as a query parameter to the URL."` } type RewriteRule struct { diff --git a/pkg/scrape/manager.go b/pkg/scrape/manager.go index 0c55454..73c194d 100644 --- a/pkg/scrape/manager.go +++ b/pkg/scrape/manager.go @@ -80,6 +80,7 @@ func (c *Config) From(app *config.App) { URL: app.Scrape.Sources[i].RSS.URL, RSSHubEndpoint: app.Scrape.RSSHubEndpoint, RSSHubRoutePath: app.Scrape.Sources[i].RSS.RSSHubRoutePath, + RSSHubAccessKey: app.Scrape.RSSHubAccessKey, } } } diff --git a/pkg/scrape/scraper/rss.go b/pkg/scrape/scraper/rss.go index 851bc49..4820e6f 100644 --- a/pkg/scrape/scraper/rss.go +++ b/pkg/scrape/scraper/rss.go @@ -33,6 +33,7 @@ type ScrapeSourceRSS struct { URL string RSSHubEndpoint string RSSHubRoutePath string + RSSHubAccessKey string } func (c *ScrapeSourceRSS) Validate() error { @@ -46,6 +47,15 @@ func (c *ScrapeSourceRSS) Validate() error { return errors.New("URL must be a valid HTTP/HTTPS URL") } + // Append access key as query parameter if provided + if c.RSSHubAccessKey != "" && !strings.Contains(c.URL, "key=") { + if strings.Contains(c.URL, "?") { + c.URL += 
"&key=" + c.RSSHubAccessKey + } else { + c.URL += "?key=" + c.RSSHubAccessKey + } + } + return nil } diff --git a/pkg/scrape/scraper/rss_test.go b/pkg/scrape/scraper/rss_test.go index b66b22c..cfeedfb 100644 --- a/pkg/scrape/scraper/rss_test.go +++ b/pkg/scrape/scraper/rss_test.go @@ -122,6 +122,55 @@ func TestNewRSS(t *testing.T) { }, }, }, + { + Scenario: "Valid Configuration - RSSHub with Access Key", + Given: "a valid configuration with RSSHub details and access key", + When: "creating a new RSS reader", + Then: "should succeed, construct the URL with access key, and return a valid reader", + GivenDetail: givenDetail{ + config: &ScrapeSourceRSS{ + RSSHubEndpoint: "http://rsshub.app/", + RSSHubRoutePath: "/_/test", + RSSHubAccessKey: "testkey", + }, + }, + WhenDetail: whenDetail{}, + ThenExpected: thenExpected{ + wantErr: false, + validateFunc: func(t *testing.T, r reader) { + Expect(r).NotTo(BeNil()) + rssReader, ok := r.(*rssReader) + Expect(ok).To(BeTrue()) + Expect(rssReader.config.URL).To(Equal("http://rsshub.app/_/test?key=testkey")) + Expect(rssReader.config.RSSHubEndpoint).To(Equal("http://rsshub.app/")) + Expect(rssReader.config.RSSHubRoutePath).To(Equal("/_/test")) + Expect(rssReader.config.RSSHubAccessKey).To(Equal("testkey")) + }, + }, + }, + { + Scenario: "Valid Configuration - URL with Access Key", + Given: "a valid configuration with URL and access key", + When: "creating a new RSS reader", + Then: "should succeed, append access key to URL, and return a valid reader", + GivenDetail: givenDetail{ + config: &ScrapeSourceRSS{ + URL: "http://example.com/feed", + RSSHubAccessKey: "testkey", + }, + }, + WhenDetail: whenDetail{}, + ThenExpected: thenExpected{ + wantErr: false, + validateFunc: func(t *testing.T, r reader) { + Expect(r).NotTo(BeNil()) + rssReader, ok := r.(*rssReader) + Expect(ok).To(BeTrue()) + Expect(rssReader.config.URL).To(Equal("http://example.com/feed?key=testkey")) + Expect(rssReader.config.RSSHubAccessKey).To(Equal("testkey")) + }, 
+ }, + }, } // --- Run tests ---