From 278cb662dee15b776febc50020c9d06b17f7f2fa Mon Sep 17 00:00:00 2001
From: glidea <740696441@qq.com>
Date: Tue, 6 May 2025 16:00:59 +0800
Subject: [PATCH] marshal time.Duration as json string

---
 docs/config-zh.md             |  2 +-
 docs/config.md                |  2 +-
 pkg/config/config.go          | 33 ++++++++++----------
 pkg/schedule/schedule.go      |  2 +-
 pkg/scrape/manager.go         |  6 ++--
 pkg/scrape/scraper/scraper.go |  2 +-
 pkg/storage/feed/feed.go      |  6 ++--
 pkg/util/time/time.go         | 59 +++++++++++++++++++++++++++++++++++
 8 files changed, 86 insertions(+), 26 deletions(-)

diff --git a/docs/config-zh.md b/docs/config-zh.md
index 6100e4f..380c226 100644
--- a/docs/config-zh.md
+++ b/docs/config-zh.md
@@ -44,7 +44,7 @@
 
 | 字段 | 类型 | 描述 | 默认值 | 是否必需 |
 | :----------------------- | :-------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------- | :----- | :---------------------------------- |
-| `scrape.past` | `time.Duration` | 抓取 Feed 的回溯时间窗口。例如 `1h` 表示只抓取过去 1 小时的 Feed。 | `3d` | 否 |
+| `scrape.past` | `time.Duration` | 抓取 Feed 的回溯时间窗口。例如 `1h` 表示只抓取过去 1 小时的 Feed。 | `24h` | 否 |
 | `scrape.interval` | `time.Duration` | 抓取每个源的频率 (全局默认值)。例如 `1h`。 | `1h` | 否 |
 | `scrape.rsshub_endpoint` | `string` | RSSHub 的端点。你可以部署自己的 RSSHub 服务器或使用公共实例 (参见 [RSSHub 文档](https://docs.rsshub.app/guide/instances))。例如 `https://rsshub.app`。 | | 是 (如果使用了 `rsshub_route_path`) |
 | `scrape.sources` | `对象列表` | 用于抓取 Feed 的源列表。详见下方的 **抓取源配置**。 | `[]` | 是 (至少一个) |

diff --git a/docs/config.md b/docs/config.md
index af5d9fd..1358094 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -44,7 +44,7 @@ This section defines a list of available Large Language Models. At least one LLM
 
 | Field | Type | Description | Default | Required |
 | :----------------------- | :-------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------ | :-------------------------------- |
-| `scrape.past` | duration | The lookback time window for scraping feeds. e.g. `1h` means only scrape feeds in the past 1 hour. | `3d` | No |
+| `scrape.past` | duration | The lookback time window for scraping feeds. e.g. `1h` means only scrape feeds in the past 1 hour. | `24h` | No |
 | `scrape.interval` | duration | How often to scrape each source (global default). e.g. `1h`. | `1h` | No |
 | `scrape.rsshub_endpoint` | string | The endpoint of the RSSHub. You can deploy your own or use a public one (see [RSSHub Docs](https://docs.rsshub.app/guide/instances)). e.g. `https://rsshub.app`. | | Yes (if `rsshub_route_path` used) |
 | `scrape.sources` | list of objects | The sources for scraping feeds. See **Scrape Source Configuration** below. | `[]` | Yes (at least one) |

diff --git a/pkg/config/config.go b/pkg/config/config.go
index 8ea7089..a853bf9 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -30,6 +30,7 @@ import (
 	"github.com/glidea/zenfeed/pkg/telemetry"
 	"github.com/glidea/zenfeed/pkg/telemetry/log"
 	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
+	timeutil "github.com/glidea/zenfeed/pkg/util/time"
 )
 
 // --- Interface code block ---
@@ -83,10 +84,10 @@ type LLM struct {
 }
 
 type Scrape struct {
-	Past           time.Duration  `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 3d"`
-	Interval       time.Duration  `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"`
-	RSSHubEndpoint string         `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."`
-	Sources        []ScrapeSource `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."`
+	Past           timeutil.Duration `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 24h"`
+	Interval       timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"`
+	RSSHubEndpoint string            `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."`
+	Sources        []ScrapeSource    `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."`
 }
 
 type Storage struct {
@@ -95,15 +96,15 @@ type Storage struct {
 }
 
 type FeedStorage struct {
-	Rewrites      []RewriteRule `yaml:"rewrites,omitempty" json:"rewrites,omitempty" desc:"How to process each feed before storing it. It inspired by Prometheus relabeling (https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config), this implements a very strong flexibility and loose coupling."`
-	FlushInterval time.Duration `yaml:"flush_interval,omitempty" json:"flush_interval,omitempty" desc:"How often to flush the feed storage to the database, higher value will cause high data loss risk, but on the other hand, it will reduce the number of disk operations and improve performance. Default: 200ms"`
-	EmbeddingLLM  string        `yaml:"embedding_llm,omitempty" json:"embedding_llm,omitempty" desc:"The embedding LLM for the feed storage. It will significantly affect the accuracy of semantic search, please be careful to choose. If you want to switch, please note to keep the old llm configuration, because the past data is still implicitly associated with it, otherwise it will cause the past data to be unable to be semantically searched. Default is the default LLM in llms section."`
-	Retention     time.Duration `yaml:"retention,omitempty" json:"retention,omitempty" desc:"How long to keep a feed. Default: 8d"`
-	BlockDuration time.Duration `yaml:"block_duration,omitempty" json:"block_duration,omitempty" desc:"How long to keep the feed storage block. Block is time-based, like Prometheus TSDB Block. Default: 25h"`
+	Rewrites      []RewriteRule     `yaml:"rewrites,omitempty" json:"rewrites,omitempty" desc:"How to process each feed before storing it. It inspired by Prometheus relabeling (https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config), this implements a very strong flexibility and loose coupling."`
+	FlushInterval timeutil.Duration `yaml:"flush_interval,omitempty" json:"flush_interval,omitempty" desc:"How often to flush the feed storage to the database, higher value will cause high data loss risk, but on the other hand, it will reduce the number of disk operations and improve performance. Default: 200ms"`
+	EmbeddingLLM  string            `yaml:"embedding_llm,omitempty" json:"embedding_llm,omitempty" desc:"The embedding LLM for the feed storage. It will significantly affect the accuracy of semantic search, please be careful to choose. If you want to switch, please note to keep the old llm configuration, because the past data is still implicitly associated with it, otherwise it will cause the past data to be unable to be semantically searched. Default is the default LLM in llms section."`
+	Retention     timeutil.Duration `yaml:"retention,omitempty" json:"retention,omitempty" desc:"How long to keep a feed. Default: 8d"`
+	BlockDuration timeutil.Duration `yaml:"block_duration,omitempty" json:"block_duration,omitempty" desc:"How long to keep the feed storage block. Block is time-based, like Prometheus TSDB Block. Default: 25h"`
 }
 
 type ScrapeSource struct {
-	Interval time.Duration     `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape this source. Default: global interval"`
+	Interval timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape this source. Default: global interval"`
 	Name     string            `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the source. It is required."`
 	Labels   map[string]string `yaml:"labels,omitempty" json:"labels,omitempty" desc:"The additional labels to add to the feed of this source."`
 	RSS      *ScrapeSourceRSS  `yaml:"rss,omitempty" json:"rss,omitempty" desc:"The RSS config of the source."`
@@ -134,12 +135,12 @@ type RewriteRuleTransformToText struct {
 }
 
 type SchedulsRule struct {
-	Name          string        `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the rule. It is required."`
-	Query         string        `yaml:"query,omitempty" json:"query,omitempty" desc:"The semantic query to get the feeds. NOTE it is optional"`
-	Threshold     float32       `yaml:"threshold,omitempty" json:"threshold,omitempty" desc:"The threshold to filter the query result by relevance (with 'query') score. It does not work when query is not set. Default is 0.6."`
-	LabelFilters  []string      `yaml:"label_filters,omitempty" json:"label_filters,omitempty" desc:"The label filters (equal or not equal) to match the feeds. e.g. [category=tech, source!=github]"`
-	EveryDay      string        `yaml:"every_day,omitempty" json:"every_day,omitempty" desc:"The query range at the end time of every day. Format: start~end, e.g. 00:00~23:59, or -22:00~7:00 (yesterday 22:00 to today 07:00)."`
-	WatchInterval time.Duration `yaml:"watch_interval,omitempty" json:"watch_interval,omitempty" desc:"The run and query interval to watch the rule. Default is 10m. It can not be set with every_day at same time."`
+	Name          string            `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the rule. It is required."`
+	Query         string            `yaml:"query,omitempty" json:"query,omitempty" desc:"The semantic query to get the feeds. NOTE it is optional"`
+	Threshold     float32           `yaml:"threshold,omitempty" json:"threshold,omitempty" desc:"The threshold to filter the query result by relevance (with 'query') score. It does not work when query is not set. Default is 0.6."`
+	LabelFilters  []string          `yaml:"label_filters,omitempty" json:"label_filters,omitempty" desc:"The label filters (equal or not equal) to match the feeds. e.g. [category=tech, source!=github]"`
+	EveryDay      string            `yaml:"every_day,omitempty" json:"every_day,omitempty" desc:"The query range at the end time of every day. Format: start~end, e.g. 00:00~23:59, or -22:00~7:00 (yesterday 22:00 to today 07:00)."`
+	WatchInterval timeutil.Duration `yaml:"watch_interval,omitempty" json:"watch_interval,omitempty" desc:"The run and query interval to watch the rule. Default is 10m. It can not be set with every_day at same time."`
 }
 
 type NotifyRoute struct {
diff --git a/pkg/schedule/schedule.go b/pkg/schedule/schedule.go
index 4390345..9f2be2b 100644
--- a/pkg/schedule/schedule.go
+++ b/pkg/schedule/schedule.go
@@ -59,7 +59,7 @@ func (c *Config) From(app *config.App) *Config {
 			Threshold:     r.Threshold,
 			LabelFilters:  r.LabelFilters,
 			EveryDay:      r.EveryDay,
-			WatchInterval: r.WatchInterval,
+			WatchInterval: time.Duration(r.WatchInterval),
 		}
 	}
 
diff --git a/pkg/scrape/manager.go b/pkg/scrape/manager.go
index 5243e7d..ae93379 100644
--- a/pkg/scrape/manager.go
+++ b/pkg/scrape/manager.go
@@ -66,14 +66,14 @@ func (c *Config) From(app *config.App) {
 	c.Scrapers = make([]scraper.Config, len(app.Scrape.Sources))
 	for i := range app.Scrape.Sources {
 		c.Scrapers[i] = scraper.Config{
-			Past:     app.Scrape.Past,
+			Past:     time.Duration(app.Scrape.Past),
 			Name:     app.Scrape.Sources[i].Name,
-			Interval: app.Scrape.Sources[i].Interval,
+			Interval: time.Duration(app.Scrape.Sources[i].Interval),
 			Labels:   model.Labels{},
 		}
 		c.Scrapers[i].Labels.FromMap(app.Scrape.Sources[i].Labels)
 		if c.Scrapers[i].Interval <= 0 {
-			c.Scrapers[i].Interval = app.Scrape.Interval
+			c.Scrapers[i].Interval = time.Duration(app.Scrape.Interval)
 		}
 		if app.Scrape.Sources[i].RSS != nil {
 			c.Scrapers[i].RSS = &scraper.ScrapeSourceRSS{
diff --git a/pkg/scrape/scraper/scraper.go b/pkg/scrape/scraper/scraper.go
index d3a73e9..495340b 100644
--- a/pkg/scrape/scraper/scraper.go
+++ b/pkg/scrape/scraper/scraper.go
@@ -55,7 +55,7 @@ const maxPast = 15 * 24 * time.Hour
 
 func (c *Config) Validate() error {
 	if c.Past <= 0 {
-		c.Past = 3 * timeutil.Day
+		c.Past = timeutil.Day
 	}
 	if c.Past > maxPast {
 		c.Past = maxPast
diff --git a/pkg/storage/feed/feed.go b/pkg/storage/feed/feed.go
index 2560ec5..7a7e2c6 100644
--- a/pkg/storage/feed/feed.go
+++ b/pkg/storage/feed/feed.go
@@ -99,9 +99,9 @@ func (c *Config) Validate() error {
 func (c *Config) From(app *config.App) {
 	*c = Config{
 		Dir:           app.Storage.Dir,
-		Retention:     app.Storage.Feed.Retention,
-		BlockDuration: app.Storage.Feed.BlockDuration,
-		FlushInterval: app.Storage.Feed.FlushInterval,
+		Retention:     time.Duration(app.Storage.Feed.Retention),
+		BlockDuration: time.Duration(app.Storage.Feed.BlockDuration),
+		FlushInterval: time.Duration(app.Storage.Feed.FlushInterval),
 		EmbeddingLLM:  app.Storage.Feed.EmbeddingLLM,
 	}
 }
diff --git a/pkg/util/time/time.go b/pkg/util/time/time.go
index fca5290..d7eb4d9 100644
--- a/pkg/util/time/time.go
+++ b/pkg/util/time/time.go
@@ -17,11 +17,13 @@ package time
 
 import (
 	"context"
+	"encoding/json"
 	"math/rand"
 	"time"
 	_ "time/tzdata"
 
 	"github.com/pkg/errors"
+	"gopkg.in/yaml.v3"
 
 	runtimeutil "github.com/glidea/zenfeed/pkg/util/runtime"
 )
@@ -84,3 +86,60 @@ func Tick(ctx context.Context, d time.Duration, f func() error) error {
 func Random(max time.Duration) time.Duration {
 	return time.Duration(rand.Int63n(int64(max)))
 }
+
+type Duration time.Duration
+
+func (d Duration) String() string {
+	return time.Duration(d).String()
+}
+
+func (d Duration) MarshalJSON() ([]byte, error) {
+	return json.Marshal(d.String())
+}
+
+func (d *Duration) UnmarshalJSON(b []byte) error {
+	var v any
+	if err := json.Unmarshal(b, &v); err != nil {
+		return err
+	}
+
+	switch tv := v.(type) {
+	case float64:
+		*d = Duration(time.Duration(tv))
+
+		return nil
+
+	case string:
+		parsed, err := time.ParseDuration(tv)
+		if err != nil {
+			return err
+		}
+		*d = Duration(parsed)
+
+		return nil
+
+	default:
+		return errors.Errorf("invalid duration: %v", tv)
+	}
+}
+
+func (d Duration) MarshalYAML() (interface{}, error) {
+	return d.String(), nil
+}
+
+func (d *Duration) UnmarshalYAML(value *yaml.Node) error {
+	if value.Kind != yaml.ScalarNode {
+		return errors.Errorf("invalid duration: expected a scalar node, got %v", value.Kind)
+	}
+
+	s := value.Value
+
+	parsed, err := time.ParseDuration(s)
+	if err != nil {
+		return errors.Errorf("failed to parse duration string '%s' from YAML: %s", s, err.Error())
+	}
+
+	*d = Duration(parsed)
+
+	return nil
+}
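
Note (not part of the patch): a minimal, self-contained sketch of the behavior this change is expected to produce once applied. scrapeConfig is a hypothetical stand-in for the real config.Scrape struct, and the timeutil import path is the one added to pkg/config/config.go in the diff above; it assumes the patched package builds as-is.

package main

import (
	"encoding/json"
	"fmt"
	"time"

	// Import path taken from the diff; assumes the patched package is available.
	timeutil "github.com/glidea/zenfeed/pkg/util/time"
)

// scrapeConfig is a hypothetical stand-in for config.Scrape, reduced to two fields.
type scrapeConfig struct {
	Past     timeutil.Duration `json:"past,omitempty"`
	Interval timeutil.Duration `json:"interval,omitempty"`
}

func main() {
	cfg := scrapeConfig{
		Past:     timeutil.Duration(24 * time.Hour),
		Interval: timeutil.Duration(time.Hour),
	}

	// With MarshalJSON from the patch, durations serialize as strings
	// rather than nanosecond integers.
	out, err := json.Marshal(cfg)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // {"past":"24h0m0s","interval":"1h0m0s"}

	// UnmarshalJSON accepts both a Go duration string and the old numeric
	// (nanosecond) form, so existing JSON payloads keep decoding.
	var decoded scrapeConfig
	if err := json.Unmarshal([]byte(`{"past":"1h30m","interval":3600000000000}`), &decoded); err != nil {
		panic(err)
	}
	fmt.Println(time.Duration(decoded.Past), time.Duration(decoded.Interval)) // 1h30m0s 1h0m0s
}

The YAML round trip works the same way through MarshalYAML/UnmarshalYAML, except that the YAML decoder only accepts a scalar duration string.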