add rss & crawl & webhook
This commit is contained in:
@@ -37,7 +37,6 @@ import (
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
jsonschema "github.com/glidea/zenfeed/pkg/util/json_schema"
|
||||
"github.com/glidea/zenfeed/pkg/util/rpc"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
@@ -161,11 +160,11 @@ type QueryRequest struct {
|
||||
}
|
||||
|
||||
func (r *QueryRequest) Validate() error { //nolint:cyclop
|
||||
if r.Query != "" && utf8.RuneCountInString(r.Query) < 5 {
|
||||
return errors.New("query must be at least 5 characters")
|
||||
if r.Query != "" && utf8.RuneCountInString(r.Query) > 64 {
|
||||
return errors.New("query must be at most 64 characters")
|
||||
}
|
||||
if r.Threshold == 0 {
|
||||
r.Threshold = 0.55
|
||||
r.Threshold = 0.5
|
||||
}
|
||||
if r.Threshold < 0 || r.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
@@ -200,6 +199,28 @@ type QueryResponse struct {
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
type Error struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
func (e Error) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
|
||||
func newError(code int, err error) Error {
|
||||
return Error{
|
||||
Code: code,
|
||||
Message: err.Error(),
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
ErrBadRequest = func(err error) Error { return newError(http.StatusBadRequest, err) }
|
||||
ErrNotFound = func(err error) Error { return newError(http.StatusNotFound, err) }
|
||||
ErrInternal = func(err error) Error { return newError(http.StatusInternalServerError, err) }
|
||||
)
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[API, config.App, Dependencies]
|
||||
|
||||
@@ -262,7 +283,7 @@ func (a *api) QueryAppConfigSchema(
|
||||
) (resp *QueryAppConfigSchemaResponse, err error) {
|
||||
schema, err := jsonschema.ForType(reflect.TypeOf(config.App{}))
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query app config schema"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query app config schema"))
|
||||
}
|
||||
|
||||
return (*QueryAppConfigSchemaResponse)(&schema), nil
|
||||
@@ -282,7 +303,7 @@ func (a *api) ApplyAppConfig(
|
||||
req *ApplyAppConfigRequest,
|
||||
) (resp *ApplyAppConfigResponse, err error) {
|
||||
if err := a.Dependencies().ConfigManager.SaveAppConfig(&req.App); err != nil {
|
||||
return nil, rpc.ErrBadRequest(errors.Wrap(err, "save app config"))
|
||||
return nil, ErrBadRequest(errors.Wrap(err, "save app config"))
|
||||
}
|
||||
|
||||
return &ApplyAppConfigResponse{}, nil
|
||||
@@ -297,20 +318,20 @@ func (a *api) QueryRSSHubCategories(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub websites"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub websites"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
var forwardResp map[string]RSSHubWebsite
|
||||
if err := json.NewDecoder(forwardRespIO.Body).Decode(&forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -333,7 +354,7 @@ func (a *api) QueryRSSHubWebsites(
|
||||
ctx context.Context, req *QueryRSSHubWebsitesRequest,
|
||||
) (resp *QueryRSSHubWebsitesResponse, err error) {
|
||||
if req.Category == "" {
|
||||
return nil, rpc.ErrBadRequest(errors.New("category is required"))
|
||||
return nil, ErrBadRequest(errors.New("category is required"))
|
||||
}
|
||||
|
||||
url := a.Config().RSSHubEndpoint + "/api/category/" + req.Category
|
||||
@@ -341,29 +362,29 @@ func (a *api) QueryRSSHubWebsites(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
body, err := io.ReadAll(forwardRespIO.Body)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "read response"))
|
||||
}
|
||||
if len(body) == 0 {
|
||||
// Hack for RSSHub...
|
||||
// Consider cache category ids for validate by self to remove this shit code.
|
||||
return nil, rpc.ErrBadRequest(errors.New("category id is invalid"))
|
||||
return nil, ErrBadRequest(errors.New("category id is invalid"))
|
||||
}
|
||||
var forwardResp map[string]RSSHubWebsite
|
||||
if err := json.Unmarshal(body, &forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -383,7 +404,7 @@ func (a *api) QueryRSSHubRoutes(
|
||||
req *QueryRSSHubRoutesRequest,
|
||||
) (resp *QueryRSSHubRoutesResponse, err error) {
|
||||
if req.WebsiteID == "" {
|
||||
return nil, rpc.ErrBadRequest(errors.New("website id is required"))
|
||||
return nil, ErrBadRequest(errors.New("website id is required"))
|
||||
}
|
||||
|
||||
url := a.Config().RSSHubEndpoint + "/api/namespace/" + req.WebsiteID
|
||||
@@ -391,30 +412,30 @@ func (a *api) QueryRSSHubRoutes(
|
||||
// New request.
|
||||
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "new request"))
|
||||
}
|
||||
|
||||
// Do request.
|
||||
forwardRespIO, err := a.hc.Do(forwardReq)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
|
||||
}
|
||||
defer func() { _ = forwardRespIO.Body.Close() }()
|
||||
|
||||
// Parse response.
|
||||
body, err := io.ReadAll(forwardRespIO.Body)
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "read response"))
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return nil, rpc.ErrBadRequest(errors.New("website id is invalid"))
|
||||
return nil, ErrBadRequest(errors.New("website id is invalid"))
|
||||
}
|
||||
|
||||
var forwardResp struct {
|
||||
Routes map[string]RSSHubRoute `json:"routes"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &forwardResp); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "parse response"))
|
||||
}
|
||||
|
||||
// Convert to response.
|
||||
@@ -435,7 +456,7 @@ func (a *api) Write(ctx context.Context, req *WriteRequest) (resp *WriteResponse
|
||||
feed.Labels.Put(model.LabelType, "api", false)
|
||||
}
|
||||
if err := a.Dependencies().FeedStorage.Append(ctx, req.Feeds...); err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "append"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "append"))
|
||||
}
|
||||
|
||||
return &WriteResponse{}, nil
|
||||
@@ -447,7 +468,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
|
||||
|
||||
// Validate request.
|
||||
if err := req.Validate(); err != nil {
|
||||
return nil, rpc.ErrBadRequest(errors.Wrap(err, "validate"))
|
||||
return nil, ErrBadRequest(errors.Wrap(err, "validate"))
|
||||
}
|
||||
|
||||
// Forward to storage.
|
||||
@@ -460,7 +481,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
|
||||
End: req.End,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, rpc.ErrInternal(errors.Wrap(err, "query"))
|
||||
return nil, ErrInternal(errors.Wrap(err, "query"))
|
||||
}
|
||||
if len(feeds) == 0 {
|
||||
return &QueryResponse{Feeds: []*block.FeedVO{}}, nil
|
||||
|
||||
@@ -26,9 +26,8 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/metric"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/rpc"
|
||||
"github.com/glidea/zenfeed/pkg/util/jsonrpc"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
@@ -89,18 +88,14 @@ func new(instance string, app *config.App, dependencies Dependencies) (Server, e
|
||||
|
||||
router := http.NewServeMux()
|
||||
api := dependencies.API
|
||||
router.Handle("/metrics", metric.Handler())
|
||||
router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
}))
|
||||
router.Handle("/write", rpc.API(api.Write))
|
||||
router.Handle("/query_config", rpc.API(api.QueryAppConfig))
|
||||
router.Handle("/apply_config", rpc.API(api.ApplyAppConfig))
|
||||
router.Handle("/query_config_schema", rpc.API(api.QueryAppConfigSchema))
|
||||
router.Handle("/query_rsshub_categories", rpc.API(api.QueryRSSHubCategories))
|
||||
router.Handle("/query_rsshub_websites", rpc.API(api.QueryRSSHubWebsites))
|
||||
router.Handle("/query_rsshub_routes", rpc.API(api.QueryRSSHubRoutes))
|
||||
router.Handle("/query", rpc.API(api.Query))
|
||||
router.Handle("/write", jsonrpc.API(api.Write))
|
||||
router.Handle("/query_config", jsonrpc.API(api.QueryAppConfig))
|
||||
router.Handle("/apply_config", jsonrpc.API(api.ApplyAppConfig))
|
||||
router.Handle("/query_config_schema", jsonrpc.API(api.QueryAppConfigSchema))
|
||||
router.Handle("/query_rsshub_categories", jsonrpc.API(api.QueryRSSHubCategories))
|
||||
router.Handle("/query_rsshub_websites", jsonrpc.API(api.QueryRSSHubWebsites))
|
||||
router.Handle("/query_rsshub_routes", jsonrpc.API(api.QueryRSSHubRoutes))
|
||||
router.Handle("/query", jsonrpc.API(api.Query))
|
||||
httpServer := &http.Server{Addr: config.Address, Handler: router}
|
||||
|
||||
return &server{
|
||||
|
||||
231
pkg/api/rss/rss.go
Normal file
231
pkg/api/rss/rss.go
Normal file
@@ -0,0 +1,231 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
"github.com/gorilla/feeds"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
)
|
||||
|
||||
var clk = clock.New()
|
||||
|
||||
// --- Interface code block ---
|
||||
type Server interface {
|
||||
component.Component
|
||||
config.Watcher
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Address string
|
||||
ContentHTMLTemplate string
|
||||
contentHTMLTemplate *template.Template
|
||||
}
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if c.Address == "" {
|
||||
c.Address = ":1302"
|
||||
}
|
||||
if _, _, err := net.SplitHostPort(c.Address); err != nil {
|
||||
return errors.Wrap(err, "invalid address")
|
||||
}
|
||||
|
||||
if c.ContentHTMLTemplate == "" {
|
||||
c.ContentHTMLTemplate = "{{ .summary_html_snippet }}"
|
||||
}
|
||||
t, err := template.New("").Parse(c.ContentHTMLTemplate)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parse rss content template")
|
||||
}
|
||||
c.contentHTMLTemplate = t
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) From(app *config.App) *Config {
|
||||
c.Address = app.API.RSS.Address
|
||||
c.ContentHTMLTemplate = app.API.RSS.ContentHTMLTemplate
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
type Dependencies struct {
|
||||
API api.API
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Server, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](
|
||||
func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
|
||||
m := &mockServer{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
|
||||
config := &Config{}
|
||||
config.From(app)
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrap(err, "validate config")
|
||||
}
|
||||
|
||||
s := &server{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "RSSServer",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
}
|
||||
|
||||
router := http.NewServeMux()
|
||||
router.Handle("/", http.HandlerFunc(s.rss))
|
||||
|
||||
s.http = &http.Server{Addr: config.Address, Handler: router}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
type server struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
http *http.Server
|
||||
}
|
||||
|
||||
func (s *server) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
serverErr := make(chan error, 1)
|
||||
go func() {
|
||||
serverErr <- s.http.ListenAndServe()
|
||||
}()
|
||||
|
||||
s.MarkReady()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info(ctx, "shutting down")
|
||||
|
||||
return s.http.Shutdown(ctx)
|
||||
case err := <-serverErr:
|
||||
return errors.Wrap(err, "listen and serve")
|
||||
}
|
||||
}
|
||||
|
||||
func (s *server) Reload(app *config.App) error {
|
||||
newConfig := &Config{}
|
||||
newConfig.From(app)
|
||||
if err := newConfig.Validate(); err != nil {
|
||||
return errors.Wrap(err, "validate config")
|
||||
}
|
||||
if s.Config().Address != newConfig.Address {
|
||||
return errors.New("address cannot be reloaded")
|
||||
}
|
||||
|
||||
s.SetConfig(newConfig)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *server) rss(w http.ResponseWriter, r *http.Request) {
|
||||
var err error
|
||||
ctx := telemetry.StartWith(r.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "rss")...)
|
||||
defer telemetry.End(ctx, err)
|
||||
|
||||
// Extract parameters.
|
||||
ps := r.URL.Query()
|
||||
labelFilters := ps["label_filter"]
|
||||
query := ps.Get("query")
|
||||
|
||||
// Forward query request to API.
|
||||
now := clk.Now()
|
||||
queryResult, err := s.Dependencies().API.Query(ctx, &api.QueryRequest{
|
||||
Query: query,
|
||||
LabelFilters: labelFilters,
|
||||
Start: now.Add(-24 * time.Hour),
|
||||
End: now,
|
||||
Limit: 100,
|
||||
})
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest) // TODO: standardize error handling.
|
||||
return
|
||||
}
|
||||
|
||||
// Render and convert to RSS.
|
||||
rssObj := &feeds.Feed{
|
||||
Title: fmt.Sprintf("Zenfeed RSS - %s", ps.Encode()),
|
||||
Description: "Powered by Github Zenfeed - https://github.com/glidea/zenfeed. If you use Folo, please enable 'Appearance - Content - Render inline styles'",
|
||||
Items: make([]*feeds.Item, 0, len(queryResult.Feeds)),
|
||||
}
|
||||
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
|
||||
for _, feed := range queryResult.Feeds {
|
||||
buf.Reset()
|
||||
|
||||
if err = s.Config().contentHTMLTemplate.Execute(buf, feed.Labels.Map()); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
item := &feeds.Item{
|
||||
Title: feed.Labels.Get(model.LabelTitle),
|
||||
Link: &feeds.Link{Href: feed.Labels.Get(model.LabelLink)},
|
||||
Created: feed.Time, // NOTE: scrape time, not pub time.
|
||||
Content: buf.String(),
|
||||
}
|
||||
|
||||
rssObj.Items = append(rssObj.Items, item)
|
||||
}
|
||||
|
||||
if err = rssObj.WriteRss(w); err != nil {
|
||||
log.Error(ctx, errors.Wrap(err, "write rss response"))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
type mockServer struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockServer) Reload(app *config.App) error {
|
||||
return m.Called(app).Error(0)
|
||||
}
|
||||
@@ -46,10 +46,13 @@ type Config struct {
|
||||
}
|
||||
|
||||
type App struct {
|
||||
Timezone string `yaml:"timezone,omitempty" json:"timezone,omitempty" desc:"The timezone of the app. e.g. Asia/Shanghai. Default: server's local timezone"`
|
||||
Log struct {
|
||||
Level string `yaml:"level,omitempty" json:"level,omitempty" desc:"Log level, one of debug, info, warn, error. Default: info"`
|
||||
} `yaml:"log,omitempty" json:"log,omitempty" desc:"The log config."`
|
||||
Timezone string `yaml:"timezone,omitempty" json:"timezone,omitempty" desc:"The timezone of the app. e.g. Asia/Shanghai. Default: server's local timezone"`
|
||||
Telemetry struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the telemetry server. e.g. 0.0.0.0:9090. Default: :9090. It can not be changed after the app is running."`
|
||||
Log struct {
|
||||
Level string `yaml:"level,omitempty" json:"level,omitempty" desc:"Log level, one of debug, info, warn, error. Default: info"`
|
||||
} `yaml:"log,omitempty" json:"log,omitempty" desc:"The log config."`
|
||||
} `yaml:"telemetry,omitempty" json:"telemetry,omitempty" desc:"The telemetry config."`
|
||||
API struct {
|
||||
HTTP struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the HTTP API. e.g. 0.0.0.0:1300. Default: :1300. It can not be changed after the app is running."`
|
||||
@@ -57,9 +60,16 @@ type App struct {
|
||||
MCP struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the MCP API. e.g. 0.0.0.0:1300. Default: :1301. It can not be changed after the app is running."`
|
||||
} `yaml:"mcp,omitempty" json:"mcp,omitempty" desc:"The MCP API config."`
|
||||
RSS struct {
|
||||
Address string `yaml:"address,omitempty" json:"address,omitempty" desc:"The address ([host]:port) of the RSS API. e.g. 0.0.0.0:1300. Default: :1302. It can not be changed after the app is running."`
|
||||
ContentHTMLTemplate string `yaml:"content_html_template,omitempty" json:"content_html_template,omitempty" desc:"The template to render the RSS content for each item. Default is {{ .summary_html_snippet }}."`
|
||||
} `yaml:"rss,omitempty" json:"rss,omitempty" desc:"The RSS config."`
|
||||
LLM string `yaml:"llm,omitempty" json:"llm,omitempty" desc:"The LLM name for summarizing feeds. e.g. my-favorite-gemini-king. Default is the default LLM in llms section."`
|
||||
} `yaml:"api,omitempty" json:"api,omitempty" desc:"The API config."`
|
||||
LLMs []LLM `yaml:"llms,omitempty" json:"llms,omitempty" desc:"The LLMs config. It is required, at least one LLM is needed, refered by other config sections."`
|
||||
LLMs []LLM `yaml:"llms,omitempty" json:"llms,omitempty" desc:"The LLMs config. It is required, at least one LLM is needed, refered by other config sections."`
|
||||
Jina struct {
|
||||
Token string `yaml:"token,omitempty" json:"token,omitempty" desc:"The token of the Jina server."`
|
||||
} `yaml:"jina,omitempty" json:"jina,omitempty" desc:"The Jina config."`
|
||||
Scrape Scrape `yaml:"scrape,omitempty" json:"scrape,omitempty" desc:"The scrape config."`
|
||||
Storage Storage `yaml:"storage,omitempty" json:"storage,omitempty" desc:"The storage config."`
|
||||
Scheduls struct {
|
||||
@@ -116,6 +126,7 @@ type ScrapeSourceRSS struct {
|
||||
}
|
||||
|
||||
type RewriteRule struct {
|
||||
If []string `yaml:"if,omitempty" json:"if,omitempty" desc:"The condition config to match the feed. If not set, that means match all feeds. Like label filters, e.g. [source=github, title!=xxx]"`
|
||||
SourceLabel string `yaml:"source_label,omitempty" json:"source_label,omitempty" desc:"The feed label of the source text to transform. Default is the 'content' label. The feed is essentially a label set (similar to Prometheus metric data). The default labels are type (rss, email (in future), etc), source (the source name), title (feed title), link (feed link), pub_time (feed publish time), and content (feed content)."`
|
||||
SkipTooShortThreshold *int `yaml:"skip_too_short_threshold,omitempty" json:"skip_too_short_threshold,omitempty" desc:"The threshold of the source text length to skip. Default is 300. It helps we to filter out some short feeds."`
|
||||
Transform *RewriteRuleTransform `yaml:"transform,omitempty" json:"transform,omitempty" desc:"The transform config to transform the source text. If not set, that means transform nothing, so the source text is the transformed text."`
|
||||
@@ -130,6 +141,7 @@ type RewriteRuleTransform struct {
|
||||
}
|
||||
|
||||
type RewriteRuleTransformToText struct {
|
||||
Type string `yaml:"type,omitempty" json:"type,omitempty" desc:"The type of the transform. It can be one of prompt, crawl, crawl_by_jina. Default is prompt. For crawl, the source text will be as the url to crawl the page, and the page will be converted to markdown. crawl vs crawl_by_jina: crawl is local, more stable; crawl_by_jina is powered by https://jina.ai, more powerful."`
|
||||
LLM string `yaml:"llm,omitempty" json:"llm,omitempty" desc:"The LLM name to use. Default is the default LLM in llms section."`
|
||||
Prompt string `yaml:"prompt,omitempty" json:"prompt,omitempty" desc:"The prompt to transform the source text. The source text will be injected into the prompt above. And you can use go template syntax to refer some built-in prompts, like {{ .summary }}. Available built-in prompts: category, tags, score, comment_confucius, summary, summary_html_snippet."`
|
||||
}
|
||||
@@ -166,15 +178,14 @@ type NotifySubRoute struct {
|
||||
}
|
||||
|
||||
type NotifyReceiver struct {
|
||||
Name string `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the receiver. It is required."`
|
||||
Email string `yaml:"email,omitempty" json:"email,omitempty" desc:"The email of the receiver."`
|
||||
// TODO: to reduce copyright risk, we do not support webhook receiver now.
|
||||
// Webhook *NotifyReceiverWebhook `yaml:"webhook" json:"webhook" desc:"The webhook of the receiver."`
|
||||
Name string `yaml:"name,omitempty" json:"name,omitempty" desc:"The name of the receiver. It is required."`
|
||||
Email string `yaml:"email,omitempty" json:"email,omitempty" desc:"The email of the receiver."`
|
||||
Webhook *NotifyReceiverWebhook `yaml:"webhook" json:"webhook" desc:"The webhook of the receiver."`
|
||||
}
|
||||
|
||||
// type NotifyReceiverWebhook struct {
|
||||
// URL string `yaml:"url"`
|
||||
// }
|
||||
type NotifyReceiverWebhook struct {
|
||||
URL string `yaml:"url"`
|
||||
}
|
||||
|
||||
type NotifyChannels struct {
|
||||
Email *NotifyChannelEmail `yaml:"email,omitempty" json:"email,omitempty" desc:"The global email channel config."`
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"reflect"
|
||||
"strconv"
|
||||
@@ -33,6 +34,8 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/storage/kv"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
binaryutil "github.com/glidea/zenfeed/pkg/util/binary"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
"github.com/glidea/zenfeed/pkg/util/hash"
|
||||
)
|
||||
|
||||
@@ -373,24 +376,94 @@ func newCached(llm LLM, kvStorage kv.Storage) LLM {
|
||||
|
||||
func (c *cached) String(ctx context.Context, messages []string) (string, error) {
|
||||
key := hash.Sum64s(messages)
|
||||
keyStr := strconv.FormatUint(key, 10)
|
||||
keyStr := strconv.FormatUint(key, 10) // for human readable & compatible.
|
||||
|
||||
value, err := c.kvStorage.Get(ctx, keyStr)
|
||||
valueBs, err := c.kvStorage.Get(ctx, []byte(keyStr))
|
||||
switch {
|
||||
case err == nil:
|
||||
return value, nil
|
||||
return string(valueBs), nil
|
||||
case errors.Is(err, kv.ErrNotFound):
|
||||
break
|
||||
default:
|
||||
return "", errors.Wrap(err, "get from kv storage")
|
||||
}
|
||||
|
||||
value, err = c.LLM.String(ctx, messages)
|
||||
value, err := c.LLM.String(ctx, messages)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err = c.kvStorage.Set(ctx, keyStr, value, 65*time.Minute); err != nil {
|
||||
// TODO: reduce copies.
|
||||
if err = c.kvStorage.Set(ctx, []byte(keyStr), []byte(value), 65*time.Minute); err != nil {
|
||||
log.Error(ctx, err, "set to kv storage")
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
var (
|
||||
toBytes = func(v []float32) ([]byte, error) {
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
|
||||
for _, fVal := range v {
|
||||
if err := binaryutil.WriteFloat32(buf, fVal); err != nil {
|
||||
return nil, errors.Wrap(err, "write float32")
|
||||
}
|
||||
}
|
||||
|
||||
// Must copy data, as the buffer will be reused.
|
||||
bs := make([]byte, buf.Len())
|
||||
copy(bs, buf.Bytes())
|
||||
|
||||
return bs, nil
|
||||
}
|
||||
|
||||
toF32s = func(bs []byte) ([]float32, error) {
|
||||
if len(bs)%4 != 0 {
|
||||
return nil, errors.New("embedding data is corrupted, length not multiple of 4")
|
||||
}
|
||||
|
||||
r := bytes.NewReader(bs)
|
||||
floats := make([]float32, len(bs)/4)
|
||||
|
||||
for i := range floats {
|
||||
f, err := binaryutil.ReadFloat32(r)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "deserialize float32")
|
||||
}
|
||||
floats[i] = f
|
||||
}
|
||||
|
||||
return floats, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (c *cached) Embedding(ctx context.Context, text string) ([]float32, error) {
|
||||
key := hash.Sum64(text)
|
||||
keyStr := strconv.FormatUint(key, 10)
|
||||
|
||||
valueBs, err := c.kvStorage.Get(ctx, []byte(keyStr))
|
||||
switch {
|
||||
case err == nil:
|
||||
return toF32s(valueBs)
|
||||
case errors.Is(err, kv.ErrNotFound):
|
||||
break
|
||||
default:
|
||||
return nil, errors.Wrap(err, "get from kv storage")
|
||||
}
|
||||
|
||||
value, err := c.LLM.Embedding(ctx, text)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
valueBs, err = toBytes(value)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "serialize embedding")
|
||||
}
|
||||
|
||||
if err = c.kvStorage.Set(ctx, []byte(keyStr), valueBs, 65*time.Minute); err != nil {
|
||||
log.Error(ctx, err, "set to kv storage")
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ package llm
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
oai "github.com/sashabaranov/go-openai"
|
||||
@@ -40,7 +41,7 @@ func newOpenAI(c *Config) LLM {
|
||||
config := oai.DefaultConfig(c.APIKey)
|
||||
config.BaseURL = c.Endpoint
|
||||
client := oai.NewClientWithConfig(config)
|
||||
embeddingSpliter := newEmbeddingSpliter(2048, 64)
|
||||
embeddingSpliter := newEmbeddingSpliter(1536, 64)
|
||||
|
||||
return &openai{
|
||||
Base: component.New(&component.BaseConfig[Config, struct{}]{
|
||||
@@ -61,9 +62,9 @@ func (o *openai) String(ctx context.Context, messages []string) (value string, e
|
||||
if config.Model == "" {
|
||||
return "", errors.New("model is not set")
|
||||
}
|
||||
msg := make([]oai.ChatCompletionMessage, 0, len(messages))
|
||||
msgs := make([]oai.ChatCompletionMessage, 0, len(messages))
|
||||
for _, m := range messages {
|
||||
msg = append(msg, oai.ChatCompletionMessage{
|
||||
msgs = append(msgs, oai.ChatCompletionMessage{
|
||||
Role: oai.ChatMessageRoleUser,
|
||||
Content: m,
|
||||
})
|
||||
@@ -71,7 +72,7 @@ func (o *openai) String(ctx context.Context, messages []string) (value string, e
|
||||
|
||||
req := oai.ChatCompletionRequest{
|
||||
Model: config.Model,
|
||||
Messages: msg,
|
||||
Messages: msgs,
|
||||
Temperature: config.Temperature,
|
||||
}
|
||||
|
||||
@@ -131,6 +132,7 @@ func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err
|
||||
EncodingFormat: oai.EmbeddingEncodingFormatFloat,
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Println(s)
|
||||
return nil, errors.Wrap(err, "create embeddings")
|
||||
}
|
||||
if len(vec.Data) == 0 {
|
||||
@@ -141,6 +143,6 @@ func (o *openai) Embedding(ctx context.Context, s string) (value []float32, err
|
||||
promptTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.PromptTokens))
|
||||
completionTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.CompletionTokens))
|
||||
totalTokens.WithLabelValues(lvs...).Add(float64(vec.Usage.TotalTokens))
|
||||
|
||||
|
||||
return vec.Data[0].Embedding, nil
|
||||
}
|
||||
|
||||
156
pkg/llm/prompt/prompt.go
Normal file
156
pkg/llm/prompt/prompt.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package prompt
|
||||
|
||||
var Builtin = map[string]string{
|
||||
"category": `
|
||||
Analyze the content and categorize it into exactly one of these categories:
|
||||
Technology, Development, Entertainment, Finance, Health, Politics, Other
|
||||
|
||||
Classification requirements:
|
||||
- Choose the SINGLE most appropriate category based on:
|
||||
* Primary topic and main focus of the content
|
||||
* Key terminology and concepts used
|
||||
* Target audience and purpose
|
||||
* Technical depth and complexity level
|
||||
- For content that could fit multiple categories:
|
||||
* Identify the dominant theme
|
||||
* Consider the most specific applicable category
|
||||
* Use the primary intended purpose
|
||||
- If content appears ambiguous:
|
||||
* Focus on the most prominent aspects
|
||||
* Consider the practical application
|
||||
* Choose the category that best serves user needs
|
||||
|
||||
Output format:
|
||||
Return ONLY the category name, no other text or explanation.
|
||||
Must be one of the provided categories exactly as written.
|
||||
`,
|
||||
|
||||
"tags": `
|
||||
Analyze the content and add appropriate tags based on:
|
||||
- Main topics and themes
|
||||
- Key concepts and terminology
|
||||
- Target audience and purpose
|
||||
- Technical depth and domain
|
||||
- 2-4 tags are enough
|
||||
Output format:
|
||||
Return a list of tags, separated by commas, no other text or explanation.
|
||||
e.g. "AI, Technology, Innovation, Future"
|
||||
`,
|
||||
|
||||
"score": `
|
||||
Please give a score between 0 and 10 based on the following content.
|
||||
Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
|
||||
Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
|
||||
Output format:
|
||||
Return the score (0-10), no other text or explanation.
|
||||
E.g. "8", "5", "3", etc.
|
||||
`,
|
||||
|
||||
"comment_confucius": `
|
||||
Please act as Confucius and write a 100-word comment on the article.
|
||||
Content needs to be in line with the Chinese mainland's regulations.
|
||||
Output format:
|
||||
Return the comment only, no other text or explanation.
|
||||
Reply short and concise, 100 words is enough.
|
||||
`,
|
||||
|
||||
"summary": `
|
||||
Please read the article carefully and summarize its core content in the format of [Choice: Key Point List / Concise Paragraph]. The summary should clearly cover:
|
||||
|
||||
1. What is the main topic/theme of the article?
|
||||
2. What key arguments/main information did the author put forward?
|
||||
3. (Optional, if the article contains) What important data, cases, or examples are there?
|
||||
4. What main conclusions did the article reach or what core information did it ultimately convey?
|
||||
|
||||
Strive for comprehensive, accurate, and concise.
|
||||
`,
|
||||
|
||||
"summary_html_snippet": `
|
||||
You are to act as a professional Content Designer. Your task is to convert the provided article into **visually modern HTML email snippets** that render well in modern email clients like Gmail and QQ Mail.
|
||||
|
||||
**Core Requirements:**
|
||||
|
||||
* **Highlighting and Layout Techniques (Based on the article content, you must actually use the HTML structure templates provided below to generate the content):**
|
||||
|
||||
A. **Stylish Quote Block** (for highlighting important points or direct quotes from the original text):
|
||||
<div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
|
||||
Insert the key point or finding to be highlighted here.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
B. **Information Card** (for highlighting key data/metrics):
|
||||
<div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
|
||||
</div>
|
||||
|
||||
C. **Key Points List** (for organizing multiple core points):
|
||||
<ul style="margin:20px 0; padding-left:0; list-style-type:none;">
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
|
||||
Description of the first key point
|
||||
</li>
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
|
||||
Description of the second key point
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
D. **Emphasized Text** (for highlighting keywords or phrases):
|
||||
<span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to be emphasized</span>
|
||||
|
||||
E. **Comparison Table** (suitable for comparing different solutions or viewpoints):
|
||||
<div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
|
||||
<table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
* **Output Requirements:**
|
||||
* The design should be **aesthetically pleasing and elegant, with harmonious color schemes**, ensuring sufficient **whitespace and contrast**.
|
||||
* All article snippets must maintain a **consistent visual style**.
|
||||
* You **must use multiple visual elements** and avoid mere text listings. **Use at least 2-3 different visual elements** to enhance readability and intuitive understanding.
|
||||
* **Appropriately quote important original text snippets** to support explanations.
|
||||
* **Strive to use highlighting styles to mark key points**.
|
||||
* **Where appropriate, embed original images from the article to aid explanation.** Pay attention to the referrer policy: use referrerpolicy="no-referrer" on the <img> HTML element to ensure images display correctly.
|
||||
* **Ensure overall reading flow is smooth and natural!!!** Guide the reader's thought process appropriately, minimizing abrupt jumps in logic.
|
||||
* **Output only the HTML code snippet.** Do not include the full HTML document structure (i.e., no <html>, <head>, or <body> tags).
|
||||
* **Do not add any explanatory text, extra comments, Markdown formatting, or HTML backticks.** Output the raw HTML code directly.
|
||||
* **Do not add article titles or sources;** these will be automatically injected by the user later.
|
||||
* **Do not use any opening remarks or pleasantries** (e.g., "Hi," "Let's talk about..."). Directly present the processed HTML content.
|
||||
* **Do not refer to "this article," "this piece," "the current text," etc.** The user is aware of this context.
|
||||
* **Only use inline styles, do not use global styles.** Remember to only generate HTML snippets.
|
||||
* Do not explain anything, just output the HTML code snippet.
|
||||
* Use above HTML components & its styles to generate the HTML code snippet, do not customize by yourself, else you will be fired.
|
||||
|
||||
* **Your Personality and Expression Preferences:**
|
||||
* Focus on the most valuable information, not on every detail. The content should be readable within 3 minutes.
|
||||
* Communicate **concisely and get straight to the point.
|
||||
* ** Have a strong aversion to jargon, bureaucratic language, redundant embellishments, and grand narratives. Believe that plain, simple language can best convey truth.
|
||||
* Be fluent, plain, concise, and not verbose.
|
||||
* Be **plain, direct, clear, and easy to understand:** Use basic vocabulary and simple sentence structures. Avoid "sophisticated" complex sentences or unnecessary embellishments that increase reading burden.
|
||||
* Enable readers to quickly grasp: "What is this? What is it generally about? What is its relevance/real-world significance to me (an ordinary person)?" Focus on providing an **overview**, not an accumulation of details.
|
||||
* Be well-versed in cognitive science; understand how to phrase information so that someone without prior background can quickly understand the core content.
|
||||
* **Extract key information and core insights,** rather than directly copying the original text. Do not omit crucial information and viewpoints. For example, for forum posts, the main points from comments are also very important!
|
||||
* Avoid large blocks of text, strive for a combination of pictures and text.
|
||||
`,
|
||||
}
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
|
||||
const (
|
||||
AppName = "zenfeed"
|
||||
Module = "github.com/glidea/zenfeed"
|
||||
)
|
||||
|
||||
// LabelXXX is the metadata label for the feed.
|
||||
@@ -233,6 +234,76 @@ type Label struct {
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
const (
|
||||
LabelFilterEqual = "="
|
||||
LabelFilterNotEqual = "!="
|
||||
)
|
||||
|
||||
type LabelFilter struct {
|
||||
Label string
|
||||
Equal bool
|
||||
Value string
|
||||
}
|
||||
|
||||
func NewLabelFilter(filter string) (LabelFilter, error) {
|
||||
eq := false
|
||||
parts := strings.Split(filter, LabelFilterNotEqual)
|
||||
if len(parts) != 2 {
|
||||
parts = strings.Split(filter, LabelFilterEqual)
|
||||
eq = true
|
||||
}
|
||||
if len(parts) != 2 {
|
||||
return LabelFilter{}, errors.New("invalid label filter")
|
||||
}
|
||||
|
||||
return LabelFilter{Label: parts[0], Value: parts[1], Equal: eq}, nil
|
||||
}
|
||||
|
||||
func (f LabelFilter) Match(labels Labels) bool {
|
||||
lv := labels.Get(f.Label)
|
||||
if lv == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
if f.Equal && lv == f.Value {
|
||||
return true
|
||||
}
|
||||
if !f.Equal && lv != f.Value {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type LabelFilters []LabelFilter
|
||||
|
||||
func (ls LabelFilters) Match(labels Labels) bool {
|
||||
if len(ls) == 0 {
|
||||
return true // No filters, always match.
|
||||
}
|
||||
|
||||
for _, l := range ls {
|
||||
if !l.Match(labels) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func NewLabelFilters(filters []string) (LabelFilters, error) {
|
||||
ls := make(LabelFilters, len(filters))
|
||||
for i, f := range filters {
|
||||
lf, err := NewLabelFilter(f)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "new label filter %q", f)
|
||||
}
|
||||
ls[i] = lf
|
||||
}
|
||||
|
||||
return ls, nil
|
||||
}
|
||||
|
||||
// readExpectedDelim reads the next token and checks if it's the expected delimiter.
|
||||
func readExpectedDelim(dec *json.Decoder, expected json.Delim) error {
|
||||
t, err := dec.Token()
|
||||
|
||||
@@ -124,10 +124,9 @@ func (c *aggrChannel) Send(ctx context.Context, receiver Receiver, group *route.
|
||||
if receiver.Email != "" && c.email != nil {
|
||||
return c.send(ctx, receiver, group, c.email, "email")
|
||||
}
|
||||
// if receiver.Webhook != nil && c.webhook != nil {
|
||||
// TODO: temporarily disable webhook to reduce copyright risks.
|
||||
// return c.send(ctx, receiver, group, c.webhook, "webhook")
|
||||
// }
|
||||
if receiver.Webhook != nil && c.webhook != nil {
|
||||
return c.send(ctx, receiver, group, c.webhook, "webhook")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -134,53 +134,53 @@ func (e *email) buildEmail(receiver Receiver, group *route.FeedGroup) (*gomail.M
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "build email body HTML")
|
||||
}
|
||||
m.SetBody("text/html", string(body))
|
||||
m.SetBody("text/html", body)
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (e *email) buildBodyHTML(group *route.FeedGroup) ([]byte, error) {
|
||||
func (e *email) buildBodyHTML(group *route.FeedGroup) (string, error) {
|
||||
bodyBuf := buffer.Get()
|
||||
defer buffer.Put(bodyBuf)
|
||||
|
||||
// Write HTML header.
|
||||
if err := e.writeHTMLHeader(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write HTML header")
|
||||
return "", errors.Wrap(err, "write HTML header")
|
||||
}
|
||||
|
||||
// Write summary.
|
||||
if err := e.writeSummary(bodyBuf, group.Summary); err != nil {
|
||||
return nil, errors.Wrap(err, "write summary")
|
||||
return "", errors.Wrap(err, "write summary")
|
||||
}
|
||||
|
||||
// Write each feed content.
|
||||
if _, err := bodyBuf.WriteString(`
|
||||
<div style="margin-top:20px; padding-top:15px; border-top:1px solid #f1f3f4;">
|
||||
<p style="font-size:32px; font-weight:500; margin:0 0 10px 0;">Feeds</p>`); err != nil {
|
||||
return nil, errors.Wrap(err, "write feeds header")
|
||||
return "", errors.Wrap(err, "write feeds header")
|
||||
}
|
||||
for i, feed := range group.Feeds {
|
||||
if err := e.writeFeedContent(bodyBuf, feed); err != nil {
|
||||
return nil, errors.Wrap(err, "write feed content")
|
||||
return "", errors.Wrap(err, "write feed content")
|
||||
}
|
||||
|
||||
// Add separator (except the last feed).
|
||||
if i < len(group.Feeds)-1 {
|
||||
if err := e.writeSeparator(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write separator")
|
||||
return "", errors.Wrap(err, "write separator")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write disclaimer and HTML footer.
|
||||
if err := e.writeDisclaimer(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write disclaimer")
|
||||
return "", errors.Wrap(err, "write disclaimer")
|
||||
}
|
||||
if err := e.writeHTMLFooter(bodyBuf); err != nil {
|
||||
return nil, errors.Wrap(err, "write HTML footer")
|
||||
return "", errors.Wrap(err, "write HTML footer")
|
||||
}
|
||||
|
||||
return bodyBuf.Bytes(), nil
|
||||
return bodyBuf.String(), nil
|
||||
}
|
||||
|
||||
func (e *email) writeHTMLHeader(buf *buffer.Bytes) error {
|
||||
|
||||
@@ -41,9 +41,10 @@ func (r *WebhookReceiver) Validate() error {
|
||||
}
|
||||
|
||||
type webhookBody struct {
|
||||
Group string `json:"group"`
|
||||
Labels model.Labels `json:"labels"`
|
||||
Feeds []*route.Feed `json:"feeds"`
|
||||
Group string `json:"group"`
|
||||
Labels model.Labels `json:"labels"`
|
||||
Summary string `json:"summary"`
|
||||
Feeds []*route.Feed `json:"feeds"`
|
||||
}
|
||||
|
||||
func newWebhook() sender {
|
||||
@@ -59,9 +60,10 @@ type webhook struct {
|
||||
func (w *webhook) Send(ctx context.Context, receiver Receiver, group *route.FeedGroup) error {
|
||||
// Prepare request.
|
||||
body := &webhookBody{
|
||||
Group: group.Name,
|
||||
Labels: group.Labels,
|
||||
Feeds: group.Feeds,
|
||||
Group: group.Name,
|
||||
Labels: group.Labels,
|
||||
Summary: group.Summary,
|
||||
Feeds: group.Feeds,
|
||||
}
|
||||
b := runtimeutil.Must1(json.Marshal(body))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, receiver.Webhook.URL, bytes.NewReader(b))
|
||||
|
||||
@@ -86,9 +86,9 @@ func (c *Config) From(app *config.App) *Config {
|
||||
if app.Notify.Receivers[i].Email != "" {
|
||||
c.Receivers[i].Email = app.Notify.Receivers[i].Email
|
||||
}
|
||||
// if app.Notify.Receivers[i].Webhook != nil {
|
||||
// c.Receivers[i].Webhook = &channel.WebhookReceiver{URL: app.Notify.Receivers[i].Webhook.URL}
|
||||
// }
|
||||
if app.Notify.Receivers[i].Webhook != nil {
|
||||
c.Receivers[i].Webhook = &channel.WebhookReceiver{URL: app.Notify.Receivers[i].Webhook.URL}
|
||||
}
|
||||
}
|
||||
|
||||
c.Channels = channel.Config{}
|
||||
@@ -438,8 +438,8 @@ func (n *notifier) send(ctx context.Context, work sendWork) error {
|
||||
return channel.Send(ctx, work.receiver.Receiver, work.group)
|
||||
}
|
||||
|
||||
var nlogKey = func(group *route.FeedGroup, receiver Receiver) string {
|
||||
return fmt.Sprintf("notifier.group.%s.receiver.%s.%d", group.Name, receiver.Name, group.Time.Unix())
|
||||
var nlogKey = func(group *route.FeedGroup, receiver Receiver) []byte {
|
||||
return fmt.Appendf(nil, "notifier.group.%s.receiver.%s.%d", group.Name, receiver.Name, group.Time.Unix())
|
||||
}
|
||||
|
||||
func (n *notifier) isSent(ctx context.Context, group *route.FeedGroup, receiver Receiver) bool {
|
||||
@@ -457,7 +457,7 @@ func (n *notifier) isSent(ctx context.Context, group *route.FeedGroup, receiver
|
||||
}
|
||||
|
||||
func (n *notifier) markSent(ctx context.Context, group *route.FeedGroup, receiver Receiver) error {
|
||||
return n.Dependencies().KVStorage.Set(ctx, nlogKey(group, receiver), timeutil.Format(time.Now()), timeutil.Day)
|
||||
return n.Dependencies().KVStorage.Set(ctx, nlogKey(group, receiver), []byte(timeutil.Format(time.Now())), timeutil.Day)
|
||||
}
|
||||
|
||||
type sendWork struct {
|
||||
|
||||
@@ -72,56 +72,25 @@ func (s SubRoutes) Match(feed *block.FeedVO) *SubRoute {
|
||||
type SubRoute struct {
|
||||
Route
|
||||
Matchers []string
|
||||
matchers []matcher
|
||||
matchers model.LabelFilters
|
||||
}
|
||||
|
||||
func (r *SubRoute) Match(feed *block.FeedVO) *SubRoute {
|
||||
// Match sub routes.
|
||||
for _, subRoute := range r.SubRoutes {
|
||||
if matched := subRoute.Match(feed); matched != nil {
|
||||
return matched
|
||||
}
|
||||
}
|
||||
for _, m := range r.matchers {
|
||||
fv := feed.Labels.Get(m.key)
|
||||
switch m.equal {
|
||||
case true:
|
||||
if fv != m.value {
|
||||
return nil
|
||||
}
|
||||
default:
|
||||
if fv == m.value {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Match self.
|
||||
if !r.matchers.Match(feed.Labels) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
type matcher struct {
|
||||
key string
|
||||
value string
|
||||
equal bool
|
||||
}
|
||||
|
||||
var (
|
||||
matcherEqual = "="
|
||||
matcherNotEqual = "!="
|
||||
parseMatcher = func(filter string) (matcher, error) {
|
||||
eq := false
|
||||
parts := strings.Split(filter, matcherNotEqual)
|
||||
if len(parts) != 2 {
|
||||
parts = strings.Split(filter, matcherEqual)
|
||||
eq = true
|
||||
}
|
||||
if len(parts) != 2 {
|
||||
return matcher{}, errors.New("invalid matcher")
|
||||
}
|
||||
|
||||
return matcher{key: parts[0], value: parts[1], equal: eq}, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (r *SubRoute) Validate() error {
|
||||
if len(r.GroupBy) == 0 {
|
||||
r.GroupBy = []string{model.LabelSource}
|
||||
@@ -129,17 +98,16 @@ func (r *SubRoute) Validate() error {
|
||||
if r.CompressByRelatedThreshold == nil {
|
||||
r.CompressByRelatedThreshold = ptr.To(float32(0.85))
|
||||
}
|
||||
|
||||
if len(r.Matchers) == 0 {
|
||||
return errors.New("matchers is required")
|
||||
}
|
||||
r.matchers = make([]matcher, len(r.Matchers))
|
||||
for i, matcher := range r.Matchers {
|
||||
m, err := parseMatcher(matcher)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "invalid matcher")
|
||||
}
|
||||
r.matchers[i] = m
|
||||
matchers, err := model.NewLabelFilters(r.Matchers)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "invalid matchers")
|
||||
}
|
||||
r.matchers = matchers
|
||||
|
||||
for _, subRoute := range r.SubRoutes {
|
||||
if err := subRoute.Validate(); err != nil {
|
||||
return errors.Wrap(err, "invalid sub_route")
|
||||
@@ -151,7 +119,7 @@ func (r *SubRoute) Validate() error {
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if len(c.GroupBy) == 0 {
|
||||
c.GroupBy = []string{model.LabelSource}
|
||||
c.GroupBy = []string{model.LabelType}
|
||||
}
|
||||
if c.CompressByRelatedThreshold == nil {
|
||||
c.CompressByRelatedThreshold = ptr.To(float32(0.85))
|
||||
@@ -179,8 +147,8 @@ type FeedGroup struct {
|
||||
Name string
|
||||
Time time.Time
|
||||
Labels model.Labels
|
||||
Feeds []*Feed
|
||||
Summary string
|
||||
Feeds []*Feed
|
||||
}
|
||||
|
||||
func (g *FeedGroup) ID() string {
|
||||
|
||||
@@ -19,8 +19,8 @@ import (
|
||||
"context"
|
||||
"html/template"
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"k8s.io/utils/ptr"
|
||||
@@ -28,14 +28,15 @@ import (
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/llm"
|
||||
"github.com/glidea/zenfeed/pkg/llm/prompt"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
"github.com/glidea/zenfeed/pkg/util/crawl"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
|
||||
type Rewriter interface {
|
||||
component.Component
|
||||
config.Watcher
|
||||
@@ -71,6 +72,11 @@ type Dependencies struct {
|
||||
}
|
||||
|
||||
type Rule struct {
|
||||
// If is the condition to check before applying the rule.
|
||||
// If not set, the rule will be applied.
|
||||
If []string
|
||||
if_ model.LabelFilters
|
||||
|
||||
// SourceLabel specifies which label's value to use as source text.
|
||||
// Default is model.LabelContent.
|
||||
SourceLabel string
|
||||
@@ -96,29 +102,51 @@ type Rule struct {
|
||||
}
|
||||
|
||||
func (r *Rule) Validate() error { //nolint:cyclop
|
||||
// If.
|
||||
if len(r.If) > 0 {
|
||||
if_, err := model.NewLabelFilters(r.If)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "invalid if %q", r.If)
|
||||
}
|
||||
r.if_ = if_
|
||||
}
|
||||
|
||||
// Source label.
|
||||
if r.SourceLabel == "" {
|
||||
r.SourceLabel = model.LabelContent
|
||||
}
|
||||
if r.SkipTooShortThreshold == nil {
|
||||
r.SkipTooShortThreshold = ptr.To(300)
|
||||
r.SkipTooShortThreshold = ptr.To(0)
|
||||
}
|
||||
|
||||
// Transform.
|
||||
if r.Transform != nil {
|
||||
if r.Transform.ToText.Prompt == "" {
|
||||
return errors.New("to text prompt is required")
|
||||
if r.Transform.ToText == nil {
|
||||
return errors.New("to_text is required when transform is set")
|
||||
}
|
||||
tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
|
||||
|
||||
switch r.Transform.ToText.Type {
|
||||
case ToTextTypePrompt:
|
||||
if r.Transform.ToText.Prompt == "" {
|
||||
return errors.New("to text prompt is required for prompt type")
|
||||
}
|
||||
tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
if err := tmpl.Execute(buf, prompt.Builtin); err != nil {
|
||||
return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
r.Transform.ToText.promptRendered = buf.String()
|
||||
|
||||
case ToTextTypeCrawl, ToTextTypeCrawlByJina:
|
||||
// No specific validation for crawl type here, as the source text itself is the URL.
|
||||
default:
|
||||
return errors.Errorf("unknown transform type: %s", r.Transform.ToText.Type)
|
||||
}
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
if err := tmpl.Execute(buf, promptTemplates); err != nil {
|
||||
return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
|
||||
}
|
||||
r.Transform.ToText.promptRendered = buf.String()
|
||||
}
|
||||
|
||||
// Match.
|
||||
@@ -148,15 +176,21 @@ func (r *Rule) Validate() error { //nolint:cyclop
|
||||
}
|
||||
|
||||
func (r *Rule) From(c *config.RewriteRule) {
|
||||
r.If = c.If
|
||||
r.SourceLabel = c.SourceLabel
|
||||
r.SkipTooShortThreshold = c.SkipTooShortThreshold
|
||||
if c.Transform != nil {
|
||||
t := &Transform{}
|
||||
if c.Transform.ToText != nil {
|
||||
t.ToText = &ToText{
|
||||
toText := &ToText{
|
||||
LLM: c.Transform.ToText.LLM,
|
||||
Prompt: c.Transform.ToText.Prompt,
|
||||
}
|
||||
toText.Type = ToTextType(c.Transform.ToText.Type)
|
||||
if toText.Type == "" {
|
||||
toText.Type = ToTextTypePrompt // Default to prompt if not specified.
|
||||
}
|
||||
t.ToText = toText
|
||||
}
|
||||
r.Transform = t
|
||||
}
|
||||
@@ -173,15 +207,27 @@ type Transform struct {
|
||||
}
|
||||
|
||||
type ToText struct {
|
||||
Type ToTextType
|
||||
|
||||
// LLM is the name of the LLM to use.
|
||||
// Only used when Type is ToTextTypePrompt.
|
||||
LLM string
|
||||
|
||||
// Prompt is the prompt for LLM completion.
|
||||
// The source text will automatically be injected into the prompt.
|
||||
// Only used when Type is ToTextTypePrompt.
|
||||
Prompt string
|
||||
promptRendered string
|
||||
}
|
||||
|
||||
type ToTextType string
|
||||
|
||||
const (
|
||||
ToTextTypePrompt ToTextType = "prompt"
|
||||
ToTextTypeCrawl ToTextType = "crawl"
|
||||
ToTextTypeCrawlByJina ToTextType = "crawl_by_jina"
|
||||
)
|
||||
|
||||
type Action string
|
||||
|
||||
const (
|
||||
@@ -189,233 +235,7 @@ const (
|
||||
ActionCreateOrUpdateLabel Action = "create_or_update_label"
|
||||
)
|
||||
|
||||
var promptTemplates = map[string]string{
|
||||
"category": `
|
||||
Analyze the content and categorize it into exactly one of these categories:
|
||||
Technology, Development, Entertainment, Finance, Health, Politics, Other
|
||||
|
||||
Classification requirements:
|
||||
- Choose the SINGLE most appropriate category based on:
|
||||
* Primary topic and main focus of the content
|
||||
* Key terminology and concepts used
|
||||
* Target audience and purpose
|
||||
* Technical depth and complexity level
|
||||
- For content that could fit multiple categories:
|
||||
* Identify the dominant theme
|
||||
* Consider the most specific applicable category
|
||||
* Use the primary intended purpose
|
||||
- If content appears ambiguous:
|
||||
* Focus on the most prominent aspects
|
||||
* Consider the practical application
|
||||
* Choose the category that best serves user needs
|
||||
|
||||
Output format:
|
||||
Return ONLY the category name, no other text or explanation.
|
||||
Must be one of the provided categories exactly as written.
|
||||
`,
|
||||
|
||||
"tags": `
|
||||
Analyze the content and add appropriate tags based on:
|
||||
- Main topics and themes
|
||||
- Key concepts and terminology
|
||||
- Target audience and purpose
|
||||
- Technical depth and domain
|
||||
- 2-4 tags are enough
|
||||
Output format:
|
||||
Return a list of tags, separated by commas, no other text or explanation.
|
||||
e.g. "AI, Technology, Innovation, Future"
|
||||
`,
|
||||
|
||||
"score": `
|
||||
Please give a score between 0 and 10 based on the following content.
|
||||
Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
|
||||
Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
|
||||
Output format:
|
||||
Return the score (0-10), no other text or explanation.
|
||||
E.g. "8", "5", "3", etc.
|
||||
`,
|
||||
|
||||
"comment_confucius": `
|
||||
Please act as Confucius and write a 100-word comment on the article.
|
||||
Content needs to be in line with the Chinese mainland's regulations.
|
||||
Output format:
|
||||
Return the comment only, no other text or explanation.
|
||||
Reply short and concise, 100 words is enough.
|
||||
`,
|
||||
|
||||
"summary": `
|
||||
Summarize the article in 100-200 words.
|
||||
`,
|
||||
|
||||
"summary_html_snippet": `
|
||||
# Task: Create Visually Appealing Information Summary Emails
|
||||
|
||||
You are a professional content designer. Please convert the provided articles into **visually modern HTML email segments**, focusing on display effects in modern clients like Gmail and QQ Mail.
|
||||
|
||||
## Key Requirements:
|
||||
|
||||
1. **Output Format**:
|
||||
- Only output HTML code snippets, **no need for complete HTML document structure**
|
||||
- Only generate HTML code for a single article, so users can combine multiple pieces into a complete email
|
||||
- No explanations, additional comments, or markups
|
||||
- **No need to add titles and sources**, users will inject them automatically
|
||||
- No use html backticks, output raw html code directly
|
||||
- Output directly, no explanation, no comments, no markups
|
||||
|
||||
2. **Content Processing**:
|
||||
- **Don't directly copy the original text**, but extract key information and core insights from each article
|
||||
- **Each article summary should be 100-200 words**, don't force word count, adjust the word count based on the actual length of the article
|
||||
- Summarize points in relaxed, natural language, as if chatting with friends, while maintaining depth
|
||||
- Maintain the original language of the article (e.g., Chinese summary for Chinese articles)
|
||||
|
||||
3. **Visual Design**:
|
||||
- Design should be aesthetically pleasing with coordinated colors
|
||||
- Use sufficient whitespace and contrast
|
||||
- Maintain a consistent visual style across all articles
|
||||
- **Must use multiple visual elements** (charts, cards, quote blocks, etc.), avoid pure text presentation
|
||||
- Each article should use at least 2-3 different visual elements to make content more intuitive and readable
|
||||
|
||||
4. **Highlight Techniques**:
|
||||
|
||||
A. **Beautiful Quote Blocks** (for highlighting important viewpoints):
|
||||
<div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
|
||||
Here is the key viewpoint or finding that needs to be highlighted.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
B. **Information Cards** (for highlighting key data):
|
||||
<div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
|
||||
</div>
|
||||
|
||||
C. **Key Points List** (for highlighting multiple points):
|
||||
<ul style="margin:20px 0; padding-left:0; list-style-type:none;">
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
|
||||
First point description
|
||||
</li>
|
||||
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
|
||||
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
|
||||
Second point description
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
D. **Emphasis Text** (for highlighting key words or phrases):
|
||||
<span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to emphasize</span>
|
||||
|
||||
5. **Timeline Design** (suitable for event sequences or news developments):
|
||||
<div style="margin:25px 0; padding:5px 0;">
|
||||
<h3 style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:18px; color:#333; margin-bottom:15px;">Event Development Timeline</h3>
|
||||
|
||||
<div style="position:relative; margin-left:30px; padding-left:30px; border-left:2px solid #e0e0e0;">
|
||||
<!-- Time Point 1 -->
|
||||
<div style="position:relative; margin-bottom:25px;">
|
||||
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 1, 2023</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
|
||||
</div>
|
||||
|
||||
<!-- Time Point 2 -->
|
||||
<div style="position:relative; margin-bottom:25px;">
|
||||
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 15, 2023</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
6. **Comparison Table** (for comparing different options or viewpoints):
|
||||
<div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
|
||||
<table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
|
||||
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
|
||||
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
7. **Chart Data Processing**:
|
||||
- Bar Chart/Horizontal Bars:
|
||||
<div style="margin:20px 0; padding:15px; background-color:#f8f9fa; border-radius:8px;">
|
||||
<p style="margin:0 0 15px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Data Comparison</p>
|
||||
|
||||
<!-- Item 1 -->
|
||||
<div style="margin-bottom:12px;">
|
||||
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project A</span>
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">65%</span>
|
||||
</div>
|
||||
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
|
||||
<div style="height:100%; width:65%; background:linear-gradient(to right, #4285f4, #5e97f6); border-radius:5px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Item 2 -->
|
||||
<div style="margin-bottom:12px;">
|
||||
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project B</span>
|
||||
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">42%</span>
|
||||
</div>
|
||||
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
|
||||
<div style="height:100%; width:42%; background:linear-gradient(to right, #ea4335, #f07575); border-radius:5px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
8. **Highlight Box** (for displaying tips or reminders):
|
||||
<div style="margin:25px 0; padding:20px; background-color:#fffde7; border-radius:8px; border-left:4px solid #fdd835; box-shadow:0 1px 5px rgba(0,0,0,0.05);">
|
||||
<div style="display:flex; align-items:flex-start;">
|
||||
<div style="flex-shrink:0; margin-right:15px; width:24px; height:24px; background-color:#fdd835; border-radius:50%; display:flex; align-items:center; justify-content:center;">
|
||||
<span style="color:#fff; font-weight:bold; font-size:16px;">!</span>
|
||||
</div>
|
||||
<div>
|
||||
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Tip</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#555;">
|
||||
Here are some additional tips or suggestions to help readers better understand or apply the article content.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
9. **Summary Box**:
|
||||
<div style="margin:25px 0; padding:20px; background-color:#f2f7fd; border-radius:8px; box-shadow:0 1px 5px rgba(66,133,244,0.1);">
|
||||
<p style="margin:0 0 10px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#1a73e8;">In Simple Terms</p>
|
||||
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#333;">
|
||||
This is a concise summary of the entire content, highlighting the most critical findings and conclusions.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
## Notes:
|
||||
1. **Only generate content for a single article**, not including title and source, and not including HTML head and tail structure
|
||||
2. Content should be **200-300 words**, don't force word count
|
||||
3. **Must use multiple visual elements** (at least 2-3 types), avoid monotonous pure text presentation
|
||||
4. Use relaxed, natural language, as if chatting with friends
|
||||
5. Create visual charts for important data, rather than just describing with text
|
||||
6. Use quote blocks to highlight important viewpoints, and lists to organize multiple points
|
||||
7. Appropriately use emojis and conversational expressions to increase friendliness
|
||||
8. Note that the article content has been provided in the previous message, please reply directly, no explanation, no comments, no markups
|
||||
`,
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
|
||||
type Factory component.Factory[Rewriter, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
@@ -445,6 +265,8 @@ func new(instance string, app *config.App, dependencies Dependencies) (Rewriter,
|
||||
Config: c,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
crawler: crawl.NewLocal(),
|
||||
jinaCrawler: crawl.NewJina(app.Jina.Token),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -452,6 +274,9 @@ func new(instance string, app *config.App, dependencies Dependencies) (Rewriter,
|
||||
|
||||
type rewriter struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
|
||||
crawler crawl.Crawler
|
||||
jinaCrawler crawl.Crawler
|
||||
}
|
||||
|
||||
func (r *rewriter) Reload(app *config.App) error {
|
||||
@@ -462,6 +287,8 @@ func (r *rewriter) Reload(app *config.App) error {
|
||||
}
|
||||
r.SetConfig(newConfig)
|
||||
|
||||
r.jinaCrawler = crawl.NewJina(app.Jina.Token)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -471,6 +298,11 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
|
||||
rules := *r.Config()
|
||||
for _, rule := range rules {
|
||||
// If.
|
||||
if !rule.if_.Match(labels) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get source text based on source label.
|
||||
sourceText := labels.Get(rule.SourceLabel)
|
||||
if utf8.RuneCountInString(sourceText) < *rule.SkipTooShortThreshold {
|
||||
@@ -479,7 +311,7 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
|
||||
// Transform text if configured.
|
||||
text := sourceText
|
||||
if rule.Transform != nil {
|
||||
if rule.Transform != nil && rule.Transform.ToText != nil {
|
||||
transformed, err := r.transformText(ctx, rule.Transform, sourceText)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "transform text")
|
||||
@@ -506,15 +338,37 @@ func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (rewritten m
|
||||
return labels, nil
|
||||
}
|
||||
|
||||
// transformText transforms text using configured LLM.
|
||||
// transformText transforms text using configured LLM or by crawling a URL.
|
||||
func (r *rewriter) transformText(ctx context.Context, transform *Transform, text string) (string, error) {
|
||||
switch transform.ToText.Type {
|
||||
case ToTextTypeCrawl:
|
||||
return r.transformTextCrawl(ctx, r.crawler, text)
|
||||
case ToTextTypeCrawlByJina:
|
||||
return r.transformTextCrawl(ctx, r.jinaCrawler, text)
|
||||
|
||||
case ToTextTypePrompt:
|
||||
return r.transformTextPrompt(ctx, transform, text)
|
||||
default:
|
||||
return r.transformTextPrompt(ctx, transform, text)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rewriter) transformTextCrawl(ctx context.Context, crawler crawl.Crawler, url string) (string, error) {
|
||||
mdBytes, err := crawler.Markdown(ctx, url)
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "crawl %s", url)
|
||||
}
|
||||
return string(mdBytes), nil
|
||||
}
|
||||
|
||||
// transformTextPrompt transforms text using configured LLM.
|
||||
func (r *rewriter) transformTextPrompt(ctx context.Context, transform *Transform, text string) (string, error) {
|
||||
// Get LLM instance.
|
||||
llm := r.Dependencies().LLMFactory.Get(transform.ToText.LLM)
|
||||
|
||||
// Call completion.
|
||||
result, err := llm.String(ctx, []string{
|
||||
transform.ToText.promptRendered,
|
||||
"The content to be processed is below, and the processing requirements are as above",
|
||||
text, // TODO: may place to first line to hit the model cache in different rewrite rules.
|
||||
})
|
||||
if err != nil {
|
||||
@@ -525,32 +379,11 @@ func (r *rewriter) transformText(ctx context.Context, transform *Transform, text
|
||||
}
|
||||
|
||||
func (r *rewriter) transformTextHack(text string) string {
|
||||
bytes := unsafe.Slice(unsafe.StringData(text), len(text))
|
||||
start := 0
|
||||
end := len(bytes)
|
||||
|
||||
// Remove the last line if it's empty.
|
||||
// This is a hack to avoid the model output a empty line.
|
||||
// E.g. category: tech\n
|
||||
if end > 0 && bytes[end-1] == '\n' {
|
||||
end--
|
||||
}
|
||||
|
||||
// Remove the html backticks.
|
||||
if end-start >= 7 && string(bytes[start:start+7]) == "```html" {
|
||||
start += 7
|
||||
}
|
||||
if end-start >= 3 && string(bytes[end-3:end]) == "```" {
|
||||
end -= 3
|
||||
}
|
||||
|
||||
// If no changes, return the original string.
|
||||
if start == 0 && end == len(bytes) {
|
||||
return text
|
||||
}
|
||||
|
||||
// Only copy one time.
|
||||
return string(bytes[start:end])
|
||||
// TODO: optimize this.
|
||||
text = strings.ReplaceAll(text, "```html", "")
|
||||
text = strings.ReplaceAll(text, "```markdown", "")
|
||||
text = strings.ReplaceAll(text, "```", "")
|
||||
return text
|
||||
}
|
||||
|
||||
type mockRewriter struct {
|
||||
|
||||
@@ -44,6 +44,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}", // Using a simple template for testing
|
||||
},
|
||||
@@ -79,6 +80,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
},
|
||||
@@ -148,6 +150,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
promptRendered: "Analyze the content and categorize it...",
|
||||
@@ -186,6 +189,7 @@ func TestLabels(t *testing.T) {
|
||||
SkipTooShortThreshold: ptr.To(10),
|
||||
Transform: &Transform{
|
||||
ToText: &ToText{
|
||||
Type: ToTextTypePrompt,
|
||||
LLM: "mock-llm",
|
||||
Prompt: "{{ .category }}",
|
||||
promptRendered: "Analyze the content and categorize it...",
|
||||
|
||||
@@ -55,7 +55,7 @@ func (r *periodic) Run() (err error) {
|
||||
end := time.Date(today.Year(), today.Month(), today.Day(),
|
||||
config.end.Hour(), config.end.Minute(), 0, 0, today.Location())
|
||||
|
||||
buffer := 20 * time.Minute
|
||||
buffer := 30 * time.Minute
|
||||
endPlusBuffer := end.Add(buffer)
|
||||
if now.Before(end) || now.After(endPlusBuffer) {
|
||||
return
|
||||
|
||||
@@ -18,7 +18,6 @@ package rule
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
@@ -58,11 +57,8 @@ func (c *Config) Validate() error { //nolint:cyclop,gocognit
|
||||
if c.Name == "" {
|
||||
return errors.New("name is required")
|
||||
}
|
||||
if c.Query != "" && utf8.RuneCountInString(c.Query) < 5 {
|
||||
return errors.New("query must be at least 5 characters")
|
||||
}
|
||||
if c.Threshold == 0 {
|
||||
c.Threshold = 0.6
|
||||
c.Threshold = 0.5
|
||||
}
|
||||
if c.Threshold < 0 || c.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
|
||||
@@ -65,7 +65,6 @@ func newRSSReader(config *ScrapeSourceRSS) (reader, error) {
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
|
||||
type rssReader struct {
|
||||
config *ScrapeSourceRSS
|
||||
client client
|
||||
|
||||
@@ -227,7 +227,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
appendToResult := func(feed *model.Feed) {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
value := timeutil.Format(feed.Time)
|
||||
if err := s.Dependencies().KVStorage.Set(ctx, key, value, ttl); err != nil {
|
||||
if err := s.Dependencies().KVStorage.Set(ctx, []byte(key), []byte(value), ttl); err != nil {
|
||||
log.Error(ctx, err, "set last try store time")
|
||||
}
|
||||
filtered = append(filtered, feed)
|
||||
@@ -236,7 +236,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
for _, feed := range feeds {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
|
||||
lastTryStored, err := s.Dependencies().KVStorage.Get(ctx, key)
|
||||
lastTryStored, err := s.Dependencies().KVStorage.Get(ctx, []byte(key))
|
||||
switch {
|
||||
default:
|
||||
log.Error(ctx, err, "get last stored time, fallback to continue writing")
|
||||
@@ -246,7 +246,7 @@ func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filter
|
||||
appendToResult(feed)
|
||||
|
||||
case err == nil:
|
||||
t, err := timeutil.Parse(lastTryStored)
|
||||
t, err := timeutil.Parse(string(lastTryStored))
|
||||
if err != nil {
|
||||
log.Error(ctx, err, "parse last try stored time, fallback to continue writing")
|
||||
appendToResult(feed)
|
||||
|
||||
@@ -26,7 +26,6 @@ import (
|
||||
"runtime"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -277,47 +276,20 @@ type QueryOptions struct {
|
||||
Query string
|
||||
Threshold float32
|
||||
LabelFilters []string
|
||||
labelFilters []LabelFilter
|
||||
labelFilters model.LabelFilters
|
||||
Limit int
|
||||
Start, End time.Time
|
||||
}
|
||||
|
||||
var (
|
||||
LabelFilterEqual = "="
|
||||
LabelFilterNotEqual = "!="
|
||||
|
||||
NewLabelFilter = func(key, value string, eq bool) string {
|
||||
if eq {
|
||||
return fmt.Sprintf("%s%s%s", key, LabelFilterEqual, value)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s%s%s", key, LabelFilterNotEqual, value)
|
||||
}
|
||||
|
||||
ParseLabelFilter = func(filter string) (LabelFilter, error) {
|
||||
eq := false
|
||||
parts := strings.Split(filter, LabelFilterNotEqual)
|
||||
if len(parts) != 2 {
|
||||
parts = strings.Split(filter, LabelFilterEqual)
|
||||
eq = true
|
||||
}
|
||||
if len(parts) != 2 {
|
||||
return LabelFilter{}, errors.New("invalid label filter")
|
||||
}
|
||||
|
||||
return LabelFilter{Label: parts[0], Value: parts[1], Equal: eq}, nil
|
||||
}
|
||||
)
|
||||
|
||||
func (q *QueryOptions) Validate() error { //nolint:cyclop
|
||||
if q.Threshold < 0 || q.Threshold > 1 {
|
||||
return errors.New("threshold must be between 0 and 1")
|
||||
}
|
||||
for _, labelFilter := range q.LabelFilters {
|
||||
if labelFilter == "" {
|
||||
for _, s := range q.LabelFilters {
|
||||
if s == "" {
|
||||
return errors.New("label filter is required")
|
||||
}
|
||||
filter, err := ParseLabelFilter(labelFilter)
|
||||
filter, err := model.NewLabelFilter(s)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "parse label filter")
|
||||
}
|
||||
@@ -368,13 +340,6 @@ func (q *QueryOptions) HitTimeRangeCondition(b Block) bool {
|
||||
return queryAsBase || blockAsBase
|
||||
}
|
||||
|
||||
// LabelFilter defines the matcher for an item.
|
||||
type LabelFilter struct {
|
||||
Label string
|
||||
Equal bool
|
||||
Value string
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Block, Config, Dependencies]
|
||||
|
||||
@@ -1228,14 +1193,14 @@ func (b *block) applyFilters(ctx context.Context, query *QueryOptions) (res filt
|
||||
return b.mergeFilterResults(labelsResult, vectorsResult), nil
|
||||
}
|
||||
|
||||
func (b *block) applyLabelFilters(ctx context.Context, filters []LabelFilter) filterResult {
|
||||
func (b *block) applyLabelFilters(ctx context.Context, filters model.LabelFilters) filterResult {
|
||||
if len(filters) == 0 {
|
||||
return matchedAllFilterResult
|
||||
}
|
||||
|
||||
var allIDs map[uint64]struct{}
|
||||
for _, filter := range filters {
|
||||
ids := b.invertedIndex.Search(ctx, filter.Label, filter.Equal, filter.Value)
|
||||
ids := b.invertedIndex.Search(ctx, filter)
|
||||
if len(ids) == 0 {
|
||||
return matchedNothingFilterResult
|
||||
}
|
||||
@@ -1317,7 +1282,7 @@ func (b *block) mergeFilterResults(x, y filterResult) filterResult {
|
||||
}
|
||||
|
||||
func (b *block) fillEmbedding(ctx context.Context, feeds []*model.Feed) ([]*chunk.Feed, error) {
|
||||
embedded := make([]*chunk.Feed, len(feeds))
|
||||
embedded := make([]*chunk.Feed, 0, len(feeds))
|
||||
llm := b.Dependencies().LLMFactory.Get(b.Config().embeddingLLM)
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
@@ -1336,16 +1301,21 @@ func (b *block) fillEmbedding(ctx context.Context, feeds []*model.Feed) ([]*chun
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
embedded[i] = &chunk.Feed{
|
||||
embedded = append(embedded, &chunk.Feed{
|
||||
Feed: feed,
|
||||
Vectors: vectors,
|
||||
}
|
||||
})
|
||||
mu.Unlock()
|
||||
}(i, feed)
|
||||
}
|
||||
wg.Wait()
|
||||
if len(errs) > 0 {
|
||||
return nil, errs[0]
|
||||
|
||||
switch len(errs) {
|
||||
case 0:
|
||||
case len(feeds):
|
||||
return nil, errs[0] // All failed.
|
||||
default:
|
||||
log.Error(ctx, errors.Wrap(errs[0], "fill embedding"), "error_count", len(errs))
|
||||
}
|
||||
|
||||
return embedded, nil
|
||||
|
||||
@@ -24,7 +24,7 @@ type Index interface {
|
||||
index.Codec
|
||||
|
||||
// Search returns item IDs matching the given label and value.
|
||||
Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{})
|
||||
Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{})
|
||||
// Add adds item to the index.
|
||||
// If label or value in labels is empty, it will be ignored.
|
||||
// If value is too long, it will be ignored,
|
||||
@@ -88,17 +88,17 @@ type idx struct {
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
func (idx *idx) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
|
||||
func (idx *idx) Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{}) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Search")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
if value == "" {
|
||||
return idx.searchEmptyValue(label, eq)
|
||||
if matcher.Value == "" {
|
||||
return idx.searchEmptyValue(matcher.Label, matcher.Equal)
|
||||
}
|
||||
|
||||
return idx.searchNonEmptyValue(label, eq, value)
|
||||
return idx.searchNonEmptyValue(matcher)
|
||||
}
|
||||
|
||||
func (idx *idx) Add(ctx context.Context, id uint64, labels model.Labels) {
|
||||
@@ -198,16 +198,16 @@ func (idx *idx) searchEmptyValue(label string, eq bool) map[uint64]struct{} {
|
||||
// searchNonEmptyValue handles the search logic when the target value is not empty.
|
||||
// If eq is true, it returns IDs that have the exact label-value pair.
|
||||
// If eq is false, it returns IDs that *do not* have the exact label-value pair.
|
||||
func (idx *idx) searchNonEmptyValue(label string, eq bool, value string) map[uint64]struct{} {
|
||||
func (idx *idx) searchNonEmptyValue(matcher model.LabelFilter) map[uint64]struct{} {
|
||||
// Get the map of values for the given label.
|
||||
values, labelExists := idx.m[label]
|
||||
values, labelExists := idx.m[matcher.Label]
|
||||
|
||||
// If equal (eq), find the exact match.
|
||||
if eq {
|
||||
if matcher.Equal {
|
||||
if !labelExists {
|
||||
return make(map[uint64]struct{}) // Label doesn't exist.
|
||||
}
|
||||
ids, valueExists := values[value]
|
||||
ids, valueExists := values[matcher.Value]
|
||||
if !valueExists {
|
||||
return make(map[uint64]struct{}) // Value doesn't exist for this label.
|
||||
}
|
||||
@@ -221,7 +221,7 @@ func (idx *idx) searchNonEmptyValue(label string, eq bool, value string) map[uin
|
||||
resultIDs := maps.Clone(idx.ids)
|
||||
if labelExists {
|
||||
// If the specific label-value pair exists, remove its associated IDs.
|
||||
if matchingIDs, valueExists := values[value]; valueExists {
|
||||
if matchingIDs, valueExists := values[matcher.Value]; valueExists {
|
||||
for id := range matchingIDs {
|
||||
delete(resultIDs, id)
|
||||
}
|
||||
@@ -413,8 +413,8 @@ type mockIndex struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockIndex) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
|
||||
args := m.Called(ctx, label, eq, value)
|
||||
func (m *mockIndex) Search(ctx context.Context, matcher model.LabelFilter) (ids map[uint64]struct{}) {
|
||||
args := m.Called(ctx, matcher)
|
||||
|
||||
return args.Get(0).(map[uint64]struct{})
|
||||
}
|
||||
|
||||
@@ -118,9 +118,7 @@ func TestSearch(t *testing.T) {
|
||||
setupLabels map[uint64]model.Labels
|
||||
}
|
||||
type whenDetail struct {
|
||||
searchLabel string
|
||||
eq bool
|
||||
searchValue string
|
||||
matcher model.LabelFilter
|
||||
}
|
||||
type thenExpected struct {
|
||||
want []uint64
|
||||
@@ -140,9 +138,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "tech",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "tech",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{1, 2},
|
||||
@@ -159,9 +159,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "invalid",
|
||||
searchValue: "value",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "invalid",
|
||||
Value: "value",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: nil,
|
||||
@@ -178,9 +180,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "invalid",
|
||||
eq: true,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "invalid",
|
||||
Equal: true,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: nil,
|
||||
@@ -200,9 +204,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "category",
|
||||
searchValue: "tech",
|
||||
eq: false,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "category",
|
||||
Value: "tech",
|
||||
Equal: false,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{2},
|
||||
@@ -220,9 +226,11 @@ func TestSearch(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
searchLabel: "invalid",
|
||||
searchValue: "value",
|
||||
eq: false,
|
||||
matcher: model.LabelFilter{
|
||||
Label: "invalid",
|
||||
Value: "value",
|
||||
Equal: false,
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
want: []uint64{1, 2},
|
||||
@@ -240,7 +248,7 @@ func TestSearch(t *testing.T) {
|
||||
}
|
||||
|
||||
// When.
|
||||
result := idx.Search(context.Background(), tt.WhenDetail.searchLabel, tt.WhenDetail.eq, tt.WhenDetail.searchValue)
|
||||
result := idx.Search(context.Background(), tt.WhenDetail.matcher)
|
||||
|
||||
// Then.
|
||||
if tt.ThenExpected.want == nil {
|
||||
|
||||
@@ -32,8 +32,8 @@ import (
|
||||
// --- Interface code block ---
|
||||
type Storage interface {
|
||||
component.Component
|
||||
Get(ctx context.Context, key string) (string, error)
|
||||
Set(ctx context.Context, key string, value string, ttl time.Duration) error
|
||||
Get(ctx context.Context, key []byte) ([]byte, error)
|
||||
Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error
|
||||
}
|
||||
|
||||
var ErrNotFound = errors.New("not found")
|
||||
@@ -137,7 +137,7 @@ func (k *kv) Close() error {
|
||||
|
||||
const bucket = "0"
|
||||
|
||||
func (k *kv) Get(ctx context.Context, key string) (value string, err error) {
|
||||
func (k *kv) Get(ctx context.Context, key []byte) (value []byte, err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Get")...)
|
||||
defer func() {
|
||||
telemetry.End(ctx, func() error {
|
||||
@@ -157,22 +157,22 @@ func (k *kv) Get(ctx context.Context, key string) (value string, err error) {
|
||||
})
|
||||
switch {
|
||||
case err == nil:
|
||||
return string(b), nil
|
||||
return b, nil
|
||||
case errors.Is(err, nutsdb.ErrNotFoundKey):
|
||||
return "", ErrNotFound
|
||||
return nil, ErrNotFound
|
||||
case strings.Contains(err.Error(), "key not found"):
|
||||
return "", ErrNotFound
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return "", err
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (k *kv) Set(ctx context.Context, key string, value string, ttl time.Duration) (err error) {
|
||||
func (k *kv) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(k.TelemetryLabels(), telemetrymodel.KeyOperation, "Set")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
return k.db.Update(func(tx *nutsdb.Tx) error {
|
||||
return tx.Put(bucket, []byte(key), []byte(value), uint32(ttl.Seconds()))
|
||||
return tx.Put(bucket, key, value, uint32(ttl.Seconds()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -180,13 +180,13 @@ type mockKV struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (m *mockKV) Get(ctx context.Context, key string) (string, error) {
|
||||
func (m *mockKV) Get(ctx context.Context, key []byte) ([]byte, error) {
|
||||
args := m.Called(ctx, key)
|
||||
|
||||
return args.String(0), args.Error(1)
|
||||
return args.Get(0).([]byte), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockKV) Set(ctx context.Context, key string, value string, ttl time.Duration) error {
|
||||
func (m *mockKV) Set(ctx context.Context, key []byte, value []byte, ttl time.Duration) error {
|
||||
args := m.Called(ctx, key, value, ttl)
|
||||
|
||||
return args.Error(0)
|
||||
|
||||
@@ -27,6 +27,8 @@ import (
|
||||
|
||||
"github.com/pkg/errors"
|
||||
slogdedup "github.com/veqryn/slog-dedup"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
)
|
||||
|
||||
type Level string
|
||||
@@ -187,7 +189,8 @@ func getStack(skip, depth int) string {
|
||||
}
|
||||
first = false
|
||||
|
||||
b.WriteString(frame.Function)
|
||||
fn := strings.TrimPrefix(frame.Function, model.Module) // no module prefix for zenfeed self.
|
||||
b.WriteString(fn)
|
||||
b.WriteByte(':')
|
||||
b.WriteString(strconv.Itoa(frame.Line))
|
||||
}
|
||||
|
||||
137
pkg/telemetry/server/server.go
Normal file
137
pkg/telemetry/server/server.go
Normal file
@@ -0,0 +1,137 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package http
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/pprof"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/metric"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
type Server interface {
|
||||
component.Component
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Address string
|
||||
}
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if c.Address == "" {
|
||||
c.Address = ":9090"
|
||||
}
|
||||
if _, _, err := net.SplitHostPort(c.Address); err != nil {
|
||||
return errors.Wrap(err, "invalid address")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) From(app *config.App) *Config {
|
||||
c.Address = app.Telemetry.Address
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
type Dependencies struct {
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Server, config.App, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](
|
||||
func(instance string, config *config.App, dependencies Dependencies) (Server, error) {
|
||||
m := &mockServer{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Server, config.App, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
|
||||
config := &Config{}
|
||||
config.From(app)
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrap(err, "validate config")
|
||||
}
|
||||
|
||||
router := http.NewServeMux()
|
||||
router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(200)
|
||||
}))
|
||||
router.Handle("/metrics", metric.Handler())
|
||||
router.HandleFunc("/pprof", pprof.Index)
|
||||
router.HandleFunc("/pprof/cmdline", pprof.Cmdline)
|
||||
router.HandleFunc("/pprof/profile", pprof.Profile)
|
||||
router.HandleFunc("/pprof/symbol", pprof.Symbol)
|
||||
router.HandleFunc("/pprof/trace", pprof.Trace)
|
||||
|
||||
return &server{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "TelemetryServer",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
http: &http.Server{Addr: config.Address, Handler: router},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
type server struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
http *http.Server
|
||||
}
|
||||
|
||||
func (s *server) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
serverErr := make(chan error, 1)
|
||||
go func() {
|
||||
serverErr <- s.http.ListenAndServe()
|
||||
}()
|
||||
|
||||
s.MarkReady()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Info(ctx, "shutting down")
|
||||
|
||||
return s.http.Shutdown(ctx)
|
||||
case err := <-serverErr:
|
||||
return errors.Wrap(err, "listen and serve")
|
||||
}
|
||||
}
|
||||
|
||||
type mockServer struct {
|
||||
component.Mock
|
||||
}
|
||||
176
pkg/util/crawl/crawl.go
Normal file
176
pkg/util/crawl/crawl.go
Normal file
@@ -0,0 +1,176 @@
|
||||
package crawl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/temoto/robotstxt"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/util/text_convert"
|
||||
)
|
||||
|
||||
type Crawler interface {
|
||||
Markdown(ctx context.Context, u string) ([]byte, error)
|
||||
}
|
||||
|
||||
type local struct {
|
||||
hc *http.Client
|
||||
|
||||
robotsDataCache sync.Map
|
||||
}
|
||||
|
||||
func NewLocal() Crawler {
|
||||
return &local{
|
||||
hc: &http.Client{},
|
||||
}
|
||||
}
|
||||
|
||||
func (c *local) Markdown(ctx context.Context, u string) ([]byte, error) {
|
||||
// Check if the page is allowed.
|
||||
if err := c.checkAllowed(ctx, u); err != nil {
|
||||
return nil, errors.Wrapf(err, "check robots.txt for %s", u)
|
||||
}
|
||||
|
||||
// Prepare the request.
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", u)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Send the request.
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", u)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Parse the response.
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, u)
|
||||
}
|
||||
bodyBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "read body from %s", u)
|
||||
}
|
||||
|
||||
// Convert the body to markdown.
|
||||
mdBytes, err := textconvert.HTMLToMarkdown(bodyBytes)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "convert html to markdown")
|
||||
}
|
||||
|
||||
return mdBytes, nil
|
||||
}
|
||||
|
||||
const userAgent = "ZenFeed"
|
||||
|
||||
func (c *local) checkAllowed(ctx context.Context, u string) error {
|
||||
parsedURL, err := url.Parse(u)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "parse url %s", u)
|
||||
}
|
||||
|
||||
d, err := c.getRobotsData(ctx, parsedURL.Host)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "check robots.txt for %s", parsedURL.Host)
|
||||
}
|
||||
if !d.TestAgent(parsedURL.Path, userAgent) {
|
||||
return errors.Errorf("disallowed by robots.txt for %s", u)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getRobotsData fetches and parses robots.txt for a given host.
|
||||
func (c *local) getRobotsData(ctx context.Context, host string) (*robotstxt.RobotsData, error) {
|
||||
// Check the cache.
|
||||
if data, found := c.robotsDataCache.Load(host); found {
|
||||
return data.(*robotstxt.RobotsData), nil
|
||||
}
|
||||
|
||||
// Prepare the request.
|
||||
robotsURL := fmt.Sprintf("https://%s/robots.txt", host)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, robotsURL, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", robotsURL)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
// Send the request.
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", robotsURL)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Parse the response.
|
||||
switch resp.StatusCode {
|
||||
case http.StatusOK:
|
||||
data, err := robotstxt.FromResponse(resp)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "parse robots.txt from %s", robotsURL)
|
||||
}
|
||||
c.robotsDataCache.Store(host, data)
|
||||
return data, nil
|
||||
case http.StatusNotFound:
|
||||
data := &robotstxt.RobotsData{}
|
||||
c.robotsDataCache.Store(host, data)
|
||||
return data, nil
|
||||
case http.StatusUnauthorized, http.StatusForbidden:
|
||||
return nil, errors.Errorf("access to %s denied (status %d)", robotsURL, resp.StatusCode)
|
||||
default:
|
||||
return nil, errors.Errorf("unexpected status %d fetching %s", resp.StatusCode, robotsURL)
|
||||
}
|
||||
}
|
||||
|
||||
type jina struct {
|
||||
hc *http.Client
|
||||
token string
|
||||
}
|
||||
|
||||
func NewJina(token string) Crawler {
|
||||
return &jina{
|
||||
hc: &http.Client{},
|
||||
|
||||
// If token is empty, will not affect to use, but rate limit will be lower.
|
||||
// See https://jina.ai/api-dashboard/rate-limit.
|
||||
token: token,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *jina) Markdown(ctx context.Context, u string) ([]byte, error) {
|
||||
proxyURL := fmt.Sprintf("https://r.jina.ai/%s", u)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, proxyURL, nil)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "create request for %s", u)
|
||||
}
|
||||
|
||||
req.Header.Set("X-Engine", "browser")
|
||||
req.Header.Set("X-Robots-Txt", userAgent)
|
||||
if c.token != "" {
|
||||
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.token))
|
||||
}
|
||||
|
||||
resp, err := c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetch %s", proxyURL)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, errors.Errorf("received non-200 status code %d from %s", resp.StatusCode, proxyURL)
|
||||
}
|
||||
|
||||
mdBytes, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "read body from %s", proxyURL)
|
||||
}
|
||||
|
||||
return mdBytes, nil
|
||||
}
|
||||
@@ -13,39 +13,19 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rpc
|
||||
package jsonrpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
)
|
||||
|
||||
type Handler[Request any, Response any] func(ctx context.Context, req *Request) (*Response, error)
|
||||
|
||||
var (
|
||||
ErrBadRequest = func(err error) Error { return newError(http.StatusBadRequest, err) }
|
||||
ErrNotFound = func(err error) Error { return newError(http.StatusNotFound, err) }
|
||||
ErrInternal = func(err error) Error { return newError(http.StatusInternalServerError, err) }
|
||||
)
|
||||
|
||||
type Error struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
func (e Error) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
|
||||
func newError(code int, err error) Error {
|
||||
return Error{
|
||||
Code: code,
|
||||
Message: err.Error(),
|
||||
}
|
||||
}
|
||||
|
||||
func API[Request any, Response any](handler Handler[Request, Response]) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
allowCORS(w)
|
||||
@@ -65,11 +45,11 @@ func API[Request any, Response any](handler Handler[Request, Response]) http.Han
|
||||
|
||||
resp, err := handler(r.Context(), &req)
|
||||
if err != nil {
|
||||
var rpcErr Error
|
||||
if errors.As(err, &rpcErr) {
|
||||
var apiErr api.Error
|
||||
if errors.As(err, &apiErr) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(rpcErr.Code)
|
||||
_ = json.NewEncoder(w).Encode(rpcErr)
|
||||
w.WriteHeader(apiErr.Code)
|
||||
_ = json.NewEncoder(w).Encode(apiErr)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -13,7 +13,7 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package rpc
|
||||
package jsonrpc
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/api"
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
@@ -58,15 +59,15 @@ func TestAPI(t *testing.T) {
|
||||
}
|
||||
|
||||
badRequestHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrBadRequest(errors.New("invalid request"))
|
||||
return nil, api.ErrBadRequest(errors.New("invalid request"))
|
||||
}
|
||||
|
||||
notFoundHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrNotFound(errors.New("resource not found"))
|
||||
return nil, api.ErrNotFound(errors.New("resource not found"))
|
||||
}
|
||||
|
||||
internalErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
return nil, ErrInternal(errors.New("server error"))
|
||||
return nil, api.ErrInternal(errors.New("server error"))
|
||||
}
|
||||
|
||||
genericErrorHandler := func(ctx context.Context, req *TestRequest) (*TestResponse, error) {
|
||||
Reference in New Issue
Block a user