add rss & crawl & webhook

This commit is contained in:
glidea
2025-06-05 23:29:37 +08:00
parent ead8286a48
commit d520444e9f
43 changed files with 1757 additions and 703 deletions

View File

@@ -37,7 +37,6 @@ import (
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
jsonschema "github.com/glidea/zenfeed/pkg/util/json_schema"
"github.com/glidea/zenfeed/pkg/util/rpc"
)
// --- Interface code block ---
@@ -161,11 +160,11 @@ type QueryRequest struct {
}
func (r *QueryRequest) Validate() error { //nolint:cyclop
if r.Query != "" && utf8.RuneCountInString(r.Query) < 5 {
return errors.New("query must be at least 5 characters")
if r.Query != "" && utf8.RuneCountInString(r.Query) > 64 {
return errors.New("query must be at most 64 characters")
}
if r.Threshold == 0 {
r.Threshold = 0.55
r.Threshold = 0.5
}
if r.Threshold < 0 || r.Threshold > 1 {
return errors.New("threshold must be between 0 and 1")
@@ -200,6 +199,28 @@ type QueryResponse struct {
Count int `json:"count"`
}
// Error is the error value returned by API methods. It pairs a numeric code
// with a human-readable message; both fields are serialized to JSON.
type Error struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

// Error implements the error interface by returning the message.
func (e Error) Error() string { return e.Message }

// newError builds an Error carrying code and the message of err.
// err must be non-nil, since its Error method is invoked.
func newError(code int, err error) Error {
	var wrapped Error
	wrapped.Code = code
	wrapped.Message = err.Error()

	return wrapped
}

// Constructors binding the HTTP status code that matches each failure class.
var (
	ErrBadRequest = func(cause error) Error { return newError(http.StatusBadRequest, cause) }
	ErrNotFound   = func(cause error) Error { return newError(http.StatusNotFound, cause) }
	ErrInternal   = func(cause error) Error { return newError(http.StatusInternalServerError, cause) }
)
// --- Factory code block ---
type Factory component.Factory[API, config.App, Dependencies]
@@ -262,7 +283,7 @@ func (a *api) QueryAppConfigSchema(
) (resp *QueryAppConfigSchemaResponse, err error) {
schema, err := jsonschema.ForType(reflect.TypeOf(config.App{}))
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "query app config schema"))
return nil, ErrInternal(errors.Wrap(err, "query app config schema"))
}
return (*QueryAppConfigSchemaResponse)(&schema), nil
@@ -282,7 +303,7 @@ func (a *api) ApplyAppConfig(
req *ApplyAppConfigRequest,
) (resp *ApplyAppConfigResponse, err error) {
if err := a.Dependencies().ConfigManager.SaveAppConfig(&req.App); err != nil {
return nil, rpc.ErrBadRequest(errors.Wrap(err, "save app config"))
return nil, ErrBadRequest(errors.Wrap(err, "save app config"))
}
return &ApplyAppConfigResponse{}, nil
@@ -297,20 +318,20 @@ func (a *api) QueryRSSHubCategories(
// New request.
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
return nil, ErrInternal(errors.Wrap(err, "new request"))
}
// Do request.
forwardRespIO, err := a.hc.Do(forwardReq)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub websites"))
return nil, ErrInternal(errors.Wrap(err, "query rss hub websites"))
}
defer func() { _ = forwardRespIO.Body.Close() }()
// Parse response.
var forwardResp map[string]RSSHubWebsite
if err := json.NewDecoder(forwardRespIO.Body).Decode(&forwardResp); err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
return nil, ErrInternal(errors.Wrap(err, "parse response"))
}
// Convert to response.
@@ -333,7 +354,7 @@ func (a *api) QueryRSSHubWebsites(
ctx context.Context, req *QueryRSSHubWebsitesRequest,
) (resp *QueryRSSHubWebsitesResponse, err error) {
if req.Category == "" {
return nil, rpc.ErrBadRequest(errors.New("category is required"))
return nil, ErrBadRequest(errors.New("category is required"))
}
url := a.Config().RSSHubEndpoint + "/api/category/" + req.Category
@@ -341,29 +362,29 @@ func (a *api) QueryRSSHubWebsites(
// New request.
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
return nil, ErrInternal(errors.Wrap(err, "new request"))
}
// Do request.
forwardRespIO, err := a.hc.Do(forwardReq)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
}
defer func() { _ = forwardRespIO.Body.Close() }()
// Parse response.
body, err := io.ReadAll(forwardRespIO.Body)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
return nil, ErrInternal(errors.Wrap(err, "read response"))
}
if len(body) == 0 {
// Hack for RSSHub: an empty body is treated as an invalid category id.
// Consider caching category ids and validating them locally to remove this workaround.
return nil, rpc.ErrBadRequest(errors.New("category id is invalid"))
return nil, ErrBadRequest(errors.New("category id is invalid"))
}
var forwardResp map[string]RSSHubWebsite
if err := json.Unmarshal(body, &forwardResp); err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
return nil, ErrInternal(errors.Wrap(err, "parse response"))
}
// Convert to response.
@@ -383,7 +404,7 @@ func (a *api) QueryRSSHubRoutes(
req *QueryRSSHubRoutesRequest,
) (resp *QueryRSSHubRoutesResponse, err error) {
if req.WebsiteID == "" {
return nil, rpc.ErrBadRequest(errors.New("website id is required"))
return nil, ErrBadRequest(errors.New("website id is required"))
}
url := a.Config().RSSHubEndpoint + "/api/namespace/" + req.WebsiteID
@@ -391,30 +412,30 @@ func (a *api) QueryRSSHubRoutes(
// New request.
forwardReq, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "new request"))
return nil, ErrInternal(errors.Wrap(err, "new request"))
}
// Do request.
forwardRespIO, err := a.hc.Do(forwardReq)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "query rss hub routes"))
return nil, ErrInternal(errors.Wrap(err, "query rss hub routes"))
}
defer func() { _ = forwardRespIO.Body.Close() }()
// Parse response.
body, err := io.ReadAll(forwardRespIO.Body)
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "read response"))
return nil, ErrInternal(errors.Wrap(err, "read response"))
}
if len(body) == 0 {
return nil, rpc.ErrBadRequest(errors.New("website id is invalid"))
return nil, ErrBadRequest(errors.New("website id is invalid"))
}
var forwardResp struct {
Routes map[string]RSSHubRoute `json:"routes"`
}
if err := json.Unmarshal(body, &forwardResp); err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "parse response"))
return nil, ErrInternal(errors.Wrap(err, "parse response"))
}
// Convert to response.
@@ -435,7 +456,7 @@ func (a *api) Write(ctx context.Context, req *WriteRequest) (resp *WriteResponse
feed.Labels.Put(model.LabelType, "api", false)
}
if err := a.Dependencies().FeedStorage.Append(ctx, req.Feeds...); err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "append"))
return nil, ErrInternal(errors.Wrap(err, "append"))
}
return &WriteResponse{}, nil
@@ -447,7 +468,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
// Validate request.
if err := req.Validate(); err != nil {
return nil, rpc.ErrBadRequest(errors.Wrap(err, "validate"))
return nil, ErrBadRequest(errors.Wrap(err, "validate"))
}
// Forward to storage.
@@ -460,7 +481,7 @@ func (a *api) Query(ctx context.Context, req *QueryRequest) (resp *QueryResponse
End: req.End,
})
if err != nil {
return nil, rpc.ErrInternal(errors.Wrap(err, "query"))
return nil, ErrInternal(errors.Wrap(err, "query"))
}
if len(feeds) == 0 {
return &QueryResponse{Feeds: []*block.FeedVO{}}, nil

View File

@@ -26,9 +26,8 @@ import (
"github.com/glidea/zenfeed/pkg/config"
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
"github.com/glidea/zenfeed/pkg/telemetry/log"
"github.com/glidea/zenfeed/pkg/telemetry/metric"
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
"github.com/glidea/zenfeed/pkg/util/rpc"
"github.com/glidea/zenfeed/pkg/util/jsonrpc"
)
// --- Interface code block ---
@@ -89,18 +88,14 @@ func new(instance string, app *config.App, dependencies Dependencies) (Server, e
router := http.NewServeMux()
api := dependencies.API
router.Handle("/metrics", metric.Handler())
router.Handle("/health", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
}))
router.Handle("/write", rpc.API(api.Write))
router.Handle("/query_config", rpc.API(api.QueryAppConfig))
router.Handle("/apply_config", rpc.API(api.ApplyAppConfig))
router.Handle("/query_config_schema", rpc.API(api.QueryAppConfigSchema))
router.Handle("/query_rsshub_categories", rpc.API(api.QueryRSSHubCategories))
router.Handle("/query_rsshub_websites", rpc.API(api.QueryRSSHubWebsites))
router.Handle("/query_rsshub_routes", rpc.API(api.QueryRSSHubRoutes))
router.Handle("/query", rpc.API(api.Query))
router.Handle("/write", jsonrpc.API(api.Write))
router.Handle("/query_config", jsonrpc.API(api.QueryAppConfig))
router.Handle("/apply_config", jsonrpc.API(api.ApplyAppConfig))
router.Handle("/query_config_schema", jsonrpc.API(api.QueryAppConfigSchema))
router.Handle("/query_rsshub_categories", jsonrpc.API(api.QueryRSSHubCategories))
router.Handle("/query_rsshub_websites", jsonrpc.API(api.QueryRSSHubWebsites))
router.Handle("/query_rsshub_routes", jsonrpc.API(api.QueryRSSHubRoutes))
router.Handle("/query", jsonrpc.API(api.Query))
httpServer := &http.Server{Addr: config.Address, Handler: router}
return &server{

231
pkg/api/rss/rss.go Normal file
View File

@@ -0,0 +1,231 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package rss
import (
	"context"
	"fmt"
	"net"
	"net/http"
	"text/template"
	"time"

	"github.com/benbjohnson/clock"
	"github.com/gorilla/feeds"
	"github.com/pkg/errors"

	"github.com/glidea/zenfeed/pkg/api"
	"github.com/glidea/zenfeed/pkg/component"
	"github.com/glidea/zenfeed/pkg/config"
	"github.com/glidea/zenfeed/pkg/model"
	telemetry "github.com/glidea/zenfeed/pkg/telemetry"
	"github.com/glidea/zenfeed/pkg/telemetry/log"
	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
	"github.com/glidea/zenfeed/pkg/util/buffer"
)
// clk is the package-level clock; the rss handler reads the current time from it.
// NOTE(review): presumably a variable so tests can substitute a mock clock — confirm.
var clk = clock.New()
// --- Interface code block ---
// Server is the RSS server component. It combines the component.Component
// contract with config.Watcher.
type Server interface {
component.Component
config.Watcher
}
// Config holds the RSS server settings.
type Config struct {
// Address is the listen address in host:port form. Validate sets it to
// ":1302" when it is empty.
Address string
// ContentHTMLTemplate is the text/template source used to render the content
// of each RSS item. Validate sets it to "{{ .summary_html_snippet }}" when it
// is empty.
ContentHTMLTemplate string
// contentHTMLTemplate is the parsed form of ContentHTMLTemplate; it is
// populated by Validate.
contentHTMLTemplate *template.Template
}
// Validate fills in defaults and checks the configuration.
//
// An empty Address becomes ":1302" and an empty ContentHTMLTemplate becomes
// "{{ .summary_html_snippet }}". On success the parsed template is cached on
// the receiver. An error is returned when the address is not a valid
// host:port pair or when the template fails to parse.
func (c *Config) Validate() error {
	// Listen address: default first, then verify its shape.
	if c.Address == "" {
		c.Address = ":1302"
	}
	_, _, splitErr := net.SplitHostPort(c.Address)
	if splitErr != nil {
		return errors.Wrap(splitErr, "invalid address")
	}

	// Content template: default first, then parse once and keep the result.
	if c.ContentHTMLTemplate == "" {
		c.ContentHTMLTemplate = "{{ .summary_html_snippet }}"
	}
	parsed, parseErr := template.New("").Parse(c.ContentHTMLTemplate)
	if parseErr != nil {
		return errors.Wrap(parseErr, "parse rss content template")
	}
	c.contentHTMLTemplate = parsed

	return nil
}
// From copies the RSS settings out of the application config into c and
// returns c so calls can be chained.
func (c *Config) From(app *config.App) *Config {
	rssConf := app.API.RSS
	c.Address, c.ContentHTMLTemplate = rssConf.Address, rssConf.ContentHTMLTemplate

	return c
}
// Dependencies lists the components the RSS server relies on.
type Dependencies struct {
// API is the backend the rss handler forwards feed queries to.
API api.API
}
// --- Factory code block ---
// Factory builds Server instances from the application config and Dependencies.
type Factory component.Factory[Server, config.App, Dependencies]
// NewFactory returns the Factory for the RSS server.
//
// Without arguments the factory creates real servers via new. When one or more
// mock options are given, it instead yields a mockServer with those options
// applied, ignoring the instance name, config, and dependencies.
func NewFactory(mockOn ...component.MockOption) Factory {
	if len(mockOn) == 0 {
		return component.FactoryFunc[Server, config.App, Dependencies](new)
	}

	mockFactory := func(_ string, _ *config.App, _ Dependencies) (Server, error) {
		mocked := &mockServer{}
		component.MockOptions(mockOn).Apply(&mocked.Mock)

		return mocked, nil
	}

	return component.FactoryFunc[Server, config.App, Dependencies](mockFactory)
}
// new builds the RSS server for the given instance.
//
// It derives a Config from app, validates it (which also fills in defaults),
// and wires a net/http server that routes every path to the rss handler. The
// server does not start listening until Run is called.
//
// It returns an error when the derived configuration is invalid.
func new(instance string, app *config.App, dependencies Dependencies) (Server, error) {
	// Named cfg rather than config so the config package is not shadowed here.
	cfg := &Config{}
	cfg.From(app)
	if err := cfg.Validate(); err != nil {
		return nil, errors.Wrap(err, "validate config")
	}

	s := &server{
		Base: component.New(&component.BaseConfig[Config, Dependencies]{
			Name:         "RSSServer",
			Instance:     instance,
			Config:       cfg,
			Dependencies: dependencies,
		}),
	}

	router := http.NewServeMux()
	router.HandleFunc("/", s.rss)
	s.http = &http.Server{
		Addr:    cfg.Address,
		Handler: router,
		// Bound how long a client may take to send its request headers, so slow
		// or idle connections cannot hold server resources indefinitely.
		ReadHeaderTimeout: 10 * time.Second,
	}

	return s, nil
}
// --- Implementation code block ---
// server is the Server implementation backed by net/http.
type server struct {
*component.Base[Config, Dependencies]
// http is the underlying HTTP server; it is created in new and started by Run.
http *http.Server
}
// Run starts the HTTP listener and blocks until the component context is
// cancelled or the listener fails.
//
// On cancellation the server is shut down gracefully: in-flight requests get
// up to shutdownTimeout to complete, and the result of Shutdown is returned.
// If the listener stops on its own, its error is returned wrapped with
// "listen and serve".
func (s *server) Run() (err error) {
	ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
	defer func() { telemetry.End(ctx, err) }()

	// Upper bound for draining active connections during shutdown.
	const shutdownTimeout = 5 * time.Second

	// Buffered so the goroutine can always deliver its result and exit, even
	// when nobody is receiving anymore (i.e. after the shutdown branch returned).
	serverErr := make(chan error, 1)
	go func() {
		serverErr <- s.http.ListenAndServe()
	}()

	s.MarkReady()
	select {
	case <-ctx.Done():
		log.Info(ctx, "shutting down")

		// ctx is already cancelled at this point. Handing it to Shutdown would
		// make Shutdown give up immediately whenever a connection is still
		// active, so a fresh, time-limited context is used instead.
		shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
		defer cancel()

		return s.http.Shutdown(shutdownCtx)
	case listenErr := <-serverErr:
		return errors.Wrap(listenErr, "listen and serve")
	}
}
// Reload applies a new application config to the running server.
//
// The new settings are validated first. A change of the listen address is
// rejected because the http.Server address is fixed when the server is built
// in new; everything else is swapped in via SetConfig.
func (s *server) Reload(app *config.App) error {
	candidate := (&Config{}).From(app)
	if err := candidate.Validate(); err != nil {
		return errors.Wrap(err, "validate config")
	}

	if candidate.Address != s.Config().Address {
		return errors.New("address cannot be reloaded")
	}

	s.SetConfig(candidate)

	return nil
}
// rss serves the feeds matching the request's URL query parameters as an RSS
// document.
//
// Recognized URL query parameters:
//   - label_filter: may be repeated; all values are forwarded as
//     QueryRequest.LabelFilters.
//   - query: forwarded as QueryRequest.Query.
//
// The lookup covers the 24 hours before the current time and requests at most
// 100 feeds. Each feed's labels are rendered through the configured content
// template to produce the item content.
//
// Failure handling:
//   - A query error is answered with the status carried by api.Error when it
//     is a 4xx/5xx code, and with 400 otherwise.
//   - A template execution error is answered with 500.
//   - An error while writing the RSS body is only logged, since the response
//     may already be partially sent.
func (s *server) rss(w http.ResponseWriter, r *http.Request) {
	var err error
	ctx := telemetry.StartWith(r.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "rss")...)
	// Wrap in a closure: arguments of a directly deferred call are evaluated at
	// the defer statement, which would always hand telemetry a nil error.
	defer func() { telemetry.End(ctx, err) }()

	// Extract parameters.
	ps := r.URL.Query()
	labelFilters := ps["label_filter"]
	query := ps.Get("query")

	// Forward query request to API.
	now := clk.Now()
	queryResult, err := s.Dependencies().API.Query(ctx, &api.QueryRequest{
		Query:        query,
		LabelFilters: labelFilters,
		Start:        now.Add(-24 * time.Hour),
		End:          now,
		Limit:        100,
	})
	if err != nil {
		// Prefer the status attached to API errors; any other error keeps the
		// previous behavior of answering 400. The range check keeps
		// WriteHeader from being called with a non-error or invalid code.
		status := http.StatusBadRequest
		var apiErr api.Error
		if errors.As(err, &apiErr) && apiErr.Code >= 400 && apiErr.Code <= 599 {
			status = apiErr.Code
		}
		http.Error(w, err.Error(), status)

		return
	}

	// Render and convert to RSS.
	rssObj := &feeds.Feed{
		Title:       fmt.Sprintf("Zenfeed RSS - %s", ps.Encode()),
		Description: "Powered by Github Zenfeed - https://github.com/glidea/zenfeed. If you use Folo, please enable 'Appearance - Content - Render inline styles'",
		Items:       make([]*feeds.Item, 0, len(queryResult.Feeds)),
	}

	// Resolve the template once so every item of this response is rendered
	// with the same configuration, even if a reload happens mid-request.
	contentTmpl := s.Config().contentHTMLTemplate

	// One pooled buffer is reused for all items; it is reset per iteration.
	buf := buffer.Get()
	defer buffer.Put(buf)

	for _, feed := range queryResult.Feeds {
		buf.Reset()

		if err = contentTmpl.Execute(buf, feed.Labels.Map()); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)

			return
		}

		item := &feeds.Item{
			Title:   feed.Labels.Get(model.LabelTitle),
			Link:    &feeds.Link{Href: feed.Labels.Get(model.LabelLink)},
			Created: feed.Time, // NOTE: scrape time, not pub time.
			Content: buf.String(),
		}

		rssObj.Items = append(rssObj.Items, item)
	}

	if err = rssObj.WriteRss(w); err != nil {
		log.Error(ctx, errors.Wrap(err, "write rss response"))

		return
	}
}
// mockServer is the Server returned by NewFactory when mock options are
// supplied; its behavior comes from the embedded component.Mock.
type mockServer struct {
component.Mock
}
// Reload records the call on the mock and returns the error configured as its
// first return value.
func (m *mockServer) Reload(app *config.App) error {
	args := m.Called(app)

	return args.Error(0)
}