init
This commit is contained in:
171
pkg/scrape/scraper/rss.go
Normal file
171
pkg/scrape/scraper/rss.go
Normal file
@@ -0,0 +1,171 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/stretchr/testify/mock"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
textconvert "github.com/glidea/zenfeed/pkg/util/text_convert"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
// ScrapeSourceRSS describes where an RSS feed is fetched from.
//
// Either URL or RSSHubEndpoint must be set. When URL is empty, Validate
// derives it from RSSHubEndpoint and RSSHubRoutePath.
type ScrapeSourceRSS struct {
	// URL is the feed address. Validate requires an "http://" or "https://" prefix.
	URL string
	// RSSHubEndpoint is the base address used to build URL when URL is empty.
	RSSHubEndpoint string
	// RSSHubRoutePath is the path joined onto RSSHubEndpoint when building URL.
	RSSHubRoutePath string
}
|
||||
|
||||
func (c *ScrapeSourceRSS) Validate() error {
|
||||
if c.URL == "" && c.RSSHubEndpoint == "" {
|
||||
return errors.New("URL or RSSHubEndpoint can not be empty at the same time")
|
||||
}
|
||||
if c.URL == "" {
|
||||
c.URL = strings.TrimSuffix(c.RSSHubEndpoint, "/") + "/" + strings.TrimPrefix(c.RSSHubRoutePath, "/")
|
||||
}
|
||||
if c.URL != "" && !strings.HasPrefix(c.URL, "http://") && !strings.HasPrefix(c.URL, "https://") {
|
||||
return errors.New("URL must be a valid HTTP/HTTPS URL")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
func newRSSReader(config *ScrapeSourceRSS) (reader, error) {
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrapf(err, "invalid RSS config")
|
||||
}
|
||||
|
||||
return &rssReader{
|
||||
config: config,
|
||||
client: &gofeedClient{
|
||||
url: config.URL,
|
||||
base: gofeed.NewParser(),
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
|
||||
type rssReader struct {
|
||||
config *ScrapeSourceRSS
|
||||
client client
|
||||
}
|
||||
|
||||
func (r *rssReader) Read(ctx context.Context) ([]*model.Feed, error) {
|
||||
feed, err := r.client.Get(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "fetching RSS feed")
|
||||
}
|
||||
if len(feed.Items) == 0 {
|
||||
return []*model.Feed{}, nil
|
||||
}
|
||||
|
||||
now := clk.Now()
|
||||
feeds := make([]*model.Feed, 0, len(feed.Items))
|
||||
for _, fi := range feed.Items {
|
||||
item, err := r.toResultFeed(now, fi)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "converting feed item")
|
||||
}
|
||||
|
||||
feeds = append(feeds, item)
|
||||
}
|
||||
|
||||
return feeds, nil
|
||||
}
|
||||
|
||||
func (r *rssReader) toResultFeed(now time.Time, feedFeed *gofeed.Item) (*model.Feed, error) {
|
||||
content := r.combineContent(feedFeed.Content, feedFeed.Description)
|
||||
|
||||
// Ensure the content is markdown.
|
||||
mdContent, err := textconvert.HTMLToMarkdown([]byte(content))
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "converting content to markdown")
|
||||
}
|
||||
|
||||
// Create the feed item.
|
||||
feed := &model.Feed{
|
||||
Labels: model.Labels{
|
||||
{Key: model.LabelType, Value: "rss"},
|
||||
{Key: model.LabelTitle, Value: feedFeed.Title},
|
||||
{Key: model.LabelLink, Value: feedFeed.Link},
|
||||
{Key: model.LabelPubTime, Value: r.parseTime(feedFeed).Format(time.RFC3339)},
|
||||
{Key: model.LabelContent, Value: string(mdContent)},
|
||||
},
|
||||
Time: now,
|
||||
}
|
||||
|
||||
return feed, nil
|
||||
}
|
||||
|
||||
// parseTime parses the publication time from the feed item.
|
||||
// If the feed item does not have a publication time, it returns the current time.
|
||||
func (r *rssReader) parseTime(feedFeed *gofeed.Item) time.Time {
|
||||
if feedFeed.PublishedParsed == nil {
|
||||
return clk.Now().In(time.Local)
|
||||
}
|
||||
|
||||
return feedFeed.PublishedParsed.In(time.Local)
|
||||
}
|
||||
|
||||
// combineContent combines Content and Description fields with proper formatting.
|
||||
func (r *rssReader) combineContent(content, description string) string {
|
||||
switch {
|
||||
case content == "":
|
||||
return description
|
||||
case description == "":
|
||||
return content
|
||||
default:
|
||||
return strings.Join([]string{description, content}, "\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
type client interface {
|
||||
Get(ctx context.Context) (*gofeed.Feed, error)
|
||||
}
|
||||
|
||||
type gofeedClient struct {
|
||||
url string
|
||||
base *gofeed.Parser
|
||||
}
|
||||
|
||||
func (c *gofeedClient) Get(ctx context.Context) (*gofeed.Feed, error) {
|
||||
return c.base.ParseURLWithContext(c.url, ctx)
|
||||
}
|
||||
|
||||
type mockClient struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
func newMockClient() *mockClient {
|
||||
return &mockClient{}
|
||||
}
|
||||
|
||||
func (c *mockClient) Get(ctx context.Context) (*gofeed.Feed, error) {
|
||||
args := c.Called(ctx)
|
||||
if args.Error(1) != nil {
|
||||
return nil, args.Error(1)
|
||||
}
|
||||
|
||||
return args.Get(0).(*gofeed.Feed), nil
|
||||
}
|
||||
440
pkg/scrape/scraper/rss_test.go
Normal file
440
pkg/scrape/scraper/rss_test.go
Normal file
@@ -0,0 +1,440 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
. "github.com/onsi/gomega"
|
||||
"github.com/stretchr/testify/mock"
|
||||
"k8s.io/utils/ptr"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
func TestNewRSS(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
config *ScrapeSourceRSS
|
||||
}
|
||||
type whenDetail struct{} // No specific action details needed for New
|
||||
type thenExpected struct {
|
||||
wantErr bool
|
||||
wantErrMsg string
|
||||
validateFunc func(t *testing.T, r reader) // Optional validation for successful creation
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Invalid Configuration - Empty URL and RSSHub",
|
||||
Given: "a configuration with empty URL and empty RSSHub config",
|
||||
When: "creating a new RSS reader",
|
||||
Then: "should return a validation error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
wantErr: true,
|
||||
wantErrMsg: "URL or RSSHubEndpoint can not be empty at the same time",
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Invalid Configuration - Invalid URL format",
|
||||
Given: "a configuration with an invalid URL format",
|
||||
When: "creating a new RSS reader",
|
||||
Then: "should return a URL format error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
URL: "invalid-url",
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
wantErr: true,
|
||||
wantErrMsg: "URL must be a valid HTTP/HTTPS URL",
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Valid Configuration - URL only",
|
||||
Given: "a valid configuration with only URL",
|
||||
When: "creating a new RSS reader",
|
||||
Then: "should succeed and return a valid reader",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
URL: "http://example.com/feed",
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
wantErr: false,
|
||||
validateFunc: func(t *testing.T, r reader) {
|
||||
Expect(r).NotTo(BeNil())
|
||||
rssReader, ok := r.(*rssReader)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(rssReader.config.URL).To(Equal("http://example.com/feed"))
|
||||
// Expect(rssReader.addtionalLabels).To(HaveKey("custom")) // NOTE: rssReader doesn't handle additional labels directly
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Valid Configuration - RSSHub only",
|
||||
Given: "a valid configuration with only RSSHub details",
|
||||
When: "creating a new RSS reader",
|
||||
Then: "should succeed, construct the URL, and return a valid reader",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
RSSHubEndpoint: "http://rsshub.app/",
|
||||
RSSHubRoutePath: "/_/test",
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
wantErr: false,
|
||||
validateFunc: func(t *testing.T, r reader) {
|
||||
Expect(r).NotTo(BeNil())
|
||||
rssReader, ok := r.(*rssReader)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(rssReader.config.URL).To(Equal("http://rsshub.app/_/test"))
|
||||
Expect(rssReader.config.RSSHubEndpoint).To(Equal("http://rsshub.app/"))
|
||||
Expect(rssReader.config.RSSHubRoutePath).To(Equal("/_/test"))
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given & When ---
|
||||
r, err := newRSSReader(tt.GivenDetail.config)
|
||||
|
||||
// --- Then ---
|
||||
if tt.ThenExpected.wantErr {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.wantErrMsg))
|
||||
Expect(r).To(BeNil())
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(r).NotTo(BeNil())
|
||||
if tt.ThenExpected.validateFunc != nil {
|
||||
tt.ThenExpected.validateFunc(t, r)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestReader_Read(t *testing.T) { // Renamed from TestReader_Read
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
config *ScrapeSourceRSS
|
||||
mockClient func(m *mock.Mock) // Setup mock client behavior
|
||||
}
|
||||
type whenDetail struct{} // Context is passed, no specific details needed here
|
||||
type thenExpected struct {
|
||||
feeds []*model.Feed
|
||||
isErr bool
|
||||
wantErrMsg string
|
||||
validateFunc func(t *testing.T, feeds []*model.Feed) // Custom validation
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
now := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) // Fixed time for predictable results
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Basic Feed Fetching",
|
||||
Given: "a valid RSS config and a client returning one feed item",
|
||||
When: "reading the feed",
|
||||
Then: "should return one parsed feed with correct labels",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
URL: "http://techblog.com/feed",
|
||||
},
|
||||
mockClient: func(m *mock.Mock) {
|
||||
m.On("Get", mock.Anything).Return(&gofeed.Feed{
|
||||
Items: []*gofeed.Item{
|
||||
{
|
||||
Title: "New Tech Article",
|
||||
Description: "Content about new technology",
|
||||
Link: "http://techblog.com/1",
|
||||
PublishedParsed: ptr.To(now.Add(-1 * time.Hour)), // Use fixed time offset
|
||||
},
|
||||
},
|
||||
}, nil)
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: false,
|
||||
validateFunc: func(t *testing.T, feeds []*model.Feed) {
|
||||
Expect(feeds).To(HaveLen(1))
|
||||
Expect(feeds[0].Labels).To(ContainElement(model.Label{Key: model.LabelType, Value: "rss"}))
|
||||
Expect(feeds[0].Labels).To(ContainElement(model.Label{Key: model.LabelTitle, Value: "New Tech Article"}))
|
||||
Expect(feeds[0].Labels).To(ContainElement(model.Label{Key: model.LabelLink, Value: "http://techblog.com/1"}))
|
||||
Expect(feeds[0].Labels).To(ContainElement(model.Label{Key: model.LabelContent, Value: "Content about new technology"})) // Assuming HTML to Markdown conversion is trivial here
|
||||
Expect(feeds[0].Labels).To(ContainElement(model.Label{Key: model.LabelPubTime, Value: now.Add(-1 * time.Hour).In(time.Local).Format(time.RFC3339)}))
|
||||
// Note: Feed.Time is set by scraper using clk, not tested directly here.
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Client returns error",
|
||||
Given: "a valid RSS config and a client returning an error",
|
||||
When: "reading the feed",
|
||||
Then: "should return the wrapped error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
URL: "http://techblog.com/feed",
|
||||
},
|
||||
mockClient: func(m *mock.Mock) {
|
||||
m.On("Get", mock.Anything).Return(nil, errors.New("network error"))
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: true,
|
||||
wantErrMsg: "fetching RSS feed: network error",
|
||||
feeds: nil,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Client returns empty feed",
|
||||
Given: "a valid RSS config and a client returning an empty feed",
|
||||
When: "reading the feed",
|
||||
Then: "should return an empty slice of feeds without error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &ScrapeSourceRSS{
|
||||
URL: "http://techblog.com/empty",
|
||||
},
|
||||
mockClient: func(m *mock.Mock) {
|
||||
m.On("Get", mock.Anything).Return(&gofeed.Feed{Items: []*gofeed.Item{}}, nil)
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: false,
|
||||
feeds: []*model.Feed{},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given ---
|
||||
// Create the reader instance first
|
||||
r, err := newRSSReader(tt.GivenDetail.config)
|
||||
Expect(err).NotTo(HaveOccurred(), "newRSSReader should succeed for valid test config")
|
||||
rssReader, ok := r.(*rssReader)
|
||||
Expect(ok).To(BeTrue(), "Expected reader to be of type *rssReader")
|
||||
|
||||
// Create and setup the mock client
|
||||
mockCli := newMockClient() // Use the existing mockClient constructor
|
||||
if tt.GivenDetail.mockClient != nil {
|
||||
tt.GivenDetail.mockClient(&mockCli.Mock)
|
||||
}
|
||||
|
||||
// Inject the mock client into the reader instance
|
||||
rssReader.client = mockCli
|
||||
|
||||
// --- When ---
|
||||
feeds, err := r.Read(context.Background())
|
||||
|
||||
// --- Then ---
|
||||
if tt.ThenExpected.isErr {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.wantErrMsg))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
}
|
||||
|
||||
// Validate feeds using either direct comparison or custom func
|
||||
if tt.ThenExpected.validateFunc != nil {
|
||||
tt.ThenExpected.validateFunc(t, feeds)
|
||||
} else {
|
||||
Expect(feeds).To(Equal(tt.ThenExpected.feeds)) // Direct comparison if no custom validation
|
||||
}
|
||||
|
||||
// Assert mock expectations
|
||||
mockCli.AssertExpectations(t)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTime(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
item *gofeed.Item
|
||||
}
|
||||
type whenDetail struct{}
|
||||
type thenExpected struct {
|
||||
timeIsNow bool // True if expected time should be close to time.Now()
|
||||
exactTime time.Time // Used only if timeIsNow is false
|
||||
}
|
||||
|
||||
fixedTime := time.Date(2024, 1, 1, 10, 30, 0, 0, time.UTC)
|
||||
|
||||
// --- Test cases ---
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Missing Publication Time",
|
||||
Given: "a feed item without publication time",
|
||||
When: "parsing the publication time",
|
||||
Then: "should return current time (approximated)",
|
||||
GivenDetail: givenDetail{
|
||||
item: &gofeed.Item{
|
||||
PublishedParsed: nil, // Explicitly nil
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
timeIsNow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Valid Publication Time",
|
||||
Given: "a feed item with valid publication time",
|
||||
When: "parsing the publication time",
|
||||
Then: "should return the item's publication time in Local timezone",
|
||||
GivenDetail: givenDetail{
|
||||
item: &gofeed.Item{
|
||||
PublishedParsed: ptr.To(fixedTime),
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
timeIsNow: false,
|
||||
exactTime: fixedTime.In(time.Local), // Expect Local time
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
r := &rssReader{} // Instance needed to call the method
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given & When ---
|
||||
result := r.parseTime(tt.GivenDetail.item)
|
||||
|
||||
// --- Then ---
|
||||
if tt.ThenExpected.timeIsNow {
|
||||
// Allow for slight difference when checking against time.Now()
|
||||
Expect(result).To(BeTemporally("~", time.Now(), time.Second))
|
||||
} else {
|
||||
Expect(result).To(Equal(tt.ThenExpected.exactTime))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCombineContent(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
content string
|
||||
description string
|
||||
}
|
||||
type whenDetail struct{}
|
||||
type thenExpected struct {
|
||||
combined string
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Content Only",
|
||||
Given: "a feed item with only content",
|
||||
When: "combining content and description",
|
||||
Then: "should return content only",
|
||||
GivenDetail: givenDetail{
|
||||
content: "test content",
|
||||
description: "",
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
combined: "test content",
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Description Only",
|
||||
Given: "a feed item with only description",
|
||||
When: "combining content and description",
|
||||
Then: "should return description only",
|
||||
GivenDetail: givenDetail{
|
||||
content: "",
|
||||
description: "test description",
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
combined: "test description",
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Both Content and Description",
|
||||
Given: "a feed item with both content and description",
|
||||
When: "combining content and description",
|
||||
Then: "should return combined content with newlines",
|
||||
GivenDetail: givenDetail{
|
||||
content: "test content",
|
||||
description: "test description",
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
combined: "test description\n\ntest content",
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Both Empty",
|
||||
Given: "a feed item with no content and no description",
|
||||
When: "combining content and description",
|
||||
Then: "should return empty string",
|
||||
GivenDetail: givenDetail{
|
||||
content: "",
|
||||
description: "",
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
combined: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
r := &rssReader{} // Instance needed to call the method
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given & When ---
|
||||
got := r.combineContent(tt.GivenDetail.content, tt.GivenDetail.description)
|
||||
|
||||
// --- Then ---
|
||||
Expect(got).To(Equal(tt.ThenExpected.combined))
|
||||
})
|
||||
}
|
||||
}
|
||||
288
pkg/scrape/scraper/scraper.go
Normal file
288
pkg/scrape/scraper/scraper.go
Normal file
@@ -0,0 +1,288 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed"
|
||||
"github.com/glidea/zenfeed/pkg/storage/kv"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
hashutil "github.com/glidea/zenfeed/pkg/util/hash"
|
||||
"github.com/glidea/zenfeed/pkg/util/retry"
|
||||
timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
)
|
||||
|
||||
// clk is the package-wide time source; code in this package reads the current
// time through clk.Now().
var clk = clock.New()
|
||||
|
||||
// --- Interface code block ---
|
||||
type Scraper interface {
|
||||
component.Component
|
||||
Config() *Config
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Past time.Duration
|
||||
Interval time.Duration
|
||||
Name string
|
||||
Labels model.Labels
|
||||
RSS *ScrapeSourceRSS
|
||||
}
|
||||
|
||||
// maxPast is the upper bound Validate applies to Config.Past; the KV key ttl
// is derived from it as well.
const maxPast = 15 * 24 * time.Hour
|
||||
|
||||
func (c *Config) Validate() error {
|
||||
if c.Past <= 0 {
|
||||
c.Past = 3 * timeutil.Day
|
||||
}
|
||||
if c.Past > maxPast {
|
||||
c.Past = maxPast
|
||||
}
|
||||
if c.Interval <= 0 {
|
||||
c.Interval = time.Hour
|
||||
}
|
||||
if c.Interval < 10*time.Minute {
|
||||
c.Interval = 10 * time.Minute
|
||||
}
|
||||
if c.Name == "" {
|
||||
return errors.New("name cannot be empty")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type Dependencies struct {
|
||||
FeedStorage feed.Storage
|
||||
KVStorage kv.Storage
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
// Factory creates Scraper instances from a Config and Dependencies.
type Factory component.Factory[Scraper, Config, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Scraper, Config, Dependencies](
|
||||
func(instance string, config *Config, dependencies Dependencies) (Scraper, error) {
|
||||
m := &mockScraper{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Scraper, Config, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, config *Config, dependencies Dependencies) (Scraper, error) {
|
||||
if err := config.Validate(); err != nil {
|
||||
return nil, errors.Wrap(err, "invalid scraper config")
|
||||
}
|
||||
|
||||
source, err := newReader(config)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "creating source")
|
||||
}
|
||||
|
||||
return &scraper{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "Scraper",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
source: source,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
|
||||
type scraper struct {
|
||||
*component.Base[Config, Dependencies]
|
||||
|
||||
source reader
|
||||
}
|
||||
|
||||
func (s *scraper) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
// Add random offset to avoid synchronized scraping.
|
||||
offset := timeutil.Random(time.Minute)
|
||||
log.Debug(ctx, "computed scrape offset", "offset", offset)
|
||||
|
||||
timer := time.NewTimer(offset)
|
||||
defer timer.Stop()
|
||||
s.MarkReady()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-timer.C:
|
||||
s.scrapeUntilSuccess(ctx)
|
||||
timer.Reset(s.Config().Interval)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scraper) scrapeUntilSuccess(ctx context.Context) {
|
||||
_ = retry.Backoff(ctx, func() (err error) {
|
||||
opCtx := telemetry.StartWith(ctx, append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "scrape")...)
|
||||
defer func() { telemetry.End(opCtx, err) }()
|
||||
timeout := 20 * time.Minute // For llm rewrite, it may take a long time.
|
||||
opCtx, cancel := context.WithTimeout(opCtx, timeout)
|
||||
defer cancel()
|
||||
|
||||
// Read feeds from source.
|
||||
feeds, err := s.source.Read(opCtx)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "reading source feeds")
|
||||
}
|
||||
log.Debug(opCtx, "reading source feeds success", "count", len(feeds))
|
||||
|
||||
// Process feeds.
|
||||
processed := s.processFeeds(ctx, feeds)
|
||||
log.Debug(opCtx, "processed feeds", "count", len(processed))
|
||||
if len(processed) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Save processed feeds.
|
||||
if err := s.Dependencies().FeedStorage.Append(opCtx, processed...); err != nil {
|
||||
return errors.Wrap(err, "saving feeds")
|
||||
}
|
||||
log.Debug(opCtx, "appending feeds success")
|
||||
|
||||
return nil
|
||||
}, &retry.Options{
|
||||
MinInterval: time.Minute,
|
||||
MaxInterval: 16 * time.Minute,
|
||||
MaxAttempts: retry.InfAttempts,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *scraper) processFeeds(ctx context.Context, feeds []*model.Feed) []*model.Feed {
|
||||
feeds = s.filterPasted(feeds)
|
||||
feeds = s.addAdditionalMetaLabels(feeds)
|
||||
feeds = s.fillIDs(feeds)
|
||||
feeds = s.filterExists(ctx, feeds)
|
||||
|
||||
return feeds
|
||||
}
|
||||
|
||||
func (s *scraper) filterPasted(feeds []*model.Feed) (filtered []*model.Feed) {
|
||||
now := clk.Now()
|
||||
for _, feed := range feeds {
|
||||
t := timeutil.MustParse(feed.Labels.Get(model.LabelPubTime))
|
||||
if timeutil.InRange(t, now.Add(-s.Config().Past), now) {
|
||||
filtered = append(filtered, feed)
|
||||
}
|
||||
}
|
||||
|
||||
return filtered
|
||||
}
|
||||
|
||||
func (s *scraper) fillIDs(feeds []*model.Feed) []*model.Feed {
|
||||
for _, feed := range feeds {
|
||||
// We can not use the pub time to join the hash,
|
||||
// because the pub time is dynamic for some sources.
|
||||
source := feed.Labels.Get(model.LabelSource)
|
||||
title := feed.Labels.Get(model.LabelTitle)
|
||||
link := feed.Labels.Get(model.LabelLink)
|
||||
feed.ID = hashutil.Sum64s([]string{source, title, link})
|
||||
}
|
||||
|
||||
return feeds
|
||||
}
|
||||
|
||||
const (
|
||||
keyPrefix = "scraper.feed.try-append."
|
||||
ttl = maxPast + time.Minute // Ensure the key is always available util the feed is pasted.
|
||||
)
|
||||
|
||||
func (s *scraper) filterExists(ctx context.Context, feeds []*model.Feed) (filtered []*model.Feed) {
|
||||
appendToResult := func(feed *model.Feed) {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
value := timeutil.Format(feed.Time)
|
||||
if err := s.Dependencies().KVStorage.Set(ctx, key, value, ttl); err != nil {
|
||||
log.Error(ctx, err, "set last try store time")
|
||||
}
|
||||
filtered = append(filtered, feed)
|
||||
}
|
||||
|
||||
for _, feed := range feeds {
|
||||
key := keyPrefix + strconv.FormatUint(feed.ID, 10)
|
||||
|
||||
lastTryStored, err := s.Dependencies().KVStorage.Get(ctx, key)
|
||||
switch {
|
||||
default:
|
||||
log.Error(ctx, err, "get last stored time, fallback to continue writing")
|
||||
appendToResult(feed)
|
||||
|
||||
case errors.Is(err, kv.ErrNotFound):
|
||||
appendToResult(feed)
|
||||
|
||||
case err == nil:
|
||||
t, err := timeutil.Parse(lastTryStored)
|
||||
if err != nil {
|
||||
log.Error(ctx, err, "parse last try stored time, fallback to continue writing")
|
||||
appendToResult(feed)
|
||||
}
|
||||
|
||||
exists, err := s.Dependencies().FeedStorage.Exists(ctx, feed.ID, t)
|
||||
if err != nil {
|
||||
log.Error(ctx, err, "check feed exists, fallback to continue writing")
|
||||
appendToResult(feed)
|
||||
}
|
||||
if !exists {
|
||||
appendToResult(feed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return filtered
|
||||
}
|
||||
|
||||
func (s *scraper) addAdditionalMetaLabels(feeds []*model.Feed) []*model.Feed {
|
||||
for _, feed := range feeds {
|
||||
feed.Labels = append(
|
||||
feed.Labels,
|
||||
append(s.Config().Labels, model.Label{Key: model.LabelSource, Value: s.Config().Name})...,
|
||||
)
|
||||
feed.Labels.EnsureSorted()
|
||||
}
|
||||
|
||||
return feeds
|
||||
}
|
||||
|
||||
type mockScraper struct {
|
||||
component.Mock
|
||||
}
|
||||
|
||||
func (s *mockScraper) Config() *Config {
|
||||
args := s.Called()
|
||||
|
||||
return args.Get(0).(*Config)
|
||||
}
|
||||
294
pkg/scrape/scraper/scraper_test.go
Normal file
294
pkg/scrape/scraper/scraper_test.go
Normal file
@@ -0,0 +1,294 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
)
|
||||
|
||||
func TestConfig_Validate(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
config *Config
|
||||
}
|
||||
type whenDetail struct{} // Validation is the action
|
||||
type thenExpected struct {
|
||||
expectedConfig *Config // Expected state after validation
|
||||
isErr bool
|
||||
wantErrMsg string
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Default values",
|
||||
Given: "a config with zero values for Past and Interval and non-empty Name",
|
||||
When: "validating the config",
|
||||
Then: "should set default Past and Interval, and no error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &Config{Name: "test"}, // Name is required
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
expectedConfig: &Config{
|
||||
Name: "test",
|
||||
Past: 3 * timeutil.Day, // Default Past
|
||||
Interval: time.Hour, // Default/Minimum Interval
|
||||
},
|
||||
isErr: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Past exceeds maximum",
|
||||
Given: "a config with Past exceeding the maximum limit",
|
||||
When: "validating the config",
|
||||
Then: "should cap Past to the maximum value",
|
||||
GivenDetail: givenDetail{
|
||||
config: &Config{Name: "test", Past: maxPast + time.Hour},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
expectedConfig: &Config{
|
||||
Name: "test",
|
||||
Past: maxPast, // Capped Past
|
||||
Interval: time.Hour, // Default Interval
|
||||
},
|
||||
isErr: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Interval below minimum",
|
||||
Given: "a config with Interval below the minimum limit",
|
||||
When: "validating the config",
|
||||
Then: "should set Interval to the minimum value",
|
||||
GivenDetail: givenDetail{
|
||||
config: &Config{Name: "test", Interval: time.Second},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
expectedConfig: &Config{
|
||||
Name: "test",
|
||||
Past: 3 * timeutil.Day, // Default Past
|
||||
Interval: 10 * time.Minute, // Minimum Interval
|
||||
},
|
||||
isErr: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Valid values",
|
||||
Given: "a config with valid Past and Interval",
|
||||
When: "validating the config",
|
||||
Then: "should keep the original values",
|
||||
GivenDetail: givenDetail{
|
||||
config: &Config{
|
||||
Name: "test",
|
||||
Past: 24 * time.Hour,
|
||||
Interval: 30 * time.Minute,
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
expectedConfig: &Config{
|
||||
Name: "test",
|
||||
Past: 24 * time.Hour,
|
||||
Interval: 30 * time.Minute,
|
||||
},
|
||||
isErr: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Missing Name",
|
||||
Given: "a config with an empty Name",
|
||||
When: "validating the config",
|
||||
Then: "should return an error",
|
||||
GivenDetail: givenDetail{
|
||||
config: &Config{}, // Empty Name
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: true,
|
||||
wantErrMsg: "name cannot be empty",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given ---
|
||||
config := tt.GivenDetail.config // Use the config from the test case
|
||||
|
||||
// --- When ---
|
||||
err := config.Validate()
|
||||
|
||||
// --- Then ---
|
||||
if tt.ThenExpected.isErr {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.wantErrMsg))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
// Compare the validated config with the expected one
|
||||
Expect(config).To(Equal(tt.ThenExpected.expectedConfig))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
// --- Test types ---
|
||||
type givenDetail struct {
|
||||
instance string
|
||||
config *Config
|
||||
dependencies Dependencies // Keep dependencies empty for now, focus on config validation
|
||||
}
|
||||
type whenDetail struct{} // Creation is the action
|
||||
type thenExpected struct {
|
||||
isErr bool
|
||||
wantErrMsg string
|
||||
validateFunc func(t *testing.T, s Scraper) // Optional validation
|
||||
}
|
||||
|
||||
// --- Test cases ---
|
||||
validRSSConfig := &ScrapeSourceRSS{URL: "http://valid.com/feed"}
|
||||
validBaseConfig := &Config{
|
||||
Name: "test-scraper",
|
||||
Interval: 15 * time.Minute, // Valid interval
|
||||
RSS: validRSSConfig, // Need a valid source for newReader
|
||||
}
|
||||
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Valid Configuration",
|
||||
Given: "a valid config and dependencies",
|
||||
When: "creating a new scraper",
|
||||
Then: "should create scraper successfully",
|
||||
GivenDetail: givenDetail{
|
||||
instance: "scraper-1",
|
||||
config: validBaseConfig,
|
||||
dependencies: Dependencies{}, // Empty deps are okay for New itself
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: false,
|
||||
validateFunc: func(t *testing.T, s Scraper) {
|
||||
Expect(s).NotTo(BeNil())
|
||||
Expect(s.Name()).To(Equal("Scraper")) // From Base component
|
||||
Expect(s.Instance()).To(Equal("scraper-1"))
|
||||
Expect(s.Config()).To(Equal(validBaseConfig)) // Check if config is stored
|
||||
|
||||
// Check internal state if needed (e.g., source type)
|
||||
concreteScraper, ok := s.(*scraper)
|
||||
Expect(ok).To(BeTrue())
|
||||
Expect(concreteScraper.source).NotTo(BeNil())
|
||||
_, isRSSReader := concreteScraper.source.(*rssReader)
|
||||
Expect(isRSSReader).To(BeTrue())
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Invalid Configuration - Validation Fail",
|
||||
Given: "a config that fails validation (e.g., missing name)",
|
||||
When: "creating a new scraper",
|
||||
Then: "should return a validation error",
|
||||
GivenDetail: givenDetail{
|
||||
instance: "scraper-invalid",
|
||||
config: &Config{ // Missing Name, invalid interval
|
||||
Interval: time.Second,
|
||||
RSS: validRSSConfig,
|
||||
},
|
||||
dependencies: Dependencies{},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: true,
|
||||
wantErrMsg: "invalid scraper config: name cannot be empty", // Specific validation error
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Invalid Configuration - Source Creation Fail",
|
||||
Given: "a config that passes validation but has invalid source details",
|
||||
When: "creating a new scraper",
|
||||
Then: "should return an error from source creation",
|
||||
GivenDetail: givenDetail{
|
||||
instance: "scraper-bad-source",
|
||||
config: &Config{
|
||||
Name: "test-bad-source",
|
||||
Interval: 15 * time.Minute,
|
||||
RSS: &ScrapeSourceRSS{URL: "invalid-url-format"}, // Invalid RSS URL
|
||||
},
|
||||
dependencies: Dependencies{},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: true,
|
||||
wantErrMsg: "creating source: invalid RSS config: URL must be a valid HTTP/HTTPS URL", // Error from newRSSReader via newReader
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Invalid Configuration - No Source Configured",
|
||||
Given: "a config that passes validation but lacks any source config (RSS is nil)",
|
||||
When: "creating a new scraper",
|
||||
Then: "should return an error indicating unsupported source",
|
||||
GivenDetail: givenDetail{
|
||||
instance: "scraper-no-source",
|
||||
config: &Config{
|
||||
Name: "test-no-source",
|
||||
Interval: 15 * time.Minute,
|
||||
RSS: nil, // No source configured
|
||||
},
|
||||
dependencies: Dependencies{},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
isErr: true,
|
||||
wantErrMsg: "creating source: source not supported", // Error from newReader
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// --- Run tests ---
|
||||
factory := NewFactory() // Use the real factory
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// --- Given & When ---
|
||||
s, err := factory.New(tt.GivenDetail.instance, tt.GivenDetail.config, tt.GivenDetail.dependencies)
|
||||
|
||||
// --- Then ---
|
||||
if tt.ThenExpected.isErr {
|
||||
Expect(err).To(HaveOccurred())
|
||||
// Use MatchError for wrapped errors if necessary, but ContainSubstring is often sufficient
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.wantErrMsg))
|
||||
Expect(s).To(BeNil())
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(s).NotTo(BeNil())
|
||||
if tt.ThenExpected.validateFunc != nil {
|
||||
tt.ThenExpected.validateFunc(t, s)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
62
pkg/scrape/scraper/source.go
Normal file
62
pkg/scrape/scraper/source.go
Normal file
@@ -0,0 +1,62 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
|
||||
// reader defines interface for reading from different data sources.
|
||||
type reader interface {
|
||||
// Read fetches content from the data source.
|
||||
// Returns a slice of feeds and any error encountered.
|
||||
Read(ctx context.Context) ([]*model.Feed, error)
|
||||
}
|
||||
|
||||
// --- Factory code block ---
|
||||
func newReader(config *Config) (reader, error) {
|
||||
if config.RSS != nil {
|
||||
return newRSSReader(config.RSS)
|
||||
}
|
||||
|
||||
return nil, errors.New("source not supported")
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
|
||||
type mockReader struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
func NewMock() *mockReader {
|
||||
return &mockReader{}
|
||||
}
|
||||
|
||||
func (m *mockReader) Read(ctx context.Context) ([]*model.Feed, error) {
|
||||
args := m.Called(ctx)
|
||||
if feeds := args.Get(0); feeds != nil {
|
||||
return feeds.([]*model.Feed), args.Error(1)
|
||||
}
|
||||
|
||||
return nil, args.Error(1)
|
||||
}
|
||||
Reference in New Issue
Block a user