This commit is contained in:
glidea
2025-04-19 15:50:26 +08:00
commit 8b33df8a05
109 changed files with 24407 additions and 0 deletions

573
pkg/rewrite/rewrite.go Normal file
View File

@@ -0,0 +1,573 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package rewrite
import (
"context"
"html/template"
"regexp"
"unicode/utf8"
"unsafe"
"github.com/pkg/errors"
"k8s.io/utils/ptr"
"github.com/glidea/zenfeed/pkg/component"
"github.com/glidea/zenfeed/pkg/config"
"github.com/glidea/zenfeed/pkg/llm"
"github.com/glidea/zenfeed/pkg/model"
"github.com/glidea/zenfeed/pkg/telemetry"
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
"github.com/glidea/zenfeed/pkg/util/buffer"
)
// --- Interface code block ---
type Rewriter interface {
component.Component
config.Watcher
// Labels applies rewrite rules to the given labels and returns the modified labels.
// Note: this method modifies the input labels in place.
// If a rule's action is ActionDropFeed, it returns nil to indicate the item should be dropped.
Labels(ctx context.Context, labels model.Labels) (model.Labels, error)
}
type Config []Rule
func (c *Config) Validate() error {
for i := range *c {
if err := (*c)[i].Validate(); err != nil {
return errors.Wrapf(err, "validate and adjust rewrite config")
}
}
return nil
}
func (c *Config) From(app *config.App) {
for _, r := range app.Storage.Feed.Rewrites {
var rc Rule
rc.From(&r)
*c = append(*c, rc)
}
}
type Dependencies struct {
LLMFactory llm.Factory
}
type Rule struct {
// SourceLabel specifies which label's value to use as source text.
// Default is model.LabelContent.
SourceLabel string
// SkipTooShortThreshold is the threshold of the source text length.
// If the source text is shorter than this threshold, it will be skipped.
SkipTooShortThreshold *int
// Transform used to transform the source text.
// If not set, transform to original source text.
Transform *Transform
// Match used to match the text after transform.
// If not set, match all.
Match string
matchRE *regexp.Regexp
// Action determines what to do if matchs.
Action Action
// Label is the label to create or update.
Label string
}
func (r *Rule) Validate() error { //nolint:cyclop
// Source label.
if r.SourceLabel == "" {
r.SourceLabel = model.LabelContent
}
if r.SkipTooShortThreshold == nil {
r.SkipTooShortThreshold = ptr.To(300)
}
// Transform.
if r.Transform != nil {
if r.Transform.ToText.Prompt == "" {
return errors.New("to text prompt is required")
}
tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
if err != nil {
return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
}
buf := buffer.Get()
defer buffer.Put(buf)
if err := tmpl.Execute(buf, promptTemplates); err != nil {
return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
}
r.Transform.ToText.promptRendered = buf.String()
}
// Match.
if r.Match == "" {
r.Match = ".*"
}
re, err := regexp.Compile(r.Match)
if err != nil {
return errors.Wrapf(err, "compile match regex %s", r.Match)
}
r.matchRE = re
// Action.
switch r.Action {
case "":
r.Action = ActionCreateOrUpdateLabel
case ActionCreateOrUpdateLabel:
if r.Label == "" {
return errors.New("label is required for create or update label action")
}
case ActionDropFeed:
default:
return errors.Errorf("invalid action: %s", r.Action)
}
return nil
}
func (r *Rule) From(c *config.RewriteRule) {
r.SourceLabel = c.SourceLabel
r.SkipTooShortThreshold = c.SkipTooShortThreshold
if c.Transform != nil {
t := &Transform{}
if c.Transform.ToText != nil {
t.ToText = &ToText{
LLM: c.Transform.ToText.LLM,
Prompt: c.Transform.ToText.Prompt,
}
}
r.Transform = t
}
r.Match = c.Match
if r.Match == "" {
r.Match = c.MatchRE
}
r.Action = Action(c.Action)
r.Label = c.Label
}
type Transform struct {
ToText *ToText
}
type ToText struct {
// LLM is the name of the LLM to use.
LLM string
// Prompt is the prompt for LLM completion.
// The source text will automatically be injected into the prompt.
Prompt string
promptRendered string
}
type Action string
const (
ActionDropFeed Action = "drop_feed"
ActionCreateOrUpdateLabel Action = "create_or_update_label"
)
var promptTemplates = map[string]string{
"category": `
Analyze the content and categorize it into exactly one of these categories:
Technology, Development, Entertainment, Finance, Health, Politics, Other
Classification requirements:
- Choose the SINGLE most appropriate category based on:
* Primary topic and main focus of the content
* Key terminology and concepts used
* Target audience and purpose
* Technical depth and complexity level
- For content that could fit multiple categories:
* Identify the dominant theme
* Consider the most specific applicable category
* Use the primary intended purpose
- If content appears ambiguous:
* Focus on the most prominent aspects
* Consider the practical application
* Choose the category that best serves user needs
Output format:
Return ONLY the category name, no other text or explanation.
Must be one of the provided categories exactly as written.
`,
"tags": `
Analyze the content and add appropriate tags based on:
- Main topics and themes
- Key concepts and terminology
- Target audience and purpose
- Technical depth and domain
- 2-4 tags are enough
Output format:
Return a list of tags, separated by commas, no other text or explanation.
e.g. "AI, Technology, Innovation, Future"
`,
"score": `
Please give a score between 0 and 10 based on the following content.
Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
Output format:
Return the score (0-10), no other text or explanation.
E.g. "8", "5", "3", etc.
`,
"comment_confucius": `
Please act as Confucius and write a 100-word comment on the article.
Content needs to be in line with the Chinese mainland's regulations.
Output format:
Return the comment only, no other text or explanation.
Reply short and concise, 100 words is enough.
`,
"summary": `
Summarize the article in 100-200 words.
`,
"summary_html_snippet": `
# Task: Create Visually Appealing Information Summary Emails
You are a professional content designer. Please convert the provided articles into **visually modern HTML email segments**, focusing on display effects in modern clients like Gmail and QQ Mail.
## Key Requirements:
1. **Output Format**:
- Only output HTML code snippets, **no need for complete HTML document structure**
- Only generate HTML code for a single article, so users can combine multiple pieces into a complete email
- No explanations, additional comments, or markups
- **No need to add titles and sources**, users will inject them automatically
- No use html backticks, output raw html code directly
- Output directly, no explanation, no comments, no markups
2. **Content Processing**:
- **Don't directly copy the original text**, but extract key information and core insights from each article
- **Each article summary should be 100-200 words**, don't force word count, adjust the word count based on the actual length of the article
- Summarize points in relaxed, natural language, as if chatting with friends, while maintaining depth
- Maintain the original language of the article (e.g., Chinese summary for Chinese articles)
3. **Visual Design**:
- Design should be aesthetically pleasing with coordinated colors
- Use sufficient whitespace and contrast
- Maintain a consistent visual style across all articles
- **Must use multiple visual elements** (charts, cards, quote blocks, etc.), avoid pure text presentation
- Each article should use at least 2-3 different visual elements to make content more intuitive and readable
4. **Highlight Techniques**:
A. **Beautiful Quote Blocks** (for highlighting important viewpoints):
<div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
Here is the key viewpoint or finding that needs to be highlighted.
</p>
</div>
B. **Information Cards** (for highlighting key data):
<div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
</div>
C. **Key Points List** (for highlighting multiple points):
<ul style="margin:20px 0; padding-left:0; list-style-type:none;">
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
First point description
</li>
<li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
<span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
Second point description
</li>
</ul>
D. **Emphasis Text** (for highlighting key words or phrases):
<span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to emphasize</span>
5. **Timeline Design** (suitable for event sequences or news developments):
<div style="margin:25px 0; padding:5px 0;">
<h3 style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:18px; color:#333; margin-bottom:15px;">Event Development Timeline</h3>
<div style="position:relative; margin-left:30px; padding-left:30px; border-left:2px solid #e0e0e0;">
<!-- Time Point 1 -->
<div style="position:relative; margin-bottom:25px;">
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 1, 2023</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
</div>
<!-- Time Point 2 -->
<div style="position:relative; margin-bottom:25px;">
<div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 15, 2023</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
</div>
</div>
</div>
6. **Comparison Table** (for comparing different options or viewpoints):
<div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
<table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
<thead>
<tr>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
<th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
</tr>
</thead>
<tbody>
<tr>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
</tr>
<tr>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
<td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
</tr>
</tbody>
</table>
</div>
7. **Chart Data Processing**:
- Bar Chart/Horizontal Bars:
<div style="margin:20px 0; padding:15px; background-color:#f8f9fa; border-radius:8px;">
<p style="margin:0 0 15px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Data Comparison</p>
<!-- Item 1 -->
<div style="margin-bottom:12px;">
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project A</span>
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">65%</span>
</div>
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
<div style="height:100%; width:65%; background:linear-gradient(to right, #4285f4, #5e97f6); border-radius:5px;"></div>
</div>
</div>
<!-- Item 2 -->
<div style="margin-bottom:12px;">
<div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project B</span>
<span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">42%</span>
</div>
<div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
<div style="height:100%; width:42%; background:linear-gradient(to right, #ea4335, #f07575); border-radius:5px;"></div>
</div>
</div>
</div>
8. **Highlight Box** (for displaying tips or reminders):
<div style="margin:25px 0; padding:20px; background-color:#fffde7; border-radius:8px; border-left:4px solid #fdd835; box-shadow:0 1px 5px rgba(0,0,0,0.05);">
<div style="display:flex; align-items:flex-start;">
<div style="flex-shrink:0; margin-right:15px; width:24px; height:24px; background-color:#fdd835; border-radius:50%; display:flex; align-items:center; justify-content:center;">
<span style="color:#fff; font-weight:bold; font-size:16px;">!</span>
</div>
<div>
<p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Tip</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#555;">
Here are some additional tips or suggestions to help readers better understand or apply the article content.
</p>
</div>
</div>
</div>
9. **Summary Box**:
<div style="margin:25px 0; padding:20px; background-color:#f2f7fd; border-radius:8px; box-shadow:0 1px 5px rgba(66,133,244,0.1);">
<p style="margin:0 0 10px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#1a73e8;">In Simple Terms</p>
<p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#333;">
This is a concise summary of the entire content, highlighting the most critical findings and conclusions.
</p>
</div>
## Notes:
1. **Only generate content for a single article**, not including title and source, and not including HTML head and tail structure
2. Content should be **200-300 words**, don't force word count
3. **Must use multiple visual elements** (at least 2-3 types), avoid monotonous pure text presentation
4. Use relaxed, natural language, as if chatting with friends
5. Create visual charts for important data, rather than just describing with text
6. Use quote blocks to highlight important viewpoints, and lists to organize multiple points
7. Appropriately use emojis and conversational expressions to increase friendliness
8. Note that the article content has been provided in the previous message, please reply directly, no explanation, no comments, no markups
`,
}
// --- Factory code block ---
type Factory component.Factory[Rewriter, config.App, Dependencies]
func NewFactory(mockOn ...component.MockOption) Factory {
if len(mockOn) > 0 {
return component.FactoryFunc[Rewriter, config.App, Dependencies](func(instance string, app *config.App, dependencies Dependencies) (Rewriter, error) {
m := &mockRewriter{}
component.MockOptions(mockOn).Apply(&m.Mock)
return m, nil
})
}
return component.FactoryFunc[Rewriter, config.App, Dependencies](new)
}
func new(instance string, app *config.App, dependencies Dependencies) (Rewriter, error) {
c := &Config{}
c.From(app)
if err := c.Validate(); err != nil {
return nil, errors.Wrap(err, "validate and adjust rewrite config")
}
return &rewriter{
Base: component.New(&component.BaseConfig[Config, Dependencies]{
Name: "Rewriter",
Instance: instance,
Config: c,
Dependencies: dependencies,
}),
}, nil
}
// --- Implementation code block ---
type rewriter struct {
*component.Base[Config, Dependencies]
}
func (r *rewriter) Reload(app *config.App) error {
newConfig := &Config{}
newConfig.From(app)
if err := newConfig.Validate(); err != nil {
return errors.Wrap(err, "validate and adjust rewrite config")
}
r.SetConfig(newConfig)
return nil
}
func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (model.Labels, error) {
ctx = telemetry.StartWith(ctx, append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Labels")...)
defer func() { telemetry.End(ctx, nil) }()
rules := *r.Config()
for _, rule := range rules {
// Get source text based on source label.
sourceText := labels.Get(rule.SourceLabel)
if utf8.RuneCountInString(sourceText) < *rule.SkipTooShortThreshold {
continue
}
// Transform text if configured.
text := sourceText
if rule.Transform != nil {
transformed, err := r.transformText(ctx, rule.Transform, sourceText)
if err != nil {
return nil, errors.Wrap(err, "transform text")
}
text = transformed
}
// Check if text matches the rule.
if !rule.matchRE.MatchString(text) {
continue
}
// Handle actions.
switch rule.Action {
case ActionDropFeed:
return nil, nil
case ActionCreateOrUpdateLabel:
labels.Put(rule.Label, text, false)
}
}
labels.EnsureSorted()
return labels, nil
}
// transformText transforms text using configured LLM.
func (r *rewriter) transformText(ctx context.Context, transform *Transform, text string) (string, error) {
// Get LLM instance.
llm := r.Dependencies().LLMFactory.Get(transform.ToText.LLM)
// Call completion.
result, err := llm.String(ctx, []string{
transform.ToText.promptRendered,
"The content to be processed is below, and the processing requirements are as above",
text, // TODO: may place to first line to hit the model cache in different rewrite rules.
})
if err != nil {
return "", errors.Wrap(err, "llm completion")
}
return r.transformTextHack(result), nil
}
func (r *rewriter) transformTextHack(text string) string {
bytes := unsafe.Slice(unsafe.StringData(text), len(text))
start := 0
end := len(bytes)
// Remove the last line if it's empty.
// This is a hack to avoid the model output a empty line.
// E.g. category: tech\n
if end > 0 && bytes[end-1] == '\n' {
end--
}
// Remove the html backticks.
if end-start >= 7 && string(bytes[start:start+7]) == "```html" {
start += 7
}
if end-start >= 3 && string(bytes[end-3:end]) == "```" {
end -= 3
}
// If no changes, return the original string.
if start == 0 && end == len(bytes) {
return text
}
// Only copy one time.
return string(bytes[start:end])
}
type mockRewriter struct {
component.Mock
}
func (r *mockRewriter) Reload(app *config.App) error {
args := r.Called(app)
return args.Error(0)
}
func (r *mockRewriter) Labels(ctx context.Context, labels model.Labels) (model.Labels, error) {
args := r.Called(ctx, labels)
if args.Get(0) == nil {
return nil, args.Error(1)
}
return args.Get(0).(model.Labels), args.Error(1)
}

286
pkg/rewrite/rewrite_test.go Normal file
View File

@@ -0,0 +1,286 @@
package rewrite
import (
"context"
"testing"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
"github.com/stretchr/testify/mock"
"k8s.io/utils/ptr"
"github.com/glidea/zenfeed/pkg/component"
"github.com/glidea/zenfeed/pkg/llm"
"github.com/glidea/zenfeed/pkg/model"
"github.com/glidea/zenfeed/pkg/test"
)
func TestLabels(t *testing.T) {
RegisterTestingT(t)
type givenDetail struct {
config *Config
llmMock func(m *mock.Mock)
}
type whenDetail struct {
inputLabels model.Labels
}
type thenExpected struct {
outputLabels model.Labels
err error
isErr bool
}
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
{
Scenario: "Drop feed based on transformed content match",
Given: "a rule to drop feed if transformed content matches 'spam'",
When: "processing labels where transformed content is 'spam'",
Then: "should return nil labels indicating drop",
GivenDetail: givenDetail{
config: &Config{
{
SourceLabel: model.LabelContent,
SkipTooShortThreshold: ptr.To(10),
Transform: &Transform{
ToText: &ToText{
LLM: "mock-llm",
Prompt: "{{ .category }}", // Using a simple template for testing
},
},
Match: "spam",
Action: ActionDropFeed,
},
},
llmMock: func(m *mock.Mock) {
m.On("String", mock.Anything, mock.Anything).Return("spam", nil)
},
},
WhenDetail: whenDetail{
inputLabels: model.Labels{
{Key: model.LabelContent, Value: "This is some content that will be transformed to spam."},
{Key: model.LabelTitle, Value: "Spam Article"},
},
},
ThenExpected: thenExpected{
outputLabels: nil,
isErr: false,
},
},
{
Scenario: "Create/Update label based on transformed content",
Given: "a rule to add a category label based on transformed content",
When: "processing labels where transformed content is 'Technology'",
Then: "should return labels with the new category label",
GivenDetail: givenDetail{
config: &Config{
{
SourceLabel: model.LabelContent,
SkipTooShortThreshold: ptr.To(10),
Transform: &Transform{
ToText: &ToText{
LLM: "mock-llm",
Prompt: "{{ .category }}",
},
},
Match: "Technology",
Action: ActionCreateOrUpdateLabel,
Label: "category",
},
},
llmMock: func(m *mock.Mock) {
m.On("String", mock.Anything, mock.Anything).Return("Technology", nil)
},
},
WhenDetail: whenDetail{
inputLabels: model.Labels{
{Key: model.LabelContent, Value: "Content about AI and programming."},
{Key: model.LabelTitle, Value: "Tech Article"},
},
},
ThenExpected: thenExpected{
outputLabels: model.Labels{
{Key: model.LabelContent, Value: "Content about AI and programming."},
{Key: model.LabelTitle, Value: "Tech Article"},
{Key: "category", Value: "Technology"},
},
isErr: false,
},
},
{
Scenario: "No rules match",
Given: "a rule that does not match the content",
When: "processing labels",
Then: "should return the original labels unchanged",
GivenDetail: givenDetail{
config: &Config{
{
SourceLabel: model.LabelContent,
SkipTooShortThreshold: ptr.To(10),
Match: "NonMatchingPattern",
Action: ActionDropFeed,
},
},
},
WhenDetail: whenDetail{
inputLabels: model.Labels{
{Key: model.LabelContent, Value: "Some regular content."},
{Key: model.LabelTitle, Value: "Regular Article"},
},
},
ThenExpected: thenExpected{
outputLabels: model.Labels{
{Key: model.LabelContent, Value: "Some regular content."},
{Key: model.LabelTitle, Value: "Regular Article"},
},
isErr: false,
},
},
{
Scenario: "LLM transformation error",
Given: "a rule requiring transformation and LLM returns an error",
When: "processing labels",
Then: "should return an error",
GivenDetail: givenDetail{
config: &Config{
{
SourceLabel: model.LabelContent,
SkipTooShortThreshold: ptr.To(10),
Transform: &Transform{
ToText: &ToText{
LLM: "mock-llm",
Prompt: "{{ .category }}",
promptRendered: "Analyze the content and categorize it...",
},
},
Match: ".*",
Action: ActionCreateOrUpdateLabel,
Label: "category",
},
},
llmMock: func(m *mock.Mock) {
m.On("String", mock.Anything, mock.Anything).Return("", errors.New("LLM failed"))
},
},
WhenDetail: whenDetail{
inputLabels: model.Labels{
{Key: model.LabelContent, Value: "Content requiring transformation."},
{Key: model.LabelTitle, Value: "Transform Error Article"},
},
},
ThenExpected: thenExpected{
outputLabels: nil,
err: errors.New("transform text: llm completion: LLM failed"),
isErr: true,
},
},
{
Scenario: "Rule matches but label already exists",
Given: "a rule to add a category label and the label already exists",
When: "processing labels",
Then: "should update the existing label value",
GivenDetail: givenDetail{
config: &Config{
{
SourceLabel: model.LabelContent,
SkipTooShortThreshold: ptr.To(10),
Transform: &Transform{
ToText: &ToText{
LLM: "mock-llm",
Prompt: "{{ .category }}",
promptRendered: "Analyze the content and categorize it...",
},
},
Match: "Finance",
Action: ActionCreateOrUpdateLabel,
Label: "category",
},
},
llmMock: func(m *mock.Mock) {
m.On("String", mock.Anything, mock.Anything).Return("Finance", nil)
},
},
WhenDetail: whenDetail{
inputLabels: model.Labels{
{Key: model.LabelContent, Value: "Content about stock market."},
{Key: model.LabelTitle, Value: "Finance Article"},
{Key: "category", Value: "OldCategory"}, // Existing label
},
},
ThenExpected: thenExpected{
outputLabels: model.Labels{
{Key: model.LabelContent, Value: "Content about stock market."},
{Key: model.LabelTitle, Value: "Finance Article"},
{Key: "category", Value: "Finance"}, // Updated label
},
isErr: false,
},
},
}
for _, tt := range tests {
t.Run(tt.Scenario, func(t *testing.T) {
// Given.
var mockLLMFactory llm.Factory
var mockInstance *mock.Mock // Store the mock instance for assertion
// Create mock factory and capture the mock.Mock instance.
mockOption := component.MockOption(func(m *mock.Mock) {
mockInstance = m // Capture the mock instance.
if tt.GivenDetail.llmMock != nil {
tt.GivenDetail.llmMock(m)
}
})
mockLLMFactory, err := llm.NewFactory("", nil, llm.FactoryDependencies{}, mockOption) // Use the factory directly with the option
Expect(err).NotTo(HaveOccurred())
// Manually validate config to compile regex and render templates.
// In real usage, this happens in `new` or `Reload`.
for i := range *tt.GivenDetail.config {
err := (*tt.GivenDetail.config)[i].Validate()
Expect(err).NotTo(HaveOccurred(), "Rule validation should not fail in test setup")
}
// Instantiate the rewriter with the mock factory
rewriterInstance := &rewriter{
Base: component.New(&component.BaseConfig[Config, Dependencies]{
Name: "TestRewriter",
Instance: "test",
Config: tt.GivenDetail.config,
Dependencies: Dependencies{
LLMFactory: mockLLMFactory, // Pass the mock factory
},
}),
}
// Clone input labels to avoid modification by reference affecting assertions.
inputLabelsCopy := make(model.Labels, len(tt.WhenDetail.inputLabels))
copy(inputLabelsCopy, tt.WhenDetail.inputLabels)
// When.
outputLabels, err := rewriterInstance.Labels(context.Background(), inputLabelsCopy)
// Then.
if tt.ThenExpected.isErr {
Expect(err).To(HaveOccurred())
// Use MatchError for potentially wrapped errors.
Expect(err).To(MatchError(ContainSubstring(tt.ThenExpected.err.Error())))
Expect(outputLabels).To(BeNil())
} else {
Expect(err).NotTo(HaveOccurred())
// Ensure output labels are sorted for consistent comparison.
if outputLabels != nil {
outputLabels.EnsureSorted()
}
tt.ThenExpected.outputLabels.EnsureSorted()
Expect(outputLabels).To(Equal(tt.ThenExpected.outputLabels))
}
// Verify LLM calls if stubs were provided.
if tt.GivenDetail.llmMock != nil && mockInstance != nil {
// Assert expectations on the captured mock instance.
mockInstance.AssertExpectations(t)
}
})
}
}