init

2025-04-19 15:50:26 +08:00
commit 8b33df8a05
109 changed files with 24407 additions and 0 deletions
--- a/pkg/rewrite/rewrite.go
+++ b/pkg/rewrite/rewrite.go
@@ -0,0 +1,573 @@
+// Copyright (C) 2025 wangyusong
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+package rewrite
+
+import (
+	"context"
+	"html/template"
+	"regexp"
+	"unicode/utf8"
+	"unsafe"
+
+	"github.com/pkg/errors"
+	"k8s.io/utils/ptr"
+
+	"github.com/glidea/zenfeed/pkg/component"
+	"github.com/glidea/zenfeed/pkg/config"
+	"github.com/glidea/zenfeed/pkg/llm"
+	"github.com/glidea/zenfeed/pkg/model"
+	"github.com/glidea/zenfeed/pkg/telemetry"
+	telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
+	"github.com/glidea/zenfeed/pkg/util/buffer"
+)
+
+// --- Interface code block ---
+
+// Rewriter applies the configured rewrite rules to the labels of a feed item.
+// It embeds component.Component and config.Watcher.
+type Rewriter interface {
+	component.Component
+	config.Watcher
+
+	// Labels applies the rewrite rules, in configuration order, to the given
+	// labels and returns the modified labels.
+	// Note: this method modifies the input labels in place.
+	// If a matching rule's action is ActionDropFeed, it returns nil labels and a
+	// nil error to indicate the item should be dropped.
+	// A non-nil error is returned when a rule's text transformation fails.
+	Labels(ctx context.Context, labels model.Labels) (model.Labels, error)
+}
+
+// Config is the ordered list of rewrite rules; rules are evaluated in slice order.
+type Config []Rule
+
+// Validate validates every rule in place (each Rule.Validate call also fills
+// in defaults and compiles the rule's regular expression).
+// It stops at the first invalid rule and reports that rule's index.
+func (c *Config) Validate() error {
+	for i := range *c {
+		if err := (*c)[i].Validate(); err != nil {
+			// Callers already add the "validate and adjust rewrite config" context,
+			// so this layer only says which rule is broken.
+			return errors.Wrapf(err, "validate rewrite rule %d", i)
+		}
+	}
+
+	return nil
+}
+
+// From appends one Rule to c for every rewrite rule found under
+// app.Storage.Feed.Rewrites, preserving their order.
+// Existing entries of c are kept; new rules are appended after them.
+func (c *Config) From(app *config.App) {
+	rewrites := app.Storage.Feed.Rewrites
+	for i := range rewrites {
+		var rule Rule
+		rule.From(&rewrites[i])
+		*c = append(*c, rule)
+	}
+}
+
+// Dependencies lists the external components the rewriter needs.
+type Dependencies struct {
+	// LLMFactory resolves the LLM named by a rule's Transform.ToText.LLM.
+	LLMFactory llm.Factory
+}
+
+// Rule is a single rewrite rule: pick a source label, optionally transform its
+// text, test the result against Match and, on a match, perform Action.
+// Validate must be called before a rule is used; it fills in defaults and
+// populates matchRE.
+type Rule struct {
+	// SourceLabel specifies which label's value to use as source text.
+	// Default is model.LabelContent.
+	SourceLabel string
+
+	// SkipTooShortThreshold is the minimum length of the source text, counted in runes.
+	// If the source text is shorter than this threshold, the rule is skipped.
+	// Validate defaults it to 300 when unset.
+	SkipTooShortThreshold *int
+
+	// Transform used to transform the source text.
+	// If not set, the original source text is used as is.
+	Transform *Transform
+
+	// Match is the regular expression used to match the text after transform.
+	// If not set, match all (".*").
+	// matchRE is the compiled form of Match, set by Validate.
+	Match   string
+	matchRE *regexp.Regexp
+
+	// Action determines what to do if the text matches.
+	// Validate defaults it to ActionCreateOrUpdateLabel when empty.
+	Action Action
+
+	// Label is the label to create or update.
+	Label string
+}
+
+// Validate checks the rule and normalizes it in place:
+//   - SourceLabel defaults to model.LabelContent and SkipTooShortThreshold to 300.
+//   - When a Transform is set, its prompt is parsed as a template, executed
+//     against promptTemplates, and the result is cached in promptRendered.
+//   - Match defaults to ".*" and is compiled into matchRE.
+//   - Action defaults to ActionCreateOrUpdateLabel, which requires Label.
+//
+// It returns an error describing the first problem found.
+func (r *Rule) Validate() error { //nolint:cyclop
+	// Source label.
+	if r.SourceLabel == "" {
+		r.SourceLabel = model.LabelContent
+	}
+	if r.SkipTooShortThreshold == nil {
+		r.SkipTooShortThreshold = ptr.To(300)
+	}
+
+	// Transform.
+	if r.Transform != nil {
+		// Rule.From builds a Transform with a nil ToText when the config has a
+		// transform section without to_text, so guard before dereferencing.
+		if r.Transform.ToText == nil {
+			return errors.New("to text is required for transform")
+		}
+		if r.Transform.ToText.Prompt == "" {
+			return errors.New("to text prompt is required")
+		}
+		// NOTE(review): this is html/template, so values substituted from
+		// promptTemplates are HTML-escaped in the rendered prompt (e.g. the
+		// markup in "summary_html_snippet"). text/template is probably what is
+		// intended here — confirm before switching the import.
+		tmpl, err := template.New("").Parse(r.Transform.ToText.Prompt)
+		if err != nil {
+			return errors.Wrapf(err, "parse prompt template %s", r.Transform.ToText.Prompt)
+		}
+		buf := buffer.Get()
+		defer buffer.Put(buf)
+		if err := tmpl.Execute(buf, promptTemplates); err != nil {
+			return errors.Wrapf(err, "execute prompt template %s", r.Transform.ToText.Prompt)
+		}
+		r.Transform.ToText.promptRendered = buf.String()
+	}
+
+	// Match.
+	if r.Match == "" {
+		r.Match = ".*"
+	}
+	re, err := regexp.Compile(r.Match)
+	if err != nil {
+		return errors.Wrapf(err, "compile match regex %s", r.Match)
+	}
+	r.matchRE = re
+
+	// Action.
+	// Apply the default before the switch so that a defaulted action goes
+	// through the same Label check as an explicitly configured one.
+	if r.Action == "" {
+		r.Action = ActionCreateOrUpdateLabel
+	}
+	switch r.Action {
+	case ActionCreateOrUpdateLabel:
+		if r.Label == "" {
+			return errors.New("label is required for create or update label action")
+		}
+	case ActionDropFeed:
+	default:
+		return errors.Errorf("invalid action: %s", r.Action)
+	}
+
+	return nil
+}
+
+// From copies the fields of the application-level rewrite rule c into r.
+// Match wins over MatchRE: MatchRE is only used when Match is empty.
+// r.Transform is only assigned when c.Transform is set; its ToText stays nil
+// when c.Transform.ToText is nil.
+func (r *Rule) From(c *config.RewriteRule) {
+	r.SourceLabel, r.SkipTooShortThreshold = c.SourceLabel, c.SkipTooShortThreshold
+	r.Action, r.Label = Action(c.Action), c.Label
+
+	if r.Match = c.Match; r.Match == "" {
+		r.Match = c.MatchRE
+	}
+
+	if c.Transform == nil {
+		return
+	}
+	transform := &Transform{}
+	if src := c.Transform.ToText; src != nil {
+		transform.ToText = &ToText{LLM: src.LLM, Prompt: src.Prompt}
+	}
+	r.Transform = transform
+}
+
+// Transform describes how a rule's source text is transformed before matching.
+type Transform struct {
+	// ToText transforms the source text into new text via an LLM completion.
+	ToText *ToText
+}
+
+// ToText configures an LLM-based text transformation.
+type ToText struct {
+	// LLM is the name of the LLM to use.
+	LLM string
+
+	// Prompt is the prompt for LLM completion.
+	// It is parsed as a template and executed against promptTemplates, so it may
+	// reference the built-in prompts, e.g. "{{ .category }}".
+	// The source text will automatically be injected into the prompt.
+	// promptRendered caches the executed template; it is set by Rule.Validate.
+	Prompt         string
+	promptRendered string
+}
+
+// Action names what a rule does when its match succeeds.
+type Action string
+
+// Supported actions:
+//   - ActionDropFeed: Labels returns nil labels (and a nil error) so the item is dropped.
+//   - ActionCreateOrUpdateLabel: the matched text is stored under Rule.Label.
+const (
+	ActionDropFeed            Action = "drop_feed"
+	ActionCreateOrUpdateLabel Action = "create_or_update_label"
+)
+
+// promptTemplates holds the built-in prompts, keyed by name.
+// Rule.Validate passes this map as the data when executing a rule's prompt
+// template, so a prompt can pull one in with e.g. "{{ .category }}".
+var promptTemplates = map[string]string{
+	// category: classify the content into exactly one fixed category.
+	"category": `
+Analyze the content and categorize it into exactly one of these categories:
+Technology, Development, Entertainment, Finance, Health, Politics, Other
+
+Classification requirements:
+- Choose the SINGLE most appropriate category based on:
+  * Primary topic and main focus of the content
+  * Key terminology and concepts used
+  * Target audience and purpose
+  * Technical depth and complexity level
+- For content that could fit multiple categories:
+  * Identify the dominant theme
+  * Consider the most specific applicable category
+  * Use the primary intended purpose
+- If content appears ambiguous:
+  * Focus on the most prominent aspects
+  * Consider the practical application
+  * Choose the category that best serves user needs
+
+Output format:
+Return ONLY the category name, no other text or explanation.
+Must be one of the provided categories exactly as written.
+`,
+
+	// tags: produce a short comma-separated tag list.
+	"tags": `
+Analyze the content and add appropriate tags based on:
+- Main topics and themes
+- Key concepts and terminology 
+- Target audience and purpose
+- Technical depth and domain
+- 2-4 tags are enough
+Output format:
+Return a list of tags, separated by commas, no other text or explanation.
+e.g. "AI, Technology, Innovation, Future"
+`,
+
+	// score: rate the content from 0 to 10.
+	"score": `
+Please give a score between 0 and 10 based on the following content.
+Evaluate the content comprehensively considering clarity, accuracy, depth, logical structure, language expression, and completeness.
+Note: If the content is an article or a text intended to be detailed, the length is an important factor. Generally, content under 300 words may receive a lower score due to lack of substance, unless its type (such as poetry or summary) is inherently suitable for brevity.
+Output format:
+Return the score (0-10), no other text or explanation.
+E.g. "8", "5", "3", etc.
+`,
+
+	// comment_confucius: a short comment written in the voice of Confucius.
+	"comment_confucius": `
+Please act as Confucius and write a 100-word comment on the article.
+Content needs to be in line with the Chinese mainland's regulations.
+Output format:
+Return the comment only, no other text or explanation.
+Reply short and concise, 100 words is enough.
+`,
+
+	// summary: a plain-text summary.
+	"summary": `
+Summarize the article in 100-200 words.
+`,
+
+	// summary_html_snippet: a summary rendered as an HTML email fragment.
+	"summary_html_snippet": `
+# Task: Create Visually Appealing Information Summary Emails
+
+You are a professional content designer. Please convert the provided articles into **visually modern HTML email segments**, focusing on display effects in modern clients like Gmail and QQ Mail.
+
+## Key Requirements:
+
+1. **Output Format**:
+   - Only output HTML code snippets, **no need for complete HTML document structure**
+   - Only generate HTML code for a single article, so users can combine multiple pieces into a complete email
+   - No explanations, additional comments, or markups
+   - **No need to add titles and sources**, users will inject them automatically
+   - No use html backticks, output raw html code directly
+   - Output directly, no explanation, no comments, no markups
+
+2. **Content Processing**:
+   - **Don't directly copy the original text**, but extract key information and core insights from each article
+   - **Each article summary should be 100-200 words**, don't force word count, adjust the word count based on the actual length of the article
+   - Summarize points in relaxed, natural language, as if chatting with friends, while maintaining depth
+   - Maintain the original language of the article (e.g., Chinese summary for Chinese articles)
+
+3. **Visual Design**:
+   - Design should be aesthetically pleasing with coordinated colors
+   - Use sufficient whitespace and contrast
+   - Maintain a consistent visual style across all articles
+   - **Must use multiple visual elements** (charts, cards, quote blocks, etc.), avoid pure text presentation
+   - Each article should use at least 2-3 different visual elements to make content more intuitive and readable
+
+4. **Highlight Techniques**:
+
+   A. **Beautiful Quote Blocks** (for highlighting important viewpoints):
+   <div style="margin:20px 0; padding:20px; background:linear-gradient(to right, #f8f9fa, #ffffff); border-left:5px solid #4285f4; border-radius:5px; box-shadow:0 2px 8px rgba(0,0,0,0.05);">
+     <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; line-height:1.6; color:#333; font-weight:500;">
+       Here is the key viewpoint or finding that needs to be highlighted.
+     </p>
+   </div>
+
+   B. **Information Cards** (for highlighting key data):
+   <div style="display:inline-block; margin:10px 10px 10px 0; padding:15px 20px; background-color:#ffffff; border-radius:8px; box-shadow:0 3px 10px rgba(0,0,0,0.08); min-width:120px; text-align:center;">
+     <p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#666;">Metric Name</p>
+     <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:24px; font-weight:600; color:#1a73e8;">75%</p>
+   </div>
+
+   C. **Key Points List** (for highlighting multiple points):
+   <ul style="margin:20px 0; padding-left:0; list-style-type:none;">
+     <li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
+       <span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">1</span>
+       First point description
+     </li>
+     <li style="position:relative; margin-bottom:12px; padding-left:28px; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#444;">
+       <span style="position:absolute; left:0; top:0; width:18px; height:18px; background-color:#4285f4; border-radius:50%; color:white; text-align:center; line-height:18px; font-size:12px;">2</span>
+       Second point description
+     </li>
+   </ul>
+
+   D. **Emphasis Text** (for highlighting key words or phrases):
+   <span style="background:linear-gradient(180deg, rgba(255,255,255,0) 50%, rgba(66,133,244,0.2) 50%); padding:0 2px;">Text to emphasize</span>
+
+5. **Timeline Design** (suitable for event sequences or news developments):
+   <div style="margin:25px 0; padding:5px 0;">
+     <h3 style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:18px; color:#333; margin-bottom:15px;">Event Development Timeline</h3>
+     
+     <div style="position:relative; margin-left:30px; padding-left:30px; border-left:2px solid #e0e0e0;">
+       <!-- Time Point 1 -->
+       <div style="position:relative; margin-bottom:25px;">
+         <div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
+         <p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 1, 2023</p>
+         <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
+       </div>
+       
+       <!-- Time Point 2 -->
+       <div style="position:relative; margin-bottom:25px;">
+         <div style="position:absolute; width:16px; height:16px; background-color:#4285f4; border-radius:50%; left:-40px; top:0; border:3px solid #ffffff; box-shadow:0 2px 5px rgba(0,0,0,0.1);"></div>
+         <p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#4285f4;">June 15, 2023</p>
+         <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.5; color:#333;">Event description content, concisely explaining the key points and impact of the event.</p>
+       </div>
+     </div>
+   </div>
+
+6. **Comparison Table** (for comparing different options or viewpoints):
+   <div style="margin:25px 0; padding:15px; background-color:#f8f9fa; border-radius:8px; overflow-x:auto;">
+     <table style="width:100%; border-collapse:collapse; font-family:'Google Sans',Roboto,Arial,sans-serif;">
+       <thead>
+         <tr>
+           <th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Feature</th>
+           <th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option A</th>
+           <th style="padding:12px 15px; text-align:left; border-bottom:2px solid #e0e0e0; color:#202124; font-weight:500;">Option B</th>
+         </tr>
+       </thead>
+       <tbody>
+         <tr>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Cost</td>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Higher</td>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Moderate</td>
+         </tr>
+         <tr>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Efficiency</td>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Very High</td>
+           <td style="padding:12px 15px; border-bottom:1px solid #e0e0e0; color:#444;">Average</td>
+         </tr>
+       </tbody>
+     </table>
+   </div>
+
+7. **Chart Data Processing**:
+   - Bar Chart/Horizontal Bars:
+   <div style="margin:20px 0; padding:15px; background-color:#f8f9fa; border-radius:8px;">
+     <p style="margin:0 0 15px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Data Comparison</p>
+     
+     <!-- Item 1 -->
+     <div style="margin-bottom:12px;">
+       <div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
+         <span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project A</span>
+         <span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">65%</span>
+       </div>
+       <div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
+         <div style="height:100%; width:65%; background:linear-gradient(to right, #4285f4, #5e97f6); border-radius:5px;"></div>
+       </div>
+     </div>
+     
+     <!-- Item 2 -->
+     <div style="margin-bottom:12px;">
+       <div style="display:flex; align-items:center; justify-content:space-between; margin-bottom:5px;">
+         <span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; color:#555;">Project B</span>
+         <span style="font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:14px; font-weight:500; color:#333;">42%</span>
+       </div>
+       <div style="height:10px; width:100%; background-color:#e8eaed; border-radius:5px; overflow:hidden;">
+         <div style="height:100%; width:42%; background:linear-gradient(to right, #ea4335, #f07575); border-radius:5px;"></div>
+       </div>
+     </div>
+   </div>
+
+8. **Highlight Box** (for displaying tips or reminders):
+   <div style="margin:25px 0; padding:20px; background-color:#fffde7; border-radius:8px; border-left:4px solid #fdd835; box-shadow:0 1px 5px rgba(0,0,0,0.05);">
+     <div style="display:flex; align-items:flex-start;">
+       <div style="flex-shrink:0; margin-right:15px; width:24px; height:24px; background-color:#fdd835; border-radius:50%; display:flex; align-items:center; justify-content:center;">
+         <span style="color:#fff; font-weight:bold; font-size:16px;">!</span>
+       </div>
+       <div>
+         <p style="margin:0 0 5px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#333;">Tip</p>
+         <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#555;">
+           Here are some additional tips or suggestions to help readers better understand or apply the article content.
+         </p>
+       </div>
+     </div>
+   </div>
+
+9. **Summary Box**:
+   <div style="margin:25px 0; padding:20px; background-color:#f2f7fd; border-radius:8px; box-shadow:0 1px 5px rgba(66,133,244,0.1);">
+     <p style="margin:0 0 10px 0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:16px; font-weight:500; color:#1a73e8;">In Simple Terms</p>
+     <p style="margin:0; font-family:'Google Sans',Roboto,Arial,sans-serif; font-size:15px; line-height:1.6; color:#333;">
+       This is a concise summary of the entire content, highlighting the most critical findings and conclusions.
+     </p>
+   </div>
+
+## Notes:
+1. **Only generate content for a single article**, not including title and source, and not including HTML head and tail structure
+2. Content should be **200-300 words**, don't force word count
+3. **Must use multiple visual elements** (at least 2-3 types), avoid monotonous pure text presentation
+4. Use relaxed, natural language, as if chatting with friends
+5. Create visual charts for important data, rather than just describing with text
+6. Use quote blocks to highlight important viewpoints, and lists to organize multiple points
+7. Appropriately use emojis and conversational expressions to increase friendliness
+8. Note that the article content has been provided in the previous message, please reply directly, no explanation, no comments, no markups
+`,
+}
+
+// --- Factory code block ---
+
+// Factory creates Rewriter instances from the application config and Dependencies.
+type Factory component.Factory[Rewriter, config.App, Dependencies]
+
+// NewFactory returns the Rewriter factory.
+// Without arguments it builds real rewriters; when at least one mock option is
+// given it builds mock rewriters with those options applied.
+func NewFactory(mockOn ...component.MockOption) Factory {
+	if len(mockOn) == 0 {
+		return component.FactoryFunc[Rewriter, config.App, Dependencies](new)
+	}
+
+	buildMock := func(instance string, app *config.App, dependencies Dependencies) (Rewriter, error) {
+		mocked := &mockRewriter{}
+		component.MockOptions(mockOn).Apply(&mocked.Mock)
+
+		return mocked, nil
+	}
+
+	return component.FactoryFunc[Rewriter, config.App, Dependencies](buildMock)
+}
+
+// new builds a rewriter for the given instance name: it derives the rewrite
+// config from app, validates it, and wraps it in a component base.
+// It returns an error when the derived config is invalid.
+func new(instance string, app *config.App, dependencies Dependencies) (Rewriter, error) {
+	cfg := &Config{}
+	cfg.From(app)
+	if err := cfg.Validate(); err != nil {
+		return nil, errors.Wrap(err, "validate and adjust rewrite config")
+	}
+
+	base := component.New(&component.BaseConfig[Config, Dependencies]{
+		Name:         "Rewriter",
+		Instance:     instance,
+		Config:       cfg,
+		Dependencies: dependencies,
+	})
+
+	return &rewriter{Base: base}, nil
+}
+
+// --- Implementation code block ---
+
+// rewriter is the Rewriter implementation; the embedded component base holds
+// its Config and Dependencies.
+type rewriter struct {
+	*component.Base[Config, Dependencies]
+}
+
+// Reload rebuilds the rewrite config from app and installs it.
+// The current config is left untouched when the new one fails validation.
+func (r *rewriter) Reload(app *config.App) error {
+	next := Config{}
+	next.From(app)
+
+	err := next.Validate()
+	if err != nil {
+		return errors.Wrap(err, "validate and adjust rewrite config")
+	}
+
+	r.SetConfig(&next)
+
+	return nil
+}
+
+// Labels runs every configured rule, in order, against labels.
+//
+// For each rule it reads the source label, skips the rule when the text has
+// fewer runes than SkipTooShortThreshold, optionally transforms the text via
+// the LLM, and tests the result against the rule's regular expression.
+// On a match, ActionDropFeed returns (nil, nil) immediately and
+// ActionCreateOrUpdateLabel stores the text under the rule's Label.
+//
+// The input labels are modified in place and returned sorted.
+// An error is returned when a transformation fails.
+func (r *rewriter) Labels(ctx context.Context, labels model.Labels) (out model.Labels, err error) {
+	ctx = telemetry.StartWith(ctx, append(r.TelemetryLabels(), telemetrymodel.KeyOperation, "Labels")...)
+	// err is the named result, so the deferred call reports the real outcome
+	// instead of always reporting success.
+	defer func() { telemetry.End(ctx, err) }()
+
+	rules := *r.Config()
+	for i := range rules {
+		// Index instead of ranging by value to avoid copying each Rule.
+		rule := &rules[i]
+
+		// Get source text based on source label.
+		sourceText := labels.Get(rule.SourceLabel)
+		if utf8.RuneCountInString(sourceText) < *rule.SkipTooShortThreshold {
+			continue
+		}
+
+		// Transform text if configured.
+		text := sourceText
+		if rule.Transform != nil {
+			transformed, terr := r.transformText(ctx, rule.Transform, sourceText)
+			if terr != nil {
+				return nil, errors.Wrap(terr, "transform text")
+			}
+			text = transformed
+		}
+
+		// Check if text matches the rule.
+		if !rule.matchRE.MatchString(text) {
+			continue
+		}
+
+		// Handle actions.
+		switch rule.Action {
+		case ActionDropFeed:
+			return nil, nil
+		case ActionCreateOrUpdateLabel:
+			labels.Put(rule.Label, text, false)
+		}
+	}
+
+	labels.EnsureSorted()
+
+	return labels, nil
+}
+
+// transformText asks the LLM named by transform.ToText.LLM to rewrite text.
+// The rendered prompt is sent first, followed by a fixed instruction line and
+// the text itself; the completion is cleaned up by transformTextHack.
+func (r *rewriter) transformText(ctx context.Context, transform *Transform, text string) (string, error) {
+	toText := transform.ToText
+
+	// Resolve the LLM instance by name.
+	client := r.Dependencies().LLMFactory.Get(toText.LLM)
+
+	messages := []string{
+		toText.promptRendered,
+		"The content to be processed is below, and the processing requirements are as above",
+		text, // TODO: may place to first line to hit the model cache in different rewrite rules.
+	}
+
+	// Call completion.
+	completion, err := client.String(ctx, messages)
+	if err != nil {
+		return "", errors.Wrap(err, "llm completion")
+	}
+
+	return r.transformTextHack(completion), nil
+}
+
+func (r *rewriter) transformTextHack(text string) string {
+	bytes := unsafe.Slice(unsafe.StringData(text), len(text))
+	start := 0
+	end := len(bytes)
+
+	// Remove the last line if it's empty.
+	// This is a hack to avoid the model output a empty line.
+	// E.g. category: tech\n
+	if end > 0 && bytes[end-1] == '\n' {
+		end--
+	}
+
+	// Remove the html backticks.
+	if end-start >= 7 && string(bytes[start:start+7]) == "```html" {
+		start += 7
+	}
+	if end-start >= 3 && string(bytes[end-3:end]) == "```" {
+		end -= 3
+	}
+
+	// If no changes, return the original string.
+	if start == 0 && end == len(bytes) {
+		return text
+	}
+
+	// Only copy one time.
+	return string(bytes[start:end])
+}
+
+// mockRewriter is the mock Rewriter returned by NewFactory when mock options
+// are supplied; its methods forward to the embedded component.Mock.
+type mockRewriter struct {
+	component.Mock
+}
+
+// Reload records the call on the mock and returns the stubbed error.
+func (r *mockRewriter) Reload(app *config.App) error {
+	return r.Called(app).Error(0)
+}
+
+// Labels records the call on the mock and returns the stubbed labels and error.
+// A nil first stub value yields nil labels.
+func (r *mockRewriter) Labels(ctx context.Context, labels model.Labels) (model.Labels, error) {
+	ret := r.Called(ctx, labels)
+	if stubbed := ret.Get(0); stubbed != nil {
+		return stubbed.(model.Labels), ret.Error(1)
+	}
+
+	return nil, ret.Error(1)
+}
--- a/pkg/rewrite/rewrite_test.go
+++ b/pkg/rewrite/rewrite_test.go
@@ -0,0 +1,286 @@
+package rewrite
+
+import (
+	"context"
+	"testing"
+
+	. "github.com/onsi/gomega"
+	"github.com/pkg/errors"
+	"github.com/stretchr/testify/mock"
+	"k8s.io/utils/ptr"
+
+	"github.com/glidea/zenfeed/pkg/component"
+	"github.com/glidea/zenfeed/pkg/llm"
+	"github.com/glidea/zenfeed/pkg/model"
+	"github.com/glidea/zenfeed/pkg/test"
+)
+
+// TestLabels is a table-driven test of rewriter.Labels covering the drop,
+// create/update, no-match and LLM-error paths with a mocked LLM factory.
+// Each subtest binds its own Gomega instance to the subtest's *testing.T so a
+// failed assertion is reported against the subtest that produced it.
+func TestLabels(t *testing.T) {
+	type givenDetail struct {
+		config  *Config
+		llmMock func(m *mock.Mock)
+	}
+	type whenDetail struct {
+		inputLabels model.Labels
+	}
+	type thenExpected struct {
+		outputLabels model.Labels
+		err          error
+		isErr        bool
+	}
+
+	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
+		{
+			Scenario: "Drop feed based on transformed content match",
+			Given:    "a rule to drop feed if transformed content matches 'spam'",
+			When:     "processing labels where transformed content is 'spam'",
+			Then:     "should return nil labels indicating drop",
+			GivenDetail: givenDetail{
+				config: &Config{
+					{
+						SourceLabel:           model.LabelContent,
+						SkipTooShortThreshold: ptr.To(10),
+						Transform: &Transform{
+							ToText: &ToText{
+								LLM:    "mock-llm",
+								Prompt: "{{ .category }}", // Using a simple template for testing
+							},
+						},
+						Match:  "spam",
+						Action: ActionDropFeed,
+					},
+				},
+				llmMock: func(m *mock.Mock) {
+					m.On("String", mock.Anything, mock.Anything).Return("spam", nil)
+				},
+			},
+			WhenDetail: whenDetail{
+				inputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "This is some content that will be transformed to spam."},
+					{Key: model.LabelTitle, Value: "Spam Article"},
+				},
+			},
+			ThenExpected: thenExpected{
+				outputLabels: nil,
+				isErr:        false,
+			},
+		},
+		{
+			Scenario: "Create/Update label based on transformed content",
+			Given:    "a rule to add a category label based on transformed content",
+			When:     "processing labels where transformed content is 'Technology'",
+			Then:     "should return labels with the new category label",
+			GivenDetail: givenDetail{
+				config: &Config{
+					{
+						SourceLabel:           model.LabelContent,
+						SkipTooShortThreshold: ptr.To(10),
+						Transform: &Transform{
+							ToText: &ToText{
+								LLM:    "mock-llm",
+								Prompt: "{{ .category }}",
+							},
+						},
+						Match:  "Technology",
+						Action: ActionCreateOrUpdateLabel,
+						Label:  "category",
+					},
+				},
+				llmMock: func(m *mock.Mock) {
+					m.On("String", mock.Anything, mock.Anything).Return("Technology", nil)
+				},
+			},
+			WhenDetail: whenDetail{
+				inputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Content about AI and programming."},
+					{Key: model.LabelTitle, Value: "Tech Article"},
+				},
+			},
+			ThenExpected: thenExpected{
+				outputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Content about AI and programming."},
+					{Key: model.LabelTitle, Value: "Tech Article"},
+					{Key: "category", Value: "Technology"},
+				},
+				isErr: false,
+			},
+		},
+		{
+			Scenario: "No rules match",
+			Given:    "a rule that does not match the content",
+			When:     "processing labels",
+			Then:     "should return the original labels unchanged",
+			GivenDetail: givenDetail{
+				config: &Config{
+					{
+						SourceLabel:           model.LabelContent,
+						SkipTooShortThreshold: ptr.To(10),
+						Match:                 "NonMatchingPattern",
+						Action:                ActionDropFeed,
+					},
+				},
+			},
+			WhenDetail: whenDetail{
+				inputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Some regular content."},
+					{Key: model.LabelTitle, Value: "Regular Article"},
+				},
+			},
+			ThenExpected: thenExpected{
+				outputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Some regular content."},
+					{Key: model.LabelTitle, Value: "Regular Article"},
+				},
+				isErr: false,
+			},
+		},
+		{
+			Scenario: "LLM transformation error",
+			Given:    "a rule requiring transformation and LLM returns an error",
+			When:     "processing labels",
+			Then:     "should return an error",
+			GivenDetail: givenDetail{
+				config: &Config{
+					{
+						SourceLabel:           model.LabelContent,
+						SkipTooShortThreshold: ptr.To(10),
+						Transform: &Transform{
+							ToText: &ToText{
+								LLM:            "mock-llm",
+								Prompt:         "{{ .category }}",
+								promptRendered: "Analyze the content and categorize it...",
+							},
+						},
+						Match:  ".*",
+						Action: ActionCreateOrUpdateLabel,
+						Label:  "category",
+					},
+				},
+				llmMock: func(m *mock.Mock) {
+					m.On("String", mock.Anything, mock.Anything).Return("", errors.New("LLM failed"))
+				},
+			},
+			WhenDetail: whenDetail{
+				inputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Content requiring transformation."},
+					{Key: model.LabelTitle, Value: "Transform Error Article"},
+				},
+			},
+			ThenExpected: thenExpected{
+				outputLabels: nil,
+				err:          errors.New("transform text: llm completion: LLM failed"),
+				isErr:        true,
+			},
+		},
+		{
+			Scenario: "Rule matches but label already exists",
+			Given:    "a rule to add a category label and the label already exists",
+			When:     "processing labels",
+			Then:     "should update the existing label value",
+			GivenDetail: givenDetail{
+				config: &Config{
+					{
+						SourceLabel:           model.LabelContent,
+						SkipTooShortThreshold: ptr.To(10),
+						Transform: &Transform{
+							ToText: &ToText{
+								LLM:            "mock-llm",
+								Prompt:         "{{ .category }}",
+								promptRendered: "Analyze the content and categorize it...",
+							},
+						},
+						Match:  "Finance",
+						Action: ActionCreateOrUpdateLabel,
+						Label:  "category",
+					},
+				},
+				llmMock: func(m *mock.Mock) {
+					m.On("String", mock.Anything, mock.Anything).Return("Finance", nil)
+				},
+			},
+			WhenDetail: whenDetail{
+				inputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Content about stock market."},
+					{Key: model.LabelTitle, Value: "Finance Article"},
+					{Key: "category", Value: "OldCategory"}, // Existing label
+				},
+			},
+			ThenExpected: thenExpected{
+				outputLabels: model.Labels{
+					{Key: model.LabelContent, Value: "Content about stock market."},
+					{Key: model.LabelTitle, Value: "Finance Article"},
+					{Key: "category", Value: "Finance"}, // Updated label
+				},
+				isErr: false,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.Scenario, func(t *testing.T) {
+			// Bind assertions to this subtest rather than to the parent test.
+			g := NewWithT(t)
+
+			// Given.
+			var mockLLMFactory llm.Factory
+			var mockInstance *mock.Mock // Store the mock instance for assertion
+
+			// Create mock factory and capture the mock.Mock instance.
+			mockOption := component.MockOption(func(m *mock.Mock) {
+				mockInstance = m // Capture the mock instance.
+				if tt.GivenDetail.llmMock != nil {
+					tt.GivenDetail.llmMock(m)
+				}
+			})
+			mockLLMFactory, err := llm.NewFactory("", nil, llm.FactoryDependencies{}, mockOption) // Use the factory directly with the option
+			g.Expect(err).NotTo(HaveOccurred())
+
+			// Manually validate config to compile regex and render templates.
+			// In real usage, this happens in `new` or `Reload`.
+			for i := range *tt.GivenDetail.config {
+				err := (*tt.GivenDetail.config)[i].Validate()
+				g.Expect(err).NotTo(HaveOccurred(), "Rule validation should not fail in test setup")
+			}
+
+			// Instantiate the rewriter with the mock factory
+			rewriterInstance := &rewriter{
+				Base: component.New(&component.BaseConfig[Config, Dependencies]{
+					Name:     "TestRewriter",
+					Instance: "test",
+					Config:   tt.GivenDetail.config,
+					Dependencies: Dependencies{
+						LLMFactory: mockLLMFactory, // Pass the mock factory
+					},
+				}),
+			}
+
+			// Clone input labels to avoid modification by reference affecting assertions.
+			inputLabelsCopy := make(model.Labels, len(tt.WhenDetail.inputLabels))
+			copy(inputLabelsCopy, tt.WhenDetail.inputLabels)
+
+			// When.
+			outputLabels, err := rewriterInstance.Labels(context.Background(), inputLabelsCopy)
+
+			// Then.
+			if tt.ThenExpected.isErr {
+				g.Expect(err).To(HaveOccurred())
+				// Use MatchError for potentially wrapped errors.
+				g.Expect(err).To(MatchError(ContainSubstring(tt.ThenExpected.err.Error())))
+				g.Expect(outputLabels).To(BeNil())
+			} else {
+				g.Expect(err).NotTo(HaveOccurred())
+				// Ensure output labels are sorted for consistent comparison.
+				if outputLabels != nil {
+					outputLabels.EnsureSorted()
+				}
+				tt.ThenExpected.outputLabels.EnsureSorted()
+				g.Expect(outputLabels).To(Equal(tt.ThenExpected.outputLabels))
+			}
+
+			// Verify LLM calls if stubs were provided.
+			if tt.GivenDetail.llmMock != nil && mockInstance != nil {
+				// Assert expectations on the captured mock instance.
+				mockInstance.AssertExpectations(t)
+			}
+		})
+	}
+}