init
This commit is contained in:
1483
pkg/storage/feed/block/block.go
Normal file
1483
pkg/storage/feed/block/block.go
Normal file
File diff suppressed because it is too large
Load Diff
1270
pkg/storage/feed/block/block_test.go
Normal file
1270
pkg/storage/feed/block/block_test.go
Normal file
File diff suppressed because it is too large
Load Diff
741
pkg/storage/feed/block/chunk/chunk.go
Normal file
741
pkg/storage/feed/block/chunk/chunk.go
Normal file
@@ -0,0 +1,741 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/edsrzf/mmap-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
|
||||
// File is the interface for a chunk file.
// Concurrent safe.
type File interface {
	component.Component

	// EnsureReadonly ensures the file is readonly (can not Append).
	// It should be fast when the file already is readonly.
	// It will ensure the writeonly related resources are closed,
	// and open the readonly related resources, such as mmap to save memory.
	EnsureReadonly(ctx context.Context) (err error)

	// Count returns the number of feeds currently stored in the file.
	Count(ctx context.Context) (count uint32)

	// Append appends feeds to the file.
	// onSuccess is called when the feed is appended successfully (synchronously).
	// The offset is the offset of the feed in the file.
	// !!! It doesn't buffer the data between requests, so the caller should buffer the feeds to avoid high I/O.
	Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) (err error)

	// Read reads a feed from the file at the given byte offset
	// (an offset previously reported via Append's onSuccess callback).
	Read(ctx context.Context, offset uint64) (feed *Feed, err error)

	// Range ranges over all feeds in the file in storage order.
	// Iteration stops at the first error returned by iter.
	Range(ctx context.Context, iter func(feed *Feed, offset uint64) (err error)) (err error)
}
|
||||
|
||||
// Config for a chunk file.
type Config struct {
	// Path is the path to the chunk file.
	// If the file does not exist, it will be created.
	// If the file exists, it will be reloaded.
	Path string
	// ReadonlyAtFirst indicates whether the file should be readonly at first.
	// If the file at Path does not exist, it cannot be true:
	// a readonly file is only ever opened, never created.
	ReadonlyAtFirst bool
}
|
||||
|
||||
func (c *Config) Validate() (fileExists bool, err error) {
|
||||
if c.Path == "" {
|
||||
return false, errors.New("path is required")
|
||||
}
|
||||
|
||||
fi, err := os.Stat(c.Path)
|
||||
switch {
|
||||
case err == nil:
|
||||
if fi.IsDir() {
|
||||
return false, errors.New("path is a directory")
|
||||
}
|
||||
|
||||
return true, nil
|
||||
|
||||
case os.IsNotExist(err):
|
||||
if c.ReadonlyAtFirst {
|
||||
return false, errors.New("path does not exist")
|
||||
}
|
||||
|
||||
return false, nil
|
||||
|
||||
default:
|
||||
return false, errors.Wrap(err, "stat path")
|
||||
}
|
||||
}
|
||||
|
||||
// Dependencies holds external dependencies of a chunk file (currently none).
type Dependencies struct{}
|
||||
|
||||
// On-disk layout constants: a fixed 64-byte header (magic number and
// format version, remainder zero padding) followed by the feed data section.
var (
	// headerBytes is the total size of the on-disk header.
	headerBytes = 64
	// headerMagicNumber identifies a chunk file ("wys is awesome" + 2 zero bytes).
	headerMagicNumber = []byte{0x77, 0x79, 0x73, 0x20, 0x69, 0x73, 0x20,
		0x61, 0x77, 0x65, 0x73, 0x6f, 0x6d, 0x65, 0x00, 0x00}
	headerMagicNumberBytes = 16
	// headerVersionStart is the byte offset of the version field (right after the magic).
	headerVersionStart = headerMagicNumberBytes
	// headerVersion is the current on-disk format version.
	headerVersion      = uint32(1)
	headerVersionBytes = 4
	// dataStart is the byte offset where feed data begins.
	dataStart = headerBytes

	// header is the fully rendered header block written to newly created files.
	header = func() []byte {
		b := make([]byte, headerBytes)
		copy(b[:headerMagicNumberBytes], headerMagicNumber)
		binary.LittleEndian.PutUint32(b[headerVersionStart:headerVersionStart+headerVersionBytes], headerVersion)

		return b
	}()
)
|
||||
|
||||
// Metrics.
var (
	// modes are the possible values of the "mode" metric label.
	modes = []string{"readwrite", "readonly"}
	// feedCount tracks the number of feeds stored in a chunk file.
	feedCount = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: model.AppName,
			Subsystem: "chunk",
			Name:      "feed_count",
			Help:      "Number of feeds in the chunk file.",
		},
		[]string{telemetrymodel.KeyComponent, telemetrymodel.KeyComponentInstance, "mode"},
	)
	// byteSize tracks the size of a chunk file in bytes.
	byteSize = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: model.AppName,
			Subsystem: "chunk",
			Name:      "bytes",
			Help:      "Size of the chunk file.",
		},
		[]string{telemetrymodel.KeyComponent, telemetrymodel.KeyComponentInstance, "mode"},
	)
)
|
||||
|
||||
// --- Factory code block ---
|
||||
// Factory creates File components from a Config and Dependencies.
type Factory component.Factory[File, Config, Dependencies]

// NewFactory returns the chunk file factory.
// When mock options are given, the factory produces mock files for tests
// instead of real on-disk chunk files.
func NewFactory(mockOn ...component.MockOption) Factory {
	if len(mockOn) > 0 {
		return component.FactoryFunc[File, Config, Dependencies](
			func(instance string, config *Config, dependencies Dependencies) (File, error) {
				m := &mockFile{}
				component.MockOptions(mockOn).Apply(&m.Mock)

				return m, nil
			},
		)
	}

	return component.FactoryFunc[File, Config, Dependencies](new)
}
|
||||
|
||||
// new creates a new chunk file.
// It will create a new chunk file if the file that path points to does not exist.
// It will open the file if the file exists, and reload it.
// If readonlyAtFirst is true, it will open the file readonly.
func new(instance string, config *Config, dependencies Dependencies) (File, error) {
	fileExists, err := config.Validate()
	if err != nil {
		return nil, errors.Wrap(err, "validate config")
	}

	// Open/create the OS file and build the mode-specific resources:
	// in-memory write buffer + append offset (readwrite), or mmap (readonly).
	osFile, readWriteBuf, appendOffset, readonlyMmap, count, err := init0(fileExists, config)
	if err != nil {
		return nil, err
	}

	// Atomics so mode checks and Count can run without taking the mutex.
	var rn atomic.Bool
	rn.Store(config.ReadonlyAtFirst)
	var cnt atomic.Uint32
	cnt.Store(count)

	return &file{
		Base: component.New(&component.BaseConfig[Config, Dependencies]{
			Name:         "FeedChunk",
			Instance:     instance,
			Config:       config,
			Dependencies: dependencies,
		}),
		f:            osFile,
		readWriteBuf: readWriteBuf,
		appendOffset: appendOffset,
		readonlyMmap: readonlyMmap,
		readonly:     &rn,
		count:        &cnt,
	}, nil
}
|
||||
|
||||
// init0 opens (or creates) the underlying OS file, validates its contents,
// and prepares the mode-specific resources: for readwrite mode the entire
// validated file content is kept in readWriteBuf with appendOffset set past
// it; for readonly mode the file is mmapped instead and the buffer is dropped.
// On any error the OS file is closed before returning.
func init0(
	fileExists bool,
	config *Config,
) (
	osFile *os.File,
	readWriteBuf *buffer.Bytes,
	appendOffset uint64,
	readonlyMmap mmap.MMap,
	count uint32,
	err error,
) {
	// Ensure file.
	if fileExists {
		osFile, err = loadFromExisting(config.Path, config.ReadonlyAtFirst)
		if err != nil {
			return nil, nil, 0, nil, 0, errors.Wrap(err, "load from existing")
		}

	} else { // Create new file.
		if config.ReadonlyAtFirst {
			return nil, nil, 0, nil, 0, errors.New("cannot create readonly file")
		}

		osFile, err = createNewOSFile(config.Path)
		if err != nil {
			return nil, nil, 0, nil, 0, errors.Wrap(err, "create new os file")
		}
	}

	// Setup for Read. Verifies the header and every feed record, truncating
	// any trailing partial record left by a previous crash.
	readWriteBuf, count, err = validateOSFile(osFile)
	if err != nil {
		_ = osFile.Close()

		return nil, nil, 0, nil, 0, errors.Wrap(err, "validate os file")
	}

	if config.ReadonlyAtFirst {
		readWriteBuf = nil // Help GC.

		m, err := mmap.Map(osFile, mmap.RDONLY, 0)
		if err != nil {
			_ = osFile.Close()

			return nil, nil, 0, nil, 0, errors.Wrap(err, "mmap file")
		}

		readonlyMmap = m

	} else {
		// The next append goes right after the validated content.
		appendOffset = uint64(readWriteBuf.Len())
	}

	return
}
|
||||
|
||||
// validateOSFile reads the whole file, verifying the header and decoding
// every feed record. It returns an in-memory buffer holding the validated
// bytes (header included) and the number of valid feeds.
//
// A trailing record that is truncated (EOF / unexpected EOF) or fails its
// checksum — e.g. left by a crash mid-append — is silently dropped from the
// buffer; any other decode error is fatal.
func validateOSFile(f *os.File) (readWriteBuf *buffer.Bytes, count uint32, err error) {
	// Note: this local `header` shadows the package-level header template;
	// below it holds the bytes actually read from this file.
	header, err := validateHeader(f)
	if err != nil {
		return nil, 0, errors.Wrap(err, "validate header")
	}
	readWriteBuf = &buffer.Bytes{B: header} // len(header) == cap(header).

	if _, err := f.Seek(int64(dataStart), io.SeekStart); err != nil {
		return nil, 0, errors.Wrap(err, "seek to data start")
	}
	tr := &trackReader{Reader: f}
	// Byte count of the last fully valid record, relative to dataStart.
	var lastSuccessReaded int

	var p Feed
	for {
		err := p.validateFrom(tr, readWriteBuf)
		switch {
		case err == nil:
			count++
			lastSuccessReaded = tr.Readed()

			continue

		case (errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)) ||
			errors.Is(err, errChecksumMismatch):

			// Truncate uncompleted feed if any.
			readWriteBuf.B = readWriteBuf.B[:lastSuccessReaded+len(header)]

			return readWriteBuf, count, nil

		default:
			return nil, 0, errors.Wrap(err, "validate payload")
		}
	}
}
|
||||
|
||||
func validateHeader(f *os.File) (header []byte, err error) {
|
||||
header = make([]byte, headerBytes)
|
||||
if _, err := f.ReadAt(header, 0); err != nil {
|
||||
return nil, errors.Wrap(err, "read header")
|
||||
}
|
||||
|
||||
// Validate magic number.
|
||||
if !bytes.Equal(header[:headerMagicNumberBytes], headerMagicNumber) {
|
||||
return nil, errors.New("invalid magic number")
|
||||
}
|
||||
|
||||
// Validate version.
|
||||
version := binary.LittleEndian.Uint32(header[headerVersionStart : headerVersionStart+headerVersionBytes])
|
||||
if version != headerVersion {
|
||||
return nil, errors.New("invalid version")
|
||||
}
|
||||
|
||||
return header, nil
|
||||
}
|
||||
|
||||
func loadFromExisting(path string, readonlyAtFirst bool) (osFile *os.File, err error) {
|
||||
flag := os.O_RDWR
|
||||
if readonlyAtFirst {
|
||||
flag = os.O_RDONLY
|
||||
}
|
||||
|
||||
osFile, err = os.OpenFile(path, flag, 0600)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "open file")
|
||||
}
|
||||
|
||||
return osFile, nil
|
||||
}
|
||||
|
||||
func createNewOSFile(path string) (osFile *os.File, err error) {
|
||||
osFile, err = os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "create file")
|
||||
}
|
||||
|
||||
if _, err = osFile.Write(header); err != nil {
|
||||
_ = osFile.Close()
|
||||
|
||||
return nil, errors.Wrap(err, "write header")
|
||||
}
|
||||
|
||||
if err = osFile.Sync(); err != nil {
|
||||
_ = osFile.Close()
|
||||
|
||||
return nil, errors.Wrap(err, "sync file")
|
||||
}
|
||||
|
||||
return osFile, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---
|
||||
// file is the production implementation of File.
//
// It operates in one of two modes:
//   - readwrite: the full file content is mirrored in readWriteBuf and
//     appends go to both the buffer and the OS file;
//   - readonly: the content is served from a readonly mmap and the
//     buffer is released.
type file struct {
	*component.Base[Config, Dependencies]

	f *os.File
	// count is the number of feeds; atomic so Count needs no lock.
	count *atomic.Uint32
	// readonly flags the current mode; atomic for lock-free fast paths.
	readonly *atomic.Bool

	// mu guards the mode transition and the fields below.
	mu sync.RWMutex

	// Only readwrite.
	readWriteBuf *buffer.Bytes
	appendOffset uint64

	// Only readonly.
	readonlyMmap mmap.MMap
}
|
||||
|
||||
func (f *file) Run() error {
|
||||
f.MarkReady()
|
||||
|
||||
return timeutil.Tick(f.Context(), 30*time.Second, func() error {
|
||||
mode := "readwrite"
|
||||
sizeValue := f.appendOffset
|
||||
if f.readonly.Load() {
|
||||
mode = "readonly"
|
||||
sizeValue = uint64(len(f.readonlyMmap))
|
||||
}
|
||||
|
||||
feedCount.WithLabelValues(append(f.TelemetryLabelsIDFields(), mode)...).Set(float64(f.Count(context.Background())))
|
||||
byteSize.WithLabelValues(append(f.TelemetryLabelsIDFields(), mode)...).Set(float64(sizeValue))
|
||||
for _, m := range modes {
|
||||
if m == mode {
|
||||
continue
|
||||
}
|
||||
feedCount.DeleteLabelValues(append(f.TelemetryLabelsIDFields(), m)...)
|
||||
byteSize.DeleteLabelValues(append(f.TelemetryLabelsIDFields(), m)...)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// Close stops the background metrics loop, removes this instance's metric
// series, unmaps the file if readonly, and closes the underlying OS file.
// The file must not be used after Close.
func (f *file) Close() error {
	// Close Run().
	if err := f.Base.Close(); err != nil {
		return errors.Wrap(err, "closing base")
	}

	// Clean metrics.
	feedCount.DeletePartialMatch(f.TelemetryLabelsID())
	byteSize.DeletePartialMatch(f.TelemetryLabelsID())

	// Unmap if readonly. Taking the write lock excludes in-flight
	// readers/writers during teardown.
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.readonlyMmap != nil {
		if err := f.readonlyMmap.Unmap(); err != nil {
			return errors.Wrap(err, "unmap file")
		}
		f.readonlyMmap = nil
	}

	// Close file.
	if err := f.f.Close(); err != nil {
		return errors.Wrap(err, "close file")
	}
	f.f = nil
	f.appendOffset = 0

	return nil
}
|
||||
|
||||
// EnsureReadonly switches the file into readonly mode: the in-memory write
// buffer is released and content is served from a readonly mmap instead.
// Idempotent; cheap once the file is already readonly.
func (f *file) EnsureReadonly(ctx context.Context) (err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "EnsureReadonly")...)
	defer func() { telemetry.End(ctx, err) }()

	// Fast path - already readonly (lock-free).
	if f.readonly.Load() {
		return nil
	}

	// Acquire write lock, then re-check (double-checked locking).
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.readonly.Load() {
		return nil
	}

	// Clear readwrite resources.
	f.readWriteBuf = nil

	// Open mmap.
	// NOTE(review): the underlying *os.File keeps the flags it was opened
	// with (read-write); only the mmap view is readonly.
	m, err := mmap.Map(f.f, mmap.RDONLY, 0)
	if err != nil {
		return errors.Wrap(err, "mmap file")
	}

	// Update state. readonly is stored last, so lock-free observers only
	// see readonly==true once the mmap is in place.
	f.readonlyMmap = m
	f.readonly.Store(true)

	return nil
}
|
||||
|
||||
func (f *file) Count(ctx context.Context) uint32 {
|
||||
ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Count")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
|
||||
return f.count.Load()
|
||||
}
|
||||
|
||||
// Append encodes feeds into the in-memory write buffer, persists the new
// bytes to disk (with fsync), and then invokes onSuccess for each feed with
// its absolute file offset.
//
// Internal state (appendOffset, count) is only advanced after the data is
// durably written; on any failure the buffer is rolled back to its previous
// length. Fails if the file is readonly. Callbacks run after the lock is
// released, so they may safely call back into this file.
func (f *file) Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) (err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Append")...)
	defer func() { telemetry.End(ctx, err) }()

	f.mu.Lock()

	// Precheck.
	if f.readonly.Load() {
		f.mu.Unlock()

		return errors.New("file is readonly")
	}

	// Encode feeds into buffer.
	currentAppendOffset := f.appendOffset
	relativeOffsets, encodedBytesCount, err := f.encodeFeeds(feeds)
	if err != nil {
		// Roll back any partially encoded bytes.
		f.readWriteBuf.B = f.readWriteBuf.B[:currentAppendOffset]
		f.mu.Unlock()

		return errors.Wrap(err, "encode feeds")
	}

	// Prepare for commit.
	encodedData := f.readWriteBuf.Bytes()[currentAppendOffset:]
	newAppendOffset := currentAppendOffset + uint64(encodedBytesCount)

	// Commit data and header to file.
	if err = f.commitAppendToFile(encodedData, currentAppendOffset); err != nil {
		// Roll back the buffer; partially written file bytes (if any) are
		// overwritten by the next successful append.
		f.readWriteBuf.B = f.readWriteBuf.B[:currentAppendOffset]
		f.mu.Unlock()

		return errors.Wrap(err, "commit append to file")
	}

	// Update internal state on successful commit.
	f.appendOffset = newAppendOffset
	f.count.Add(uint32(len(feeds)))
	f.mu.Unlock()

	// Call callbacks after releasing the lock.
	absoluteOffsets := make([]uint64, len(relativeOffsets))
	for i, relOff := range relativeOffsets {
		absoluteOffsets[i] = currentAppendOffset + relOff // Calculate absolute offsets based on append position.
	}
	if err := f.notifySuccess(feeds, absoluteOffsets, onSuccess); err != nil {
		return errors.Wrap(err, "notify success callbacks")
	}

	return nil
}
|
||||
|
||||
func (f *file) Read(ctx context.Context, offset uint64) (feed *Feed, err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Read")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
// Validate offset.
|
||||
if offset < uint64(dataStart) {
|
||||
return nil, errors.New("offset too small")
|
||||
}
|
||||
|
||||
// Handle readonly mode.
|
||||
if f.readonly.Load() {
|
||||
if offset >= uint64(len(f.readonlyMmap)) {
|
||||
return nil, errors.New("offset too large")
|
||||
}
|
||||
feed, _, err = f.readFeed(ctx, f.readonlyMmap, offset)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "read feed")
|
||||
}
|
||||
|
||||
return feed, nil
|
||||
}
|
||||
|
||||
// Handle readwrite mode.
|
||||
f.mu.RLock()
|
||||
defer f.mu.RUnlock()
|
||||
if offset >= f.appendOffset {
|
||||
return nil, errors.New("offset too large")
|
||||
}
|
||||
|
||||
feed, _, err = f.readFeed(ctx, f.readWriteBuf.Bytes(), offset)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "read feed")
|
||||
}
|
||||
|
||||
return feed, nil
|
||||
}
|
||||
|
||||
func (f *file) Range(ctx context.Context, iter func(feed *Feed, offset uint64) error) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Range")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
// Handle readonly mode.
|
||||
if f.readonly.Load() {
|
||||
// Start from data section.
|
||||
offset := uint64(dataStart)
|
||||
for offset < uint64(len(f.readonlyMmap)) {
|
||||
feed, n, err := f.readFeed(ctx, f.readonlyMmap, offset)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read feed")
|
||||
}
|
||||
if err := iter(feed, offset); err != nil {
|
||||
return errors.Wrap(err, "iterate feed")
|
||||
}
|
||||
|
||||
// Move to next feed.
|
||||
offset += uint64(n) // G115: Safe conversion as n is uint32
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Handle readwrite mode.
|
||||
f.mu.RLock()
|
||||
defer f.mu.RUnlock()
|
||||
data := f.readWriteBuf.Bytes()
|
||||
offset := uint64(dataStart)
|
||||
for offset < f.appendOffset { // appendOffset is already checked/maintained correctly.
|
||||
feed, n, err := f.readFeed(ctx, data, offset)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read feed")
|
||||
}
|
||||
if err := iter(feed, offset); err != nil {
|
||||
return errors.Wrap(err, "iterate feed")
|
||||
}
|
||||
|
||||
// Move to next feed.
|
||||
offset += uint64(n)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// estimatedFeedSize is a rough per-feed size estimate (4 KiB) used to
// pre-grow the write buffer before encoding a batch.
const estimatedFeedSize = 4 * 1024

// encodeFeeds encodes a slice of feeds into the internal readWriteBuf.
// It returns the relative offsets of each feed within the newly added data,
// the total number of bytes encoded, and any error encountered.
// The caller must hold f.mu and, on error, truncate the buffer back to its
// previous length.
func (f *file) encodeFeeds(feeds []*Feed) (relativeOffsets []uint64, encodedBytesCount int, err error) {
	relativeOffsets = make([]uint64, len(feeds))
	startOffset := f.readWriteBuf.Len()

	// Grow once up front to avoid repeated reallocations while encoding.
	f.readWriteBuf.EnsureRemaining(estimatedFeedSize * len(feeds))

	for i, feed := range feeds {
		currentOffsetInBuf := f.readWriteBuf.Len()
		relativeOffsets[i] = uint64(currentOffsetInBuf - startOffset)
		if err := feed.encodeTo(f.readWriteBuf); err != nil {
			return nil, 0, errors.Wrapf(err, "encode feed %d", i)
		}
	}

	encodedBytesCount = f.readWriteBuf.Len() - startOffset

	return relativeOffsets, encodedBytesCount, nil
}
|
||||
|
||||
// commitAppendToFile writes the encoded data and updated header to the file and syncs.
|
||||
func (f *file) commitAppendToFile(data []byte, currentAppendOffset uint64) error {
|
||||
// Append data.
|
||||
if _, err := f.f.WriteAt(data, int64(currentAppendOffset)); err != nil {
|
||||
// Data might be partially written.
|
||||
// We will overwrite it in the next append.
|
||||
return errors.Wrap(err, "write feeds")
|
||||
}
|
||||
|
||||
// Sync file to persist changes.
|
||||
if err := f.f.Sync(); err != nil {
|
||||
return errors.Wrap(err, "sync file")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// notifySuccess calls the onSuccess callback for each successfully appended feed.
|
||||
func (f *file) notifySuccess(
|
||||
feeds []*Feed,
|
||||
absoluteOffsets []uint64,
|
||||
onSuccess func(feed *Feed, offset uint64) error,
|
||||
) error {
|
||||
if onSuccess == nil {
|
||||
return nil
|
||||
}
|
||||
for i, feed := range feeds {
|
||||
if err := onSuccess(feed, absoluteOffsets[i]); err != nil {
|
||||
// Return the first error encountered during callbacks.
|
||||
return errors.Wrapf(err, "on success callback for feed %d", i)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readFeed decodes one feed starting at offset within data, returning the
// feed and the number of bytes it occupies (used by Range to advance).
func (f *file) readFeed(ctx context.Context, data []byte, offset uint64) (feed *Feed, length int, err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "readFeed")...)
	defer func() { telemetry.End(ctx, err) }()

	// Prepare reader limited to [offset, len(data)); trackReader counts the
	// bytes actually consumed by the decoder.
	r := io.NewSectionReader(bytes.NewReader(data), int64(offset), int64(uint64(len(data))-offset))
	tr := &trackReader{Reader: r}

	// Decode feed.
	feed = &Feed{Feed: &model.Feed{}}
	if err = feed.decodeFrom(tr); err != nil {
		return nil, 0, errors.Wrap(err, "decode feed")
	}

	return feed, tr.Readed(), nil
}
|
||||
|
||||
type trackReader struct {
|
||||
io.Reader
|
||||
length int
|
||||
}
|
||||
|
||||
func (r *trackReader) Read(p []byte) (n int, err error) {
|
||||
n, err = r.Reader.Read(p)
|
||||
r.length += n
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (r *trackReader) Readed() int {
|
||||
return r.length
|
||||
}
|
||||
|
||||
// mockFile is a mock implementation of File for tests, driven by
// component.Mock expectations set via NewFactory's mock options.
type mockFile struct {
	component.Mock
}

// Run replays the configured expectation for Run.
func (m *mockFile) Run() error {
	args := m.Called()

	return args.Error(0)
}

// Ready replays the configured expectation for Ready.
func (m *mockFile) Ready() <-chan struct{} {
	args := m.Called()

	return args.Get(0).(<-chan struct{})
}

// Close replays the configured expectation for Close.
func (m *mockFile) Close() error {
	args := m.Called()

	return args.Error(0)
}

// Append replays the configured expectation for Append.
func (m *mockFile) Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) error {
	args := m.Called(ctx, feeds, onSuccess)

	return args.Error(0)
}

// Read replays the configured expectation for Read.
func (m *mockFile) Read(ctx context.Context, offset uint64) (*Feed, error) {
	args := m.Called(ctx, offset)

	return args.Get(0).(*Feed), args.Error(1)
}

// Range replays the configured expectation for Range.
func (m *mockFile) Range(ctx context.Context, iter func(feed *Feed, offset uint64) error) error {
	args := m.Called(ctx, iter)

	return args.Error(0)
}

// Count replays the configured expectation for Count.
func (m *mockFile) Count(ctx context.Context) uint32 {
	args := m.Called(ctx)

	return args.Get(0).(uint32)
}

// EnsureReadonly replays the configured expectation for EnsureReadonly.
func (m *mockFile) EnsureReadonly(ctx context.Context) error {
	args := m.Called(ctx)

	return args.Error(0)
}
|
||||
270
pkg/storage/feed/block/chunk/chunk_benchmark_test.go
Normal file
270
pkg/storage/feed/block/chunk/chunk_benchmark_test.go
Normal file
@@ -0,0 +1,270 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
)
|
||||
|
||||
// --- Benchmark Setup ---
|
||||
|
||||
const (
	benchmarkFeedCount = 10000 // Number of feeds for benchmark setup
	benchmarkBatchSize = 100   // Batch size for append benchmark
)

// Package-level benchmark state shared across setup calls; reset by the
// cleanup function returned from setupBenchmarkFile.
var (
	benchmarkFeeds    []*Feed
	benchmarkOffsets  []uint64 // Store offsets for read benchmark
	benchmarkTempPath string
)
|
||||
|
||||
// setupBenchmarkFile creates a temporary file and populates it with benchmarkFeeds.
// It returns the chunk file opened in the requested mode and a cleanup function
// that removes the temp dir and resets the package-level benchmark state.
func setupBenchmarkFile(b *testing.B, readonly bool) (File, func()) {
	b.Helper()

	// Create temp file path only once; subsequent calls reuse the same file.
	if benchmarkTempPath == "" {
		dir, err := os.MkdirTemp("", "chunk-benchmark")
		if err != nil {
			b.Fatalf("Failed to create temp dir: %v", err)
		}
		benchmarkTempPath = filepath.Join(dir, "benchmark.chunk")
	}
	cleanup := func() {
		os.RemoveAll(filepath.Dir(benchmarkTempPath))
		benchmarkTempPath = "" // Reset path for next potential setup
		benchmarkFeeds = nil   // Clear feeds
		benchmarkOffsets = nil // Clear offsets
	}

	// Generate feeds only once per setup phase if needed
	if len(benchmarkFeeds) == 0 {
		benchmarkFeeds = generateBenchmarkFeeds(benchmarkFeedCount)
		benchmarkOffsets = make([]uint64, 0, benchmarkFeedCount)
	}

	// Create and populate the file in read-write mode first
	rwConfig := &Config{Path: benchmarkTempPath}
	rwFile, err := new("benchmark-setup", rwConfig, Dependencies{})
	if err != nil {
		cleanup()
		b.Fatalf("Failed to create benchmark file for setup: %v", err)
	}

	currentOffsetCount := int(rwFile.Count(context.Background()))
	if currentOffsetCount < benchmarkFeedCount { // Only append if not already populated
		appendCount := 0
		onSuccess := func(feed *Feed, offset uint64) error {
			// Collect offsets only during the initial population
			if len(benchmarkOffsets) < benchmarkFeedCount {
				benchmarkOffsets = append(benchmarkOffsets, offset)
			}
			appendCount++
			return nil
		}
		// Append remaining feeds in fixed-size batches.
		for i := currentOffsetCount; i < benchmarkFeedCount; i += benchmarkBatchSize {
			end := i + benchmarkBatchSize
			if end > benchmarkFeedCount {
				end = benchmarkFeedCount
			}
			if err := rwFile.Append(context.Background(), benchmarkFeeds[i:end], onSuccess); err != nil {
				rwFile.Close()
				cleanup()
				b.Fatalf("Failed to append feeds during setup: %v", err)
			}
		}
	}
	// Close the read-write file before potentially reopening as readonly
	if err := rwFile.Close(); err != nil {
		cleanup()
		b.Fatalf("Failed to close rw file during setup: %v", err)
	}

	// Reopen file with the desired mode for the benchmark
	config := &Config{
		Path:            benchmarkTempPath,
		ReadonlyAtFirst: readonly,
	}
	f, err := new("benchmark", config, Dependencies{})
	if err != nil {
		cleanup()
		b.Fatalf("Failed to open benchmark file in target mode: %v", err)
	}

	if readonly {
		// For read benchmarks, ensure mmap is active if file was just created/populated
		if err := f.EnsureReadonly(context.Background()); err != nil {
			f.Close()
			cleanup()
			b.Fatalf("Failed to ensure readonly mode: %v", err)
		}
	}

	return f, cleanup
}
|
||||
|
||||
// generateBenchmarkFeeds builds count synthetic feeds, each carrying a few
// small labels, one large (~8-15 KiB) text label, and two 1024-dim vectors,
// to approximate realistic chunk file payloads.
func generateBenchmarkFeeds(count int) []*Feed {
	feeds := make([]*Feed, count)
	rng := rand.New(rand.NewSource(time.Now().UnixNano())) // Use a fixed seed for reproducibility if needed
	// Pre-generate some random characters for building large strings efficiently.
	const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "
	letterRunes := []rune(letters)
	randString := func(n int) string {
		sb := strings.Builder{}
		sb.Grow(n)
		for i := 0; i < n; i++ {
			sb.WriteRune(letterRunes[rng.Intn(len(letterRunes))])
		}
		return sb.String()
	}

	minLabelSize := 8 * 1024  // 8KB
	maxLabelSize := 15 * 1024 // 15KB

	for i := range count {
		// Generate large label content size.
		largeLabelSize := minLabelSize + rng.Intn(maxLabelSize-minLabelSize+1)
		// Estimate the overhead of other labels and structure (key names, length prefixes etc.).
		// This is a rough estimation, adjust if needed.
		otherLabelsOverhead := 100
		largeContentSize := largeLabelSize - otherLabelsOverhead
		if largeContentSize < 0 {
			largeContentSize = 0
		}

		feeds[i] = &Feed{
			Feed: &model.Feed{
				ID: uint64(i + 1),
				Labels: model.Labels{
					model.Label{Key: "type", Value: fmt.Sprintf("type_%d", rng.Intn(10))},
					model.Label{Key: "source", Value: fmt.Sprintf("source_%d", rng.Intn(5))},
					model.Label{Key: "large_content", Value: randString(largeContentSize)}, // Add large label
				},
				Time: time.Now().Add(-time.Duration(rng.Intn(3600*24*30)) * time.Second), // Random time within the last 30 days
			},
			Vectors: [][]float32{
				generateFloat32Vector(rng, 1024), // Example dimension
				generateFloat32Vector(rng, 1024),
			},
		}
	}
	return feeds
}
|
||||
|
||||
func generateFloat32Vector(rng *rand.Rand, dim int) []float32 {
|
||||
vec := make([]float32, dim)
|
||||
for i := range vec {
|
||||
vec[i] = rng.Float32()
|
||||
}
|
||||
return vec
|
||||
}
|
||||
|
||||
// --- Benchmarks ---
|
||||
|
||||
// BenchmarkAppend measures appending fixed-size batches of feeds to a
// fresh read-write chunk file (fsync included in each Append).
func BenchmarkAppend(b *testing.B) {
	// Setup: Start with an empty file for appending.
	// Note: setupBenchmarkFile(b, false) creates the file but doesn't populate it fully here.
	// We need a fresh file for append benchmark.
	dir, err := os.MkdirTemp("", "chunk-append-benchmark")
	if err != nil {
		b.Fatalf("Failed to create temp dir: %v", err)
	}
	path := filepath.Join(dir, "append_benchmark.chunk")
	cleanup := func() {
		os.RemoveAll(dir)
	}
	defer cleanup()

	config := &Config{Path: path}
	f, err := new("benchmark-append", config, Dependencies{})
	if err != nil {
		b.Fatalf("Failed to create benchmark file for append: %v", err)
	}
	defer f.Close()

	feedsToAppend := generateBenchmarkFeeds(benchmarkBatchSize) // Generate a batch

	b.ResetTimer()
	b.ReportAllocs()
	// Measure appending batches of feeds.
	for i := 0; i < b.N; i++ {
		// Simulate appending new batches. In a real scenario, feeds would differ.
		// For benchmark consistency, we reuse the same batch data.
		err := f.Append(context.Background(), feedsToAppend, nil) // onSuccess is nil for performance
		if err != nil {
			b.Fatalf("Append failed during benchmark: %v", err)
		}
	}
	b.StopTimer() // Stop timer before potential cleanup/close overhead
}
|
||||
|
||||
// BenchmarkRead measures random-offset reads from a populated chunk file
// in readonly (mmap) mode.
func BenchmarkRead(b *testing.B) {
	// Setup: Populate a file and make it readonly (mmap).
	f, cleanup := setupBenchmarkFile(b, true)
	defer cleanup()

	if len(benchmarkOffsets) == 0 {
		b.Fatal("Benchmark setup failed: no offsets generated.")
	}

	// Pre-select random offsets to read (outside the timed region).
	rng := rand.New(rand.NewSource(42)) // Use a fixed seed for reproducibility
	readIndices := make([]int, b.N)
	for i := 0; i < b.N; i++ {
		readIndices[i] = rng.Intn(len(benchmarkOffsets))
	}

	b.ResetTimer()
	b.ReportAllocs()
	// Measure reading feeds at random valid offsets using mmap.
	for i := 0; i < b.N; i++ {
		offset := benchmarkOffsets[readIndices[i]]
		feed, err := f.Read(context.Background(), offset)
		if err != nil {
			b.Fatalf("Read failed during benchmark at offset %d: %v", offset, err)
		}
		// Prevent compiler optimization by using the result slightly
		if feed == nil {
			b.Fatal("Read returned nil feed")
		}
	}
	b.StopTimer()
}
|
||||
|
||||
func BenchmarkRange(b *testing.B) {
|
||||
// Setup: Populate a file and make it readonly (mmap).
|
||||
f, cleanup := setupBenchmarkFile(b, false)
|
||||
defer cleanup()
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
// Measure ranging over all feeds using mmap.
|
||||
for i := 0; i < b.N; i++ {
|
||||
count := 0
|
||||
err := f.Range(context.Background(), func(feed *Feed, offset uint64) (err error) {
|
||||
// Minimal operation inside the iterator
|
||||
count++
|
||||
if feed == nil { // Basic check
|
||||
return fmt.Errorf("nil feed encountered at offset %d", offset)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
b.Fatalf("Range failed during benchmark: %v", err)
|
||||
}
|
||||
// Optionally verify count, though it adds overhead to the benchmark itself
|
||||
// if uint32(count) != f.Count(context.Background()) {
|
||||
// b.Fatalf("Range count mismatch: expected %d, got %d", f.Count(context.Background()), count)
|
||||
// }
|
||||
}
|
||||
b.StopTimer()
|
||||
}
|
||||
567
pkg/storage/feed/block/chunk/chunk_test.go
Normal file
567
pkg/storage/feed/block/chunk/chunk_test.go
Normal file
@@ -0,0 +1,567 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
type givenDetail struct {
|
||||
path string
|
||||
readonlyAtFirst bool
|
||||
setupFeeds []*Feed
|
||||
}
|
||||
type whenDetail struct{}
|
||||
type thenExpected struct {
|
||||
count uint32
|
||||
err string
|
||||
}
|
||||
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Create New Chunk File",
|
||||
Given: "A valid non-existing file path",
|
||||
When: "Creating a new chunk file",
|
||||
Then: "Should return a valid File instance with count 0",
|
||||
GivenDetail: givenDetail{
|
||||
readonlyAtFirst: false,
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
count: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Open Existing Chunk File",
|
||||
Given: "A valid existing chunk file with data",
|
||||
When: "Opening the file in readonly mode",
|
||||
Then: "Should return a valid File instance with correct count",
|
||||
GivenDetail: givenDetail{
|
||||
readonlyAtFirst: true,
|
||||
setupFeeds: []*Feed{
|
||||
createTestFeed(1),
|
||||
createTestFeed(2),
|
||||
createTestFeed(3),
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
count: 3,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Invalid Configuration",
|
||||
Given: "An invalid configuration with empty path",
|
||||
When: "Creating a new chunk file",
|
||||
Then: "Should return an error",
|
||||
GivenDetail: givenDetail{
|
||||
path: "", // Empty path
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
err: "validate config: path is required",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// Given.
|
||||
if tt.GivenDetail.path == "" && tt.ThenExpected.err == "" {
|
||||
tt.GivenDetail.path = createTempFile(t)
|
||||
defer cleanupTempFile(tt.GivenDetail.path)
|
||||
}
|
||||
|
||||
if len(tt.GivenDetail.setupFeeds) > 0 {
|
||||
initialFile, err := new("test", &Config{
|
||||
Path: tt.GivenDetail.path,
|
||||
ReadonlyAtFirst: false,
|
||||
}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = initialFile.Append(context.Background(), tt.GivenDetail.setupFeeds, nil)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
initialFile.Close()
|
||||
}
|
||||
|
||||
// When.
|
||||
file, err := new("test", &Config{
|
||||
Path: tt.GivenDetail.path,
|
||||
ReadonlyAtFirst: tt.GivenDetail.readonlyAtFirst,
|
||||
}, Dependencies{})
|
||||
|
||||
// Then.
|
||||
if tt.ThenExpected.err != "" {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(file).NotTo(BeNil())
|
||||
Expect(file.Count(context.Background())).To(Equal(tt.ThenExpected.count))
|
||||
file.Close()
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileModeSwitching(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
tests := []struct {
|
||||
scenario string
|
||||
given string
|
||||
when string
|
||||
then string
|
||||
initialMode bool // true for readonly
|
||||
expectedError string
|
||||
}{
|
||||
{
|
||||
scenario: "ReadWrite to ReadOnly Switch",
|
||||
given: "a read-write mode chunk file",
|
||||
when: "calling EnsureReadonly()",
|
||||
then: "file should switch to read-only mode",
|
||||
initialMode: false,
|
||||
expectedError: "",
|
||||
},
|
||||
{
|
||||
scenario: "Already ReadOnly",
|
||||
given: "a read-only mode chunk file",
|
||||
when: "calling EnsureReadonly()",
|
||||
then: "operation should return quickly",
|
||||
initialMode: true,
|
||||
expectedError: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.scenario, func(t *testing.T) {
|
||||
// Setup
|
||||
path := createTempFile(t)
|
||||
defer cleanupTempFile(path)
|
||||
|
||||
// Create initial file
|
||||
initialConfig := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: false,
|
||||
}
|
||||
initialFile, err := new("test", &initialConfig, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
initialFile.Close()
|
||||
|
||||
// Open file with specified mode
|
||||
config := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: tt.initialMode,
|
||||
}
|
||||
f, err := new("test", &config, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer f.Close()
|
||||
|
||||
// Execute
|
||||
err = f.EnsureReadonly(context.Background())
|
||||
|
||||
// Verify
|
||||
if tt.expectedError != "" {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.expectedError))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
// Verify it's now in readonly mode by attempting an append
|
||||
appendErr := f.Append(context.Background(), []*Feed{createTestFeed(1)}, nil)
|
||||
Expect(appendErr).To(HaveOccurred())
|
||||
Expect(appendErr.Error()).To(ContainSubstring("file is readonly"))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAppend(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
type givenDetail struct {
|
||||
readonly bool
|
||||
}
|
||||
type whenDetail struct {
|
||||
appendFeeds []*Feed
|
||||
}
|
||||
type thenExpected struct {
|
||||
count uint32
|
||||
err string
|
||||
}
|
||||
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Append Single Feed",
|
||||
Given: "A read-write mode chunk file",
|
||||
When: "Adding a single feed",
|
||||
Then: "Should successfully write the feed",
|
||||
GivenDetail: givenDetail{
|
||||
readonly: false,
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
appendFeeds: []*Feed{createTestFeed(1)},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Batch Append Multiple Feeds",
|
||||
Given: "A read-write mode chunk file",
|
||||
When: "Adding multiple feeds at once",
|
||||
Then: "Should write all feeds as a single transaction",
|
||||
GivenDetail: givenDetail{
|
||||
readonly: false,
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
appendFeeds: []*Feed{
|
||||
createTestFeed(1),
|
||||
createTestFeed(2),
|
||||
createTestFeed(3),
|
||||
},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
count: 3,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Append in ReadOnly Mode",
|
||||
Given: "A read-only mode chunk file",
|
||||
When: "Attempting to add a feed",
|
||||
Then: "Should fail with readonly error",
|
||||
GivenDetail: givenDetail{
|
||||
readonly: true,
|
||||
},
|
||||
WhenDetail: whenDetail{
|
||||
appendFeeds: []*Feed{createTestFeed(1)},
|
||||
},
|
||||
ThenExpected: thenExpected{
|
||||
err: "file is readonly",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// Given.
|
||||
path := createTempFile(t)
|
||||
defer cleanupTempFile(path)
|
||||
|
||||
if tt.GivenDetail.readonly {
|
||||
// Create and close initial file for readonly test.
|
||||
rwFile, err := new("test", &Config{Path: path}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
rwFile.Close()
|
||||
}
|
||||
|
||||
f, err := new("test", &Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: tt.GivenDetail.readonly,
|
||||
}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer f.Close()
|
||||
|
||||
// When.
|
||||
var offsets []uint64
|
||||
err = f.Append(context.Background(), tt.WhenDetail.appendFeeds, func(_ *Feed, offset uint64) error {
|
||||
offsets = append(offsets, offset)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
// Then.
|
||||
if tt.ThenExpected.err != "" {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(f.Count(context.Background())).To(Equal(tt.ThenExpected.count))
|
||||
|
||||
// Verify each feed can be read back.
|
||||
for i, offset := range offsets {
|
||||
feed, readErr := f.Read(context.Background(), offset)
|
||||
Expect(readErr).NotTo(HaveOccurred())
|
||||
Expect(feed.ID).To(Equal(tt.WhenDetail.appendFeeds[i].ID))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
tests := []struct {
|
||||
scenario string
|
||||
given string
|
||||
when string
|
||||
then string
|
||||
readonly bool
|
||||
setupFeeds []*Feed
|
||||
readOffset uint64
|
||||
expectedErr string
|
||||
}{
|
||||
{
|
||||
scenario: "Read from Valid Offset",
|
||||
given: "a chunk file with feeds",
|
||||
when: "reading with a valid offset",
|
||||
then: "should return the correct feed",
|
||||
readonly: false,
|
||||
setupFeeds: []*Feed{createTestFeed(1)},
|
||||
readOffset: uint64(dataStart), // Will be adjusted in the test
|
||||
expectedErr: "",
|
||||
},
|
||||
{
|
||||
scenario: "Read from ReadOnly Mode",
|
||||
given: "a read-only chunk file with feeds",
|
||||
when: "reading with a valid offset",
|
||||
then: "should return the correct feed using mmap",
|
||||
readonly: true,
|
||||
setupFeeds: []*Feed{createTestFeed(2)},
|
||||
readOffset: uint64(dataStart), // Will be adjusted in the test
|
||||
expectedErr: "",
|
||||
},
|
||||
{
|
||||
scenario: "Read with Small Offset",
|
||||
given: "a chunk file with feeds",
|
||||
when: "reading with an offset smaller than dataStart",
|
||||
then: "should return 'offset too small' error",
|
||||
readonly: false,
|
||||
setupFeeds: []*Feed{createTestFeed(3)},
|
||||
readOffset: uint64(dataStart - 1),
|
||||
expectedErr: "offset too small",
|
||||
},
|
||||
{
|
||||
scenario: "Read with Large Offset",
|
||||
given: "a chunk file with feeds",
|
||||
when: "reading with an offset larger than appendOffset",
|
||||
then: "should return 'offset too large' error",
|
||||
readonly: false,
|
||||
setupFeeds: []*Feed{createTestFeed(4)},
|
||||
readOffset: 999999, // Definitely beyond appendOffset
|
||||
expectedErr: "offset too large",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.scenario, func(t *testing.T) {
|
||||
// Setup
|
||||
path := createTempFile(t)
|
||||
defer cleanupTempFile(path)
|
||||
|
||||
// Create and populate initial file
|
||||
initialConfig := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: false,
|
||||
}
|
||||
initialFile, err := new("test", &initialConfig, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
var validOffset uint64
|
||||
if len(tt.setupFeeds) > 0 {
|
||||
// Track the first offset for later reading
|
||||
var firstOffset uint64
|
||||
err = initialFile.Append(context.Background(), tt.setupFeeds, func(_ *Feed, offset uint64) error {
|
||||
if firstOffset == 0 {
|
||||
firstOffset = offset
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
validOffset = firstOffset
|
||||
}
|
||||
initialFile.Close()
|
||||
|
||||
// Reopen with specified mode
|
||||
config := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: tt.readonly,
|
||||
}
|
||||
f, err := new("test", &config, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer f.Close()
|
||||
|
||||
// Use valid offset if needed
|
||||
readOffset := tt.readOffset
|
||||
if readOffset == uint64(dataStart) && validOffset > 0 {
|
||||
readOffset = validOffset
|
||||
}
|
||||
|
||||
// Execute
|
||||
feed, err := f.Read(context.Background(), readOffset)
|
||||
|
||||
// Verify
|
||||
if tt.expectedErr != "" {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.expectedErr))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Expect(feed).NotTo(BeNil())
|
||||
Expect(feed.ID).To(Equal(tt.setupFeeds[0].ID))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRange(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
tests := []struct {
|
||||
scenario string
|
||||
given string
|
||||
when string
|
||||
then string
|
||||
readonly bool
|
||||
setupFeeds []*Feed
|
||||
earlyExit bool
|
||||
expectedCount int
|
||||
expectedErr string
|
||||
}{
|
||||
{
|
||||
scenario: "Range All Feeds",
|
||||
given: "a chunk file with multiple feeds",
|
||||
when: "calling Range()",
|
||||
then: "iterator should visit each feed in sequence",
|
||||
readonly: false,
|
||||
setupFeeds: []*Feed{
|
||||
createTestFeed(1),
|
||||
createTestFeed(2),
|
||||
createTestFeed(3),
|
||||
},
|
||||
earlyExit: false,
|
||||
expectedCount: 3,
|
||||
expectedErr: "",
|
||||
},
|
||||
{
|
||||
scenario: "Range with Early Exit",
|
||||
given: "a chunk file with multiple feeds",
|
||||
when: "calling Range() and returning an error from iterator",
|
||||
then: "range should stop and return that error",
|
||||
readonly: false,
|
||||
setupFeeds: []*Feed{
|
||||
createTestFeed(4),
|
||||
createTestFeed(5),
|
||||
createTestFeed(6),
|
||||
},
|
||||
earlyExit: true,
|
||||
expectedCount: 1, // Should stop after first feed
|
||||
expectedErr: "early exit",
|
||||
},
|
||||
{
|
||||
scenario: "Range in ReadOnly Mode",
|
||||
given: "a read-only chunk file with feeds",
|
||||
when: "calling Range()",
|
||||
then: "should use mmap and correctly visit all feeds",
|
||||
readonly: true,
|
||||
setupFeeds: []*Feed{
|
||||
createTestFeed(7),
|
||||
createTestFeed(8),
|
||||
},
|
||||
earlyExit: false,
|
||||
expectedCount: 2,
|
||||
expectedErr: "",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.scenario, func(t *testing.T) {
|
||||
// Setup
|
||||
path := createTempFile(t)
|
||||
defer cleanupTempFile(path)
|
||||
|
||||
// Create and populate initial file
|
||||
initialConfig := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: false,
|
||||
}
|
||||
initialFile, err := new("test", &initialConfig, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
if len(tt.setupFeeds) > 0 {
|
||||
err = initialFile.Append(context.Background(), tt.setupFeeds, nil)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
}
|
||||
initialFile.Close()
|
||||
|
||||
// Reopen with specified mode
|
||||
config := Config{
|
||||
Path: path,
|
||||
ReadonlyAtFirst: tt.readonly,
|
||||
}
|
||||
f, err := new("test", &config, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer f.Close()
|
||||
|
||||
// Execute
|
||||
visitCount := 0
|
||||
err = f.Range(context.Background(), func(feed *Feed, offset uint64) (err error) {
|
||||
visitCount++
|
||||
if tt.earlyExit && visitCount == 1 {
|
||||
return errors.New("early exit")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// Verify
|
||||
if tt.expectedErr != "" {
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring(tt.expectedErr))
|
||||
} else {
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
}
|
||||
Expect(visitCount).To(Equal(tt.expectedCount))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func createTempFile(t *testing.T) string {
|
||||
dir, err := os.MkdirTemp("", "chunk-test")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp dir: %v", err)
|
||||
}
|
||||
return filepath.Join(dir, "test.chunk")
|
||||
}
|
||||
|
||||
func cleanupTempFile(path string) {
|
||||
os.RemoveAll(filepath.Dir(path))
|
||||
}
|
||||
|
||||
func createTestFeed(id uint64) *Feed {
|
||||
return &Feed{
|
||||
Feed: &model.Feed{
|
||||
ID: id,
|
||||
Labels: model.Labels{model.Label{Key: "test", Value: "value"}},
|
||||
Time: time.Now(),
|
||||
},
|
||||
Vectors: [][]float32{
|
||||
{1.0, 2.0, 3.0},
|
||||
{4.0, 5.0, 6.0},
|
||||
},
|
||||
}
|
||||
}
|
||||
296
pkg/storage/feed/block/chunk/encoding.go
Normal file
296
pkg/storage/feed/block/chunk/encoding.go
Normal file
@@ -0,0 +1,296 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
binaryutil "github.com/glidea/zenfeed/pkg/util/binary"
|
||||
"github.com/glidea/zenfeed/pkg/util/buffer"
|
||||
)
|
||||
|
||||
const (
|
||||
// feedHeaderSize is the size of the record header (length + checksum).
|
||||
feedHeaderSize = 8 // uint32 length + uint32 checksum
|
||||
)
|
||||
|
||||
var (
|
||||
errChecksumMismatch = errors.New("checksum mismatch")
|
||||
|
||||
crc32Table = crc32.MakeTable(crc32.IEEE)
|
||||
)
|
||||
|
||||
// Feed is the feed model in the chunk file.
|
||||
type Feed struct {
|
||||
*model.Feed
|
||||
Vectors [][]float32
|
||||
}
|
||||
|
||||
// encodeTo encodes the Feed into the provided buffer, including a length prefix and checksum.
|
||||
// It writes the record structure: [payloadLen(uint32)][checksum(uint32)][payload...].
|
||||
func (f *Feed) encodeTo(buf *buffer.Bytes) error {
|
||||
buf.EnsureRemaining(4 * 1024)
|
||||
|
||||
// 1. Reserve space for length and checksum.
|
||||
startOffset := buf.Len()
|
||||
headerPos := buf.Len() // Position where header starts.
|
||||
buf.B = buf.B[:headerPos+feedHeaderSize] // Extend buffer to include header space.
|
||||
payloadStartOffset := buf.Len() // Position where payload starts.
|
||||
|
||||
// 2. Encode the actual payload.
|
||||
if err := f.encodePayload(buf); err != nil {
|
||||
// If payload encoding fails, revert the buffer to its initial state.
|
||||
buf.B = buf.B[:startOffset]
|
||||
|
||||
return errors.Wrap(err, "encode payload")
|
||||
}
|
||||
payloadEndOffset := buf.Len()
|
||||
|
||||
// 3. Calculate payload length and checksum.
|
||||
payloadLen := uint32(payloadEndOffset - payloadStartOffset)
|
||||
payloadSlice := buf.Bytes()[payloadStartOffset:payloadEndOffset]
|
||||
checksum := crc32.Checksum(payloadSlice, crc32Table)
|
||||
|
||||
// 4. Write the actual length and checksum into the reserved space.
|
||||
binary.LittleEndian.PutUint32(buf.Bytes()[headerPos:headerPos+4], payloadLen)
|
||||
binary.LittleEndian.PutUint32(buf.Bytes()[headerPos+4:headerPos+8], checksum)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// encodePayload encodes the core fields (ID, Time, Labels, Vectors) into the buffer.
|
||||
func (f *Feed) encodePayload(w io.Writer) error {
|
||||
// Write ID.
|
||||
if err := binaryutil.WriteUint64(w, f.ID); err != nil {
|
||||
return errors.Wrap(err, "write id")
|
||||
}
|
||||
|
||||
// Write time.
|
||||
if err := binaryutil.WriteUint64(w, uint64(f.Time.UnixNano())); err != nil {
|
||||
return errors.Wrap(err, "write time")
|
||||
}
|
||||
|
||||
// Write labels.
|
||||
if err := f.encodeLabels(w); err != nil {
|
||||
return errors.Wrap(err, "encode labels")
|
||||
}
|
||||
|
||||
// Write vectors.
|
||||
if err := f.encodeVectors(w); err != nil {
|
||||
return errors.Wrap(err, "encode vectors")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// encodeLabels writes the label data to the writer.
|
||||
func (f *Feed) encodeLabels(w io.Writer) error {
|
||||
labelsLen := uint32(len(f.Labels))
|
||||
if len(f.Labels) > math.MaxUint32 {
|
||||
return errors.New("too many labels")
|
||||
}
|
||||
if err := binaryutil.WriteUint32(w, labelsLen); err != nil {
|
||||
return errors.Wrap(err, "write labels count")
|
||||
}
|
||||
for i, label := range f.Labels {
|
||||
if err := binaryutil.WriteString(w, label.Key); err != nil {
|
||||
return errors.Wrapf(err, "write label key index %d", i)
|
||||
}
|
||||
if err := binaryutil.WriteString(w, label.Value); err != nil {
|
||||
return errors.Wrapf(err, "write label value index %d", i)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// encodeVectors writes the vector data to the writer.
|
||||
func (f *Feed) encodeVectors(w io.Writer) error {
|
||||
vectorCount := uint32(len(f.Vectors))
|
||||
if len(f.Vectors) > math.MaxUint32 {
|
||||
return errors.New("too many vectors")
|
||||
}
|
||||
if err := binaryutil.WriteUint32(w, vectorCount); err != nil {
|
||||
return errors.Wrap(err, "write vectors count")
|
||||
}
|
||||
if vectorCount == 0 {
|
||||
return nil // Nothing more to write if there are no vectors.
|
||||
}
|
||||
|
||||
// Write dimension.
|
||||
dimension := uint32(len(f.Vectors[0]))
|
||||
if len(f.Vectors[0]) > math.MaxUint32 {
|
||||
return errors.New("vector dimension exceeds maximum uint32")
|
||||
}
|
||||
if err := binaryutil.WriteUint32(w, dimension); err != nil {
|
||||
return errors.Wrap(err, "write vector dimension")
|
||||
}
|
||||
|
||||
// Write vector data.
|
||||
var floatBuf [4]byte
|
||||
for i, vec := range f.Vectors {
|
||||
// Ensure vector has the correct dimension.
|
||||
if uint32(len(vec)) != dimension {
|
||||
return errors.Errorf("vector %d has inconsistent dimension %d, expected %d", i, len(vec), dimension)
|
||||
}
|
||||
|
||||
for _, val := range vec { // Avoid using binary.Write for performance.
|
||||
bits := math.Float32bits(val)
|
||||
binary.LittleEndian.PutUint32(floatBuf[:], bits)
|
||||
if _, err := w.Write(floatBuf[:]); err != nil {
|
||||
return errors.Wrapf(err, "write for vector %d, value %f", i, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *Feed) validateFrom(r io.Reader, buf *buffer.Bytes) (err error) {
|
||||
// 1. Read header (length and checksum).
|
||||
var payloadLen, expectedChecksum uint32
|
||||
startOffset := buf.Len()
|
||||
if _, err := io.CopyN(buf, r, feedHeaderSize); err != nil {
|
||||
return errors.Wrap(err, "read header")
|
||||
}
|
||||
payloadLen = binary.LittleEndian.Uint32(buf.B[startOffset : startOffset+4])
|
||||
expectedChecksum = binary.LittleEndian.Uint32(buf.B[startOffset+4:])
|
||||
|
||||
// 2. Read payload, calculate checksum simultaneously.
|
||||
buf.EnsureRemaining(int(payloadLen))
|
||||
limitedReader := io.LimitReader(r, int64(payloadLen))
|
||||
checksumWriter := crc32.New(crc32Table)
|
||||
teeReader := io.TeeReader(limitedReader, checksumWriter)
|
||||
|
||||
// Read the exact payload length into the buffer.
|
||||
if _, err := io.CopyN(buf, teeReader, int64(payloadLen)); err != nil {
|
||||
// EOF, may be writing not complete.
|
||||
return errors.Wrap(err, "read payload")
|
||||
}
|
||||
|
||||
// 3. Verify checksum.
|
||||
calculatedChecksum := checksumWriter.Sum32()
|
||||
if calculatedChecksum != expectedChecksum {
|
||||
return errors.Wrapf(errChecksumMismatch, "expected %x, got %x", expectedChecksum, calculatedChecksum)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeFrom decodes the feed from the reader, validating length and checksum.
|
||||
// It expects the format: [payloadLen(uint32)][checksum(uint32)][payload...].
|
||||
func (f *Feed) decodeFrom(r io.Reader) (err error) {
|
||||
buf := buffer.Get()
|
||||
defer buffer.Put(buf)
|
||||
|
||||
if err := f.validateFrom(r, buf); err != nil {
|
||||
return errors.Wrap(err, "validate payload")
|
||||
}
|
||||
|
||||
payloadReader := bytes.NewReader(buf.B[feedHeaderSize:])
|
||||
if err := f.decodePayload(payloadReader); err != nil {
|
||||
return errors.Wrap(err, "decode payload")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodePayload decodes the core fields from the reader.
|
||||
func (f *Feed) decodePayload(r io.Reader) error {
|
||||
f.Feed = &model.Feed{} // Ensure Feed is initialized.
|
||||
|
||||
// Read ID.
|
||||
if err := binary.Read(r, binary.LittleEndian, &f.ID); err != nil {
|
||||
return errors.Wrap(err, "read id")
|
||||
}
|
||||
|
||||
// Read time.
|
||||
var timestamp int64
|
||||
if err := binary.Read(r, binary.LittleEndian, ×tamp); err != nil {
|
||||
return errors.Wrap(err, "read time")
|
||||
}
|
||||
f.Time = time.Unix(0, timestamp).In(time.UTC)
|
||||
|
||||
// Read labels.
|
||||
if err := f.decodeLabels(r); err != nil {
|
||||
return errors.Wrap(err, "decode labels")
|
||||
}
|
||||
|
||||
// Read vectors.
|
||||
if err := f.decodeVectors(r); err != nil {
|
||||
return errors.Wrap(err, "decode vectors")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeLabels reads the label data from the reader.
|
||||
func (f *Feed) decodeLabels(r io.Reader) error {
|
||||
var labelCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &labelCount); err != nil {
|
||||
return errors.Wrap(err, "read labels count")
|
||||
}
|
||||
|
||||
f.Labels = make(model.Labels, labelCount)
|
||||
for i := range labelCount {
|
||||
// Read key.
|
||||
key, err := binaryutil.ReadString(r)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "read label key index %d", i)
|
||||
}
|
||||
|
||||
// Read value.
|
||||
value, err := binaryutil.ReadString(r)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "read label value index %d", i)
|
||||
}
|
||||
|
||||
f.Labels[i] = model.Label{
|
||||
Key: key,
|
||||
Value: value,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeVectors reads the vector data from the reader.
|
||||
func (f *Feed) decodeVectors(r io.Reader) error {
|
||||
var vectorCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &vectorCount); err != nil {
|
||||
return errors.Wrap(err, "read vectors count")
|
||||
}
|
||||
if vectorCount == 0 {
|
||||
f.Vectors = nil // Ensure vectors is nil if count is 0
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
f.Vectors = make([][]float32, vectorCount)
|
||||
|
||||
var dimension uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &dimension); err != nil {
|
||||
return errors.Wrap(err, "read vector dimension")
|
||||
}
|
||||
|
||||
// Pre-allocate the underlying float data contiguously for potentially better cache locality.
|
||||
totalFloats := uint64(vectorCount) * uint64(dimension)
|
||||
floatData := make([]float32, totalFloats)
|
||||
|
||||
offset := 0
|
||||
for i := range vectorCount {
|
||||
f.Vectors[i] = floatData[offset : offset+int(dimension)] // Slice into the pre-allocated data
|
||||
if err := binary.Read(r, binary.LittleEndian, f.Vectors[i]); err != nil {
|
||||
return errors.Wrapf(err, "read vector data for vector %d", i)
|
||||
}
|
||||
offset += int(dimension)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
14
pkg/storage/feed/block/index/codec.go
Normal file
14
pkg/storage/feed/block/index/codec.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Codec defines interface for encoding and decoding index.
|
||||
// Codec is the serialization contract shared by block indexes: an index
// persists itself to a writer and restores itself from a reader.
type Codec interface {
	// EncodeTo encodes the index to the given writer.
	EncodeTo(ctx context.Context, w io.Writer) (err error)
	// DecodeFrom decodes the index from the given reader.
	DecodeFrom(ctx context.Context, r io.Reader) (err error)
}
|
||||
436
pkg/storage/feed/block/index/inverted/inverted.go
Normal file
436
pkg/storage/feed/block/index/inverted/inverted.go
Normal file
@@ -0,0 +1,436 @@
|
||||
package inverted
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"maps"
|
||||
"sync"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
binaryutil "github.com/glidea/zenfeed/pkg/util/binary"
|
||||
)
|
||||
|
||||
// --- Interface code block ---
|
||||
type Index interface {
|
||||
component.Component
|
||||
index.Codec
|
||||
|
||||
// Search returns item IDs matching the given label and value.
|
||||
Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{})
|
||||
// Add adds item to the index.
|
||||
// If label or value in labels is empty, it will be ignored.
|
||||
// If value is too long, it will be ignored,
|
||||
// because does not support regex search, so long value is not useful.
|
||||
Add(ctx context.Context, id uint64, labels model.Labels)
|
||||
}
|
||||
|
||||
// Config holds the (currently empty) inverted-index configuration.
type Config struct{}

// Dependencies holds the (currently empty) inverted-index dependencies.
type Dependencies struct{}

const (
	// maxLabelValueLength caps the length of indexed values; longer
	// values are skipped by Add.
	maxLabelValueLength = 64
)

var (
	// headerMagicNumber identifies a serialized index stream.
	headerMagicNumber = []byte{0x77, 0x79, 0x73, 0x20, 0x69, 0x73, 0x20,
		0x61, 0x77, 0x65, 0x73, 0x6f, 0x6d, 0x65, 0x00, 0x00}
	// headerVersion is the current serialization format version.
	headerVersion = uint8(1)
)
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Index, Config, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Index, Config, Dependencies](
|
||||
func(instance string, config *Config, dependencies Dependencies) (Index, error) {
|
||||
m := &mockIndex{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Index, Config, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, config *Config, dependencies Dependencies) (Index, error) {
|
||||
return &idx{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "FeedInvertedIndex",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
m: make(map[string]map[string]map[uint64]struct{}, 64),
|
||||
ids: make(map[uint64]struct{}, 64),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---

// idx is the in-memory implementation of Index.
// All fields are guarded by mu.
type idx struct {
	*component.Base[Config, Dependencies]

	// Label -> values -> ids.
	m map[string]map[string]map[uint64]struct{}
	// All ids.
	ids map[uint64]struct{}
	mu  sync.RWMutex
}
|
||||
|
||||
func (idx *idx) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Search")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
if value == "" {
|
||||
return idx.searchEmptyValue(label, eq)
|
||||
}
|
||||
|
||||
return idx.searchNonEmptyValue(label, eq, value)
|
||||
}
|
||||
|
||||
func (idx *idx) Add(ctx context.Context, id uint64, labels model.Labels) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Add")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
idx.mu.Lock()
|
||||
defer idx.mu.Unlock()
|
||||
|
||||
// Add all labels.
|
||||
for _, label := range labels {
|
||||
if label.Key == "" || label.Value == "" {
|
||||
continue
|
||||
}
|
||||
if len(label.Value) > maxLabelValueLength {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := idx.m[label.Key]; !ok {
|
||||
idx.m[label.Key] = make(map[string]map[uint64]struct{})
|
||||
}
|
||||
if _, ok := idx.m[label.Key][label.Value]; !ok {
|
||||
idx.m[label.Key][label.Value] = make(map[uint64]struct{})
|
||||
}
|
||||
idx.m[label.Key][label.Value][id] = struct{}{}
|
||||
}
|
||||
|
||||
// Add to ids.
|
||||
idx.ids[id] = struct{}{}
|
||||
}
|
||||
|
||||
func (idx *idx) EncodeTo(ctx context.Context, w io.Writer) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "EncodeTo")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
if err := idx.writeHeader(w); err != nil {
|
||||
return errors.Wrap(err, "write header")
|
||||
}
|
||||
|
||||
if err := idx.writeLabels(w); err != nil {
|
||||
return errors.Wrap(err, "write labels")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DecodeFrom decodes the index from the given reader.
|
||||
func (idx *idx) DecodeFrom(ctx context.Context, r io.Reader) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "DecodeFrom")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
idx.mu.Lock()
|
||||
defer idx.mu.Unlock()
|
||||
|
||||
// Read header.
|
||||
if err := idx.readHeader(r); err != nil {
|
||||
return errors.Wrap(err, "read header")
|
||||
}
|
||||
|
||||
// Read labels.
|
||||
if err := idx.readLabels(r); err != nil {
|
||||
return errors.Wrap(err, "read labels")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// searchEmptyValue handles the search logic when the target value is empty.
|
||||
// If eq is true, it returns IDs that *do not* have the given label.
|
||||
// If eq is false, it returns IDs that *do* have the given label (with any value).
|
||||
func (idx *idx) searchEmptyValue(label string, eq bool) map[uint64]struct{} {
|
||||
// Find all IDs associated with the given label, regardless of value.
|
||||
idsWithLabel := make(map[uint64]struct{})
|
||||
if values, ok := idx.m[label]; ok {
|
||||
for _, ids := range values {
|
||||
for id := range ids {
|
||||
idsWithLabel[id] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If not equal (!eq), return the IDs that have the label.
|
||||
if !eq {
|
||||
return idsWithLabel
|
||||
}
|
||||
|
||||
// If equal (eq), return IDs that *do not* have the label.
|
||||
// Start with all known IDs and remove those that have the label.
|
||||
resultIDs := maps.Clone(idx.ids)
|
||||
for id := range idsWithLabel {
|
||||
delete(resultIDs, id)
|
||||
}
|
||||
|
||||
return resultIDs
|
||||
}
|
||||
|
||||
// searchNonEmptyValue handles the search logic when the target value is not empty.
|
||||
// If eq is true, it returns IDs that have the exact label-value pair.
|
||||
// If eq is false, it returns IDs that *do not* have the exact label-value pair.
|
||||
func (idx *idx) searchNonEmptyValue(label string, eq bool, value string) map[uint64]struct{} {
|
||||
// Get the map of values for the given label.
|
||||
values, labelExists := idx.m[label]
|
||||
|
||||
// If equal (eq), find the exact match.
|
||||
if eq {
|
||||
if !labelExists {
|
||||
return make(map[uint64]struct{}) // Label doesn't exist.
|
||||
}
|
||||
ids, valueExists := values[value]
|
||||
if !valueExists {
|
||||
return make(map[uint64]struct{}) // Value doesn't exist for this label.
|
||||
}
|
||||
|
||||
// Return a clone to prevent modification of the underlying index data.
|
||||
return maps.Clone(ids)
|
||||
}
|
||||
|
||||
// If not equal (!eq), return IDs that *do not* have this specific label-value pair.
|
||||
// Start with all known IDs.
|
||||
resultIDs := maps.Clone(idx.ids)
|
||||
if labelExists {
|
||||
// If the specific label-value pair exists, remove its associated IDs.
|
||||
if matchingIDs, valueExists := values[value]; valueExists {
|
||||
for id := range matchingIDs {
|
||||
delete(resultIDs, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resultIDs
|
||||
}
|
||||
|
||||
func (idx *idx) writeHeader(w io.Writer) error {
|
||||
if _, err := w.Write(headerMagicNumber); err != nil {
|
||||
return errors.Wrap(err, "write header magic number")
|
||||
}
|
||||
if _, err := w.Write([]byte{headerVersion}); err != nil {
|
||||
return errors.Wrap(err, "write header version")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idx *idx) writeLabels(w io.Writer) error {
|
||||
// Write total unique ID count.
|
||||
idCount := uint32(len(idx.ids))
|
||||
if err := binary.Write(w, binary.LittleEndian, idCount); err != nil {
|
||||
return errors.Wrap(err, "write total id count")
|
||||
}
|
||||
|
||||
// Write label count.
|
||||
labelCount := uint32(len(idx.m))
|
||||
if err := binary.Write(w, binary.LittleEndian, labelCount); err != nil {
|
||||
return errors.Wrap(err, "write label count")
|
||||
}
|
||||
|
||||
// Write each label and its associated value entries.
|
||||
for label, values := range idx.m {
|
||||
if err := idx.writeLabelEntry(w, label, values); err != nil {
|
||||
return errors.Wrap(err, "write label entry")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeLabelEntry writes a single label, its value count, and then calls writeValueEntry for each value.
|
||||
func (idx *idx) writeLabelEntry(w io.Writer, label string, values map[string]map[uint64]struct{}) error {
|
||||
// Write label string.
|
||||
if err := binaryutil.WriteString(w, label); err != nil {
|
||||
return errors.Wrap(err, "write label")
|
||||
}
|
||||
|
||||
// Write value count for this label.
|
||||
valueCount := uint32(len(values))
|
||||
if err := binary.Write(w, binary.LittleEndian, valueCount); err != nil {
|
||||
return errors.Wrap(err, "write value count for label")
|
||||
}
|
||||
|
||||
// Write each value and its associated IDs.
|
||||
for value, ids := range values {
|
||||
if err := idx.writeValueEntry(w, value, ids); err != nil {
|
||||
return errors.Wrap(err, "write value entry")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeValueEntry writes a single value, its ID count, and then writes each associated ID.
|
||||
func (idx *idx) writeValueEntry(w io.Writer, value string, ids map[uint64]struct{}) error {
|
||||
// Write value string.
|
||||
if err := binaryutil.WriteString(w, value); err != nil {
|
||||
return errors.Wrap(err, "write value")
|
||||
}
|
||||
|
||||
// Write ID count for this label-value pair.
|
||||
idCount := uint32(len(ids))
|
||||
if err := binary.Write(w, binary.LittleEndian, idCount); err != nil {
|
||||
return errors.Wrap(err, "write id count for value")
|
||||
}
|
||||
|
||||
// Write each associated ID.
|
||||
for id := range ids {
|
||||
if err := binary.Write(w, binary.LittleEndian, id); err != nil {
|
||||
return errors.Wrap(err, "write id")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idx *idx) readHeader(r io.Reader) error {
|
||||
magicNumber := make([]byte, len(headerMagicNumber))
|
||||
if _, err := io.ReadFull(r, magicNumber); err != nil {
|
||||
return errors.Wrap(err, "read header magic number")
|
||||
}
|
||||
if !bytes.Equal(magicNumber, headerMagicNumber) {
|
||||
return errors.New("invalid magic number")
|
||||
}
|
||||
|
||||
versionByte := make([]byte, 1)
|
||||
if _, err := io.ReadFull(r, versionByte); err != nil {
|
||||
return errors.Wrap(err, "read header version")
|
||||
}
|
||||
if versionByte[0] != headerVersion {
|
||||
return errors.New("invalid version")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idx *idx) readLabels(r io.Reader) error {
|
||||
// Read total unique ID count (used for pre-allocation).
|
||||
var totalIDCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &totalIDCount); err != nil {
|
||||
return errors.Wrap(err, "read total id count")
|
||||
}
|
||||
idx.ids = make(map[uint64]struct{}, totalIDCount) // Pre-allocate ids map.
|
||||
|
||||
// Read label count.
|
||||
var labelCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &labelCount); err != nil {
|
||||
return errors.Wrap(err, "read label count")
|
||||
}
|
||||
idx.m = make(map[string]map[string]map[uint64]struct{}, labelCount) // Pre-allocate labels map.
|
||||
|
||||
// Read each label and its associated value entries.
|
||||
for range labelCount {
|
||||
if err := idx.readLabelEntry(r); err != nil {
|
||||
return errors.Wrap(err, "read label entry")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readLabelEntry reads a single label, its value count, and then calls readValueEntry for each value.
|
||||
func (idx *idx) readLabelEntry(r io.Reader) error {
|
||||
// Read label string.
|
||||
label, err := binaryutil.ReadString(r)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read label")
|
||||
}
|
||||
|
||||
// Read value count for this label.
|
||||
var valueCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &valueCount); err != nil {
|
||||
return errors.Wrap(err, "read value count for label")
|
||||
}
|
||||
idx.m[label] = make(map[string]map[uint64]struct{}, valueCount) // Pre-allocate values map for this label.
|
||||
|
||||
// Read each value and its associated IDs.
|
||||
for range valueCount {
|
||||
if err := idx.readValueEntry(r, label); err != nil {
|
||||
return errors.Wrap(err, "read value entry")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readValueEntry reads a single value, its ID count, and then reads each associated ID, populating the index maps.
|
||||
func (idx *idx) readValueEntry(r io.Reader, label string) error {
|
||||
// Read value string.
|
||||
value, err := binaryutil.ReadString(r)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read value")
|
||||
}
|
||||
|
||||
// Read ID count for this label-value pair.
|
||||
var idCount uint32
|
||||
if err := binary.Read(r, binary.LittleEndian, &idCount); err != nil {
|
||||
return errors.Wrap(err, "read id count for value")
|
||||
}
|
||||
idx.m[label][value] = make(map[uint64]struct{}, idCount) // Pre-allocate ids map for this label-value.
|
||||
|
||||
// Read each associated ID.
|
||||
for range idCount {
|
||||
var id uint64
|
||||
if err := binary.Read(r, binary.LittleEndian, &id); err != nil {
|
||||
return errors.Wrap(err, "read id")
|
||||
}
|
||||
idx.m[label][value][id] = struct{}{}
|
||||
idx.ids[id] = struct{}{} // Add to the global set of IDs.
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mockIndex is a mock implementation of Index for tests.
type mockIndex struct {
	component.Mock
}

// Search records the call and returns the configured ID set.
func (m *mockIndex) Search(ctx context.Context, label string, eq bool, value string) (ids map[uint64]struct{}) {
	args := m.Called(ctx, label, eq, value)

	return args.Get(0).(map[uint64]struct{})
}

// Add records the call.
func (m *mockIndex) Add(ctx context.Context, id uint64, labels model.Labels) {
	m.Called(ctx, id, labels)
}

// EncodeTo records the call and returns the configured error.
func (m *mockIndex) EncodeTo(ctx context.Context, w io.Writer) (err error) {
	args := m.Called(ctx, w)

	return args.Error(0)
}

// DecodeFrom records the call and returns the configured error.
func (m *mockIndex) DecodeFrom(ctx context.Context, r io.Reader) (err error) {
	args := m.Called(ctx, r)

	return args.Error(0)
}
|
||||
327
pkg/storage/feed/block/index/inverted/inverted_test.go
Normal file
327
pkg/storage/feed/block/index/inverted/inverted_test.go
Normal file
@@ -0,0 +1,327 @@
|
||||
package inverted
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
// TestAdd verifies that Add populates the label -> value -> id inverted map.
func TestAdd(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		existingLabels map[uint64]model.Labels // Items pre-loaded into the index.
	}
	type whenDetail struct {
		id     uint64
		labels model.Labels
	}
	type thenExpected struct {
		// Expected subset of the internal index state after the Add.
		indexState map[string]map[string]map[uint64]struct{}
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Add Single Label",
			Given:    "An empty index",
			When:     "Adding an item with a single label",
			Then:     "Should index the item correctly",
			GivenDetail: givenDetail{
				existingLabels: map[uint64]model.Labels{},
			},
			WhenDetail: whenDetail{
				id: 1,
				labels: model.Labels{
					{Key: "category", Value: "tech"},
				},
			},
			ThenExpected: thenExpected{
				indexState: map[string]map[string]map[uint64]struct{}{
					"category": {
						"tech": {1: struct{}{}},
					},
				},
			},
		},
		{
			Scenario: "Add Multiple Labels",
			Given:    "An empty index",
			When:     "Adding an item with multiple labels",
			Then:     "Should index all labels correctly",
			GivenDetail: givenDetail{
				existingLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}},
					3: {model.Label{Key: "category", Value: "news"}},
				},
			},
			WhenDetail: whenDetail{
				id: 2,
				labels: model.Labels{
					{Key: "category", Value: "tech"},
					{Key: "status", Value: "new"},
					{Key: "author", Value: "john"},
				},
			},
			ThenExpected: thenExpected{
				indexState: map[string]map[string]map[uint64]struct{}{
					"category": {
						"tech": {1: struct{}{}, 2: struct{}{}},
						"news": {3: struct{}{}},
					},
					"status": {
						"new": {2: struct{}{}},
					},
					"author": {
						"john": {2: struct{}{}},
					},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx0, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, labels := range tt.GivenDetail.existingLabels {
				idx0.Add(context.Background(), id, labels)
			}

			// When.
			idx0.Add(context.Background(), tt.WhenDetail.id, tt.WhenDetail.labels)

			// Then.
			// Downcast to the concrete type to inspect internal state.
			invIdx := idx0.(*idx)
			for label, values := range tt.ThenExpected.indexState {
				Expect(invIdx.m).To(HaveKey(label))
				for value, ids := range values {
					Expect(invIdx.m[label]).To(HaveKey(value))
					for id := range ids {
						Expect(invIdx.m[label][value]).To(HaveKey(id))
					}
				}
			}
		})
	}
}
|
||||
|
||||
// TestSearch verifies Search semantics for both eq and !eq, including the
// behavior for labels/values missing from the index.
func TestSearch(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		setupLabels map[uint64]model.Labels // Items loaded before searching.
	}
	type whenDetail struct {
		searchLabel string
		eq          bool
		searchValue string
	}
	type thenExpected struct {
		want []uint64 // Expected matching IDs; nil means "expect empty result".
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Search Existing Label-Value",
			Given:    "An index with feeds",
			When:     "Searching for existing label and value",
			Then:     "Should return matching item IDs",
			GivenDetail: givenDetail{
				setupLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}},
					2: {model.Label{Key: "category", Value: "tech"}},
					3: {model.Label{Key: "category", Value: "news"}},
				},
			},
			WhenDetail: whenDetail{
				searchLabel: "category",
				searchValue: "tech",
				eq:          true,
			},
			ThenExpected: thenExpected{
				want: []uint64{1, 2},
			},
		},
		{
			Scenario: "Search Non-Existing Label",
			Given:    "An index with feeds",
			When:     "Searching for non-existing label",
			Then:     "Should return empty result",
			GivenDetail: givenDetail{
				setupLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}},
				},
			},
			WhenDetail: whenDetail{
				searchLabel: "invalid",
				searchValue: "value",
				eq:          true,
			},
			ThenExpected: thenExpected{
				want: nil,
			},
		},
		{
			Scenario: "Search Non-Existing Value",
			Given:    "An index with feeds",
			When:     "Searching for existing label but non-existing value",
			Then:     "Should return empty result",
			GivenDetail: givenDetail{
				setupLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}},
				},
			},
			WhenDetail: whenDetail{
				searchLabel: "category",
				searchValue: "invalid",
				eq:          true,
			},
			ThenExpected: thenExpected{
				want: nil,
			},
		},
		// Not equal tests.
		{
			Scenario: "Search Not Matching Label-Value",
			Given:    "An index with multiple feeds",
			When:     "Searching for feeds not matching a label-value pair",
			Then:     "Should return all feeds except those matching the pair",
			GivenDetail: givenDetail{
				setupLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}, model.Label{Key: "status", Value: "new"}},
					2: {model.Label{Key: "category", Value: "news"}, model.Label{Key: "status", Value: "old"}},
					3: {model.Label{Key: "category", Value: "tech"}, model.Label{Key: "status", Value: "old"}},
				},
			},
			WhenDetail: whenDetail{
				searchLabel: "category",
				searchValue: "tech",
				eq:          false,
			},
			ThenExpected: thenExpected{
				want: []uint64{2},
			},
		},
		{
			Scenario: "Search Not Matching Non-Existing Label",
			Given:    "An index with feeds",
			When:     "Searching for feeds not matching a non-existing label",
			Then:     "Should return all feeds",
			GivenDetail: givenDetail{
				setupLabels: map[uint64]model.Labels{
					1: {model.Label{Key: "category", Value: "tech"}},
					2: {model.Label{Key: "category", Value: "news"}},
				},
			},
			WhenDetail: whenDetail{
				searchLabel: "invalid",
				searchValue: "value",
				eq:          false,
			},
			ThenExpected: thenExpected{
				want: []uint64{1, 2},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, labels := range tt.GivenDetail.setupLabels {
				idx.Add(context.Background(), id, labels)
			}

			// When.
			result := idx.Search(context.Background(), tt.WhenDetail.searchLabel, tt.WhenDetail.eq, tt.WhenDetail.searchValue)

			// Then.
			if tt.ThenExpected.want == nil {
				Expect(result).To(BeEmpty())
			} else {
				// Collect the returned set into a slice for order-insensitive comparison.
				resultIDs := make([]uint64, 0, len(result))
				for id := range result {
					resultIDs = append(resultIDs, id)
				}
				Expect(resultIDs).To(ConsistOf(tt.ThenExpected.want))
			}
		})
	}
}
|
||||
|
||||
func TestEncodeDecode(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
type givenDetail struct {
|
||||
setupLabels map[uint64]model.Labels
|
||||
}
|
||||
type whenDetail struct{}
|
||||
type thenExpected struct {
|
||||
success bool
|
||||
}
|
||||
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Encode and Decode Empty Index",
|
||||
Given: "An empty index",
|
||||
When: "Encoding and decoding",
|
||||
Then: "Should restore empty index correctly",
|
||||
GivenDetail: givenDetail{
|
||||
setupLabels: map[uint64]model.Labels{},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
success: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Scenario: "Encode and Decode Index with Data",
|
||||
Given: "An index with feeds",
|
||||
When: "Encoding and decoding",
|
||||
Then: "Should restore all data correctly",
|
||||
GivenDetail: givenDetail{
|
||||
setupLabels: map[uint64]model.Labels{
|
||||
1: {model.Label{Key: "category", Value: "tech"}, model.Label{Key: "status", Value: "new"}},
|
||||
2: {model.Label{Key: "category", Value: "news"}, model.Label{Key: "author", Value: "john"}},
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
success: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// Given.
|
||||
original, err := NewFactory().New("test", &Config{}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
for id, labels := range tt.GivenDetail.setupLabels {
|
||||
original.Add(context.Background(), id, labels)
|
||||
}
|
||||
|
||||
// When.
|
||||
var buf bytes.Buffer
|
||||
err = original.EncodeTo(context.Background(), &buf)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
decoded, err := NewFactory().New("test", &Config{}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = decoded.DecodeFrom(context.Background(), &buf)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
// Then.
|
||||
origIdx := original.(*idx)
|
||||
decodedIdx := decoded.(*idx)
|
||||
Expect(decodedIdx.m).To(Equal(origIdx.m))
|
||||
})
|
||||
}
|
||||
}
|
||||
285
pkg/storage/feed/block/index/primary/primary.go
Normal file
285
pkg/storage/feed/block/index/primary/primary.go
Normal file
@@ -0,0 +1,285 @@
|
||||
package primary
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index"
|
||||
telemetry "github.com/glidea/zenfeed/pkg/telemetry"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
)
|
||||
|
||||
// --- Interface code block ---

// Index is an in-memory primary index mapping feed IDs to their storage
// location and timestamp, with binary encode/decode support (via
// index.Codec) for persistence.
type Index interface {
	component.Component
	index.Codec

	// Search returns item location by ID.
	Search(ctx context.Context, id uint64) (ref FeedRef, ok bool)
	// Add adds item location to the index.
	Add(ctx context.Context, id uint64, item FeedRef)
	// IDs returns all item IDs.
	IDs(ctx context.Context) (ids map[uint64]bool)
	// Count returns the number of feeds in the index.
	Count(ctx context.Context) (count uint32)
}

// Config holds configuration for the primary index. Currently empty.
type Config struct{}

// Dependencies holds external dependencies of the primary index. Currently none.
type Dependencies struct{}

var (
	// headerMagicNumber identifies the serialized index format
	// (ASCII "wys is awesome" plus two zero padding bytes).
	headerMagicNumber = []byte{0x77, 0x79, 0x73, 0x20, 0x69, 0x73, 0x20,
		0x61, 0x77, 0x65, 0x73, 0x6f, 0x6d, 0x65, 0x00, 0x00}
	// headerVersion is the current serialized format version.
	headerVersion = uint8(1)
)

// FeedRef locates a feed item within block storage.
// NOTE(review): Chunk appears to identify a chunk file and Offset a byte
// position within it — confirm against the block package's usage.
type FeedRef struct {
	Chunk  uint32
	Offset uint64
	// Time is the feed timestamp; Add normalizes it to UTC.
	Time time.Time
}
|
||||
|
||||
// --- Factory code block ---
|
||||
type Factory component.Factory[Index, Config, Dependencies]
|
||||
|
||||
func NewFactory(mockOn ...component.MockOption) Factory {
|
||||
if len(mockOn) > 0 {
|
||||
return component.FactoryFunc[Index, Config, Dependencies](
|
||||
func(instance string, config *Config, dependencies Dependencies) (Index, error) {
|
||||
m := &mockIndex{}
|
||||
component.MockOptions(mockOn).Apply(&m.Mock)
|
||||
|
||||
return m, nil
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
return component.FactoryFunc[Index, Config, Dependencies](new)
|
||||
}
|
||||
|
||||
func new(instance string, config *Config, dependencies Dependencies) (Index, error) {
|
||||
return &idx{
|
||||
Base: component.New(&component.BaseConfig[Config, Dependencies]{
|
||||
Name: "FeedPrimaryIndex",
|
||||
Instance: instance,
|
||||
Config: config,
|
||||
Dependencies: dependencies,
|
||||
}),
|
||||
m: make(map[uint64]FeedRef, 64),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// --- Implementation code block ---

// idx is the in-memory implementation of Index.
// All fields are guarded by mu.
type idx struct {
	*component.Base[Config, Dependencies]

	// m maps feed ID -> storage location.
	m  map[uint64]FeedRef
	mu sync.RWMutex
}
|
||||
|
||||
func (idx *idx) Search(ctx context.Context, id uint64) (ref FeedRef, ok bool) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Search")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
ref, ok = idx.m[id]
|
||||
|
||||
return ref, ok
|
||||
}
|
||||
|
||||
func (idx *idx) Add(ctx context.Context, id uint64, item FeedRef) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Add")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
|
||||
idx.mu.Lock()
|
||||
defer idx.mu.Unlock()
|
||||
item.Time = item.Time.In(time.UTC)
|
||||
idx.m[id] = item
|
||||
}
|
||||
|
||||
func (idx *idx) IDs(ctx context.Context) (ids map[uint64]bool) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "IDs")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
result := make(map[uint64]bool, len(idx.m))
|
||||
for id := range idx.m {
|
||||
result[id] = true
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (idx *idx) Count(ctx context.Context) (count uint32) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "Count")...)
|
||||
defer func() { telemetry.End(ctx, nil) }()
|
||||
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
return uint32(len(idx.m))
|
||||
}
|
||||
|
||||
func (idx *idx) EncodeTo(ctx context.Context, w io.Writer) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "EncodeTo")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
idx.mu.RLock()
|
||||
defer idx.mu.RUnlock()
|
||||
|
||||
// Write header.
|
||||
if _, err := w.Write(headerMagicNumber); err != nil {
|
||||
return errors.Wrap(err, "write header magic number")
|
||||
}
|
||||
if _, err := w.Write([]byte{headerVersion}); err != nil {
|
||||
return errors.Wrap(err, "write header version")
|
||||
}
|
||||
|
||||
// Write map count.
|
||||
count := uint64(len(idx.m))
|
||||
if err := binary.Write(w, binary.LittleEndian, count); err != nil {
|
||||
return errors.Wrap(err, "write map count")
|
||||
}
|
||||
|
||||
// Write all key-value pairs.
|
||||
for id, ref := range idx.m {
|
||||
// Write Key.
|
||||
if err := binary.Write(w, binary.LittleEndian, id); err != nil {
|
||||
return errors.Wrap(err, "write id")
|
||||
}
|
||||
|
||||
// Write Value.
|
||||
if err := binary.Write(w, binary.LittleEndian, ref.Chunk); err != nil {
|
||||
return errors.Wrap(err, "write chunk")
|
||||
}
|
||||
if err := binary.Write(w, binary.LittleEndian, ref.Offset); err != nil {
|
||||
return errors.Wrap(err, "write offset")
|
||||
}
|
||||
if err := binary.Write(w, binary.LittleEndian, ref.Time.UnixNano()); err != nil {
|
||||
return errors.Wrap(err, "write time")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (idx *idx) DecodeFrom(ctx context.Context, r io.Reader) (err error) {
|
||||
ctx = telemetry.StartWith(ctx, append(idx.TelemetryLabels(), telemetrymodel.KeyOperation, "DecodeFrom")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
idx.mu.Lock()
|
||||
defer idx.mu.Unlock()
|
||||
|
||||
// Read header.
|
||||
if err := idx.readHeader(r); err != nil {
|
||||
return errors.Wrap(err, "read header")
|
||||
}
|
||||
|
||||
// Read map count.
|
||||
var count uint64
|
||||
if err := binary.Read(r, binary.LittleEndian, &count); err != nil {
|
||||
return errors.Wrap(err, "read map count")
|
||||
}
|
||||
idx.m = make(map[uint64]FeedRef, count)
|
||||
|
||||
// Read all key-value pairs.
|
||||
for range count {
|
||||
id, ref, err := idx.readEntry(r)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read entry")
|
||||
}
|
||||
idx.m[id] = ref
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readHeader reads and validates the index file header.
|
||||
func (idx *idx) readHeader(r io.Reader) error {
|
||||
magicNumber := make([]byte, len(headerMagicNumber))
|
||||
if _, err := io.ReadFull(r, magicNumber); err != nil {
|
||||
return errors.Wrap(err, "read magic number")
|
||||
}
|
||||
if !bytes.Equal(magicNumber, headerMagicNumber) {
|
||||
return errors.New("invalid magic number")
|
||||
}
|
||||
|
||||
versionByte := make([]byte, 1)
|
||||
if _, err := io.ReadFull(r, versionByte); err != nil {
|
||||
return errors.Wrap(err, "read version")
|
||||
}
|
||||
if versionByte[0] != headerVersion {
|
||||
return errors.New("invalid version")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readEntry reads a single key-value pair (feed ID and FeedRef) from the reader.
|
||||
func (idx *idx) readEntry(r io.Reader) (id uint64, ref FeedRef, err error) {
|
||||
// Read Key (ID).
|
||||
if err := binary.Read(r, binary.LittleEndian, &id); err != nil {
|
||||
return 0, FeedRef{}, errors.Wrap(err, "read id")
|
||||
}
|
||||
|
||||
// Read Value (FeedRef).
|
||||
if err := binary.Read(r, binary.LittleEndian, &ref.Chunk); err != nil {
|
||||
return 0, FeedRef{}, errors.Wrap(err, "read chunk")
|
||||
}
|
||||
if err := binary.Read(r, binary.LittleEndian, &ref.Offset); err != nil {
|
||||
return 0, FeedRef{}, errors.Wrap(err, "read offset")
|
||||
}
|
||||
var timestamp int64
|
||||
if err := binary.Read(r, binary.LittleEndian, ×tamp); err != nil {
|
||||
return 0, FeedRef{}, errors.Wrap(err, "read time")
|
||||
}
|
||||
ref.Time = time.Unix(0, timestamp).In(time.UTC)
|
||||
|
||||
return id, ref, nil
|
||||
}
|
||||
|
||||
// mockIndex is a mock implementation of Index for tests.
type mockIndex struct {
	component.Mock
}

// Search records the call and returns the configured ref and ok flag.
func (m *mockIndex) Search(ctx context.Context, id uint64) (ref FeedRef, ok bool) {
	args := m.Called(ctx, id)

	return args.Get(0).(FeedRef), args.Bool(1)
}

// Add records the call.
func (m *mockIndex) Add(ctx context.Context, id uint64, item FeedRef) {
	m.Called(ctx, id, item)
}

// IDs records the call and returns the configured ID set.
func (m *mockIndex) IDs(ctx context.Context) (ids map[uint64]bool) {
	args := m.Called(ctx)

	return args.Get(0).(map[uint64]bool)
}

// Count records the call and returns the configured count.
func (m *mockIndex) Count(ctx context.Context) (count uint32) {
	args := m.Called(ctx)

	return args.Get(0).(uint32)
}

// EncodeTo records the call and returns the configured error.
func (m *mockIndex) EncodeTo(ctx context.Context, w io.Writer) (err error) {
	args := m.Called(ctx, w)

	return args.Error(0)
}

// DecodeFrom records the call and returns the configured error.
func (m *mockIndex) DecodeFrom(ctx context.Context, r io.Reader) (err error) {
	args := m.Called(ctx, r)

	return args.Error(0)
}
|
||||
222
pkg/storage/feed/block/index/primary/primary_test.go
Normal file
222
pkg/storage/feed/block/index/primary/primary_test.go
Normal file
@@ -0,0 +1,222 @@
|
||||
package primary
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
// TestAdd verifies that idx.Add inserts new entries and overwrites an
// entry whose ID already exists.
func TestAdd(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		existingItems map[uint64]FeedRef // entries pre-loaded before the call under test
	}
	type whenDetail struct {
		id   uint64
		item FeedRef
	}
	type thenExpected struct {
		items map[uint64]FeedRef // full expected content of the index afterwards
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Add Single Feed",
			Given:    "An index with existing item",
			When:     "Adding a single item",
			Then:     "Should store the item correctly",
			GivenDetail: givenDetail{
				existingItems: map[uint64]FeedRef{
					0: {Chunk: 0, Offset: 0},
				},
			},
			WhenDetail: whenDetail{
				id:   1,
				item: FeedRef{Chunk: 1, Offset: 100},
			},
			ThenExpected: thenExpected{
				items: map[uint64]FeedRef{
					0: {Chunk: 0, Offset: 0},
					1: {Chunk: 1, Offset: 100},
				},
			},
		},
		{
			Scenario: "Update Existing Feed",
			Given:    "An index with existing item",
			When:     "Adding item with same ID",
			Then:     "Should update the item reference",
			GivenDetail: givenDetail{
				existingItems: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
				},
			},
			WhenDetail: whenDetail{
				id:   1,
				item: FeedRef{Chunk: 2, Offset: 200},
			},
			ThenExpected: thenExpected{
				items: map[uint64]FeedRef{
					1: {Chunk: 2, Offset: 200},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx0, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, item := range tt.GivenDetail.existingItems {
				idx0.Add(context.Background(), id, item)
			}

			// When.
			idx0.Add(context.Background(), tt.WhenDetail.id, tt.WhenDetail.item)

			// Then: inspect the concrete implementation's map directly.
			primIdx := idx0.(*idx)
			for id, expected := range tt.ThenExpected.items {
				Expect(primIdx.m).To(HaveKey(id))
				Expect(primIdx.m[id]).To(Equal(expected))
			}
		})
	}
}
|
||||
|
||||
// TestSearch verifies idx.Search for both present and absent IDs.
func TestSearch(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		feeds map[uint64]FeedRef // initial index content
	}
	type whenDetail struct {
		searchID uint64
	}
	type thenExpected struct {
		feedRef FeedRef // zero value when not found
		found   bool
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Search Existing Feed",
			Given:    "An index with feeds",
			When:     "Searching for existing ID",
			Then:     "Should return correct FeedRef",
			GivenDetail: givenDetail{
				feeds: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
					2: {Chunk: 2, Offset: 200},
				},
			},
			WhenDetail: whenDetail{
				searchID: 1,
			},
			ThenExpected: thenExpected{
				feedRef: FeedRef{Chunk: 1, Offset: 100},
				found:   true,
			},
		},
		{
			Scenario: "Search Non-Existing Feed",
			Given:    "An index with feeds",
			When:     "Searching for non-existing ID",
			Then:     "Should return empty FeedRef",
			GivenDetail: givenDetail{
				feeds: map[uint64]FeedRef{
					1: {Chunk: 1, Offset: 100},
				},
			},
			WhenDetail: whenDetail{
				searchID: 2,
			},
			ThenExpected: thenExpected{
				feedRef: FeedRef{},
				found:   false,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, item := range tt.GivenDetail.feeds {
				idx.Add(context.Background(), id, item)
			}

			// When.
			result, ok := idx.Search(context.Background(), tt.WhenDetail.searchID)

			// Then.
			Expect(result).To(Equal(tt.ThenExpected.feedRef))
			Expect(ok).To(Equal(tt.ThenExpected.found))
		})
	}
}
|
||||
|
||||
func TestEncodeDecode(t *testing.T) {
|
||||
RegisterTestingT(t)
|
||||
|
||||
type givenDetail struct {
|
||||
feeds map[uint64]FeedRef
|
||||
}
|
||||
type whenDetail struct{}
|
||||
type thenExpected struct {
|
||||
success bool
|
||||
}
|
||||
|
||||
tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
{
|
||||
Scenario: "Encode and Decode Index with Data",
|
||||
Given: "An index with feeds",
|
||||
When: "Encoding and decoding",
|
||||
Then: "Should restore all data correctly",
|
||||
GivenDetail: givenDetail{
|
||||
feeds: map[uint64]FeedRef{
|
||||
1: {Chunk: 1, Offset: 100, Time: time.Now()},
|
||||
2: {Chunk: 2, Offset: 200, Time: time.Now()},
|
||||
},
|
||||
},
|
||||
WhenDetail: whenDetail{},
|
||||
ThenExpected: thenExpected{
|
||||
success: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// Given.
|
||||
original, err := NewFactory().New("test", &Config{}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
for id, item := range tt.GivenDetail.feeds {
|
||||
original.Add(context.Background(), id, item)
|
||||
}
|
||||
|
||||
// When.
|
||||
var buf bytes.Buffer
|
||||
err = original.EncodeTo(context.Background(), &buf)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
decoded, err := NewFactory().New("test", &Config{}, Dependencies{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = decoded.DecodeFrom(context.Background(), &buf)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
// Then.
|
||||
origIdx := original.(*idx)
|
||||
decodedIdx := decoded.(*idx)
|
||||
Expect(decodedIdx.m).To(Equal(origIdx.m))
|
||||
})
|
||||
}
|
||||
}
|
||||
1158
pkg/storage/feed/block/index/vector/vector.go
Normal file
1158
pkg/storage/feed/block/index/vector/vector.go
Normal file
File diff suppressed because it is too large
Load Diff
329
pkg/storage/feed/block/index/vector/vector_test.go
Normal file
329
pkg/storage/feed/block/index/vector/vector_test.go
Normal file
@@ -0,0 +1,329 @@
|
||||
package vector
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/test"
|
||||
)
|
||||
|
||||
// TestSearch verifies vector similarity search: threshold filtering,
// result limiting, and query-dimension validation.
func TestSearch(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		vectors map[uint64][][]float32 // each ID may own several vectors
	}
	type whenDetail struct {
		q         []float32 // query vector
		threshold float32   // minimum similarity score to include
		limit     int       // max number of IDs returned
	}
	type thenExpected struct {
		idWithScores map[uint64]float32 // expected ID -> best score
		err          string             // substring of the expected error, "" for success
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Search for similar vectors",
			Given:    "An index with some vectors",
			When:     "Searching for a vector with a threshold",
			Then:     "Should return IDs of similar vectors with scores",
			GivenDetail: givenDetail{
				vectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
					2: {{0.8, 1.0, 0.0}},
					// ID 3: the best-scoring of its vectors should win.
					3: {{0.8, 0.1, 0.1} /*0.9847*/, {0.7, 0.1, 0.9} /*0.6116*/},
				},
			},
			WhenDetail: whenDetail{
				q:         []float32{1.0, 0.0, 0.0},
				threshold: 0.9,
				limit:     5,
			},
			ThenExpected: thenExpected{
				idWithScores: map[uint64]float32{
					1: 1.0,
					3: 0.9847,
				},
			},
		},
		{
			Scenario: "Search for similar vectors with strict limit",
			Given:    "An index with some vectors",
			When:     "Searching for a vector with a strict limit",
			Then:     "Should return IDs of similar vectors with scores",
			GivenDetail: givenDetail{
				vectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
					2: {{0.8, 1.0, 0.0}},
					3: {{0.8, 0.1, 0.1} /*0.9847*/, {0.7, 0.1, 0.9} /*0.6116*/},
				},
			},
			WhenDetail: whenDetail{
				q:         []float32{1.0, 0.0, 0.0},
				threshold: 0.9,
				limit:     1, // only the single best match survives
			},
			ThenExpected: thenExpected{
				idWithScores: map[uint64]float32{
					1: 1.0,
				},
			},
		},
		{
			Scenario: "Search with dimension mismatch",
			Given:    "An index with some vectors",
			When:     "Searching for a vector with different dimension",
			Then:     "Should return an error",
			GivenDetail: givenDetail{
				vectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
				},
			},
			WhenDetail: whenDetail{
				q:         []float32{1.0, 0.0}, // Different dimension.
				threshold: 0.8,
				limit:     10,
			},
			ThenExpected: thenExpected{
				err: "vector dimension mismatch",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			idx, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, vectors := range tt.GivenDetail.vectors {
				err := idx.Add(context.Background(), id, vectors)
				Expect(err).NotTo(HaveOccurred())
			}

			// When.
			idWithScores, err := idx.Search(context.Background(), tt.WhenDetail.q, tt.WhenDetail.threshold, tt.WhenDetail.limit)

			// Then.
			if tt.ThenExpected.err != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).NotTo(HaveOccurred())
				Expect(idWithScores).To(HaveLen(len(tt.ThenExpected.idWithScores)))
				// Scores are floats: compare with tolerance, not equality.
				for id, score := range tt.ThenExpected.idWithScores {
					Expect(idWithScores).To(HaveKey(id))
					Expect(idWithScores[id]).To(BeNumerically("~", score, 0.01))
				}
			}
		})
	}
}
|
||||
|
||||
// TestAdd verifies inserting vectors into the HNSW-style index:
// node registration, layer membership, neighbor linking, and
// dimension validation.
func TestAdd(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		existingVectors map[uint64][][]float32 // pre-inserted content
	}
	type whenDetail struct {
		id      uint64
		vectors [][]float32
	}
	type thenExpected struct {
		err           string // expected error substring, "" for success
		nodeExists    bool   // new node present in the node map
		layersContain bool   // new node present in layer 0
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Add a vector to an empty index",
			Given:    "An empty vector index",
			When:     "Adding a vector",
			Then:     "Should add the vector and update layers",
			GivenDetail: givenDetail{
				existingVectors: map[uint64][][]float32{},
			},
			WhenDetail: whenDetail{
				id:      1,
				vectors: [][]float32{{1.0, 0.0, 0.0}},
			},
			ThenExpected: thenExpected{
				nodeExists:    true,
				layersContain: true,
			},
		},
		{
			Scenario: "Add multiple vectors",
			Given:    "An index with existing vectors",
			When:     "Adding another vector",
			Then:     "Should add the vector and update layers",
			GivenDetail: givenDetail{
				existingVectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
				},
			},
			WhenDetail: whenDetail{
				id:      2,
				vectors: [][]float32{{0.0, 1.0, 0.0}},
			},
			ThenExpected: thenExpected{
				nodeExists:    true,
				layersContain: true,
			},
		},
		{
			Scenario: "Add a vector with dimension mismatch",
			Given:    "An index with existing vectors",
			When:     "Adding a vector with different dimension",
			Then:     "Should return error",
			GivenDetail: givenDetail{
				existingVectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
				},
			},
			WhenDetail: whenDetail{
				id:      2,
				vectors: [][]float32{{1.0, 0.0}}, // Different dimension
			},
			ThenExpected: thenExpected{
				err: "vector dimension mismatch",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given
			idx0, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, vectors := range tt.GivenDetail.existingVectors {
				err := idx0.Add(context.Background(), id, vectors)
				Expect(err).NotTo(HaveOccurred())
			}

			// When
			err = idx0.Add(context.Background(), tt.WhenDetail.id, tt.WhenDetail.vectors)

			// Then
			if tt.ThenExpected.err != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).NotTo(HaveOccurred())

				// Inspect internal state under the index's own lock.
				v := idx0.(*idx)
				v.mu.RLock()
				defer v.mu.RUnlock()

				if tt.ThenExpected.nodeExists {
					Expect(v.m).To(HaveKey(tt.WhenDetail.id))
					node := v.m[tt.WhenDetail.id]
					Expect(node.vectors).To(Equal(tt.WhenDetail.vectors))
				}

				if tt.ThenExpected.layersContain {
					// Every inserted node must appear in the bottom layer.
					nodeInLayers := false
					for _, id := range v.layers[0].nodes {
						if id == tt.WhenDetail.id {
							nodeInLayers = true
							break
						}
					}
					Expect(nodeInLayers).To(BeTrue(), "Node should be in layer 0")

					// When a prior node exists, the new node must have been
					// linked to at least one neighbor on some layer.
					if len(tt.GivenDetail.existingVectors) > 0 {
						node := v.m[tt.WhenDetail.id]
						hasFriends := false
						for _, friends := range node.friendsOnLayers {
							if len(friends) > 0 {
								hasFriends = true
								break
							}
						}
						Expect(hasFriends).To(BeTrue(), "Node should have friends")
					}
				}
			}
		})
	}
}
|
||||
|
||||
// TestEncodeDecode verifies the vector index round-trips through
// EncodeTo/DecodeFrom. Equality is checked behaviorally: both indexes
// must return the same search results, since the rebuilt graph need not
// be structurally identical.
func TestEncodeDecode(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		vectors map[uint64][][]float32
	}
	type whenDetail struct{}
	type thenExpected struct {
		err string // expected decode-error substring, "" for success
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Encode and decode an index with data",
			Given:    "An index with some vectors",
			When:     "Encoding and decoding the index",
			Then:     "Should restore the index correctly",
			GivenDetail: givenDetail{
				vectors: map[uint64][][]float32{
					1: {{1.0, 0.0, 0.0}},
					2: {{0.0, 1.0, 0.0}},
				},
			},
			WhenDetail:   whenDetail{},
			ThenExpected: thenExpected{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			original, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			for id, vectors := range tt.GivenDetail.vectors {
				err := original.Add(context.Background(), id, vectors)
				Expect(err).NotTo(HaveOccurred())
			}

			// When: encode into a buffer, decode into a fresh index.
			var buf bytes.Buffer
			err = original.EncodeTo(context.Background(), &buf)
			Expect(err).NotTo(HaveOccurred())

			decoded, err := NewFactory().New("test", &Config{}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			err = decoded.DecodeFrom(context.Background(), &buf)

			// Then.
			if tt.ThenExpected.err != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).NotTo(HaveOccurred())

				// Verify by searching: each stored vector must yield the
				// same result set (within float tolerance) in both indexes.
				for _, vectors := range tt.GivenDetail.vectors {
					for _, vector := range vectors {
						originalResults, err := original.Search(context.Background(), vector, 0.99, 10)
						Expect(err).NotTo(HaveOccurred())
						decodedResults, err := decoded.Search(context.Background(), vector, 0.99, 10)
						Expect(err).NotTo(HaveOccurred())

						Expect(decodedResults).To(HaveLen(len(originalResults)))
						for id, score := range originalResults {
							Expect(decodedResults).To(HaveKey(id))
							Expect(decodedResults[id]).To(BeNumerically("~", score, 0.000001))
						}
					}
				}
			}
		})
	}
}
|
||||
643
pkg/storage/feed/feed.go
Normal file
643
pkg/storage/feed/feed.go
Normal file
@@ -0,0 +1,643 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package feed
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/benbjohnson/clock"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/glidea/zenfeed/pkg/component"
|
||||
"github.com/glidea/zenfeed/pkg/config"
|
||||
"github.com/glidea/zenfeed/pkg/llm"
|
||||
"github.com/glidea/zenfeed/pkg/model"
|
||||
"github.com/glidea/zenfeed/pkg/rewrite"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/chunk"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index/inverted"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index/primary"
|
||||
"github.com/glidea/zenfeed/pkg/storage/feed/block/index/vector"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry"
|
||||
"github.com/glidea/zenfeed/pkg/telemetry/log"
|
||||
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
|
||||
timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
)
|
||||
|
||||
// clk is the package-level clock. It is a variable (not time.Now
// directly) so tests can substitute a mock clock.
var clk = clock.New()
|
||||
|
||||
// --- Interface code block ---

// Storage is a time-partitioned feed store. Feeds live in block.Block
// instances, each covering a contiguous time range; the block covering
// "now" is the head and receives all writes.
type Storage interface {
	component.Component
	config.Watcher

	// Append stores some feeds.
	Append(ctx context.Context, feeds ...*model.Feed) error

	// Query retrieves feeds by query options.
	// Results are sorted by score (if vector query) and time.
	Query(ctx context.Context, query block.QueryOptions) ([]*block.FeedVO, error)

	// Exists checks if a feed exists in the storage.
	// If hintTime is zero, it only checks the head block.
	Exists(ctx context.Context, id uint64, hintTime time.Time) (bool, error)
}

// Config holds feed-storage settings. Zero values are filled with
// defaults by Validate.
type Config struct {
	Dir           string        // root data directory; each block gets a subdirectory
	Retention     time.Duration // how long feeds are kept before their block is removed
	BlockDuration time.Duration // time span covered by each block
	EmbeddingLLM  string        // name of the LLM used to embed new feeds (required)
	FlushInterval time.Duration // how often blocks flush to disk
}

// subDir is appended to the default data directory when Dir is unset.
const subDir = "feed"
|
||||
|
||||
// Validate fills defaults for unset fields and enforces invariants:
// retention within [1d, 15d], retention >= block duration, and a
// non-empty embedding LLM. Note it mutates the receiver (defaults),
// and defaults applied before an error return remain set.
func (c *Config) Validate() error {
	if c.Dir == "" {
		c.Dir = "./data/" + subDir
	}
	if c.Retention <= 0 {
		c.Retention = 8 * timeutil.Day
	}
	if c.Retention < timeutil.Day || c.Retention > 15*timeutil.Day {
		return errors.New("retention must be between 1 day and 15 days")
	}
	if c.BlockDuration <= 0 {
		// 25h (not 24h) so a block comfortably covers "yesterday" queries.
		// NOTE(review): rationale presumed from the value — confirm.
		c.BlockDuration = 25 * time.Hour
	}
	// The check is "<", so retention equal to the block duration passes.
	if c.Retention < c.BlockDuration {
		return errors.Errorf("retention must be greater than %s", c.BlockDuration)
	}
	if c.EmbeddingLLM == "" {
		return errors.New("embedding LLM is required")
	}

	return nil
}
|
||||
|
||||
// From populates the config from the application-level config,
// overwriting all fields of the receiver.
func (c *Config) From(app *config.App) {
	*c = Config{
		Dir:           app.Storage.Dir,
		Retention:     app.Storage.Feed.Retention,
		BlockDuration: app.Storage.Feed.BlockDuration,
		FlushInterval: app.Storage.Feed.FlushInterval,
		EmbeddingLLM:  app.Storage.Feed.EmbeddingLLM,
	}
}

// Dependencies lists the factories and collaborators injected into the
// storage; most are passed through to the blocks it creates.
type Dependencies struct {
	BlockFactory    block.Factory
	LLMFactory      llm.Factory
	ChunkFactory    chunk.Factory
	PrimaryFactory  primary.Factory
	InvertedFactory inverted.Factory
	VectorFactory   vector.Factory
	Rewriter        rewrite.Rewriter
}

// --- Factory code block ---

// Factory builds Storage instances.
type Factory component.Factory[Storage, config.App, Dependencies]

// NewFactory returns the production factory, or a factory producing
// mocks when mock options are supplied (used by tests).
func NewFactory(mockOn ...component.MockOption) Factory {
	if len(mockOn) > 0 {
		return component.FactoryFunc[Storage, config.App, Dependencies](
			func(instance string, app *config.App, dependencies Dependencies) (Storage, error) {
				m := &mockStorage{}
				component.MockOptions(mockOn).Apply(&m.Mock)

				return m, nil
			},
		)
	}

	return component.FactoryFunc[Storage, config.App, Dependencies](new)
}
|
||||
|
||||
// new builds a production Storage: validates config, prepares the data
// directory, loads any existing blocks from disk, and guarantees a head
// (currently-writable) block exists before returning.
func new(instance string, app *config.App, dependencies Dependencies) (Storage, error) {
	config := &Config{}
	config.From(app)
	if err := config.Validate(); err != nil {
		return nil, errors.Wrap(err, "validate config")
	}

	s := &storage{
		Base: component.New(&component.BaseConfig[Config, Dependencies]{
			Name:         "FeedStorage",
			Instance:     instance,
			Config:       config,
			Dependencies: dependencies,
		}),
		blocks: &blockChain{blocks: make(map[string]block.Block)},
	}

	// Owner-only permissions: block data may contain private feed content.
	if err := os.MkdirAll(config.Dir, 0700); err != nil {
		return nil, errors.Wrap(err, "ensure data dir")
	}
	if err := loadBlocks(config.Dir, s); err != nil {
		return nil, errors.Wrap(err, "load blocks")
	}

	// Ensure head block.
	if len(s.blocks.list(nil)) == 0 {
		// createBlock registers the block in s.blocks itself.
		if _, err := s.createBlock(clk.Now()); err != nil {
			return nil, errors.Wrap(err, "create head block")
		}
	}

	return s, nil
}
|
||||
|
||||
func loadBlocks(path string, s *storage) error {
|
||||
// Scan path.
|
||||
ls, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "read dir")
|
||||
}
|
||||
|
||||
// Load blocks.
|
||||
for _, info := range ls {
|
||||
if !info.IsDir() {
|
||||
continue
|
||||
}
|
||||
if _, err := s.loadBlock(info.Name()); err != nil {
|
||||
return errors.Wrapf(err, "load block %s", info.Name())
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// blockChain is a concurrency-safe collection of blocks keyed by block
// name (the Unix-second start time as a string).
type blockChain struct {
	blocks map[string]block.Block
	mu     sync.RWMutex
}

// isHead reports whether b covers the current wall-clock time, i.e.
// whether it is the block new feeds are written to.
func (c *blockChain) isHead(b block.Block) bool {
	return timeutil.InRange(clk.Now(), b.Start(), b.End())
}

// head returns the block covering now, or nil if no such block exists.
func (c *blockChain) head() block.Block {
	b, ok := c.get(clk.Now())
	if !ok {
		return nil
	}

	return b
}

// list returns a snapshot of blocks passing the optional filter.
// Order is unspecified (map iteration).
func (c *blockChain) list(filter func(block block.Block) bool) []block.Block {
	c.mu.RLock()
	defer c.mu.RUnlock()
	blocks := make([]block.Block, 0, len(c.blocks))
	for _, b := range c.blocks {
		if filter != nil && !filter(b) {
			continue
		}
		blocks = append(blocks, b)
	}

	return blocks
}

// endTime returns the latest End() across all blocks, or the zero time
// when the chain is empty.
func (c *blockChain) endTime() time.Time {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if len(c.blocks) == 0 {
		return time.Time{}
	}
	var maxEnd time.Time
	for _, b := range c.blocks {
		if !b.End().After(maxEnd) {
			continue
		}
		maxEnd = b.End()
	}

	return maxEnd
}

// get returns the block whose [Start, End) range covers the given time.
func (c *blockChain) get(time time.Time) (block.Block, bool) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	for _, b := range c.blocks {
		if timeutil.InRange(time, b.Start(), b.End()) {
			return b, true
		}
	}

	return nil, false
}

// add registers (or replaces) the block under its start-time name.
func (c *blockChain) add(block block.Block) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.blocks[blockName(block.Start())] = block
}

// remove deletes every block whose End() is at or before `before`,
// invoking callback for each removed block.
// NOTE(review): callback runs while the write lock is held, so it must
// not call back into the chain or block for long — confirm callers.
func (c *blockChain) remove(before time.Time, callback func(block block.Block)) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Collect first to avoid mutating the map mid-range.
	keys := make([]string, 0)
	for key, b := range c.blocks {
		if b.End().After(before) {
			continue
		}
		keys = append(keys, key)
	}

	for _, key := range keys {
		b := c.blocks[key]
		delete(c.blocks, key)
		callback(b)
	}
}

// --- Implementation code block ---

// storage implements Storage on top of a blockChain.
type storage struct {
	*component.Base[Config, Dependencies]
	blocks *blockChain
}
|
||||
|
||||
func (s *storage) Run() (err error) {
|
||||
ctx := telemetry.StartWith(s.Context(), append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Run")...)
|
||||
defer func() { telemetry.End(ctx, err) }()
|
||||
|
||||
// Run blocks.
|
||||
for _, b := range s.blocks.list(nil) {
|
||||
if err := component.RunUntilReady(ctx, b, 10*time.Second); err != nil {
|
||||
return errors.Wrap(err, "run block")
|
||||
}
|
||||
}
|
||||
|
||||
// Maintain blocks.
|
||||
s.MarkReady()
|
||||
|
||||
ticker := clk.Timer(0)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case now := <-ticker.C:
|
||||
if err := s.reconcileBlocks(ctx, now); err != nil {
|
||||
log.Error(ctx, errors.Wrap(err, "reconcile blocks"))
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debug(ctx, "reconcile blocks success")
|
||||
ticker.Reset(30 * time.Second)
|
||||
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *storage) Close() error {
|
||||
if err := s.Base.Close(); err != nil {
|
||||
return errors.Wrap(err, "close base")
|
||||
}
|
||||
for _, b := range s.blocks.list(nil) {
|
||||
if err := b.Close(); err != nil {
|
||||
return errors.Wrap(err, "close block")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reload applies a new application config: validates it, rejects
// changes to immutable fields (Dir), propagates the flush interval to
// every block, and stores the new config. No-op when nothing changed.
func (s *storage) Reload(app *config.App) error {
	// Validate new config.
	newConfig := &Config{}
	newConfig.From(app)
	if err := newConfig.Validate(); err != nil {
		return errors.Wrap(err, "validate config")
	}
	if reflect.DeepEqual(s.Config(), newConfig) {
		log.Debug(s.Context(), "no changes in feed storage config")

		return nil
	}

	// Check immutable fields.
	curConfig := s.Config()
	if newConfig.Dir != curConfig.Dir {
		return errors.New("cannot reload the dir, MUST pass the same dir, or set it to empty for unchange")
	}

	// Reload blocks. Only the flush interval is forwarded; other block
	// settings are fixed at block creation time.
	for _, b := range s.blocks.list(nil) {
		if err := b.Reload(&block.Config{
			FlushInterval: newConfig.FlushInterval,
		}); err != nil {
			return errors.Wrapf(err, "reload block %s", blockName(b.Start()))
		}
	}

	// Set config.
	s.SetConfig(newConfig)

	return nil
}

// Append validates the given feeds, runs them through the rewrite
// pipeline (which may drop feeds entirely), and writes the survivors
// to the head block.
func (s *storage) Append(ctx context.Context, feeds ...*model.Feed) (err error) {
	ctx = telemetry.StartWith(ctx, append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Append")...)
	defer func() { telemetry.End(ctx, err) }()
	for _, f := range feeds {
		if err := f.Validate(); err != nil {
			return errors.Wrap(err, "validate feed")
		}
	}

	// Rewrite feeds.
	rewritten, err := s.rewrite(ctx, feeds)
	if err != nil {
		return errors.Wrap(err, "rewrite feeds")
	}
	if len(rewritten) == 0 {
		log.Debug(ctx, "no feeds to write after rewrites")

		return nil
	}

	// Append feeds to head block.
	log.Debug(ctx, "append feeds", "count", len(rewritten))
	if err := s.blocks.head().Append(ctx, rewritten...); err != nil {
		return errors.Wrap(err, "append feeds")
	}

	return nil
}
|
||||
|
||||
// Query fans the query out to every block whose time range matches,
// merges the per-block results through a bounded heap (capacity
// query.Limit), and returns them sorted descending.
// Note: when several blocks fail, only the first collected error is
// returned; the others are dropped.
func (s *storage) Query(ctx context.Context, query block.QueryOptions) (feeds []*block.FeedVO, err error) {
	ctx = telemetry.StartWith(ctx, append(s.TelemetryLabels(), telemetrymodel.KeyOperation, "Query")...)
	defer func() { telemetry.End(ctx, err) }()
	if err := (&query).Validate(); err != nil {
		return nil, errors.Wrap(err, "validate query")
	}

	// Parallel read: one goroutine per matching block, results and
	// errors merged under a shared mutex.
	blocks := s.blocks.list(nil)
	feedHeap := block.NewFeedVOHeap(make(block.FeedVOs, 0, query.Limit))
	var (
		mu   sync.Mutex
		wg   sync.WaitGroup
		errs []error
	)

	for _, b := range blocks {
		if !query.HitTimeRangeCondition(b) {
			continue // block's time range cannot contain matches
		}

		wg.Add(1)
		go func(b block.Block) {
			defer wg.Done()
			fs, err := b.Query(ctx, query)
			if err != nil {
				mu.Lock()
				errs = append(errs, err)
				mu.Unlock()

				return
			}

			mu.Lock()
			// TryEvictPush keeps only the top query.Limit feeds.
			for _, f := range fs {
				feedHeap.TryEvictPush(f)
			}
			mu.Unlock()
		}(b)
	}
	wg.Wait()
	if len(errs) > 0 {
		return nil, errs[0]
	}

	feedHeap.DESCSort()

	return feedHeap.Slice(), nil
}

// Exists checks whether the feed with the given ID is stored.
// A non-zero hintTime routes the check to the block covering that time;
// otherwise (or when no block covers it) the head block is consulted.
func (s *storage) Exists(ctx context.Context, id uint64, hintTime time.Time) (bool, error) {
	// Normal path.
	if !hintTime.IsZero() {
		b, ok := s.blocks.get(hintTime)
		if ok {
			return b.Exists(ctx, id)
		}
	}

	// Fallback to head block.
	return s.blocks.head().Exists(ctx, id)
}
|
||||
|
||||
// headBlockCreateBuffer is how far ahead of the chain's end a new head
// block is created, so writes never land without a covering block.
const headBlockCreateBuffer = 30 * time.Minute

// reconcileBlocks is one maintenance cycle: ensure a head block exists,
// demote finished hot blocks to cold, and drop blocks past retention.
func (s *storage) reconcileBlocks(ctx context.Context, now time.Time) error {
	// Create new head block if needed.
	if err := s.ensureHeadBlock(ctx, now); err != nil {
		return errors.Wrap(err, "ensure head block")
	}

	// Transform non-head hot blocks to cold.
	if err := s.ensureColdBlocks(ctx); err != nil {
		return errors.Wrap(err, "ensure cold blocks")
	}

	// Remove expired blocks.
	s.ensureRemovedExpiredBlocks(ctx, now)

	return nil
}
|
||||
|
||||
func (s *storage) ensureHeadBlock(ctx context.Context, now time.Time) error {
|
||||
if maxEnd := s.blocks.endTime(); now.After(maxEnd.Add(-headBlockCreateBuffer)) {
|
||||
nextStart := maxEnd
|
||||
if now.After(maxEnd) {
|
||||
nextStart = now
|
||||
}
|
||||
b, err := s.createBlock(nextStart)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "create new hot block")
|
||||
}
|
||||
if err := component.RunUntilReady(ctx, b, 10*time.Second); err != nil {
|
||||
return errors.Wrap(err, "run new hot block")
|
||||
}
|
||||
s.blocks.add(b)
|
||||
log.Info(ctx, "block created", "name", blockName(b.Start()))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ensureColdBlocks demotes hot blocks to the cold (on-disk, read-only)
// state once they are no longer the head and a full extra BlockDuration
// has passed since their end — the grace period keeps recent queries
// fast.
func (s *storage) ensureColdBlocks(ctx context.Context) error {
	for _, b := range s.blocks.list(func(b block.Block) bool {
		return b.State() == block.StateHot &&
			!s.blocks.isHead(b) &&
			clk.Now().After(b.End().Add(s.Config().BlockDuration)) // For recent queries.
	}) {
		if err := b.TransformToCold(); err != nil {
			return errors.Wrap(err, "transform to cold")
		}
		log.Info(ctx, "block transformed to cold", "name", blockName(b.Start()))
	}

	return nil
}
|
||||
|
||||
func (s *storage) ensureRemovedExpiredBlocks(ctx context.Context, now time.Time) {
|
||||
s.blocks.remove(now.Add(-s.Config().Retention), func(b block.Block) {
|
||||
var err error
|
||||
if err = b.Close(); err != nil {
|
||||
log.Error(ctx, errors.Wrap(err, "close block"))
|
||||
}
|
||||
if err = b.ClearOnDisk(); err != nil {
|
||||
log.Error(ctx, errors.Wrap(err, "clear on disk"))
|
||||
}
|
||||
if err == nil {
|
||||
log.Info(ctx, "block deleted", "name", blockName(b.Start()))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// blockName derives a block's directory name from its start time, encoded
// as decimal Unix seconds. Declared as a package-level var — presumably so
// tests can stub it; TODO confirm.
var blockName = func(start time.Time) string {
	return strconv.FormatInt(start.Unix(), 10)
}
|
||||
|
||||
func (s *storage) createBlock(start time.Time) (block.Block, error) {
|
||||
config := s.Config()
|
||||
blockName := blockName(start)
|
||||
dir := filepath.Join(config.Dir, blockName)
|
||||
|
||||
b, err := s.Dependencies().BlockFactory.New(
|
||||
blockName,
|
||||
&block.Config{
|
||||
Dir: dir,
|
||||
FlushInterval: config.FlushInterval,
|
||||
ForCreate: &block.ForCreateConfig{
|
||||
Start: start,
|
||||
Duration: config.BlockDuration,
|
||||
EmbeddingLLM: config.EmbeddingLLM,
|
||||
},
|
||||
},
|
||||
s.blockDependencies(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "create block")
|
||||
}
|
||||
|
||||
s.blocks.add(b)
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *storage) loadBlock(name string) (block.Block, error) {
|
||||
dir := filepath.Join(s.Config().Dir, name)
|
||||
|
||||
b, err := s.Dependencies().BlockFactory.New(
|
||||
name,
|
||||
&block.Config{Dir: dir},
|
||||
s.blockDependencies(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "create block")
|
||||
}
|
||||
|
||||
s.blocks.add(b)
|
||||
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *storage) blockDependencies() block.Dependencies {
|
||||
deps := s.Dependencies()
|
||||
|
||||
return block.Dependencies{
|
||||
ChunkFactory: deps.ChunkFactory,
|
||||
PrimaryFactory: deps.PrimaryFactory,
|
||||
InvertedFactory: deps.InvertedFactory,
|
||||
VectorFactory: deps.VectorFactory,
|
||||
LLMFactory: deps.LLMFactory,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *storage) rewrite(ctx context.Context, feeds []*model.Feed) ([]*model.Feed, error) {
|
||||
rewritten := make([]*model.Feed, 0, len(feeds))
|
||||
var wg sync.WaitGroup
|
||||
var errs []error
|
||||
var mu sync.Mutex
|
||||
for _, item := range feeds { // TODO: Limit the concurrency & goroutine number.
|
||||
wg.Add(1)
|
||||
go func(item *model.Feed) {
|
||||
defer wg.Done()
|
||||
labels, err := s.Dependencies().Rewriter.Labels(ctx, item.Labels)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
errs = append(errs, errors.Wrap(err, "rewrite item"))
|
||||
mu.Unlock()
|
||||
|
||||
return
|
||||
}
|
||||
if len(labels) == 0 {
|
||||
log.Debug(ctx, "drop feed", "id", item.ID)
|
||||
|
||||
return // Drop empty labels.
|
||||
}
|
||||
|
||||
item.Labels = labels
|
||||
mu.Lock()
|
||||
rewritten = append(rewritten, item)
|
||||
mu.Unlock()
|
||||
}(item)
|
||||
}
|
||||
wg.Wait()
|
||||
if len(errs) > 0 {
|
||||
return nil, errs[0]
|
||||
}
|
||||
|
||||
return rewritten, nil
|
||||
}
|
||||
|
||||
// mockStorage is a test double for the feed storage, recording and replaying
// calls via the embedded component.Mock.
type mockStorage struct {
	component.Mock
}
|
||||
|
||||
func (m *mockStorage) Reload(app *config.App) error {
|
||||
args := m.Called(app)
|
||||
|
||||
return args.Error(0)
|
||||
}
|
||||
|
||||
func (m *mockStorage) Append(ctx context.Context, feeds ...*model.Feed) error {
|
||||
args := m.Called(ctx, feeds)
|
||||
|
||||
return args.Error(0)
|
||||
}
|
||||
|
||||
func (m *mockStorage) Query(ctx context.Context, query block.QueryOptions) ([]*block.FeedVO, error) {
|
||||
args := m.Called(ctx, query)
|
||||
|
||||
return args.Get(0).([]*block.FeedVO), args.Error(1)
|
||||
}
|
||||
|
||||
func (m *mockStorage) Exists(ctx context.Context, id uint64, hintTime time.Time) (bool, error) {
|
||||
args := m.Called(ctx, id, hintTime)
|
||||
|
||||
return args.Get(0).(bool), args.Error(1)
|
||||
}
|
||||
446
pkg/storage/feed/feed_test.go
Normal file
446
pkg/storage/feed/feed_test.go
Normal file
@@ -0,0 +1,446 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
// TODO: fix tests
|
||||
package feed
|
||||
|
||||
// import (
|
||||
// "context"
|
||||
// "os"
|
||||
// "testing"
|
||||
// "time"
|
||||
//
|
||||
|
||||
// "github.com/benbjohnson/clock"
|
||||
// . "github.com/onsi/gomega"
|
||||
// "github.com/stretchr/testify/mock"
|
||||
|
||||
// "github.com/glidea/zenfeed/pkg/config"
|
||||
// "github.com/glidea/zenfeed/pkg/storage/feed/block"
|
||||
// "github.com/glidea/zenfeed/pkg/storage/feed/block/chunk"
|
||||
// "github.com/glidea/zenfeed/pkg/test"
|
||||
// timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
// )
|
||||
|
||||
// func TestNew(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// now time.Time
|
||||
// blocksOnDisk []string // Block directory names in format "2006-01-02T15:04:05Z-2006-01-02T15:04:05Z"
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// app *config.App
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// storage storage
|
||||
// storageHotLen int
|
||||
// storageColdLen int
|
||||
// blockCalls []func(obj *mock.Mock)
|
||||
// }
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Create a new storage from an empty directory",
|
||||
// Given: "just mock a time",
|
||||
// When: "call New with a config with a data directory",
|
||||
// Then: "should return a new storage and a hot block created",
|
||||
// GivenDetail: givenDetail{
|
||||
// now: timeutil.MustParse("2025-03-03T10:00:00Z"),
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// app: &config.App{
|
||||
// DB: config.DB{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// storage: storage{
|
||||
// config: &Config{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// },
|
||||
// },
|
||||
// storageHotLen: 1,
|
||||
// storageColdLen: 0,
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Create a storage from existing directory with blocks",
|
||||
// Given: "existing blocks on disk",
|
||||
// GivenDetail: givenDetail{
|
||||
// now: timeutil.MustParse("2025-03-03T10:00:00Z"),
|
||||
// blocksOnDisk: []string{
|
||||
// "2025-03-02T10:00:00Z ~ 2025-03-03T10:00:00Z", // Hot block
|
||||
// "2025-03-01T10:00:00Z ~ 2025-03-02T10:00:00Z", // Cold block
|
||||
// "2025-02-28T10:00:00Z ~ 2025-03-01T10:00:00Z", // Cold block
|
||||
// },
|
||||
// },
|
||||
// When: "call New with a config with existing data directory",
|
||||
// WhenDetail: whenDetail{
|
||||
// app: &config.App{
|
||||
// DB: config.DB{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// WriteableWindow: 49 * time.Hour,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// Then: "should return a storage with existing blocks loaded",
|
||||
// ThenExpected: thenExpected{
|
||||
// storage: storage{
|
||||
// config: &Config{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// Block: BlockConfig{
|
||||
// WriteableWindow: 49 * time.Hour,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// storageHotLen: 1,
|
||||
// storageColdLen: 2,
|
||||
// blockCalls: []func(obj *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateHot).Once()
|
||||
// },
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateCold).Once()
|
||||
// },
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateCold).Once()
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// c := clock.NewMock()
|
||||
// c.Set(tt.GivenDetail.now)
|
||||
// clk = c // Set global clock.
|
||||
// defer func() { clk = clock.New() }()
|
||||
|
||||
// // Create test directories if needed
|
||||
// if len(tt.GivenDetail.blocksOnDisk) > 0 {
|
||||
// for _, blockDir := range tt.GivenDetail.blocksOnDisk {
|
||||
// err := os.MkdirAll(tt.WhenDetail.app.DB.Dir+"/"+blockDir, 0755)
|
||||
// Expect(err).To(BeNil())
|
||||
// }
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// var calls int
|
||||
// var blockCalls []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.ThenExpected.blockCalls) {
|
||||
// tt.ThenExpected.blockCalls[calls](obj)
|
||||
// calls++
|
||||
// blockCalls = append(blockCalls, obj)
|
||||
// }
|
||||
// })
|
||||
// s, err := new(tt.WhenDetail.app, blockFactory)
|
||||
// defer os.RemoveAll(tt.WhenDetail.app.DB.Dir)
|
||||
|
||||
// // Then.
|
||||
// Expect(err).To(BeNil())
|
||||
// Expect(s).NotTo(BeNil())
|
||||
// storage := s.(*storage)
|
||||
// Expect(storage.config).To(Equal(tt.ThenExpected.storage.config))
|
||||
// Expect(len(storage.hot.blocks)).To(Equal(tt.ThenExpected.storageHotLen))
|
||||
// Expect(len(storage.cold.blocks)).To(Equal(tt.ThenExpected.storageColdLen))
|
||||
// for _, call := range blockCalls {
|
||||
// call.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
|
||||
// func TestAppend(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// hotBlocks []func(m *mock.Mock)
|
||||
// coldBlocks []func(m *mock.Mock)
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// feeds []*chunk.Feed
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// err string
|
||||
// }
|
||||
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Append feeds to hot block",
|
||||
// Given: "a storage with one hot block",
|
||||
// When: "append feeds within hot block time range",
|
||||
// Then: "should append feeds to hot block successfully",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z")).Twice()
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z")).Twice()
|
||||
// m.On("State").Return(block.StateHot).Twice()
|
||||
// m.On("Append", mock.Anything, []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T11:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T12:00:00Z")},
|
||||
// }).Return(nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// feeds: []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T11:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T12:00:00Z")},
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Append feeds to non-hot block",
|
||||
// Given: "a storage with hot and cold blocks",
|
||||
// When: "append feeds with time in cold block range",
|
||||
// Then: "should return error",
|
||||
// GivenDetail: givenDetail{
|
||||
// coldBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {},
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// feeds: []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-01T11:00:00Z")},
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// err: "cannot find hot block",
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// calls := 0
|
||||
// var blockMocks []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks) {
|
||||
// tt.GivenDetail.hotBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var hotBlocks blockChain
|
||||
// for range tt.GivenDetail.hotBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// hotBlocks.add(block)
|
||||
// }
|
||||
// blockFactory = block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.coldBlocks) {
|
||||
// tt.GivenDetail.coldBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var coldBlocks blockChain
|
||||
// for range tt.GivenDetail.coldBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// coldBlocks.add(block)
|
||||
// }
|
||||
// s := storage{
|
||||
// hot: &hotBlocks,
|
||||
// cold: &coldBlocks,
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// err := s.Append(context.Background(), tt.WhenDetail.feeds...)
|
||||
|
||||
// // Then.
|
||||
// if tt.ThenExpected.err != "" {
|
||||
// Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
// } else {
|
||||
// Expect(err).To(BeNil())
|
||||
// }
|
||||
// for _, m := range blockMocks {
|
||||
// m.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
|
||||
// func TestQuery(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// hotBlocks []func(m *mock.Mock)
|
||||
// coldBlocks []func(m *mock.Mock)
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// query block.QueryOptions
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// feeds []*block.FeedVO
|
||||
// err string
|
||||
// }
|
||||
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Query feeds from hot blocks",
|
||||
// Given: "a storage with one hot block containing feeds",
|
||||
// When: "querying with time range within hot block",
|
||||
// Then: "should return matching feeds from hot block",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z")).Once()
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z")).Once()
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return q.Start.Equal(timeutil.MustParse("2025-03-02T12:00:00Z")) &&
|
||||
// q.End.Equal(timeutil.MustParse("2025-03-02T14:00:00Z"))
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T12:30:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T13:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// query: block.QueryOptions{
|
||||
// Start: timeutil.MustParse("2025-03-02T12:00:00Z"),
|
||||
// End: timeutil.MustParse("2025-03-02T14:00:00Z"),
|
||||
// Limit: 10,
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// feeds: []*block.FeedVO{
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T13:00:00Z")},
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T12:30:00Z")},
|
||||
// },
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Query feeds from multiple blocks",
|
||||
// Given: "a storage with hot and cold blocks containing feeds",
|
||||
// When: "querying with time range spanning multiple blocks",
|
||||
// Then: "should return combined and sorted feeds from all matching blocks",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z"))
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z"))
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return !q.Start.IsZero() && q.End.IsZero()
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 3, Time: timeutil.MustParse("2025-03-02T15:00:00Z")},
|
||||
// {ID: 4, Time: timeutil.MustParse("2025-03-02T16:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// coldBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-01T10:00:00Z"))
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-02T10:00:00Z"))
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return !q.Start.IsZero() && q.End.IsZero()
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-01T15:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-01T16:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// query: block.QueryOptions{
|
||||
// Start: timeutil.MustParse("2025-03-01T12:00:00Z"),
|
||||
// Limit: 3,
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// feeds: []*block.FeedVO{
|
||||
// {ID: 4, Time: timeutil.MustParse("2025-03-02T16:00:00Z")},
|
||||
// {ID: 3, Time: timeutil.MustParse("2025-03-02T15:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-01T16:00:00Z")},
|
||||
// },
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// calls := 0
|
||||
// var blockMocks []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks) {
|
||||
// tt.GivenDetail.hotBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var hotBlocks blockChain
|
||||
// for range tt.GivenDetail.hotBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// hotBlocks.add(block)
|
||||
// }
|
||||
|
||||
// blockFactory = block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks)+len(tt.GivenDetail.coldBlocks) {
|
||||
// tt.GivenDetail.coldBlocks[calls-len(tt.GivenDetail.hotBlocks)](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var coldBlocks blockChain
|
||||
// for range tt.GivenDetail.coldBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// coldBlocks.add(block)
|
||||
// }
|
||||
|
||||
// s := storage{
|
||||
// hot: &hotBlocks,
|
||||
// cold: &coldBlocks,
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// feeds, err := s.Query(context.Background(), tt.WhenDetail.query)
|
||||
|
||||
// // Then.
|
||||
// if tt.ThenExpected.err != "" {
|
||||
// Expect(err).NotTo(BeNil())
|
||||
// Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
// } else {
|
||||
// Expect(err).To(BeNil())
|
||||
// Expect(feeds).To(HaveLen(len(tt.ThenExpected.feeds)))
|
||||
|
||||
// // Check feeds match expected
|
||||
// for i, feed := range feeds {
|
||||
// Expect(feed.ID).To(Equal(tt.ThenExpected.feeds[i].ID))
|
||||
// Expect(feed.Time).To(Equal(tt.ThenExpected.feeds[i].Time))
|
||||
// Expect(feed.Labels).To(Equal(tt.ThenExpected.feeds[i].Labels))
|
||||
// }
|
||||
// }
|
||||
|
||||
// for _, m := range blockMocks {
|
||||
// m.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
520
pkg/storage/feed/feed_test_outdate.go
Normal file
520
pkg/storage/feed/feed_test_outdate.go
Normal file
@@ -0,0 +1,520 @@
|
||||
// Copyright (C) 2025 wangyusong
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package feed
|
||||
|
||||
// import (
|
||||
// "context"
|
||||
// "os"
|
||||
// "testing"
|
||||
// "time"
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
//
|
||||
|
||||
// "github.com/benbjohnson/clock"
|
||||
// . "github.com/onsi/gomega"
|
||||
// "github.com/stretchr/testify/mock"
|
||||
|
||||
// "github.com/glidea/zenfeed/pkg/config"
|
||||
// "github.com/glidea/zenfeed/pkg/storage/feed/block"
|
||||
// "github.com/glidea/zenfeed/pkg/storage/feed/block/chunk"
|
||||
// "github.com/glidea/zenfeed/pkg/test"
|
||||
// timeutil "github.com/glidea/zenfeed/pkg/util/time"
|
||||
// )
|
||||
|
||||
// func TestNew(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// now time.Time
|
||||
// blocksOnDisk []string // Block directory names in format "2006-01-02T15:04:05Z-2006-01-02T15:04:05Z"
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// app *config.App
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// storage storage
|
||||
// storageHotLen int
|
||||
// storageColdLen int
|
||||
// blockCalls []func(obj *mock.Mock)
|
||||
// }
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Create a new storage from an empty directory",
|
||||
// Given: "just mock a time",
|
||||
// When: "call New with a config with a data directory",
|
||||
// Then: "should return a new storage and a hot block created",
|
||||
// GivenDetail: givenDetail{
|
||||
// now: timeutil.MustParse("2025-03-03T10:00:00Z"),
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// app: &config.App{
|
||||
// DB: config.DB{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// storage: storage{
|
||||
// config: &Config{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// },
|
||||
// },
|
||||
// storageHotLen: 1,
|
||||
// storageColdLen: 0,
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Create a storage from existing directory with blocks",
|
||||
// Given: "existing blocks on disk",
|
||||
// GivenDetail: givenDetail{
|
||||
// now: timeutil.MustParse("2025-03-03T10:00:00Z"),
|
||||
// blocksOnDisk: []string{
|
||||
// "2025-03-02T10:00:00Z ~ 2025-03-03T10:00:00Z", // Hot block
|
||||
// "2025-03-01T10:00:00Z ~ 2025-03-02T10:00:00Z", // Cold block
|
||||
// "2025-02-28T10:00:00Z ~ 2025-03-01T10:00:00Z", // Cold block
|
||||
// },
|
||||
// },
|
||||
// When: "call New with a config with existing data directory",
|
||||
// WhenDetail: whenDetail{
|
||||
// app: &config.App{
|
||||
// DB: config.DB{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// WriteableWindow: 49 * time.Hour,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// Then: "should return a storage with existing blocks loaded",
|
||||
// ThenExpected: thenExpected{
|
||||
// storage: storage{
|
||||
// config: &Config{
|
||||
// Dir: "/tmp/TestNew",
|
||||
// Block: BlockConfig{
|
||||
// WriteableWindow: 49 * time.Hour,
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// storageHotLen: 1,
|
||||
// storageColdLen: 2,
|
||||
// blockCalls: []func(obj *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateHot).Once()
|
||||
// },
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateCold).Once()
|
||||
// },
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("State").Return(block.StateCold).Once()
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// c := clock.NewMock()
|
||||
// c.Set(tt.GivenDetail.now)
|
||||
// clk = c // Set global clock.
|
||||
// defer func() { clk = clock.New() }()
|
||||
|
||||
// // Create test directories if needed
|
||||
// if len(tt.GivenDetail.blocksOnDisk) > 0 {
|
||||
// for _, blockDir := range tt.GivenDetail.blocksOnDisk {
|
||||
// err := os.MkdirAll(tt.WhenDetail.app.DB.Dir+"/"+blockDir, 0755)
|
||||
// Expect(err).To(BeNil())
|
||||
// }
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// var calls int
|
||||
// var blockCalls []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.ThenExpected.blockCalls) {
|
||||
// tt.ThenExpected.blockCalls[calls](obj)
|
||||
// calls++
|
||||
// blockCalls = append(blockCalls, obj)
|
||||
// }
|
||||
// })
|
||||
// s, err := new(tt.WhenDetail.app, blockFactory)
|
||||
// defer os.RemoveAll(tt.WhenDetail.app.DB.Dir)
|
||||
|
||||
// // Then.
|
||||
// Expect(err).To(BeNil())
|
||||
// Expect(s).NotTo(BeNil())
|
||||
// storage := s.(*storage)
|
||||
// Expect(storage.config).To(Equal(tt.ThenExpected.storage.config))
|
||||
// Expect(len(storage.hot.blocks)).To(Equal(tt.ThenExpected.storageHotLen))
|
||||
// Expect(len(storage.cold.blocks)).To(Equal(tt.ThenExpected.storageColdLen))
|
||||
// for _, call := range blockCalls {
|
||||
// call.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
|
||||
// func TestAppend(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// hotBlocks []func(m *mock.Mock)
|
||||
// coldBlocks []func(m *mock.Mock)
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// feeds []*chunk.Feed
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// err string
|
||||
// }
|
||||
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Append feeds to hot block",
|
||||
// Given: "a storage with one hot block",
|
||||
// When: "append feeds within hot block time range",
|
||||
// Then: "should append feeds to hot block successfully",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z")).Twice()
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z")).Twice()
|
||||
// m.On("State").Return(block.StateHot).Twice()
|
||||
// m.On("Append", mock.Anything, []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T11:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T12:00:00Z")},
|
||||
// }).Return(nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// feeds: []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T11:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T12:00:00Z")},
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Append feeds to non-hot block",
|
||||
// Given: "a storage with hot and cold blocks",
|
||||
// When: "append feeds with time in cold block range",
|
||||
// Then: "should return error",
|
||||
// GivenDetail: givenDetail{
|
||||
// coldBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {},
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// feeds: []*chunk.Feed{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-01T11:00:00Z")},
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// err: "cannot find hot block",
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// calls := 0
|
||||
// var blockMocks []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks) {
|
||||
// tt.GivenDetail.hotBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var hotBlocks blockChain
|
||||
// for range tt.GivenDetail.hotBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// hotBlocks.add(block)
|
||||
// }
|
||||
// blockFactory = block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.coldBlocks) {
|
||||
// tt.GivenDetail.coldBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var coldBlocks blockChain
|
||||
// for range tt.GivenDetail.coldBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// coldBlocks.add(block)
|
||||
// }
|
||||
// s := storage{
|
||||
// hot: &hotBlocks,
|
||||
// cold: &coldBlocks,
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// err := s.Append(context.Background(), tt.WhenDetail.feeds...)
|
||||
|
||||
// // Then.
|
||||
// if tt.ThenExpected.err != "" {
|
||||
// Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
// } else {
|
||||
// Expect(err).To(BeNil())
|
||||
// }
|
||||
// for _, m := range blockMocks {
|
||||
// m.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
|
||||
// func TestQuery(t *testing.T) {
|
||||
// RegisterTestingT(t)
|
||||
|
||||
// type givenDetail struct {
|
||||
// hotBlocks []func(m *mock.Mock)
|
||||
// coldBlocks []func(m *mock.Mock)
|
||||
// }
|
||||
// type whenDetail struct {
|
||||
// query block.QueryOptions
|
||||
// }
|
||||
// type thenExpected struct {
|
||||
// feeds []*block.FeedVO
|
||||
// err string
|
||||
// }
|
||||
|
||||
// tests := []test.Case[givenDetail, whenDetail, thenExpected]{
|
||||
// {
|
||||
// Scenario: "Query feeds from hot blocks",
|
||||
// Given: "a storage with one hot block containing feeds",
|
||||
// When: "querying with time range within hot block",
|
||||
// Then: "should return matching feeds from hot block",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z")).Once()
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z")).Once()
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return q.Start.Equal(timeutil.MustParse("2025-03-02T12:00:00Z")) &&
|
||||
// q.End.Equal(timeutil.MustParse("2025-03-02T14:00:00Z"))
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T12:30:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T13:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// query: block.QueryOptions{
|
||||
// Start: timeutil.MustParse("2025-03-02T12:00:00Z"),
|
||||
// End: timeutil.MustParse("2025-03-02T14:00:00Z"),
|
||||
// Limit: 10,
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// feeds: []*block.FeedVO{
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-02T13:00:00Z")},
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-02T12:30:00Z")},
|
||||
// },
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// {
|
||||
// Scenario: "Query feeds from multiple blocks",
|
||||
// Given: "a storage with hot and cold blocks containing feeds",
|
||||
// When: "querying with time range spanning multiple blocks",
|
||||
// Then: "should return combined and sorted feeds from all matching blocks",
|
||||
// GivenDetail: givenDetail{
|
||||
// hotBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-02T10:00:00Z"))
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-03T10:00:00Z"))
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return !q.Start.IsZero() && q.End.IsZero()
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 3, Time: timeutil.MustParse("2025-03-02T15:00:00Z")},
|
||||
// {ID: 4, Time: timeutil.MustParse("2025-03-02T16:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// coldBlocks: []func(m *mock.Mock){
|
||||
// func(m *mock.Mock) {
|
||||
// m.On("Start").Return(timeutil.MustParse("2025-03-01T10:00:00Z"))
|
||||
// m.On("End").Return(timeutil.MustParse("2025-03-02T10:00:00Z"))
|
||||
// m.On("Query", mock.Anything, mock.MatchedBy(func(q block.QueryOptions) bool {
|
||||
// return !q.Start.IsZero() && q.End.IsZero()
|
||||
// })).Return([]*block.FeedVO{
|
||||
// {ID: 1, Time: timeutil.MustParse("2025-03-01T15:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-01T16:00:00Z")},
|
||||
// }, nil)
|
||||
// },
|
||||
// },
|
||||
// },
|
||||
// WhenDetail: whenDetail{
|
||||
// query: block.QueryOptions{
|
||||
// Start: timeutil.MustParse("2025-03-01T12:00:00Z"),
|
||||
// Limit: 3,
|
||||
// },
|
||||
// },
|
||||
// ThenExpected: thenExpected{
|
||||
// feeds: []*block.FeedVO{
|
||||
// {ID: 4, Time: timeutil.MustParse("2025-03-02T16:00:00Z")},
|
||||
// {ID: 3, Time: timeutil.MustParse("2025-03-02T15:00:00Z")},
|
||||
// {ID: 2, Time: timeutil.MustParse("2025-03-01T16:00:00Z")},
|
||||
// },
|
||||
// err: "",
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
|
||||
// for _, tt := range tests {
|
||||
// t.Run(tt.Scenario, func(t *testing.T) {
|
||||
// // Given.
|
||||
// calls := 0
|
||||
// var blockMocks []*mock.Mock
|
||||
// blockFactory := block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks) {
|
||||
// tt.GivenDetail.hotBlocks[calls](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var hotBlocks blockChain
|
||||
// for range tt.GivenDetail.hotBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// hotBlocks.add(block)
|
||||
// }
|
||||
|
||||
// blockFactory = block.NewFactory(func(obj *mock.Mock) {
|
||||
// if calls < len(tt.GivenDetail.hotBlocks)+len(tt.GivenDetail.coldBlocks) {
|
||||
// tt.GivenDetail.coldBlocks[calls-len(tt.GivenDetail.hotBlocks)](obj)
|
||||
// calls++
|
||||
// blockMocks = append(blockMocks, obj)
|
||||
// }
|
||||
// })
|
||||
// var coldBlocks blockChain
|
||||
// for range tt.GivenDetail.coldBlocks {
|
||||
// block, err := blockFactory.New(nil, nil, nil, nil, nil)
|
||||
// Expect(err).To(BeNil())
|
||||
// coldBlocks.add(block)
|
||||
// }
|
||||
|
||||
// s := storage{
|
||||
// hot: &hotBlocks,
|
||||
// cold: &coldBlocks,
|
||||
// }
|
||||
|
||||
// // When.
|
||||
// feeds, err := s.Query(context.Background(), tt.WhenDetail.query)
|
||||
|
||||
// // Then.
|
||||
// if tt.ThenExpected.err != "" {
|
||||
// Expect(err).NotTo(BeNil())
|
||||
// Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
|
||||
// } else {
|
||||
// Expect(err).To(BeNil())
|
||||
// Expect(feeds).To(HaveLen(len(tt.ThenExpected.feeds)))
|
||||
|
||||
// // Check feeds match expected
|
||||
// for i, feed := range feeds {
|
||||
// Expect(feed.ID).To(Equal(tt.ThenExpected.feeds[i].ID))
|
||||
// Expect(feed.Time).To(Equal(tt.ThenExpected.feeds[i].Time))
|
||||
// Expect(feed.Labels).To(Equal(tt.ThenExpected.feeds[i].Labels))
|
||||
// }
|
||||
// }
|
||||
|
||||
// for _, m := range blockMocks {
|
||||
// m.AssertExpectations(t)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
Reference in New Issue
Block a user