This commit is contained in:
glidea
2025-04-19 15:50:26 +08:00
commit 8b33df8a05
109 changed files with 24407 additions and 0 deletions

View File

@@ -0,0 +1,741 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package chunk
import (
"bytes"
"context"
"encoding/binary"
"io"
"os"
"sync"
"sync/atomic"
"time"
"github.com/edsrzf/mmap-go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/glidea/zenfeed/pkg/component"
"github.com/glidea/zenfeed/pkg/model"
"github.com/glidea/zenfeed/pkg/telemetry"
telemetrymodel "github.com/glidea/zenfeed/pkg/telemetry/model"
"github.com/glidea/zenfeed/pkg/util/buffer"
timeutil "github.com/glidea/zenfeed/pkg/util/time"
)
// --- Interface code block ---

// File is the interface for a chunk file.
// Concurrent safe.
type File interface {
	component.Component

	// EnsureReadonly ensures the file is readonly (can not Append).
	// It should be fast when the file already is readonly.
	// It will ensure the writeonly related resources are closed,
	// and open the readonly related resources, such as mmap to save memory.
	EnsureReadonly(ctx context.Context) (err error)

	// Count returns the number of feeds currently stored in the file.
	Count(ctx context.Context) (count uint32)

	// Append appends feeds to the file.
	// onSuccess is called when the feed is appended successfully (synchronously).
	// The offset is the offset of the feed in the file.
	// !!! It doesn't buffer the data between requests, so the caller should buffer the feeds to avoid high I/O.
	Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) (err error)

	// Read reads a feed from the file at the given byte offset.
	Read(ctx context.Context, offset uint64) (feed *Feed, err error)

	// Range ranges over all feeds in the file.
	// Iteration stops at the first error returned by iter.
	Range(ctx context.Context, iter func(feed *Feed, offset uint64) (err error)) (err error)
}
// Config for a chunk file.
type Config struct {
	// Path is the path to the chunk file.
	// If the file does not exist, it will be created.
	// If the file exists, it will be reloaded.
	Path string

	// ReadonlyAtFirst indicates whether the file should be readonly at first.
	// If file of path does not exist, it cannot be true
	// (see Validate: a readonly file cannot be created from scratch).
	ReadonlyAtFirst bool
}
// Validate checks the config and reports whether the target file already
// exists. It fails when the path is empty, points to a directory, or when
// ReadonlyAtFirst is requested for a non-existent file.
func (c *Config) Validate() (fileExists bool, err error) {
	if c.Path == "" {
		return false, errors.New("path is required")
	}

	fi, statErr := os.Stat(c.Path)
	if statErr == nil {
		if fi.IsDir() {
			return false, errors.New("path is a directory")
		}

		return true, nil
	}
	if os.IsNotExist(statErr) {
		// A brand-new file cannot start in readonly mode.
		if c.ReadonlyAtFirst {
			return false, errors.New("path does not exist")
		}

		return false, nil
	}

	return false, errors.Wrap(statErr, "stat path")
}
// Dependencies for a chunk file. Currently none are required.
type Dependencies struct{}
// File struct.
// On-disk layout constants: a fixed 64-byte header (16-byte magic number +
// 4-byte little-endian version + padding), followed by feed data.
var (
	// headerBytes is the fixed size of the on-disk header.
	headerBytes = 64

	// headerMagicNumber identifies a chunk file ("wys is awesome" NUL-padded to 16 bytes).
	headerMagicNumber = []byte{0x77, 0x79, 0x73, 0x20, 0x69, 0x73, 0x20,
		0x61, 0x77, 0x65, 0x73, 0x6f, 0x6d, 0x65, 0x00, 0x00}
	headerMagicNumberBytes = 16

	// headerVersionStart is the byte offset of the version field (right after the magic number).
	headerVersionStart = headerMagicNumberBytes
	headerVersion      = uint32(1)
	headerVersionBytes = 4

	// dataStart is the byte offset where feed data begins.
	dataStart = headerBytes

	// header is the pre-built header written to every new chunk file.
	header = func() []byte {
		b := make([]byte, headerBytes)
		copy(b[:headerMagicNumberBytes], headerMagicNumber)
		binary.LittleEndian.PutUint32(b[headerVersionStart:headerVersionStart+headerVersionBytes], headerVersion)
		return b
	}()
)
// Metrics.
var (
	// modes are the values of the "mode" metric label; the series for the
	// inactive mode is deleted on each tick in Run.
	modes = []string{"readwrite", "readonly"}

	// feedCount tracks the number of feeds per chunk file instance.
	feedCount = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: model.AppName,
			Subsystem: "chunk",
			Name:      "feed_count",
			Help:      "Number of feeds in the chunk file.",
		},
		[]string{telemetrymodel.KeyComponent, telemetrymodel.KeyComponentInstance, "mode"},
	)

	// byteSize tracks the size in bytes per chunk file instance.
	byteSize = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: model.AppName,
			Subsystem: "chunk",
			Name:      "bytes",
			Help:      "Size of the chunk file.",
		},
		[]string{telemetrymodel.KeyComponent, telemetrymodel.KeyComponentInstance, "mode"},
	)
)
// --- Factory code block ---

// Factory creates File instances.
type Factory component.Factory[File, Config, Dependencies]

// NewFactory returns a Factory for chunk files.
// When mockOn options are given, the factory produces mockFile instances
// configured with those options (for tests); otherwise it produces real
// files via new.
func NewFactory(mockOn ...component.MockOption) Factory {
	if len(mockOn) > 0 {
		return component.FactoryFunc[File, Config, Dependencies](
			func(instance string, config *Config, dependencies Dependencies) (File, error) {
				m := &mockFile{}
				component.MockOptions(mockOn).Apply(&m.Mock)
				return m, nil
			},
		)
	}

	return component.FactoryFunc[File, Config, Dependencies](new)
}
// new creates a chunk file component.
// A missing file is created; an existing file is reloaded and validated.
// When config.ReadonlyAtFirst is set, the file is opened readonly and mmapped.
func new(instance string, config *Config, dependencies Dependencies) (File, error) {
	exists, err := config.Validate()
	if err != nil {
		return nil, errors.Wrap(err, "validate config")
	}

	osFile, buf, offset, mapped, feedTotal, err := init0(exists, config)
	if err != nil {
		return nil, err
	}

	// Seed the atomic state from the loaded file.
	readonly := &atomic.Bool{}
	readonly.Store(config.ReadonlyAtFirst)
	count := &atomic.Uint32{}
	count.Store(feedTotal)

	return &file{
		Base: component.New(&component.BaseConfig[Config, Dependencies]{
			Name:         "FeedChunk",
			Instance:     instance,
			Config:       config,
			Dependencies: dependencies,
		}),
		f:            osFile,
		readWriteBuf: buf,
		appendOffset: offset,
		readonlyMmap: mapped,
		readonly:     readonly,
		count:        count,
	}, nil
}
// init0 opens or creates the underlying OS file and prepares the
// mode-specific resources.
//
// The whole file content is validated (and loaded into readWriteBuf); in
// readonly mode the buffer is then dropped and the file is mmapped instead.
// count is the number of valid feeds found on disk; appendOffset is the byte
// position where the next feed will be written (readwrite mode only).
func init0(
	fileExists bool,
	config *Config,
) (
	osFile *os.File,
	readWriteBuf *buffer.Bytes,
	appendOffset uint64,
	readonlyMmap mmap.MMap,
	count uint32,
	err error,
) {
	// Ensure file.
	if fileExists {
		osFile, err = loadFromExisting(config.Path, config.ReadonlyAtFirst)
		if err != nil {
			return nil, nil, 0, nil, 0, errors.Wrap(err, "load from existing")
		}
	} else { // Create new file.
		if config.ReadonlyAtFirst {
			return nil, nil, 0, nil, 0, errors.New("cannot create readonly file")
		}
		osFile, err = createNewOSFile(config.Path)
		if err != nil {
			return nil, nil, 0, nil, 0, errors.Wrap(err, "create new os file")
		}
	}

	// Setup for Read.
	// validateOSFile also recovers from a partially written trailing feed
	// by truncating it from the returned buffer.
	readWriteBuf, count, err = validateOSFile(osFile)
	if err != nil {
		_ = osFile.Close()
		return nil, nil, 0, nil, 0, errors.Wrap(err, "validate os file")
	}
	if config.ReadonlyAtFirst {
		readWriteBuf = nil // Help GC.
		m, err := mmap.Map(osFile, mmap.RDONLY, 0)
		if err != nil {
			_ = osFile.Close()
			return nil, nil, 0, nil, 0, errors.Wrap(err, "mmap file")
		}
		readonlyMmap = m
	} else {
		appendOffset = uint64(readWriteBuf.Len())
	}

	return
}
// validateOSFile scans the whole file, validating the header and every feed.
// It returns a buffer holding the validated content (header included) and the
// number of valid feeds. A truncated (EOF) or checksum-corrupted trailing
// feed is dropped from the buffer so the next Append overwrites it; any other
// validation error is fatal.
func validateOSFile(f *os.File) (readWriteBuf *buffer.Bytes, count uint32, err error) {
	// NOTE: this local header shadows the package-level header; both are
	// headerBytes long, so len(header) below equals dataStart.
	header, err := validateHeader(f)
	if err != nil {
		return nil, 0, errors.Wrap(err, "validate header")
	}
	readWriteBuf = &buffer.Bytes{B: header} // len(header) == cap(header).

	if _, err := f.Seek(int64(dataStart), io.SeekStart); err != nil {
		return nil, 0, errors.Wrap(err, "seek to data start")
	}

	// Walk feed by feed, remembering the end of the last fully valid one.
	tr := &trackReader{Reader: f}
	var lastSuccessReaded int
	var p Feed
	for {
		err := p.validateFrom(tr, readWriteBuf)
		switch {
		case err == nil:
			count++
			lastSuccessReaded = tr.Readed()
			continue
		case (errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF)) ||
			errors.Is(err, errChecksumMismatch):
			// Truncate uncompleted feed if any.
			readWriteBuf.B = readWriteBuf.B[:lastSuccessReaded+len(header)]
			return readWriteBuf, count, nil
		default:
			return nil, 0, errors.Wrap(err, "validate payload")
		}
	}
}
// validateHeader reads the fixed-size header from the start of f and checks
// the magic number and format version. It returns the raw header bytes.
func validateHeader(f *os.File) ([]byte, error) {
	buf := make([]byte, headerBytes)
	if _, readErr := f.ReadAt(buf, 0); readErr != nil {
		return nil, errors.Wrap(readErr, "read header")
	}

	// Validate magic number.
	if !bytes.Equal(buf[:headerMagicNumberBytes], headerMagicNumber) {
		return nil, errors.New("invalid magic number")
	}

	// Validate version.
	got := binary.LittleEndian.Uint32(buf[headerVersionStart : headerVersionStart+headerVersionBytes])
	if got != headerVersion {
		return nil, errors.New("invalid version")
	}

	return buf, nil
}
// loadFromExisting opens the chunk file at path: read-only when
// readonlyAtFirst is set, read-write otherwise.
func loadFromExisting(path string, readonlyAtFirst bool) (*os.File, error) {
	mode := os.O_RDWR
	if readonlyAtFirst {
		mode = os.O_RDONLY
	}

	f, err := os.OpenFile(path, mode, 0600)
	if err != nil {
		return nil, errors.Wrap(err, "open file")
	}

	return f, nil
}
// createNewOSFile creates a brand-new chunk file at path (failing if one
// already exists), writes the standard header, and syncs it to disk.
func createNewOSFile(path string) (*os.File, error) {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
	if err != nil {
		return nil, errors.Wrap(err, "create file")
	}

	if _, err := f.Write(header); err != nil {
		_ = f.Close()
		return nil, errors.Wrap(err, "write header")
	}
	// Persist the header before handing the file out.
	if err := f.Sync(); err != nil {
		_ = f.Close()
		return nil, errors.Wrap(err, "sync file")
	}

	return f, nil
}
// --- Implementation code block ---

// file is the concrete File implementation.
// It operates in one of two modes: readwrite (feeds buffered in readWriteBuf
// and appended to the OS file) or readonly (content served from an mmap).
// The switch readwrite -> readonly is one-way (see EnsureReadonly).
type file struct {
	*component.Base[Config, Dependencies]

	f     *os.File
	count *atomic.Uint32 // Number of feeds in the file.

	readonly *atomic.Bool // Set to true only by EnsureReadonly; never cleared.
	mu       sync.RWMutex // Guards the mode-specific fields below and f.

	// Only readwrite.
	readWriteBuf *buffer.Bytes // Full file content (header + feeds) in memory.
	appendOffset uint64        // Byte position of the next append.

	// Only readonly.
	readonlyMmap mmap.MMap
}
// Run marks the component ready and then, every 30 seconds, exports the
// feed-count and byte-size gauges for the current mode while deleting the
// series of the inactive mode. It returns when the component context ends.
func (f *file) Run() error {
	f.MarkReady()

	return timeutil.Tick(f.Context(), 30*time.Second, func() error {
		// Snapshot mode-dependent state under the read lock: appendOffset and
		// readonlyMmap are mutated by Append/EnsureReadonly/Close while
		// holding f.mu, so reading them unlocked would be a data race.
		f.mu.RLock()
		mode := "readwrite"
		sizeValue := f.appendOffset
		if f.readonly.Load() {
			mode = "readonly"
			sizeValue = uint64(len(f.readonlyMmap))
		}
		f.mu.RUnlock()

		feedCount.WithLabelValues(append(f.TelemetryLabelsIDFields(), mode)...).Set(float64(f.Count(context.Background())))
		byteSize.WithLabelValues(append(f.TelemetryLabelsIDFields(), mode)...).Set(float64(sizeValue))

		// Drop stale series for the mode we are no longer in.
		for _, m := range modes {
			if m == mode {
				continue
			}
			feedCount.DeleteLabelValues(append(f.TelemetryLabelsIDFields(), m)...)
			byteSize.DeleteLabelValues(append(f.TelemetryLabelsIDFields(), m)...)
		}

		return nil
	})
}
// Close stops the background Run loop, removes this instance's metric
// series, releases the mmap (readonly mode), and closes the underlying OS
// file. After Close the file value must not be used.
func (f *file) Close() error {
	// Close Run().
	if err := f.Base.Close(); err != nil {
		return errors.Wrap(err, "closing base")
	}

	// Clean metrics.
	feedCount.DeletePartialMatch(f.TelemetryLabelsID())
	byteSize.DeletePartialMatch(f.TelemetryLabelsID())

	// Unmap if readonly.
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.readonlyMmap != nil {
		if err := f.readonlyMmap.Unmap(); err != nil {
			return errors.Wrap(err, "unmap file")
		}
		f.readonlyMmap = nil
	}

	// Close file.
	if err := f.f.Close(); err != nil {
		return errors.Wrap(err, "close file")
	}
	f.f = nil
	f.appendOffset = 0

	return nil
}
// EnsureReadonly switches the file into readonly mode. It is idempotent and
// cheap when already readonly. It uses double-checked locking: a lock-free
// fast path, then a re-check under the write lock before swapping the write
// buffer for an mmap of the OS file.
func (f *file) EnsureReadonly(ctx context.Context) (err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "EnsureReadonly")...)
	defer func() { telemetry.End(ctx, err) }()

	// Fast path - already readonly.
	if f.readonly.Load() {
		return nil
	}

	// Acquire write lock
	f.mu.Lock()
	defer f.mu.Unlock()
	// Re-check: another goroutine may have switched while we waited.
	if f.readonly.Load() {
		return nil
	}

	// Clear readwrite resources.
	f.readWriteBuf = nil

	// Open mmap.
	m, err := mmap.Map(f.f, mmap.RDONLY, 0)
	if err != nil {
		return errors.Wrap(err, "mmap file")
	}

	// Update state. readonly is stored last so readers that observe true
	// also observe the mmap.
	f.readonlyMmap = m
	f.readonly.Store(true)

	return nil
}
// Count returns the number of feeds currently stored in the file.
func (f *file) Count(ctx context.Context) uint32 {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Count")...)
	defer func() { telemetry.End(ctx, nil) }()

	// The counter is atomic, so no lock is needed here.
	n := f.count.Load()

	return n
}
// Append encodes feeds into the write buffer, persists the new bytes to the
// OS file, and then invokes onSuccess (outside the lock) with each feed's
// absolute file offset. It fails if the file has been switched to readonly.
func (f *file) Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) (err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Append")...)
	defer func() { telemetry.End(ctx, err) }()

	// Encode and commit under the write lock.
	absoluteOffsets, err := f.appendLocked(feeds)
	if err != nil {
		return err
	}

	// Call callbacks after releasing the lock.
	if err := f.notifySuccess(feeds, absoluteOffsets, onSuccess); err != nil {
		return errors.Wrap(err, "notify success callbacks")
	}

	return nil
}

// appendLocked performs the locked portion of Append: it encodes the feeds
// into readWriteBuf, writes the new bytes to the OS file, and updates the
// append offset and feed count. On any failure the buffer is rolled back to
// its previous length so a partial encode/write is overwritten next time.
// It returns the absolute file offset of each appended feed.
func (f *file) appendLocked(feeds []*Feed) ([]uint64, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	// Precheck.
	if f.readonly.Load() {
		return nil, errors.New("file is readonly")
	}

	// Encode feeds into buffer.
	currentAppendOffset := f.appendOffset
	relativeOffsets, encodedBytesCount, err := f.encodeFeeds(feeds)
	if err != nil {
		f.readWriteBuf.B = f.readWriteBuf.B[:currentAppendOffset] // Roll back partial encode.
		return nil, errors.Wrap(err, "encode feeds")
	}

	// Commit data to file.
	encodedData := f.readWriteBuf.Bytes()[currentAppendOffset:]
	if err := f.commitAppendToFile(encodedData, currentAppendOffset); err != nil {
		f.readWriteBuf.B = f.readWriteBuf.B[:currentAppendOffset] // Roll back on write failure.
		return nil, errors.Wrap(err, "commit append to file")
	}

	// Update internal state on successful commit.
	f.appendOffset = currentAppendOffset + uint64(encodedBytesCount)
	f.count.Add(uint32(len(feeds)))

	// Translate buffer-relative offsets to absolute file offsets.
	absoluteOffsets := make([]uint64, len(relativeOffsets))
	for i, relOff := range relativeOffsets {
		absoluteOffsets[i] = currentAppendOffset + relOff
	}

	return absoluteOffsets, nil
}
// Read decodes and returns the feed stored at the given absolute byte offset.
// Offsets are those reported via Append's onSuccess callback.
func (f *file) Read(ctx context.Context, offset uint64) (feed *Feed, err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Read")...)
	defer func() { telemetry.End(ctx, err) }()

	// Validate offset.
	if offset < uint64(dataStart) {
		return nil, errors.New("offset too small")
	}

	// Handle readonly mode.
	// NOTE(review): the mmap is read without holding f.mu — presumably safe
	// because readonly is never cleared once set, but racing with Close's
	// Unmap would be unsafe; confirm callers stop reads before Close.
	if f.readonly.Load() {
		if offset >= uint64(len(f.readonlyMmap)) {
			return nil, errors.New("offset too large")
		}
		feed, _, err = f.readFeed(ctx, f.readonlyMmap, offset)
		if err != nil {
			return nil, errors.Wrap(err, "read feed")
		}
		return feed, nil
	}

	// Handle readwrite mode: serve from the in-memory buffer under read lock.
	f.mu.RLock()
	defer f.mu.RUnlock()
	if offset >= f.appendOffset {
		return nil, errors.New("offset too large")
	}
	feed, _, err = f.readFeed(ctx, f.readWriteBuf.Bytes(), offset)
	if err != nil {
		return nil, errors.Wrap(err, "read feed")
	}
	return feed, nil
}
// Range iterates all feeds in file order, calling iter with each feed and its
// absolute byte offset. Iteration stops at the first error from iter or from
// decoding.
func (f *file) Range(ctx context.Context, iter func(feed *Feed, offset uint64) error) (err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "Range")...)
	defer func() { telemetry.End(ctx, err) }()

	// Handle readonly mode (iterate the mmap; see the locking note on Read).
	if f.readonly.Load() {
		// Start from data section.
		offset := uint64(dataStart)
		for offset < uint64(len(f.readonlyMmap)) {
			feed, n, err := f.readFeed(ctx, f.readonlyMmap, offset)
			if err != nil {
				return errors.Wrap(err, "read feed")
			}
			if err := iter(feed, offset); err != nil {
				return errors.Wrap(err, "iterate feed")
			}
			// Move to next feed.
			offset += uint64(n) // G115: Safe conversion as n is uint32
		}
		return nil
	}

	// Handle readwrite mode: iterate the in-memory buffer under read lock.
	f.mu.RLock()
	defer f.mu.RUnlock()
	data := f.readWriteBuf.Bytes()
	offset := uint64(dataStart)
	for offset < f.appendOffset { // appendOffset is already checked/maintained correctly.
		feed, n, err := f.readFeed(ctx, data, offset)
		if err != nil {
			return errors.Wrap(err, "read feed")
		}
		if err := iter(feed, offset); err != nil {
			return errors.Wrap(err, "iterate feed")
		}
		// Move to next feed.
		offset += uint64(n)
	}
	return nil
}
// estimatedFeedSize is a heuristic per-feed size (4 KiB) used to pre-grow the
// write buffer before encoding a batch (see encodeFeeds).
const estimatedFeedSize = 4 * 1024
// encodeFeeds appends the encoded form of each feed to the internal
// readWriteBuf. It returns each feed's offset relative to the start of the
// newly written region, plus the total number of bytes appended.
// Must be called with f.mu held for writing.
func (f *file) encodeFeeds(feeds []*Feed) ([]uint64, int, error) {
	base := f.readWriteBuf.Len()
	// Pre-grow the buffer using a rough per-feed estimate to limit reallocs.
	f.readWriteBuf.EnsureRemaining(estimatedFeedSize * len(feeds))

	offsets := make([]uint64, len(feeds))
	for i, fd := range feeds {
		offsets[i] = uint64(f.readWriteBuf.Len() - base)
		if err := fd.encodeTo(f.readWriteBuf); err != nil {
			return nil, 0, errors.Wrapf(err, "encode feed %d", i)
		}
	}

	return offsets, f.readWriteBuf.Len() - base, nil
}
// commitAppendToFile writes data at the given offset of the OS file and
// fsyncs it so the append survives a crash.
func (f *file) commitAppendToFile(data []byte, currentAppendOffset uint64) error {
	// Append data. A failed write may leave partial bytes on disk; they are
	// overwritten by the next append (and truncated on reload).
	if _, err := f.f.WriteAt(data, int64(currentAppendOffset)); err != nil {
		return errors.Wrap(err, "write feeds")
	}

	// Sync file to persist changes.
	if err := f.f.Sync(); err != nil {
		return errors.Wrap(err, "sync file")
	}

	return nil
}
// notifySuccess invokes onSuccess for each appended feed with its absolute
// file offset, stopping at the first callback error. A nil callback is a
// no-op.
func (f *file) notifySuccess(
	feeds []*Feed,
	absoluteOffsets []uint64,
	onSuccess func(feed *Feed, offset uint64) error,
) error {
	if onSuccess == nil {
		return nil
	}

	for i := range feeds {
		if err := onSuccess(feeds[i], absoluteOffsets[i]); err != nil {
			return errors.Wrapf(err, "on success callback for feed %d", i)
		}
	}

	return nil
}
// readFeed decodes a single feed from data starting at offset. It returns the
// decoded feed and the number of bytes it occupied (used to advance Range).
func (f *file) readFeed(ctx context.Context, data []byte, offset uint64) (feed *Feed, length int, err error) {
	ctx = telemetry.StartWith(ctx, append(f.TelemetryLabels(), telemetrymodel.KeyOperation, "readFeed")...)
	defer func() { telemetry.End(ctx, err) }()

	// Bound the reader to [offset, len(data)) and count consumed bytes.
	section := io.NewSectionReader(bytes.NewReader(data), int64(offset), int64(uint64(len(data))-offset))
	counting := &trackReader{Reader: section}

	decoded := &Feed{Feed: &model.Feed{}}
	if err = decoded.decodeFrom(counting); err != nil {
		return nil, 0, errors.Wrap(err, "decode feed")
	}

	return decoded, counting.Readed(), nil
}
type trackReader struct {
io.Reader
length int
}
func (r *trackReader) Read(p []byte) (n int, err error) {
n, err = r.Reader.Read(p)
r.length += n
return
}
func (r *trackReader) Readed() int {
return r.length
}
// mockFile is a testify-mock-backed implementation of File for tests.
// Each method records the call and returns the values configured via
// component.MockOption in NewFactory.
type mockFile struct {
	component.Mock
}

func (m *mockFile) Run() error {
	args := m.Called()
	return args.Error(0)
}

func (m *mockFile) Ready() <-chan struct{} {
	args := m.Called()
	return args.Get(0).(<-chan struct{})
}

func (m *mockFile) Close() error {
	args := m.Called()
	return args.Error(0)
}

func (m *mockFile) Append(ctx context.Context, feeds []*Feed, onSuccess func(feed *Feed, offset uint64) error) error {
	args := m.Called(ctx, feeds, onSuccess)
	return args.Error(0)
}

func (m *mockFile) Read(ctx context.Context, offset uint64) (*Feed, error) {
	args := m.Called(ctx, offset)
	return args.Get(0).(*Feed), args.Error(1)
}

func (m *mockFile) Range(ctx context.Context, iter func(feed *Feed, offset uint64) error) error {
	args := m.Called(ctx, iter)
	return args.Error(0)
}

func (m *mockFile) Count(ctx context.Context) uint32 {
	args := m.Called(ctx)
	return args.Get(0).(uint32)
}

func (m *mockFile) EnsureReadonly(ctx context.Context) error {
	args := m.Called(ctx)
	return args.Error(0)
}

View File

@@ -0,0 +1,270 @@
package chunk
import (
"context"
"fmt"
"math/rand"
"os"
"path/filepath"
"strings"
"testing"
"time"
"github.com/glidea/zenfeed/pkg/model"
)
// --- Benchmark Setup ---

const (
	benchmarkFeedCount = 10000 // Number of feeds for benchmark setup
	benchmarkBatchSize = 100   // Batch size for append benchmark
)

// Shared mutable state reused across benchmark setups; reset by the cleanup
// function returned from setupBenchmarkFile. Not safe for parallel benchmarks.
var (
	benchmarkFeeds    []*Feed
	benchmarkOffsets  []uint64 // Store offsets for read benchmark
	benchmarkTempPath string
)
// setupBenchmarkFile creates a temporary file and populates it with
// benchmarkFeeds (appending in batches of benchmarkBatchSize, recording each
// feed's offset into benchmarkOffsets). The file is first written in
// read-write mode, closed, then reopened in the requested mode.
// It returns the opened File and a cleanup function that removes the temp
// directory and resets the shared benchmark state.
func setupBenchmarkFile(b *testing.B, readonly bool) (File, func()) {
	b.Helper()

	// Create temp file path only once
	if benchmarkTempPath == "" {
		dir, err := os.MkdirTemp("", "chunk-benchmark")
		if err != nil {
			b.Fatalf("Failed to create temp dir: %v", err)
		}
		benchmarkTempPath = filepath.Join(dir, "benchmark.chunk")
	}
	cleanup := func() {
		os.RemoveAll(filepath.Dir(benchmarkTempPath))
		benchmarkTempPath = "" // Reset path for next potential setup
		benchmarkFeeds = nil   // Clear feeds
		benchmarkOffsets = nil // Clear offsets
	}

	// Generate feeds only once per setup phase if needed
	if len(benchmarkFeeds) == 0 {
		benchmarkFeeds = generateBenchmarkFeeds(benchmarkFeedCount)
		benchmarkOffsets = make([]uint64, 0, benchmarkFeedCount)
	}

	// Create and populate the file in read-write mode first
	rwConfig := &Config{Path: benchmarkTempPath}
	rwFile, err := new("benchmark-setup", rwConfig, Dependencies{})
	if err != nil {
		cleanup()
		b.Fatalf("Failed to create benchmark file for setup: %v", err)
	}

	currentOffsetCount := int(rwFile.Count(context.Background()))
	if currentOffsetCount < benchmarkFeedCount { // Only append if not already populated
		appendCount := 0
		onSuccess := func(feed *Feed, offset uint64) error {
			// Collect offsets only during the initial population
			if len(benchmarkOffsets) < benchmarkFeedCount {
				benchmarkOffsets = append(benchmarkOffsets, offset)
			}
			appendCount++
			return nil
		}
		for i := currentOffsetCount; i < benchmarkFeedCount; i += benchmarkBatchSize {
			end := i + benchmarkBatchSize
			if end > benchmarkFeedCount {
				end = benchmarkFeedCount
			}
			if err := rwFile.Append(context.Background(), benchmarkFeeds[i:end], onSuccess); err != nil {
				rwFile.Close()
				cleanup()
				b.Fatalf("Failed to append feeds during setup: %v", err)
			}
		}
	}

	// Close the read-write file before potentially reopening as readonly
	if err := rwFile.Close(); err != nil {
		cleanup()
		b.Fatalf("Failed to close rw file during setup: %v", err)
	}

	// Reopen file with the desired mode for the benchmark
	config := &Config{
		Path:            benchmarkTempPath,
		ReadonlyAtFirst: readonly,
	}
	f, err := new("benchmark", config, Dependencies{})
	if err != nil {
		cleanup()
		b.Fatalf("Failed to open benchmark file in target mode: %v", err)
	}
	if readonly {
		// For read benchmarks, ensure mmap is active if file was just created/populated
		if err := f.EnsureReadonly(context.Background()); err != nil {
			f.Close()
			cleanup()
			b.Fatalf("Failed to ensure readonly mode: %v", err)
		}
	}

	return f, cleanup
}
// generateBenchmarkFeeds builds count synthetic feeds, each carrying a few
// small labels, one large random-text label (~8-15 KiB), and two 1024-dim
// float32 vectors, to approximate realistic chunk payloads.
func generateBenchmarkFeeds(count int) []*Feed {
	feeds := make([]*Feed, count)
	// Time-based seed: runs are NOT reproducible; switch to a fixed seed if
	// reproducibility is needed.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Pre-generate some random characters for building large strings efficiently.
	const letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "
	letterRunes := []rune(letters)
	randString := func(n int) string {
		sb := strings.Builder{}
		sb.Grow(n)
		for i := 0; i < n; i++ {
			sb.WriteRune(letterRunes[rng.Intn(len(letterRunes))])
		}
		return sb.String()
	}

	minLabelSize := 8 * 1024  // 8KB
	maxLabelSize := 15 * 1024 // 15KB

	for i := range count {
		// Generate large label content size.
		largeLabelSize := minLabelSize + rng.Intn(maxLabelSize-minLabelSize+1)
		// Estimate the overhead of other labels and structure (key names, length prefixes etc.).
		// This is a rough estimation, adjust if needed.
		otherLabelsOverhead := 100
		largeContentSize := largeLabelSize - otherLabelsOverhead
		if largeContentSize < 0 {
			largeContentSize = 0
		}

		feeds[i] = &Feed{
			Feed: &model.Feed{
				ID: uint64(i + 1),
				Labels: model.Labels{
					model.Label{Key: "type", Value: fmt.Sprintf("type_%d", rng.Intn(10))},
					model.Label{Key: "source", Value: fmt.Sprintf("source_%d", rng.Intn(5))},
					model.Label{Key: "large_content", Value: randString(largeContentSize)}, // Add large label
				},
				Time: time.Now().Add(-time.Duration(rng.Intn(3600*24*30)) * time.Second), // Random time within the last 30 days
			},
			Vectors: [][]float32{
				generateFloat32Vector(rng, 1024), // Example dimension
				generateFloat32Vector(rng, 1024),
			},
		}
	}

	return feeds
}
func generateFloat32Vector(rng *rand.Rand, dim int) []float32 {
vec := make([]float32, dim)
for i := range vec {
vec[i] = rng.Float32()
}
return vec
}
// --- Benchmarks ---

// BenchmarkAppend measures appending one batch of benchmarkBatchSize feeds
// per iteration to a fresh read-write chunk file (no onSuccess callback).
func BenchmarkAppend(b *testing.B) {
	// Setup: Start with an empty file for appending.
	// Note: setupBenchmarkFile(b, false) creates the file but doesn't populate it fully here.
	// We need a fresh file for append benchmark.
	dir, err := os.MkdirTemp("", "chunk-append-benchmark")
	if err != nil {
		b.Fatalf("Failed to create temp dir: %v", err)
	}
	path := filepath.Join(dir, "append_benchmark.chunk")
	cleanup := func() {
		os.RemoveAll(dir)
	}
	defer cleanup()

	config := &Config{Path: path}
	f, err := new("benchmark-append", config, Dependencies{})
	if err != nil {
		b.Fatalf("Failed to create benchmark file for append: %v", err)
	}
	defer f.Close()

	feedsToAppend := generateBenchmarkFeeds(benchmarkBatchSize) // Generate a batch

	b.ResetTimer()
	b.ReportAllocs()

	// Measure appending batches of feeds.
	for i := 0; i < b.N; i++ {
		// Simulate appending new batches. In a real scenario, feeds would differ.
		// For benchmark consistency, we reuse the same batch data.
		err := f.Append(context.Background(), feedsToAppend, nil) // onSuccess is nil for performance
		if err != nil {
			b.Fatalf("Append failed during benchmark: %v", err)
		}
	}
	b.StopTimer() // Stop timer before potential cleanup/close overhead
}
// BenchmarkRead measures single-feed reads at random pre-recorded offsets
// from a populated, readonly (mmap-backed) chunk file.
func BenchmarkRead(b *testing.B) {
	// Setup: Populate a file and make it readonly (mmap).
	f, cleanup := setupBenchmarkFile(b, true)
	defer cleanup()

	if len(benchmarkOffsets) == 0 {
		b.Fatal("Benchmark setup failed: no offsets generated.")
	}

	// Pre-select random offsets to read (outside the timed region).
	rng := rand.New(rand.NewSource(42)) // Use a fixed seed for reproducibility
	readIndices := make([]int, b.N)
	for i := 0; i < b.N; i++ {
		readIndices[i] = rng.Intn(len(benchmarkOffsets))
	}

	b.ResetTimer()
	b.ReportAllocs()

	// Measure reading feeds at random valid offsets using mmap.
	for i := 0; i < b.N; i++ {
		offset := benchmarkOffsets[readIndices[i]]
		feed, err := f.Read(context.Background(), offset)
		if err != nil {
			b.Fatalf("Read failed during benchmark at offset %d: %v", offset, err)
		}
		// Prevent compiler optimization by using the result slightly
		if feed == nil {
			b.Fatal("Read returned nil feed")
		}
	}
	b.StopTimer()
}
// BenchmarkRange measures iterating all feeds in a populated chunk file.
func BenchmarkRange(b *testing.B) {
	// Setup: populate a file and reopen it in read-write mode (readonly=false),
	// so Range iterates the in-memory buffer rather than an mmap.
	// NOTE(review): BenchmarkRead passes true here; confirm whether the mmap
	// path was intended for this benchmark as well.
	f, cleanup := setupBenchmarkFile(b, false)
	defer cleanup()

	b.ResetTimer()
	b.ReportAllocs()

	// Measure ranging over all feeds.
	for i := 0; i < b.N; i++ {
		count := 0
		err := f.Range(context.Background(), func(feed *Feed, offset uint64) (err error) {
			// Minimal operation inside the iterator
			count++
			if feed == nil { // Basic check
				return fmt.Errorf("nil feed encountered at offset %d", offset)
			}
			return nil
		})
		if err != nil {
			b.Fatalf("Range failed during benchmark: %v", err)
		}
		// Optionally verify count, though it adds overhead to the benchmark itself
		// if uint32(count) != f.Count(context.Background()) {
		// 	b.Fatalf("Range count mismatch: expected %d, got %d", f.Count(context.Background()), count)
		// }
	}
	b.StopTimer()
}

View File

@@ -0,0 +1,567 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package chunk
import (
"context"
"os"
"path/filepath"
"testing"
"time"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
"github.com/glidea/zenfeed/pkg/model"
"github.com/glidea/zenfeed/pkg/test"
)
// TestNew covers creating a fresh chunk file, reopening an existing populated
// file in readonly mode (count restored from disk), and config validation
// failure on an empty path.
func TestNew(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		path            string
		readonlyAtFirst bool
		setupFeeds      []*Feed
	}
	type whenDetail struct{}
	type thenExpected struct {
		count uint32
		err   string
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Create New Chunk File",
			Given:    "A valid non-existing file path",
			When:     "Creating a new chunk file",
			Then:     "Should return a valid File instance with count 0",
			GivenDetail: givenDetail{
				readonlyAtFirst: false,
			},
			WhenDetail: whenDetail{},
			ThenExpected: thenExpected{
				count: 0,
			},
		},
		{
			Scenario: "Open Existing Chunk File",
			Given:    "A valid existing chunk file with data",
			When:     "Opening the file in readonly mode",
			Then:     "Should return a valid File instance with correct count",
			GivenDetail: givenDetail{
				readonlyAtFirst: true,
				setupFeeds: []*Feed{
					createTestFeed(1),
					createTestFeed(2),
					createTestFeed(3),
				},
			},
			WhenDetail: whenDetail{},
			ThenExpected: thenExpected{
				count: 3,
			},
		},
		{
			Scenario: "Invalid Configuration",
			Given:    "An invalid configuration with empty path",
			When:     "Creating a new chunk file",
			Then:     "Should return an error",
			GivenDetail: givenDetail{
				path: "", // Empty path
			},
			WhenDetail: whenDetail{},
			ThenExpected: thenExpected{
				err: "validate config: path is required",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			// A temp path is only needed for the non-error cases.
			if tt.GivenDetail.path == "" && tt.ThenExpected.err == "" {
				tt.GivenDetail.path = createTempFile(t)
				defer cleanupTempFile(tt.GivenDetail.path)
			}
			// Pre-populate the file when the case expects existing data.
			if len(tt.GivenDetail.setupFeeds) > 0 {
				initialFile, err := new("test", &Config{
					Path:            tt.GivenDetail.path,
					ReadonlyAtFirst: false,
				}, Dependencies{})
				Expect(err).NotTo(HaveOccurred())
				err = initialFile.Append(context.Background(), tt.GivenDetail.setupFeeds, nil)
				Expect(err).NotTo(HaveOccurred())
				initialFile.Close()
			}

			// When.
			file, err := new("test", &Config{
				Path:            tt.GivenDetail.path,
				ReadonlyAtFirst: tt.GivenDetail.readonlyAtFirst,
			}, Dependencies{})

			// Then.
			if tt.ThenExpected.err != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).NotTo(HaveOccurred())
				Expect(file).NotTo(BeNil())
				Expect(file.Count(context.Background())).To(Equal(tt.ThenExpected.count))
				file.Close()
			}
		})
	}
}
// TestFileModeSwitching verifies EnsureReadonly: switching a read-write file
// to readonly, and the idempotent case where the file is already readonly.
// In both cases a subsequent Append must fail with "file is readonly".
func TestFileModeSwitching(t *testing.T) {
	RegisterTestingT(t)

	tests := []struct {
		scenario      string
		given         string
		when          string
		then          string
		initialMode   bool // true for readonly
		expectedError string
	}{
		{
			scenario:      "ReadWrite to ReadOnly Switch",
			given:         "a read-write mode chunk file",
			when:          "calling EnsureReadonly()",
			then:          "file should switch to read-only mode",
			initialMode:   false,
			expectedError: "",
		},
		{
			scenario:      "Already ReadOnly",
			given:         "a read-only mode chunk file",
			when:          "calling EnsureReadonly()",
			then:          "operation should return quickly",
			initialMode:   true,
			expectedError: "",
		},
	}

	for _, tt := range tests {
		t.Run(tt.scenario, func(t *testing.T) {
			// Setup
			path := createTempFile(t)
			defer cleanupTempFile(path)

			// Create initial file (a file must exist before it can be opened readonly).
			initialConfig := Config{
				Path:            path,
				ReadonlyAtFirst: false,
			}
			initialFile, err := new("test", &initialConfig, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			initialFile.Close()

			// Open file with specified mode
			config := Config{
				Path:            path,
				ReadonlyAtFirst: tt.initialMode,
			}
			f, err := new("test", &config, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			defer f.Close()

			// Execute
			err = f.EnsureReadonly(context.Background())

			// Verify
			if tt.expectedError != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.expectedError))
			} else {
				Expect(err).NotTo(HaveOccurred())
				// Verify it's now in readonly mode by attempting an append
				appendErr := f.Append(context.Background(), []*Feed{createTestFeed(1)}, nil)
				Expect(appendErr).To(HaveOccurred())
				Expect(appendErr.Error()).To(ContainSubstring("file is readonly"))
			}
		})
	}
}
// TestAppend covers appending a single feed, a batch of feeds (verifying the
// onSuccess offsets can be read back), and the rejection of appends on a
// readonly file.
func TestAppend(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct {
		readonly bool
	}
	type whenDetail struct {
		appendFeeds []*Feed
	}
	type thenExpected struct {
		count uint32
		err   string
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Append Single Feed",
			Given:    "A read-write mode chunk file",
			When:     "Adding a single feed",
			Then:     "Should successfully write the feed",
			GivenDetail: givenDetail{
				readonly: false,
			},
			WhenDetail: whenDetail{
				appendFeeds: []*Feed{createTestFeed(1)},
			},
			ThenExpected: thenExpected{
				count: 1,
			},
		},
		{
			Scenario: "Batch Append Multiple Feeds",
			Given:    "A read-write mode chunk file",
			When:     "Adding multiple feeds at once",
			Then:     "Should write all feeds as a single transaction",
			GivenDetail: givenDetail{
				readonly: false,
			},
			WhenDetail: whenDetail{
				appendFeeds: []*Feed{
					createTestFeed(1),
					createTestFeed(2),
					createTestFeed(3),
				},
			},
			ThenExpected: thenExpected{
				count: 3,
			},
		},
		{
			Scenario: "Append in ReadOnly Mode",
			Given:    "A read-only mode chunk file",
			When:     "Attempting to add a feed",
			Then:     "Should fail with readonly error",
			GivenDetail: givenDetail{
				readonly: true,
			},
			WhenDetail: whenDetail{
				appendFeeds: []*Feed{createTestFeed(1)},
			},
			ThenExpected: thenExpected{
				err: "file is readonly",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Given.
			path := createTempFile(t)
			defer cleanupTempFile(path)
			if tt.GivenDetail.readonly {
				// Create and close initial file for readonly test.
				rwFile, err := new("test", &Config{Path: path}, Dependencies{})
				Expect(err).NotTo(HaveOccurred())
				rwFile.Close()
			}
			f, err := new("test", &Config{
				Path:            path,
				ReadonlyAtFirst: tt.GivenDetail.readonly,
			}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			defer f.Close()

			// When. Collect the offsets reported by the onSuccess callback.
			var offsets []uint64
			err = f.Append(context.Background(), tt.WhenDetail.appendFeeds, func(_ *Feed, offset uint64) error {
				offsets = append(offsets, offset)
				return nil
			})

			// Then.
			if tt.ThenExpected.err != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.ThenExpected.err))
			} else {
				Expect(err).NotTo(HaveOccurred())
				Expect(f.Count(context.Background())).To(Equal(tt.ThenExpected.count))
				// Verify each feed can be read back.
				for i, offset := range offsets {
					feed, readErr := f.Read(context.Background(), offset)
					Expect(readErr).NotTo(HaveOccurred())
					Expect(feed.ID).To(Equal(tt.WhenDetail.appendFeeds[i].ID))
				}
			}
		})
	}
}
// TestRead verifies File.Read in both read-write and readonly (mmap-backed)
// modes, plus the error paths for offsets below dataStart and beyond the
// current append offset.
func TestRead(t *testing.T) {
	RegisterTestingT(t)
	tests := []struct {
		scenario   string
		given      string
		when       string
		then       string
		readonly   bool
		setupFeeds []*Feed
		// readOffset == dataStart is a sentinel meaning "use the real offset
		// reported by Append"; it is patched inside the subtest below.
		readOffset  uint64
		expectedErr string
	}{
		{
			scenario:    "Read from Valid Offset",
			given:       "a chunk file with feeds",
			when:        "reading with a valid offset",
			then:        "should return the correct feed",
			readonly:    false,
			setupFeeds:  []*Feed{createTestFeed(1)},
			readOffset:  uint64(dataStart), // Will be adjusted in the test
			expectedErr: "",
		},
		{
			scenario:    "Read from ReadOnly Mode",
			given:       "a read-only chunk file with feeds",
			when:        "reading with a valid offset",
			then:        "should return the correct feed using mmap",
			readonly:    true,
			setupFeeds:  []*Feed{createTestFeed(2)},
			readOffset:  uint64(dataStart), // Will be adjusted in the test
			expectedErr: "",
		},
		{
			scenario:    "Read with Small Offset",
			given:       "a chunk file with feeds",
			when:        "reading with an offset smaller than dataStart",
			then:        "should return 'offset too small' error",
			readonly:    false,
			setupFeeds:  []*Feed{createTestFeed(3)},
			readOffset:  uint64(dataStart - 1),
			expectedErr: "offset too small",
		},
		{
			scenario:    "Read with Large Offset",
			given:       "a chunk file with feeds",
			when:        "reading with an offset larger than appendOffset",
			then:        "should return 'offset too large' error",
			readonly:    false,
			setupFeeds:  []*Feed{createTestFeed(4)},
			readOffset:  999999, // Definitely beyond appendOffset
			expectedErr: "offset too large",
		},
	}
	for _, tt := range tests {
		t.Run(tt.scenario, func(t *testing.T) {
			// Setup
			path := createTempFile(t)
			defer cleanupTempFile(path)
			// Create and populate initial file
			initialConfig := Config{
				Path:            path,
				ReadonlyAtFirst: false,
			}
			initialFile, err := new("test", &initialConfig, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			var validOffset uint64
			if len(tt.setupFeeds) > 0 {
				// Track the first offset for later reading
				var firstOffset uint64
				err = initialFile.Append(context.Background(), tt.setupFeeds, func(_ *Feed, offset uint64) error {
					if firstOffset == 0 {
						firstOffset = offset
					}
					return nil
				})
				Expect(err).NotTo(HaveOccurred())
				validOffset = firstOffset
			}
			initialFile.Close()
			// Reopen with specified mode
			config := Config{
				Path:            path,
				ReadonlyAtFirst: tt.readonly,
			}
			f, err := new("test", &config, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			defer f.Close()
			// Use valid offset if needed
			// (replace the dataStart sentinel with the offset Append reported).
			readOffset := tt.readOffset
			if readOffset == uint64(dataStart) && validOffset > 0 {
				readOffset = validOffset
			}
			// Execute
			feed, err := f.Read(context.Background(), readOffset)
			// Verify
			if tt.expectedErr != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tt.expectedErr))
			} else {
				Expect(err).NotTo(HaveOccurred())
				Expect(feed).NotTo(BeNil())
				Expect(feed.ID).To(Equal(tt.setupFeeds[0].ID))
			}
		})
	}
}
// TestRange verifies File.Range visits every feed in order, honors an
// iterator error (early exit), and works against a readonly (mmap) file.
func TestRange(t *testing.T) {
	RegisterTestingT(t)
	tests := []struct {
		scenario      string
		given         string
		when          string
		then          string
		readonly      bool
		setupFeeds    []*Feed
		earlyExit     bool
		expectedCount int
		expectedErr   string
	}{
		{
			scenario: "Range All Feeds",
			given:    "a chunk file with multiple feeds",
			when:     "calling Range()",
			then:     "iterator should visit each feed in sequence",
			readonly: false,
			setupFeeds: []*Feed{
				createTestFeed(1),
				createTestFeed(2),
				createTestFeed(3),
			},
			earlyExit:     false,
			expectedCount: 3,
			expectedErr:   "",
		},
		{
			scenario: "Range with Early Exit",
			given:    "a chunk file with multiple feeds",
			when:     "calling Range() and returning an error from iterator",
			then:     "range should stop and return that error",
			readonly: false,
			setupFeeds: []*Feed{
				createTestFeed(4),
				createTestFeed(5),
				createTestFeed(6),
			},
			earlyExit:     true,
			expectedCount: 1, // Should stop after first feed
			expectedErr:   "early exit",
		},
		{
			scenario: "Range in ReadOnly Mode",
			given:    "a read-only chunk file with feeds",
			when:     "calling Range()",
			then:     "should use mmap and correctly visit all feeds",
			readonly: true,
			setupFeeds: []*Feed{
				createTestFeed(7),
				createTestFeed(8),
			},
			earlyExit:     false,
			expectedCount: 2,
			expectedErr:   "",
		},
	}
	for _, tc := range tests {
		t.Run(tc.scenario, func(t *testing.T) {
			// Build and populate a file, then close it so it can be reopened
			// in the mode under test.
			path := createTempFile(t)
			defer cleanupTempFile(path)
			writer, err := new("test", &Config{
				Path:            path,
				ReadonlyAtFirst: false,
			}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			if len(tc.setupFeeds) > 0 {
				err = writer.Append(context.Background(), tc.setupFeeds, nil)
				Expect(err).NotTo(HaveOccurred())
			}
			writer.Close()
			// Reopen in the requested mode.
			f, err := new("test", &Config{
				Path:            path,
				ReadonlyAtFirst: tc.readonly,
			}, Dependencies{})
			Expect(err).NotTo(HaveOccurred())
			defer f.Close()
			// Iterate, optionally aborting after the first visit.
			visited := 0
			err = f.Range(context.Background(), func(feed *Feed, offset uint64) (err error) {
				visited++
				if tc.earlyExit && visited == 1 {
					return errors.New("early exit")
				}
				return nil
			})
			// Check the returned error and the number of feeds visited.
			if tc.expectedErr != "" {
				Expect(err).To(HaveOccurred())
				Expect(err.Error()).To(ContainSubstring(tc.expectedErr))
			} else {
				Expect(err).NotTo(HaveOccurred())
			}
			Expect(visited).To(Equal(tc.expectedCount))
		})
	}
}
func createTempFile(t *testing.T) string {
dir, err := os.MkdirTemp("", "chunk-test")
if err != nil {
t.Fatalf("Failed to create temp dir: %v", err)
}
return filepath.Join(dir, "test.chunk")
}
func cleanupTempFile(path string) {
os.RemoveAll(filepath.Dir(path))
}
// createTestFeed builds a minimal fixture Feed with the given ID, a single
// label, and two 3-dimensional vectors.
func createTestFeed(id uint64) *Feed {
	labels := model.Labels{
		{Key: "test", Value: "value"},
	}
	vectors := [][]float32{
		{1.0, 2.0, 3.0},
		{4.0, 5.0, 6.0},
	}
	return &Feed{
		Feed: &model.Feed{
			ID:     id,
			Labels: labels,
			Time:   time.Now(),
		},
		Vectors: vectors,
	}
}

View File

@@ -0,0 +1,296 @@
package chunk
import (
"bytes"
"encoding/binary"
"hash/crc32"
"io"
"math"
"time"
"github.com/pkg/errors"
"github.com/glidea/zenfeed/pkg/model"
binaryutil "github.com/glidea/zenfeed/pkg/util/binary"
"github.com/glidea/zenfeed/pkg/util/buffer"
)
const (
	// feedHeaderSize is the size of the record header (length + checksum).
	feedHeaderSize = 8 // uint32 length + uint32 checksum
)
var (
	// errChecksumMismatch is returned when a record's stored CRC32 does not
	// match the checksum recomputed from its payload (i.e. corruption or a
	// torn write).
	errChecksumMismatch = errors.New("checksum mismatch")
	// crc32Table is the precomputed IEEE polynomial table shared by all
	// record checksum computations.
	crc32Table = crc32.MakeTable(crc32.IEEE)
)
// Feed is the feed model in the chunk file.
type Feed struct {
	*model.Feed
	// Vectors holds the embedding vectors persisted alongside the feed.
	Vectors [][]float32
}
// encodeTo encodes the Feed into the provided buffer, including a length prefix and checksum.
// It writes the record structure: [payloadLen(uint32)][checksum(uint32)][payload...].
// On failure the buffer is restored to its original length, so no partial record is left behind.
func (f *Feed) encodeTo(buf *buffer.Bytes) error {
	buf.EnsureRemaining(4 * 1024)

	// 1. Reserve space for the header (length + checksum); it is backfilled
	// after the payload is written. headerPos doubles as the rollback point
	// (the original code kept a redundant startOffset holding the same value).
	headerPos := buf.Len()
	buf.B = buf.B[:headerPos+feedHeaderSize] // Extend buffer to include header space.
	payloadStartOffset := buf.Len()          // Position where payload starts.

	// 2. Encode the actual payload.
	if err := f.encodePayload(buf); err != nil {
		// If payload encoding fails, revert the buffer (including the reserved header).
		buf.B = buf.B[:headerPos]
		return errors.Wrap(err, "encode payload")
	}
	payloadEndOffset := buf.Len()

	// 3. Calculate payload length and checksum.
	payloadLen := uint32(payloadEndOffset - payloadStartOffset)
	payloadSlice := buf.Bytes()[payloadStartOffset:payloadEndOffset]
	checksum := crc32.Checksum(payloadSlice, crc32Table)

	// 4. Backfill the reserved header with the actual length and checksum.
	binary.LittleEndian.PutUint32(buf.Bytes()[headerPos:headerPos+4], payloadLen)
	binary.LittleEndian.PutUint32(buf.Bytes()[headerPos+4:headerPos+8], checksum)

	return nil
}
// encodePayload encodes the core fields into w, in the fixed order
// ID, Time (UnixNano), Labels, Vectors. decodePayload reads the same order.
func (f *Feed) encodePayload(w io.Writer) error {
	if err := binaryutil.WriteUint64(w, f.ID); err != nil {
		return errors.Wrap(err, "write id")
	}
	if err := binaryutil.WriteUint64(w, uint64(f.Time.UnixNano())); err != nil {
		return errors.Wrap(err, "write time")
	}
	if err := f.encodeLabels(w); err != nil {
		return errors.Wrap(err, "encode labels")
	}
	// errors.Wrap returns nil on a nil error, so this also covers success.
	return errors.Wrap(f.encodeVectors(w), "encode vectors")
}
// encodeLabels writes the label data to the writer as
// [count(uint32)][key,value string pairs...].
func (f *Feed) encodeLabels(w io.Writer) error {
	// Validate BEFORE converting: the original converted first, so an
	// oversized count would have been silently truncated by uint32().
	if len(f.Labels) > math.MaxUint32 {
		return errors.New("too many labels")
	}
	labelsLen := uint32(len(f.Labels))
	if err := binaryutil.WriteUint32(w, labelsLen); err != nil {
		return errors.Wrap(err, "write labels count")
	}
	for i, label := range f.Labels {
		if err := binaryutil.WriteString(w, label.Key); err != nil {
			return errors.Wrapf(err, "write label key index %d", i)
		}
		if err := binaryutil.WriteString(w, label.Value); err != nil {
			return errors.Wrapf(err, "write label value index %d", i)
		}
	}
	return nil
}
// encodeVectors writes the vector data to the writer as
// [count(uint32)][dimension(uint32)][float32 little-endian values...].
// All vectors must share the dimension of the first vector.
func (f *Feed) encodeVectors(w io.Writer) error {
	// Validate BEFORE converting: the original converted first, so an
	// oversized count would have been silently truncated by uint32().
	if len(f.Vectors) > math.MaxUint32 {
		return errors.New("too many vectors")
	}
	vectorCount := uint32(len(f.Vectors))
	if err := binaryutil.WriteUint32(w, vectorCount); err != nil {
		return errors.Wrap(err, "write vectors count")
	}
	if vectorCount == 0 {
		return nil // Nothing more to write if there are no vectors.
	}
	// Write dimension (validated before conversion, same as the count above).
	if len(f.Vectors[0]) > math.MaxUint32 {
		return errors.New("vector dimension exceeds maximum uint32")
	}
	dimension := uint32(len(f.Vectors[0]))
	if err := binaryutil.WriteUint32(w, dimension); err != nil {
		return errors.Wrap(err, "write vector dimension")
	}
	// Write vector data.
	var floatBuf [4]byte
	for i, vec := range f.Vectors {
		// Ensure vector has the correct dimension.
		if uint32(len(vec)) != dimension {
			return errors.Errorf("vector %d has inconsistent dimension %d, expected %d", i, len(vec), dimension)
		}
		for _, val := range vec { // Avoid binary.Write's reflection for performance.
			bits := math.Float32bits(val)
			binary.LittleEndian.PutUint32(floatBuf[:], bits)
			if _, err := w.Write(floatBuf[:]); err != nil {
				return errors.Wrapf(err, "write for vector %d, value %f", i, val)
			}
		}
	}
	return nil
}
// validateFrom reads one record ([header][payload]) from r, appending it to
// buf, and verifies the payload's CRC32 against the checksum in the header.
// On success buf holds the full record after its previous content.
func (f *Feed) validateFrom(r io.Reader, buf *buffer.Bytes) (err error) {
	// 1. Read header (length and checksum).
	startOffset := buf.Len()
	if _, err := io.CopyN(buf, r, feedHeaderSize); err != nil {
		return errors.Wrap(err, "read header")
	}
	payloadLen := binary.LittleEndian.Uint32(buf.B[startOffset : startOffset+4])
	expectedChecksum := binary.LittleEndian.Uint32(buf.B[startOffset+4:])

	// 2. Read payload, computing the checksum on the fly via TeeReader.
	// io.CopyN already wraps its source in a LimitReader, so the explicit
	// LimitReader the original code added here was redundant.
	buf.EnsureRemaining(int(payloadLen))
	checksumWriter := crc32.New(crc32Table)
	teeReader := io.TeeReader(r, checksumWriter)
	if _, err := io.CopyN(buf, teeReader, int64(payloadLen)); err != nil {
		// EOF here usually means a partially written (torn) record.
		return errors.Wrap(err, "read payload")
	}

	// 3. Verify checksum.
	calculatedChecksum := checksumWriter.Sum32()
	if calculatedChecksum != expectedChecksum {
		return errors.Wrapf(errChecksumMismatch, "expected %x, got %x", expectedChecksum, calculatedChecksum)
	}

	return nil
}
// decodeFrom decodes the feed from the reader, validating length and checksum.
// It expects the format: [payloadLen(uint32)][checksum(uint32)][payload...].
func (f *Feed) decodeFrom(r io.Reader) (err error) {
	scratch := buffer.Get()
	defer buffer.Put(scratch)

	if err := f.validateFrom(r, scratch); err != nil {
		return errors.Wrap(err, "validate payload")
	}

	// scratch now holds [header][payload]; skip the header and decode the rest.
	// NOTE(review): assumes buffer.Get returns an empty buffer (header at
	// offset 0) — confirm the pool resets buffers on Put.
	payload := scratch.B[feedHeaderSize:]
	return errors.Wrap(f.decodePayload(bytes.NewReader(payload)), "decode payload")
}
// decodePayload decodes the core fields from r, in the same fixed order
// encodePayload wrote them: ID, Time (UnixNano), Labels, Vectors.
func (f *Feed) decodePayload(r io.Reader) error {
	f.Feed = &model.Feed{} // Start from a clean model.Feed.

	if err := binary.Read(r, binary.LittleEndian, &f.ID); err != nil {
		return errors.Wrap(err, "read id")
	}

	var ns int64
	if err := binary.Read(r, binary.LittleEndian, &ns); err != nil {
		return errors.Wrap(err, "read time")
	}
	f.Time = time.Unix(0, ns).In(time.UTC) // Stored as UnixNano; normalize to UTC.

	if err := f.decodeLabels(r); err != nil {
		return errors.Wrap(err, "decode labels")
	}
	// errors.Wrap returns nil on a nil error, so this also covers success.
	return errors.Wrap(f.decodeVectors(r), "decode vectors")
}
// decodeLabels reads the label data written by encodeLabels:
// [count(uint32)][key,value string pairs...].
func (f *Feed) decodeLabels(r io.Reader) error {
	var count uint32
	if err := binary.Read(r, binary.LittleEndian, &count); err != nil {
		return errors.Wrap(err, "read labels count")
	}
	f.Labels = make(model.Labels, count)
	for i := uint32(0); i < count; i++ {
		key, err := binaryutil.ReadString(r)
		if err != nil {
			return errors.Wrapf(err, "read label key index %d", i)
		}
		value, err := binaryutil.ReadString(r)
		if err != nil {
			return errors.Wrapf(err, "read label value index %d", i)
		}
		f.Labels[i] = model.Label{Key: key, Value: value}
	}
	return nil
}
// decodeVectors reads the vector data written by encodeVectors:
// [count(uint32)][dimension(uint32)][float32 little-endian values...].
func (f *Feed) decodeVectors(r io.Reader) error {
	var vectorCount uint32
	if err := binary.Read(r, binary.LittleEndian, &vectorCount); err != nil {
		return errors.Wrap(err, "read vectors count")
	}
	if vectorCount == 0 {
		f.Vectors = nil // Ensure vectors is nil if count is 0.
		return nil
	}
	var dimension uint32
	if err := binary.Read(r, binary.LittleEndian, &dimension); err != nil {
		return errors.Wrap(err, "read vector dimension")
	}
	f.Vectors = make([][]float32, vectorCount)
	// Pre-allocate the underlying float data contiguously for potentially better cache locality.
	totalFloats := uint64(vectorCount) * uint64(dimension)
	floatData := make([]float32, totalFloats)
	// Decode manually instead of binary.Read on a []float32, which goes through
	// reflection; this mirrors the reflection-free encodeVectors. io.ReadFull
	// yields the same error semantics binary.Read had (ErrUnexpectedEOF on a
	// short read), so the wrapped errors are unchanged.
	rowBuf := make([]byte, int(dimension)*4)
	offset := 0
	for i := uint32(0); i < vectorCount; i++ {
		if _, err := io.ReadFull(r, rowBuf); err != nil {
			return errors.Wrapf(err, "read vector data for vector %d", i)
		}
		row := floatData[offset : offset+int(dimension)] // Slice into the pre-allocated data.
		for j := range row {
			row[j] = math.Float32frombits(binary.LittleEndian.Uint32(rowBuf[j*4:]))
		}
		f.Vectors[i] = row
		offset += int(dimension)
	}
	return nil
}