This commit is contained in:
glidea
2025-04-19 15:50:26 +08:00
commit 8b33df8a05
109 changed files with 24407 additions and 0 deletions

53
pkg/util/vector/vector.go Normal file
View File

@@ -0,0 +1,53 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package vector
import (
"math"
)
// Quantize compresses a float32 vector into int8 codes using min/max affine
// quantization.
//
// The smallest element of vec maps to -128 and the largest to 127; every code
// is computed as round((v-min)*scale - 128). The returned min and scale are
// the parameters needed to approximately reconstruct the input via
// (code+128)/scale + min.
//
// Degenerate inputs yield a finite, non-zero scale so the result can always be
// inverted:
//   - an empty vec returns an empty slice with min = 0 and scale = 1;
//   - a vec whose elements are all equal returns all -128 codes with scale = 1.
//
// Inputs are expected to be finite; NaN or infinite elements are not handled.
func Quantize(vec []float32) (quantized []int8, min, scale float32) {
	quantized = make([]int8, len(vec))
	if len(vec) == 0 {
		// Nothing to measure: return a neutral transform instead of the
		// search sentinels.
		return quantized, 0, 1
	}

	// Find the minimum and maximum values.
	min, max := float32(math.MaxFloat32), float32(-math.MaxFloat32)
	for _, v := range vec {
		if v < min {
			min = v
		}
		if v > max {
			max = v
		}
	}

	// Calculate the quantization scale. A zero range (constant vector) would
	// divide by zero and turn every code into NaN, so fall back to a unit
	// scale; every element then quantizes to -128 and dequantizes to min.
	scale = 1
	if span := max - min; span > 0 {
		scale = float32(255) / span
	}

	// Quantize the data.
	for i, v := range vec {
		quantized[i] = int8(math.Round(float64((v-min)*scale - 128)))
	}

	return quantized, min, scale
}
// Dequantize reconstructs an approximate float32 vector from int8 codes.
//
// Each code c is mapped to (c+128)/scale + min, which inverts the affine
// transform applied by Quantize. min and scale should be the values that
// Quantize returned alongside the codes. The result has the same length as
// quantized.
func Dequantize(quantized []int8, min, scale float32) []float32 {
	restored := make([]float32, len(quantized))
	for i := range quantized {
		// Shift the signed code back to the 0..255 range before undoing
		// the scale and re-adding the offset.
		shifted := float32(quantized[i]) + 128
		restored[i] = shifted/scale + min
	}

	return restored
}

View File

@@ -0,0 +1,99 @@
// Copyright (C) 2025 wangyusong
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package vector
import (
"testing"
. "github.com/onsi/gomega"
"github.com/glidea/zenfeed/pkg/test"
)
// TestQuantizeDequantize checks that a Quantize/Dequantize round trip returns
// a vector of the same length whose largest per-element absolute error stays
// within the tolerance configured for each scenario.
func TestQuantizeDequantize(t *testing.T) {
	RegisterTestingT(t)

	type givenDetail struct{}
	type whenDetail struct {
		vector []float32
	}
	type thenExpected struct {
		maxError float32
	}

	tests := []test.Case[givenDetail, whenDetail, thenExpected]{
		{
			Scenario: "Quantize and dequantize unit vector",
			When:     "quantizing and then dequantizing a vector with values between 0 and 1",
			Then:     "should return vector close to the original with small error",
			WhenDetail: whenDetail{
				vector: []float32{0.1, 0.5, 0.9, 0.3},
			},
			ThenExpected: thenExpected{
				maxError: 0.01,
			},
		},
		{
			Scenario: "Quantize and dequantize vector with negative values",
			When:     "quantizing and then dequantizing a vector with negative values",
			Then:     "should return vector close to the original with small error",
			WhenDetail: whenDetail{
				vector: []float32{-1.0, -0.5, 0.0, 0.5, 1.0},
			},
			ThenExpected: thenExpected{
				maxError: 0.01,
			},
		},
		{
			Scenario: "Quantize and dequantize large range vector",
			When:     "quantizing and then dequantizing a vector with large range of values",
			Then:     "should return vector close to the original with acceptable error",
			WhenDetail: whenDetail{
				vector: []float32{-100, -50, 0, 50, 100},
			},
			ThenExpected: thenExpected{
				maxError: 1.5,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.Scenario, func(t *testing.T) {
			// Bind assertions to this subtest's t. Going through the
			// handler registered on the parent would call FailNow on the
			// parent from the subtest goroutine and misattribute failures.
			g := NewWithT(t)

			// When.
			quantized, min, scale := Quantize(tt.WhenDetail.vector)
			dequantized := Dequantize(quantized, min, scale)

			// Then.
			g.Expect(dequantized).To(HaveLen(len(tt.WhenDetail.vector)))

			maxError := float32(0)
			for i, want := range tt.WhenDetail.vector {
				// Absolute difference; named diff to avoid shadowing the
				// builtin error type.
				diff := want - dequantized[i]
				if diff < 0 {
					diff = -diff
				}
				if diff > maxError {
					maxError = diff
				}
			}
			g.Expect(maxError).To(BeNumerically("<=", tt.ThenExpected.maxError))
		})
	}
}