audio/internal/convert: enable resampling of a float32 stream

Updates #2160
This commit is contained in:
Hajime Hoshi 2024-07-14 00:24:36 +09:00
parent bf90217e68
commit b552266afe
6 changed files with 152 additions and 67 deletions

View File

@ -497,5 +497,5 @@ func Resample(source io.ReadSeeker, size int64, from, to int) io.ReadSeeker {
if from == to {
return source
}
return convert.NewResampling(source, size, from, to)
return convert.NewResampling(source, size, from, to, bitDepthInBytesInt16)
}

View File

@ -82,6 +82,7 @@ type Resampling struct {
size int64
from int
to int
bitDepthInBytes int
pos int64
srcBlock int64
srcBufL map[int64][]float64
@ -89,11 +90,12 @@ type Resampling struct {
lruSrcBlocks []int64
}
func NewResampling(source io.ReadSeeker, size int64, from, to int) *Resampling {
func NewResampling(source io.ReadSeeker, size int64, from, to int, bitDepthInBytes int) *Resampling {
r := &Resampling{
source: source,
size: size,
from: from,
bitDepthInBytes: bitDepthInBytes,
to: to,
srcBlock: -1,
srcBufL: map[int64][]float64{},
@ -102,9 +104,14 @@ func NewResampling(source io.ReadSeeker, size int64, from, to int) *Resampling {
return r
}
// bytesPerSample returns the size in bytes of one frame, i.e. one sample for
// each of the two channels (channelNum is fixed at 2), where every
// per-channel sample is r.bitDepthInBytes bytes wide.
func (r *Resampling) bytesPerSample() int {
const channelNum = 2
return r.bitDepthInBytes * channelNum
}
func (r *Resampling) Length() int64 {
s := int64(float64(r.size) * float64(r.to) / float64(r.from))
return s / 4 * 4
return s / int64(r.bytesPerSample()) * int64(r.bytesPerSample())
}
func (r *Resampling) src(i int64) (float64, float64, error) {
@ -113,17 +120,18 @@ func (r *Resampling) src(i int64) (float64, float64, error) {
if i < 0 {
return 0, 0, nil
}
if r.size/4 <= i {
sizePerSample := int64(r.bytesPerSample())
if r.size/sizePerSample <= i {
return 0, 0, nil
}
nextPos := int64(i) / resamplingBufferSize
if _, ok := r.srcBufL[nextPos]; !ok {
if r.srcBlock+1 != nextPos {
if _, err := r.source.Seek(nextPos*resamplingBufferSize*4, io.SeekStart); err != nil {
if _, err := r.source.Seek(nextPos*resamplingBufferSize*sizePerSample, io.SeekStart); err != nil {
return 0, 0, err
}
}
buf := make([]byte, resamplingBufferSize*4)
buf := make([]byte, resamplingBufferSize*sizePerSample)
c := 0
for c < len(buf) {
n, err := r.source.Read(buf[c:])
@ -138,10 +146,20 @@ func (r *Resampling) src(i int64) (float64, float64, error) {
buf = buf[:c]
sl := make([]float64, resamplingBufferSize)
sr := make([]float64, resamplingBufferSize)
for i := 0; i < len(buf)/4; i++ {
switch r.bitDepthInBytes {
case 2:
for i := 0; i < len(buf)/int(sizePerSample); i++ {
sl[i] = float64(int16(buf[4*i])|(int16(buf[4*i+1])<<8)) / (1<<15 - 1)
sr[i] = float64(int16(buf[4*i+2])|(int16(buf[4*i+3])<<8)) / (1<<15 - 1)
}
case 4:
for i := 0; i < len(buf)/int(sizePerSample); i++ {
sl[i] = float64(math.Float32frombits(uint32(buf[8*i]) | uint32(buf[8*i+1])<<8 | uint32(buf[8*i+2])<<16 | uint32(buf[8*i+3])<<24))
sr[i] = float64(math.Float32frombits(uint32(buf[8*i+4]) | uint32(buf[8*i+5])<<8 | uint32(buf[8*i+6])<<16 | uint32(buf[8*i+7])<<24))
}
default:
panic("not reached")
}
r.srcBlock = nextPos
r.srcBufL[r.srcBlock] = sl
r.srcBufR[r.srcBlock] = sr
@ -180,12 +198,13 @@ func (r *Resampling) at(t int64) (float64, float64, error) {
if startN < 0 {
startN = 0
}
if r.size/4 <= startN {
startN = r.size/4 - 1
sizePerSample := int64(r.bytesPerSample())
if r.size/sizePerSample <= startN {
startN = r.size/sizePerSample - 1
}
endN := int64(tInSrc + windowSize)
if r.size/4 <= endN {
endN = r.size/4 - 1
if r.size/sizePerSample <= endN {
endN = r.size/sizePerSample - 1
}
lv := 0.0
rv := 0.0
@ -219,12 +238,15 @@ func (r *Resampling) Read(b []byte) (int, error) {
if r.pos == r.Length() {
return 0, io.EOF
}
n := len(b) / 4 * 4
size := r.bytesPerSample()
n := len(b) / size * size
if r.Length()-r.pos <= int64(n) {
n = int(r.Length() - r.pos)
}
for i := 0; i < n/4; i++ {
l, r, err := r.at(r.pos/4 + int64(i))
switch r.bitDepthInBytes {
case 2:
for i := 0; i < n/size; i++ {
l, r, err := r.at(r.pos/int64(size) + int64(i))
if err != nil {
return 0, err
}
@ -235,6 +257,28 @@ func (r *Resampling) Read(b []byte) (int, error) {
b[4*i+2] = byte(r16)
b[4*i+3] = byte(r16 >> 8)
}
case 4:
for i := 0; i < n/size; i++ {
l, r, err := r.at(r.pos/int64(size) + int64(i))
if err != nil {
return 0, err
}
l32 := float32(l)
r32 := float32(r)
l32b := math.Float32bits(l32)
r32b := math.Float32bits(r32)
b[8*i] = byte(l32b)
b[8*i+1] = byte(l32b >> 8)
b[8*i+2] = byte(l32b >> 16)
b[8*i+3] = byte(l32b >> 24)
b[8*i+4] = byte(r32b)
b[8*i+5] = byte(r32b >> 8)
b[8*i+6] = byte(r32b >> 16)
b[8*i+7] = byte(r32b >> 24)
}
default:
panic("not reached")
}
r.pos += int64(n)
return n, nil
}

View File

@ -16,6 +16,7 @@ package convert_test
import (
"bytes"
"fmt"
"io"
"math"
"testing"
@ -29,7 +30,7 @@ func soundAt(timeInSecond float64) float64 {
amp := []float64{1.0, 0.8, 0.6, 0.4, 0.2}
v := 0.0
for j := 0; j < len(amp); j++ {
v += amp[j] * math.Sin(2.0*math.Pi*timeInSecond*freq*float64(j+1))
v += amp[j] * math.Sin(2.0*math.Pi*timeInSecond*freq*float64(j+1)) / 2
}
if v > 1 {
v = 1
@ -40,15 +41,28 @@ func soundAt(timeInSecond float64) float64 {
return v
}
func newSoundBytes(sampleRate int) []byte {
func newSoundBytes(sampleRate int, bitDepthInBytes int) []byte {
b := make([]byte, sampleRate*4) // 1 second
for i := 0; i < len(b)/4; i++ {
for i := 0; i < len(b)/(bitDepthInBytes*2); i++ {
v := soundAt(float64(i) / float64(sampleRate))
switch bitDepthInBytes {
case 2:
v16 := int16(v * (1<<15 - 1))
b[4*i] = byte(v16)
b[4*i+1] = byte(v16 >> 8)
b[4*i+2] = byte(v16)
b[4*i+3] = byte(v16 >> 8)
case 4:
v32 := math.Float32bits(float32(v))
b[8*i] = byte(v32)
b[8*i+1] = byte(v32 >> 8)
b[8*i+2] = byte(v32 >> 16)
b[8*i+3] = byte(v32 >> 24)
b[8*i+4] = byte(v32)
b[8*i+5] = byte(v32 >> 8)
b[8*i+6] = byte(v32 >> 16)
b[8*i+7] = byte(v32 >> 24)
}
}
return b
}
@ -68,22 +82,37 @@ func TestResampling(t *testing.T) {
},
}
for _, c := range cases {
inB := newSoundBytes(c.In)
outS := convert.NewResampling(bytes.NewReader(inB), int64(len(inB)), c.In, c.Out)
c := c
t.Run(fmt.Sprintf("%d to %d", c.In, c.Out), func(t *testing.T) {
for _, bitDepthInBytes := range []int{2, 4} {
bitDepthInBytes := bitDepthInBytes
t.Run(fmt.Sprintf("bitDepthInBytes=%d", bitDepthInBytes), func(t *testing.T) {
inB := newSoundBytes(c.In, bitDepthInBytes)
outS := convert.NewResampling(bytes.NewReader(inB), int64(len(inB)), c.In, c.Out, bitDepthInBytes)
gotB, err := io.ReadAll(outS)
if err != nil {
t.Fatal(err)
}
wantB := newSoundBytes(c.Out)
wantB := newSoundBytes(c.Out, bitDepthInBytes)
if len(gotB) != len(wantB) {
t.Errorf("len(gotB) == %d but len(wantB) == %d", len(gotB), len(wantB))
}
for i := 0; i < len(gotB)/2; i++ {
got := float64(int16(gotB[2*i])|(int16(gotB[2*i+1])<<8)) / (1<<15 - 1)
want := float64(int16(wantB[2*i])|(int16(wantB[2*i+1])<<8)) / (1<<15 - 1)
for i := 0; i < len(gotB)/bitDepthInBytes; i++ {
var got, want float64
switch bitDepthInBytes {
case 2:
got = float64(int16(gotB[2*i])|(int16(gotB[2*i+1])<<8)) / (1<<15 - 1)
want = float64(int16(wantB[2*i])|(int16(wantB[2*i+1])<<8)) / (1<<15 - 1)
case 4:
got = float64(math.Float32frombits(uint32(gotB[4*i]) | (uint32(gotB[4*i+1]) << 8) | (uint32(gotB[4*i+2]) << 16) | (uint32(gotB[4*i+3]) << 24)))
want = float64(math.Float32frombits(uint32(wantB[4*i]) | (uint32(wantB[4*i+1]) << 8) | (uint32(wantB[4*i+2]) << 16) | (uint32(wantB[4*i+3]) << 24)))
}
if math.Abs(got-want) > 0.025 {
t.Errorf("sample rate: %d, index: %d: got: %f, want: %f", c.Out, i, got, want)
}
}
})
}
})
}
}

View File

@ -27,6 +27,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
)
const (
bitDepthInBytesInt16 = 2
)
// Stream is a decoded stream.
type Stream struct {
orig *mp3.Decoder
@ -105,7 +109,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
var r *convert.Resampling
if d.SampleRate() != sampleRate {
r = convert.NewResampling(d, d.Length(), d.SampleRate(), sampleRate)
r = convert.NewResampling(d, d.Length(), d.SampleRate(), sampleRate, bitDepthInBytesInt16)
}
s := &Stream{
orig: d,

View File

@ -25,6 +25,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
)
const (
bitDepthInBytesInt16 = 2
)
// Stream is a decoded audio stream.
type Stream struct {
readSeeker io.ReadSeeker
@ -200,7 +204,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
length *= 2
}
if origSampleRate != sampleRate {
r := convert.NewResampling(s, length, origSampleRate, sampleRate)
r := convert.NewResampling(s, length, origSampleRate, sampleRate, bitDepthInBytesInt16)
s = r
length = r.Length()
}

View File

@ -24,6 +24,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
)
const (
bitDepthInBytesInt16 = 2
)
// Stream is a decoded audio stream.
type Stream struct {
inner io.ReadSeeker
@ -153,7 +157,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
return s, nil
}
r := convert.NewResampling(s.inner, s.size, s.sampleRate, sampleRate)
r := convert.NewResampling(s.inner, s.size, s.sampleRate, sampleRate, bitDepthInBytesInt16)
return &Stream{
inner: r,
size: r.Length(),