audio/internal/convert: enable resampling of a float32 stream

Updates #2160
This commit is contained in:
Hajime Hoshi 2024-07-14 00:24:36 +09:00
parent bf90217e68
commit b552266afe
6 changed files with 152 additions and 67 deletions

View File

@ -497,5 +497,5 @@ func Resample(source io.ReadSeeker, size int64, from, to int) io.ReadSeeker {
if from == to { if from == to {
return source return source
} }
return convert.NewResampling(source, size, from, to) return convert.NewResampling(source, size, from, to, bitDepthInBytesInt16)
} }

View File

@ -82,6 +82,7 @@ type Resampling struct {
size int64 size int64
from int from int
to int to int
bitDepthInBytes int
pos int64 pos int64
srcBlock int64 srcBlock int64
srcBufL map[int64][]float64 srcBufL map[int64][]float64
@ -89,11 +90,12 @@ type Resampling struct {
lruSrcBlocks []int64 lruSrcBlocks []int64
} }
func NewResampling(source io.ReadSeeker, size int64, from, to int) *Resampling { func NewResampling(source io.ReadSeeker, size int64, from, to int, bitDepthInBytes int) *Resampling {
r := &Resampling{ r := &Resampling{
source: source, source: source,
size: size, size: size,
from: from, from: from,
bitDepthInBytes: bitDepthInBytes,
to: to, to: to,
srcBlock: -1, srcBlock: -1,
srcBufL: map[int64][]float64{}, srcBufL: map[int64][]float64{},
@ -102,9 +104,14 @@ func NewResampling(source io.ReadSeeker, size int64, from, to int) *Resampling {
return r return r
} }
// bytesPerSample reports the size in bytes of one sample frame: the
// per-channel bit depth in bytes multiplied by a fixed channel count of 2.
func (r *Resampling) bytesPerSample() int {
	const channels = 2
	frameBytes := channels * r.bitDepthInBytes
	return frameBytes
}
// Length returns the byte length of the resampled stream: the source size
// scaled by to/from (computed in float64), then rounded down to a multiple of
// the sample frame size. Each line below shows the old and the new diff column;
// the old column rounds to a fixed 4 bytes, the new one to bytesPerSample().
func (r *Resampling) Length() int64 { func (r *Resampling) Length() int64 {
s := int64(float64(r.size) * float64(r.to) / float64(r.from)) s := int64(float64(r.size) * float64(r.to) / float64(r.from))
return s / 4 * 4 return s / int64(r.bytesPerSample()) * int64(r.bytesPerSample())
} }
func (r *Resampling) src(i int64) (float64, float64, error) { func (r *Resampling) src(i int64) (float64, float64, error) {
@ -113,17 +120,18 @@ func (r *Resampling) src(i int64) (float64, float64, error) {
if i < 0 { if i < 0 {
return 0, 0, nil return 0, 0, nil
} }
if r.size/4 <= i { sizePerSample := int64(r.bytesPerSample())
if r.size/sizePerSample <= i {
return 0, 0, nil return 0, 0, nil
} }
nextPos := int64(i) / resamplingBufferSize nextPos := int64(i) / resamplingBufferSize
if _, ok := r.srcBufL[nextPos]; !ok { if _, ok := r.srcBufL[nextPos]; !ok {
if r.srcBlock+1 != nextPos { if r.srcBlock+1 != nextPos {
if _, err := r.source.Seek(nextPos*resamplingBufferSize*4, io.SeekStart); err != nil { if _, err := r.source.Seek(nextPos*resamplingBufferSize*sizePerSample, io.SeekStart); err != nil {
return 0, 0, err return 0, 0, err
} }
} }
buf := make([]byte, resamplingBufferSize*4) buf := make([]byte, resamplingBufferSize*sizePerSample)
c := 0 c := 0
for c < len(buf) { for c < len(buf) {
n, err := r.source.Read(buf[c:]) n, err := r.source.Read(buf[c:])
@ -138,10 +146,20 @@ func (r *Resampling) src(i int64) (float64, float64, error) {
buf = buf[:c] buf = buf[:c]
sl := make([]float64, resamplingBufferSize) sl := make([]float64, resamplingBufferSize)
sr := make([]float64, resamplingBufferSize) sr := make([]float64, resamplingBufferSize)
for i := 0; i < len(buf)/4; i++ { switch r.bitDepthInBytes {
case 2:
for i := 0; i < len(buf)/int(sizePerSample); i++ {
sl[i] = float64(int16(buf[4*i])|(int16(buf[4*i+1])<<8)) / (1<<15 - 1) sl[i] = float64(int16(buf[4*i])|(int16(buf[4*i+1])<<8)) / (1<<15 - 1)
sr[i] = float64(int16(buf[4*i+2])|(int16(buf[4*i+3])<<8)) / (1<<15 - 1) sr[i] = float64(int16(buf[4*i+2])|(int16(buf[4*i+3])<<8)) / (1<<15 - 1)
} }
case 4:
for i := 0; i < len(buf)/int(sizePerSample); i++ {
sl[i] = float64(math.Float32frombits(uint32(buf[8*i]) | uint32(buf[8*i+1])<<8 | uint32(buf[8*i+2])<<16 | uint32(buf[8*i+3])<<24))
sr[i] = float64(math.Float32frombits(uint32(buf[8*i+4]) | uint32(buf[8*i+5])<<8 | uint32(buf[8*i+6])<<16 | uint32(buf[8*i+7])<<24))
}
default:
panic("not reached")
}
r.srcBlock = nextPos r.srcBlock = nextPos
r.srcBufL[r.srcBlock] = sl r.srcBufL[r.srcBlock] = sl
r.srcBufR[r.srcBlock] = sr r.srcBufR[r.srcBlock] = sr
@ -180,12 +198,13 @@ func (r *Resampling) at(t int64) (float64, float64, error) {
if startN < 0 { if startN < 0 {
startN = 0 startN = 0
} }
if r.size/4 <= startN { sizePerSample := int64(r.bytesPerSample())
startN = r.size/4 - 1 if r.size/sizePerSample <= startN {
startN = r.size/sizePerSample - 1
} }
endN := int64(tInSrc + windowSize) endN := int64(tInSrc + windowSize)
if r.size/4 <= endN { if r.size/sizePerSample <= endN {
endN = r.size/4 - 1 endN = r.size/sizePerSample - 1
} }
lv := 0.0 lv := 0.0
rv := 0.0 rv := 0.0
@ -219,12 +238,15 @@ func (r *Resampling) Read(b []byte) (int, error) {
if r.pos == r.Length() { if r.pos == r.Length() {
return 0, io.EOF return 0, io.EOF
} }
n := len(b) / 4 * 4 size := r.bytesPerSample()
n := len(b) / size * size
if r.Length()-r.pos <= int64(n) { if r.Length()-r.pos <= int64(n) {
n = int(r.Length() - r.pos) n = int(r.Length() - r.pos)
} }
for i := 0; i < n/4; i++ { switch r.bitDepthInBytes {
l, r, err := r.at(r.pos/4 + int64(i)) case 2:
for i := 0; i < n/size; i++ {
l, r, err := r.at(r.pos/int64(size) + int64(i))
if err != nil { if err != nil {
return 0, err return 0, err
} }
@ -235,6 +257,28 @@ func (r *Resampling) Read(b []byte) (int, error) {
b[4*i+2] = byte(r16) b[4*i+2] = byte(r16)
b[4*i+3] = byte(r16 >> 8) b[4*i+3] = byte(r16 >> 8)
} }
case 4:
for i := 0; i < n/size; i++ {
l, r, err := r.at(r.pos/int64(size) + int64(i))
if err != nil {
return 0, err
}
l32 := float32(l)
r32 := float32(r)
l32b := math.Float32bits(l32)
r32b := math.Float32bits(r32)
b[8*i] = byte(l32b)
b[8*i+1] = byte(l32b >> 8)
b[8*i+2] = byte(l32b >> 16)
b[8*i+3] = byte(l32b >> 24)
b[8*i+4] = byte(r32b)
b[8*i+5] = byte(r32b >> 8)
b[8*i+6] = byte(r32b >> 16)
b[8*i+7] = byte(r32b >> 24)
}
default:
panic("not reached")
}
r.pos += int64(n) r.pos += int64(n)
return n, nil return n, nil
} }

View File

@ -16,6 +16,7 @@ package convert_test
import ( import (
"bytes" "bytes"
"fmt"
"io" "io"
"math" "math"
"testing" "testing"
@ -29,7 +30,7 @@ func soundAt(timeInSecond float64) float64 {
amp := []float64{1.0, 0.8, 0.6, 0.4, 0.2} amp := []float64{1.0, 0.8, 0.6, 0.4, 0.2}
v := 0.0 v := 0.0
for j := 0; j < len(amp); j++ { for j := 0; j < len(amp); j++ {
v += amp[j] * math.Sin(2.0*math.Pi*timeInSecond*freq*float64(j+1)) v += amp[j] * math.Sin(2.0*math.Pi*timeInSecond*freq*float64(j+1)) / 2
} }
if v > 1 { if v > 1 {
v = 1 v = 1
@ -40,15 +41,28 @@ func soundAt(timeInSecond float64) float64 {
return v return v
} }
func newSoundBytes(sampleRate int) []byte { func newSoundBytes(sampleRate int, bitDepthInBytes int) []byte {
b := make([]byte, sampleRate*4) // 1 second b := make([]byte, sampleRate*4) // 1 second
for i := 0; i < len(b)/4; i++ { for i := 0; i < len(b)/(bitDepthInBytes*2); i++ {
v := soundAt(float64(i) / float64(sampleRate)) v := soundAt(float64(i) / float64(sampleRate))
switch bitDepthInBytes {
case 2:
v16 := int16(v * (1<<15 - 1)) v16 := int16(v * (1<<15 - 1))
b[4*i] = byte(v16) b[4*i] = byte(v16)
b[4*i+1] = byte(v16 >> 8) b[4*i+1] = byte(v16 >> 8)
b[4*i+2] = byte(v16) b[4*i+2] = byte(v16)
b[4*i+3] = byte(v16 >> 8) b[4*i+3] = byte(v16 >> 8)
case 4:
v32 := math.Float32bits(float32(v))
b[8*i] = byte(v32)
b[8*i+1] = byte(v32 >> 8)
b[8*i+2] = byte(v32 >> 16)
b[8*i+3] = byte(v32 >> 24)
b[8*i+4] = byte(v32)
b[8*i+5] = byte(v32 >> 8)
b[8*i+6] = byte(v32 >> 16)
b[8*i+7] = byte(v32 >> 24)
}
} }
return b return b
} }
@ -68,22 +82,37 @@ func TestResampling(t *testing.T) {
}, },
} }
for _, c := range cases { for _, c := range cases {
inB := newSoundBytes(c.In) c := c
outS := convert.NewResampling(bytes.NewReader(inB), int64(len(inB)), c.In, c.Out) t.Run(fmt.Sprintf("%d to %d", c.In, c.Out), func(t *testing.T) {
for _, bitDepthInBytes := range []int{2, 4} {
bitDepthInBytes := bitDepthInBytes
t.Run(fmt.Sprintf("bitDepthInBytes=%d", bitDepthInBytes), func(t *testing.T) {
inB := newSoundBytes(c.In, bitDepthInBytes)
outS := convert.NewResampling(bytes.NewReader(inB), int64(len(inB)), c.In, c.Out, bitDepthInBytes)
gotB, err := io.ReadAll(outS) gotB, err := io.ReadAll(outS)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
wantB := newSoundBytes(c.Out) wantB := newSoundBytes(c.Out, bitDepthInBytes)
if len(gotB) != len(wantB) { if len(gotB) != len(wantB) {
t.Errorf("len(gotB) == %d but len(wantB) == %d", len(gotB), len(wantB)) t.Errorf("len(gotB) == %d but len(wantB) == %d", len(gotB), len(wantB))
} }
for i := 0; i < len(gotB)/2; i++ { for i := 0; i < len(gotB)/bitDepthInBytes; i++ {
got := float64(int16(gotB[2*i])|(int16(gotB[2*i+1])<<8)) / (1<<15 - 1) var got, want float64
want := float64(int16(wantB[2*i])|(int16(wantB[2*i+1])<<8)) / (1<<15 - 1) switch bitDepthInBytes {
case 2:
got = float64(int16(gotB[2*i])|(int16(gotB[2*i+1])<<8)) / (1<<15 - 1)
want = float64(int16(wantB[2*i])|(int16(wantB[2*i+1])<<8)) / (1<<15 - 1)
case 4:
got = float64(math.Float32frombits(uint32(gotB[4*i]) | (uint32(gotB[4*i+1]) << 8) | (uint32(gotB[4*i+2]) << 16) | (uint32(gotB[4*i+3]) << 24)))
want = float64(math.Float32frombits(uint32(wantB[4*i]) | (uint32(wantB[4*i+1]) << 8) | (uint32(wantB[4*i+2]) << 16) | (uint32(wantB[4*i+3]) << 24)))
}
if math.Abs(got-want) > 0.025 { if math.Abs(got-want) > 0.025 {
t.Errorf("sample rate: %d, index: %d: got: %f, want: %f", c.Out, i, got, want) t.Errorf("sample rate: %d, index: %d: got: %f, want: %f", c.Out, i, got, want)
} }
} }
})
}
})
} }
} }

View File

@ -27,6 +27,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert" "github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
) )
// bitDepthInBytesInt16 is the per-channel sample size, in bytes, of signed
// 16-bit audio. It is the bit depth handed to convert.NewResampling.
const bitDepthInBytesInt16 = 2
// Stream is a decoded stream. // Stream is a decoded stream.
type Stream struct { type Stream struct {
orig *mp3.Decoder orig *mp3.Decoder
@ -105,7 +109,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
var r *convert.Resampling var r *convert.Resampling
if d.SampleRate() != sampleRate { if d.SampleRate() != sampleRate {
r = convert.NewResampling(d, d.Length(), d.SampleRate(), sampleRate) r = convert.NewResampling(d, d.Length(), d.SampleRate(), sampleRate, bitDepthInBytesInt16)
} }
s := &Stream{ s := &Stream{
orig: d, orig: d,

View File

@ -25,6 +25,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert" "github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
) )
// bitDepthInBytesInt16 is the number of bytes one channel of a signed 16-bit
// sample occupies; it is passed to convert.NewResampling as the bit depth.
const bitDepthInBytesInt16 = 2
// Stream is a decoded audio stream. // Stream is a decoded audio stream.
type Stream struct { type Stream struct {
readSeeker io.ReadSeeker readSeeker io.ReadSeeker
@ -200,7 +204,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
length *= 2 length *= 2
} }
if origSampleRate != sampleRate { if origSampleRate != sampleRate {
r := convert.NewResampling(s, length, origSampleRate, sampleRate) r := convert.NewResampling(s, length, origSampleRate, sampleRate, bitDepthInBytesInt16)
s = r s = r
length = r.Length() length = r.Length()
} }

View File

@ -24,6 +24,10 @@ import (
"github.com/hajimehoshi/ebiten/v2/audio/internal/convert" "github.com/hajimehoshi/ebiten/v2/audio/internal/convert"
) )
// bitDepthInBytesInt16 is the byte width of a single-channel signed 16-bit
// sample, supplied to convert.NewResampling as the stream's bit depth.
const bitDepthInBytesInt16 = 2
// Stream is a decoded audio stream. // Stream is a decoded audio stream.
type Stream struct { type Stream struct {
inner io.ReadSeeker inner io.ReadSeeker
@ -153,7 +157,7 @@ func DecodeWithSampleRate(sampleRate int, src io.Reader) (*Stream, error) {
return s, nil return s, nil
} }
r := convert.NewResampling(s.inner, s.size, s.sampleRate, sampleRate) r := convert.NewResampling(s.inner, s.size, s.sampleRate, sampleRate, bitDepthInBytesInt16)
return &Stream{ return &Stream{
inner: r, inner: r,
size: r.Length(), size: r.Length(),