audio: Auto resampling (#301)

This commit is contained in:
Hajime Hoshi 2017-01-15 01:42:25 +09:00
parent ee3da352db
commit 71cecea9c9
4 changed files with 234 additions and 16 deletions

View File

@ -0,0 +1,172 @@
// Copyright 2017 The Ebiten Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package resampling
import (
"io"
"math"
"github.com/hajimehoshi/ebiten/audio"
)
// sinc returns the unnormalized sinc function sin(x)/x.
//
// The removable singularity at x == 0 is filled in with its limit, 1.
func sinc(x float64) float64 {
	if x != 0 {
		return math.Sin(x) / x
	}
	return 1
}
// Stream is a ReadSeekCloser that resamples the PCM data of an underlying
// source stream on the fly.
//
// Data is handled in 4-byte frames: two 16-bit little-endian samples
// (left, then right).
type Stream struct {
	// source is the stream the original (not yet resampled) data is read from.
	source audio.ReadSeekCloser

	// size is the length of the source data in bytes.
	size int64

	// from and to are the source and destination sample rates.
	from, to int

	// pos is the current byte offset in the resampled output;
	// srcPos is the number of source bytes decoded into the caches so far.
	pos, srcPos int64

	// srcCacheL and srcCacheR hold the decoded left/right source samples,
	// indexed by frame number.
	srcCacheL, srcCacheR []float64
}
// NewStream creates a Stream that converts the data of source from the
// sample rate from to the sample rate to.
//
// size is the length of the source data in bytes. One cache slot per
// 4-byte frame is allocated up front for each channel.
func NewStream(source audio.ReadSeekCloser, size int64, from, to int) *Stream {
	frames := size / 4
	return &Stream{
		source:    source,
		size:      size,
		from:      from,
		to:        to,
		srcCacheL: make([]float64, frames),
		srcCacheR: make([]float64, frames),
	}
}
// Size returns the length of the resampled stream in bytes.
//
// The source size is scaled by to/from and then rounded down to a whole
// number of 4-byte frames, so that the reported size never ends in the
// middle of a frame. It returns 0 when either sample rate is not positive,
// since the ratio is meaningless in that case.
func (s *Stream) Size() int64 {
	const bytesPerFrame = 4
	if s.from <= 0 || s.to <= 0 {
		return 0
	}
	scaled := int64(float64(s.size) * float64(s.to) / float64(s.from))
	return scaled / bytesPerFrame * bytesPerFrame
}
// src returns the left and right source samples of frame i as values in
// roughly [-1, 1].
//
// Frames outside the cache range yield silence (0, 0). Frames that have not
// been decoded yet are pulled from the underlying source, chunk by chunk,
// until frame i is cached or the source is exhausted. A frame the source
// never delivers keeps the cache's zero value.
//
// Any read error other than end-of-data is returned as is.
func (s *Stream) src(i int) (float64, float64, error) {
	// Use int here since int64 is very slow on browsers.
	// TODO: Resampling is too heavy on browsers. How about using OfflineAudioContext?
	if i < 0 || len(s.srcCacheL) <= i {
		return 0, 0, nil
	}

	const bytesPerFrame = 4
	pos := int(s.srcPos) / bytesPerFrame
	if pos <= i {
		buf := make([]uint8, 4096)
		// Keep reading until the requested frame is available: a single chunk
		// is not enough when the reader jumped far ahead (e.g. after Seek).
		for pos <= i {
			// ReadFull keeps frames aligned: a short Read never leaves a
			// partially consumed frame behind.
			n, err := io.ReadFull(s.source, buf)
			if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
				return 0, 0, err
			}
			frames := n / bytesPerFrame
			// Never write past the cache even if the source yields more data
			// than the declared size.
			if room := len(s.srcCacheL) - pos; frames > room {
				frames = room
			}
			for j := 0; j < frames; j++ {
				o := bytesPerFrame * j
				s.srcCacheL[pos+j] = float64(int16(buf[o])|(int16(buf[o+1])<<8)) / (1<<15 - 1)
				s.srcCacheR[pos+j] = float64(int16(buf[o+2])|(int16(buf[o+3])<<8)) / (1<<15 - 1)
			}
			s.srcPos += int64(frames * bytesPerFrame)
			pos += frames
			if err != nil {
				// End of the source: nothing more to decode.
				break
			}
		}
	}
	return s.srcCacheL[i], s.srcCacheR[i], nil
}
// at computes the resampled left and right samples for output frame t.
//
// The output frame is mapped to a (fractional) position in the source, and
// the source frames within windowSize frames of that position are combined
// using sinc weights tapered by a raised-cosine window. Both results are
// clamped to [-1, 1].
func (s *Stream) at(t int64) (float64, float64, error) {
	const (
		windowSize  = 8
		windowWidth = windowSize*2 + 1
	)

	srcTime := float64(t) * float64(s.from) / float64(s.to)
	center := int64(srcTime)
	frames := s.size / 4

	// Restrict the window to frames that exist in the source.
	first := center - windowSize
	if first < 0 {
		first = 0
	}
	if first > frames {
		first = frames
	}
	last := center + windowSize + 1
	if last > frames {
		last = frames
	}

	var left, right float64
	for n := first; n < last; n++ {
		sampleL, sampleR, err := s.src(int(n))
		if err != nil {
			return 0, 0, err
		}
		d := srcTime - float64(n)
		taper := 0.5 + 0.5*math.Cos(2*math.Pi*d/windowWidth)
		weight := sinc(math.Pi*d) * taper
		left += sampleL * weight
		right += sampleR * weight
	}

	switch {
	case left < -1:
		left = -1
	case left > 1:
		left = 1
	}
	switch {
	case right < -1:
		right = -1
	case right > 1:
		right = 1
	}
	return left, right, nil
}
// Read is implementation of io.Reader's Read.
//
// It fills b with resampled 16-bit little-endian stereo data. Only whole
// 4-byte frames are produced: the returned count is always a multiple of 4,
// and every reported byte has actually been written. io.EOF is returned once
// less than one whole frame remains before the end of the stream.
func (s *Stream) Read(b []uint8) (int, error) {
	const bytesPerFrame = 4

	remaining := s.Size() - s.pos
	if remaining < bytesPerFrame {
		// A trailing partial frame carries no sample data; treat it as the end.
		return 0, io.EOF
	}

	n := len(b) / bytesPerFrame * bytesPerFrame
	if remaining < int64(n) {
		n = int(remaining) / bytesPerFrame * bytesPerFrame
	}

	base := s.pos / bytesPerFrame
	for i := 0; i < n/bytesPerFrame; i++ {
		l, r, err := s.at(base + int64(i))
		if err != nil {
			return 0, err
		}
		l16 := int16(l * (1<<15 - 1))
		r16 := int16(r * (1<<15 - 1))
		b[4*i] = uint8(l16)
		b[4*i+1] = uint8(l16 >> 8)
		b[4*i+2] = uint8(r16)
		b[4*i+3] = uint8(r16 >> 8)
	}
	s.pos += int64(n)
	return n, nil
}
// Seek is implementation of io.Seeker's Seek.
//
// offset is in bytes of the resampled stream. The resulting position is
// clamped to the range [0, Size()] and returned. An unrecognized whence
// leaves the position unchanged (apart from clamping).
func (s *Stream) Seek(offset int64, whence int) (int64, error) {
	size := s.Size()
	switch whence {
	case io.SeekStart:
		s.pos = offset
	case io.SeekCurrent:
		s.pos += offset
	case io.SeekEnd:
		// Relative to the end of the stream, not to the current position.
		s.pos = size + offset
	}
	if s.pos < 0 {
		s.pos = 0
	}
	if size <= s.pos {
		s.pos = size
	}
	return s.pos, nil
}
// Close is implementation of io.Closer's Close.
//
// It closes the underlying source stream and returns that call's error.
func (s *Stream) Close() error {
return s.source.Close()
}

View File

@ -21,12 +21,18 @@ import (
"runtime" "runtime"
"github.com/hajimehoshi/ebiten/audio" "github.com/hajimehoshi/ebiten/audio"
"github.com/hajimehoshi/ebiten/audio/internal/resampling"
"github.com/jfreymuth/oggvorbis" "github.com/jfreymuth/oggvorbis"
) )
type readSeekCloseSizer interface {
audio.ReadSeekCloser
Size() int64
}
// Stream is a decoded audio stream. // Stream is a decoded audio stream.
type Stream struct { type Stream struct {
decoded *decoded decoded readSeekCloseSizer
} }
// Read is implementation of io.Reader's Read. // Read is implementation of io.Reader's Read.
@ -172,7 +178,7 @@ func decode(in audio.ReadSeekCloser) (*decoded, int, int, error) {
// Decode decodes Ogg/Vorbis data to playable stream. // Decode decodes Ogg/Vorbis data to playable stream.
// //
// The sample rate must be same as that of audio context. // Sample rate is automatically adjusted to fit with the audio context.
func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) {
decoded, channelNum, sampleRate, err := decode(src) decoded, channelNum, sampleRate, err := decode(src)
if err != nil { if err != nil {
@ -183,10 +189,8 @@ func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) {
return nil, fmt.Errorf("vorbis: number of channels must be 2") return nil, fmt.Errorf("vorbis: number of channels must be 2")
} }
if sampleRate != context.SampleRate() { if sampleRate != context.SampleRate() {
return nil, fmt.Errorf("vorbis: sample rate must be %d but %d", context.SampleRate(), sampleRate) s := resampling.NewStream(decoded, decoded.Size(), sampleRate, context.SampleRate())
return &Stream{s}, nil
} }
s := &Stream{ return &Stream{decoded}, nil
decoded: decoded,
}
return s, nil
} }

View File

@ -21,10 +21,42 @@ import (
"io" "io"
"github.com/hajimehoshi/ebiten/audio" "github.com/hajimehoshi/ebiten/audio"
"github.com/hajimehoshi/ebiten/audio/internal/resampling"
) )
type readSeekCloseSizer interface {
audio.ReadSeekCloser
Size() int64
}
// Stream is a decoded audio stream. // Stream is a decoded audio stream.
type Stream struct { type Stream struct {
inner readSeekCloseSizer
}
// Read is implementation of io.Reader's Read.
func (s *Stream) Read(p []byte) (int, error) {
return s.inner.Read(p)
}
// Seek is implementation of io.Seeker's Seek.
//
// Note that Seek can take long since decoding is a relatively heavy task.
func (s *Stream) Seek(offset int64, whence int) (int64, error) {
return s.inner.Seek(offset, whence)
}
// Close is implementation of io.Closer's Close.
func (s *Stream) Close() error {
return s.inner.Close()
}
// Size returns the size of decoded stream in bytes.
func (s *Stream) Size() int64 {
return s.inner.Size()
}
type stream struct {
src audio.ReadSeekCloser src audio.ReadSeekCloser
headerSize int64 headerSize int64
dataSize int64 dataSize int64
@ -32,7 +64,7 @@ type Stream struct {
} }
// Read is implementation of io.Reader's Read. // Read is implementation of io.Reader's Read.
func (s *Stream) Read(p []byte) (int, error) { func (s *stream) Read(p []byte) (int, error) {
if s.remaining <= 0 { if s.remaining <= 0 {
return 0, io.EOF return 0, io.EOF
} }
@ -45,7 +77,7 @@ func (s *Stream) Read(p []byte) (int, error) {
} }
// Seek is implementation of io.Seeker's Seek. // Seek is implementation of io.Seeker's Seek.
func (s *Stream) Seek(offset int64, whence int) (int64, error) { func (s *stream) Seek(offset int64, whence int) (int64, error) {
if whence == io.SeekStart { if whence == io.SeekStart {
offset += s.headerSize offset += s.headerSize
} }
@ -66,19 +98,20 @@ func (s *Stream) Seek(offset int64, whence int) (int64, error) {
} }
// Read is implementation of io.Closer's Close. // Read is implementation of io.Closer's Close.
func (s *Stream) Close() error { func (s *stream) Close() error {
return s.src.Close() return s.src.Close()
} }
// Size returns the size of decoded stream in bytes. // Size returns the size of decoded stream in bytes.
func (s *Stream) Size() int64 { func (s *stream) Size() int64 {
return s.dataSize return s.dataSize
} }
// Decode decodes WAV (RIFF) data to playable stream. // Decode decodes WAV (RIFF) data to playable stream.
// //
// The format must be 2 channels, 16bit little endian PCM. // The format must be 2 channels, 16bit little endian PCM.
// The sample rate must be same as that of audio context. //
// Sample rate is automatically adjusted to fit with the audio context.
func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) {
buf := make([]byte, 12) buf := make([]byte, 12)
n, err := io.ReadFull(src, buf) n, err := io.ReadFull(src, buf)
@ -98,6 +131,8 @@ func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) {
// Read chunks // Read chunks
dataSize := int64(0) dataSize := int64(0)
headerSize := int64(0) headerSize := int64(0)
sampleRateFrom := 0
sampleRateTo := 0
chunks: chunks:
for { for {
buf := make([]byte, 8) buf := make([]byte, 8)
@ -139,7 +174,8 @@ chunks:
} }
sampleRate := int64(buf[4]) | int64(buf[5])<<8 | int64(buf[6])<<16 | int64(buf[7])<<24 sampleRate := int64(buf[4]) | int64(buf[5])<<8 | int64(buf[6])<<16 | int64(buf[7])<<24
if int64(context.SampleRate()) != sampleRate { if int64(context.SampleRate()) != sampleRate {
return nil, fmt.Errorf("wav: sample rate must be %d but %d", context.SampleRate(), sampleRate) sampleRateFrom = int(sampleRate)
sampleRateTo = context.SampleRate()
} }
headerSize += size headerSize += size
case bytes.Equal(buf[0:4], []byte("data")): case bytes.Equal(buf[0:4], []byte("data")):
@ -157,11 +193,15 @@ chunks:
headerSize += size headerSize += size
} }
} }
s := &Stream{ s := &stream{
src: src, src: src,
headerSize: headerSize, headerSize: headerSize,
dataSize: dataSize, dataSize: dataSize,
remaining: dataSize, remaining: dataSize,
} }
return s, nil if sampleRateFrom != sampleRateTo {
fixed := resampling.NewStream(s, s.dataSize, sampleRateFrom, sampleRateTo)
return &Stream{fixed}, nil
}
return &Stream{s}, nil
} }

View File

@ -268,7 +268,9 @@ func main() {
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
const sampleRate = 44100 // This sample rate doesn't match with wav/ogg's sample rate,
// but decoders adjust them.
const sampleRate = 48000
const bytesPerSample = 4 // TODO: This should be defined in audio package const bytesPerSample = 4 // TODO: This should be defined in audio package
audioContext, err = audio.NewContext(sampleRate) audioContext, err = audio.NewContext(sampleRate)
if err != nil { if err != nil {