From 71cecea9c99a99b0dd66a8ac411d054cac19ed35 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Sun, 15 Jan 2017 01:42:25 +0900 Subject: [PATCH] audio: Auto resampling (#301) --- audio/internal/resampling/resampling.go | 172 ++++++++++++++++++++++++ audio/vorbis/vorbis.go | 18 ++- audio/wav/decode.go | 56 ++++++-- examples/audio/main.go | 4 +- 4 files changed, 234 insertions(+), 16 deletions(-) create mode 100644 audio/internal/resampling/resampling.go diff --git a/audio/internal/resampling/resampling.go b/audio/internal/resampling/resampling.go new file mode 100644 index 000000000..1ad522d82 --- /dev/null +++ b/audio/internal/resampling/resampling.go @@ -0,0 +1,172 @@ +// Copyright 2017 The Ebiten Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package resampling + +import ( + "io" + "math" + + "github.com/hajimehoshi/ebiten/audio" +) + +func sinc(x float64) float64 { + if x == 0 { + return 1 + } + return math.Sin(x) / x +} + +type Stream struct { + source audio.ReadSeekCloser + size int64 + from int + to int + pos int64 + srcPos int64 + srcCacheL []float64 + srcCacheR []float64 +} + +func NewStream(source audio.ReadSeekCloser, size int64, from, to int) *Stream { + s := &Stream{ + source: source, + size: size, + from: from, + to: to, + } + s.srcCacheL = make([]float64, s.size/4) + s.srcCacheR = make([]float64, s.size/4) + return s +} + +func (s *Stream) Size() int64 { + return int64(float64(s.size) * float64(s.to) / float64(s.from)) +} + +func (s *Stream) src(i int) (float64, float64, error) { + // Use int here since int64 is very slow on browsers. + // TODO: Resampling is too heavy on browsers. How about using OfflineAudioContext? + if i < 0 { + return 0, 0, nil + } + if len(s.srcCacheL) <= i { + return 0, 0, nil + } + pos := int(s.srcPos) / 4 + if pos <= i { + buf := make([]uint8, 4096) + n, err := s.source.Read(buf) + if err != nil && err != io.EOF { + return 0, 0, err + } + n = n / 4 * 4 + buf = buf[:n] + for i := 0; i < len(buf)/4; i++ { + srcL := float64(int16(buf[4*i])|(int16(buf[4*i+1])<<8)) / (1<<15 - 1) + srcR := float64(int16(buf[4*i+2])|(int16(buf[4*i+3])<<8)) / (1<<15 - 1) + s.srcCacheL[pos+i] = srcL + s.srcCacheR[pos+i] = srcR + } + s.srcPos += int64(n) + } + return s.srcCacheL[i], s.srcCacheR[i], nil +} + +func (s *Stream) at(t int64) (float64, float64, error) { + const windowSize = 8 + tInSrc := float64(t) * float64(s.from) / float64(s.to) + startN := int64(tInSrc) - windowSize + if startN < 0 { + startN = 0 + } + if s.size/4 < startN { + startN = s.size / 4 + } + endN := int64(tInSrc) + windowSize + 1 + if s.size/4 < endN { + endN = s.size / 4 + } + l := 0.0 + r := 0.0 + for n := startN; n < endN; n++ { + srcL, srcR, err := s.src(int(n)) + if err != nil { + return 0, 0, err + } + w := 
0.5 + 0.5*math.Cos(2*math.Pi*(tInSrc-float64(n))/(windowSize*2+1)) + s := sinc(math.Pi*(tInSrc-float64(n))) * w + l += srcL * s + r += srcR * s + } + if l < -1 { + l = -1 + } + if l > 1 { + l = 1 + } + if r < -1 { + r = -1 + } + if r > 1 { + r = 1 + } + return l, r, nil +} + +func (s *Stream) Read(b []uint8) (int, error) { + if s.pos == s.Size() { + return 0, io.EOF + } + n := len(b) / 4 * 4 + if s.Size()-s.pos <= int64(n) { + n = int(s.Size() - s.pos) + } + for i := 0; i < n/4; i++ { + l, r, err := s.at(s.pos/4 + int64(i)) + if err != nil { + return 0, err + } + l16 := int16(l * (1<<15 - 1)) + r16 := int16(r * (1<<15 - 1)) + b[4*i] = uint8(l16) + b[4*i+1] = uint8(l16 >> 8) + b[4*i+2] = uint8(r16) + b[4*i+3] = uint8(r16 >> 8) + } + s.pos += int64(n) + return n, nil +} + +func (s *Stream) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + s.pos = offset + case io.SeekCurrent: + s.pos += offset + case io.SeekEnd: + s.pos = s.Size() + offset + } + if s.pos < 0 { + s.pos = 0 + } + if s.Size() <= s.pos { + s.pos = s.Size() + } + return s.pos, nil +} + +func (s *Stream) Close() error { + return s.source.Close() +} diff --git a/audio/vorbis/vorbis.go b/audio/vorbis/vorbis.go index f60755516..f80d4be80 100644 --- a/audio/vorbis/vorbis.go +++ b/audio/vorbis/vorbis.go @@ -21,12 +21,18 @@ import ( "runtime" "github.com/hajimehoshi/ebiten/audio" + "github.com/hajimehoshi/ebiten/audio/internal/resampling" "github.com/jfreymuth/oggvorbis" ) +type readSeekCloseSizer interface { + audio.ReadSeekCloser + Size() int64 +} + // Stream is a decoded audio stream. type Stream struct { - decoded *decoded + decoded readSeekCloseSizer } // Read is implementation of io.Reader's Read. @@ -172,7 +178,7 @@ func decode(in audio.ReadSeekCloser) (*decoded, int, int, error) { // Decode decodes Ogg/Vorbis data to playable stream. // -// The sample rate must be same as that of audio context. 
+// Sample rate is automatically adjusted to fit with the audio context. func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { decoded, channelNum, sampleRate, err := decode(src) if err != nil { @@ -183,10 +189,8 @@ func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { return nil, fmt.Errorf("vorbis: number of channels must be 2") } if sampleRate != context.SampleRate() { - return nil, fmt.Errorf("vorbis: sample rate must be %d but %d", context.SampleRate(), sampleRate) + s := resampling.NewStream(decoded, decoded.Size(), sampleRate, context.SampleRate()) + return &Stream{s}, nil } - s := &Stream{ - decoded: decoded, - } - return s, nil + return &Stream{decoded}, nil } diff --git a/audio/wav/decode.go b/audio/wav/decode.go index e99e41c55..7f6758610 100644 --- a/audio/wav/decode.go +++ b/audio/wav/decode.go @@ -21,10 +21,42 @@ import ( "io" "github.com/hajimehoshi/ebiten/audio" + "github.com/hajimehoshi/ebiten/audio/internal/resampling" ) +type readSeekCloseSizer interface { + audio.ReadSeekCloser + Size() int64 +} + // Stream is a decoded audio stream. type Stream struct { + inner readSeekCloseSizer +} + +// Read is implementation of io.Reader's Read. +func (s *Stream) Read(p []byte) (int, error) { + return s.inner.Read(p) +} + +// Seek is implementation of io.Seeker's Seek. +// +// Note that Seek can take long since decoding is a relatively heavy task. +func (s *Stream) Seek(offset int64, whence int) (int64, error) { + return s.inner.Seek(offset, whence) +} + +// Close is implementation of io.Closer's Close. +func (s *Stream) Close() error { + return s.inner.Close() +} + +// Size returns the size of decoded stream in bytes. +func (s *Stream) Size() int64 { + return s.inner.Size() +} + +type stream struct { src audio.ReadSeekCloser headerSize int64 dataSize int64 @@ -32,7 +64,7 @@ type Stream struct { } // Read is implementation of io.Reader's Read. 
-func (s *Stream) Read(p []byte) (int, error) { +func (s *stream) Read(p []byte) (int, error) { if s.remaining <= 0 { return 0, io.EOF } @@ -45,7 +77,7 @@ func (s *Stream) Read(p []byte) (int, error) { } // Seek is implementation of io.Seeker's Seek. -func (s *Stream) Seek(offset int64, whence int) (int64, error) { +func (s *stream) Seek(offset int64, whence int) (int64, error) { if whence == io.SeekStart { offset += s.headerSize } @@ -66,19 +98,20 @@ func (s *Stream) Seek(offset int64, whence int) (int64, error) { } // Read is implementation of io.Closer's Close. -func (s *Stream) Close() error { +func (s *stream) Close() error { return s.src.Close() } // Size returns the size of decoded stream in bytes. -func (s *Stream) Size() int64 { +func (s *stream) Size() int64 { return s.dataSize } // Decode decodes WAV (RIFF) data to playable stream. // // The format must be 2 channels, 16bit little endian PCM. -// The sample rate must be same as that of audio context. +// +// Sample rate is automatically adjusted to fit with the audio context. 
func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { buf := make([]byte, 12) n, err := io.ReadFull(src, buf) @@ -98,6 +131,8 @@ func Decode(context *audio.Context, src audio.ReadSeekCloser) (*Stream, error) { // Read chunks dataSize := int64(0) headerSize := int64(0) + sampleRateFrom := 0 + sampleRateTo := 0 chunks: for { buf := make([]byte, 8) @@ -139,7 +174,8 @@ chunks: } sampleRate := int64(buf[4]) | int64(buf[5])<<8 | int64(buf[6])<<16 | int64(buf[7])<<24 if int64(context.SampleRate()) != sampleRate { - return nil, fmt.Errorf("wav: sample rate must be %d but %d", context.SampleRate(), sampleRate) + sampleRateFrom = int(sampleRate) + sampleRateTo = context.SampleRate() } headerSize += size case bytes.Equal(buf[0:4], []byte("data")): @@ -157,11 +193,15 @@ chunks: headerSize += size } } - s := &Stream{ + s := &stream{ src: src, headerSize: headerSize, dataSize: dataSize, remaining: dataSize, } - return s, nil + if sampleRateFrom != sampleRateTo { + fixed := resampling.NewStream(s, s.dataSize, sampleRateFrom, sampleRateTo) + return &Stream{fixed}, nil + } + return &Stream{s}, nil } diff --git a/examples/audio/main.go b/examples/audio/main.go index 7a7ecaa99..da9bd9755 100644 --- a/examples/audio/main.go +++ b/examples/audio/main.go @@ -268,7 +268,9 @@ func main() { if err != nil { log.Fatal(err) } - const sampleRate = 44100 + // This sample rate doesn't match with wav/ogg's sample rate, + // but decoders adjust them. + const sampleRate = 48000 const bytesPerSample = 4 // TODO: This should be defined in audio package audioContext, err = audio.NewContext(sampleRate) if err != nil {