From 50021ef3e4e3028e81ee77e3763a2b248cc8666f Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Fri, 4 Nov 2022 21:51:33 +0900 Subject: [PATCH] internal/graphicscommand: use buffers for []float32 to reduce allocations --- internal/graphicscommand/command.go | 68 +++++++++++++++++------------ internal/ui/shader.go | 4 +- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/internal/graphicscommand/command.go b/internal/graphicscommand/command.go index 038374494..941187cde 100644 --- a/internal/graphicscommand/command.go +++ b/internal/graphicscommand/command.go @@ -71,11 +71,8 @@ type commandQueue struct { drawTrianglesCommandPool drawTrianglesCommandPool - // float32sBuffer is a reusable buffer to allocate []float32. - // float32sBuffer's index is switched at the end of the frame, - // and the buffer of the original index is kept until the next frame ends. - float32sBuffer [2][]float32 - float32sBufferIndex int + float32sBuffer buffer[float32] + float32SlicesBuffer buffer[[]float32] } // theCommandQueue is the command queue for the current process. @@ -172,9 +169,8 @@ func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bo err = q.flush(graphicsDriver, endFrame) }) if endFrame { - q.float32sBuffer[q.float32sBufferIndex] = q.float32sBuffer[q.float32sBufferIndex][:0] - q.float32sBufferIndex++ - q.float32sBufferIndex %= len(q.float32sBuffer) + q.float32sBuffer.reset() + q.float32SlicesBuffer.reset() } return } @@ -590,29 +586,20 @@ func roundUpPower2(x int) int { return p2 } -func (q *commandQueue) allocFloat32s(n int) []float32 { - buf := q.float32sBuffer[q.float32sBufferIndex] - if len(buf)+n > cap(buf) { - buf = make([]float32, 0, max(roundUpPower2(len(buf)+n), 16)) - } - s := buf[len(buf) : len(buf)+n] - q.float32sBuffer[q.float32sBufferIndex] = buf[:len(buf)+n] - return s -} - func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 { - uniforms = append(uniforms, make([][]float32, graphics.PreservedUniformVariablesCount)...) - copy(uniforms[graphics.PreservedUniformVariablesCount:], uniforms) + origUniforms := uniforms + uniforms = q.float32SlicesBuffer.alloc(len(origUniforms) + graphics.PreservedUniformVariablesCount) + copy(uniforms[graphics.PreservedUniformVariablesCount:], origUniforms) // Set the destination texture size. dw, dh := dst.InternalSize() - udstsize := q.allocFloat32s(2) + udstsize := q.float32sBuffer.alloc(2) udstsize[0] = float32(dw) udstsize[1] = float32(dh) uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize // Set the source texture sizes. - usizes := q.allocFloat32s(2 * len(srcs)) + usizes := q.float32sBuffer.alloc(2 * len(srcs)) for i, src := range srcs { if src != nil { w, h := src.InternalSize() @@ -623,12 +610,12 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes // Set the destination region. - udstrorig := q.allocFloat32s(2) + udstrorig := q.float32sBuffer.alloc(2) udstrorig[0] = float32(dstRegion.X) / float32(dw) udstrorig[1] = float32(dstRegion.Y) / float32(dh) uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig - udstrsize := q.allocFloat32s(2) + udstrsize := q.float32sBuffer.alloc(2) udstrsize[0] = float32(dstRegion.Width) / float32(dw) udstrsize[1] = float32(dstRegion.Height) / float32(dh) uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize @@ -646,7 +633,7 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image } // Set the source offsets. - uoffsets := q.allocFloat32s(2 * len(offsets)) + uoffsets := q.float32sBuffer.alloc(2 * len(offsets)) for i, offset := range offsets { uoffsets[2*i] = offset[0] uoffsets[2*i+1] = offset[1] @@ -654,17 +641,17 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets // Set the source region of texture0. - usrcrorig := q.allocFloat32s(2) + usrcrorig := q.float32sBuffer.alloc(2) usrcrorig[0] = float32(srcRegion.X) usrcrorig[1] = float32(srcRegion.Y) uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig - usrcrsize := q.allocFloat32s(2) + usrcrsize := q.float32sBuffer.alloc(2) usrcrsize[0] = float32(srcRegion.Width) usrcrsize[1] = float32(srcRegion.Height) uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize - umatrix := q.allocFloat32s(16) + umatrix := q.float32sBuffer.alloc(16) umatrix[0] = 2 / float32(dw) umatrix[1] = 0 umatrix[2] = 0 @@ -685,3 +672,28 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image return uniforms } + +// buffer is a reusable buffer to allocate []T. +type buffer[T any] struct { + buf [2][]T + + // index is switched at the end of the frame, + // and the buffer of the original index is kept until the next frame ends. + index int +} + +func (b *buffer[T]) alloc(n int) []T { + buf := b.buf[b.index] + if len(buf)+n > cap(buf) { + buf = make([]T, 0, max(roundUpPower2(len(buf)+n), 16)) + } + s := buf[len(buf) : len(buf)+n] + b.buf[b.index] = buf[:len(buf)+n] + return s +} + +func (b *buffer[T]) reset() { + b.buf[b.index] = b.buf[b.index][:0] + b.index++ + b.index %= len(b.buf) +} diff --git a/internal/ui/shader.go b/internal/ui/shader.go index 553fd6fd5..af3222597 100644 --- a/internal/ui/shader.go +++ b/internal/ui/shader.go @@ -18,7 +18,6 @@ import ( "fmt" "strings" - "github.com/hajimehoshi/ebiten/v2/internal/graphics" "github.com/hajimehoshi/ebiten/v2/internal/mipmap" "github.com/hajimehoshi/ebiten/v2/internal/shaderir" ) @@ -73,8 +72,7 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) [][]float32 { } } - // Allocate a new slice with an extra capacity. This slice's length is extended at internal/graphicscommand. - us := make([][]float32, len(s.uniformNameToIndex), len(s.uniformNameToIndex)+graphics.PreservedUniformVariablesCount) + us := make([][]float32, len(s.uniformNameToIndex)) for name, idx := range s.uniformNameToIndex { if v, ok := nameToF32s[name]; ok { us[idx] = v