internal/graphicscommand: use buffers for []float32 to reduce allocations

This commit is contained in:
Hajime Hoshi 2022-11-04 21:51:33 +09:00
parent d11c46ef1f
commit 50021ef3e4
2 changed files with 41 additions and 31 deletions

View File

@ -71,11 +71,8 @@ type commandQueue struct {
drawTrianglesCommandPool drawTrianglesCommandPool drawTrianglesCommandPool drawTrianglesCommandPool
// float32sBuffer is a reusable buffer to allocate []float32. float32sBuffer buffer[float32]
// float32sBuffer's index is switched at the end of the frame, float32SlicesBuffer buffer[[]float32]
// and the buffer of the original index is kept until the next frame ends.
float32sBuffer [2][]float32
float32sBufferIndex int
} }
// theCommandQueue is the command queue for the current process. // theCommandQueue is the command queue for the current process.
@ -172,9 +169,8 @@ func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bo
err = q.flush(graphicsDriver, endFrame) err = q.flush(graphicsDriver, endFrame)
}) })
if endFrame { if endFrame {
q.float32sBuffer[q.float32sBufferIndex] = q.float32sBuffer[q.float32sBufferIndex][:0] q.float32sBuffer.reset()
q.float32sBufferIndex++ q.float32SlicesBuffer.reset()
q.float32sBufferIndex %= len(q.float32sBuffer)
} }
return return
} }
@ -590,29 +586,20 @@ func roundUpPower2(x int) int {
return p2 return p2
} }
func (q *commandQueue) allocFloat32s(n int) []float32 {
buf := q.float32sBuffer[q.float32sBufferIndex]
if len(buf)+n > cap(buf) {
buf = make([]float32, 0, max(roundUpPower2(len(buf)+n), 16))
}
s := buf[len(buf) : len(buf)+n]
q.float32sBuffer[q.float32sBufferIndex] = buf[:len(buf)+n]
return s
}
func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 { func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 {
uniforms = append(uniforms, make([][]float32, graphics.PreservedUniformVariablesCount)...) origUniforms := uniforms
copy(uniforms[graphics.PreservedUniformVariablesCount:], uniforms) uniforms = q.float32SlicesBuffer.alloc(len(origUniforms) + graphics.PreservedUniformVariablesCount)
copy(uniforms[graphics.PreservedUniformVariablesCount:], origUniforms)
// Set the destination texture size. // Set the destination texture size.
dw, dh := dst.InternalSize() dw, dh := dst.InternalSize()
udstsize := q.allocFloat32s(2) udstsize := q.float32sBuffer.alloc(2)
udstsize[0] = float32(dw) udstsize[0] = float32(dw)
udstsize[1] = float32(dh) udstsize[1] = float32(dh)
uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize
// Set the source texture sizes. // Set the source texture sizes.
usizes := q.allocFloat32s(2 * len(srcs)) usizes := q.float32sBuffer.alloc(2 * len(srcs))
for i, src := range srcs { for i, src := range srcs {
if src != nil { if src != nil {
w, h := src.InternalSize() w, h := src.InternalSize()
@ -623,12 +610,12 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes
// Set the destination region. // Set the destination region.
udstrorig := q.allocFloat32s(2) udstrorig := q.float32sBuffer.alloc(2)
udstrorig[0] = float32(dstRegion.X) / float32(dw) udstrorig[0] = float32(dstRegion.X) / float32(dw)
udstrorig[1] = float32(dstRegion.Y) / float32(dh) udstrorig[1] = float32(dstRegion.Y) / float32(dh)
uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig
udstrsize := q.allocFloat32s(2) udstrsize := q.float32sBuffer.alloc(2)
udstrsize[0] = float32(dstRegion.Width) / float32(dw) udstrsize[0] = float32(dstRegion.Width) / float32(dw)
udstrsize[1] = float32(dstRegion.Height) / float32(dh) udstrsize[1] = float32(dstRegion.Height) / float32(dh)
uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize
@ -646,7 +633,7 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
} }
// Set the source offsets. // Set the source offsets.
uoffsets := q.allocFloat32s(2 * len(offsets)) uoffsets := q.float32sBuffer.alloc(2 * len(offsets))
for i, offset := range offsets { for i, offset := range offsets {
uoffsets[2*i] = offset[0] uoffsets[2*i] = offset[0]
uoffsets[2*i+1] = offset[1] uoffsets[2*i+1] = offset[1]
@ -654,17 +641,17 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets
// Set the source region of texture0. // Set the source region of texture0.
usrcrorig := q.allocFloat32s(2) usrcrorig := q.float32sBuffer.alloc(2)
usrcrorig[0] = float32(srcRegion.X) usrcrorig[0] = float32(srcRegion.X)
usrcrorig[1] = float32(srcRegion.Y) usrcrorig[1] = float32(srcRegion.Y)
uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig
usrcrsize := q.allocFloat32s(2) usrcrsize := q.float32sBuffer.alloc(2)
usrcrsize[0] = float32(srcRegion.Width) usrcrsize[0] = float32(srcRegion.Width)
usrcrsize[1] = float32(srcRegion.Height) usrcrsize[1] = float32(srcRegion.Height)
uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize
umatrix := q.allocFloat32s(16) umatrix := q.float32sBuffer.alloc(16)
umatrix[0] = 2 / float32(dw) umatrix[0] = 2 / float32(dw)
umatrix[1] = 0 umatrix[1] = 0
umatrix[2] = 0 umatrix[2] = 0
@ -685,3 +672,28 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
return uniforms return uniforms
} }
// buffer is a reusable buffer to allocate []T.
type buffer[T any] struct {
buf [2][]T
// index is switched at the end of the frame,
// and the buffer of the original index is kept until the next frame ends.
index int
}
func (b *buffer[T]) alloc(n int) []T {
buf := b.buf[b.index]
if len(buf)+n > cap(buf) {
buf = make([]T, 0, max(roundUpPower2(len(buf)+n), 16))
}
s := buf[len(buf) : len(buf)+n]
b.buf[b.index] = buf[:len(buf)+n]
return s
}
func (b *buffer[T]) reset() {
b.buf[b.index] = b.buf[b.index][:0]
b.index++
b.index %= len(b.buf)
}

View File

@ -18,7 +18,6 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/mipmap" "github.com/hajimehoshi/ebiten/v2/internal/mipmap"
"github.com/hajimehoshi/ebiten/v2/internal/shaderir" "github.com/hajimehoshi/ebiten/v2/internal/shaderir"
) )
@ -73,8 +72,7 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) [][]float32 {
} }
} }
// Allocate a new slice with an extra capacity. This slice's length is extended at internal/graphicscommand. us := make([][]float32, len(s.uniformNameToIndex))
us := make([][]float32, len(s.uniformNameToIndex), len(s.uniformNameToIndex)+graphics.PreservedUniformVariablesCount)
for name, idx := range s.uniformNameToIndex { for name, idx := range s.uniformNameToIndex {
if v, ok := nameToF32s[name]; ok { if v, ok := nameToF32s[name]; ok {
us[idx] = v us[idx] = v