internal/graphicscommand: use buffers for []float32 to reduce allocations

This commit is contained in:
Hajime Hoshi 2022-11-04 21:51:33 +09:00
parent d11c46ef1f
commit 50021ef3e4
2 changed files with 41 additions and 31 deletions

View File

@ -71,11 +71,8 @@ type commandQueue struct {
drawTrianglesCommandPool drawTrianglesCommandPool
// float32sBuffer is a reusable buffer to allocate []float32.
// float32sBuffer's index is switched at the end of the frame,
// and the buffer of the original index is kept until the next frame ends.
float32sBuffer [2][]float32
float32sBufferIndex int
float32sBuffer buffer[float32]
float32SlicesBuffer buffer[[]float32]
}
// theCommandQueue is the command queue for the current process.
@ -172,9 +169,8 @@ func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bo
err = q.flush(graphicsDriver, endFrame)
})
if endFrame {
q.float32sBuffer[q.float32sBufferIndex] = q.float32sBuffer[q.float32sBufferIndex][:0]
q.float32sBufferIndex++
q.float32sBufferIndex %= len(q.float32sBuffer)
q.float32sBuffer.reset()
q.float32SlicesBuffer.reset()
}
return
}
@ -590,29 +586,20 @@ func roundUpPower2(x int) int {
return p2
}
func (q *commandQueue) allocFloat32s(n int) []float32 {
buf := q.float32sBuffer[q.float32sBufferIndex]
if len(buf)+n > cap(buf) {
buf = make([]float32, 0, max(roundUpPower2(len(buf)+n), 16))
}
s := buf[len(buf) : len(buf)+n]
q.float32sBuffer[q.float32sBufferIndex] = buf[:len(buf)+n]
return s
}
func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 {
uniforms = append(uniforms, make([][]float32, graphics.PreservedUniformVariablesCount)...)
copy(uniforms[graphics.PreservedUniformVariablesCount:], uniforms)
origUniforms := uniforms
uniforms = q.float32SlicesBuffer.alloc(len(origUniforms) + graphics.PreservedUniformVariablesCount)
copy(uniforms[graphics.PreservedUniformVariablesCount:], origUniforms)
// Set the destination texture size.
dw, dh := dst.InternalSize()
udstsize := q.allocFloat32s(2)
udstsize := q.float32sBuffer.alloc(2)
udstsize[0] = float32(dw)
udstsize[1] = float32(dh)
uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize
// Set the source texture sizes.
usizes := q.allocFloat32s(2 * len(srcs))
usizes := q.float32sBuffer.alloc(2 * len(srcs))
for i, src := range srcs {
if src != nil {
w, h := src.InternalSize()
@ -623,12 +610,12 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes
// Set the destination region.
udstrorig := q.allocFloat32s(2)
udstrorig := q.float32sBuffer.alloc(2)
udstrorig[0] = float32(dstRegion.X) / float32(dw)
udstrorig[1] = float32(dstRegion.Y) / float32(dh)
uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig
udstrsize := q.allocFloat32s(2)
udstrsize := q.float32sBuffer.alloc(2)
udstrsize[0] = float32(dstRegion.Width) / float32(dw)
udstrsize[1] = float32(dstRegion.Height) / float32(dh)
uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize
@ -646,7 +633,7 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
}
// Set the source offsets.
uoffsets := q.allocFloat32s(2 * len(offsets))
uoffsets := q.float32sBuffer.alloc(2 * len(offsets))
for i, offset := range offsets {
uoffsets[2*i] = offset[0]
uoffsets[2*i+1] = offset[1]
@ -654,17 +641,17 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets
// Set the source region of texture0.
usrcrorig := q.allocFloat32s(2)
usrcrorig := q.float32sBuffer.alloc(2)
usrcrorig[0] = float32(srcRegion.X)
usrcrorig[1] = float32(srcRegion.Y)
uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig
usrcrsize := q.allocFloat32s(2)
usrcrsize := q.float32sBuffer.alloc(2)
usrcrsize[0] = float32(srcRegion.Width)
usrcrsize[1] = float32(srcRegion.Height)
uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize
umatrix := q.allocFloat32s(16)
umatrix := q.float32sBuffer.alloc(16)
umatrix[0] = 2 / float32(dw)
umatrix[1] = 0
umatrix[2] = 0
@ -685,3 +672,28 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
return uniforms
}
// buffer is a reusable buffer to allocate []T.
type buffer[T any] struct {
buf [2][]T
// index is switched at the end of the frame,
// and the buffer of the original index is kept until the next frame ends.
index int
}
func (b *buffer[T]) alloc(n int) []T {
buf := b.buf[b.index]
if len(buf)+n > cap(buf) {
buf = make([]T, 0, max(roundUpPower2(len(buf)+n), 16))
}
s := buf[len(buf) : len(buf)+n]
b.buf[b.index] = buf[:len(buf)+n]
return s
}
func (b *buffer[T]) reset() {
b.buf[b.index] = b.buf[b.index][:0]
b.index++
b.index %= len(b.buf)
}

View File

@ -18,7 +18,6 @@ import (
"fmt"
"strings"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/mipmap"
"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
)
@ -73,8 +72,7 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) [][]float32 {
}
}
// Allocate a new slice with an extra capacity. This slice's length is extended at internal/graphicscommand.
us := make([][]float32, len(s.uniformNameToIndex), len(s.uniformNameToIndex)+graphics.PreservedUniformVariablesCount)
us := make([][]float32, len(s.uniformNameToIndex))
for name, idx := range s.uniformNameToIndex {
if v, ok := nameToF32s[name]; ok {
us[idx] = v