mirror of
https://github.com/hajimehoshi/ebiten.git
synced 2025-01-11 19:48:54 +01:00
internal/graphicscommand: use buffers for []float32 to reduce allocations
This commit is contained in:
parent
d11c46ef1f
commit
50021ef3e4
@ -71,11 +71,8 @@ type commandQueue struct {
|
|||||||
|
|
||||||
drawTrianglesCommandPool drawTrianglesCommandPool
|
drawTrianglesCommandPool drawTrianglesCommandPool
|
||||||
|
|
||||||
// float32sBuffer is a reusable buffer to allocate []float32.
|
float32sBuffer buffer[float32]
|
||||||
// float32sBuffer's index is switched at the end of the frame,
|
float32SlicesBuffer buffer[[]float32]
|
||||||
// and the buffer of the original index is kept until the next frame ends.
|
|
||||||
float32sBuffer [2][]float32
|
|
||||||
float32sBufferIndex int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// theCommandQueue is the command queue for the current process.
|
// theCommandQueue is the command queue for the current process.
|
||||||
@ -172,9 +169,8 @@ func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bo
|
|||||||
err = q.flush(graphicsDriver, endFrame)
|
err = q.flush(graphicsDriver, endFrame)
|
||||||
})
|
})
|
||||||
if endFrame {
|
if endFrame {
|
||||||
q.float32sBuffer[q.float32sBufferIndex] = q.float32sBuffer[q.float32sBufferIndex][:0]
|
q.float32sBuffer.reset()
|
||||||
q.float32sBufferIndex++
|
q.float32SlicesBuffer.reset()
|
||||||
q.float32sBufferIndex %= len(q.float32sBuffer)
|
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -590,29 +586,20 @@ func roundUpPower2(x int) int {
|
|||||||
return p2
|
return p2
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *commandQueue) allocFloat32s(n int) []float32 {
|
|
||||||
buf := q.float32sBuffer[q.float32sBufferIndex]
|
|
||||||
if len(buf)+n > cap(buf) {
|
|
||||||
buf = make([]float32, 0, max(roundUpPower2(len(buf)+n), 16))
|
|
||||||
}
|
|
||||||
s := buf[len(buf) : len(buf)+n]
|
|
||||||
q.float32sBuffer[q.float32sBufferIndex] = buf[:len(buf)+n]
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 {
|
func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image, srcs [graphics.ShaderImageCount]*Image, offsets [graphics.ShaderImageCount - 1][2]float32, dstRegion, srcRegion graphicsdriver.Region) [][]float32 {
|
||||||
uniforms = append(uniforms, make([][]float32, graphics.PreservedUniformVariablesCount)...)
|
origUniforms := uniforms
|
||||||
copy(uniforms[graphics.PreservedUniformVariablesCount:], uniforms)
|
uniforms = q.float32SlicesBuffer.alloc(len(origUniforms) + graphics.PreservedUniformVariablesCount)
|
||||||
|
copy(uniforms[graphics.PreservedUniformVariablesCount:], origUniforms)
|
||||||
|
|
||||||
// Set the destination texture size.
|
// Set the destination texture size.
|
||||||
dw, dh := dst.InternalSize()
|
dw, dh := dst.InternalSize()
|
||||||
udstsize := q.allocFloat32s(2)
|
udstsize := q.float32sBuffer.alloc(2)
|
||||||
udstsize[0] = float32(dw)
|
udstsize[0] = float32(dw)
|
||||||
udstsize[1] = float32(dh)
|
udstsize[1] = float32(dh)
|
||||||
uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize
|
uniforms[graphics.TextureDestinationSizeUniformVariableIndex] = udstsize
|
||||||
|
|
||||||
// Set the source texture sizes.
|
// Set the source texture sizes.
|
||||||
usizes := q.allocFloat32s(2 * len(srcs))
|
usizes := q.float32sBuffer.alloc(2 * len(srcs))
|
||||||
for i, src := range srcs {
|
for i, src := range srcs {
|
||||||
if src != nil {
|
if src != nil {
|
||||||
w, h := src.InternalSize()
|
w, h := src.InternalSize()
|
||||||
@ -623,12 +610,12 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
|
|||||||
uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes
|
uniforms[graphics.TextureSourceSizesUniformVariableIndex] = usizes
|
||||||
|
|
||||||
// Set the destination region.
|
// Set the destination region.
|
||||||
udstrorig := q.allocFloat32s(2)
|
udstrorig := q.float32sBuffer.alloc(2)
|
||||||
udstrorig[0] = float32(dstRegion.X) / float32(dw)
|
udstrorig[0] = float32(dstRegion.X) / float32(dw)
|
||||||
udstrorig[1] = float32(dstRegion.Y) / float32(dh)
|
udstrorig[1] = float32(dstRegion.Y) / float32(dh)
|
||||||
uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig
|
uniforms[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udstrorig
|
||||||
|
|
||||||
udstrsize := q.allocFloat32s(2)
|
udstrsize := q.float32sBuffer.alloc(2)
|
||||||
udstrsize[0] = float32(dstRegion.Width) / float32(dw)
|
udstrsize[0] = float32(dstRegion.Width) / float32(dw)
|
||||||
udstrsize[1] = float32(dstRegion.Height) / float32(dh)
|
udstrsize[1] = float32(dstRegion.Height) / float32(dh)
|
||||||
uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize
|
uniforms[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udstrsize
|
||||||
@ -646,7 +633,7 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Set the source offsets.
|
// Set the source offsets.
|
||||||
uoffsets := q.allocFloat32s(2 * len(offsets))
|
uoffsets := q.float32sBuffer.alloc(2 * len(offsets))
|
||||||
for i, offset := range offsets {
|
for i, offset := range offsets {
|
||||||
uoffsets[2*i] = offset[0]
|
uoffsets[2*i] = offset[0]
|
||||||
uoffsets[2*i+1] = offset[1]
|
uoffsets[2*i+1] = offset[1]
|
||||||
@ -654,17 +641,17 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
|
|||||||
uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets
|
uniforms[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets
|
||||||
|
|
||||||
// Set the source region of texture0.
|
// Set the source region of texture0.
|
||||||
usrcrorig := q.allocFloat32s(2)
|
usrcrorig := q.float32sBuffer.alloc(2)
|
||||||
usrcrorig[0] = float32(srcRegion.X)
|
usrcrorig[0] = float32(srcRegion.X)
|
||||||
usrcrorig[1] = float32(srcRegion.Y)
|
usrcrorig[1] = float32(srcRegion.Y)
|
||||||
uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig
|
uniforms[graphics.TextureSourceRegionOriginUniformVariableIndex] = usrcrorig
|
||||||
|
|
||||||
usrcrsize := q.allocFloat32s(2)
|
usrcrsize := q.float32sBuffer.alloc(2)
|
||||||
usrcrsize[0] = float32(srcRegion.Width)
|
usrcrsize[0] = float32(srcRegion.Width)
|
||||||
usrcrsize[1] = float32(srcRegion.Height)
|
usrcrsize[1] = float32(srcRegion.Height)
|
||||||
uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize
|
uniforms[graphics.TextureSourceRegionSizeUniformVariableIndex] = usrcrsize
|
||||||
|
|
||||||
umatrix := q.allocFloat32s(16)
|
umatrix := q.float32sBuffer.alloc(16)
|
||||||
umatrix[0] = 2 / float32(dw)
|
umatrix[0] = 2 / float32(dw)
|
||||||
umatrix[1] = 0
|
umatrix[1] = 0
|
||||||
umatrix[2] = 0
|
umatrix[2] = 0
|
||||||
@ -685,3 +672,28 @@ func (q *commandQueue) prependPreservedUniforms(uniforms [][]float32, dst *Image
|
|||||||
|
|
||||||
return uniforms
|
return uniforms
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// buffer is a reusable buffer to allocate []T.
|
||||||
|
type buffer[T any] struct {
|
||||||
|
buf [2][]T
|
||||||
|
|
||||||
|
// index is switched at the end of the frame,
|
||||||
|
// and the buffer of the original index is kept until the next frame ends.
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *buffer[T]) alloc(n int) []T {
|
||||||
|
buf := b.buf[b.index]
|
||||||
|
if len(buf)+n > cap(buf) {
|
||||||
|
buf = make([]T, 0, max(roundUpPower2(len(buf)+n), 16))
|
||||||
|
}
|
||||||
|
s := buf[len(buf) : len(buf)+n]
|
||||||
|
b.buf[b.index] = buf[:len(buf)+n]
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *buffer[T]) reset() {
|
||||||
|
b.buf[b.index] = b.buf[b.index][:0]
|
||||||
|
b.index++
|
||||||
|
b.index %= len(b.buf)
|
||||||
|
}
|
||||||
|
@ -18,7 +18,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
|
|
||||||
"github.com/hajimehoshi/ebiten/v2/internal/mipmap"
|
"github.com/hajimehoshi/ebiten/v2/internal/mipmap"
|
||||||
"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
|
"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
|
||||||
)
|
)
|
||||||
@ -73,8 +72,7 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) [][]float32 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate a new slice with an extra capacity. This slice's length is extended at internal/graphicscommand.
|
us := make([][]float32, len(s.uniformNameToIndex))
|
||||||
us := make([][]float32, len(s.uniformNameToIndex), len(s.uniformNameToIndex)+graphics.PreservedUniformVariablesCount)
|
|
||||||
for name, idx := range s.uniformNameToIndex {
|
for name, idx := range s.uniformNameToIndex {
|
||||||
if v, ok := nameToF32s[name]; ok {
|
if v, ok := nameToF32s[name]; ok {
|
||||||
us[idx] = v
|
us[idx] = v
|
||||||
|
Loading…
Reference in New Issue
Block a user