internal/graphicscommand: improve performance of prependPreservedUniforms

```
% benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand
cpu: Apple M3 Pro
                            │   old.txt   │               new.txt               │
                            │   sec/op    │   sec/op     vs base                │
PrependPreservedUniforms-12   23.07n ± 2%   19.96n ± 0%  -13.48% (p=0.000 n=10)
```

Updates #3144
This commit is contained in:
Hajime Hoshi 2024-10-26 00:20:13 +09:00
parent 9f5f53c041
commit 1fe988eabf

View File

@@ -354,23 +354,45 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 { func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 {
// Set the destination texture size. // Set the destination texture size.
// Hard-code indices for BCE optimization.
_ = uniforms[graphics.PreservedUniformUint32Count-1]
dw, dh := dst.InternalSize() dw, dh := dst.InternalSize()
uniforms[0] = math.Float32bits(float32(dw)) uniforms[0] = math.Float32bits(float32(dw))
uniforms[1] = math.Float32bits(float32(dh)) uniforms[1] = math.Float32bits(float32(dh))
uniformIndex := 2
for i := 0; i < graphics.ShaderSrcImageCount; i++ { if srcs[0] != nil {
var floatW, floatH uint32 w, h := srcs[0].InternalSize()
if srcs[i] != nil { uniforms[2] = math.Float32bits(float32(w))
w, h := srcs[i].InternalSize() uniforms[3] = math.Float32bits(float32(h))
floatW = math.Float32bits(float32(w)) } else {
floatH = math.Float32bits(float32(h)) uniforms[2] = 0
} uniforms[3] = 0
}
uniforms[uniformIndex+i*2] = floatW if srcs[1] != nil {
uniforms[uniformIndex+1+i*2] = floatH w, h := srcs[1].InternalSize()
uniforms[4] = math.Float32bits(float32(w))
uniforms[5] = math.Float32bits(float32(h))
} else {
uniforms[4] = 0
uniforms[5] = 0
}
if srcs[2] != nil {
w, h := srcs[2].InternalSize()
uniforms[6] = math.Float32bits(float32(w))
uniforms[7] = math.Float32bits(float32(h))
} else {
uniforms[6] = 0
uniforms[7] = 0
}
if srcs[3] != nil {
w, h := srcs[3].InternalSize()
uniforms[8] = math.Float32bits(float32(w))
uniforms[9] = math.Float32bits(float32(h))
} else {
uniforms[8] = 0
uniforms[9] = 0
} }
uniformIndex += graphics.ShaderSrcImageCount * 2
dr := imageRectangleToRectangleF32(dstRegion) dr := imageRectangleToRectangleF32(dstRegion)
if shader.unit() == shaderir.Texels { if shader.unit() == shaderir.Texels {
@@ -381,14 +403,12 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
} }
// Set the destination region origin. // Set the destination region origin.
uniforms[uniformIndex] = math.Float32bits(dr.x) uniforms[10] = math.Float32bits(dr.x)
uniforms[uniformIndex+1] = math.Float32bits(dr.y) uniforms[11] = math.Float32bits(dr.y)
uniformIndex += 2
// Set the destination region size. // Set the destination region size.
uniforms[uniformIndex] = math.Float32bits(dr.width) uniforms[12] = math.Float32bits(dr.width)
uniforms[uniformIndex+1] = math.Float32bits(dr.height) uniforms[13] = math.Float32bits(dr.height)
uniformIndex += 2
var srs [graphics.ShaderSrcImageCount]rectangleF32 var srs [graphics.ShaderSrcImageCount]rectangleF32
for i, r := range srcRegions { for i, r := range srcRegions {
@@ -408,36 +428,45 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
} }
// Set the source region origins. // Set the source region origins.
for i := 0; i < graphics.ShaderSrcImageCount; i++ { uniforms[14] = math.Float32bits(srs[0].x)
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].x) uniforms[15] = math.Float32bits(srs[0].y)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].y) uniforms[16] = math.Float32bits(srs[1].x)
} uniforms[17] = math.Float32bits(srs[1].y)
uniformIndex += graphics.ShaderSrcImageCount * 2 uniforms[18] = math.Float32bits(srs[2].x)
uniforms[19] = math.Float32bits(srs[2].y)
uniforms[20] = math.Float32bits(srs[3].x)
uniforms[21] = math.Float32bits(srs[3].y)
// Set the source region sizes. // Set the source region sizes.
for i := 0; i < graphics.ShaderSrcImageCount; i++ { uniforms[22] = math.Float32bits(srs[0].width)
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].width) uniforms[23] = math.Float32bits(srs[0].height)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].height) uniforms[24] = math.Float32bits(srs[1].width)
} uniforms[25] = math.Float32bits(srs[1].height)
uniformIndex += graphics.ShaderSrcImageCount * 2 uniforms[26] = math.Float32bits(srs[2].width)
uniforms[27] = math.Float32bits(srs[2].height)
uniforms[28] = math.Float32bits(srs[3].width)
uniforms[29] = math.Float32bits(srs[3].height)
// Set the projection matrix. // Set the projection matrix.
uniforms[uniformIndex] = math.Float32bits(2 / float32(dw)) uniforms[30] = math.Float32bits(2 / float32(dw))
uniforms[uniformIndex+1] = 0 uniforms[31] = 0
uniforms[uniformIndex+2] = 0 uniforms[32] = 0
uniforms[uniformIndex+3] = 0 uniforms[33] = 0
uniforms[uniformIndex+4] = 0 uniforms[34] = 0
uniforms[uniformIndex+5] = math.Float32bits(2 / float32(dh)) uniforms[35] = math.Float32bits(2 / float32(dh))
uniforms[uniformIndex+6] = 0 uniforms[36] = 0
uniforms[uniformIndex+7] = 0 uniforms[37] = 0
uniforms[uniformIndex+8] = 0 uniforms[38] = 0
uniforms[uniformIndex+9] = 0 uniforms[39] = 0
uniforms[uniformIndex+10] = math.Float32bits(1) uniforms[40] = math.Float32bits(1)
uniforms[uniformIndex+11] = 0 uniforms[41] = 0
uniforms[uniformIndex+12] = math.Float32bits(-1) uniforms[42] = math.Float32bits(-1)
uniforms[uniformIndex+13] = math.Float32bits(-1) uniforms[43] = math.Float32bits(-1)
uniforms[uniformIndex+14] = 0 uniforms[44] = 0
uniforms[uniformIndex+15] = math.Float32bits(1) uniforms[45] = math.Float32bits(1)
// Confirm the concrete value of graphics.PreservedUniformUint32Count.
var _ [0]struct{} = [graphics.PreservedUniformUint32Count - 46]struct{}{}
return uniforms return uniforms
} }