internal/graphicscommand: improve performance of prependPreservedUniforms

```
% benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand
cpu: Apple M3 Pro
                            │   old.txt   │               new.txt               │
                            │   sec/op    │   sec/op     vs base                │
PrependPreservedUniforms-12   23.07n ± 2%   19.96n ± 0%  -13.48% (p=0.000 n=10)
```

Updates #3144
This commit is contained in:
Hajime Hoshi 2024-10-26 00:20:13 +09:00
parent 9f5f53c041
commit 1fe988eabf

View File

@@ -354,23 +354,45 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 {
// Set the destination texture size.
// Hard-code indices for BCE optimization.
_ = uniforms[graphics.PreservedUniformUint32Count-1]
dw, dh := dst.InternalSize()
uniforms[0] = math.Float32bits(float32(dw))
uniforms[1] = math.Float32bits(float32(dh))
uniformIndex := 2
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
var floatW, floatH uint32
if srcs[i] != nil {
w, h := srcs[i].InternalSize()
floatW = math.Float32bits(float32(w))
floatH = math.Float32bits(float32(h))
}
uniforms[uniformIndex+i*2] = floatW
uniforms[uniformIndex+1+i*2] = floatH
if srcs[0] != nil {
w, h := srcs[0].InternalSize()
uniforms[2] = math.Float32bits(float32(w))
uniforms[3] = math.Float32bits(float32(h))
} else {
uniforms[2] = 0
uniforms[3] = 0
}
if srcs[1] != nil {
w, h := srcs[1].InternalSize()
uniforms[4] = math.Float32bits(float32(w))
uniforms[5] = math.Float32bits(float32(h))
} else {
uniforms[4] = 0
uniforms[5] = 0
}
if srcs[2] != nil {
w, h := srcs[2].InternalSize()
uniforms[6] = math.Float32bits(float32(w))
uniforms[7] = math.Float32bits(float32(h))
} else {
uniforms[6] = 0
uniforms[7] = 0
}
if srcs[3] != nil {
w, h := srcs[3].InternalSize()
uniforms[8] = math.Float32bits(float32(w))
uniforms[9] = math.Float32bits(float32(h))
} else {
uniforms[8] = 0
uniforms[9] = 0
}
uniformIndex += graphics.ShaderSrcImageCount * 2
dr := imageRectangleToRectangleF32(dstRegion)
if shader.unit() == shaderir.Texels {
@@ -381,14 +403,12 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
}
// Set the destination region origin.
uniforms[uniformIndex] = math.Float32bits(dr.x)
uniforms[uniformIndex+1] = math.Float32bits(dr.y)
uniformIndex += 2
uniforms[10] = math.Float32bits(dr.x)
uniforms[11] = math.Float32bits(dr.y)
// Set the destination region size.
uniforms[uniformIndex] = math.Float32bits(dr.width)
uniforms[uniformIndex+1] = math.Float32bits(dr.height)
uniformIndex += 2
uniforms[12] = math.Float32bits(dr.width)
uniforms[13] = math.Float32bits(dr.height)
var srs [graphics.ShaderSrcImageCount]rectangleF32
for i, r := range srcRegions {
@@ -408,36 +428,45 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
}
// Set the source region origins.
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].x)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].y)
}
uniformIndex += graphics.ShaderSrcImageCount * 2
uniforms[14] = math.Float32bits(srs[0].x)
uniforms[15] = math.Float32bits(srs[0].y)
uniforms[16] = math.Float32bits(srs[1].x)
uniforms[17] = math.Float32bits(srs[1].y)
uniforms[18] = math.Float32bits(srs[2].x)
uniforms[19] = math.Float32bits(srs[2].y)
uniforms[20] = math.Float32bits(srs[3].x)
uniforms[21] = math.Float32bits(srs[3].y)
// Set the source region sizes.
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].width)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].height)
}
uniformIndex += graphics.ShaderSrcImageCount * 2
uniforms[22] = math.Float32bits(srs[0].width)
uniforms[23] = math.Float32bits(srs[0].height)
uniforms[24] = math.Float32bits(srs[1].width)
uniforms[25] = math.Float32bits(srs[1].height)
uniforms[26] = math.Float32bits(srs[2].width)
uniforms[27] = math.Float32bits(srs[2].height)
uniforms[28] = math.Float32bits(srs[3].width)
uniforms[29] = math.Float32bits(srs[3].height)
// Set the projection matrix.
uniforms[uniformIndex] = math.Float32bits(2 / float32(dw))
uniforms[uniformIndex+1] = 0
uniforms[uniformIndex+2] = 0
uniforms[uniformIndex+3] = 0
uniforms[uniformIndex+4] = 0
uniforms[uniformIndex+5] = math.Float32bits(2 / float32(dh))
uniforms[uniformIndex+6] = 0
uniforms[uniformIndex+7] = 0
uniforms[uniformIndex+8] = 0
uniforms[uniformIndex+9] = 0
uniforms[uniformIndex+10] = math.Float32bits(1)
uniforms[uniformIndex+11] = 0
uniforms[uniformIndex+12] = math.Float32bits(-1)
uniforms[uniformIndex+13] = math.Float32bits(-1)
uniforms[uniformIndex+14] = 0
uniforms[uniformIndex+15] = math.Float32bits(1)
uniforms[30] = math.Float32bits(2 / float32(dw))
uniforms[31] = 0
uniforms[32] = 0
uniforms[33] = 0
uniforms[34] = 0
uniforms[35] = math.Float32bits(2 / float32(dh))
uniforms[36] = 0
uniforms[37] = 0
uniforms[38] = 0
uniforms[39] = 0
uniforms[40] = math.Float32bits(1)
uniforms[41] = 0
uniforms[42] = math.Float32bits(-1)
uniforms[43] = math.Float32bits(-1)
uniforms[44] = 0
uniforms[45] = math.Float32bits(1)
// Confirm the concrete value of graphics.PreservedUniformUint32Count.
var _ [0]struct{} = [graphics.PreservedUniformUint32Count - 46]struct{}{}
return uniforms
}