mirror of
https://github.com/hajimehoshi/ebiten.git
synced 2025-01-27 19:22:49 +01:00
internal/graphicscommand: improve performance of prependPreservedUniforms
```
% benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand
cpu: Apple M3 Pro
                            │   old.txt   │              new.txt               │
                            │   sec/op    │   sec/op     vs base               │
PrependPreservedUniforms-12   23.07n ± 2%   19.96n ± 0%  -13.48% (p=0.000 n=10)
```

Updates #3144
This commit is contained in:
parent
9f5f53c041
commit
1fe988eabf
@ -354,23 +354,45 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
|
||||
|
||||
func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 {
|
||||
// Set the destination texture size.
|
||||
// Hard-code indices for BCE optimization.
|
||||
_ = uniforms[graphics.PreservedUniformUint32Count-1]
|
||||
|
||||
dw, dh := dst.InternalSize()
|
||||
uniforms[0] = math.Float32bits(float32(dw))
|
||||
uniforms[1] = math.Float32bits(float32(dh))
|
||||
uniformIndex := 2
|
||||
|
||||
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
|
||||
var floatW, floatH uint32
|
||||
if srcs[i] != nil {
|
||||
w, h := srcs[i].InternalSize()
|
||||
floatW = math.Float32bits(float32(w))
|
||||
floatH = math.Float32bits(float32(h))
|
||||
}
|
||||
|
||||
uniforms[uniformIndex+i*2] = floatW
|
||||
uniforms[uniformIndex+1+i*2] = floatH
|
||||
if srcs[0] != nil {
|
||||
w, h := srcs[0].InternalSize()
|
||||
uniforms[2] = math.Float32bits(float32(w))
|
||||
uniforms[3] = math.Float32bits(float32(h))
|
||||
} else {
|
||||
uniforms[2] = 0
|
||||
uniforms[3] = 0
|
||||
}
|
||||
if srcs[1] != nil {
|
||||
w, h := srcs[1].InternalSize()
|
||||
uniforms[4] = math.Float32bits(float32(w))
|
||||
uniforms[5] = math.Float32bits(float32(h))
|
||||
} else {
|
||||
uniforms[4] = 0
|
||||
uniforms[5] = 0
|
||||
}
|
||||
if srcs[2] != nil {
|
||||
w, h := srcs[2].InternalSize()
|
||||
uniforms[6] = math.Float32bits(float32(w))
|
||||
uniforms[7] = math.Float32bits(float32(h))
|
||||
} else {
|
||||
uniforms[6] = 0
|
||||
uniforms[7] = 0
|
||||
}
|
||||
if srcs[3] != nil {
|
||||
w, h := srcs[3].InternalSize()
|
||||
uniforms[8] = math.Float32bits(float32(w))
|
||||
uniforms[9] = math.Float32bits(float32(h))
|
||||
} else {
|
||||
uniforms[8] = 0
|
||||
uniforms[9] = 0
|
||||
}
|
||||
uniformIndex += graphics.ShaderSrcImageCount * 2
|
||||
|
||||
dr := imageRectangleToRectangleF32(dstRegion)
|
||||
if shader.unit() == shaderir.Texels {
|
||||
@ -381,14 +403,12 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
|
||||
}
|
||||
|
||||
// Set the destination region origin.
|
||||
uniforms[uniformIndex] = math.Float32bits(dr.x)
|
||||
uniforms[uniformIndex+1] = math.Float32bits(dr.y)
|
||||
uniformIndex += 2
|
||||
uniforms[10] = math.Float32bits(dr.x)
|
||||
uniforms[11] = math.Float32bits(dr.y)
|
||||
|
||||
// Set the destination region size.
|
||||
uniforms[uniformIndex] = math.Float32bits(dr.width)
|
||||
uniforms[uniformIndex+1] = math.Float32bits(dr.height)
|
||||
uniformIndex += 2
|
||||
uniforms[12] = math.Float32bits(dr.width)
|
||||
uniforms[13] = math.Float32bits(dr.height)
|
||||
|
||||
var srs [graphics.ShaderSrcImageCount]rectangleF32
|
||||
for i, r := range srcRegions {
|
||||
@ -408,36 +428,45 @@ func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, src
|
||||
}
|
||||
|
||||
// Set the source region origins.
|
||||
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
|
||||
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].x)
|
||||
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].y)
|
||||
}
|
||||
uniformIndex += graphics.ShaderSrcImageCount * 2
|
||||
uniforms[14] = math.Float32bits(srs[0].x)
|
||||
uniforms[15] = math.Float32bits(srs[0].y)
|
||||
uniforms[16] = math.Float32bits(srs[1].x)
|
||||
uniforms[17] = math.Float32bits(srs[1].y)
|
||||
uniforms[18] = math.Float32bits(srs[2].x)
|
||||
uniforms[19] = math.Float32bits(srs[2].y)
|
||||
uniforms[20] = math.Float32bits(srs[3].x)
|
||||
uniforms[21] = math.Float32bits(srs[3].y)
|
||||
|
||||
// Set the source region sizes.
|
||||
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
|
||||
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].width)
|
||||
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].height)
|
||||
}
|
||||
uniformIndex += graphics.ShaderSrcImageCount * 2
|
||||
uniforms[22] = math.Float32bits(srs[0].width)
|
||||
uniforms[23] = math.Float32bits(srs[0].height)
|
||||
uniforms[24] = math.Float32bits(srs[1].width)
|
||||
uniforms[25] = math.Float32bits(srs[1].height)
|
||||
uniforms[26] = math.Float32bits(srs[2].width)
|
||||
uniforms[27] = math.Float32bits(srs[2].height)
|
||||
uniforms[28] = math.Float32bits(srs[3].width)
|
||||
uniforms[29] = math.Float32bits(srs[3].height)
|
||||
|
||||
// Set the projection matrix.
|
||||
uniforms[uniformIndex] = math.Float32bits(2 / float32(dw))
|
||||
uniforms[uniformIndex+1] = 0
|
||||
uniforms[uniformIndex+2] = 0
|
||||
uniforms[uniformIndex+3] = 0
|
||||
uniforms[uniformIndex+4] = 0
|
||||
uniforms[uniformIndex+5] = math.Float32bits(2 / float32(dh))
|
||||
uniforms[uniformIndex+6] = 0
|
||||
uniforms[uniformIndex+7] = 0
|
||||
uniforms[uniformIndex+8] = 0
|
||||
uniforms[uniformIndex+9] = 0
|
||||
uniforms[uniformIndex+10] = math.Float32bits(1)
|
||||
uniforms[uniformIndex+11] = 0
|
||||
uniforms[uniformIndex+12] = math.Float32bits(-1)
|
||||
uniforms[uniformIndex+13] = math.Float32bits(-1)
|
||||
uniforms[uniformIndex+14] = 0
|
||||
uniforms[uniformIndex+15] = math.Float32bits(1)
|
||||
uniforms[30] = math.Float32bits(2 / float32(dw))
|
||||
uniforms[31] = 0
|
||||
uniforms[32] = 0
|
||||
uniforms[33] = 0
|
||||
uniforms[34] = 0
|
||||
uniforms[35] = math.Float32bits(2 / float32(dh))
|
||||
uniforms[36] = 0
|
||||
uniforms[37] = 0
|
||||
uniforms[38] = 0
|
||||
uniforms[39] = 0
|
||||
uniforms[40] = math.Float32bits(1)
|
||||
uniforms[41] = 0
|
||||
uniforms[42] = math.Float32bits(-1)
|
||||
uniforms[43] = math.Float32bits(-1)
|
||||
uniforms[44] = 0
|
||||
uniforms[45] = math.Float32bits(1)
|
||||
|
||||
// Confirm the concrete value of graphics.PreservedUniformUint32Count.
|
||||
var _ [0]struct{} = [graphics.PreservedUniformUint32Count - 46]struct{}{}
|
||||
|
||||
return uniforms
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user