Compare commits

...

3 Commits

Author SHA1 Message Date
Hajime Hoshi
ecc42d4042 internal/graphicscommand: move the const check outside of the function
old.txt: 1fe988eabf
new.txt: this commit

```
benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand
cpu: Apple M3 Pro
                            │   old.txt   │              new.txt               │
                            │   sec/op    │   sec/op     vs base               │
PrependPreservedUniforms-12   19.79n ± 4%   18.84n ± 2%  -4.80% (p=0.001 n=10)
```

Updates #3144
2024-10-26 13:03:37 +09:00
Hajime Hoshi
1fe988eabf internal/graphicscommand: improve performance of prependPreservedUniforms
```
% benchstat old.txt new.txt
goos: darwin
goarch: arm64
pkg: github.com/hajimehoshi/ebiten/v2/internal/graphicscommand
cpu: Apple M3 Pro
                            │   old.txt   │               new.txt               │
                            │   sec/op    │   sec/op     vs base                │
PrependPreservedUniforms-12   23.07n ± 2%   19.96n ± 0%  -13.48% (p=0.000 n=10)
```

Updates #3144
2024-10-26 01:18:06 +09:00
Hajime Hoshi
9f5f53c041 internal/graphicscommand: add BenchmarkPrependPreservedUniforms
Updates #3144
2024-10-26 01:15:12 +09:00
3 changed files with 120 additions and 44 deletions

View File

@ -0,0 +1,34 @@
// Copyright 2024 The Ebitengine Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package graphicscommand_test
import (
"image"
"testing"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/graphicscommand"
)
func BenchmarkPrependPreservedUniforms(b *testing.B) {
var uniforms [graphics.PreservedUniformUint32Count]uint32
dst := graphicscommand.NewImage(16, 16, false, "")
src := graphicscommand.NewImage(16, 16, false, "")
dr := image.Rect(0, 0, 16, 16)
sr := image.Rect(0, 0, 16, 16)
for i := 0; i < b.N; i++ {
graphicscommand.PrependPreservedUniforms(uniforms[:], nearestFilterShader, dst, [graphics.ShaderSrcImageCount]*graphicscommand.Image{src}, dr, [graphics.ShaderSrcImageCount]image.Rectangle{sr})
}
}

View File

@ -349,25 +349,50 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
origUniforms := uniforms
uniforms = q.uint32sBuffer.alloc(len(origUniforms) + graphics.PreservedUniformUint32Count)
copy(uniforms[graphics.PreservedUniformUint32Count:], origUniforms)
return prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions)
}
func prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 {
// Set the destination texture size.
// Hard-code indices for BCE optimization.
_ = uniforms[graphics.PreservedUniformUint32Count-1]
dw, dh := dst.InternalSize()
uniforms[0] = math.Float32bits(float32(dw))
uniforms[1] = math.Float32bits(float32(dh))
uniformIndex := 2
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
var floatW, floatH uint32
if srcs[i] != nil {
w, h := srcs[i].InternalSize()
floatW = math.Float32bits(float32(w))
floatH = math.Float32bits(float32(h))
}
uniforms[uniformIndex+i*2] = floatW
uniforms[uniformIndex+1+i*2] = floatH
if srcs[0] != nil {
w, h := srcs[0].InternalSize()
uniforms[2] = math.Float32bits(float32(w))
uniforms[3] = math.Float32bits(float32(h))
} else {
uniforms[2] = 0
uniforms[3] = 0
}
if srcs[1] != nil {
w, h := srcs[1].InternalSize()
uniforms[4] = math.Float32bits(float32(w))
uniforms[5] = math.Float32bits(float32(h))
} else {
uniforms[4] = 0
uniforms[5] = 0
}
if srcs[2] != nil {
w, h := srcs[2].InternalSize()
uniforms[6] = math.Float32bits(float32(w))
uniforms[7] = math.Float32bits(float32(h))
} else {
uniforms[6] = 0
uniforms[7] = 0
}
if srcs[3] != nil {
w, h := srcs[3].InternalSize()
uniforms[8] = math.Float32bits(float32(w))
uniforms[9] = math.Float32bits(float32(h))
} else {
uniforms[8] = 0
uniforms[9] = 0
}
uniformIndex += graphics.ShaderSrcImageCount * 2
dr := imageRectangleToRectangleF32(dstRegion)
if shader.unit() == shaderir.Texels {
@ -378,14 +403,12 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
}
// Set the destination region origin.
uniforms[uniformIndex] = math.Float32bits(dr.x)
uniforms[uniformIndex+1] = math.Float32bits(dr.y)
uniformIndex += 2
uniforms[10] = math.Float32bits(dr.x)
uniforms[11] = math.Float32bits(dr.y)
// Set the destination region size.
uniforms[uniformIndex] = math.Float32bits(dr.width)
uniforms[uniformIndex+1] = math.Float32bits(dr.height)
uniformIndex += 2
uniforms[12] = math.Float32bits(dr.width)
uniforms[13] = math.Float32bits(dr.height)
var srs [graphics.ShaderSrcImageCount]rectangleF32
for i, r := range srcRegions {
@ -405,40 +428,49 @@ func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shade
}
// Set the source region origins.
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].x)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].y)
}
uniformIndex += graphics.ShaderSrcImageCount * 2
uniforms[14] = math.Float32bits(srs[0].x)
uniforms[15] = math.Float32bits(srs[0].y)
uniforms[16] = math.Float32bits(srs[1].x)
uniforms[17] = math.Float32bits(srs[1].y)
uniforms[18] = math.Float32bits(srs[2].x)
uniforms[19] = math.Float32bits(srs[2].y)
uniforms[20] = math.Float32bits(srs[3].x)
uniforms[21] = math.Float32bits(srs[3].y)
// Set the source region sizes.
for i := 0; i < graphics.ShaderSrcImageCount; i++ {
uniforms[uniformIndex+i*2] = math.Float32bits(srs[i].width)
uniforms[uniformIndex+1+i*2] = math.Float32bits(srs[i].height)
}
uniformIndex += graphics.ShaderSrcImageCount * 2
uniforms[22] = math.Float32bits(srs[0].width)
uniforms[23] = math.Float32bits(srs[0].height)
uniforms[24] = math.Float32bits(srs[1].width)
uniforms[25] = math.Float32bits(srs[1].height)
uniforms[26] = math.Float32bits(srs[2].width)
uniforms[27] = math.Float32bits(srs[2].height)
uniforms[28] = math.Float32bits(srs[3].width)
uniforms[29] = math.Float32bits(srs[3].height)
// Set the projection matrix.
uniforms[uniformIndex] = math.Float32bits(2 / float32(dw))
uniforms[uniformIndex+1] = 0
uniforms[uniformIndex+2] = 0
uniforms[uniformIndex+3] = 0
uniforms[uniformIndex+4] = 0
uniforms[uniformIndex+5] = math.Float32bits(2 / float32(dh))
uniforms[uniformIndex+6] = 0
uniforms[uniformIndex+7] = 0
uniforms[uniformIndex+8] = 0
uniforms[uniformIndex+9] = 0
uniforms[uniformIndex+10] = math.Float32bits(1)
uniforms[uniformIndex+11] = 0
uniforms[uniformIndex+12] = math.Float32bits(-1)
uniforms[uniformIndex+13] = math.Float32bits(-1)
uniforms[uniformIndex+14] = 0
uniforms[uniformIndex+15] = math.Float32bits(1)
uniforms[30] = math.Float32bits(2 / float32(dw))
uniforms[31] = 0
uniforms[32] = 0
uniforms[33] = 0
uniforms[34] = 0
uniforms[35] = math.Float32bits(2 / float32(dh))
uniforms[36] = 0
uniforms[37] = 0
uniforms[38] = 0
uniforms[39] = 0
uniforms[40] = math.Float32bits(1)
uniforms[41] = 0
uniforms[42] = math.Float32bits(-1)
uniforms[43] = math.Float32bits(-1)
uniforms[44] = 0
uniforms[45] = math.Float32bits(1)
return uniforms
}
// Confirm the concrete value of graphics.PreservedUniformUint32Count.
//
// This is a compile-time assertion: the assignment below only type-checks when
// graphics.PreservedUniformUint32Count - 46 is exactly 0, so the build fails
// as soon as the count differs from 46. prependPreservedUniforms writes to the
// hard-coded indices 0 through 45 and therefore depends on this exact value.
var _ [0]struct{} = [graphics.PreservedUniformUint32Count - 46]struct{}{}
type commandQueuePool struct {
cache []*commandQueue
m sync.Mutex

View File

@ -14,8 +14,18 @@
package graphicscommand
import (
"image"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
)
// WritePixelsCommandArgs is an exported alias of the unexported
// writePixelsCommandArgs type.
// NOTE(review): presumably exposed so tests outside this package can name the
// type — confirm against the callers.
type WritePixelsCommandArgs = writePixelsCommandArgs
// BufferedWritePixelsArgsForTesting returns the image's
// bufferedWritePixelsArgs slice directly; no copy is made, so the result
// shares its backing storage with the image.
func (i *Image) BufferedWritePixelsArgsForTesting() []WritePixelsCommandArgs {
return i.bufferedWritePixelsArgs
}
// PrependPreservedUniforms is an exported wrapper that forwards all of its
// arguments unchanged to the unexported prependPreservedUniforms and returns
// that function's result.
func PrependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 {
return prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions)
}