From 89c64f83e33f53411731b5d720e6a2579f3bd1d3 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Sat, 3 Dec 2022 21:47:10 +0900 Subject: [PATCH] ebiten: performance improvement by reducing allocations of []uint32 --- image.go | 29 +++++++++++++++++++++++------ internal/buffered/image.go | 5 +++-- internal/restorable/image.go | 5 ++++- internal/ui/shader.go | 24 ++++++++++++------------ shader.go | 8 ++++++-- 5 files changed, 48 insertions(+), 23 deletions(-) diff --git a/image.go b/image.go index 0902782bf..3a749f52d 100644 --- a/image.go +++ b/image.go @@ -43,6 +43,9 @@ type Image struct { // tmpVertices must not be reused until the vertices are sent to the graphics command queue. tmpVertices []float32 + + // tmpUniforms must not be reused until the vertices are sent to the graphics command queue. + tmpUniforms []uint32 } func (i *Image) copyCheck() { @@ -252,12 +255,12 @@ func (i *Image) DrawImage(img *Image, options *DrawImageOptions) { useColorM := !colorm.IsIdentity() shader := builtinShader(filter, builtinshader.AddressUnsafe, useColorM) - var uniforms []uint32 + uniforms := i.ensureTmpUniforms(shader) if useColorM { var body [16]float32 var translation [4]float32 colorm.Elements(body[:], translation[:]) - uniforms = shader.convertUniforms(map[string]any{ + shader.convertUniforms(uniforms, map[string]any{ builtinshader.UniformColorMBody: body[:], builtinshader.UniformColorMTranslation: translation[:], }) @@ -481,12 +484,12 @@ func (i *Image) DrawTriangles(vertices []Vertex, indices []uint16, img *Image, o useColorM := !colorm.IsIdentity() shader := builtinShader(filter, address, useColorM) - var uniforms []uint32 + uniforms := i.ensureTmpUniforms(shader) if useColorM { var body [16]float32 var translation [4]float32 colorm.Elements(body[:], translation[:]) - uniforms = shader.convertUniforms(map[string]any{ + shader.convertUniforms(uniforms, map[string]any{ builtinshader.UniformColorMBody: body[:], builtinshader.UniformColorMTranslation: translation[:], }) @@ 
-643,7 +646,10 @@ func (i *Image) DrawTrianglesShader(vertices []Vertex, indices []uint16, shader offsets[i][1] = float32(y - sy) } - i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, shader.convertUniforms(options.Uniforms), options.FillRule == EvenOdd, true, options.AntiAlias) + uniforms := i.ensureTmpUniforms(shader) + shader.convertUniforms(uniforms, options.Uniforms) + + i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, uniforms, options.FillRule == EvenOdd, true, options.AntiAlias) } // DrawRectShaderOptions represents options for DrawRectShader. @@ -753,7 +759,10 @@ func (i *Image) DrawRectShader(width, height int, shader *Shader, options *DrawR offsets[i][1] = float32(y - sy) } - i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, shader.convertUniforms(options.Uniforms), false, true, false) + uniforms := i.ensureTmpUniforms(shader) + shader.convertUniforms(uniforms, options.Uniforms) + + i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, uniforms, false, true, false) } // SubImage returns an image representing the portion of the image p visible through r. @@ -1155,6 +1164,14 @@ func (i *Image) ensureTmpVertices(n int) []float32 { return i.tmpVertices[:n] } +func (i *Image) ensureTmpUniforms(shader *Shader) []uint32 { + n := shader.uniformUint32Count() + if cap(i.tmpUniforms) < n { + i.tmpUniforms = make([]uint32, n) + } + return i.tmpUniforms[:n] +} + // private implements FinalScreen. func (*Image) private() { } diff --git a/internal/buffered/image.go b/internal/buffered/image.go index fdf0e5673..72cc328f0 100644 --- a/internal/buffered/image.go +++ b/internal/buffered/image.go @@ -152,9 +152,10 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [ copy(vs, vertices) is := make([]uint16, len(indices)) copy(is, indices) - // TODO: Copy uniform variables. 
Now this is created for each call, so copying is not necessary, but this is fragile. + us := make([]uint32, len(uniforms)) + copy(us, uniforms) if tryAddDelayedCommand(func() { - i.DrawTriangles(srcs, vs, is, blend, dstRegion, srcRegion, subimageOffsets, shader, uniforms, evenOdd) + i.DrawTriangles(srcs, vs, is, blend, dstRegion, srcRegion, subimageOffsets, shader, us, evenOdd) }) { return } diff --git a/internal/restorable/image.go b/internal/restorable/image.go index 66ede6797..2402df0e1 100644 --- a/internal/restorable/image.go +++ b/internal/restorable/image.go @@ -423,6 +423,9 @@ func (i *Image) appendDrawTrianglesHistory(srcs [graphics.ShaderImageCount]*Imag is := make([]uint16, len(indices)) copy(is, indices) + us := make([]uint32, len(uniforms)) + copy(us, uniforms) + item := &drawTrianglesHistoryItem{ images: srcs, offsets: offsets, @@ -432,7 +435,7 @@ func (i *Image) appendDrawTrianglesHistory(srcs [graphics.ShaderImageCount]*Imag dstRegion: dstRegion, srcRegion: srcRegion, shader: shader, - uniforms: uniforms, + uniforms: us, evenOdd: evenOdd, } i.drawTrianglesHistory = append(i.drawTrianglesHistory, item) diff --git a/internal/ui/shader.go b/internal/ui/shader.go index a00364741..75cc95f5e 100644 --- a/internal/ui/shader.go +++ b/internal/ui/shader.go @@ -44,13 +44,15 @@ func (s *Shader) MarkDisposed() { s.shader = nil } -func (s *Shader) ConvertUniforms(uniforms map[string]any) []uint32 { +func (s *Shader) UniformUint32Count() int { var n int for _, typ := range s.uniformTypes { n += typ.Uint32Count() } + return n +} - us := make([]uint32, n) +func (s *Shader) ConvertUniforms(dst []uint32, uniforms map[string]any) { var idx int for i, name := range s.uniformNames { typ := s.uniformTypes[i] @@ -61,36 +63,34 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) []uint32 { t := v.Type() switch t.Kind() { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - us[idx] = uint32(v.Int()) + dst[idx] = uint32(v.Int()) case 
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - us[idx] = uint32(v.Uint()) + dst[idx] = uint32(v.Uint()) case reflect.Float32, reflect.Float64: - us[idx] = math.Float32bits(float32(v.Float())) + dst[idx] = math.Float32bits(float32(v.Float())) case reflect.Slice, reflect.Array: l := v.Len() switch t.Elem().Kind() { case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: for i := 0; i < l; i++ { - us[idx+i] = uint32(v.Index(i).Int()) + dst[idx+i] = uint32(v.Index(i).Int()) } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: for i := 0; i < l; i++ { - us[idx+i] = uint32(v.Index(i).Uint()) + dst[idx+i] = uint32(v.Index(i).Uint()) } case reflect.Float32, reflect.Float64: for i := 0; i < l; i++ { - us[idx+i] = math.Float32bits(float32(v.Index(i).Float())) + dst[idx+i] = math.Float32bits(float32(v.Index(i).Float())) } default: - panic(fmt.Sprintf("ebiten: unexpected uniform value type: %s (%s)", name, v.Kind().String())) + panic(fmt.Sprintf("ui: unexpected uniform value type: %s (%s)", name, v.Kind().String())) } default: - panic(fmt.Sprintf("ebiten: unexpected uniform value type: %s (%s)", name, v.Kind().String())) + panic(fmt.Sprintf("ui: unexpected uniform value type: %s (%s)", name, v.Kind().String())) } } idx += typ.Uint32Count() } - - return us } diff --git a/shader.go b/shader.go index c5e1e89e3..a44db6e13 100644 --- a/shader.go +++ b/shader.go @@ -52,8 +52,12 @@ func (s *Shader) Dispose() { s.shader = nil } -func (s *Shader) convertUniforms(uniforms map[string]any) []uint32 { - return s.shader.ConvertUniforms(uniforms) +func (s *Shader) uniformUint32Count() int { + return s.shader.UniformUint32Count() +} + +func (s *Shader) convertUniforms(dst []uint32, uniforms map[string]any) { + s.shader.ConvertUniforms(dst, uniforms) } type builtinShaderKey struct {