ebiten: performance improvement by reducing allocations of []uint32

This commit is contained in:
Hajime Hoshi 2022-12-03 21:47:10 +09:00
parent 6a72b1e10d
commit 89c64f83e3
5 changed files with 48 additions and 23 deletions

View File

@ -43,6 +43,9 @@ type Image struct {
// tmpVertices must not be reused until the vertices are sent to the graphics command queue. // tmpVertices must not be reused until the vertices are sent to the graphics command queue.
tmpVertices []float32 tmpVertices []float32
// tmpUniforms must not be reused until the uniform variables are sent to the graphics command queue.
tmpUniforms []uint32
} }
func (i *Image) copyCheck() { func (i *Image) copyCheck() {
@ -252,12 +255,12 @@ func (i *Image) DrawImage(img *Image, options *DrawImageOptions) {
useColorM := !colorm.IsIdentity() useColorM := !colorm.IsIdentity()
shader := builtinShader(filter, builtinshader.AddressUnsafe, useColorM) shader := builtinShader(filter, builtinshader.AddressUnsafe, useColorM)
var uniforms []uint32 uniforms := i.ensureTmpUniforms(shader)
if useColorM { if useColorM {
var body [16]float32 var body [16]float32
var translation [4]float32 var translation [4]float32
colorm.Elements(body[:], translation[:]) colorm.Elements(body[:], translation[:])
uniforms = shader.convertUniforms(map[string]any{ shader.convertUniforms(uniforms, map[string]any{
builtinshader.UniformColorMBody: body[:], builtinshader.UniformColorMBody: body[:],
builtinshader.UniformColorMTranslation: translation[:], builtinshader.UniformColorMTranslation: translation[:],
}) })
@ -481,12 +484,12 @@ func (i *Image) DrawTriangles(vertices []Vertex, indices []uint16, img *Image, o
useColorM := !colorm.IsIdentity() useColorM := !colorm.IsIdentity()
shader := builtinShader(filter, address, useColorM) shader := builtinShader(filter, address, useColorM)
var uniforms []uint32 uniforms := i.ensureTmpUniforms(shader)
if useColorM { if useColorM {
var body [16]float32 var body [16]float32
var translation [4]float32 var translation [4]float32
colorm.Elements(body[:], translation[:]) colorm.Elements(body[:], translation[:])
uniforms = shader.convertUniforms(map[string]any{ shader.convertUniforms(uniforms, map[string]any{
builtinshader.UniformColorMBody: body[:], builtinshader.UniformColorMBody: body[:],
builtinshader.UniformColorMTranslation: translation[:], builtinshader.UniformColorMTranslation: translation[:],
}) })
@ -643,7 +646,10 @@ func (i *Image) DrawTrianglesShader(vertices []Vertex, indices []uint16, shader
offsets[i][1] = float32(y - sy) offsets[i][1] = float32(y - sy)
} }
i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, shader.convertUniforms(options.Uniforms), options.FillRule == EvenOdd, true, options.AntiAlias) uniforms := i.ensureTmpUniforms(shader)
shader.convertUniforms(uniforms, options.Uniforms)
i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, uniforms, options.FillRule == EvenOdd, true, options.AntiAlias)
} }
// DrawRectShaderOptions represents options for DrawRectShader. // DrawRectShaderOptions represents options for DrawRectShader.
@ -753,7 +759,10 @@ func (i *Image) DrawRectShader(width, height int, shader *Shader, options *DrawR
offsets[i][1] = float32(y - sy) offsets[i][1] = float32(y - sy)
} }
i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, shader.convertUniforms(options.Uniforms), false, true, false) uniforms := i.ensureTmpUniforms(shader)
shader.convertUniforms(uniforms, options.Uniforms)
i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedRegion(), sr, offsets, shader.shader, uniforms, false, true, false)
} }
// SubImage returns an image representing the portion of the image p visible through r. // SubImage returns an image representing the portion of the image p visible through r.
@ -1155,6 +1164,14 @@ func (i *Image) ensureTmpVertices(n int) []float32 {
return i.tmpVertices[:n] return i.tmpVertices[:n]
} }
// ensureTmpUniforms returns a zeroed scratch slice that is exactly large
// enough to hold all the uniform variables of shader as uint32 words.
//
// The slice is backed by i.tmpUniforms, which is grown only when its capacity
// is insufficient, so repeated draw calls do not allocate. The result aliases
// i.tmpUniforms and is therefore only valid until the next call to
// ensureTmpUniforms on the same image.
//
// The slice is always returned with every word set to zero. Callers do not
// necessarily overwrite the whole buffer: the uniform conversion writes only as
// many words as the supplied value has elements, and some callers skip the
// conversion altogether. Without clearing, such words would carry values left
// over from a previous draw call instead of the zero value a freshly made
// slice used to provide.
func (i *Image) ensureTmpUniforms(shader *Shader) []uint32 {
	n := shader.uniformUint32Count()
	if cap(i.tmpUniforms) < n {
		// A freshly made slice is already zeroed.
		i.tmpUniforms = make([]uint32, n)
		return i.tmpUniforms
	}

	us := i.tmpUniforms[:n]
	// Reset the reused region so that stale uniform values never leak
	// between draw calls.
	for idx := range us {
		us[idx] = 0
	}
	return us
}
// private implements FinalScreen. // private implements FinalScreen.
func (*Image) private() { func (*Image) private() {
} }

View File

@ -152,9 +152,10 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
copy(vs, vertices) copy(vs, vertices)
is := make([]uint16, len(indices)) is := make([]uint16, len(indices))
copy(is, indices) copy(is, indices)
// TODO: Copy uniform variables. Now this is created for each call, so copying is not necessary, but this is fragile. us := make([]uint32, len(uniforms))
copy(us, uniforms)
if tryAddDelayedCommand(func() { if tryAddDelayedCommand(func() {
i.DrawTriangles(srcs, vs, is, blend, dstRegion, srcRegion, subimageOffsets, shader, uniforms, evenOdd) i.DrawTriangles(srcs, vs, is, blend, dstRegion, srcRegion, subimageOffsets, shader, us, evenOdd)
}) { }) {
return return
} }

View File

@ -423,6 +423,9 @@ func (i *Image) appendDrawTrianglesHistory(srcs [graphics.ShaderImageCount]*Imag
is := make([]uint16, len(indices)) is := make([]uint16, len(indices))
copy(is, indices) copy(is, indices)
us := make([]uint32, len(uniforms))
copy(us, uniforms)
item := &drawTrianglesHistoryItem{ item := &drawTrianglesHistoryItem{
images: srcs, images: srcs,
offsets: offsets, offsets: offsets,
@ -432,7 +435,7 @@ func (i *Image) appendDrawTrianglesHistory(srcs [graphics.ShaderImageCount]*Imag
dstRegion: dstRegion, dstRegion: dstRegion,
srcRegion: srcRegion, srcRegion: srcRegion,
shader: shader, shader: shader,
uniforms: uniforms, uniforms: us,
evenOdd: evenOdd, evenOdd: evenOdd,
} }
i.drawTrianglesHistory = append(i.drawTrianglesHistory, item) i.drawTrianglesHistory = append(i.drawTrianglesHistory, item)

View File

@ -44,13 +44,15 @@ func (s *Shader) MarkDisposed() {
s.shader = nil s.shader = nil
} }
func (s *Shader) ConvertUniforms(uniforms map[string]any) []uint32 { func (s *Shader) UniformUint32Count() int {
var n int var n int
for _, typ := range s.uniformTypes { for _, typ := range s.uniformTypes {
n += typ.Uint32Count() n += typ.Uint32Count()
} }
return n
}
us := make([]uint32, n) func (s *Shader) ConvertUniforms(dst []uint32, uniforms map[string]any) {
var idx int var idx int
for i, name := range s.uniformNames { for i, name := range s.uniformNames {
typ := s.uniformTypes[i] typ := s.uniformTypes[i]
@ -61,36 +63,34 @@ func (s *Shader) ConvertUniforms(uniforms map[string]any) []uint32 {
t := v.Type() t := v.Type()
switch t.Kind() { switch t.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
us[idx] = uint32(v.Int()) dst[idx] = uint32(v.Int())
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
us[idx] = uint32(v.Uint()) dst[idx] = uint32(v.Uint())
case reflect.Float32, reflect.Float64: case reflect.Float32, reflect.Float64:
us[idx] = math.Float32bits(float32(v.Float())) dst[idx] = math.Float32bits(float32(v.Float()))
case reflect.Slice, reflect.Array: case reflect.Slice, reflect.Array:
l := v.Len() l := v.Len()
switch t.Elem().Kind() { switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
us[idx+i] = uint32(v.Index(i).Int()) dst[idx+i] = uint32(v.Index(i).Int())
} }
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
us[idx+i] = uint32(v.Index(i).Uint()) dst[idx+i] = uint32(v.Index(i).Uint())
} }
case reflect.Float32, reflect.Float64: case reflect.Float32, reflect.Float64:
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
us[idx+i] = math.Float32bits(float32(v.Index(i).Float())) dst[idx+i] = math.Float32bits(float32(v.Index(i).Float()))
} }
default: default:
panic(fmt.Sprintf("ebiten: unexpected uniform value type: %s (%s)", name, v.Kind().String())) panic(fmt.Sprintf("ui: unexpected uniform value type: %s (%s)", name, v.Kind().String()))
} }
default: default:
panic(fmt.Sprintf("ebiten: unexpected uniform value type: %s (%s)", name, v.Kind().String())) panic(fmt.Sprintf("ui: unexpected uniform value type: %s (%s)", name, v.Kind().String()))
} }
} }
idx += typ.Uint32Count() idx += typ.Uint32Count()
} }
return us
} }

View File

@ -52,8 +52,12 @@ func (s *Shader) Dispose() {
s.shader = nil s.shader = nil
} }
func (s *Shader) convertUniforms(uniforms map[string]any) []uint32 { func (s *Shader) uniformUint32Count() int {
return s.shader.ConvertUniforms(uniforms) return s.shader.UniformUint32Count()
}
func (s *Shader) convertUniforms(dst []uint32, uniforms map[string]any) {
s.shader.ConvertUniforms(dst, uniforms)
} }
type builtinShaderKey struct { type builtinShaderKey struct {