From 74e204d952789a7155b9fa3ae8d40e85057fd6d8 Mon Sep 17 00:00:00 2001 From: seebs Date: Tue, 30 Oct 2018 20:53:17 -0500 Subject: [PATCH] graphics: Speed up DrawTriangles (#723) DrawTriangles is expensive and slow because of massive memory allocation and garbage collection costs. This patch moves from ~47TPS on my laptop (with ~24k triangles) to 60TPS. The first part is just allocating the right size of vertex buffer up front; that got to about 55TPS. The second part replaces the frequent allocations of []float32 in Vertex() calls with writing the desired values into a provided destination slice. Time spent in drawing triangles for 1,000 frames: 13.07s baseline 11.09s preallocate whole buffer to avoid resizing 6.13s use new PutVertex function This might need some cleanup, but I think it's good evidence that the design change is viable. --- image.go | 6 +++--- internal/graphics/vertices.go | 5 +---- internal/shareable/shareable.go | 4 ++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/image.go b/image.go index 45d67b50d..d530f32f2 100644 --- a/image.go +++ b/image.go @@ -519,10 +519,10 @@ func (i *Image) DrawTriangles(vertices []Vertex, indices []uint16, img *Image, o filter = graphics.Filter(img.filter) } - vs := []float32{} + vs := make([]float32, len(vertices)*10) src := img.mipmap.original() - for _, v := range vertices { - vs = append(vs, src.Vertex(float32(v.DstX), float32(v.DstY), v.SrcX, v.SrcY, v.ColorR, v.ColorG, v.ColorB, v.ColorA)...) + for idx, v := range vertices { + src.PutVertex(vs[idx*10:idx*10+10], float32(v.DstX), float32(v.DstY), v.SrcX, v.SrcY, v.ColorR, v.ColorG, v.ColorB, v.ColorA) } i.mipmap.original().DrawImage(img.mipmap.original(), vs, indices, options.ColorM.impl, mode, filter) i.disposeMipmaps() diff --git a/internal/graphics/vertices.go b/internal/graphics/vertices.go index 91e2e5e03..380e3286a 100644 --- a/internal/graphics/vertices.go +++ b/internal/graphics/vertices.go @@ -140,7 +140,7 @@ func QuadIndices() []uint16 { return quadIndices } -func Vertex(width, height int, dx, dy, sx, sy float32, cr, cg, cb, ca float32) []float32 { +func PutVertex(vs []float32, width, height int, dx, dy, sx, sy float32, cr, cg, cb, ca float32) { if !isPowerOf2(width) { panic("not reached") } @@ -155,7 +155,6 @@ func Vertex(width, height int, dx, dy, sx, sy float32, cr, cg, cb, ca float32) [ // // NaN would make more sense to represent an invalid state, but vertices including NaN values doesn't work on // some machines (#696). Let's use negative numbers to represent such state. - vs := theVerticesBackend.slice(1)[0:10] vs[0] = dx vs[1] = dy vs[2] = sx / wf @@ -166,6 +165,4 @@ func Vertex(width, height int, dx, dy, sx, sy float32, cr, cg, cb, ca float32) [ vs[7] = cg vs[8] = cb vs[9] = ca - - return vs } diff --git a/internal/shareable/shareable.go b/internal/shareable/shareable.go index fee5d33ed..2b3dc1673 100644 --- a/internal/shareable/shareable.go +++ b/internal/shareable/shareable.go @@ -191,13 +191,13 @@ func (i *Image) QuadVertices(sx0, sy0, sx1, sy1 int, a, b, c, d, tx, ty float32, return graphics.QuadVertices(w, h, sx0+ox, sy0+oy, sx1+ox, sy1+oy, a, b, c, d, tx, ty, cr, cg, cb, ca) } -func (i *Image) Vertex(dx, dy, sx, sy float32, cr, cg, cb, ca float32) []float32 { +func (i *Image) PutVertex(dest []float32, dx, dy, sx, sy float32, cr, cg, cb, ca float32) { if i.backend == nil { i.allocate(true) } ox, oy, _, _ := i.region() w, h := i.backend.restorable.SizePowerOf2() - return graphics.Vertex(w, h, dx, dy, sx+float32(ox), sy+float32(oy), cr, cg, cb, ca) + graphics.PutVertex(dest, w, h, dx, dy, sx+float32(ox), sy+float32(oy), cr, cg, cb, ca) } const MaxCountForShare = 10