From 5a1eb2413821f92e33cc7d6334b7b001317aec76 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Wed, 26 Oct 2016 00:20:41 +0900 Subject: [PATCH] graphics: Improve speed by using []int16 instead of []uint8 --- imageparts.go | 77 +++++++++------------ internal/graphics/command.go | 15 ++-- internal/graphics/image.go | 2 +- internal/graphics/opengl/context_desktop.go | 4 +- internal/graphics/opengl/context_js.go | 2 +- internal/graphics/opengl/context_mobile.go | 13 +++- internal/restorable/image.go | 6 +- math.go | 18 ++--- math_js.go | 6 +- 9 files changed, 67 insertions(+), 76 deletions(-) diff --git a/imageparts.go b/imageparts.go index 651763b18..d8afeb5d1 100644 --- a/imageparts.go +++ b/imageparts.go @@ -18,7 +18,6 @@ import ( "image" "math" - "github.com/hajimehoshi/ebiten/internal/endian" "github.com/hajimehoshi/ebiten/internal/graphics" ) @@ -77,17 +76,16 @@ func v(y, height2p int) int16 { return int16(math.MaxInt16 * y / height2p) } -func vertices(parts ImageParts, width, height int, geo *GeoM) []uint8 { +func vertices(parts ImageParts, width, height int, geo *GeoM) []int16 { // TODO: This function should be in graphics package? - totalSize := graphics.QuadVertexSizeInBytes() + totalSize := graphics.QuadVertexSizeInBytes() / 2 oneSize := totalSize / 4 l := parts.Len() - vertices := make([]uint8, l*totalSize) + vs := make([]int16, l*totalSize) width2p := graphics.NextPowerOf2Int(width) height2p := graphics.NextPowerOf2Int(height) n := 0 - vs := make([]int16, 16) - geoBytes := floatBytes(geo.Element(0, 0), + geo16 := floatsToInt16s(geo.Element(0, 0), geo.Element(0, 1), geo.Element(1, 0), geo.Element(1, 1), @@ -104,45 +102,36 @@ func vertices(parts ImageParts, width, height int, geo *GeoM) []uint8 { continue } u0, v0, u1, v1 := u(sx0, width2p), v(sy0, height2p), u(sx1, width2p), v(sy1, height2p) - vs[0] = x0 - vs[1] = y0 - vs[2] = u0 - vs[3] = v0 - vs[4] = x1 - vs[5] = y0 - vs[6] = u1 - vs[7] = v0 - vs[8] = x0 - vs[9] = y1 - vs[10] = u0 - vs[11] = v1 - vs[12] = x1 - vs[13] = y1 - vs[14] = u1 - vs[15] = v1 - // Use direct assign here. `append` function might be slow on browsers. - for j := 0; j < 4; j++ { - offset := totalSize*n + oneSize*j - if endian.IsLittle() { - // Subslicing like vs[4*j:4*j+4] is slow on browsers. - // Don't do this. - for k := 0; k < 4; k++ { - v := vs[4*j+k] - vertices[offset+2*k] = uint8(v) - vertices[offset+2*k+1] = uint8(v >> 8) - } - } else { - for k := 0; k < 4; k++ { - v := vs[4*j+k] - vertices[offset+2*k] = uint8(v >> 8) - vertices[offset+2*k+1] = uint8(v) - } - } - for k, g := range geoBytes { - vertices[offset+8+k] = g - } + offset := n * totalSize + vs[offset] = x0 + vs[offset+1] = y0 + vs[offset+2] = u0 + vs[offset+3] = v0 + for j, g := range geo16 { + vs[offset+4+j] = g + } + vs[offset+oneSize] = x1 + vs[offset+oneSize+1] = y0 + vs[offset+oneSize+2] = u1 + vs[offset+oneSize+3] = v0 + for j, g := range geo16 { + vs[offset+oneSize+4+j] = g + } + vs[offset+2*oneSize] = x0 + vs[offset+2*oneSize+1] = y1 + vs[offset+2*oneSize+2] = u0 + vs[offset+2*oneSize+3] = v1 + for j, g := range geo16 { + vs[offset+2*oneSize+4+j] = g + } + vs[offset+3*oneSize] = x1 + vs[offset+3*oneSize+1] = y1 + vs[offset+3*oneSize+2] = u1 + vs[offset+3*oneSize+3] = v1 + for j, g := range geo16 { + vs[offset+3*oneSize+4+j] = g } n++ } - return vertices[:n*totalSize] + return vs[:n*totalSize] } diff --git a/internal/graphics/command.go b/internal/graphics/command.go index bb17e5b66..b435364ca 100644 --- a/internal/graphics/command.go +++ b/internal/graphics/command.go @@ -126,7 +126,7 @@ func (q *commandQueue) Flush(context *opengl.Context) error { // glViewport must be called at least at every frame on iOS. context.ResetViewportSize() for _, g := range q.commandGroups() { - vertices := []uint8{} + vertices := []int16{} for _, c := range g { switch c := c.(type) { case *drawImageCommand: @@ -148,7 +148,8 @@ func (q *commandQueue) Flush(context *opengl.Context) error { return err } if c, ok := c.(*drawImageCommand); ok { - indexOffsetInBytes += 6 * len(c.vertices) / QuadVertexSizeInBytes() * 2 + n := len(c.vertices) * 2 / QuadVertexSizeInBytes() + indexOffsetInBytes += 6 * n * 2 } } if 0 < numc { @@ -185,7 +186,7 @@ func (c *fillCommand) Exec(context *opengl.Context, indexOffsetInBytes int) erro type drawImageCommand struct { dst *Image src *Image - vertices []uint8 + vertices []int16 color Matrix mode opengl.CompositeMode } @@ -227,8 +228,8 @@ func (c *drawImageCommand) Exec(context *opengl.Context, indexOffsetInBytes int) func (c *drawImageCommand) split(quadsNum int) [2]*drawImageCommand { c1 := *c c2 := *c - c1.vertices = c.vertices[:quadsNum*QuadVertexSizeInBytes()] - c2.vertices = c.vertices[quadsNum*QuadVertexSizeInBytes():] + c1.vertices = c.vertices[:quadsNum*QuadVertexSizeInBytes()/2] + c2.vertices = c.vertices[quadsNum*QuadVertexSizeInBytes()/2:] return [2]*drawImageCommand{&c1, &c2} } @@ -254,14 +255,14 @@ func (c *drawImageCommand) isMergeable(other *drawImageCommand) bool { func (c *drawImageCommand) merge(other *drawImageCommand) *drawImageCommand { newC := *c - newC.vertices = make([]uint8, 0, len(c.vertices)+len(other.vertices)) + newC.vertices = make([]int16, 0, len(c.vertices)+len(other.vertices)) newC.vertices = append(newC.vertices, c.vertices...) newC.vertices = append(newC.vertices, other.vertices...) return &newC } func (c *drawImageCommand) quadsNum() int { - return len(c.vertices) / QuadVertexSizeInBytes() + return len(c.vertices) * 2 / QuadVertexSizeInBytes() } type replacePixelsCommand struct { diff --git a/internal/graphics/image.go b/internal/graphics/image.go index 7b89903d1..fd962a93b 100644 --- a/internal/graphics/image.go +++ b/internal/graphics/image.go @@ -94,7 +94,7 @@ func (i *Image) Fill(clr color.RGBA) error { return nil } -func (i *Image) DrawImage(src *Image, vertices []uint8, clr Matrix, mode opengl.CompositeMode) error { +func (i *Image) DrawImage(src *Image, vertices []int16, clr Matrix, mode opengl.CompositeMode) error { c := &drawImageCommand{ dst: i, src: src, diff --git a/internal/graphics/opengl/context_desktop.go b/internal/graphics/opengl/context_desktop.go index 973eda3aa..13a07bf90 100644 --- a/internal/graphics/opengl/context_desktop.go +++ b/internal/graphics/opengl/context_desktop.go @@ -486,9 +486,9 @@ func (c *Context) BindElementArrayBuffer(b Buffer) { }) } -func (c *Context) BufferSubData(bufferType BufferType, data []uint8) { +func (c *Context) BufferSubData(bufferType BufferType, data []int16) { _ = c.runOnContextThread(func() error { - gl.BufferSubData(uint32(bufferType), 0, len(data), gl.Ptr(data)) + gl.BufferSubData(uint32(bufferType), 0, len(data)*2, gl.Ptr(data)) return nil }) } diff --git a/internal/graphics/opengl/context_js.go b/internal/graphics/opengl/context_js.go index 17c76174c..37b4e3805 100644 --- a/internal/graphics/opengl/context_js.go +++ b/internal/graphics/opengl/context_js.go @@ -387,7 +387,7 @@ func (c *Context) BindElementArrayBuffer(b Buffer) { gl.BindBuffer(gl.ELEMENT_ARRAY_BUFFER, b.Object) } -func (c *Context) BufferSubData(bufferType BufferType, data []uint8) { +func (c *Context) BufferSubData(bufferType BufferType, data []int16) { gl := c.gl gl.BufferSubData(int(bufferType), 0, data) } diff --git a/internal/graphics/opengl/context_mobile.go b/internal/graphics/opengl/context_mobile.go index 4ced338c8..5f67981c6 100644 --- a/internal/graphics/opengl/context_mobile.go +++ b/internal/graphics/opengl/context_mobile.go @@ -386,9 +386,18 @@ func (c *Context) BindElementArrayBuffer(b Buffer) { gl.BindBuffer(mgl.ELEMENT_ARRAY_BUFFER, mgl.Buffer(b)) } -func (c *Context) BufferSubData(bufferType BufferType, data []uint8) { +func int16ToBytes(v []int16) []byte { + b := make([]byte, len(v)*2) + for i, x := range v { + b[2*i] = uint8(uint16(x)) + b[2*i+1] = uint8(uint16(x) >> 8) + } + return b +} + +func (c *Context) BufferSubData(bufferType BufferType, data []int16) { gl := c.gl - gl.BufferSubData(mgl.Enum(bufferType), 0, data) + gl.BufferSubData(mgl.Enum(bufferType), 0, int16ToBytes(data)) } func (c *Context) DeleteBuffer(b Buffer) { diff --git a/internal/restorable/image.go b/internal/restorable/image.go index 8a6ed9a6b..c090f44f3 100644 --- a/internal/restorable/image.go +++ b/internal/restorable/image.go @@ -25,7 +25,7 @@ import ( type drawImageHistoryItem struct { image *graphics.Image - vertices []uint8 + vertices []int16 colorm graphics.Matrix mode opengl.CompositeMode } @@ -144,7 +144,7 @@ func (p *Image) ReplacePixels(pixels []uint8) error { return nil } -func (p *Image) DrawImage(img *Image, vertices []uint8, colorm graphics.Matrix, mode opengl.CompositeMode) error { +func (p *Image) DrawImage(img *Image, vertices []int16, colorm graphics.Matrix, mode opengl.CompositeMode) error { if img.stale || img.volatile { p.makeStale() } else { @@ -156,7 +156,7 @@ func (p *Image) DrawImage(img *Image, vertices []uint8, colorm graphics.Matrix, return nil } -func (p *Image) appendDrawImageHistory(image *graphics.Image, vertices []uint8, colorm graphics.Matrix, mode opengl.CompositeMode) { +func (p *Image) appendDrawImageHistory(image *graphics.Image, vertices []int16, colorm graphics.Matrix, mode opengl.CompositeMode) { if p.stale { return } diff --git a/math.go b/math.go index 6e2f54e4f..701a581e1 100644 --- a/math.go +++ b/math.go @@ -22,24 +22,16 @@ import ( "github.com/hajimehoshi/ebiten/internal/endian" ) -func floatBytes(xs ...float64) []uint8 { - bits := make([]uint8, 0, len(xs)*4) +func floatsToInt16s(xs ...float64) []int16 { + r := make([]int16, 0, len(xs)*2) for _, x := range xs { x32 := float32(x) n := *(*uint32)(unsafe.Pointer(&x32)) if endian.IsLittle() { - bits = append(bits, - uint8(n), - uint8(n>>8), - uint8(n>>16), - uint8(n>>24)) + r = append(r, int16(n), int16(n>>16)) } else { - bits = append(bits, - uint8(n>>24), - uint8(n>>16), - uint8(n>>8), - uint8(n)) + r = append(r, int16(n>>16), int16(n)) } } - return bits + return r } diff --git a/math_js.go b/math_js.go index b5ee9581f..162f70c02 100644 --- a/math_js.go +++ b/math_js.go @@ -20,12 +20,12 @@ import ( "github.com/gopherjs/gopherjs/js" ) -func floatBytes(xs ...float64) []uint8 { +func floatsToInt16s(xs ...float64) []int16 { a := js.Global.Get("ArrayBuffer").New(4 * len(xs)) af32 := js.Global.Get("Float32Array").New(a) - a8 := js.Global.Get("Uint8Array").New(a) + a16 := js.Global.Get("Int16Array").New(a) for i, x := range xs { af32.SetIndex(i, x) } - return a8.Interface().([]uint8) + return a16.Interface().([]int16) }