diff --git a/internal/graphicscommand/command.go b/internal/graphicscommand/command.go index da0e2b6cf..42e07871a 100644 --- a/internal/graphicscommand/command.go +++ b/internal/graphicscommand/command.go @@ -16,13 +16,9 @@ package graphicscommand import ( "fmt" - "image" "math" "strings" - "sync" - "sync/atomic" - "github.com/hajimehoshi/ebiten/v2/internal/debug" "github.com/hajimehoshi/ebiten/v2/internal/graphics" "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" "github.com/hajimehoshi/ebiten/v2/internal/shaderir" @@ -62,247 +58,6 @@ func (p *drawTrianglesCommandPool) put(v *drawTrianglesCommand) { p.pool = append(p.pool, v) } -// commandQueue is a command queue for drawing commands. -type commandQueue struct { - // commands is a queue of drawing commands. - commands []command - - // vertices represents a vertices data in OpenGL's array buffer. - vertices []float32 - indices []uint16 - - tmpNumVertexFloats int - - drawTrianglesCommandPool drawTrianglesCommandPool - - uint32sBuffer uint32sBuffer - - temporaryBytes temporaryBytes - - err atomic.Value -} - -func (q *commandQueue) appendIndices(indices []uint16, offset uint16) { - n := len(q.indices) - q.indices = append(q.indices, indices...) - for i := n; i < len(q.indices); i++ { - q.indices[i] += offset - } -} - -// mustUseDifferentVertexBuffer reports whether a different vertex buffer must be used. -func mustUseDifferentVertexBuffer(nextNumVertexFloats int) bool { - return nextNumVertexFloats > graphics.MaxVertexFloatsCount -} - -// EnqueueDrawTrianglesCommand enqueues a drawing-image command. 
-func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) { - if len(vertices) > graphics.MaxVertexFloatsCount { - panic(fmt.Sprintf("graphicscommand: len(vertices) must equal to or less than %d but was %d", graphics.MaxVertexFloatsCount, len(vertices))) - } - - split := false - if mustUseDifferentVertexBuffer(q.tmpNumVertexFloats + len(vertices)) { - q.tmpNumVertexFloats = 0 - split = true - } - - // Assume that all the image sizes are same. - // Assume that the images are packed from the front in the slice srcs. - q.vertices = append(q.vertices, vertices...) - q.appendIndices(indices, uint16(q.tmpNumVertexFloats/graphics.VertexFloatCount)) - q.tmpNumVertexFloats += len(vertices) - - // prependPreservedUniforms not only prepends values to the given slice but also creates a new slice. - // Allocating a new slice is necessary to make EnqueueDrawTrianglesCommand safe so far. - // TODO: This might cause a performance issue (#2601). - uniforms = q.prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions) - - // Remove unused uniform variables so that more commands can be merged. - shader.ir.FilterUniformVariables(uniforms) - - // TODO: If dst is the screen, reorder the command to be the last. 
- if !split && 0 < len(q.commands) { - if last, ok := q.commands[len(q.commands)-1].(*drawTrianglesCommand); ok { - if last.CanMergeWithDrawTrianglesCommand(dst, srcs, vertices, blend, shader, uniforms, evenOdd) { - last.setVertices(q.lastVertices(len(vertices) + last.numVertices())) - if last.dstRegions[len(last.dstRegions)-1].Region == dstRegion { - last.dstRegions[len(last.dstRegions)-1].IndexCount += len(indices) - } else { - last.dstRegions = append(last.dstRegions, graphicsdriver.DstRegion{ - Region: dstRegion, - IndexCount: len(indices), - }) - } - return - } - } - } - - c := q.drawTrianglesCommandPool.get() - c.dst = dst - c.srcs = srcs - c.vertices = q.lastVertices(len(vertices)) - c.blend = blend - c.dstRegions = []graphicsdriver.DstRegion{ - { - Region: dstRegion, - IndexCount: len(indices), - }, - } - c.shader = shader - c.uniforms = uniforms - c.evenOdd = evenOdd - q.commands = append(q.commands, c) -} - -func (q *commandQueue) lastVertices(n int) []float32 { - return q.vertices[len(q.vertices)-n : len(q.vertices)] -} - -// Enqueue enqueues a drawing command other than a draw-triangles command. -// -// For a draw-triangles command, use EnqueueDrawTrianglesCommand. -func (q *commandQueue) Enqueue(command command) { - // TODO: If dst is the screen, reorder the command to be the last. - q.commands = append(q.commands, command) -} - -// Flush flushes the command queue. 
-func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { - if err := q.err.Load(); err != nil { - return err.(error) - } - - var sync bool - for _, c := range q.commands { - if c.NeedsSync() { - sync = true - break - } - } - - logger := debug.SwitchLogger() - - var flushErr error - runOnRenderThread(func() { - defer logger.Flush() - - if err := q.flush(graphicsDriver, endFrame, logger); err != nil { - if sync { - flushErr = err - return - } - q.err.Store(err) - return - } - - if endFrame && swapBuffersForGL != nil { - swapBuffersForGL() - } - - theCommandQueueManager.putCommandQueue(q) - }, sync) - - if sync && flushErr != nil { - return flushErr - } - - return nil -} - -// flush must be called the main thread. -func (q *commandQueue) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, logger debug.Logger) (err error) { - // If endFrame is true, Begin/End should be called to ensure the framebuffer is swapped. - if len(q.commands) == 0 && !endFrame { - return nil - } - - es := q.indices - vs := q.vertices - logger.Logf("Graphics commands:\n") - - if err := graphicsDriver.Begin(); err != nil { - return err - } - - defer func() { - // Call End even if an error causes, or the graphics driver's state might be stale (#2388). - if err1 := graphicsDriver.End(endFrame); err1 != nil && err == nil { - err = err1 - } - - // Release the commands explicitly (#1803). - // Apparently, the part of a slice between len and cap-1 still holds references. - // Then, resetting the length by [:0] doesn't release the references. 
- for i, c := range q.commands { - if c, ok := c.(*drawTrianglesCommand); ok { - q.drawTrianglesCommandPool.put(c) - } - q.commands[i] = nil - } - q.commands = q.commands[:0] - q.vertices = q.vertices[:0] - q.indices = q.indices[:0] - q.tmpNumVertexFloats = 0 - - if endFrame { - q.uint32sBuffer.reset() - q.temporaryBytes.reset() - } - }() - - cs := q.commands - for len(cs) > 0 { - nv := 0 - ne := 0 - nc := 0 - for _, c := range cs { - if dtc, ok := c.(*drawTrianglesCommand); ok { - if nc > 0 && mustUseDifferentVertexBuffer(nv+dtc.numVertices()) { - break - } - nv += dtc.numVertices() - ne += dtc.numIndices() - } - nc++ - } - if 0 < ne { - if err := graphicsDriver.SetVertices(vs[:nv], es[:ne]); err != nil { - return err - } - es = es[ne:] - vs = vs[nv:] - } - indexOffset := 0 - for _, c := range cs[:nc] { - if err := c.Exec(graphicsDriver, indexOffset); err != nil { - return err - } - logger.Logf(" %s\n", c) - // TODO: indexOffset should be reset if the command type is different - // from the previous one. This fix is needed when another drawing command is - // introduced than drawTrianglesCommand. - if dtc, ok := c.(*drawTrianglesCommand); ok { - indexOffset += dtc.numIndices() - } - } - cs = cs[nc:] - } - - return nil -} - -// FlushCommands flushes the command queue and present the screen if needed. -// If endFrame is true, the current screen might be used to present. -func FlushCommands(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { - flushImageBuffers() - if err := theCommandQueueManager.flush(graphicsDriver, endFrame, swapBuffersForGL); err != nil { - return err - } - return nil -} - // drawTrianglesCommand represents a drawing command to draw an image on another image. 
type drawTrianglesCommand struct { dst *Image @@ -633,254 +388,3 @@ func MaxImageSize(graphicsDriver graphicsdriver.Graphics) int { }, true) return size } - -type commandQueuePool struct { - cache []*commandQueue - m sync.Mutex -} - -func (c *commandQueuePool) get() (*commandQueue, error) { - c.m.Lock() - defer c.m.Unlock() - - if len(c.cache) == 0 { - return &commandQueue{}, nil - } - - for _, q := range c.cache { - if err := q.err.Load(); err != nil { - return nil, err.(error) - } - } - - q := c.cache[len(c.cache)-1] - c.cache[len(c.cache)-1] = nil - c.cache = c.cache[:len(c.cache)-1] - return q, nil -} - -func (c *commandQueuePool) put(queue *commandQueue) { - c.m.Lock() - defer c.m.Unlock() - - c.cache = append(c.cache, queue) -} - -type commandQueueManager struct { - pool commandQueuePool - current *commandQueue -} - -var theCommandQueueManager commandQueueManager - -func (c *commandQueueManager) allocBytes(size int) []byte { - if c.current == nil { - c.current, _ = c.pool.get() - } - return c.current.temporaryBytes.alloc(size) -} - -func (c *commandQueueManager) enqueueCommand(command command) { - if c.current == nil { - c.current, _ = c.pool.get() - } - c.current.Enqueue(command) -} - -// put can be called from any goroutines. 
-func (c *commandQueueManager) putCommandQueue(commandQueue *commandQueue) { - c.pool.put(commandQueue) -} - -func (c *commandQueueManager) enqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) { - if c.current == nil { - c.current, _ = c.pool.get() - } - c.current.EnqueueDrawTrianglesCommand(dst, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, evenOdd) -} - -func (c *commandQueueManager) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { - // Switch the command queue. - prev := c.current - q, err := c.pool.get() - if err != nil { - return err - } - c.current = q - - if prev == nil { - return nil - } - if err := prev.Flush(graphicsDriver, endFrame, swapBuffersForGL); err != nil { - return err - } - return nil -} - -func max(a, b int) int { - if a < b { - return b - } - return a -} - -func roundUpPower2(x int) int { - p2 := 1 - for p2 < x { - p2 *= 2 - } - return p2 -} - -type rectangleF32 struct { - x float32 - y float32 - width float32 - height float32 -} - -func imageRectangleToRectangleF32(r image.Rectangle) rectangleF32 { - return rectangleF32{ - x: float32(r.Min.X), - y: float32(r.Min.Y), - width: float32(r.Dx()), - height: float32(r.Dy()), - } -} - -func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle) []uint32 { - origUniforms := uniforms - uniforms = q.uint32sBuffer.alloc(len(origUniforms) + graphics.PreservedUniformUint32Count) - copy(uniforms[graphics.PreservedUniformUint32Count:], origUniforms) - - // Set the destination texture size. 
- dw, dh := dst.InternalSize() - uniforms[0] = math.Float32bits(float32(dw)) - uniforms[1] = math.Float32bits(float32(dh)) - - // Set the source texture sizes. - if srcs[0] != nil { - w, h := srcs[0].InternalSize() - uniforms[2] = math.Float32bits(float32(w)) - uniforms[3] = math.Float32bits(float32(h)) - } else { - uniforms[2] = 0 - uniforms[3] = 0 - } - if srcs[1] != nil { - w, h := srcs[1].InternalSize() - uniforms[4] = math.Float32bits(float32(w)) - uniforms[5] = math.Float32bits(float32(h)) - } else { - uniforms[4] = 0 - uniforms[5] = 0 - } - if srcs[2] != nil { - w, h := srcs[2].InternalSize() - uniforms[6] = math.Float32bits(float32(w)) - uniforms[7] = math.Float32bits(float32(h)) - } else { - uniforms[6] = 0 - uniforms[7] = 0 - } - if srcs[3] != nil { - w, h := srcs[3].InternalSize() - uniforms[8] = math.Float32bits(float32(w)) - uniforms[9] = math.Float32bits(float32(h)) - } else { - uniforms[8] = 0 - uniforms[9] = 0 - } - - dr := imageRectangleToRectangleF32(dstRegion) - if shader.unit() == shaderir.Texels { - dr.x /= float32(dw) - dr.y /= float32(dh) - dr.width /= float32(dw) - dr.height /= float32(dh) - } - - // Set the destination region origin. - uniforms[10] = math.Float32bits(dr.x) - uniforms[11] = math.Float32bits(dr.y) - - // Set the destination region size. - uniforms[12] = math.Float32bits(dr.width) - uniforms[13] = math.Float32bits(dr.height) - - var srs [graphics.ShaderImageCount]rectangleF32 - for i, r := range srcRegions { - srs[i] = imageRectangleToRectangleF32(r) - } - if shader.unit() == shaderir.Texels { - for i, src := range srcs { - if src == nil { - continue - } - w, h := src.InternalSize() - srs[i].x /= float32(w) - srs[i].y /= float32(h) - srs[i].width /= float32(w) - srs[i].height /= float32(h) - } - } - - // Set the source region origins. 
- uniforms[14] = math.Float32bits(srs[0].x) - uniforms[15] = math.Float32bits(srs[0].y) - uniforms[16] = math.Float32bits(srs[1].x) - uniforms[17] = math.Float32bits(srs[1].y) - uniforms[18] = math.Float32bits(srs[2].x) - uniforms[19] = math.Float32bits(srs[2].y) - uniforms[20] = math.Float32bits(srs[3].x) - uniforms[21] = math.Float32bits(srs[3].y) - - // Set the source region sizes. - uniforms[22] = math.Float32bits(srs[0].width) - uniforms[23] = math.Float32bits(srs[0].height) - uniforms[24] = math.Float32bits(srs[1].width) - uniforms[25] = math.Float32bits(srs[1].height) - uniforms[26] = math.Float32bits(srs[2].width) - uniforms[27] = math.Float32bits(srs[2].height) - uniforms[28] = math.Float32bits(srs[3].width) - uniforms[29] = math.Float32bits(srs[3].height) - - // Set the projection matrix. - uniforms[30] = math.Float32bits(2 / float32(dw)) - uniforms[31] = 0 - uniforms[32] = 0 - uniforms[33] = 0 - uniforms[34] = 0 - uniforms[35] = math.Float32bits(2 / float32(dh)) - uniforms[36] = 0 - uniforms[37] = 0 - uniforms[38] = 0 - uniforms[39] = 0 - uniforms[40] = math.Float32bits(1) - uniforms[41] = 0 - uniforms[42] = math.Float32bits(-1) - uniforms[43] = math.Float32bits(-1) - uniforms[44] = 0 - uniforms[45] = math.Float32bits(1) - - return uniforms -} - -// uint32sBuffer is a reusable buffer to allocate []uint32. 
-type uint32sBuffer struct { - buf []uint32 -} - -func (b *uint32sBuffer) alloc(n int) []uint32 { - buf := b.buf - if len(buf)+n > cap(buf) { - buf = make([]uint32, 0, max(roundUpPower2(len(buf)+n), 16)) - } - s := buf[len(buf) : len(buf)+n] - b.buf = buf[:len(buf)+n] - return s -} - -func (b *uint32sBuffer) reset() { - b.buf = b.buf[:0] -} diff --git a/internal/graphicscommand/commandqueue.go b/internal/graphicscommand/commandqueue.go new file mode 100644 index 000000000..aec758943 --- /dev/null +++ b/internal/graphicscommand/commandqueue.go @@ -0,0 +1,520 @@ +// Copyright 2023 The Ebitengine Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package graphicscommand + +import ( + "fmt" + "image" + "math" + "sync" + "sync/atomic" + + "github.com/hajimehoshi/ebiten/v2/internal/debug" + "github.com/hajimehoshi/ebiten/v2/internal/graphics" + "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" + "github.com/hajimehoshi/ebiten/v2/internal/shaderir" +) + +// FlushCommands flushes the command queue and presents the screen if needed. +// If endFrame is true, the current screen might be used to present. +func FlushCommands(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { + flushImageBuffers() + if err := theCommandQueueManager.flush(graphicsDriver, endFrame, swapBuffersForGL); err != nil { + return err + } + return nil +} + +// commandQueue is a command queue for drawing commands. 
+type commandQueue struct { + // commands is a queue of drawing commands. + commands []command + + // vertices represents a vertices data in OpenGL's array buffer. + vertices []float32 + indices []uint16 + + tmpNumVertexFloats int + + drawTrianglesCommandPool drawTrianglesCommandPool + + uint32sBuffer uint32sBuffer + + temporaryBytes temporaryBytes + + err atomic.Value +} + +func (q *commandQueue) appendIndices(indices []uint16, offset uint16) { + n := len(q.indices) + q.indices = append(q.indices, indices...) + for i := n; i < len(q.indices); i++ { + q.indices[i] += offset + } +} + +// mustUseDifferentVertexBuffer reports whether a different vertex buffer must be used. +func mustUseDifferentVertexBuffer(nextNumVertexFloats int) bool { + return nextNumVertexFloats > graphics.MaxVertexFloatsCount +} + +// EnqueueDrawTrianglesCommand enqueues a drawing-image command. +func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) { + if len(vertices) > graphics.MaxVertexFloatsCount { + panic(fmt.Sprintf("graphicscommand: len(vertices) must equal to or less than %d but was %d", graphics.MaxVertexFloatsCount, len(vertices))) + } + + split := false + if mustUseDifferentVertexBuffer(q.tmpNumVertexFloats + len(vertices)) { + q.tmpNumVertexFloats = 0 + split = true + } + + // Assume that all the image sizes are same. + // Assume that the images are packed from the front in the slice srcs. + q.vertices = append(q.vertices, vertices...) + q.appendIndices(indices, uint16(q.tmpNumVertexFloats/graphics.VertexFloatCount)) + q.tmpNumVertexFloats += len(vertices) + + // prependPreservedUniforms not only prepends values to the given slice but also creates a new slice. 
+ // Allocating a new slice is necessary to make EnqueueDrawTrianglesCommand safe so far. + // TODO: This might cause a performance issue (#2601). + uniforms = q.prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions) + + // Remove unused uniform variables so that more commands can be merged. + shader.ir.FilterUniformVariables(uniforms) + + // TODO: If dst is the screen, reorder the command to be the last. + if !split && 0 < len(q.commands) { + if last, ok := q.commands[len(q.commands)-1].(*drawTrianglesCommand); ok { + if last.CanMergeWithDrawTrianglesCommand(dst, srcs, vertices, blend, shader, uniforms, evenOdd) { + last.setVertices(q.lastVertices(len(vertices) + last.numVertices())) + if last.dstRegions[len(last.dstRegions)-1].Region == dstRegion { + last.dstRegions[len(last.dstRegions)-1].IndexCount += len(indices) + } else { + last.dstRegions = append(last.dstRegions, graphicsdriver.DstRegion{ + Region: dstRegion, + IndexCount: len(indices), + }) + } + return + } + } + } + + c := q.drawTrianglesCommandPool.get() + c.dst = dst + c.srcs = srcs + c.vertices = q.lastVertices(len(vertices)) + c.blend = blend + c.dstRegions = []graphicsdriver.DstRegion{ + { + Region: dstRegion, + IndexCount: len(indices), + }, + } + c.shader = shader + c.uniforms = uniforms + c.evenOdd = evenOdd + q.commands = append(q.commands, c) +} + +func (q *commandQueue) lastVertices(n int) []float32 { + return q.vertices[len(q.vertices)-n : len(q.vertices)] +} + +// Enqueue enqueues a drawing command other than a draw-triangles command. +// +// For a draw-triangles command, use EnqueueDrawTrianglesCommand. +func (q *commandQueue) Enqueue(command command) { + // TODO: If dst is the screen, reorder the command to be the last. + q.commands = append(q.commands, command) +} + +// Flush flushes the command queue. 
+func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { + if err := q.err.Load(); err != nil { + return err.(error) + } + + var sync bool + for _, c := range q.commands { + if c.NeedsSync() { + sync = true + break + } + } + + logger := debug.SwitchLogger() + + var flushErr error + runOnRenderThread(func() { + defer logger.Flush() + + if err := q.flush(graphicsDriver, endFrame, logger); err != nil { + if sync { + flushErr = err + return + } + q.err.Store(err) + return + } + + if endFrame && swapBuffersForGL != nil { + swapBuffersForGL() + } + + theCommandQueueManager.putCommandQueue(q) + }, sync) + + if sync && flushErr != nil { + return flushErr + } + + return nil +} + +// flush must be called on the render thread. +func (q *commandQueue) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, logger debug.Logger) (err error) { + // If endFrame is true, Begin/End should be called to ensure the framebuffer is swapped. + if len(q.commands) == 0 && !endFrame { + return nil + } + + es := q.indices + vs := q.vertices + logger.Logf("Graphics commands:\n") + + if err := graphicsDriver.Begin(); err != nil { + return err + } + + defer func() { + // Call End even if an error occurs, or the graphics driver's state might be stale (#2388). + if err1 := graphicsDriver.End(endFrame); err1 != nil && err == nil { + err = err1 + } + + // Release the commands explicitly (#1803). + // Apparently, the part of a slice between len and cap-1 still holds references. + // Then, resetting the length by [:0] doesn't release the references. 
+ for i, c := range q.commands { + if c, ok := c.(*drawTrianglesCommand); ok { + q.drawTrianglesCommandPool.put(c) + } + q.commands[i] = nil + } + q.commands = q.commands[:0] + q.vertices = q.vertices[:0] + q.indices = q.indices[:0] + q.tmpNumVertexFloats = 0 + + if endFrame { + q.uint32sBuffer.reset() + q.temporaryBytes.reset() + } + }() + + cs := q.commands + for len(cs) > 0 { + nv := 0 + ne := 0 + nc := 0 + for _, c := range cs { + if dtc, ok := c.(*drawTrianglesCommand); ok { + if nc > 0 && mustUseDifferentVertexBuffer(nv+dtc.numVertices()) { + break + } + nv += dtc.numVertices() + ne += dtc.numIndices() + } + nc++ + } + if 0 < ne { + if err := graphicsDriver.SetVertices(vs[:nv], es[:ne]); err != nil { + return err + } + es = es[ne:] + vs = vs[nv:] + } + indexOffset := 0 + for _, c := range cs[:nc] { + if err := c.Exec(graphicsDriver, indexOffset); err != nil { + return err + } + logger.Logf(" %s\n", c) + // TODO: indexOffset should be reset if the command type is different + // from the previous one. This fix is needed when another drawing command is + // introduced than drawTrianglesCommand. + if dtc, ok := c.(*drawTrianglesCommand); ok { + indexOffset += dtc.numIndices() + } + } + cs = cs[nc:] + } + + return nil +} + +type rectangleF32 struct { + x float32 + y float32 + width float32 + height float32 +} + +func imageRectangleToRectangleF32(r image.Rectangle) rectangleF32 { + return rectangleF32{ + x: float32(r.Min.X), + y: float32(r.Min.Y), + width: float32(r.Dx()), + height: float32(r.Dy()), + } +} + +func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle) []uint32 { + origUniforms := uniforms + uniforms = q.uint32sBuffer.alloc(len(origUniforms) + graphics.PreservedUniformUint32Count) + copy(uniforms[graphics.PreservedUniformUint32Count:], origUniforms) + + // Set the destination texture size. 
+ dw, dh := dst.InternalSize() + uniforms[0] = math.Float32bits(float32(dw)) + uniforms[1] = math.Float32bits(float32(dh)) + + // Set the source texture sizes. + if srcs[0] != nil { + w, h := srcs[0].InternalSize() + uniforms[2] = math.Float32bits(float32(w)) + uniforms[3] = math.Float32bits(float32(h)) + } else { + uniforms[2] = 0 + uniforms[3] = 0 + } + if srcs[1] != nil { + w, h := srcs[1].InternalSize() + uniforms[4] = math.Float32bits(float32(w)) + uniforms[5] = math.Float32bits(float32(h)) + } else { + uniforms[4] = 0 + uniforms[5] = 0 + } + if srcs[2] != nil { + w, h := srcs[2].InternalSize() + uniforms[6] = math.Float32bits(float32(w)) + uniforms[7] = math.Float32bits(float32(h)) + } else { + uniforms[6] = 0 + uniforms[7] = 0 + } + if srcs[3] != nil { + w, h := srcs[3].InternalSize() + uniforms[8] = math.Float32bits(float32(w)) + uniforms[9] = math.Float32bits(float32(h)) + } else { + uniforms[8] = 0 + uniforms[9] = 0 + } + + dr := imageRectangleToRectangleF32(dstRegion) + if shader.unit() == shaderir.Texels { + dr.x /= float32(dw) + dr.y /= float32(dh) + dr.width /= float32(dw) + dr.height /= float32(dh) + } + + // Set the destination region origin. + uniforms[10] = math.Float32bits(dr.x) + uniforms[11] = math.Float32bits(dr.y) + + // Set the destination region size. + uniforms[12] = math.Float32bits(dr.width) + uniforms[13] = math.Float32bits(dr.height) + + var srs [graphics.ShaderImageCount]rectangleF32 + for i, r := range srcRegions { + srs[i] = imageRectangleToRectangleF32(r) + } + if shader.unit() == shaderir.Texels { + for i, src := range srcs { + if src == nil { + continue + } + w, h := src.InternalSize() + srs[i].x /= float32(w) + srs[i].y /= float32(h) + srs[i].width /= float32(w) + srs[i].height /= float32(h) + } + } + + // Set the source region origins. 
+ uniforms[14] = math.Float32bits(srs[0].x) + uniforms[15] = math.Float32bits(srs[0].y) + uniforms[16] = math.Float32bits(srs[1].x) + uniforms[17] = math.Float32bits(srs[1].y) + uniforms[18] = math.Float32bits(srs[2].x) + uniforms[19] = math.Float32bits(srs[2].y) + uniforms[20] = math.Float32bits(srs[3].x) + uniforms[21] = math.Float32bits(srs[3].y) + + // Set the source region sizes. + uniforms[22] = math.Float32bits(srs[0].width) + uniforms[23] = math.Float32bits(srs[0].height) + uniforms[24] = math.Float32bits(srs[1].width) + uniforms[25] = math.Float32bits(srs[1].height) + uniforms[26] = math.Float32bits(srs[2].width) + uniforms[27] = math.Float32bits(srs[2].height) + uniforms[28] = math.Float32bits(srs[3].width) + uniforms[29] = math.Float32bits(srs[3].height) + + // Set the projection matrix. + uniforms[30] = math.Float32bits(2 / float32(dw)) + uniforms[31] = 0 + uniforms[32] = 0 + uniforms[33] = 0 + uniforms[34] = 0 + uniforms[35] = math.Float32bits(2 / float32(dh)) + uniforms[36] = 0 + uniforms[37] = 0 + uniforms[38] = 0 + uniforms[39] = 0 + uniforms[40] = math.Float32bits(1) + uniforms[41] = 0 + uniforms[42] = math.Float32bits(-1) + uniforms[43] = math.Float32bits(-1) + uniforms[44] = 0 + uniforms[45] = math.Float32bits(1) + + return uniforms +} + +type commandQueuePool struct { + cache []*commandQueue + m sync.Mutex +} + +func (c *commandQueuePool) get() (*commandQueue, error) { + c.m.Lock() + defer c.m.Unlock() + + if len(c.cache) == 0 { + return &commandQueue{}, nil + } + + for _, q := range c.cache { + if err := q.err.Load(); err != nil { + return nil, err.(error) + } + } + + q := c.cache[len(c.cache)-1] + c.cache[len(c.cache)-1] = nil + c.cache = c.cache[:len(c.cache)-1] + return q, nil +} + +func (c *commandQueuePool) put(queue *commandQueue) { + c.m.Lock() + defer c.m.Unlock() + + c.cache = append(c.cache, queue) +} + +type commandQueueManager struct { + pool commandQueuePool + current *commandQueue +} + +var theCommandQueueManager 
commandQueueManager + +func (c *commandQueueManager) allocBytes(size int) []byte { + if c.current == nil { + c.current, _ = c.pool.get() + } + return c.current.temporaryBytes.alloc(size) +} + +func (c *commandQueueManager) enqueueCommand(command command) { + if c.current == nil { + c.current, _ = c.pool.get() + } + c.current.Enqueue(command) +} + +// putCommandQueue can be called from any goroutine. +func (c *commandQueueManager) putCommandQueue(commandQueue *commandQueue) { + c.pool.put(commandQueue) +} + +func (c *commandQueueManager) enqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) { + if c.current == nil { + c.current, _ = c.pool.get() + } + c.current.EnqueueDrawTrianglesCommand(dst, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, evenOdd) +} + +func (c *commandQueueManager) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error { + // Switch the command queue. + prev := c.current + q, err := c.pool.get() + if err != nil { + return err + } + c.current = q + + if prev == nil { + return nil + } + if err := prev.Flush(graphicsDriver, endFrame, swapBuffersForGL); err != nil { + return err + } + return nil +} + +// uint32sBuffer is a reusable buffer to allocate []uint32. +type uint32sBuffer struct { + buf []uint32 +} + +func roundUpPower2(x int) int { + p2 := 1 + for p2 < x { + p2 *= 2 + } + return p2 +} + +func max(a, b int) int { + if a < b { + return b + } + return a +} + +func (b *uint32sBuffer) alloc(n int) []uint32 { + buf := b.buf + if len(buf)+n > cap(buf) { + buf = make([]uint32, 0, max(roundUpPower2(len(buf)+n), 16)) + } + s := buf[len(buf) : len(buf)+n] + b.buf = buf[:len(buf)+n] + return s +} + +func (b *uint32sBuffer) reset() { + b.buf = b.buf[:0] +}