ebiten/internal/graphicscommand/commandqueue.go
Hajime Hoshi 6339872da8 internal/graphicscommand: disable asynchronous rendering when vsync is on
Asynchronous rendering was introduced at #2664, but apparently this
caused a delay between a game's update and its rendering.

Disable this when vsync is on. When vsync is off, we should not have
to care about the delay since new renderings keep coming. Rather,
asynchronous rendering improves FPS.

Updates #2664
Updates #2822
2023-10-28 14:59:25 +09:00

538 lines
14 KiB
Go

// Copyright 2023 The Ebitengine Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package graphicscommand
import (
"fmt"
"image"
"math"
"sync"
"sync/atomic"
"github.com/hajimehoshi/ebiten/v2/internal/debug"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver"
"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
)
// vsyncEnabled is 1 while vsync is treated as enabled and 0 otherwise.
// In this file it is read and written only through sync/atomic functions.
var vsyncEnabled int32 = 1

// SetVsyncEnabled records whether vsync is enabled.
//
// The value is stored atomically. commandQueue.Flush consults it to decide
// whether an end-of-frame flush must run synchronously.
func SetVsyncEnabled(enabled bool) {
	var flag int32
	if enabled {
		flag = 1
	}
	atomic.StoreInt32(&vsyncEnabled, flag)
}
// FlushCommands flushes the command queue and presents the screen if needed.
//
// If endFrame is true, the current screen might be used to present.
// swapBuffersForGL is forwarded unchanged to the command queue manager.
// The error reported by the manager's flush, if any, is returned as is.
func FlushCommands(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error {
	return theCommandQueueManager.flush(graphicsDriver, endFrame, swapBuffersForGL)
}
// commandQueue is a command queue for drawing commands.
//
// Commands are accumulated by Enqueue and EnqueueDrawTrianglesCommand and
// executed by Flush.
type commandQueue struct {
// commands is a queue of drawing commands, in the order they were enqueued.
commands []command
// vertices holds the vertex data of the queued draw-triangles commands, in enqueue order.
// It represents the vertex data in OpenGL's array buffer.
vertices []float32
// indices holds the index data of the queued draw-triangles commands.
// Each index is shifted by an offset when it is appended (see appendIndices).
indices []uint16
// tmpNumVertexFloats counts the vertex floats enqueued since the vertex buffer was last
// split. It is reset to 0 when adding more vertices would exceed
// graphics.MaxVertexFloatsCount, and again when the queue is flushed.
tmpNumVertexFloats int
// drawTrianglesCommandPool supplies drawTrianglesCommand values for enqueueing and
// takes them back when the queue is flushed.
drawTrianglesCommandPool drawTrianglesCommandPool
// uint32sBuffer backs the uniform slices built by prependPreservedUniforms.
// It is reset when a flush ends a frame.
uint32sBuffer uint32sBuffer
// finalizers are the functions registered by addFinalizer. They are run, then cleared,
// when a flush ends a frame.
finalizers []func()
// err holds the error of a failed asynchronous flush, stored as an error value.
err atomic.Value
}
// addFinalizer adds a finalizer function to this queue.
// A finalizer is executed when the command queue is flushed at the end of the frame.
// Finalizers run in the order they were added and are discarded after running.
func (q *commandQueue) addFinalizer(f func()) {
q.finalizers = append(q.finalizers, f)
}
// appendIndices appends indices to the queue's index data, adding offset to
// every appended value. The given slice itself is not modified.
func (q *commandQueue) appendIndices(indices []uint16, offset uint16) {
	start := len(q.indices)
	q.indices = append(q.indices, indices...)

	// Shift only the freshly appended tail.
	added := q.indices[start:]
	for i := range added {
		added[i] += offset
	}
}
// mustUseDifferentVertexBuffer reports whether a different vertex buffer must be used.
//
// nextNumVertexFloats is the number of vertex floats the current buffer would hold
// after adding more vertices; the result is true when that number exceeds
// graphics.MaxVertexFloatsCount.
func mustUseDifferentVertexBuffer(nextNumVertexFloats int) bool {
return nextNumVertexFloats > graphics.MaxVertexFloatsCount
}
// EnqueueDrawTrianglesCommand enqueues a drawing-image command.
//
// The vertices and indices are copied into the queue's own buffers. The given
// uniforms are copied behind the preserved uniforms, so the caller's slice is
// not retained. When the previous command is a draw-triangles command that
// reports it can be merged with this one, the new geometry is folded into it
// instead of creating another command.
//
// It panics if len(vertices) exceeds graphics.MaxVertexFloatsCount.
func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) {
	if n := len(vertices); n > graphics.MaxVertexFloatsCount {
		panic(fmt.Sprintf("graphicscommand: len(vertices) must equal to or less than %d but was %d", graphics.MaxVertexFloatsCount, n))
	}

	// When the running vertex count would overflow the buffer limit, start
	// counting from zero again and forbid merging with the previous command.
	split := mustUseDifferentVertexBuffer(q.tmpNumVertexFloats + len(vertices))
	if split {
		q.tmpNumVertexFloats = 0
	}

	// Assume that all the image sizes are same.
	// Assume that the images are packed from the front in the slice srcs.
	q.vertices = append(q.vertices, vertices...)
	indexBase := uint16(q.tmpNumVertexFloats / graphics.VertexFloatCount)
	q.appendIndices(indices, indexBase)
	q.tmpNumVertexFloats += len(vertices)

	// prependPreservedUniforms not only prepends values to the given slice but also creates a new slice.
	// Allocating a new slice is necessary to make EnqueueDrawTrianglesCommand safe so far.
	// TODO: This might cause a performance issue (#2601).
	uniforms = q.prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions)

	// Remove unused uniform variables so that more commands can be merged.
	shader.ir.FilterUniformVariables(uniforms)

	// TODO: If dst is the screen, reorder the command to be the last.
	if !split && len(q.commands) > 0 {
		last, ok := q.commands[len(q.commands)-1].(*drawTrianglesCommand)
		if ok && last.CanMergeWithDrawTrianglesCommand(dst, srcs, vertices, blend, shader, uniforms, evenOdd) {
			// Extend the previous command so that it also covers the vertices just appended.
			last.setVertices(q.lastVertices(len(vertices) + last.numVertices()))

			tail := &last.dstRegions[len(last.dstRegions)-1]
			if tail.Region == dstRegion {
				tail.IndexCount += len(indices)
			} else {
				last.dstRegions = append(last.dstRegions, graphicsdriver.DstRegion{
					Region:     dstRegion,
					IndexCount: len(indices),
				})
			}
			return
		}
	}

	// No merge was possible: queue a new command taken from the pool.
	cmd := q.drawTrianglesCommandPool.get()
	cmd.dst = dst
	cmd.srcs = srcs
	cmd.vertices = q.lastVertices(len(vertices))
	cmd.blend = blend
	cmd.dstRegions = []graphicsdriver.DstRegion{
		{
			Region:     dstRegion,
			IndexCount: len(indices),
		},
	}
	cmd.shader = shader
	cmd.uniforms = uniforms
	cmd.evenOdd = evenOdd
	q.commands = append(q.commands, cmd)
}
// lastVertices returns the trailing n elements of the queue's vertex data.
// The result shares its backing array with q.vertices.
func (q *commandQueue) lastVertices(n int) []float32 {
	end := len(q.vertices)
	return q.vertices[end-n : end]
}
// Enqueue enqueues a drawing command other than a draw-triangles command.
//
// The command is appended to the end of the queue without any merging.
// For a draw-triangles command, use EnqueueDrawTrianglesCommand.
func (q *commandQueue) Enqueue(command command) {
// TODO: If dst is the screen, reorder the command to be the last.
q.commands = append(q.commands, command)
}
// Flush flushes the command queue.
//
// The queued commands are executed on the render thread. The call waits for
// the result (synchronous mode) when endFrame is true while vsync is enabled,
// or when any queued command reports NeedsSync. Otherwise the work is handed
// to the render thread asynchronously.
//
// If endFrame is true and swapBuffersForGL is not nil, swapBuffersForGL is
// called on the render thread after a successful flush.
//
// In synchronous mode a flush error is returned directly. In asynchronous
// mode the error is stored on the queue and the queue is handed back to the
// pool, so that the pool reports the error the next time a queue is requested.
// An error already stored on this queue is returned immediately.
func (q *commandQueue) Flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error {
	if err := q.err.Load(); err != nil {
		return err.(error)
	}

	// Disable asynchronous rendering when vsync is on, as this causes a rendering delay (#2822).
	// The local is not named "sync" so that it does not shadow the imported sync package.
	needsSync := endFrame && atomic.LoadInt32(&vsyncEnabled) != 0
	if !needsSync {
		for _, c := range q.commands {
			if c.NeedsSync() {
				needsSync = true
				break
			}
		}
	}

	logger := debug.SwitchLogger()

	// flushErr is written by the closure only in synchronous mode, in which
	// case the closure has finished by the time runOnRenderThread returns.
	var flushErr error
	runOnRenderThread(func() {
		defer logger.Flush()

		if err := q.flush(graphicsDriver, endFrame, logger); err != nil {
			if needsSync {
				flushErr = err
				return
			}
			// Nobody is waiting for this flush. Record the error on the queue
			// and return the queue to the pool: the pool checks its cached
			// queues for stored errors, which is the only place where an
			// asynchronous failure can be noticed. Dropping the queue here
			// would silently lose the error.
			q.err.Store(err)
			theCommandQueueManager.putCommandQueue(q)
			return
		}

		if endFrame && swapBuffersForGL != nil {
			swapBuffersForGL()
		}

		theCommandQueueManager.putCommandQueue(q)
	}, needsSync)

	if needsSync {
		return flushErr
	}
	return nil
}
// flush executes all the queued commands with graphicsDriver and then empties the queue.
//
// flush must be called on the render thread.
//
// If there are no commands and endFrame is false, flush does nothing. Otherwise the
// commands are executed between graphicsDriver.Begin and graphicsDriver.End. The
// commands are processed in batches: each batch uploads its share of the vertex and
// index data with SetVertices and then executes its commands in order.
//
// Once Begin has succeeded, the queue is always cleared on return, even on error.
// When endFrame is true, the uniform buffer is reset and the finalizers are run too.
// The named result err lets the deferred function report an error from End when no
// earlier error occurred.
func (q *commandQueue) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, logger debug.Logger) (err error) {
// If endFrame is true, Begin/End should be called to ensure the framebuffer is swapped.
if len(q.commands) == 0 && !endFrame {
return nil
}
// es and vs are the not-yet-uploaded remainders of the index and vertex data.
es := q.indices
vs := q.vertices
logger.Logf("Graphics commands:\n")
// NOTE(review): when Begin fails, the cleanup below is not registered yet, so the
// queue keeps its commands. Confirm that this is intended.
if err := graphicsDriver.Begin(); err != nil {
return err
}
defer func() {
// Call End even if an error occurs, or the graphics driver's state might be stale (#2388).
// An earlier error takes precedence over an error from End.
if err1 := graphicsDriver.End(endFrame); err1 != nil && err == nil {
err = err1
}
// Release the commands explicitly (#1803).
// Apparently, the part of a slice between len and cap-1 still holds references.
// Then, resetting the length by [:0] doesn't release the references.
for i, c := range q.commands {
// Draw-triangles commands go back to the pool for reuse.
if c, ok := c.(*drawTrianglesCommand); ok {
q.drawTrianglesCommandPool.put(c)
}
q.commands[i] = nil
}
// Keep the backing arrays for the next use of this queue.
q.commands = q.commands[:0]
q.vertices = q.vertices[:0]
q.indices = q.indices[:0]
q.tmpNumVertexFloats = 0
if endFrame {
// Per-frame resources are released only at the end of the frame.
q.uint32sBuffer.reset()
for i, f := range q.finalizers {
f()
q.finalizers[i] = nil
}
q.finalizers = q.finalizers[:0]
}
}()
// cs is the remainder of the commands that are not executed yet.
cs := q.commands
for len(cs) > 0 {
// Determine the next batch: nc commands using nv vertex values and ne indices.
nv := 0
ne := 0
nc := 0
for _, c := range cs {
if dtc, ok := c.(*drawTrianglesCommand); ok {
// Close the batch before a command that would exceed the vertex buffer limit.
// The nc > 0 condition guarantees that a batch has at least one command.
if nc > 0 && mustUseDifferentVertexBuffer(nv+dtc.numVertices()) {
break
}
nv += dtc.numVertices()
ne += dtc.numIndices()
}
nc++
}
// Upload the batch's vertex and index data, if any, and advance past it.
if 0 < ne {
if err := graphicsDriver.SetVertices(vs[:nv], es[:ne]); err != nil {
return err
}
es = es[ne:]
vs = vs[nv:]
}
// indexOffset is the position of the current command's first index within the
// index data uploaded for this batch.
indexOffset := 0
for _, c := range cs[:nc] {
if err := c.Exec(q, graphicsDriver, indexOffset); err != nil {
return err
}
logger.Logf(" %s\n", c)
// TODO: indexOffset should be reset if the command type is different
// from the previous one. This fix is needed when another drawing command is
// introduced than drawTrianglesCommand.
if dtc, ok := c.(*drawTrianglesCommand); ok {
indexOffset += dtc.numIndices()
}
}
cs = cs[nc:]
}
return nil
}
// rectangleF32 is a rectangle described by its origin and size as float32 values.
type rectangleF32 struct {
	x, y          float32
	width, height float32
}

// imageRectangleToRectangleF32 converts r to a rectangleF32 whose origin is
// r.Min and whose size is (r.Dx(), r.Dy()).
func imageRectangleToRectangleF32(r image.Rectangle) rectangleF32 {
	var out rectangleF32
	out.x, out.y = float32(r.Min.X), float32(r.Min.Y)
	out.width, out.height = float32(r.Dx()), float32(r.Dy())
	return out
}
// prependPreservedUniforms returns a slice taken from q.uint32sBuffer that
// holds the preserved uniform values followed by a copy of the given uniforms.
//
// All preserved values except plain zeros are float32 bit patterns. Layout:
//
//	[0, 2)   destination texture size
//	[2, 10)  sizes of the four source textures (0 for a nil source)
//	[10, 12) destination region origin
//	[12, 14) destination region size
//	[14, 22) source region origins
//	[22, 30) source region sizes
//	[30, 46) projection matrix
//
// When shader.unit() is shaderir.Texels, the regions are divided by the size
// of the texture they refer to.
func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle) []uint32 {
	// The preserved layout is written for exactly four source images.
	const numSrcs = 4

	userUniforms := uniforms
	uniforms = q.uint32sBuffer.alloc(len(userUniforms) + graphics.PreservedUniformUint32Count)
	copy(uniforms[graphics.PreservedUniformUint32Count:], userUniforms)

	// Set the destination texture size.
	dstW, dstH := dst.InternalSize()
	uniforms[0] = math.Float32bits(float32(dstW))
	uniforms[1] = math.Float32bits(float32(dstH))

	// Set the source texture sizes. A missing source leaves both values at 0.
	for i := 0; i < numSrcs; i++ {
		var wBits, hBits uint32
		if src := srcs[i]; src != nil {
			w, h := src.InternalSize()
			wBits = math.Float32bits(float32(w))
			hBits = math.Float32bits(float32(h))
		}
		uniforms[2+2*i] = wBits
		uniforms[3+2*i] = hBits
	}

	dstRect := imageRectangleToRectangleF32(dstRegion)
	if shader.unit() == shaderir.Texels {
		dstRect.x /= float32(dstW)
		dstRect.y /= float32(dstH)
		dstRect.width /= float32(dstW)
		dstRect.height /= float32(dstH)
	}

	// Set the destination region origin.
	uniforms[10] = math.Float32bits(dstRect.x)
	uniforms[11] = math.Float32bits(dstRect.y)

	// Set the destination region size.
	uniforms[12] = math.Float32bits(dstRect.width)
	uniforms[13] = math.Float32bits(dstRect.height)

	var srcRects [graphics.ShaderImageCount]rectangleF32
	for i := range srcRegions {
		srcRects[i] = imageRectangleToRectangleF32(srcRegions[i])
	}
	if shader.unit() == shaderir.Texels {
		for i := range srcs {
			src := srcs[i]
			if src == nil {
				continue
			}
			w, h := src.InternalSize()
			srcRects[i].x /= float32(w)
			srcRects[i].y /= float32(h)
			srcRects[i].width /= float32(w)
			srcRects[i].height /= float32(h)
		}
	}

	// Set the source region origins ([14, 22)) and sizes ([22, 30)).
	for i := 0; i < numSrcs; i++ {
		uniforms[14+2*i] = math.Float32bits(srcRects[i].x)
		uniforms[15+2*i] = math.Float32bits(srcRects[i].y)
		uniforms[22+2*i] = math.Float32bits(srcRects[i].width)
		uniforms[23+2*i] = math.Float32bits(srcRects[i].height)
	}

	// Set the projection matrix: clear all 16 entries, then fill in the non-zero ones.
	for i := 30; i < 46; i++ {
		uniforms[i] = 0
	}
	uniforms[30] = math.Float32bits(2 / float32(dstW))
	uniforms[35] = math.Float32bits(2 / float32(dstH))
	uniforms[40] = math.Float32bits(1)
	uniforms[42] = math.Float32bits(-1)
	uniforms[43] = math.Float32bits(-1)
	uniforms[45] = math.Float32bits(1)

	return uniforms
}
// commandQueuePool is a pool of command queues that are ready for reuse.
type commandQueuePool struct {
// cache holds the pooled queues. get takes queues from its end; put appends to it.
cache []*commandQueue
// m guards cache.
m sync.Mutex
}
// get returns a command queue for use.
//
// When the pool is empty, a new queue is created. Otherwise every pooled queue
// is first checked for a stored error; the first one found is returned with a
// nil queue and the pool is left unchanged. If no error is found, the most
// recently pooled queue is removed from the pool and returned.
func (c *commandQueuePool) get() (*commandQueue, error) {
	c.m.Lock()
	defer c.m.Unlock()

	n := len(c.cache)
	if n == 0 {
		return &commandQueue{}, nil
	}

	for i := range c.cache {
		if stored := c.cache[i].err.Load(); stored != nil {
			return nil, stored.(error)
		}
	}

	// Pop the last queue, clearing its slot so that the pool does not keep a reference.
	last := n - 1
	q := c.cache[last]
	c.cache[last] = nil
	c.cache = c.cache[:last]
	return q, nil
}
// put adds queue to the pool so that a later get can return it.
// The pool's mutex is held while the cache is modified.
func (c *commandQueuePool) put(queue *commandQueue) {
c.m.Lock()
defer c.m.Unlock()
c.cache = append(c.cache, queue)
}
// commandQueueManager owns the command queue that currently receives commands
// and the pool from which queues are taken and to which they are returned.
type commandQueueManager struct {
// pool provides command queues and receives them back after they are flushed.
pool commandQueuePool
// current is the queue that receives newly enqueued commands.
// It is nil until a queue is first obtained from the pool.
current *commandQueue
}
// theCommandQueueManager is the package-wide command queue manager.
var theCommandQueueManager commandQueueManager
// enqueueCommand appends command to the current command queue. When there is
// no current queue yet, one is obtained from the pool first.
func (c *commandQueueManager) enqueueCommand(command command) {
	if c.current == nil {
		q, err := c.pool.get()
		if err != nil {
			// This function cannot report an error. The pool keeps returning
			// the same error, so the next flush, which also asks the pool for
			// a queue, surfaces it. Fall back to a fresh queue so that
			// enqueueing never dereferences a nil queue.
			q = &commandQueue{}
		}
		c.current = q
	}
	c.current.Enqueue(command)
}
// putCommandQueue returns commandQueue to the pool.
//
// putCommandQueue can be called from any goroutine; the pool guards its cache with a mutex.
func (c *commandQueueManager) putCommandQueue(commandQueue *commandQueue) {
c.pool.put(commandQueue)
}
// enqueueDrawTrianglesCommand enqueues a draw-triangles command on the current
// command queue, forwarding all the arguments unchanged. When there is no
// current queue yet, one is obtained from the pool first.
func (c *commandQueueManager) enqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint16, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, evenOdd bool) {
	if c.current == nil {
		q, err := c.pool.get()
		if err != nil {
			// This function cannot report an error. The pool keeps returning
			// the same error, so the next flush, which also asks the pool for
			// a queue, surfaces it. Fall back to a fresh queue so that
			// enqueueing never dereferences a nil queue.
			q = &commandQueue{}
		}
		c.current = q
	}
	c.current.EnqueueDrawTrianglesCommand(dst, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, evenOdd)
}
// flush switches to a new current command queue and flushes the previous one.
//
// The next queue is requested from the pool first. If that fails, the error is
// returned and the current queue is left in place without being flushed.
// If there was no previous queue, nothing is flushed.
func (c *commandQueueManager) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool, swapBuffersForGL func()) error {
	next, err := c.pool.get()
	if err != nil {
		return err
	}

	// Switch the command queue.
	prev := c.current
	c.current = next

	if prev == nil {
		return nil
	}
	return prev.Flush(graphicsDriver, endFrame, swapBuffersForGL)
}
// uint32sBuffer is a reusable buffer to allocate []uint32.
//
// Slices are handed out by alloc, and reset makes the storage available again.
type uint32sBuffer struct {
// buf is the current backing storage. Its length is the number of elements
// handed out from it so far.
buf []uint32
}
// roundUpPower2 returns the smallest power of two that is not less than x.
// For x <= 1 the result is 1.
func roundUpPower2(x int) int {
	result := 1
	for result < x {
		result <<= 1
	}
	return result
}
// max returns the larger of a and b.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// alloc returns a slice of length n taken from the buffer.
//
// When the current storage cannot hold n more elements, a new storage is
// created and used from its start. Its capacity is the current length plus n,
// rounded up to a power of two, and at least 16. Slices handed out earlier
// keep referring to the old storage. The contents of the returned slice are
// not cleared when the storage is reused after reset.
func (b *uint32sBuffer) alloc(n int) []uint32 {
	cur := b.buf
	if need := len(cur) + n; need > cap(cur) {
		cur = make([]uint32, 0, max(roundUpPower2(need), 16))
	}

	start := len(cur)
	b.buf = cur[:start+n]
	return b.buf[start:]
}
// reset marks the whole storage as unused while keeping its capacity,
// so that later alloc calls reuse the same memory.
func (b *uint32sBuffer) reset() {
b.buf = b.buf[:0]
}