internal/graphics: remove the common vertices backend

It was impossible to use the common vertices backend in a
thread-safe manner, and this actually caused race conditions.

This change fixes the issue by giving up on a central backend and
letting each image have its own vertices buffer.

Closes #2473
This commit is contained in:
Hajime Hoshi 2022-12-02 21:04:03 +09:00
parent d407607168
commit 61f1d8b69f
7 changed files with 89 additions and 152 deletions

View File

@ -40,6 +40,9 @@ type Image struct {
// Do not add a 'buffering' member that is resolved lazily.
// It is easy to forget to resolve such a buffer (#2362).
// tmpVertices must not be reused until the vertices are sent to the graphics command queue.
tmpVertices []float32
}
func (i *Image) copyCheck() {
@ -241,7 +244,8 @@ func (i *Image) DrawImage(img *Image, options *DrawImageOptions) {
sx1, sy1 := img.adjustPosition(bounds.Max.X, bounds.Max.Y)
colorm, cr, cg, cb, ca := colorMToScale(options.ColorM.affineColorM())
cr, cg, cb, ca = options.ColorScale.apply(cr, cg, cb, ca)
vs := graphics.QuadVertices(float32(sx0), float32(sy0), float32(sx1), float32(sy1), a, b, c, d, tx, ty, cr, cg, cb, ca)
vs := i.ensureTmpVertices(4 * graphics.VertexFloatCount)
graphics.QuadVertices(vs, float32(sx0), float32(sy0), float32(sx1), float32(sy1), a, b, c, d, tx, ty, cr, cg, cb, ca)
is := graphics.QuadIndices()
srcs := [graphics.ShaderImageCount]*ui.Image{img.image}
@ -441,7 +445,7 @@ func (i *Image) DrawTriangles(vertices []Vertex, indices []uint16, img *Image, o
colorm, cr, cg, cb, ca := colorMToScale(options.ColorM.affineColorM())
vs := graphics.Vertices(len(vertices))
vs := i.ensureTmpVertices(len(vertices) * graphics.VertexFloatCount)
dst := i
if options.ColorScaleMode == ColorScaleModeStraightAlpha {
for i, v := range vertices {
@ -577,7 +581,7 @@ func (i *Image) DrawTrianglesShader(vertices []Vertex, indices []uint16, shader
blend = options.CompositeMode.blend().internalBlend()
}
vs := graphics.Vertices(len(vertices))
vs := i.ensureTmpVertices(len(vertices) * graphics.VertexFloatCount)
dst := i
src := options.Images[0]
for i, v := range vertices {
@ -732,7 +736,8 @@ func (i *Image) DrawRectShader(width, height int, shader *Shader, options *DrawR
}
a, b, c, d, tx, ty := options.GeoM.elements32()
cr, cg, cb, ca := options.ColorScale.elements()
vs := graphics.QuadVertices(float32(sx), float32(sy), float32(sx+width), float32(sy+height), a, b, c, d, tx, ty, cr, cg, cb, ca)
vs := i.ensureTmpVertices(4 * graphics.VertexFloatCount)
graphics.QuadVertices(vs, float32(sx), float32(sy), float32(sx+width), float32(sy+height), a, b, c, d, tx, ty, cr, cg, cb, ca)
is := graphics.QuadIndices()
var offsets [graphics.ShaderImageCount - 1][2]float32
@ -1143,6 +1148,13 @@ func colorMToScale(colorm affine.ColorM) (newColorM affine.ColorM, r, g, b, a fl
return affine.ColorMIdentity{}, r * a, g * a, b * a, a
}
// ensureTmpVertices returns a slice of length n backed by the image's
// tmpVertices scratch buffer.
//
// The existing buffer is reused when its capacity is at least n; otherwise a
// new buffer of exactly n elements is allocated and stored in the image first.
// The contents of a reused buffer are not cleared.
func (i *Image) ensureTmpVertices(n int) []float32 {
	buf := i.tmpVertices
	if n > cap(buf) {
		buf = make([]float32, n)
		i.tmpVertices = buf
	}
	return buf[:n]
}
// private implements FinalScreen.
// The body is empty: the method performs no work when called.
// NOTE(review): presumably it exists only so that *Image satisfies FinalScreen — confirm against that interface's definition.
func (*Image) private() {
}

View File

@ -321,7 +321,8 @@ func (i *Image) putOnAtlas(graphicsDriver graphicsdriver.Graphics) error {
newI := NewImage(i.width, i.height, ImageTypeRegular)
w, h := float32(i.width), float32(i.height)
vs := graphics.QuadVertices(0, 0, w, h, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1)
vs := make([]float32, 4*graphics.VertexFloatCount)
graphics.QuadVertices(vs, 0, 0, w, h, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1)
is := graphics.QuadIndices()
dr := graphicsdriver.Region{
X: 0,

View File

@ -148,9 +148,13 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
}
if maybeCanAddDelayedCommand() {
vs := make([]float32, len(vertices))
copy(vs, vertices)
is := make([]uint16, len(indices))
copy(is, indices)
// TODO: Copy uniform variables. Now this is created for each call, so copying is not necessary, but this is fragile.
if tryAddDelayedCommand(func() {
// Arguments are not copied. Copying is the caller's responsibility.
i.DrawTriangles(srcs, vertices, indices, blend, dstRegion, srcRegion, subimageOffsets, shader, uniforms, evenOdd)
i.DrawTriangles(srcs, vs, is, blend, dstRegion, srcRegion, subimageOffsets, shader, uniforms, evenOdd)
}) {
return
}

View File

@ -14,10 +14,6 @@
package graphics
import (
"sync"
)
const (
ShaderImageCount = 4
@ -55,138 +51,53 @@ func QuadIndices() []uint16 {
return quadIndices
}
var (
// theVerticesBackend is the single package-level backend shared by Vertices,
// LockAndResetVertices and QuadVertices.
theVerticesBackend = &verticesBackend{}
)
// verticesBackend hands out sub-slices of one shared float32 buffer (see slice)
// and rewinds that buffer when lockAndReset is called.
//
// TODO: The logic is very similar to atlas.temporaryPixels. Unify them.
type verticesBackend struct {
// backend is the shared buffer that the returned slices point into.
backend []float32
// pos is the index in backend at which the next returned slice starts.
pos int
// notFullyUsedTime counts consecutive lockAndReset calls in which the used
// part of backend would have fit in a smaller buffer; it is capped at 60.
notFullyUsedTime int
// m is held by slice and lockAndReset while they access the fields above.
m sync.Mutex
}
// verticesBackendFloat32Size returns the buffer length, in float32 elements,
// to use for holding at least size elements.
//
// The result starts at 128*VertexFloatCount and is doubled until it is no
// smaller than size, so it is never less than 128*VertexFloatCount.
func verticesBackendFloat32Size(size int) int {
	capacity := 128 * VertexFloatCount
	for {
		if capacity >= size {
			return capacity
		}
		capacity *= 2
	}
}
// max returns the larger of a and b. When both are equal, that shared value
// is returned.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}
// slice returns a float32 slice with room for n vertices, i.e. of length
// n*VertexFloatCount, carved out of the shared backend buffer.
//
// The backend's mutex is held for the duration of the call. When the current
// buffer has too little room left, a new buffer is allocated (at least twice
// as long as the old one) and the slice is taken from its start; slices handed
// out earlier keep pointing into the previous buffer.
func (v *verticesBackend) slice(n int) []float32 {
	v.m.Lock()
	defer v.m.Unlock()

	count := n * VertexFloatCount
	if end := v.pos + count; end > len(v.backend) {
		grown := max(2*len(v.backend), verticesBackendFloat32Size(count))
		v.backend = make([]float32, grown)
		v.pos = 0
	}

	start := v.pos
	out := v.backend[start : start+count]
	v.pos = start + count
	return out
}
// lockAndReset invokes f with the backend's mutex held and then rewinds the
// backend so that its buffer can be handed out again.
//
// If f returns an error, that error is returned immediately and the backend
// is left untouched. Otherwise pos is set back to 0 and nil is returned. The
// buffer is released once it has been larger than needed for
// maxNotFullyUsedTime successful calls in a row.
func (v *verticesBackend) lockAndReset(f func() error) error {
	const maxNotFullyUsedTime = 60

	v.m.Lock()
	defer v.m.Unlock()

	err := f()
	if err != nil {
		return err
	}

	// Track for how many consecutive calls the buffer has been oversized.
	oversized := verticesBackendFloat32Size(v.pos) < len(v.backend)
	switch {
	case !oversized:
		v.notFullyUsedTime = 0
	case v.notFullyUsedTime < maxNotFullyUsedTime:
		v.notFullyUsedTime++
	}

	// Drop a buffer that has stayed oversized for long enough.
	if len(v.backend) > 0 && v.notFullyUsedTime == maxNotFullyUsedTime {
		v.backend = nil
		v.notFullyUsedTime = 0
	}

	v.pos = 0
	return nil
}
// Vertices returns a float32 slice for n vertices.
// Vertices returns a slice that never overlaps with other slices returned by this function,
// QuadVertices sets a float32 slice for a quadrangle.
// QuadVertices sets a slice that never overlaps with other slices returned by this function,
// and users can do optimization based on this fact.
func Vertices(n int) []float32 {
// The shared backend returns a slice of n*VertexFloatCount floats.
return theVerticesBackend.slice(n)
}
// LockAndResetVertices runs f while the shared vertices backend is locked.
// If f returns an error, that error is returned as is; otherwise the backend
// is rewound so that its buffer can be handed out again, and nil is returned.
func LockAndResetVertices(f func() error) error {
return theVerticesBackend.lockAndReset(f)
}
// QuadVertices returns a float32 slice for a quadrangle.
// QuadVertices returns a slice that never overlaps with other slices returned by this function,
// and users can do optimization based on this fact.
func QuadVertices(sx0, sy0, sx1, sy1 float32, a, b, c, d, tx, ty float32, cr, cg, cb, ca float32) []float32 {
func QuadVertices(dst []float32, sx0, sy0, sx1, sy1 float32, a, b, c, d, tx, ty float32, cr, cg, cb, ca float32) {
x := sx1 - sx0
y := sy1 - sy0
ax, by, cx, dy := a*x, b*y, c*x, d*y
u0, v0, u1, v1 := float32(sx0), float32(sy0), float32(sx1), float32(sy1)
// Use the vertex backend instead of calling make to reduce GCs (#1521).
vs := theVerticesBackend.slice(4)
// This function is very performance-sensitive and implemented in a very dumb way.
_ = vs[:4*VertexFloatCount]
_ = dst[:4*VertexFloatCount]
vs[0] = adjustDestinationPixel(tx)
vs[1] = adjustDestinationPixel(ty)
vs[2] = u0
vs[3] = v0
vs[4] = cr
vs[5] = cg
vs[6] = cb
vs[7] = ca
dst[0] = adjustDestinationPixel(tx)
dst[1] = adjustDestinationPixel(ty)
dst[2] = u0
dst[3] = v0
dst[4] = cr
dst[5] = cg
dst[6] = cb
dst[7] = ca
vs[8] = adjustDestinationPixel(ax + tx)
vs[9] = adjustDestinationPixel(cx + ty)
vs[10] = u1
vs[11] = v0
vs[12] = cr
vs[13] = cg
vs[14] = cb
vs[15] = ca
dst[8] = adjustDestinationPixel(ax + tx)
dst[9] = adjustDestinationPixel(cx + ty)
dst[10] = u1
dst[11] = v0
dst[12] = cr
dst[13] = cg
dst[14] = cb
dst[15] = ca
vs[16] = adjustDestinationPixel(by + tx)
vs[17] = adjustDestinationPixel(dy + ty)
vs[18] = u0
vs[19] = v1
vs[20] = cr
vs[21] = cg
vs[22] = cb
vs[23] = ca
dst[16] = adjustDestinationPixel(by + tx)
dst[17] = adjustDestinationPixel(dy + ty)
dst[18] = u0
dst[19] = v1
dst[20] = cr
dst[21] = cg
dst[22] = cb
dst[23] = ca
vs[24] = adjustDestinationPixel(ax + by + tx)
vs[25] = adjustDestinationPixel(cx + dy + ty)
vs[26] = u1
vs[27] = v1
vs[28] = cr
vs[29] = cg
vs[30] = cb
vs[31] = ca
return vs
dst[24] = adjustDestinationPixel(ax + by + tx)
dst[25] = adjustDestinationPixel(cx + dy + ty)
dst[26] = u1
dst[27] = v1
dst[28] = cr
dst[29] = cg
dst[30] = cb
dst[31] = ca
}
func adjustDestinationPixel(x float32) float32 {

View File

@ -148,12 +148,12 @@ func (m *Mipmap) level(level int) *buffered.Image {
}
var src *buffered.Image
var vs []float32
vs := make([]float32, 4*graphics.VertexFloatCount)
shader := NearestFilterShader
switch {
case level == 1:
src = m.orig
vs = graphics.QuadVertices(0, 0, float32(m.width), float32(m.height), 0.5, 0, 0, 0.5, 0, 0, 1, 1, 1, 1)
graphics.QuadVertices(vs, 0, 0, float32(m.width), float32(m.height), 0.5, 0, 0, 0.5, 0, 0, 1, 1, 1, 1)
shader = LinearFilterShader
case level > 1:
src = m.level(level - 1)
@ -163,7 +163,7 @@ func (m *Mipmap) level(level int) *buffered.Image {
}
w := sizeForLevel(m.width, level-1)
h := sizeForLevel(m.height, level-1)
vs = graphics.QuadVertices(0, 0, float32(w), float32(h), 0.5, 0, 0, 0.5, 0, 0, 1, 1, 1, 1)
graphics.QuadVertices(vs, 0, 0, float32(w), float32(h), 0.5, 0, 0, 0.5, 0, 0, 1, 1, 1, 1)
shader = LinearFilterShader
default:
panic(fmt.Sprintf("mipmap: invalid level: %d", level))

View File

@ -21,7 +21,6 @@ import (
"github.com/hajimehoshi/ebiten/v2/internal/buffered"
"github.com/hajimehoshi/ebiten/v2/internal/clock"
"github.com/hajimehoshi/ebiten/v2/internal/debug"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver"
"github.com/hajimehoshi/ebiten/v2/internal/hooks"
"github.com/hajimehoshi/ebiten/v2/internal/mipmap"
@ -105,15 +104,7 @@ func (c *context) updateFrameImpl(graphicsDriver graphicsdriver.Graphics, update
return err
}
defer func() {
// All the vertices data are consumed at the end of the frame, and the data backend can be
// available after that. Until then, lock the vertices backend.
err1 := graphics.LockAndResetVertices(func() error {
if err := buffered.EndFrame(graphicsDriver); err != nil {
return err
}
return nil
})
if err == nil {
if err1 := buffered.EndFrame(graphicsDriver); err == nil && err1 != nil {
err = err1
}
}()

View File

@ -49,6 +49,12 @@ type Image struct {
// drawCallback is a callback called when DrawTriangles or WritePixels is called.
// drawCallback is useful to detect whether the image is manipulated or not after a certain time.
drawCallback func()
// These temporary vertices must not be reused until the vertices are sent to the graphics command queue.
tmpVerticesForFlushing []float32
tmpVerticesForCopying []float32
tmpVerticesForFill []float32
}
func NewImage(width, height int, imageType atlas.ImageType) *Image {
@ -106,7 +112,11 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
// Copy the current rendering result to get the correct blending result.
if blend != graphicsdriver.BlendSourceOver && !i.bigOffscreenBufferDirty {
srcs := [graphics.ShaderImageCount]*Image{i}
vs := graphics.QuadVertices(
if len(i.tmpVerticesForCopying) < 4*graphics.VertexFloatCount {
i.tmpVerticesForCopying = make([]float32, 4*graphics.VertexFloatCount)
}
graphics.QuadVertices(
i.tmpVerticesForCopying,
0, 0, float32(i.width), float32(i.height),
bigOffscreenScale, 0, 0, bigOffscreenScale, 0, 0,
1, 1, 1, 1)
@ -117,7 +127,7 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
Width: float32(i.width * bigOffscreenScale),
Height: float32(i.height * bigOffscreenScale),
}
i.bigOffscreenBuffer.DrawTriangles(srcs, vs, is, graphicsdriver.BlendCopy, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, NearestFilterShader, nil, false, true, false)
i.bigOffscreenBuffer.DrawTriangles(srcs, i.tmpVerticesForCopying, is, graphicsdriver.BlendCopy, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, NearestFilterShader, nil, false, true, false)
}
for i := 0; i < len(vertices); i += graphics.VertexFloatCount {
@ -221,7 +231,7 @@ func (i *Image) flushDotsBufferIfNeeded() {
}
l := len(i.dotsBuffer)
vs := graphics.Vertices(l * 4)
vs := make([]float32, l*4*graphics.VertexFloatCount)
is := make([]uint16, l*6)
sx, sy := float32(1), float32(1)
var idx int
@ -296,7 +306,11 @@ func (i *Image) flushBigOffscreenBufferIfNeeded() {
i.bigOffscreenBufferDirty = false
srcs := [graphics.ShaderImageCount]*Image{i.bigOffscreenBuffer}
vs := graphics.QuadVertices(
if len(i.tmpVerticesForFlushing) < 4*graphics.VertexFloatCount {
i.tmpVerticesForFlushing = make([]float32, 4*graphics.VertexFloatCount)
}
graphics.QuadVertices(
i.tmpVerticesForFlushing,
0, 0, float32(i.width*bigOffscreenScale), float32(i.height*bigOffscreenScale),
1.0/bigOffscreenScale, 0, 0, 1.0/bigOffscreenScale, 0, 0,
1, 1, 1, 1)
@ -311,7 +325,7 @@ func (i *Image) flushBigOffscreenBufferIfNeeded() {
if i.bigOffscreenBufferBlend != graphicsdriver.BlendSourceOver {
blend = graphicsdriver.BlendCopy
}
i.DrawTriangles(srcs, vs, is, blend, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, LinearFilterShader, nil, false, true, false)
i.DrawTriangles(srcs, i.tmpVerticesForFlushing, is, blend, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, LinearFilterShader, nil, false, true, false)
i.bigOffscreenBuffer.clear()
i.bigOffscreenBufferDirty = false
@ -346,7 +360,11 @@ func (i *Image) Fill(r, g, b, a float32, x, y, width, height int) {
Height: float32(height),
}
vs := graphics.QuadVertices(
if len(i.tmpVerticesForFill) < 4*graphics.VertexFloatCount {
i.tmpVerticesForFill = make([]float32, 4*graphics.VertexFloatCount)
}
graphics.QuadVertices(
i.tmpVerticesForFill,
1, 1, float32(whiteImage.width-1), float32(whiteImage.height-1),
float32(i.width), 0, 0, float32(i.height), 0, 0,
r, g, b, a)
@ -354,5 +372,5 @@ func (i *Image) Fill(r, g, b, a float32, x, y, width, height int) {
srcs := [graphics.ShaderImageCount]*Image{whiteImage}
i.DrawTriangles(srcs, vs, is, graphicsdriver.BlendCopy, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, NearestFilterShader, nil, false, true, false)
i.DrawTriangles(srcs, i.tmpVerticesForFill, is, graphicsdriver.BlendCopy, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, NearestFilterShader, nil, false, true, false)
}