internal/ui: reland: move dotsBuffer from internal/ui.Image to internal/buffered.Image

This is a reland of fb426cd0dd.

This is a refactoring, since caching logic belongs in internal/buffered,
but it is also a performance optimization that makes pixels and dotsBuffer
work well together.
This commit is contained in:
Hajime Hoshi 2024-01-15 23:45:00 +09:00
parent b2759b66f9
commit 7b3eae3dcb
3 changed files with 225 additions and 114 deletions

View File

@ -4582,3 +4582,29 @@ func TestImageDrawImageAfterDeallocation(t *testing.T) {
}
}
}
// TestUnsyncedPixels verifies that a pixel written with Set survives a
// ReadPixels call followed by a WritePixels call on a region that does not
// contain that pixel.
func TestUnsyncedPixels(t *testing.T) {
	// This exercises a corner case in internal/buffered.Image.
	white := color.RGBA{0xff, 0xff, 0xff, 0xff}

	dst := ebiten.NewImage(16, 16)

	// Setting a single pixel adds a dotsBuffer entry at (0, 0).
	dst.Set(0, 0, white)

	// Reading the whole image merges that entry into the cached pixels.
	// In the current implementation the dotsBuffer entry is gone afterwards.
	dst.ReadPixels(make([]byte, 4*16*16))

	// Write to a region that does not include (0, 0).
	sub := dst.SubImage(image.Rect(1, 1, 3, 3)).(*ebiten.Image)
	sub.WritePixels(make([]byte, 4*2*2))

	// Drawing onto dst flushes the unsynced pixel cache.
	dst.DrawImage(ebiten.NewImage(16, 16), nil)

	// The pixel at (0, 0) must still hold the color that was set.
	if got := dst.At(0, 0); got != white {
		t.Errorf("got: %v, want: %v", got, white)
	}
}

View File

@ -23,30 +23,61 @@ import (
"github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver"
)
// whiteImage is a 3x3 image whose bytes are all 0xff. It is used as the
// source image when the buffered dots are drawn with DrawTriangles.
var whiteImage *Image

// init creates whiteImage and fills every byte of it with 0xff.
func init() {
	const size = 3

	whiteImage = NewImage(size, size, atlas.ImageTypeRegular)

	// 4 bytes per pixel, all set to 0xff.
	white := make([]byte, 4*size*size)
	for k := 0; k < len(white); k++ {
		white[k] = 0xff
	}
	whiteImage.WritePixels(white, image.Rect(0, 0, size, size))
}
// Image wraps an atlas.Image and keeps CPU-side pixel data (dotsBuffer and
// pixels) next to it.
type Image struct {
// img is the underlying atlas image; NewImage creates it.
img *atlas.Image
// width and height are the image size given to NewImage.
width int
height int
// dotsBuffer is a buffer for drawing a lot of dots.
// It maps a pixel position to the 4 bytes given to WritePixels for that pixel.
// An entry in this map is the primary data of pixels for ReadPixels.
dotsBuffer map[image.Point][4]byte
// pixels is the cached pixels for ReadPixels, covering the whole image
// (4 bytes per pixel). It is nil until ReadPixels creates it.
// pixels might be out of sync with GPU.
// The data of pixels is the secondary data of pixels for ReadPixels.
pixels []byte
// pixelsUnsynced represents whether the pixels in CPU and GPU are not synced.
pixelsUnsynced bool
}
// NewImage creates an Image of the given size, backed by a new atlas image
// of the given type.
//
// The dots buffer and the pixel cache start out empty (nil).
func NewImage(width, height int, imageType atlas.ImageType) *Image {
	// Each field is listed exactly once: repeating a field name in a
	// composite literal is rejected by the compiler.
	return &Image{
		img:    atlas.NewImage(width, height, imageType),
		width:  width,
		height: height,
	}
}
// invalidatePixels discards the cached pixels. It only resets the pixels
// field; dotsBuffer and pixelsUnsynced are left untouched.
func (i *Image) invalidatePixels() {
i.pixels = nil
}
// Deallocate deallocates the underlying atlas image and drops all CPU-side
// pixel state held by this image.
func (i *Image) Deallocate() {
	i.img.Deallocate()

	// Forget the pending dots, the cached pixels, and the unsynced flag.
	i.dotsBuffer, i.pixels, i.pixelsUnsynced = nil, nil, false
}
func (i *Image) ReadPixels(graphicsDriver graphicsdriver.Graphics, pixels []byte, region image.Rectangle) error {
// Do not call flushDotsBufferIfNeeded here. This would slow (image/draw).Draw.
// See ebiten.TestImageDrawOver.
if region.Dx() == 1 && region.Dy() == 1 {
if c, ok := i.dotsBuffer[region.Min]; ok {
copy(pixels, c[:])
return nil
}
}
if i.pixels == nil {
pix := make([]byte, 4*i.width*i.height)
if err := i.img.ReadPixels(graphicsDriver, pix, image.Rect(0, 0, i.width, i.height)); err != nil {
@ -55,16 +86,30 @@ func (i *Image) ReadPixels(graphicsDriver graphicsdriver.Graphics, pixels []byte
i.pixels = pix
}
if len(i.dotsBuffer) > 0 {
for pos, clr := range i.dotsBuffer {
idx := 4 * (pos.Y*i.width + pos.X)
i.pixels[idx] = clr[0]
i.pixels[idx+1] = clr[1]
i.pixels[idx+2] = clr[2]
i.pixels[idx+3] = clr[3]
delete(i.dotsBuffer, pos)
}
i.pixelsUnsynced = true
}
lineWidth := 4 * region.Dx()
for j := 0; j < region.Dy(); j++ {
dstX := 4 * j * region.Dx()
srcX := 4 * ((region.Min.Y+j)*i.width + region.Min.X)
copy(pixels[dstX:dstX+lineWidth], i.pixels[srcX:srcX+lineWidth])
}
return nil
}
// DumpScreenshot syncs any pending CPU-side pixel data and then delegates to
// the underlying atlas image's DumpScreenshot. The arguments are passed
// through unchanged, and its results are returned as they are.
func (i *Image) DumpScreenshot(graphicsDriver graphicsdriver.Graphics, name string, blackbg bool) (string, error) {
// Sync first so that buffered dots and unsynced cached pixels are written
// to the underlying image before it is dumped.
i.syncPixelsIfNeeded()
return i.img.DumpScreenshot(graphicsDriver, name, blackbg)
}
@ -73,7 +118,48 @@ func (i *Image) WritePixels(pix []byte, region image.Rectangle) {
if l := 4 * region.Dx() * region.Dy(); len(pix) != l {
panic(fmt.Sprintf("buffered: len(pix) was %d but must be %d", len(pix), l))
}
i.invalidatePixels()
// Writing one pixel is a special case.
// Do not discard the cached pixels in this case, especially for (image/draw).Image.
if region.Dx() == 1 && region.Dy() == 1 {
if i.dotsBuffer == nil {
i.dotsBuffer = map[image.Point][4]byte{}
}
var clr [4]byte
copy(clr[:], pix)
i.dotsBuffer[region.Min] = clr
if len(i.dotsBuffer) >= 10000 {
i.syncPixelsIfNeeded()
}
return
}
// If i.pixels is not nil, this indicates ReadPixels is called and might be called again later.
// Keep and update the pixels data in this case.
if i.pixels != nil {
lineWidth := 4 * region.Dx()
for j := 0; j < region.Dy(); j++ {
dstX := 4 * ((region.Min.Y+j)*i.width + region.Min.X)
srcX := 4 * j * region.Dx()
copy(i.pixels[dstX:dstX+lineWidth], pix[srcX:srcX+lineWidth])
}
// pixelsUnsynced can NOT be set to false, as the pixels outside the region are not written by WritePixels here.
// See the test ebiten.TestUnsyncedPixels.
}
// Even if i.pixels is nil, do not create a pixel cache.
// It is in theory possible to copy the argument pixels, but this tends to consume a lot of memory.
// Avoid this unless ReadPixels is called.
// Remove entries in the dots buffer that are overwritten by this WritePixels call.
for pos := range i.dotsBuffer {
if !pos.In(region) {
continue
}
delete(i.dotsBuffer, pos)
}
i.img.WritePixels(pix, region)
}
@ -86,8 +172,14 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
if i == src {
panic("buffered: Image.DrawTriangles: source images must be different from the receiver")
}
if src != nil {
// src's pixels have to be synced between CPU and GPU,
// but doesn't have to be cleared since src is not modified in this function.
src.syncPixelsIfNeeded()
}
i.invalidatePixels()
}
i.syncPixelsIfNeeded()
var imgs [graphics.ShaderImageCount]*atlas.Image
for i, img := range srcs {
@ -98,4 +190,104 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
}
i.img.DrawTriangles(imgs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
// After rendering, the pixel cache is no longer valid.
i.pixels = nil
}
// syncPixelsIfNeeded syncs the pixels between CPU and GPU.
//
// It does nothing when dotsBuffer is empty and the cached pixels are not
// marked as unsynced. After syncPixelsIfNeeded, dotsBuffer is cleared, but
// pixels might remain.
func (i *Image) syncPixelsIfNeeded() {
	if !i.pixelsUnsynced && len(i.dotsBuffer) == 0 {
		return
	}

	if i.pixels != nil {
		// If this image already has pixels, use WritePixels instead of
		// DrawTriangles for efficiency: merge the dots into the cache and
		// upload the whole image at once.
		for pt, rgba := range i.dotsBuffer {
			off := 4 * (pt.Y*i.width + pt.X)
			for ch, v := range rgba {
				i.pixels[off+ch] = v
			}
			delete(i.dotsBuffer, pt)
		}
		i.img.WritePixels(i.pixels, image.Rect(0, 0, i.width, i.height))
		i.pixelsUnsynced = false
		return
	}

	// Without a pixel cache there is nothing that could be unsynced, so only
	// the dots can have brought us here.
	if i.pixelsUnsynced {
		panic("buffered: pixelsUnsynced must be false as pixels is nil")
	}
	if len(i.dotsBuffer) == 0 {
		panic("buffered: len(i.dotsBuffers) must be > 0 at this point")
	}

	// Every dot becomes a 1x1 quad: 4 vertices and 6 indices (2 triangles).
	n := len(i.dotsBuffer)
	vertices := make([]float32, n*4*graphics.VertexFloatCount)
	indices := make([]uint32, n*6)

	// The source position is (1, 1) in whiteImage, and each quad samples the
	// 1x1 area starting there.
	const srcX, srcY = float32(1), float32(1)

	// Offsets of the four quad corners, applied to both the destination and
	// the source position.
	corners := [4][2]float32{{0, 0}, {1, 0}, {0, 1}, {1, 1}}

	dot := 0
	for pt, rgba := range i.dotsBuffer {
		x, y := float32(pt.X), float32(pt.Y)
		r := float32(rgba[0]) / 0xff
		g := float32(rgba[1]) / 0xff
		b := float32(rgba[2]) / 0xff
		a := float32(rgba[3]) / 0xff

		base := graphics.VertexFloatCount * 4 * dot
		for k, c := range corners {
			// Each vertex takes 8 floats: dst x/y, src x/y, then the color.
			v := base + 8*k
			vertices[v] = x + c[0]
			vertices[v+1] = y + c[1]
			vertices[v+2] = srcX + c[0]
			vertices[v+3] = srcY + c[1]
			vertices[v+4] = r
			vertices[v+5] = g
			vertices[v+6] = b
			vertices[v+7] = a
		}

		// Two triangles per quad: (0, 1, 2) and (1, 2, 3).
		first := uint32(4 * dot)
		indices[6*dot] = first
		indices[6*dot+1] = first + 1
		indices[6*dot+2] = first + 2
		indices[6*dot+3] = first + 1
		indices[6*dot+4] = first + 2
		indices[6*dot+5] = first + 3

		dot++
	}

	srcs := [graphics.ShaderImageCount]*atlas.Image{whiteImage.img}
	bounds := image.Rect(0, 0, i.width, i.height)
	var srcRegions [graphics.ShaderImageCount]image.Rectangle
	i.img.DrawTriangles(srcs, vertices, indices, graphicsdriver.BlendCopy, bounds, srcRegions, atlas.NearestFilterShader, nil, graphicsdriver.FillAll)

	// TODO: Use clear if Go 1.21 is available.
	for pt := range i.dotsBuffer {
		delete(i.dotsBuffer, pt)
	}
}

View File

@ -43,8 +43,6 @@ type Image struct {
height int
imageType atlas.ImageType
dotsBuffer map[image.Point][4]byte
// lastBlend is the lastly-used blend for mipmap.Image.
lastBlend graphicsdriver.Blend
@ -77,7 +75,6 @@ func (i *Image) Deallocate() {
i.bigOffscreenBuffer.deallocate()
}
i.mipmap.Deallocate()
i.dotsBuffer = nil
}
func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule, canSkipMipmap bool, antialias bool) {
@ -88,9 +85,6 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderImageCount]*Image, vertices [
i.lastBlend = blend
if antialias {
// Flush the other buffer to make the buffers exclusive.
i.flushDotsBufferIfNeeded()
if i.bigOffscreenBuffer == nil {
var imageType atlas.ImageType
switch i.imageType {
@ -126,25 +120,6 @@ func (i *Image) WritePixels(pix []byte, region image.Rectangle) {
if i.modifyCallback != nil {
i.modifyCallback()
}
if region.Dx() == 1 && region.Dy() == 1 {
// Flush the other buffer to make the buffers exclusive.
i.flushBigOffscreenBufferIfNeeded()
if i.dotsBuffer == nil {
i.dotsBuffer = map[image.Point][4]byte{}
}
var clr [4]byte
copy(clr[:], pix)
i.dotsBuffer[region.Min] = clr
if len(i.dotsBuffer) >= 10000 {
i.flushDotsBufferIfNeeded()
}
return
}
i.flushBufferIfNeeded()
i.mipmap.WritePixels(pix, region)
}
@ -157,17 +132,6 @@ func (i *Image) ReadPixels(pixels []byte, region image.Rectangle) {
i.flushBigOffscreenBufferIfNeeded()
if region.Dx() == 1 && region.Dy() == 1 {
if c, ok := i.dotsBuffer[region.Min]; ok {
copy(pixels, c[:])
return
}
// Do not call flushDotsBufferIfNeeded here. This would slow (image/draw).Draw.
// See ebiten.TestImageDrawOver.
} else {
i.flushDotsBufferIfNeeded()
}
if err := i.ui.readPixels(i.mipmap, pixels, region); err != nil {
if panicOnErrorOnReadingPixels {
panic(err)
@ -182,80 +146,9 @@ func (i *Image) DumpScreenshot(name string, blackbg bool) (string, error) {
}
// flushBufferIfNeeded flushes both pending buffers of the image: the dots
// buffer and the big offscreen buffer.
func (i *Image) flushBufferIfNeeded() {
// The buffers are exclusive and the order should not matter.
i.flushDotsBufferIfNeeded()
i.flushBigOffscreenBufferIfNeeded()
}
// flushDotsBufferIfNeeded draws every dot in dotsBuffer onto the mipmap as a
// 1x1 quad and then drops the buffer. It does nothing when dotsBuffer is
// empty.
func (i *Image) flushDotsBufferIfNeeded() {
	n := len(i.dotsBuffer)
	if n == 0 {
		return
	}

	// Every dot becomes a 1x1 quad: 4 vertices and 6 indices (2 triangles).
	vertices := make([]float32, n*4*graphics.VertexFloatCount)
	indices := make([]uint32, n*6)

	// The source position is (1, 1) in the white image, and each quad samples
	// the 1x1 area starting there.
	const srcX, srcY = float32(1), float32(1)

	// Offsets of the four quad corners, applied to both the destination and
	// the source position.
	corners := [4][2]float32{{0, 0}, {1, 0}, {0, 1}, {1, 1}}

	dot := 0
	for pt, rgba := range i.dotsBuffer {
		x, y := float32(pt.X), float32(pt.Y)
		r := float32(rgba[0]) / 0xff
		g := float32(rgba[1]) / 0xff
		b := float32(rgba[2]) / 0xff
		a := float32(rgba[3]) / 0xff

		base := graphics.VertexFloatCount * 4 * dot
		for k, c := range corners {
			// Each vertex takes 8 floats: dst x/y, src x/y, then the color.
			v := base + 8*k
			vertices[v] = x + c[0]
			vertices[v+1] = y + c[1]
			vertices[v+2] = srcX + c[0]
			vertices[v+3] = srcY + c[1]
			vertices[v+4] = r
			vertices[v+5] = g
			vertices[v+6] = b
			vertices[v+7] = a
		}

		// Two triangles per quad: (0, 1, 2) and (1, 2, 3).
		first := uint32(4 * dot)
		indices[6*dot] = first
		indices[6*dot+1] = first + 1
		indices[6*dot+2] = first + 2
		indices[6*dot+3] = first + 1
		indices[6*dot+4] = first + 2
		indices[6*dot+5] = first + 3

		dot++
	}

	// Drop the buffer before issuing the draw call.
	i.dotsBuffer = nil

	srcs := [graphics.ShaderImageCount]*mipmap.Mipmap{i.ui.whiteImage.mipmap}
	bounds := image.Rect(0, 0, i.width, i.height)
	var srcRegions [graphics.ShaderImageCount]image.Rectangle
	blend := graphicsdriver.BlendCopy
	i.lastBlend = blend
	i.mipmap.DrawTriangles(srcs, vertices, indices, blend, bounds, srcRegions, NearestFilterShader.shader, nil, graphicsdriver.FillAll, true)
}
func (i *Image) flushBigOffscreenBufferIfNeeded() {
if i.bigOffscreenBuffer != nil {
i.bigOffscreenBuffer.flush()