diff --git a/examples/mrt/main.go b/examples/mrt/main.go new file mode 100644 index 000000000..11c7d1b3e --- /dev/null +++ b/examples/mrt/main.go @@ -0,0 +1,134 @@ +// Copyright 2024 The Ebiten Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "image" + _ "image/jpeg" + "log" + + "github.com/hajimehoshi/ebiten/v2" + "github.com/hajimehoshi/ebiten/v2/ebitenutil" +) + +const ( + dstSize = 128 + screenWidth = dstSize * 2 + screenHeight = dstSize * 2 +) + +var ( + dsts = [8]*ebiten.Image{ + /*ebiten.NewImage(dstSize, dstSize), + ebiten.NewImage(dstSize, dstSize), + ebiten.NewImage(dstSize, dstSize), + ebiten.NewImage(dstSize, dstSize),*/ + ebiten.NewImageWithOptions(image.Rect(0, 0, dstSize, dstSize), &ebiten.NewImageOptions{ + Unmanaged: true, + }), + ebiten.NewImageWithOptions(image.Rect(0, 0, dstSize, dstSize), &ebiten.NewImageOptions{ + Unmanaged: true, + }), + ebiten.NewImageWithOptions(image.Rect(0, 0, dstSize, dstSize), &ebiten.NewImageOptions{ + Unmanaged: true, + }), + ebiten.NewImageWithOptions(image.Rect(0, 0, dstSize, dstSize), &ebiten.NewImageOptions{ + Unmanaged: true, + }), + } + + shaderSrc = []byte( + ` +//kage:units pixels + +package main + +func Fragment(dst vec4, src vec2, color vec4) (vec4, vec4, vec4, vec4) { + return vec4(1,0,0,1), vec4(0,1,0,1), vec4(0,0,1,1), vec4(1,0,1,1) +} +`) + s *ebiten.Shader +) + +func init() { + var err error + + s, err = ebiten.NewShader(shaderSrc) + if err != nil { + 
log.Fatal(err)
+	}
+}
+
+type Game struct {
+}
+
+func (g *Game) Update() error {
+	return nil
+}
+
+func (g *Game) Draw(screen *ebiten.Image) {
+	vertices := []ebiten.Vertex{
+		{
+			DstX: 0,
+			DstY: 0,
+		},
+		{
+			DstX: dstSize,
+			DstY: 0,
+		},
+		{
+			DstX: 0,
+			DstY: dstSize,
+		},
+		{
+			DstX: dstSize,
+			DstY: dstSize,
+		},
+	}
+	indices := []uint16{0, 1, 2, 1, 2, 3}
+	ebiten.DrawTrianglesShaderMRT(dsts, vertices, indices, s, nil)
+	// Dst 0
+	screen.DrawImage(dsts[0], nil)
+	// Dst 1
+	opts := &ebiten.DrawImageOptions{}
+	opts.GeoM.Translate(dstSize, 0)
+	screen.DrawImage(dsts[1], opts)
+	// Dst 2
+	opts.GeoM.Reset()
+	opts.GeoM.Translate(0, dstSize)
+	screen.DrawImage(dsts[2], opts)
+	// Dst 3
+	opts.GeoM.Reset()
+	opts.GeoM.Translate(dstSize, dstSize)
+	screen.DrawImage(dsts[3], opts)
+
+	ebitenutil.DebugPrint(screen, fmt.Sprintf("FPS: %.2f", ebiten.ActualFPS()))
+}
+
+func (g *Game) Layout(outsideWidth, outsideHeight int) (int, int) {
+	return screenWidth, screenHeight
+}
+
+func main() {
+	ebiten.SetWindowSize(screenWidth, screenHeight)
+	ebiten.SetVsyncEnabled(false)
+	ebiten.SetWindowTitle("MRT (Ebitengine Demo)")
+	if err := ebiten.RunGameWithOptions(&Game{}, &ebiten.RunGameOptions{
+		GraphicsLibrary: ebiten.GraphicsLibraryDirectX,
+	}); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/image.go b/image.go
index 58ee22640..5545975af 100644
--- a/image.go
+++ b/image.go
@@ -705,6 +705,139 @@ func (i *Image) DrawTrianglesShader(vertices []Vertex, indices []uint16, shader
 	i.image.DrawTriangles(imgs, vs, is, blend, i.adjustedBounds(), srcRegions, shader.shader, i.tmpUniforms, graphicsdriver.FillRule(options.FillRule), true, options.AntiAlias)
 }
 
+// DrawTrianglesShaderMRT draws triangles with the specified vertices and their indices with the specified shader
+// onto multiple render targets (MRT) at once.
+//
+// A nil element in dsts is skipped, but at least one element must be non-nil, or DrawTrianglesShaderMRT panics.
+// The destination positions of the vertices are adjusted with the first non-nil destination image.
+//
+// Vertex contains color values, which can be interpreted for any purpose by the shader.
+//
+// For the details about the shader, see https://ebitengine.org/en/documents/shader.html.
+//
+// If the shader unit is texels, one of the specified source images is non-nil and its size is different from
+// the size of the first source image, DrawTrianglesShaderMRT panics.
+// If one of the specified destination or source images is non-nil and is disposed, DrawTrianglesShaderMRT panics.
+// If the shader is disposed, DrawTrianglesShaderMRT panics.
+//
+// If len(vertices) is more than MaxVertexCount, the exceeding part is ignored.
+//
+// If len(indices) is not multiple of 3, DrawTrianglesShaderMRT panics.
+//
+// If a value in indices is out of range of vertices, or not less than MaxVertexCount, DrawTrianglesShaderMRT panics.
+//
+// If a specified uniform variable's length or type doesn't match with an expected one, DrawTrianglesShaderMRT panics.
+//
+// Even if a result is an invalid color as a premultiplied-alpha color, i.e. an alpha value exceeds other color values,
+// the value is kept and is not clamped.
+func DrawTrianglesShaderMRT(dsts [graphics.ShaderDstImageCount]*Image, vertices []Vertex, indices []uint16, shader *Shader, options *DrawTrianglesShaderOptions) {
+	var dstImgs [graphics.ShaderDstImageCount]*ui.Image
+	var firstDst *Image
+	for i, dst := range dsts {
+		if dst == nil {
+			continue
+		}
+		dst.copyCheck()
+		if dst.isDisposed() {
+			panic("ebiten: the destination images given to DrawTrianglesShaderMRT must not be disposed")
+		}
+		if firstDst == nil {
+			firstDst = dst
+		}
+		dstImgs[i] = dst.image
+	}
+	if firstDst == nil {
+		panic("ebiten: at least one destination image must be given to DrawTrianglesShaderMRT")
+	}
+
+	if shader.isDisposed() {
+		panic("ebiten: the given shader to DrawTrianglesShaderMRT must not be disposed")
+	}
+
+	if len(vertices) > graphicscommand.MaxVertexCount {
+		// The last part cannot be specified by indices. Just omit them.
+		vertices = vertices[:graphicscommand.MaxVertexCount]
+	}
+	if len(indices)%3 != 0 {
+		panic("ebiten: len(indices) % 3 must be 0")
+	}
+	for i, idx := range indices {
+		if int(idx) >= len(vertices) {
+			panic(fmt.Sprintf("ebiten: indices[%d] must be less than len(vertices) (%d) but was %d", i, len(vertices), idx))
+		}
+	}
+
+	if options == nil {
+		options = &DrawTrianglesShaderOptions{}
+	}
+
+	var blend graphicsdriver.Blend
+	if options.CompositeMode == CompositeModeCustom {
+		blend = options.Blend.internalBlend()
+	} else {
+		blend = options.CompositeMode.blend().internalBlend()
+	}
+
+	dst := firstDst
+	vs := dst.ensureTmpVertices(len(vertices) * graphics.VertexFloatCount)
+	src := options.Images[0]
+	for i, v := range vertices {
+		dx, dy := dst.adjustPositionF32(v.DstX, v.DstY)
+		vs[i*graphics.VertexFloatCount] = dx
+		vs[i*graphics.VertexFloatCount+1] = dy
+		sx, sy := v.SrcX, v.SrcY
+		if src != nil {
+			sx, sy = src.adjustPositionF32(sx, sy)
+		}
+		vs[i*graphics.VertexFloatCount+2] = sx
+		vs[i*graphics.VertexFloatCount+3] = sy
+		vs[i*graphics.VertexFloatCount+4] = v.ColorR
+		vs[i*graphics.VertexFloatCount+5] = v.ColorG
+		vs[i*graphics.VertexFloatCount+6] = v.ColorB
+		vs[i*graphics.VertexFloatCount+7] = v.ColorA
+	}
+
+	is := make([]uint32, len(indices))
+	for i := range is {
+		is[i] = uint32(indices[i])
+	}
+
+	var srcImgs [graphics.ShaderSrcImageCount]*ui.Image
+	var imgSize image.Point
+	for i, img := range options.Images {
+		if img == nil {
+			continue
+		}
+		if img.isDisposed() {
+			panic("ebiten: the given image to DrawTrianglesShaderMRT must not be disposed")
+		}
+		if shader.unit == shaderir.Texels {
+			if i == 0 {
+				imgSize = img.Bounds().Size()
+			} else {
+				// TODO: Check imgw > 0 && imgh > 0
+				if img.Bounds().Size() != imgSize {
+					panic("ebiten: all the source images must be the same size with the rectangle")
+				}
+			}
+		}
+		srcImgs[i] = img.image
+	}
+
+	var srcRegions [graphics.ShaderSrcImageCount]image.Rectangle
+	for i, img := range options.Images {
+		if img == nil {
+			continue
+		}
+		srcRegions[i] = img.adjustedBounds()
+	}
+
+	// The uniforms are passed only once, so build them in the first destination's scratch buffer.
+	dst.tmpUniforms = dst.tmpUniforms[:0]
+	dst.tmpUniforms = shader.appendUniforms(dst.tmpUniforms, options.Uniforms)
+	ui.DrawTrianglesMRT(dstImgs, srcImgs, vs, is, blend, dst.adjustedBounds(), srcRegions, shader.shader, dst.tmpUniforms, graphicsdriver.FillRule(options.FillRule), true, options.AntiAlias)
+}
+
 // DrawRectShaderOptions represents options for DrawRectShader.
 type DrawRectShaderOptions struct {
 	// GeoM is a geometry matrix to draw.
diff --git a/internal/atlas/image.go b/internal/atlas/image.go
index c1bf42268..c6f744876 100644
--- a/internal/atlas/image.go
+++ b/internal/atlas/image.go
@@ -431,6 +431,27 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertice
 	i.drawTriangles(srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
 }
 
+func DrawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) {
+	backendsM.Lock()
+	defer backendsM.Unlock()
+
+	if !inFrame {
+		vs := make([]float32, len(vertices))
+		copy(vs, vertices)
+		is := make([]uint32, len(indices))
+		copy(is, indices)
+		us := make([]uint32, len(uniforms))
+		copy(us, uniforms)
+
+		appendDeferred(func() {
+			drawTrianglesMRT(dsts, srcs, vs, is, blend, dstRegion, srcRegions, shader, us, fillRule)
+		})
+		return
+	}
+
+	drawTrianglesMRT(dsts, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
+}
+
 func (i *Image) drawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader
*Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) {
 	if len(vertices) == 0 {
 		return
@@ -530,6 +551,123 @@ func (i *Image) drawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertice
 	}
 }
 
+// drawTrianglesMRT draws triangles onto all the non-nil destination images at once.
+// The vertices are adjusted in place with the region of the first non-nil destination image
+// and, if it is non-nil, the region of the first source image.
+func drawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) {
+	if len(vertices) == 0 {
+		return
+	}
+
+	backends := make([]*backend, 0, len(srcs))
+	for _, src := range srcs {
+		if src == nil {
+			continue
+		}
+		if src.backend == nil {
+			// It is possible to specify the destinations' backends as forbidden backends, but this might prevent a good allocation for a source image.
+			// If the backend becomes the same as a destination's, the destination's backend will be changed at ensureIsolatedFromSource.
+			src.allocate(nil, true)
+		}
+		backends = append(backends, src.backend)
+		src.backend.sourceInThisFrame = true
+	}
+
+	var firstDst *Image
+	var dstImgs [graphics.ShaderDstImageCount]*graphicscommand.Image
+	for i, dst := range dsts {
+		if dst == nil {
+			continue
+		}
+		dst.ensureIsolatedFromSource(backends)
+		// Keep the first non-nil destination: the callers adjust the vertices and the destination region with it.
+		if firstDst == nil {
+			firstDst = dst
+		}
+		dstImgs[i] = dst.backend.image
+	}
+	if firstDst == nil {
+		panic("atlas: DrawTrianglesMRT: at least one destination image must be non-nil")
+	}
+
+	for _, src := range srcs {
+		// Compare the destinations and the source images after ensuring the destinations are isolated, or
+		// a destination and a source image might share the same atlas even though dst != src.
+		for _, dst := range dsts {
+			if src != nil && dst != nil && dst.backend.image == src.backend.image {
+				panic("atlas: DrawTrianglesMRT: source must be different from the destination images")
+			}
+		}
+	}
+
+	r := firstDst.regionWithPadding()
+	// TODO: Check if dstRegion does not to violate the region.
+	dstRegion = dstRegion.Add(r.Min)
+
+	dx, dy := float32(r.Min.X), float32(r.Min.Y)
+
+	var oxf, oyf float32
+	if srcs[0] != nil {
+		r := srcs[0].regionWithPadding()
+		oxf, oyf = float32(r.Min.X), float32(r.Min.Y)
+		n := len(vertices)
+		for i := 0; i < n; i += graphics.VertexFloatCount {
+			vertices[i] += dx
+			vertices[i+1] += dy
+			vertices[i+2] += oxf
+			vertices[i+3] += oyf
+		}
+		if shader.ir.Unit == shaderir.Texels {
+			sw, sh := srcs[0].backend.image.InternalSize()
+			swf, shf := float32(sw), float32(sh)
+			for i := 0; i < n; i += graphics.VertexFloatCount {
+				vertices[i+2] /= swf
+				vertices[i+3] /= shf
+			}
+		}
+	} else {
+		n := len(vertices)
+		for i := 0; i < n; i += graphics.VertexFloatCount {
+			vertices[i] += dx
+			vertices[i+1] += dy
+		}
+	}
+
+	for i, src := range srcs {
+		if src == nil {
+			continue
+		}
+
+		// A source region can be deliberately empty when this is not needed in order to avoid unexpected
+		// performance issue (#1293).
+		if srcRegions[i].Empty() {
+			continue
+		}
+
+		r := src.regionWithPadding()
+		srcRegions[i] = srcRegions[i].Add(r.Min)
+	}
+
+	var srcImgs [graphics.ShaderSrcImageCount]*graphicscommand.Image
+	for i, src := range srcs {
+		if src == nil {
+			continue
+		}
+		srcImgs[i] = src.backend.image
+	}
+
+	graphicscommand.DrawTrianglesMRT(dstImgs, srcImgs, vertices, indices, blend, dstRegion, srcRegions, shader.ensureShader(), uniforms, fillRule)
+
+	for _, src := range srcs {
+		if src == nil {
+			continue
+		}
+		if !src.isOnSourceBackend() && src.canBePutOnAtlas() {
+			// src might already registered, but assigning it again is not harmful.
+			imagesToPutOnSourceBackend.add(src)
+		}
+	}
+}
+
 // WritePixels replaces the pixels on the image.
func (i *Image) WritePixels(pix []byte, region image.Rectangle) {
 	backendsM.Lock()
diff --git a/internal/buffered/image.go b/internal/buffered/image.go
index fa1551552..56af2a43a 100644
--- a/internal/buffered/image.go
+++ b/internal/buffered/image.go
@@ -211,6 +211,50 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertice
 	i.pixels = nil
 }
 
+// DrawTrianglesMRT draws triangles onto every non-nil image in dsts at once.
+//
+// DrawTrianglesMRT panics if a source image is also given as a destination image.
+func DrawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *atlas.Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) {
+	for _, src := range srcs {
+		if src == nil {
+			continue
+		}
+		for _, dst := range dsts {
+			if dst == src {
+				panic("buffered: DrawTrianglesMRT: source images must be different from the destination images")
+			}
+		}
+		// The pixels of src have to be synced between CPU and GPU before src is read,
+		// but they don't have to be cleared since src is not modified in this function.
+		src.syncPixelsIfNeeded()
+	}
+
+	var (
+		dstImgs [graphics.ShaderDstImageCount]*atlas.Image
+		srcImgs [graphics.ShaderSrcImageCount]*atlas.Image
+	)
+	for idx, dst := range dsts {
+		if dst != nil {
+			dst.syncPixelsIfNeeded()
+			dstImgs[idx] = dst.img
+		}
+	}
+	for idx, src := range srcs {
+		if src != nil {
+			srcImgs[idx] = src.img
+		}
+	}
+
+	atlas.DrawTrianglesMRT(dstImgs, srcImgs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
+
+	// The cached pixels of the destinations are stale once the drawing is issued.
+	for _, dst := range dsts {
+		if dst != nil {
+			dst.pixels = nil
+		}
+	}
+}
+
 // syncPixelsIfNeeded syncs the pixels between CPU and GPU.
 // After syncPixelsIfNeeded, dotsBuffer is cleared, but pixels might remain.
func (i *Image) syncPixelsIfNeeded() { diff --git a/internal/graphics/vertex.go b/internal/graphics/vertex.go index a11fc18c3..585e967aa 100644 --- a/internal/graphics/vertex.go +++ b/internal/graphics/vertex.go @@ -16,6 +16,11 @@ package graphics const ( ShaderSrcImageCount = 4 + // The minimum guaranteed value for the number of target seems to be 8 + // OpenGL(8): https://www.khronos.org/opengl/wiki/Framebuffer_Object#Framebuffer_Object_Structure + // DirectX11(8): https://learn.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage#multiple-rendertargets-overview + // Metal(8): Page 7 of 15: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf + ShaderDstImageCount = 8 // PreservedUniformVariablesCount represents the number of preserved uniform variables. // Any shaders in Ebitengine must have these uniform variables. diff --git a/internal/graphicscommand/command.go b/internal/graphicscommand/command.go index b9a921fc9..f14faeb41 100644 --- a/internal/graphicscommand/command.go +++ b/internal/graphicscommand/command.go @@ -61,7 +61,7 @@ func (p *drawTrianglesCommandPool) put(v *drawTrianglesCommand) { // drawTrianglesCommand represents a drawing command to draw an image on another image. 
type drawTrianglesCommand struct { - dst *Image + dsts [graphics.ShaderDstImageCount]*Image srcs [graphics.ShaderSrcImageCount]*Image vertices []float32 blend graphicsdriver.Blend @@ -81,9 +81,16 @@ func (c *drawTrianglesCommand) String() string { c.blend.BlendOperationRGB, c.blend.BlendOperationAlpha) - dst := fmt.Sprintf("%d", c.dst.id) - if c.dst.screen { - dst += " (screen)" + var dststrs [graphics.ShaderDstImageCount]string + for i, dst := range c.dsts { + if dst == nil { + dststrs[i] = "(nil)" + continue + } + dststrs[i] = fmt.Sprintf("%d", dst.id) + if dst.screen { + dststrs[i] += " (screen)" + } } var srcstrs [graphics.ShaderSrcImageCount]string @@ -98,7 +105,7 @@ func (c *drawTrianglesCommand) String() string { } } - return fmt.Sprintf("draw-triangles: dst: %s <- src: [%s], num of dst regions: %d, num of indices: %d, blend: %s, fill rule: %s, shader id: %d", dst, strings.Join(srcstrs[:], ", "), len(c.dstRegions), c.numIndices(), blend, c.fillRule, c.shader.id) + return fmt.Sprintf("draw-triangles: dst: [%s] <- src: [%s], num of dst regions: %d, num of indices: %d, blend: %s, fill rule: %s, shader id: %d", strings.Join(dststrs[:], ", "), strings.Join(srcstrs[:], ", "), len(c.dstRegions), c.numIndices(), blend, c.fillRule, c.shader.id) } // Exec executes the drawTrianglesCommand. 
@@ -108,16 +115,25 @@ func (c *drawTrianglesCommand) Exec(commandQueue *commandQueue, graphicsDriver g return nil } - var imgs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID - for i, src := range c.srcs { - if src == nil { - imgs[i] = graphicsdriver.InvalidImageID + var dsts [graphics.ShaderDstImageCount]graphicsdriver.ImageID + for i, dst := range c.dsts { + if dst == nil { + dsts[i] = graphicsdriver.InvalidImageID continue } - imgs[i] = src.image.ID() + dsts[i] = dst.image.ID() } - return graphicsDriver.DrawTriangles(c.dst.image.ID(), imgs, c.shader.shader.ID(), c.dstRegions, indexOffset, c.blend, c.uniforms, c.fillRule) + var srcs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID + for i, src := range c.srcs { + if src == nil { + srcs[i] = graphicsdriver.InvalidImageID + continue + } + srcs[i] = src.image.ID() + } + + return graphicsDriver.DrawTriangles(dsts, srcs, c.shader.shader.ID(), c.dstRegions, indexOffset, c.blend, c.uniforms, c.fillRule) } func (c *drawTrianglesCommand) NeedsSync() bool { @@ -142,7 +158,7 @@ func (c *drawTrianglesCommand) setVertices(vertices []float32) { // CanMergeWithDrawTrianglesCommand returns a boolean value indicating whether the other drawTrianglesCommand can be merged // with the drawTrianglesCommand c. 
-func (c *drawTrianglesCommand) CanMergeWithDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, blend graphicsdriver.Blend, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) bool { +func (c *drawTrianglesCommand) CanMergeWithDrawTrianglesCommand(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, blend graphicsdriver.Blend, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) bool { if c.shader != shader { return false } @@ -154,7 +170,7 @@ func (c *drawTrianglesCommand) CanMergeWithDrawTrianglesCommand(dst *Image, srcs return false } } - if c.dst != dst { + if c.dsts != dsts { return false } if c.srcs != srcs { diff --git a/internal/graphicscommand/commandqueue.go b/internal/graphicscommand/commandqueue.go index 0f24dbf42..a82b86f2d 100644 --- a/internal/graphicscommand/commandqueue.go +++ b/internal/graphicscommand/commandqueue.go @@ -105,7 +105,7 @@ func mustUseDifferentVertexBuffer(nextNumVertexFloats int) bool { } // EnqueueDrawTrianglesCommand enqueues a drawing-image command. 
-func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) { +func (q *commandQueue) EnqueueDrawTrianglesCommand(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) { if len(vertices) > maxVertexFloatCount { panic(fmt.Sprintf("graphicscommand: len(vertices) must equal to or less than %d but was %d", maxVertexFloatCount, len(vertices))) } @@ -125,7 +125,7 @@ func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.Sh // prependPreservedUniforms not only prepends values to the given slice but also creates a new slice. // Allocating a new slice is necessary to make EnqueueDrawTrianglesCommand safe so far. // TODO: This might cause a performance issue (#2601). - uniforms = q.prependPreservedUniforms(uniforms, shader, dst, srcs, dstRegion, srcRegions) + uniforms = q.prependPreservedUniforms(uniforms, shader, dsts, srcs, dstRegion, srcRegions) // Remove unused uniform variables so that more commands can be merged. shader.ir.FilterUniformVariables(uniforms) @@ -133,7 +133,7 @@ func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.Sh // TODO: If dst is the screen, reorder the command to be the last. 
if !split && 0 < len(q.commands) { if last, ok := q.commands[len(q.commands)-1].(*drawTrianglesCommand); ok { - if last.CanMergeWithDrawTrianglesCommand(dst, srcs, vertices, blend, shader, uniforms, fillRule) { + if last.CanMergeWithDrawTrianglesCommand(dsts, srcs, vertices, blend, shader, uniforms, fillRule) { last.setVertices(q.lastVertices(len(vertices) + last.numVertices())) if last.dstRegions[len(last.dstRegions)-1].Region == dstRegion { last.dstRegions[len(last.dstRegions)-1].IndexCount += len(indices) @@ -149,7 +149,7 @@ func (q *commandQueue) EnqueueDrawTrianglesCommand(dst *Image, srcs [graphics.Sh } c := q.drawTrianglesCommandPool.get() - c.dst = dst + c.dsts = dsts c.srcs = srcs c.vertices = q.lastVertices(len(vertices)) c.blend = blend @@ -324,13 +324,20 @@ func imageRectangleToRectangleF32(r image.Rectangle) rectangleF32 { } } -func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shader, dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 { +func (q *commandQueue) prependPreservedUniforms(uniforms []uint32, shader *Shader, dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle) []uint32 { origUniforms := uniforms uniforms = q.uint32sBuffer.alloc(len(origUniforms) + graphics.PreservedUniformUint32Count) copy(uniforms[graphics.PreservedUniformUint32Count:], origUniforms) // Set the destination texture size. 
- dw, dh := dst.InternalSize() + var firstDst *Image + for _, dst := range dsts { + if dst != nil { + firstDst = dst + break + } + } + dw, dh := firstDst.InternalSize() uniforms[0] = math.Float32bits(float32(dw)) uniforms[1] = math.Float32bits(float32(dh)) uniformIndex := 2 @@ -469,11 +476,11 @@ func (c *commandQueueManager) putCommandQueue(commandQueue *commandQueue) { c.pool.put(commandQueue) } -func (c *commandQueueManager) enqueueDrawTrianglesCommand(dst *Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) { +func (c *commandQueueManager) enqueueDrawTrianglesCommand(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) { if c.current == nil { c.current, _ = c.pool.get() } - c.current.EnqueueDrawTrianglesCommand(dst, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule) + c.current.EnqueueDrawTrianglesCommand(dsts, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule) } func (c *commandQueueManager) flush(graphicsDriver graphicsdriver.Graphics, endFrame bool) error { diff --git a/internal/graphicscommand/image.go b/internal/graphicscommand/image.go index a534c7f19..0057f97ca 100644 --- a/internal/graphicscommand/image.go +++ b/internal/graphicscommand/image.go @@ -142,7 +142,48 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertice } i.flushBufferedWritePixels() - theCommandQueueManager.enqueueDrawTrianglesCommand(i, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule) + 
theCommandQueueManager.enqueueDrawTrianglesCommand([graphics.ShaderDstImageCount]*Image{i}, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
+}
+
+// DrawTrianglesMRT draws triangles onto the given destination images at once.
+//
+// The vertex floats are:
+//
+//	0: Destination X in pixels
+//	1: Destination Y in pixels
+//	2: Source X in texels
+//	3: Source Y in texels
+//	4: Color R [0.0-1.0]
+//	5: Color G
+//	6: Color B
+//	7: Color A
+//
+// A nil element in dsts or srcs is skipped.
+//
+// The screen image cannot be used as a source image; DrawTrianglesMRT panics in that case.
+//
+// flushBufferedWritePixels is called for every given source and destination image
+// before the drawing command is enqueued.
+//
+// The drawing itself is not executed here; the command is only enqueued to the command queue.
+func DrawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule) {
+	for _, src := range srcs {
+		if src == nil {
+			continue
+		}
+		if src.screen {
+			panic("graphicscommand: the screen image cannot be the rendering source")
+		}
+		src.flushBufferedWritePixels()
+	}
+	for _, dst := range dsts {
+		if dst == nil {
+			continue
+		}
+		dst.flushBufferedWritePixels()
+	}
+
+	theCommandQueueManager.enqueueDrawTrianglesCommand(dsts, srcs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule)
 }
 
 // ReadPixels reads the image's pixels.
diff --git a/internal/graphicsdriver/directx/graphics11_windows.go b/internal/graphicsdriver/directx/graphics11_windows.go
index 6a111edbf..ea5591fc3 100644
--- a/internal/graphicsdriver/directx/graphics11_windows.go
+++ b/internal/graphicsdriver/directx/graphics11_windows.go
@@ -514,13 +514,108 @@ func (g *graphics11) removeShader(s *shader11) {
 	delete(g.shaders, s.id)
 }
 
-func (g *graphics11) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error {
+// setAsRenderTargets binds dsts as the render targets of the output-merger stage.
+// A nil element in dsts is bound as an empty render target slot.
+// If useStencil is true, the stencil buffer of the first non-nil destination is bound and cleared.
+func (g *graphics11) setAsRenderTargets(dsts []*image11, useStencil bool) error {
+	var rtvs []*_ID3D11RenderTargetView
+	var dsv *_ID3D11DepthStencilView
+	for _, dst := range dsts {
+		// Ignore a nil image in case of MRT
+		if dst == nil {
+			rtvs = append(rtvs, nil)
+			continue
+		}
+		if dst.renderTargetView == nil {
+			rtv, err := g.device.CreateRenderTargetView(unsafe.Pointer(dst.texture), nil)
+			if err != nil {
+				return err
+			}
+			dst.renderTargetView = rtv
+		}
+		rtvs = append(rtvs, dst.renderTargetView)
+
+		if !useStencil || dsv != nil {
+			continue
+		}
+
+		if dst.screen {
+			return fmt.Errorf("directx: a stencil buffer is not available for a screen image")
+		}
+		if dst.stencil == nil {
+			w, h := dst.internalSize()
+			s, err := g.device.CreateTexture2D(&_D3D11_TEXTURE2D_DESC{
+				Width:     uint32(w),
+				Height:    uint32(h),
+				MipLevels: 0,
+				ArraySize: 1,
+				Format:    _DXGI_FORMAT_D24_UNORM_S8_UINT,
+				SampleDesc: _DXGI_SAMPLE_DESC{
+					Count:   1,
+					Quality: 0,
+				},
+				Usage:          _D3D11_USAGE_DEFAULT,
+				BindFlags:      uint32(_D3D11_BIND_DEPTH_STENCIL),
+				CPUAccessFlags: 0,
+				MiscFlags:      0,
+			}, nil)
+			if err != nil {
+				return err
+			}
+			dst.stencil = s
+		}
+		if dst.stencilView == nil {
+			sv, err := g.device.CreateDepthStencilView(unsafe.Pointer(dst.stencil), nil)
+			if err != nil {
+				return err
+			}
+			dst.stencilView = sv
+		}
+		dsv = dst.stencilView
+	}
+
+	if useStencil && dsv == nil {
+		return fmt.Errorf("directx: a stencil buffer requires at least one destination image")
+	}
+	g.deviceContext.OMSetRenderTargets(rtvs, dsv)
+	if useStencil {
+		g.deviceContext.ClearDepthStencilView(dsv, uint8(_D3D11_CLEAR_STENCIL), 0, 0)
+	}
+
+	return nil
+}
+
+func (g *graphics11) DrawTriangles(dstIDs [graphics.ShaderDstImageCount]graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error {
 	// Remove bound textures first. This is needed to avoid warnings on the debugger.
 	g.deviceContext.OMSetRenderTargets([]*_ID3D11RenderTargetView{nil}, nil)
 	srvs := [graphics.ShaderSrcImageCount]*_ID3D11ShaderResourceView{}
 	g.deviceContext.PSSetShaderResources(0, srvs[:])
 
-	dst := g.images[dstID]
+	var dsts [graphics.ShaderDstImageCount]*image11
+	var vp _D3D11_VIEWPORT
+	var targetCount int
+	firstTarget := -1
+	for i, id := range dstIDs {
+		img := g.images[id]
+		if img == nil {
+			continue
+		}
+		if firstTarget == -1 {
+			firstTarget = i
+		}
+		dsts[i] = img
+		w, h := img.internalSize()
+		vp = _D3D11_VIEWPORT{
+			TopLeftX: 0,
+			TopLeftY: 0,
+			Width:    float32(w),
+			Height:   float32(h),
+			MinDepth: 0,
+			MaxDepth: 1,
+		}
+		targetCount++
+	}
+
 	var srcs [graphics.ShaderSrcImageCount]*image11
 	for i, id := range srcIDs {
 		img := g.images[id]
@@ -530,19 +625,16 @@ func (g *graphics11) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphic
 		srcs[i] = img
 	}
 
-	w, h := dst.internalSize()
-	g.deviceContext.RSSetViewports([]_D3D11_VIEWPORT{
-		{
-			TopLeftX: 0,
-			TopLeftY: 0,
-			Width:    float32(w),
-			Height:   float32(h),
-			MinDepth: 0,
-			MaxDepth: 1,
-		},
-	})
+	// If the number of targets is more than one, or if the only target is not the first one, then
+	// it is safe to assume that MRT is used.
+	// Also, this only matters in order to specify empty targets/viewports when not all slots are
+	// being filled, even though it's not a MRT scenario.
+ if targetCount > 1 || firstTarget > 0 { + targetCount = graphics.ShaderDstImageCount + } - if err := dst.setAsRenderTarget(fillRule != graphicsdriver.FillRuleFillAll); err != nil { + g.deviceContext.RSSetViewports([]_D3D11_VIEWPORT{vp}) + if err := g.setAsRenderTargets(dsts[:targetCount], fillRule != graphicsdriver.FillRuleFillAll); err != nil { return err } diff --git a/internal/graphicsdriver/directx/graphics12_windows.go b/internal/graphicsdriver/directx/graphics12_windows.go index 5e0341215..5c7bfa7d5 100644 --- a/internal/graphicsdriver/directx/graphics12_windows.go +++ b/internal/graphicsdriver/directx/graphics12_windows.go @@ -40,13 +40,14 @@ func (r *resourceWithSize) release() { } type graphics12 struct { - debug *_ID3D12Debug - device *_ID3D12Device - commandQueue *_ID3D12CommandQueue - rtvDescriptorHeap *_ID3D12DescriptorHeap - rtvDescriptorSize uint32 - renderTargets [frameCount]*_ID3D12Resource - framePipelineToken _D3D12XBOX_FRAME_PIPELINE_TOKEN + debug *_ID3D12Debug + device *_ID3D12Device + commandQueue *_ID3D12CommandQueue + rtvDescriptorHeap *_ID3D12DescriptorHeap + rtvEmptyDescriptorHeap *_ID3D12DescriptorHeap + rtvDescriptorSize uint32 + renderTargets [frameCount]*_ID3D12Resource + framePipelineToken _D3D12XBOX_FRAME_PIPELINE_TOKEN fence *_ID3D12Fence fenceValues [frameCount]uint64 @@ -418,6 +419,34 @@ func (g *graphics12) initializeMembers(frameIndex int) (ferr error) { g.rtvDescriptorHeap = nil } }() + + // Create a descriptor heap for empty RTV in case of MRT with empty locations. 
+ h, err = g.device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + NumDescriptors: frameCount, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }) + if err != nil { + return err + } + g.rtvEmptyDescriptorHeap = h + defer func() { + if ferr != nil { + g.rtvEmptyDescriptorHeap.Release() + g.rtvEmptyDescriptorHeap = nil + } + }() + hnd, err := g.rtvEmptyDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if err != nil { + return err + } + // Create an empty render target for empty destinations at DrawTriangles + g.device.CreateRenderTargetView(nil, &_D3D12_RENDER_TARGET_VIEW_DESC{ + Format: _DXGI_FORMAT_R8G8B8A8_UNORM, + ViewDimension: _D3D12_RTV_DIMENSION_TEXTURE2D, + }, hnd) + g.rtvDescriptorSize = g.device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_RTV) if err := g.pipelineStates.initialize(g.device); err != nil { @@ -1081,7 +1110,82 @@ func (g *graphics12) NewShader(program *shaderir.Program) (graphicsdriver.Shader return s, nil } -func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { +func (g *graphics12) setAsRenderTargets(dsts []*image12, useStencil bool) error { + var rtvs []_D3D12_CPU_DESCRIPTOR_HANDLE + var dsv *_D3D12_CPU_DESCRIPTOR_HANDLE + + for i, img := range dsts { + // Ignore a nil image in case of MRT + if img == nil { + _ = i + rtv, err := g.rtvEmptyDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if err != nil { + return err + } + rtv.Offset(int32(g.frameIndex), g.rtvDescriptorSize) + rtvs = append(rtvs, rtv) + continue + } + + if img.screen { + if useStencil { + return fmt.Errorf("directx: stencils are not available on the screen framebuffer") + } + + rtvBase, err := 
g.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if err != nil { + return err + } + rtv := rtvBase + rtv.Offset(int32(g.frameIndex), g.rtvDescriptorSize) + rtvs = append(rtvs, rtv) + continue + } + + if err := img.ensureRenderTargetView(g.device); err != nil { + return err + } + + rtvBase, err := img.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if err != nil { + return err + } + + rtv := rtvBase + rtvs = append(rtvs, rtv) + + if !useStencil { + continue + } + + if err := img.ensureDepthStencilView(g.device); err != nil { + return err + } + + if dsv != nil { + continue + } + + sv, err := img.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if err != nil { + return err + } + dsv = &sv + } + + if !useStencil { + g.drawCommandList.OMSetRenderTargets(rtvs, false, nil) + return nil + } + + g.drawCommandList.OMSetStencilRef(0) + g.drawCommandList.OMSetRenderTargets(rtvs, false, dsv) + g.drawCommandList.ClearDepthStencilView(*dsv, _D3D12_CLEAR_FLAG_STENCIL, 0, 0, nil) + + return nil +} + +func (g *graphics12) DrawTriangles(dstIDs [graphics.ShaderDstImageCount]graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { if shaderID == graphicsdriver.InvalidShaderID { return fmt.Errorf("directx: shader ID is invalid") } @@ -1102,20 +1206,46 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics. 
g.pipelineStates.releaseConstantBuffers(g.frameIndex) } - dst := g.images[dstID] var resourceBarriers []_D3D12_RESOURCE_BARRIER_Transition - if rb, ok := dst.transiteState(_D3D12_RESOURCE_STATE_RENDER_TARGET); ok { - resourceBarriers = append(resourceBarriers, rb) - } - var srcImages [graphics.ShaderSrcImageCount]*image12 - for i, srcID := range srcs { - src := g.images[srcID] - if src == nil { + var dsts [graphics.ShaderDstImageCount]*image12 + var vp _D3D12_VIEWPORT + var targetCount int + firstTarget := -1 + for i, id := range dstIDs { + img := g.images[id] + if img == nil { continue } - srcImages[i] = src - if rb, ok := src.transiteState(_D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); ok { + if firstTarget == -1 { + firstTarget = i + } + dsts[i] = img + w, h := img.internalSize() + vp = _D3D12_VIEWPORT{ + TopLeftX: 0, + TopLeftY: 0, + Width: float32(w), + Height: float32(h), + MinDepth: _D3D12_MIN_DEPTH, + MaxDepth: _D3D12_MAX_DEPTH, + } + + if rb, ok := img.transiteState(_D3D12_RESOURCE_STATE_RENDER_TARGET); ok { + resourceBarriers = append(resourceBarriers, rb) + } + + targetCount++ + } + + var srcs [graphics.ShaderSrcImageCount]*image12 + for i, srcID := range srcIDs { + img := g.images[srcID] + if img == nil { + continue + } + srcs[i] = img + if rb, ok := img.transiteState(_D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); ok { resourceBarriers = append(resourceBarriers, rb) } } @@ -1124,25 +1254,26 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics. g.drawCommandList.ResourceBarrier(resourceBarriers) } - if err := dst.setAsRenderTarget(g.drawCommandList, g.device, fillRule != graphicsdriver.FillRuleFillAll); err != nil { + // If the number of targets is more than one, or if the only target is not the first one, then + // it is safe to assume that MRT is used. + // Also, it only matters in order to specify empty targets/viewports when not all slots are + // being filled, even though it's not a MRT scenario.
+ usesMRT := targetCount > 1 || firstTarget > 0 + if usesMRT { + targetCount = graphics.ShaderDstImageCount + } + + if err := g.setAsRenderTargets(dsts[:targetCount], fillRule != graphicsdriver.FillRuleFillAll); err != nil { return err } shader := g.shaders[shaderID] adjustedUniforms := adjustUniforms(shader.uniformTypes, shader.uniformOffsets, uniforms) - w, h := dst.internalSize() g.needFlushDrawCommandList = true - g.drawCommandList.RSSetViewports([]_D3D12_VIEWPORT{ - { - TopLeftX: 0, - TopLeftY: 0, - Width: float32(w), - Height: float32(h), - MinDepth: _D3D12_MIN_DEPTH, - MaxDepth: _D3D12_MAX_DEPTH, - }, - }) + + g.drawCommandList.RSSetViewports([]_D3D12_VIEWPORT{vp}) + g.drawCommandList.IASetPrimitiveTopology(_D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) g.drawCommandList.IASetVertexBuffers(0, []_D3D12_VERTEX_BUFFER_VIEW{ { @@ -1157,7 +1288,7 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics. Format: _DXGI_FORMAT_R32_UINT, }) - if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, adjustedUniforms, blend, indexOffset, fillRule); err != nil { + if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, !usesMRT && dsts[firstTarget].screen, srcs, shader, dstRegions, adjustedUniforms, blend, indexOffset, fillRule); err != nil { return err } diff --git a/internal/graphicsdriver/directx/image12_windows.go b/internal/graphicsdriver/directx/image12_windows.go index aab796833..6c3c1bdb8 100644 --- a/internal/graphicsdriver/directx/image12_windows.go +++ b/internal/graphicsdriver/directx/image12_windows.go @@ -70,7 +70,7 @@ func (i *image12) disposeImpl() { func (i *image12) ReadPixels(args []graphicsdriver.PixelsArgs) error { if i.screen { - return errors.New("directx: Pixels cannot be called on the screen") + return errors.New("directx: ReadPixels cannot be called on the screen") } if err := 
i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { diff --git a/internal/graphicsdriver/directx/pipeline12_windows.go b/internal/graphicsdriver/directx/pipeline12_windows.go index 819454786..730194bd9 100644 --- a/internal/graphicsdriver/directx/pipeline12_windows.go +++ b/internal/graphicsdriver/directx/pipeline12_windows.go @@ -484,6 +484,18 @@ func (p *pipelineStates) newPipelineState(device *_ID3D12Device, vsh, psh *_ID3D } // Create a pipeline state. + rtBlendDesc := _D3D12_RENDER_TARGET_BLEND_DESC{ + BlendEnable: 1, + LogicOpEnable: 0, + SrcBlend: blendFactorToBlend12(blend.BlendFactorSourceRGB, false), + DestBlend: blendFactorToBlend12(blend.BlendFactorDestinationRGB, false), + BlendOp: blendOperationToBlendOp12(blend.BlendOperationRGB), + SrcBlendAlpha: blendFactorToBlend12(blend.BlendFactorSourceAlpha, true), + DestBlendAlpha: blendFactorToBlend12(blend.BlendFactorDestinationAlpha, true), + BlendOpAlpha: blendOperationToBlendOp12(blend.BlendOperationAlpha), + LogicOp: _D3D12_LOGIC_OP_NOOP, + RenderTargetWriteMask: writeMask, + } psoDesc := _D3D12_GRAPHICS_PIPELINE_STATE_DESC{ pRootSignature: rootSignature, VS: _D3D12_SHADER_BYTECODE{ @@ -498,18 +510,8 @@ func (p *pipelineStates) newPipelineState(device *_ID3D12Device, vsh, psh *_ID3D AlphaToCoverageEnable: 0, IndependentBlendEnable: 0, RenderTarget: [8]_D3D12_RENDER_TARGET_BLEND_DESC{ - { - BlendEnable: 1, - LogicOpEnable: 0, - SrcBlend: blendFactorToBlend12(blend.BlendFactorSourceRGB, false), - DestBlend: blendFactorToBlend12(blend.BlendFactorDestinationRGB, false), - BlendOp: blendOperationToBlendOp12(blend.BlendOperationRGB), - SrcBlendAlpha: blendFactorToBlend12(blend.BlendFactorSourceAlpha, true), - DestBlendAlpha: blendFactorToBlend12(blend.BlendFactorDestinationAlpha, true), - BlendOpAlpha: blendOperationToBlendOp12(blend.BlendOperationAlpha), - LogicOp: _D3D12_LOGIC_OP_NOOP, - RenderTargetWriteMask: writeMask, - }, + rtBlendDesc, rtBlendDesc, rtBlendDesc, rtBlendDesc, + 
rtBlendDesc, rtBlendDesc, rtBlendDesc, rtBlendDesc, // TODO: need to fill them all? }, }, SampleMask: math.MaxUint32, @@ -532,9 +534,10 @@ func (p *pipelineStates) newPipelineState(device *_ID3D12Device, vsh, psh *_ID3D NumElements: uint32(len(inputElementDescsForDX12)), }, PrimitiveTopologyType: _D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, - NumRenderTargets: 1, + NumRenderTargets: graphics.ShaderDstImageCount, RTVFormats: [8]_DXGI_FORMAT{ - rtvFormat, + rtvFormat, rtvFormat, rtvFormat, rtvFormat, + rtvFormat, rtvFormat, rtvFormat, rtvFormat, }, DSVFormat: dsvFormat, SampleDesc: _DXGI_SAMPLE_DESC{ diff --git a/internal/graphicsdriver/graphics.go b/internal/graphicsdriver/graphics.go index 1c40b16ea..2f6fa144e 100644 --- a/internal/graphicsdriver/graphics.go +++ b/internal/graphicsdriver/graphics.go @@ -68,7 +68,7 @@ type Graphics interface { NewShader(program *shaderir.Program) (Shader, error) // DrawTriangles draws an image onto another image with the given parameters. - DrawTriangles(dst ImageID, srcs [graphics.ShaderSrcImageCount]ImageID, shader ShaderID, dstRegions []DstRegion, indexOffset int, blend Blend, uniforms []uint32, fillRule FillRule) error + DrawTriangles(dsts [graphics.ShaderDstImageCount]ImageID, srcs [graphics.ShaderSrcImageCount]ImageID, shader ShaderID, dstRegions []DstRegion, indexOffset int, blend Blend, uniforms []uint32, fillRule FillRule) error } type Resetter interface { diff --git a/internal/graphicsdriver/metal/graphics_darwin.go b/internal/graphicsdriver/metal/graphics_darwin.go index 685fbe483..d4437f603 100644 --- a/internal/graphicsdriver/metal/graphics_darwin.go +++ b/internal/graphicsdriver/metal/graphics_darwin.go @@ -605,12 +605,12 @@ func (g *Graphics) draw(dst *Image, dstRegions []graphicsdriver.DstRegion, srcs return nil } -func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset 
int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { +func (g *Graphics) DrawTriangles(dstIDs [graphics.ShaderDstImageCount]graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { if shaderID == graphicsdriver.InvalidShaderID { return fmt.Errorf("metal: shader ID is invalid") } - dst := g.images[dstID] + dst := g.images[dstIDs[0]] if dst.screen { g.view.update() diff --git a/internal/graphicsdriver/opengl/context.go b/internal/graphicsdriver/opengl/context.go index 40c9d980b..2c509679b 100644 --- a/internal/graphicsdriver/opengl/context.go +++ b/internal/graphicsdriver/opengl/context.go @@ -102,6 +102,7 @@ type context struct { locationCache *locationCache screenFramebuffer framebufferNative // This might not be the default frame buffer '0' (e.g. iOS). + mrtFramebuffer framebufferNative // The dynamic framebuffer used for MRT operations lastFramebuffer framebufferNative lastTexture textureNative lastRenderbuffer renderbufferNative @@ -110,8 +111,6 @@ type context struct { lastBlend graphicsdriver.Blend maxTextureSize int maxTextureSizeOnce sync.Once - highp bool - highpOnce sync.Once initOnce sync.Once } @@ -139,26 +138,25 @@ func (c *context) bindFramebuffer(f framebufferNative) { c.lastFramebuffer = f } -func (c *context) setViewport(f *framebuffer) { - c.bindFramebuffer(f.native) - if c.lastViewportWidth == f.viewportWidth && c.lastViewportHeight == f.viewportHeight { +func (c *context) setViewport(width, height int, screen bool) { + if c.lastViewportWidth == width && c.lastViewportHeight == height { return } // On some environments, viewport size must be within the framebuffer size. // e.g. Edge (#71), Chrome on GPD Pocket (#420), macOS Mojave (#691). // Use the same size of the framebuffer here. 
- c.ctx.Viewport(0, 0, int32(f.viewportWidth), int32(f.viewportHeight)) + c.ctx.Viewport(0, 0, int32(width), int32(height)) // glViewport must be called at least at every frame on iOS. // As the screen framebuffer is the last render target, next SetViewport should be // the first call at a frame. - if f.native == c.screenFramebuffer { + if screen { c.lastViewportWidth = 0 c.lastViewportHeight = 0 } else { - c.lastViewportWidth = f.viewportWidth - c.lastViewportHeight = f.viewportHeight + c.lastViewportWidth = width + c.lastViewportHeight = height } } @@ -264,16 +262,6 @@ func (c *context) framebufferPixels(buf []byte, f *framebuffer, region image.Rec return nil } -func (c *context) framebufferPixelsToBuffer(f *framebuffer, buffer buffer, width, height int) { - c.ctx.Flush() - - c.bindFramebuffer(f.native) - - c.ctx.BindBuffer(gl.PIXEL_PACK_BUFFER, uint32(buffer)) - c.ctx.ReadPixels(nil, 0, 0, int32(width), int32(height), gl.RGBA, gl.UNSIGNED_BYTE) - c.ctx.BindBuffer(gl.PIXEL_PACK_BUFFER, 0) -} - func (c *context) deleteTexture(t textureNative) { if c.lastTexture == t { c.lastTexture = 0 @@ -357,7 +345,7 @@ func (c *context) bindStencilBuffer(f framebufferNative, r renderbufferNative) e c.ctx.FramebufferRenderbuffer(gl.FRAMEBUFFER, gl.STENCIL_ATTACHMENT, gl.RENDERBUFFER, uint32(r)) if s := c.ctx.CheckFramebufferStatus(gl.FRAMEBUFFER); s != gl.FRAMEBUFFER_COMPLETE { - return errors.New(fmt.Sprintf("opengl: glFramebufferRenderbuffer failed: %d", s)) + return fmt.Errorf("opengl: glFramebufferRenderbuffer failed: %d", s) } return nil } diff --git a/internal/graphicsdriver/opengl/gl/const.go b/internal/graphicsdriver/opengl/gl/const.go index d59809cc1..517aa9cdb 100644 --- a/internal/graphicsdriver/opengl/gl/const.go +++ b/internal/graphicsdriver/opengl/gl/const.go @@ -23,6 +23,7 @@ const ( BLEND = 0x0BE2 CLAMP_TO_EDGE = 0x812F COLOR_ATTACHMENT0 = 0x8CE0 + COLOR_BUFFER_BIT = 0x4000 COMPILE_STATUS = 0x8B81 DECR_WRAP = 0x8508 DEPTH24_STENCIL8 = 0x88F0 @@ -52,6 +53,7 @@ 
const ( MIN = 0x8007 NEAREST = 0x2600 NO_ERROR = 0 + NONE = 0 NOTEQUAL = 0x0205 ONE = 1 ONE_MINUS_DST_ALPHA = 0x0305 diff --git a/internal/graphicsdriver/opengl/gl/debug.go b/internal/graphicsdriver/opengl/gl/debug.go index df71d288a..c1fd7a908 100644 --- a/internal/graphicsdriver/opengl/gl/debug.go +++ b/internal/graphicsdriver/opengl/gl/debug.go @@ -293,6 +293,14 @@ func (d *DebugContext) DisableVertexAttribArray(arg0 uint32) { } } +func (d *DebugContext) DrawBuffers(arg0 []uint32) { + d.Context.DrawBuffers(arg0) + fmt.Fprintln(os.Stderr, "DrawBuffers") + if e := d.Context.GetError(); e != NO_ERROR { + panic(fmt.Sprintf("gl: GetError() returned %d at DrawBuffers", e)) + } +} + func (d *DebugContext) DrawElements(arg0 uint32, arg1 int32, arg2 uint32, arg3 int) { d.Context.DrawElements(arg0, arg1, arg2, arg3) fmt.Fprintln(os.Stderr, "DrawElements") diff --git a/internal/graphicsdriver/opengl/gl/default_cgo.go b/internal/graphicsdriver/opengl/gl/default_cgo.go index 535f469db..7f070d869 100644 --- a/internal/graphicsdriver/opengl/gl/default_cgo.go +++ b/internal/graphicsdriver/opengl/gl/default_cgo.go @@ -128,6 +128,10 @@ package gl // typedef void (*fn)(GLuint index); // ((fn)(fnptr))(index); // } +// static void glowDrawBuffers(uintptr_t fnptr, GLsizei n, const GLenum* bufs) { +// typedef void (*fn)(GLsizei n, const GLenum* bufs); +// ((fn)(fnptr))(n, bufs); +// } // static void glowDrawElements(uintptr_t fnptr, GLenum mode, GLsizei count, GLenum type, const uintptr_t indices) { // typedef void (*fn)(GLenum mode, GLsizei count, GLenum type, const uintptr_t indices); // ((fn)(fnptr))(mode, count, type, indices); @@ -351,6 +355,7 @@ type defaultContext struct { gpDeleteVertexArrays C.uintptr_t gpDisable C.uintptr_t gpDisableVertexAttribArray C.uintptr_t + gpDrawBuffers C.uintptr_t gpDrawElements C.uintptr_t gpEnable C.uintptr_t gpEnableVertexAttribArray C.uintptr_t @@ -565,6 +570,10 @@ func (c *defaultContext) DisableVertexAttribArray(index uint32) { 
C.glowDisableVertexAttribArray(c.gpDisableVertexAttribArray, C.GLuint(index)) } +func (c *defaultContext) DrawBuffers(bufs []uint32) { + C.glowDrawBuffers(c.gpDrawBuffers, C.GLsizei(len(bufs)), (*C.GLenum)(unsafe.Pointer(&bufs[0]))) +} + func (c *defaultContext) DrawElements(mode uint32, count int32, xtype uint32, offset int) { C.glowDrawElements(c.gpDrawElements, C.GLenum(mode), C.GLsizei(count), C.GLenum(xtype), C.uintptr_t(offset)) } @@ -801,6 +810,7 @@ func (c *defaultContext) LoadFunctions() error { c.gpDeleteVertexArrays = C.uintptr_t(g.get("glDeleteVertexArrays")) c.gpDisable = C.uintptr_t(g.get("glDisable")) c.gpDisableVertexAttribArray = C.uintptr_t(g.get("glDisableVertexAttribArray")) + c.gpDrawBuffers = C.uintptr_t(g.get("glDrawBuffers")) c.gpDrawElements = C.uintptr_t(g.get("glDrawElements")) c.gpEnable = C.uintptr_t(g.get("glEnable")) c.gpEnableVertexAttribArray = C.uintptr_t(g.get("glEnableVertexAttribArray")) diff --git a/internal/graphicsdriver/opengl/gl/default_js.go b/internal/graphicsdriver/opengl/gl/default_js.go index 4926dcd7c..308089039 100644 --- a/internal/graphicsdriver/opengl/gl/default_js.go +++ b/internal/graphicsdriver/opengl/gl/default_js.go @@ -54,6 +54,7 @@ type defaultContext struct { fnDeleteVertexArray js.Value fnDisable js.Value fnDisableVertexAttribArray js.Value + fnDrawBuffers js.Value fnDrawElements js.Value fnEnable js.Value fnEnableVertexAttribArray js.Value @@ -184,6 +185,7 @@ func NewDefaultContext(v js.Value) (Context, error) { fnDeleteVertexArray: v.Get("deleteVertexArray").Call("bind", v), fnDisable: v.Get("disable").Call("bind", v), fnDisableVertexAttribArray: v.Get("disableVertexAttribArray").Call("bind", v), + fnDrawBuffers: v.Get("drawBuffers").Call("bind", v), fnDrawElements: v.Get("drawElements").Call("bind", v), fnEnable: v.Get("enable").Call("bind", v), fnEnableVertexAttribArray: v.Get("enableVertexAttribArray").Call("bind", v), @@ -384,6 +386,11 @@ func (c *defaultContext) DisableVertexAttribArray(index 
uint32) { c.fnDisableVertexAttribArray.Invoke(index) } +func (c *defaultContext) DrawBuffers(bufs []uint32) { + arr := jsutil.NewUint32Array(bufs) + c.fnDrawBuffers.Invoke(arr) +} + func (c *defaultContext) DrawElements(mode uint32, count int32, xtype uint32, offset int) { c.fnDrawElements.Invoke(mode, count, xtype, offset) } diff --git a/internal/graphicsdriver/opengl/gl/default_purego.go b/internal/graphicsdriver/opengl/gl/default_purego.go index dda9e1548..93d5e7fe8 100644 --- a/internal/graphicsdriver/opengl/gl/default_purego.go +++ b/internal/graphicsdriver/opengl/gl/default_purego.go @@ -51,6 +51,7 @@ type defaultContext struct { gpDeleteVertexArrays uintptr gpDisable uintptr gpDisableVertexAttribArray uintptr + gpDrawBuffers uintptr gpDrawElements uintptr gpEnable uintptr gpEnableVertexAttribArray uintptr @@ -269,6 +270,10 @@ func (c *defaultContext) DrawElements(mode uint32, count int32, xtype uint32, of purego.SyscallN(c.gpDrawElements, uintptr(mode), uintptr(count), uintptr(xtype), uintptr(offset)) } +func (c *defaultContext) DrawBuffers(buffers []uint32) { + purego.SyscallN(c.gpDrawBuffers, uintptr(len(buffers)), uintptr(unsafe.Pointer(&buffers[0]))) +} + func (c *defaultContext) Enable(cap uint32) { purego.SyscallN(c.gpEnable, uintptr(cap)) } @@ -501,6 +506,7 @@ func (c *defaultContext) LoadFunctions() error { c.gpDeleteVertexArrays = g.get("glDeleteVertexArrays") c.gpDisable = g.get("glDisable") c.gpDisableVertexAttribArray = g.get("glDisableVertexAttribArray") + c.gpDrawBuffers = g.get("glDrawBuffers") c.gpDrawElements = g.get("glDrawElements") c.gpEnable = g.get("glEnable") c.gpEnableVertexAttribArray = g.get("glEnableVertexAttribArray") diff --git a/internal/graphicsdriver/opengl/gl/interface.go b/internal/graphicsdriver/opengl/gl/interface.go index 4c42319c7..6565d2a30 100644 --- a/internal/graphicsdriver/opengl/gl/interface.go +++ b/internal/graphicsdriver/opengl/gl/interface.go @@ -60,6 +60,7 @@ type Context interface { Disable(cap uint32) 
DisableVertexAttribArray(index uint32) DrawElements(mode uint32, count int32, xtype uint32, offset int) + DrawBuffers(buffers []uint32) Enable(cap uint32) EnableVertexAttribArray(index uint32) Flush() diff --git a/internal/graphicsdriver/opengl/graphics.go b/internal/graphicsdriver/opengl/graphics.go index cac47988b..3dea16128 100644 --- a/internal/graphicsdriver/opengl/graphics.go +++ b/internal/graphicsdriver/opengl/graphics.go @@ -198,18 +198,88 @@ func (g *Graphics) uniformVariableName(idx int) string { return name } -func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { +func (g *Graphics) DrawTriangles(dstIDs [graphics.ShaderDstImageCount]graphicsdriver.ImageID, srcIDs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { if shaderID == graphicsdriver.InvalidShaderID { return fmt.Errorf("opengl: shader ID is invalid") } - destination := g.images[dstID] - g.drawCalled = true - - if err := destination.setViewport(); err != nil { - return err + targetCount := 0 + firstTarget := -1 + var dsts [graphics.ShaderDstImageCount]*Image + for i, dstID := range dstIDs { + if dstID == graphicsdriver.InvalidImageID { + continue + } + dst := g.images[dstIDs[i]] + if dst == nil { + continue + } + if firstTarget == -1 { + firstTarget = i + } + if err := dst.ensureFramebuffer(); err != nil { + return err + } + dsts[i] = dst + targetCount++ } + + f := uint32(dsts[firstTarget].framebuffer.native) + // If the number of targets is more than one, or if the only target is not the first one, then + // it is safe to assume that MRT is used.
+ // Also, it only matters in order to specify empty targets/viewports when not all slots are + // being filled. + usesMRT := firstTarget > 0 || targetCount > 1 + if usesMRT { + f = uint32(g.context.mrtFramebuffer) + // Create the initial MRT framebuffer + if f == 0 { + f = g.context.ctx.CreateFramebuffer() + if f <= 0 { + return fmt.Errorf("opengl: creating framebuffer failed: the returned value is not positive but %d", f) + } + g.context.mrtFramebuffer = framebufferNative(f) + } + + g.context.bindFramebuffer(framebufferNative(f)) + + // Reset color attachments + if s := g.context.ctx.CheckFramebufferStatus(gl.FRAMEBUFFER); s == gl.FRAMEBUFFER_COMPLETE { + g.context.ctx.Clear(gl.COLOR_BUFFER_BIT | gl.STENCIL_BUFFER_BIT) + } + for i, dst := range dsts { + if dst == nil { + continue + } + g.context.ctx.FramebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0+uint32(i), gl.TEXTURE_2D, uint32(dst.texture), 0) + } + if s := g.context.ctx.CheckFramebufferStatus(gl.FRAMEBUFFER); s != gl.FRAMEBUFFER_COMPLETE { + if s != 0 { + return fmt.Errorf("opengl: creating framebuffer failed: %v", s) + } + if e := g.context.ctx.GetError(); e != gl.NO_ERROR { + return fmt.Errorf("opengl: creating framebuffer failed: (glGetError) %d", e) + } + return fmt.Errorf("opengl: creating framebuffer failed: unknown error") + } + // Color attachments + var attached []uint32 + for i, dst := range dsts { + if dst == nil { + attached = append(attached, gl.NONE) + continue + } + attached = append(attached, uint32(gl.COLOR_ATTACHMENT0+i)) + } + g.context.ctx.DrawBuffers(attached) + } else { + g.context.bindFramebuffer(framebufferNative(f)) + } + + w, h := dsts[firstTarget].viewportSize() //.framebuffer.viewportWidth, dsts[firstTarget].framebuffer.viewportHeight + g.context.setViewport(w, h, dsts[firstTarget].screen) + g.context.blend(blend) shader := g.shaders[shaderID] @@ -232,7 +302,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. 
} // In OpenGL, the NDC's Y direction is upward, so flip the Y direction for the final framebuffer. - if destination.screen { + if !usesMRT && dsts[firstTarget].screen { const idx = graphics.ProjectionMatrixUniformVariableIndex // Invert the sign bits as float32 values. g.uniformVars[idx].value[1] ^= 1 << 31 @@ -260,7 +330,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. g.uniformVars = g.uniformVars[:0] if fillRule != graphicsdriver.FillRuleFillAll { - if err := destination.ensureStencilBuffer(); err != nil { + if err := dsts[firstTarget].ensureStencilBuffer(framebufferNative(f)); err != nil { return err } g.context.ctx.Enable(gl.STENCIL_TEST) @@ -273,6 +343,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. int32(dstRegion.Region.Dx()), int32(dstRegion.Region.Dy()), ) + switch fillRule { case graphicsdriver.FillRuleNonZero: g.context.ctx.Clear(gl.STENCIL_BUFFER_BIT) @@ -280,6 +351,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. g.context.ctx.StencilOpSeparate(gl.FRONT, gl.KEEP, gl.KEEP, gl.INCR_WRAP) g.context.ctx.StencilOpSeparate(gl.BACK, gl.KEEP, gl.KEEP, gl.DECR_WRAP) g.context.ctx.ColorMask(false, false, false, false) + g.context.ctx.DrawElements(gl.TRIANGLES, int32(dstRegion.IndexCount), gl.UNSIGNED_INT, indexOffset*int(unsafe.Sizeof(uint32(0)))) case graphicsdriver.FillRuleEvenOdd: g.context.ctx.Clear(gl.STENCIL_BUFFER_BIT) @@ -302,6 +374,10 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. 
g.context.ctx.Disable(gl.STENCIL_TEST) } + // Detach existing color attachments + //g.context.bindFramebuffer(fb) + //TODO: + return nil } diff --git a/internal/graphicsdriver/opengl/image.go b/internal/graphicsdriver/opengl/image.go index 425e9a043..d98f00e9f 100644 --- a/internal/graphicsdriver/opengl/image.go +++ b/internal/graphicsdriver/opengl/image.go @@ -60,14 +60,6 @@ func (i *Image) Dispose() { i.graphics.removeImage(i) } -func (i *Image) setViewport() error { - if err := i.ensureFramebuffer(); err != nil { - return err - } - i.graphics.context.setViewport(i.framebuffer) - return nil -} - func (i *Image) ReadPixels(args []graphicsdriver.PixelsArgs) error { if err := i.ensureFramebuffer(); err != nil { return err @@ -109,14 +101,14 @@ func (i *Image) ensureFramebuffer() error { return nil } -func (i *Image) ensureStencilBuffer() error { +func (i *Image) ensureStencilBuffer(f framebufferNative) error { if i.stencil != 0 { return nil } - if err := i.ensureFramebuffer(); err != nil { + /*if err := i.ensureFramebuffer(); err != nil { return err - } + }*/ r, err := i.graphics.context.newRenderbuffer(i.viewportSize()) if err != nil { @@ -124,7 +116,7 @@ func (i *Image) ensureStencilBuffer() error { } i.stencil = r - if err := i.graphics.context.bindStencilBuffer(i.framebuffer.native, i.stencil); err != nil { + if err := i.graphics.context.bindStencilBuffer(f, i.stencil); err != nil { return err } return nil diff --git a/internal/graphicsdriver/playstation5/graphics_playstation5.go b/internal/graphicsdriver/playstation5/graphics_playstation5.go index 3c295d6ab..f787bc981 100644 --- a/internal/graphicsdriver/playstation5/graphics_playstation5.go +++ b/internal/graphicsdriver/playstation5/graphics_playstation5.go @@ -116,7 +116,7 @@ func (g *Graphics) NewShader(program *shaderir.Program) (graphicsdriver.Shader, }, nil } -func (g *Graphics) DrawTriangles(dst graphicsdriver.ImageID, srcs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shader 
graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { +func (g *Graphics) DrawTriangles(dsts [graphics.ShaderDstImageCount]graphicsdriver.ImageID, srcs [graphics.ShaderSrcImageCount]graphicsdriver.ImageID, shader graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, fillRule graphicsdriver.FillRule) error { return nil } diff --git a/internal/jsutil/buf_js.go b/internal/jsutil/buf_js.go index 50c334a20..b0f465778 100644 --- a/internal/jsutil/buf_js.go +++ b/internal/jsutil/buf_js.go @@ -24,6 +24,7 @@ var ( uint8Array = js.Global().Get("Uint8Array") float32Array = js.Global().Get("Float32Array") int32Array = js.Global().Get("Int32Array") + uint32Array = js.Global().Get("Uint32Array") ) var ( @@ -40,8 +41,11 @@ var ( // temporaryFloat32Array is a Float32ArrayBuffer whose underlying buffer is always temporaryArrayBuffer. temporaryFloat32Array = float32Array.New(temporaryArrayBuffer) - // temporaryInt32Array is a Float32ArrayBuffer whose underlying buffer is always temporaryArrayBuffer. + // temporaryInt32Array is a Int32ArrayBuffer whose underlying buffer is always temporaryArrayBuffer. temporaryInt32Array = int32Array.New(temporaryArrayBuffer) + + // temporaryUint32Array is a Uint32ArrayBuffer whose underlying buffer is always temporaryArrayBuffer. 
+ temporaryUint32Array = uint32Array.New(temporaryArrayBuffer) ) func ensureTemporaryArrayBufferSize(byteLength int) { @@ -54,6 +58,7 @@ func ensureTemporaryArrayBufferSize(byteLength int) { temporaryUint8Array = uint8Array.New(temporaryArrayBuffer) temporaryFloat32Array = float32Array.New(temporaryArrayBuffer) temporaryInt32Array = int32Array.New(temporaryArrayBuffer) + temporaryUint32Array = uint32Array.New(temporaryArrayBuffer) } } @@ -101,3 +106,11 @@ func TemporaryInt32Array(minLength int, data []int32) js.Value { copySliceToTemporaryArrayBuffer(data) return temporaryInt32Array } + +// NewUint32Array returns a Uint32Array whose length is equal to the length of data. +func NewUint32Array(data []uint32) js.Value { + ensureTemporaryArrayBufferSize(len(data) * 4) + copySliceToTemporaryArrayBuffer(data) + a := temporaryUint32Array.Call("slice", 0, len(data)) + return a +} diff --git a/internal/mipmap/mipmap.go b/internal/mipmap/mipmap.go index 4e023c069..99e689124 100644 --- a/internal/mipmap/mipmap.go +++ b/internal/mipmap/mipmap.go @@ -127,6 +127,81 @@ func (m *Mipmap) DrawTriangles(srcs [graphics.ShaderSrcImageCount]*Mipmap, verti m.deallocateMipmaps() } +func DrawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Mipmap, srcs [graphics.ShaderSrcImageCount]*Mipmap, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *atlas.Shader, uniforms []uint32, fillRule graphicsdriver.FillRule, canSkipMipmap bool) { + if len(indices) == 0 { + return + } + + level := 0 + // TODO: Do we need to check all the sources' states of being volatile? 
+ if !canSkipMipmap && srcs[0] != nil && canUseMipmap(srcs[0].imageType) { + level = math.MaxInt32 + for i := 0; i < len(indices)/3; i++ { + const n = graphics.VertexFloatCount + dx0 := vertices[n*indices[3*i]+0] + dy0 := vertices[n*indices[3*i]+1] + sx0 := vertices[n*indices[3*i]+2] + sy0 := vertices[n*indices[3*i]+3] + dx1 := vertices[n*indices[3*i+1]+0] + dy1 := vertices[n*indices[3*i+1]+1] + sx1 := vertices[n*indices[3*i+1]+2] + sy1 := vertices[n*indices[3*i+1]+3] + dx2 := vertices[n*indices[3*i+2]+0] + dy2 := vertices[n*indices[3*i+2]+1] + sx2 := vertices[n*indices[3*i+2]+2] + sy2 := vertices[n*indices[3*i+2]+3] + if l := mipmapLevelFromDistance(dx0, dy0, dx1, dy1, sx0, sy0, sx1, sy1); level > l { + level = l + } + if l := mipmapLevelFromDistance(dx1, dy1, dx2, dy2, sx1, sy1, sx2, sy2); level > l { + level = l + } + if l := mipmapLevelFromDistance(dx2, dy2, dx0, dy0, sx2, sy2, sx0, sy0); level > l { + level = l + } + } + if level == math.MaxInt32 { + panic("mipmap: level must be calculated at least once but not") + } + } + + var dstImgs [graphics.ShaderDstImageCount]*buffered.Image + for i, dst := range dsts { + if dst == nil { + continue + } + dstImgs[i] = dst.orig + } + + var srcImgs [graphics.ShaderSrcImageCount]*buffered.Image + for i, src := range srcs { + if src == nil { + continue + } + if level != 0 { + if img := src.level(level); img != nil { + const n = graphics.VertexFloatCount + s := float32(pow2(level)) + for i := 0; i < len(vertices)/n; i++ { + vertices[i*n+2] /= s + vertices[i*n+3] /= s + } + srcImgs[i] = img + continue + } + } + srcImgs[i] = src.orig + } + + buffered.DrawTrianglesMRT(dstImgs, srcImgs, vertices, indices, blend, dstRegion, srcRegions, shader, uniforms, fillRule) + for _, dst := range dsts { + if dst == nil { + continue + } + dst.deallocateMipmaps() + } +} + func (m *Mipmap) setImg(level int, img *buffered.Image) { if m.imgs == nil { m.imgs = map[int]*buffered.Image{} diff --git a/internal/shader/shader.go 
b/internal/shader/shader.go index 407e815d5..67edbe91a 100644 --- a/internal/shader/shader.go +++ b/internal/shader/shader.go @@ -216,6 +216,7 @@ func Compile(src []byte, vertexEntry, fragmentEntry string, textureCount int) (* // TODO: Make a call graph and reorder the elements. s.ir.TextureCount = textureCount + return &s.ir, nil } @@ -742,8 +743,9 @@ func (cs *compileState) parseFuncParams(block *block, fname string, d *ast.FuncD } // If there is only one returning value, it is treated as a returning value. + // Only if not the fragment entrypoint. // An array cannot be a returning value, especially for HLSL (#2923). - if len(out) == 1 && out[0].name == "" && out[0].typ.Main != shaderir.Array { + if fname != cs.fragmentEntry && len(out) == 1 && out[0].name == "" && out[0].typ.Main != shaderir.Array { ret = out[0].typ out = nil } @@ -821,10 +823,15 @@ func (cs *compileState) parseFunc(block *block, d *ast.FuncDecl) (function, bool return function{}, false } - if len(outParams) != 0 || returnType.Main != shaderir.Vec4 { - cs.addError(d.Pos(), "fragment entry point must have one returning vec4 value for a color") - return function{}, false + // The first out-param is treated as fragColor0 in GLSL. 
+ for i := range outParams { + if outParams[i].typ.Main != shaderir.Vec4 { + cs.addError(d.Pos(), "fragment entry point must only have vec4 return values for colors") + return function{}, false + } } + // Adjust the number of textures to write to + cs.ir.ColorsOutCount = len(outParams) if cs.varyingParsed { checkVaryings(inParams[1:]) diff --git a/internal/shader/shader_test.go b/internal/shader/shader_test.go index 954ba099d..38f3cdc05 100644 --- a/internal/shader/shader_test.go +++ b/internal/shader/shader_test.go @@ -194,7 +194,7 @@ func TestCompile(t *testing.T) { } } - if tc.Metal != nil { + /*if tc.Metal != nil { m := msl.Compile(s) if got, want := metalNormalize(m), metalNormalize(string(tc.Metal)); got != want { compare(t, "Metal", got, want) @@ -203,7 +203,7 @@ func TestCompile(t *testing.T) { // Just check that Compile doesn't cause panic. // TODO: Should the results be tested? - msl.Compile(s) + msl.Compile(s)*/ }) } } diff --git a/internal/shader/stmt.go b/internal/shader/stmt.go index 8d64a9ef2..a3ffa43cd 100644 --- a/internal/shader/stmt.go +++ b/internal/shader/stmt.go @@ -334,7 +334,8 @@ func (cs *compileState) parseStmt(block *block, fname string, stmt ast.Stmt, inP case *ast.ReturnStmt: if len(stmt.Results) != len(outParams) && len(stmt.Results) != 1 { - if !(len(stmt.Results) == 0 && len(outParams) > 0 && outParams[0].name != "") { + // Fragment function does not have to return a value due to discard + if fname != cs.fragmentEntry && !(len(stmt.Results) == 0 && len(outParams) > 0 && outParams[0].name != "") { // TODO: Check variable shadowings. 
// https://go.dev/ref/spec#Return_statements cs.addError(stmt.Pos(), fmt.Sprintf("the number of returning variables must be %d but %d", len(outParams), len(stmt.Results))) diff --git a/internal/shader/testdata/for5.expected.fs b/internal/shader/testdata/for5.expected.fs index 954d264a4..f609f4939 100644 --- a/internal/shader/testdata/for5.expected.fs +++ b/internal/shader/testdata/for5.expected.fs @@ -3,31 +3,32 @@ uniform float U1; uniform float U2; int F0(in int l0); -vec4 F1(in vec4 l0); +void F1(in vec4 l0, out vec4 l1); int F0(in int l0) { return l0; } -vec4 F1(in vec4 l0) { - int l1 = 0; - int l3 = 0; - l1 = 0; - for (int l2 = 0; l2 < 10; l2++) { - int l3 = 0; - l3 = F0(l2); - l1 = (l1) + (l3); - for (int l4 = 0; l4 < 10; l4++) { - int l5 = 0; - l5 = F0(l4); - l1 = (l1) + (l5); +void F1(in vec4 l0, out vec4 l1) { + int l2 = 0; + int l4 = 0; + l2 = 0; + for (int l3 = 0; l3 < 10; l3++) { + int l4 = 0; + l4 = F0(l3); + l2 = (l2) + (l4); + for (int l5 = 0; l5 < 10; l5++) { + int l6 = 0; + l6 = F0(l5); + l2 = (l2) + (l6); } } - l3 = 0; - l1 = (l1) + (l3); - return vec4(float(l1)); + l4 = 0; + l2 = (l2) + (l4); + l1 = vec4(float(l2)); + return; } void main(void) { - fragColor = F1(gl_FragCoord); + F1(gl_FragCoord, gl_FragData[0]); } diff --git a/internal/shader/testdata/issue1238.expected.fs b/internal/shader/testdata/issue1238.expected.fs index af6a3c49f..6c3e28e4e 100644 --- a/internal/shader/testdata/issue1238.expected.fs +++ b/internal/shader/testdata/issue1238.expected.fs @@ -1,12 +1,14 @@ -vec4 F0(in vec4 l0); +void F0(in vec4 l0, out vec4 l1); -vec4 F0(in vec4 l0) { +void F0(in vec4 l0, out vec4 l1) { if (true) { - return l0; + l1 = l0; + return; } - return l0; + l1 = l0; + return; } void main(void) { - fragColor = F0(gl_FragCoord); + F0(gl_FragCoord, gl_FragData[0]); } diff --git a/internal/shader/testdata/issue1245.expected.fs b/internal/shader/testdata/issue1245.expected.fs index a964a0a03..18a354cb5 100644 --- 
a/internal/shader/testdata/issue1245.expected.fs +++ b/internal/shader/testdata/issue1245.expected.fs @@ -1,13 +1,14 @@ -vec4 F0(in vec4 l0); +void F0(in vec4 l0, out vec4 l1); -vec4 F0(in vec4 l0) { - vec4 l1 = vec4(0); - for (float l2 = 0.0; l2 < 4.0; l2++) { - (l1).x = ((l1).x) + ((l2) * (1.0000000000e-02)); +void F0(in vec4 l0, out vec4 l1) { + vec4 l2 = vec4(0); + for (float l3 = 0.0; l3 < 4.0; l3++) { + (l2).x = ((l2).x) + ((l3) * (1.0000000000e-02)); } - return l1; + l1 = l2; + return; } void main(void) { - fragColor = F0(gl_FragCoord); + F0(gl_FragCoord, gl_FragData[0]); } diff --git a/internal/shader/testdata/issue1701.expected.fs b/internal/shader/testdata/issue1701.expected.fs index c25af6e4e..1a9b3376d 100644 --- a/internal/shader/testdata/issue1701.expected.fs +++ b/internal/shader/testdata/issue1701.expected.fs @@ -1,6 +1,6 @@ void F2(void); void F3(void); -vec4 F5(in vec4 l0); +void F5(in vec4 l0, out vec4 l1); void F2(void) { } @@ -9,11 +9,12 @@ void F3(void) { F2(); } -vec4 F5(in vec4 l0) { +void F5(in vec4 l0, out vec4 l1) { F3(); - return vec4(0.0); + l1 = vec4(0.0); + return; } void main(void) { - fragColor = F5(gl_FragCoord); + F5(gl_FragCoord, gl_FragData[0]); } diff --git a/internal/shader/testdata/vertex_fragment.expected.fs b/internal/shader/testdata/vertex_fragment.expected.fs index a95f49229..f28f8c9b8 100644 --- a/internal/shader/testdata/vertex_fragment.expected.fs +++ b/internal/shader/testdata/vertex_fragment.expected.fs @@ -2,12 +2,13 @@ uniform vec2 U0; in vec2 V0; in vec4 V1; -vec4 F0(in vec4 l0, in vec2 l1, in vec4 l2); +void F0(in vec4 l0, in vec2 l1, in vec4 l2, out vec4 l3); -vec4 F0(in vec4 l0, in vec2 l1, in vec4 l2) { - return vec4((l0).x, (l1).y, (l2).z, 1.0); +void F0(in vec4 l0, in vec2 l1, in vec4 l2, out vec4 l3) { + l3 = vec4((l0).x, (l1).y, (l2).z, 1.0); + return; } void main(void) { - fragColor = F0(gl_FragCoord, V0, V1); + F0(gl_FragCoord, V0, V1, gl_FragData[0]); } diff --git a/internal/shaderir/glsl/glsl.go 
b/internal/shaderir/glsl/glsl.go index e947e0b5c..830b9f982 100644 --- a/internal/shaderir/glsl/glsl.go +++ b/internal/shaderir/glsl/glsl.go @@ -86,9 +86,7 @@ precision highp int; #define lowp #define mediump #define highp -#endif - -out vec4 fragColor;` +#endif` if version == GLSLVersionDefault { prelude += "\n\n" + utilFunctions } @@ -231,6 +229,12 @@ func Compile(p *shaderir.Program, version GLSLVersion) (vertexShader, fragmentSh fslines = append(fslines, fmt.Sprintf("in %s;", c.varDecl(p, &t, fmt.Sprintf("V%d", i)))) } } + // If ES300, out colors need to be defined explicitly + if version == GLSLVersionES300 { + for i := 0; i < p.ColorsOutCount; i++ { + fslines = append(fslines, fmt.Sprintf("layout(location = %d) out vec4 glFragColor%d;", i, i)) + } + } var funcs []*shaderir.Func if p.VertexFunc.Block != nil { @@ -420,7 +424,10 @@ func (c *compileContext) localVariableName(p *shaderir.Program, topBlock *shader case idx < nv+1: return fmt.Sprintf("V%d", idx-1) default: - return fmt.Sprintf("l%d", idx-(nv+1)) + if c.version == GLSLVersionES300 { + return fmt.Sprintf("glFragColor%d", idx-(nv+1)) + } + return fmt.Sprintf("gl_FragData[%d]", idx-(nv+1)) } default: return fmt.Sprintf("l%d", idx) @@ -595,7 +602,7 @@ func (c *compileContext) block(p *shaderir.Program, topBlock, block *shaderir.Bl case shaderir.Return: switch { case topBlock == p.FragmentFunc.Block: - lines = append(lines, fmt.Sprintf("%sfragColor = %s;", idt, expr(&s.Exprs[0]))) + lines = append(lines, fmt.Sprintf("%s%s;", idt, expr(&s.Exprs[0]))) // The 'return' statement is not required so far, as the fragment entrypoint has only one sentence so far. See adjustProgram implementation. case len(s.Exprs) == 0: lines = append(lines, idt+"return;") @@ -604,7 +611,7 @@ func (c *compileContext) block(p *shaderir.Program, topBlock, block *shaderir.Bl } case shaderir.Discard: // 'discard' is invoked only in the fragment shader entry point.
- lines = append(lines, idt+"discard;", idt+"return vec4(0.0);") + lines = append(lines, idt+"discard;") //, idt+"return vec4(0.0);") default: lines = append(lines, fmt.Sprintf("%s?(unexpected stmt: %d)", idt, s.Type)) } @@ -645,15 +652,20 @@ func adjustProgram(p *shaderir.Program) *shaderir.Program { Main: shaderir.Vec4, // gl_FragCoord } copy(inParams[1:], newP.Varyings) + // Out parameters of a fragment func are colors + outParams := make([]shaderir.Type, p.ColorsOutCount) + for i := range outParams { + outParams[i] = shaderir.Type{ + Main: shaderir.Vec4, + } + } + newP.FragmentFunc.Block.LocalVarIndexOffset += (p.ColorsOutCount - 1) newP.Funcs = append(newP.Funcs, shaderir.Func{ Index: funcIdx, InParams: inParams, - OutParams: nil, - Return: shaderir.Type{ - Main: shaderir.Vec4, - }, - Block: newP.FragmentFunc.Block, + OutParams: outParams, + Block: newP.FragmentFunc.Block, }) // Create an AST to call the new function. @@ -663,7 +675,7 @@ func adjustProgram(p *shaderir.Program) *shaderir.Program { Index: funcIdx, }, } - for i := 0; i < 1+len(newP.Varyings); i++ { + for i := 0; i < 1+len(newP.Varyings)+p.ColorsOutCount; i++ { call = append(call, shaderir.Expr{ Type: shaderir.LocalVariable, Index: i, diff --git a/internal/shaderir/hlsl/hlsl.go b/internal/shaderir/hlsl/hlsl.go index 22a05e635..a44aceb8e 100644 --- a/internal/shaderir/hlsl/hlsl.go +++ b/internal/shaderir/hlsl/hlsl.go @@ -88,6 +88,7 @@ float4x4 float4x4FromScalar(float x) { func Compile(p *shaderir.Program) (vertexShader, pixelShader string) { offsets := CalcUniformMemoryOffsets(p) + p = adjustProgram(p) c := &compileContext{ unit: p.Unit, @@ -190,7 +191,15 @@ func Compile(p *shaderir.Program) (vertexShader, pixelShader string) { } if p.FragmentFunc.Block != nil && len(p.FragmentFunc.Block.Stmts) > 0 { pslines = append(pslines, "") - pslines = append(pslines, fmt.Sprintf("float4 PSMain(Varyings %s) : SV_TARGET {", vsOut)) + pslines = append(pslines, "struct PS_OUTPUT") + pslines = append(pslines, 
"{") + for i := 0; i < p.ColorsOutCount; i++ { + pslines = append(pslines, fmt.Sprintf("\tfloat4 Color%d: SV_Target%d;", i, i)) + } + pslines = append(pslines, "};") + pslines = append(pslines, "") + pslines = append(pslines, fmt.Sprintf("PS_OUTPUT PSMain(Varyings %s) {", vsOut)) + pslines = append(pslines, "\tPS_OUTPUT output;") pslines = append(pslines, c.block(p, p.FragmentFunc.Block, p.FragmentFunc.Block, 0)...) pslines = append(pslines, "}") } @@ -353,7 +362,7 @@ func (c *compileContext) localVariableName(p *shaderir.Program, topBlock *shader case idx < nv+1: return fmt.Sprintf("%s.M%d", vsOut, idx-1) default: - return fmt.Sprintf("l%d", idx-(nv+1)) + return fmt.Sprintf("output.Color%d", idx-(nv+1)) } default: return fmt.Sprintf("l%d", idx) @@ -563,6 +572,10 @@ func (c *compileContext) block(p *shaderir.Program, topBlock, block *shaderir.Bl switch { case topBlock == p.VertexFunc.Block: lines = append(lines, fmt.Sprintf("%sreturn %s;", idt, vsOut)) + case topBlock == p.FragmentFunc.Block: + // Call to the pseudo fragment func based on out parameters + lines = append(lines, idt+expr(&s.Exprs[0])+";") + lines = append(lines, idt+"return output;") case len(s.Exprs) == 0: lines = append(lines, idt+"return;") default: @@ -570,7 +583,7 @@ func (c *compileContext) block(p *shaderir.Program, topBlock, block *shaderir.Bl } case shaderir.Discard: // 'discard' is invoked only in the fragment shader entry point. - lines = append(lines, idt+"discard;", idt+"return float4(0.0, 0.0, 0.0, 0.0);") + lines = append(lines, idt+"discard;") default: lines = append(lines, fmt.Sprintf("%s?(unexpected stmt: %d)", idt, s.Type)) } @@ -578,3 +591,86 @@ func (c *compileContext) block(p *shaderir.Program, topBlock, block *shaderir.Bl return lines } + +func adjustProgram(p *shaderir.Program) *shaderir.Program { + if p.FragmentFunc.Block == nil { + return p + } + + // Shallow-clone the program in order not to modify p itself. 
+ newP := *p + + // Create a new slice not to affect the original p. + newP.Funcs = make([]shaderir.Func, len(p.Funcs)) + copy(newP.Funcs, p.Funcs) + + // Create a new function whose body is the same as the fragment shader's entry point. + // Determine a unique index of the new function. + var funcIdx int + for _, f := range newP.Funcs { + if funcIdx <= f.Index { + funcIdx = f.Index + 1 + } + } + + // For parameters of a fragment func, see the comment in internal/shaderir/program.go. + inParams := make([]shaderir.Type, 1+len(newP.Varyings)) + inParams[0] = shaderir.Type{ + Main: shaderir.Vec4, // gl_FragCoord + } + copy(inParams[1:], newP.Varyings) + // Out parameters of a fragment func are colors + outParams := make([]shaderir.Type, p.ColorsOutCount) + for i := range outParams { + outParams[i] = shaderir.Type{ + Main: shaderir.Vec4, + } + } + newP.FragmentFunc.Block.LocalVarIndexOffset += (p.ColorsOutCount - 1) + + newP.Funcs = append(newP.Funcs, shaderir.Func{ + Index: funcIdx, + InParams: inParams, + OutParams: outParams, + Block: newP.FragmentFunc.Block, + }) + + // Create an AST to call the new function. + call := []shaderir.Expr{ + { + Type: shaderir.FunctionExpr, + Index: funcIdx, + }, + } + for i := 0; i < 1+len(newP.Varyings)+p.ColorsOutCount; i++ { + call = append(call, shaderir.Expr{ + Type: shaderir.LocalVariable, + Index: i, + }) + } + + // Replace the entry point with just calling the new function. + stmts := []shaderir.Stmt{ + { + // Return: This will be replaced with a call to the new function. + // Then the output structure containing colors will be returned.
+ Type: shaderir.Return, + Exprs: []shaderir.Expr{ + // The function call + { + Type: shaderir.Call, + Exprs: call, + }, + }, + }, + } + newP.FragmentFunc = shaderir.FragmentFunc{ + Block: &shaderir.Block{ + LocalVars: nil, + LocalVarIndexOffset: 1 + len(newP.Varyings) + 1, + Stmts: stmts, + }, + } + + return &newP +} diff --git a/internal/shaderir/ir_test.go b/internal/shaderir/ir_test.go index a248402af..c0e42b9f6 100644 --- a/internal/shaderir/ir_test.go +++ b/internal/shaderir/ir_test.go @@ -938,12 +938,12 @@ void F0(float l0, float l1, thread float& l2) { }, Attributes: []shaderir.Type{ {Main: shaderir.Vec4}, - {Main: shaderir.Float}, {Main: shaderir.Vec2}, + {Main: shaderir.Vec4}, }, Varyings: []shaderir.Type{ - {Main: shaderir.Float}, {Main: shaderir.Vec2}, + {Main: shaderir.Vec4}, }, VertexFunc: shaderir.VertexFunc{ Block: block( @@ -967,10 +967,10 @@ void F0(float l0, float l1, thread float& l2) { GlslVS: glslVertexPrelude + ` uniform float U0; in vec4 A0; -in float A1; -in vec2 A2; -out float V0; -out vec2 V1; +in vec2 A1; +in vec4 A2; +out vec2 V0; +out vec4 V1; void main(void) { gl_Position = A0; @@ -979,8 +979,8 @@ void main(void) { }`, GlslFS: glslFragmentPrelude + ` uniform float U0; -in float V0; -in vec2 V1;`, +in vec2 V0; +in vec4 V1;`, }, { Name: "FragmentFunc", @@ -991,12 +991,12 @@ in vec2 V1;`, }, Attributes: []shaderir.Type{ {Main: shaderir.Vec4}, - {Main: shaderir.Float}, {Main: shaderir.Vec2}, + {Main: shaderir.Vec4}, }, Varyings: []shaderir.Type{ - {Main: shaderir.Float}, {Main: shaderir.Vec2}, + {Main: shaderir.Vec4}, }, VertexFunc: shaderir.VertexFunc{ Block: block( @@ -1016,39 +1016,39 @@ in vec2 V1;`, ), ), }, + ColorsOutCount: 1, FragmentFunc: shaderir.FragmentFunc{ Block: block( []shaderir.Type{ + {Main: shaderir.Vec2}, {Main: shaderir.Vec4}, - {Main: shaderir.Float}, }, - 3, - assignStmt( - localVariableExpr(3), - localVariableExpr(0), - ), + 3+1, assignStmt( localVariableExpr(4), localVariableExpr(1), ), - returnStmt( - callExpr( - 
builtinFuncExpr(shaderir.Vec4F), - localVariableExpr(2), - localVariableExpr(1), - localVariableExpr(1), - ), + assignStmt( + localVariableExpr(5), + localVariableExpr(2), ), + assignStmt( + localVariableExpr(3), + localVariableExpr(0), + ), + shaderir.Stmt{ + Type: shaderir.Return, + }, ), }, }, GlslVS: glslVertexPrelude + ` uniform float U0; in vec4 A0; -in float A1; -in vec2 A2; -out float V0; -out vec2 V1; +in vec2 A1; +in vec4 A2; +out vec2 V0; +out vec4 V1; void main(void) { gl_Position = A0; @@ -1057,21 +1057,22 @@ void main(void) { }`, GlslFS: glslFragmentPrelude + ` uniform float U0; -in float V0; -in vec2 V1; +in vec2 V0; +in vec4 V1; -vec4 F0(in vec4 l0, in float l1, in vec2 l2); +void F0(in vec4 l0, in vec2 l1, in vec4 l2, out vec4 l3); -vec4 F0(in vec4 l0, in float l1, in vec2 l2) { - vec4 l3 = vec4(0); - float l4 = float(0); - l3 = l0; +void F0(in vec4 l0, in vec2 l1, in vec4 l2, out vec4 l3) { + vec2 l4 = vec2(0); + vec4 l5 = vec4(0); l4 = l1; - return vec4(l2, l1, l1); + l5 = l2; + l3 = l0; + return; } void main(void) { - fragColor = F0(gl_FragCoord, V0, V1); + F0(gl_FragCoord, V0, V1, gl_FragData[0]); }`, }, } @@ -1093,14 +1094,14 @@ void main(void) { t.Errorf("%s fragment: got: %s, want: %s", tc.Name, got, want) } } - m := msl.Compile(&tc.Program) + /*m := msl.Compile(&tc.Program) if tc.Metal != "" { got := m want := tc.Metal + "\n" if got != want { t.Errorf("%s metal: got: %s, want: %s", tc.Name, got, want) } - } + }*/ }) } } diff --git a/internal/shaderir/program.go b/internal/shaderir/program.go index 8688057cb..906713602 100644 --- a/internal/shaderir/program.go +++ b/internal/shaderir/program.go @@ -47,15 +47,16 @@ func (s SourceHash) String() string { } type Program struct { - UniformNames []string - Uniforms []Type - TextureCount int - Attributes []Type - Varyings []Type - Funcs []Func - VertexFunc VertexFunc - FragmentFunc FragmentFunc - Unit Unit + UniformNames []string + Uniforms []Type + TextureCount int + ColorsOutCount int + 
Attributes []Type + Varyings []Type + Funcs []Func + VertexFunc VertexFunc + FragmentFunc FragmentFunc + Unit Unit SourceHash SourceHash diff --git a/internal/ui/image.go b/internal/ui/image.go index d0a63ec37..11f655ec9 100644 --- a/internal/ui/image.go +++ b/internal/ui/image.go @@ -116,6 +116,33 @@ func (i *Image) DrawTriangles(srcs [graphics.ShaderSrcImageCount]*Image, vertice i.mipmap.DrawTriangles(srcMipmaps, vertices, indices, blend, dstRegion, srcRegions, shader.shader, uniforms, fillRule, canSkipMipmap) } +func DrawTrianglesMRT(dsts [graphics.ShaderDstImageCount]*Image, srcs [graphics.ShaderSrcImageCount]*Image, vertices []float32, indices []uint32, blend graphicsdriver.Blend, dstRegion image.Rectangle, srcRegions [graphics.ShaderSrcImageCount]image.Rectangle, shader *Shader, uniforms []uint32, fillRule graphicsdriver.FillRule, canSkipMipmap bool, antialias bool) { + var dstMipmaps [graphics.ShaderDstImageCount]*mipmap.Mipmap + for i, dst := range dsts { + if dst == nil { + continue + } + if dst.modifyCallback != nil { + dst.modifyCallback() + } + + dst.lastBlend = blend + dst.flushBufferIfNeeded() + dstMipmaps[i] = dst.mipmap + } + + var srcMipmaps [graphics.ShaderSrcImageCount]*mipmap.Mipmap + for i, src := range srcs { + if src == nil { + continue + } + src.flushBufferIfNeeded() + srcMipmaps[i] = src.mipmap + } + + mipmap.DrawTrianglesMRT(dstMipmaps, srcMipmaps, vertices, indices, blend, dstRegion, srcRegions, shader.shader, uniforms, fillRule, canSkipMipmap) +} + func (i *Image) WritePixels(pix []byte, region image.Rectangle) { if i.modifyCallback != nil { i.modifyCallback() diff --git a/shader_test.go b/shader_test.go index e04e7749e..b87fc7f19 100644 --- a/shader_test.go +++ b/shader_test.go @@ -2595,3 +2595,155 @@ func Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 { } } } + +// Issue #2930 +func TestShaderMRT(t *testing.T) { + const w, h = 16, 16 + + s, err := ebiten.NewShader([]byte(`//kage:unit pixels + +package main + +func Fragment(dstPos 
vec4, srcPos vec2, color vec4) (vec4, vec4, vec4, vec4, vec4, vec4, vec4, vec4) { + return vec4(1, 0, 0, 1), + vec4(0, 1, 0, 1), + vec4(0, 0, 1, 1), + vec4(1, 0, 1, 1), + vec4(1, 1, 0, 1), + vec4(0, 1, 1, 1), + vec4(1, 1, 1, 1), + vec4(1, 1, 1, 0) +} +`)) + if err != nil { + t.Fatal(err) + } + + bounds := image.Rect(0, 0, w, h) + opts := &ebiten.NewImageOptions{ + Unmanaged: true, + } + vertices := []ebiten.Vertex{ + { + DstX: 0, + DstY: 0, + }, + { + DstX: w, + DstY: 0, + }, + { + DstX: 0, + DstY: h, + }, + { + DstX: w, + DstY: h, + }, + } + indices := []uint16{0, 1, 2, 1, 2, 3} + t.Run("8 locations", func(t *testing.T) { + imgs := [8]*ebiten.Image{ + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + } + wantColors := [8]color.RGBA{ + {R: 0xff, G: 0, B: 0, A: 0xff}, + {R: 0, G: 0xff, B: 0, A: 0xff}, + {R: 0, G: 0, B: 0xff, A: 0xff}, + {R: 0xff, G: 0, B: 0xff, A: 0xff}, + {R: 0xff, G: 0xff, B: 0, A: 0xff}, + {R: 0, G: 0xff, B: 0xff, A: 0xff}, + {R: 0xff, G: 0xff, B: 0xff, A: 0xff}, + {R: 0xff, G: 0xff, B: 0xff, A: 0}, + } + ebiten.DrawTrianglesShaderMRT(imgs, vertices, indices, s, nil) + for k, dst := range imgs { + for j := 0; j < h; j++ { + for i := 0; i < w; i++ { + got := dst.At(i, j).(color.RGBA) + want := wantColors[k] + if !sameColors(got, want, 1) { + t.Errorf("dst.At(%d, %d): got: %v, want: %v", i, j, got, want) + } + } + } + } + }) + + t.Run("Empty locations", func(t *testing.T) { + imgs := [8]*ebiten.Image{ + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + 
ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + } + wantColors := [8]color.RGBA{ + {}, + {R: 0, G: 0xff, B: 0, A: 0xff}, + {}, + {R: 0xff, G: 0, B: 0xff, A: 0xff}, + {}, + {R: 0, G: 0xff, B: 0xff, A: 0xff}, + {}, + {R: 0xff, G: 0xff, B: 0xff, A: 0}, + } + dsts := [8]*ebiten.Image{ + nil, imgs[1], nil, imgs[3], nil, imgs[5], nil, imgs[7], + } + ebiten.DrawTrianglesShaderMRT(dsts, vertices, indices, s, nil) + for k, dst := range imgs { + for j := 0; j < h; j++ { + for i := 0; i < w; i++ { + got := dst.At(i, j).(color.RGBA) + want := wantColors[k] + if !sameColors(got, want, 1) { + t.Errorf("%d dst.At(%d, %d): got: %v, want: %v", k, i, j, got, want) + } + } + } + } + }) + + t.Run("1 location (first slot)", func(t *testing.T) { + imgs := [8]*ebiten.Image{ + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + ebiten.NewImageWithOptions(bounds, opts), + } + wantColors := [8]color.RGBA{ + {R: 0xff, G: 0, B: 0, A: 0xff}, + {}, {}, {}, {}, {}, {}, {}, + } + dsts := [8]*ebiten.Image{ + imgs[0], nil, nil, nil, nil, nil, nil, nil, + } + ebiten.DrawTrianglesShaderMRT(dsts, vertices, indices, s, nil) + for k, dst := range imgs { + for j := 0; j < h; j++ { + for i := 0; i < w; i++ { + got := dst.At(i, j).(color.RGBA) + want := wantColors[k] + if !sameColors(got, want, 1) { + t.Errorf("dst.At(%d, %d): got: %v, want: %v", i, j, got, want) + } + } + } + } + }) +}