diff --git a/internal/graphicsdriver/directx/graphics_windows.go b/internal/graphicsdriver/directx/graphics_windows.go index 33afdb0f1..773dd3a22 100644 --- a/internal/graphicsdriver/directx/graphics_windows.go +++ b/internal/graphicsdriver/directx/graphics_windows.go @@ -1194,6 +1194,10 @@ func (g *Graphics) NewShader(program *shaderir.Program) (graphicsdriver.Shader, } func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.ShaderImageCount]graphicsdriver.ImageID, offsets [graphics.ShaderImageCount - 1][2]float32, shaderID graphicsdriver.ShaderID, indexLen int, indexOffset int, mode graphicsdriver.CompositeMode, colorM graphicsdriver.ColorM, filter graphicsdriver.Filter, address graphicsdriver.Address, dstRegion, srcRegion graphicsdriver.Region, uniforms [][]float32, evenOdd bool) error { + if shaderID == graphicsdriver.InvalidShaderID { + return fmt.Errorf("directx: shader ID is invalid") + } + if err := g.flushCommandList(g.copyCommandList); err != nil { return err } @@ -1236,96 +1240,48 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh return err } - var shader *Shader - if shaderID != graphicsdriver.InvalidShaderID { - shader = g.shaders[shaderID] + shader := g.shaders[shaderID] + + // TODO: This logic is very similar to Metal's. Let's unify them. + dw, dh := dst.internalSize() + us := make([][]float32, graphics.PreservedUniformVariablesCount+len(uniforms)) + us[graphics.TextureDestinationSizeUniformVariableIndex] = []float32{float32(dw), float32(dh)} + usizes := make([]float32, 2*len(srcs)) + for i, src := range srcImages { + if src != nil { + w, h := src.internalSize() + usizes[2*i] = float32(w) + usizes[2*i+1] = float32(h) + } + } + us[graphics.TextureSourceSizesUniformVariableIndex] = usizes + udorigin := []float32{float32(dstRegion.X) / float32(dw), float32(dstRegion.Y) / float32(dh)} + us[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udorigin + udsize := []float32{float32(dstRegion.Width) / float32(dw), float32(dstRegion.Height) / float32(dh)} + us[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udsize + uoffsets := make([]float32, 2*len(offsets)) + for i, offset := range offsets { + uoffsets[2*i] = offset[0] + uoffsets[2*i+1] = offset[1] + } + us[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets + usorigin := []float32{float32(srcRegion.X), float32(srcRegion.Y)} + us[graphics.TextureSourceRegionOriginUniformVariableIndex] = usorigin + ussize := []float32{float32(srcRegion.Width), float32(srcRegion.Height)} + us[graphics.TextureSourceRegionSizeUniformVariableIndex] = ussize + us[graphics.ProjectionMatrixUniformVariableIndex] = []float32{ + 2 / float32(dw), 0, 0, 0, + 0, -2 / float32(dh), 0, 0, + 0, 0, 1, 0, + -1, 1, 0, 1, } - var flattenUniforms []float32 - if shader == nil { - screenWidth, screenHeight := dst.internalSize() - var srcWidth, srcHeight float32 - if filter != graphicsdriver.FilterNearest { - w, h := srcImages[0].internalSize() - srcWidth = float32(w) - srcHeight = float32(h) - } - var esBody [16]float32 - var esTranslate [4]float32 - colorM.Elements(esBody[:], esTranslate[:]) - - flattenUniforms = []float32{ - float32(screenWidth), - float32(screenHeight), - srcWidth, - srcHeight, - esBody[0], - esBody[1], - esBody[2], - esBody[3], - esBody[4], - esBody[5], - esBody[6], - esBody[7], - esBody[8], - esBody[9], - esBody[10], - esBody[11], - esBody[12], - esBody[13], - esBody[14], - esBody[15], - esTranslate[0], - esTranslate[1], - esTranslate[2], - esTranslate[3], - srcRegion.X, - srcRegion.Y, - srcRegion.X + srcRegion.Width, - srcRegion.Y + srcRegion.Height, - } - } else { - // TODO: This logic is very similar to Metal's. Let's unify them. - dw, dh := dst.internalSize() - us := make([][]float32, graphics.PreservedUniformVariablesCount+len(uniforms)) - us[graphics.TextureDestinationSizeUniformVariableIndex] = []float32{float32(dw), float32(dh)} - usizes := make([]float32, 2*len(srcs)) - for i, src := range srcImages { - if src != nil { - w, h := src.internalSize() - usizes[2*i] = float32(w) - usizes[2*i+1] = float32(h) - } - } - us[graphics.TextureSourceSizesUniformVariableIndex] = usizes - udorigin := []float32{float32(dstRegion.X) / float32(dw), float32(dstRegion.Y) / float32(dh)} - us[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udorigin - udsize := []float32{float32(dstRegion.Width) / float32(dw), float32(dstRegion.Height) / float32(dh)} - us[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udsize - uoffsets := make([]float32, 2*len(offsets)) - for i, offset := range offsets { - uoffsets[2*i] = offset[0] - uoffsets[2*i+1] = offset[1] - } - us[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets - usorigin := []float32{float32(srcRegion.X), float32(srcRegion.Y)} - us[graphics.TextureSourceRegionOriginUniformVariableIndex] = usorigin - ussize := []float32{float32(srcRegion.Width), float32(srcRegion.Height)} - us[graphics.TextureSourceRegionSizeUniformVariableIndex] = ussize - us[graphics.ProjectionMatrixUniformVariableIndex] = []float32{ - 2 / float32(dw), 0, 0, 0, - 0, -2 / float32(dh), 0, 0, - 0, 0, 1, 0, - -1, 1, 0, 1, - } - - for i, u := range uniforms { - us[graphics.PreservedUniformVariablesCount+i] = u - } - - flattenUniforms = shader.uniformsToFloat32s(us) + for i, u := range uniforms { + us[graphics.PreservedUniformVariablesCount+i] = u } + flattenUniforms := shader.uniformsToFloat32s(us) + w, h := dst.internalSize() g.needFlushDrawCommandList = true g.drawCommandList.RSSetViewports([]_D3D12_VIEWPORT{ @@ -1361,69 +1317,29 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh Format: _DXGI_FORMAT_R16_UINT, }) - if shader == nil { - key := builtinPipelineStatesKey{ - useColorM: !colorM.IsIdentity(), - compositeMode: mode, - filter: filter, - address: address, - screen: dst.screen, + if evenOdd { + s, err := shader.pipelineState(mode, prepareStencil, dst.screen) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err } - if evenOdd { - key.stencilMode = prepareStencil - s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } - - key.stencilMode = drawWithStencil - s, err = g.pipelineStates.builtinGraphicsPipelineState(g.device, key) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } - } else { - key.stencilMode = noStencil - s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } + s, err = shader.pipelineState(mode, drawWithStencil, dst.screen) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err } - } else { - if evenOdd { - s, err := shader.pipelineState(mode, prepareStencil, dst.screen) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } - - s, err = shader.pipelineState(mode, drawWithStencil, dst.screen) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } - } else { - s, err := shader.pipelineState(mode, noStencil, dst.screen) - if err != nil { - return err - } - if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { - return err - } + s, err := shader.pipelineState(mode, noStencil, dst.screen) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err } } diff --git a/internal/graphicsdriver/directx/pipeline_windows.go b/internal/graphicsdriver/directx/pipeline_windows.go index 5b6b4c979..65f1e5df3 100644 --- a/internal/graphicsdriver/directx/pipeline_windows.go +++ b/internal/graphicsdriver/directx/pipeline_windows.go @@ -49,198 +49,9 @@ func operationToBlend(c graphicsdriver.Operation, alpha bool) _D3D12_BLEND { } } -type builtinPipelineStatesKey struct { - useColorM bool - compositeMode graphicsdriver.CompositeMode - filter graphicsdriver.Filter - address graphicsdriver.Address - stencilMode stencilMode - screen bool -} - -func (k *builtinPipelineStatesKey) defs() ([]_D3D_SHADER_MACRO, error) { - var defs []_D3D_SHADER_MACRO - defval := []byte("1\x00") - if k.useColorM { - name := []byte("USE_COLOR_MATRIX\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - } - - switch k.filter { - case graphicsdriver.FilterNearest: - name := []byte("FILTER_NEAREST\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - case graphicsdriver.FilterLinear: - name := []byte("FILTER_LINEAR\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - default: - return nil, fmt.Errorf("directx: invalid filter: %d", k.filter) - } - - switch k.address { - case graphicsdriver.AddressUnsafe: - name := []byte("ADDRESS_UNSAFE\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - case graphicsdriver.AddressClampToZero: - name := []byte("ADDRESS_CLAMP_TO_ZERO\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - case graphicsdriver.AddressRepeat: - name := []byte("ADDRESS_REPEAT\x00") - defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) - default: - return nil, fmt.Errorf("directx: invalid address: %d", k.address) - } - - // Termination - defs = append(defs, _D3D_SHADER_MACRO{}) - - return defs, nil -} - -func (k *builtinPipelineStatesKey) source() []byte { - return []byte(`struct PSInput { - float4 position : SV_POSITION; - float2 texcoord : TEXCOORD0; - float4 color : COLOR; -}; - -cbuffer ShaderParameter : register(b0) { - float2 viewport_size; - float2 source_size; - float4x4 color_matrix_body; - float4 color_matrix_translation; - float4 source_region; -} - -PSInput VSMain(float2 position : POSITION, float2 tex : TEXCOORD, float4 color : COLOR) { - // In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't - // match. Then, the Y direction must be inverted. - float4x4 projectionMatrix = { - 2.0 / viewport_size.x, 0, 0, -1, - 0, -2.0 / viewport_size.y, 0, 1, - 0, 0, 1, 0, - 0, 0, 0, 1, - }; - - PSInput result; - result.position = mul(projectionMatrix, float4(position, 0, 1)); - result.texcoord = tex; - result.color = color; - return result; -} - -Texture2D tex : register(t0); -SamplerState samp : register(s0); - -float2 euclideanMod(float2 x, float2 y) { - // Assume that y is always positive. - return x - y * floor(x/y); -} - -float2 adjustTexelByAddress(float2 p, float4 source_region) { -#if defined(ADDRESS_CLAMP_TO_ZERO) - return p; -#endif - -#if defined(ADDRESS_REPEAT) - float2 o = float2(source_region[0], source_region[1]); - float2 size = float2(source_region[2] - source_region[0], source_region[3] - source_region[1]); - return euclideanMod((p - o), size) + o; -#endif - -#if defined(ADDRESS_UNSAFE) - return p; -#endif -} - -float4 PSMain(PSInput input) : SV_TARGET { -#if defined(FILTER_NEAREST) -# if defined(ADDRESS_UNSAFE) - float4 color = tex.Sample(samp, input.texcoord); -# else - float4 color; - float2 pos = adjustTexelByAddress(input.texcoord, source_region); - if (source_region[0] <= pos.x && - source_region[1] <= pos.y && - pos.x < source_region[2] && - pos.y < source_region[3]) { - color = tex.Sample(samp, pos); - } else { - color = float4(0, 0, 0, 0); - } -# endif // defined(ADDRESS_UNSAFE) -#endif // defined(FILTER_NEAREST) - -#if defined(FILTER_LINEAR) - float2 pos = input.texcoord; - float2 texel_size = 1.0 / source_size; - - // Shift 1/512 [texel] to avoid the tie-breaking issue (#1212). - // As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases. - float2 p0 = pos - (texel_size) / 2.0 + (texel_size / 512.0); - float2 p1 = pos + (texel_size) / 2.0 + (texel_size / 512.0); - -# if !defined(ADDRESS_UNSAFE) - p0 = adjustTexelByAddress(p0, source_region); - p1 = adjustTexelByAddress(p1, source_region); -# endif // !defined(ADDRESS_UNSAFE) - - float4 c0 = tex.Sample(samp, p0); - float4 c1 = tex.Sample(samp, float2(p1.x, p0.y)); - float4 c2 = tex.Sample(samp, float2(p0.x, p1.y)); - float4 c3 = tex.Sample(samp, p1); - -# if !defined(ADDRESS_UNSAFE) - if (p0.x < source_region[0]) { - c0 = float4(0, 0, 0, 0); - c2 = float4(0, 0, 0, 0); - } - if (p0.y < source_region[1]) { - c0 = float4(0, 0, 0, 0); - c1 = float4(0, 0, 0, 0); - } - if (source_region[2] <= p1.x) { - c1 = float4(0, 0, 0, 0); - c3 = float4(0, 0, 0, 0); - } - if (source_region[3] <= p1.y) { - c2 = float4(0, 0, 0, 0); - c3 = float4(0, 0, 0, 0); - } -# endif // !defined(ADDRESS_UNSAFE) - - float2 rate = frac(p0 * source_size); - float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y); -#endif // defined(FILTER_LINEAR) - -#if defined(USE_COLOR_MATRIX) - // Un-premultiply alpha. - // When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing. - color.rgb /= color.a + (1.0 - sign(color.a)); - // Apply the color matrix or scale. - color = mul(color_matrix_body, color) + color_matrix_translation; - // Premultiply alpha - color.rgb *= color.a; - // Apply color scale. - color *= input.color; - // Clamp the output. - color.rgb = min(color.rgb, color.a); - return color; -#else - return input.color * color; -#endif // defined(USE_COLOR_MATRIX) - -}`) -} - type pipelineStates struct { rootSignature *_ID3D12RootSignature - cache map[builtinPipelineStatesKey]*_ID3D12PipelineState - - // builtinShaders is a set of the built-in vertex/pixel shaders that are never released. - builtinShaders []*_ID3DBlob - shaderDescriptorHeap *_ID3D12DescriptorHeap shaderDescriptorSize uint32 @@ -302,35 +113,6 @@ func (p *pipelineStates) initialize(device *_ID3D12Device) (ferr error) { return nil } -func (p *pipelineStates) builtinGraphicsPipelineState(device *_ID3D12Device, key builtinPipelineStatesKey) (*_ID3D12PipelineState, error) { - state, ok := p.cache[key] - if ok { - return state, nil - } - - defs, err := key.defs() - if err != nil { - return nil, err - } - - vsh, psh, err := newShader(key.source(), defs) - if err != nil { - return nil, err - } - // Keep the shaders. These are never released. - p.builtinShaders = append(p.builtinShaders, vsh, psh) - - s, err := p.newPipelineState(device, vsh, psh, key.compositeMode, key.stencilMode, key.screen) - if err != nil { - return nil, err - } - if p.cache == nil { - p.cache = map[builtinPipelineStatesKey]*_ID3D12PipelineState{} - } - p.cache[key] = s - return s, nil -} - func (p *pipelineStates) useGraphicsPipelineState(device *_ID3D12Device, commandList *_ID3D12GraphicsCommandList, frameIndex int, pipelineState *_ID3D12PipelineState, srcs [graphics.ShaderImageCount]*Image, uniforms []float32) error { idx := len(p.constantBuffers[frameIndex]) if idx >= numDescriptorsPerFrame {