ebiten/internal/graphicsdriver/directx/pipeline_windows.go
2023-03-25 01:34:29 +09:00

602 lines
19 KiB
Go

// Copyright 2022 The Ebiten Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package directx
import (
"fmt"
"math"
"unsafe"
"golang.org/x/sync/errgroup"
"github.com/hajimehoshi/ebiten/v2/internal/graphics"
"github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver"
)
var inputElementDescs = []_D3D12_INPUT_ELEMENT_DESC{
{
SemanticName: &([]byte("POSITION\000"))[0],
SemanticIndex: 0,
Format: _DXGI_FORMAT_R32G32_FLOAT,
InputSlot: 0,
AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT,
InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
InstanceDataStepRate: 0,
},
{
SemanticName: &([]byte("TEXCOORD\000"))[0],
SemanticIndex: 0,
Format: _DXGI_FORMAT_R32G32_FLOAT,
InputSlot: 0,
AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT,
InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
InstanceDataStepRate: 0,
},
{
SemanticName: &([]byte("COLOR\000"))[0],
SemanticIndex: 0,
Format: _DXGI_FORMAT_R32G32B32A32_FLOAT,
InputSlot: 0,
AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT,
InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
InstanceDataStepRate: 0,
},
}
const numDescriptorsPerFrame = 32
func blendFactorToBlend(f graphicsdriver.BlendFactor, alpha bool) _D3D12_BLEND {
// D3D12_RENDER_TARGET_BLEND_DESC's *BlendAlpha members don't allow *_COLOR values.
// See https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_render_target_blend_desc.
switch f {
case graphicsdriver.BlendFactorZero:
return _D3D12_BLEND_ZERO
case graphicsdriver.BlendFactorOne:
return _D3D12_BLEND_ONE
case graphicsdriver.BlendFactorSourceColor:
if alpha {
return _D3D12_BLEND_SRC_ALPHA
}
return _D3D12_BLEND_SRC_COLOR
case graphicsdriver.BlendFactorOneMinusSourceColor:
if alpha {
return _D3D12_BLEND_INV_SRC_ALPHA
}
return _D3D12_BLEND_INV_SRC_COLOR
case graphicsdriver.BlendFactorSourceAlpha:
return _D3D12_BLEND_SRC_ALPHA
case graphicsdriver.BlendFactorOneMinusSourceAlpha:
return _D3D12_BLEND_INV_SRC_ALPHA
case graphicsdriver.BlendFactorDestinationColor:
if alpha {
return _D3D12_BLEND_DEST_ALPHA
}
return _D3D12_BLEND_DEST_COLOR
case graphicsdriver.BlendFactorOneMinusDestinationColor:
if alpha {
return _D3D12_BLEND_INV_DEST_ALPHA
}
return _D3D12_BLEND_INV_DEST_COLOR
case graphicsdriver.BlendFactorDestinationAlpha:
return _D3D12_BLEND_DEST_ALPHA
case graphicsdriver.BlendFactorOneMinusDestinationAlpha:
return _D3D12_BLEND_INV_DEST_ALPHA
case graphicsdriver.BlendFactorSourceAlphaSaturated:
return _D3D12_BLEND_SRC_ALPHA_SAT
default:
panic(fmt.Sprintf("directx: invalid blend factor: %d", f))
}
}
func blendOperationToBlendOp(o graphicsdriver.BlendOperation) _D3D12_BLEND_OP {
switch o {
case graphicsdriver.BlendOperationAdd:
return _D3D12_BLEND_OP_ADD
case graphicsdriver.BlendOperationSubtract:
return _D3D12_BLEND_OP_SUBTRACT
case graphicsdriver.BlendOperationReverseSubtract:
return _D3D12_BLEND_OP_REV_SUBTRACT
default:
panic(fmt.Sprintf("directx: invalid blend operation: %d", o))
}
}
type pipelineStates struct {
rootSignature *_ID3D12RootSignature
shaderDescriptorHeap *_ID3D12DescriptorHeap
shaderDescriptorSize uint32
samplerDescriptorHeap *_ID3D12DescriptorHeap
constantBuffers [frameCount][]*_ID3D12Resource
constantBufferMaps [frameCount][]uintptr
}
const numConstantBufferAndSourceTextures = 1 + graphics.ShaderImageCount
func (p *pipelineStates) initialize(device *_ID3D12Device) (ferr error) {
// Create a CBV/SRV/UAV descriptor heap.
// 5n+0: constants
// 5n+m (1<=4): textures
shaderH, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{
Type: _D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
NumDescriptors: frameCount * numDescriptorsPerFrame * numConstantBufferAndSourceTextures,
Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
NodeMask: 0,
})
if err != nil {
return err
}
p.shaderDescriptorHeap = shaderH
defer func() {
if ferr != nil {
p.shaderDescriptorHeap.Release()
p.shaderDescriptorHeap = nil
}
}()
p.shaderDescriptorSize = device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
samplerH, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{
Type: _D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
NumDescriptors: 1,
Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
NodeMask: 0,
})
if err != nil {
return err
}
p.samplerDescriptorHeap = samplerH
h, err := p.samplerDescriptorHeap.GetCPUDescriptorHandleForHeapStart()
if err != nil {
return err
}
device.CreateSampler(&_D3D12_SAMPLER_DESC{
Filter: _D3D12_FILTER_MIN_MAG_MIP_POINT,
AddressU: _D3D12_TEXTURE_ADDRESS_MODE_WRAP,
AddressV: _D3D12_TEXTURE_ADDRESS_MODE_WRAP,
AddressW: _D3D12_TEXTURE_ADDRESS_MODE_WRAP,
ComparisonFunc: _D3D12_COMPARISON_FUNC_NEVER,
MinLOD: -math.MaxFloat32,
MaxLOD: math.MaxFloat32,
}, h)
return nil
}
func (p *pipelineStates) drawTriangles(device *_ID3D12Device, commandList *_ID3D12GraphicsCommandList, frameIndex int, screen bool, srcs [graphics.ShaderImageCount]*Image, shader *Shader, dstRegions []graphicsdriver.DstRegion, uniforms []uint32, blend graphicsdriver.Blend, indexOffset int, evenOdd bool) error {
idx := len(p.constantBuffers[frameIndex])
if idx >= numDescriptorsPerFrame {
return fmt.Errorf("directx: too many constant buffers")
}
if cap(p.constantBuffers[frameIndex]) > idx {
p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:idx+1]
p.constantBufferMaps[frameIndex] = p.constantBufferMaps[frameIndex][:idx+1]
} else {
p.constantBuffers[frameIndex] = append(p.constantBuffers[frameIndex], nil)
p.constantBufferMaps[frameIndex] = append(p.constantBufferMaps[frameIndex], 0)
}
const bufferSizeAlignment = 256
bufferSize := uint32(unsafe.Sizeof(uint32(0))) * uint32(len(uniforms))
if bufferSize > 0 {
bufferSize = ((bufferSize-1)/bufferSizeAlignment + 1) * bufferSizeAlignment
}
cb := p.constantBuffers[frameIndex][idx]
m := p.constantBufferMaps[frameIndex][idx]
if cb != nil {
if uint32(cb.GetDesc().Width) < bufferSize {
p.constantBuffers[frameIndex][idx].Unmap(0, nil)
p.constantBuffers[frameIndex][idx].Release()
p.constantBuffers[frameIndex][idx] = nil
p.constantBufferMaps[frameIndex][idx] = 0
cb = nil
}
}
if cb == nil {
var err error
cb, err = createBuffer(device, uint64(bufferSize), _D3D12_HEAP_TYPE_UPLOAD)
if err != nil {
return err
}
p.constantBuffers[frameIndex][idx] = cb
h, err := p.shaderDescriptorHeap.GetCPUDescriptorHandleForHeapStart()
if err != nil {
return err
}
offset := int32(numConstantBufferAndSourceTextures * (frameIndex*numDescriptorsPerFrame + idx))
h.Offset(offset, p.shaderDescriptorSize)
device.CreateConstantBufferView(&_D3D12_CONSTANT_BUFFER_VIEW_DESC{
BufferLocation: cb.GetGPUVirtualAddress(),
SizeInBytes: bufferSize,
}, h)
m, err = cb.Map(0, &_D3D12_RANGE{0, 0})
if err != nil {
return err
}
p.constantBufferMaps[frameIndex][idx] = m
}
if m == 0 {
return fmt.Errorf("directx: ID3D12Resource::Map failed")
}
h, err := p.shaderDescriptorHeap.GetCPUDescriptorHandleForHeapStart()
if err != nil {
return err
}
offset := int32(numConstantBufferAndSourceTextures * (frameIndex*numDescriptorsPerFrame + idx))
h.Offset(offset, p.shaderDescriptorSize)
for _, src := range srcs {
h.Offset(1, p.shaderDescriptorSize)
if src == nil {
continue
}
device.CreateShaderResourceView(src.resource(), &_D3D12_SHADER_RESOURCE_VIEW_DESC{
Format: _DXGI_FORMAT_R8G8B8A8_UNORM,
ViewDimension: _D3D12_SRV_DIMENSION_TEXTURE2D,
Shader4ComponentMapping: _D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING,
Texture2D: _D3D12_TEX2D_SRV{
MipLevels: 1, // TODO: Can this be 0?
},
}, h)
}
// Update the constant buffer.
copy(unsafe.Slice((*uint32)(unsafe.Pointer(m)), len(uniforms)), uniforms)
rs, err := p.ensureRootSignature(device)
if err != nil {
return err
}
commandList.SetGraphicsRootSignature(rs)
commandList.SetDescriptorHeaps([]*_ID3D12DescriptorHeap{
p.shaderDescriptorHeap,
p.samplerDescriptorHeap,
})
// Match the indices with rootParams in graphicsPipelineState.
gh, err := p.shaderDescriptorHeap.GetGPUDescriptorHandleForHeapStart()
if err != nil {
return err
}
gh.Offset(offset, p.shaderDescriptorSize)
commandList.SetGraphicsRootDescriptorTable(0, gh)
commandList.SetGraphicsRootDescriptorTable(1, gh)
sh, err := p.samplerDescriptorHeap.GetGPUDescriptorHandleForHeapStart()
if err != nil {
return err
}
commandList.SetGraphicsRootDescriptorTable(2, sh)
for _, dstRegion := range dstRegions {
commandList.RSSetScissorRects([]_D3D12_RECT{
{
left: int32(dstRegion.Region.X),
top: int32(dstRegion.Region.Y),
right: int32(dstRegion.Region.X + dstRegion.Region.Width),
bottom: int32(dstRegion.Region.Y + dstRegion.Region.Height),
},
})
if evenOdd {
s, err := shader.pipelineState(blend, prepareStencil, screen)
if err != nil {
return err
}
commandList.SetPipelineState(s)
commandList.DrawIndexedInstanced(uint32(dstRegion.IndexCount), 1, uint32(indexOffset), 0, 0)
s, err = shader.pipelineState(blend, drawWithStencil, screen)
if err != nil {
return err
}
commandList.SetPipelineState(s)
commandList.DrawIndexedInstanced(uint32(dstRegion.IndexCount), 1, uint32(indexOffset), 0, 0)
} else {
s, err := shader.pipelineState(blend, noStencil, screen)
if err != nil {
return err
}
commandList.SetPipelineState(s)
commandList.DrawIndexedInstanced(uint32(dstRegion.IndexCount), 1, uint32(indexOffset), 0, 0)
}
indexOffset += dstRegion.IndexCount
}
return nil
}
func (p *pipelineStates) ensureRootSignature(device *_ID3D12Device) (rootSignature *_ID3D12RootSignature, ferr error) {
if p.rootSignature != nil {
return p.rootSignature, nil
}
cbv := _D3D12_DESCRIPTOR_RANGE{
RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_CBV, // b0
NumDescriptors: 1,
BaseShaderRegister: 0,
RegisterSpace: 0,
OffsetInDescriptorsFromTableStart: 0,
}
srv := _D3D12_DESCRIPTOR_RANGE{
RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // t0
NumDescriptors: graphics.ShaderImageCount,
BaseShaderRegister: 0,
RegisterSpace: 0,
OffsetInDescriptorsFromTableStart: 1,
}
sampler := _D3D12_DESCRIPTOR_RANGE{
RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, // s0
NumDescriptors: 1,
BaseShaderRegister: 0,
RegisterSpace: 0,
OffsetInDescriptorsFromTableStart: 0,
}
rootParams := [...]_D3D12_ROOT_PARAMETER{
{
ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{
NumDescriptorRanges: 1,
pDescriptorRanges: &cbv,
},
ShaderVisibility: _D3D12_SHADER_VISIBILITY_ALL,
},
{
ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{
NumDescriptorRanges: 1,
pDescriptorRanges: &srv,
},
ShaderVisibility: _D3D12_SHADER_VISIBILITY_PIXEL,
},
{
ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{
NumDescriptorRanges: 1,
pDescriptorRanges: &sampler,
},
ShaderVisibility: _D3D12_SHADER_VISIBILITY_PIXEL,
},
}
// Create a root signature.
sig, err := _D3D12SerializeRootSignature(&_D3D12_ROOT_SIGNATURE_DESC{
NumParameters: uint32(len(rootParams)),
pParameters: &rootParams[0],
NumStaticSamplers: 0,
pStaticSamplers: nil,
Flags: _D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT,
}, _D3D_ROOT_SIGNATURE_VERSION_1_0)
if err != nil {
return nil, err
}
defer sig.Release()
rs, err := device.CreateRootSignature(0, sig.GetBufferPointer(), sig.GetBufferSize())
if err != nil {
return nil, err
}
defer func() {
if ferr != nil {
rootSignature.Release()
}
}()
p.rootSignature = rs
return p.rootSignature, nil
}
var vertexShaderCache = map[string]*_ID3DBlob{}
func newShader(vs, ps string) (vsh, psh *_ID3DBlob, ferr error) {
var flag uint32 = uint32(_D3DCOMPILE_OPTIMIZATION_LEVEL3)
defer func() {
if ferr == nil {
return
}
if vsh != nil {
vsh.Release()
}
if psh != nil {
psh.Release()
}
}()
var wg errgroup.Group
// Vertex shaders are likely the same. If so, reuse the same _ID3DBlob.
if v, ok := vertexShaderCache[vs]; ok {
// Increment the reference count not to release this object unexpectedly.
// The value will be removed when the count reached 0.
// See (*Shader).disposeImpl.
v.AddRef()
vsh = v
} else {
defer func() {
if ferr == nil {
vertexShaderCache[vs] = vsh
}
}()
wg.Go(func() error {
v, err := _D3DCompile([]byte(vs), "shader", nil, nil, "VSMain", "vs_5_0", flag, 0)
if err != nil {
return fmt.Errorf("directx: D3DCompile for VSMain failed, original source: %s, %w", vs, err)
}
vsh = v
return nil
})
}
wg.Go(func() error {
p, err := _D3DCompile([]byte(ps), "shader", nil, nil, "PSMain", "ps_5_0", flag, 0)
if err != nil {
return fmt.Errorf("directx: D3DCompile for PSMain failed, original source: %s, %w", ps, err)
}
psh = p
return nil
})
if err := wg.Wait(); err != nil {
return nil, nil, err
}
return
}
func (p *pipelineStates) newPipelineState(device *_ID3D12Device, vsh, psh *_ID3DBlob, blend graphicsdriver.Blend, stencilMode stencilMode, screen bool) (state *_ID3D12PipelineState, ferr error) {
rootSignature, err := p.ensureRootSignature(device)
if err != nil {
return nil, err
}
defer func() {
if ferr != nil {
rootSignature.Release()
}
}()
depthStencilDesc := _D3D12_DEPTH_STENCIL_DESC{
DepthEnable: 0,
DepthWriteMask: _D3D12_DEPTH_WRITE_MASK_ALL,
DepthFunc: _D3D12_COMPARISON_FUNC_LESS,
StencilEnable: 0,
StencilReadMask: _D3D12_DEFAULT_STENCIL_READ_MASK,
StencilWriteMask: _D3D12_DEFAULT_STENCIL_WRITE_MASK,
FrontFace: _D3D12_DEPTH_STENCILOP_DESC{
StencilFailOp: _D3D12_STENCIL_OP_KEEP,
StencilDepthFailOp: _D3D12_STENCIL_OP_KEEP,
StencilPassOp: _D3D12_STENCIL_OP_KEEP,
StencilFunc: _D3D12_COMPARISON_FUNC_ALWAYS,
},
BackFace: _D3D12_DEPTH_STENCILOP_DESC{
StencilFailOp: _D3D12_STENCIL_OP_KEEP,
StencilDepthFailOp: _D3D12_STENCIL_OP_KEEP,
StencilPassOp: _D3D12_STENCIL_OP_KEEP,
StencilFunc: _D3D12_COMPARISON_FUNC_ALWAYS,
},
}
writeMask := uint8(_D3D12_COLOR_WRITE_ENABLE_ALL)
switch stencilMode {
case prepareStencil:
depthStencilDesc.StencilEnable = 1
depthStencilDesc.FrontFace.StencilPassOp = _D3D12_STENCIL_OP_INVERT
depthStencilDesc.BackFace.StencilPassOp = _D3D12_STENCIL_OP_INVERT
writeMask = 0
case drawWithStencil:
depthStencilDesc.StencilEnable = 1
depthStencilDesc.FrontFace.StencilFunc = _D3D12_COMPARISON_FUNC_NOT_EQUAL
depthStencilDesc.BackFace.StencilFunc = _D3D12_COMPARISON_FUNC_NOT_EQUAL
}
rtvFormat := _DXGI_FORMAT_R8G8B8A8_UNORM
if screen {
rtvFormat = _DXGI_FORMAT_B8G8R8A8_UNORM
}
dsvFormat := _DXGI_FORMAT_UNKNOWN
if stencilMode != noStencil {
dsvFormat = _DXGI_FORMAT_D24_UNORM_S8_UINT
}
// Create a pipeline state.
psoDesc := _D3D12_GRAPHICS_PIPELINE_STATE_DESC{
pRootSignature: rootSignature,
VS: _D3D12_SHADER_BYTECODE{
pShaderBytecode: vsh.GetBufferPointer(),
BytecodeLength: vsh.GetBufferSize(),
},
PS: _D3D12_SHADER_BYTECODE{
pShaderBytecode: psh.GetBufferPointer(),
BytecodeLength: psh.GetBufferSize(),
},
BlendState: _D3D12_BLEND_DESC{
AlphaToCoverageEnable: 0,
IndependentBlendEnable: 0,
RenderTarget: [8]_D3D12_RENDER_TARGET_BLEND_DESC{
{
BlendEnable: 1,
LogicOpEnable: 0,
SrcBlend: blendFactorToBlend(blend.BlendFactorSourceRGB, false),
DestBlend: blendFactorToBlend(blend.BlendFactorDestinationRGB, false),
BlendOp: blendOperationToBlendOp(blend.BlendOperationRGB),
SrcBlendAlpha: blendFactorToBlend(blend.BlendFactorSourceAlpha, true),
DestBlendAlpha: blendFactorToBlend(blend.BlendFactorDestinationAlpha, true),
BlendOpAlpha: blendOperationToBlendOp(blend.BlendOperationAlpha),
LogicOp: _D3D12_LOGIC_OP_NOOP,
RenderTargetWriteMask: writeMask,
},
},
},
SampleMask: math.MaxUint32,
RasterizerState: _D3D12_RASTERIZER_DESC{
FillMode: _D3D12_FILL_MODE_SOLID,
CullMode: _D3D12_CULL_MODE_NONE,
FrontCounterClockwise: 0,
DepthBias: _D3D12_DEFAULT_DEPTH_BIAS,
DepthBiasClamp: _D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
SlopeScaledDepthBias: _D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
DepthClipEnable: 0,
MultisampleEnable: 0,
AntialiasedLineEnable: 0,
ForcedSampleCount: 0,
ConservativeRaster: _D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF,
},
DepthStencilState: depthStencilDesc,
InputLayout: _D3D12_INPUT_LAYOUT_DESC{
pInputElementDescs: &inputElementDescs[0],
NumElements: uint32(len(inputElementDescs)),
},
PrimitiveTopologyType: _D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE,
NumRenderTargets: 1,
RTVFormats: [8]_DXGI_FORMAT{
rtvFormat,
},
DSVFormat: dsvFormat,
SampleDesc: _DXGI_SAMPLE_DESC{
Count: 1,
Quality: 0,
},
}
s, err := device.CreateGraphicsPipelineState(&psoDesc)
if err != nil {
return nil, err
}
return s, nil
}
func (p *pipelineStates) releaseConstantBuffers(frameIndex int) {
for i := range p.constantBuffers[frameIndex] {
p.constantBuffers[frameIndex][i].Unmap(0, nil)
p.constantBuffers[frameIndex][i].Release()
p.constantBuffers[frameIndex][i] = nil
p.constantBufferMaps[frameIndex][i] = 0
}
p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:0]
p.constantBufferMaps[frameIndex] = p.constantBufferMaps[frameIndex][:0]
}
func (p *pipelineStates) resetConstantBuffers(frameIndex int) {
p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:0]
p.constantBufferMaps[frameIndex] = p.constantBufferMaps[frameIndex][:0]
}