diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e88b873fe..a3c115aac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -92,15 +92,10 @@ jobs: - name: go test # TODO: Add more test environments (#1305) - if: ${{ startsWith(matrix.os, 'ubuntu-') }} + if: ${{ startsWith(matrix.os, 'ubuntu-') || startsWith(matrix.os, 'windows-') }} run: | go test -tags=example ${{ !startsWith(matrix.go, '1.15.') && !startsWith(matrix.go, '1.16.') && '-shuffle=on' || '' }} -v ./... - - name: go test (Windows) - if: ${{ startsWith(matrix.os, 'windows-') }} - run: | - go test -tags=example ${{ !startsWith(matrix.go, '1.15.') && !startsWith(matrix.go, '1.16.') && '-shuffle=on' || '' }} -v ./internal/shader - - name: go test (Wasm) # TODO: Investigate times out on Windows. (#1313) if: ${{ !startsWith(matrix.os, 'windows-') && !startsWith(matrix.go, '1.15.') && !startsWith(matrix.go, '1.16.') }} diff --git a/doc.go b/doc.go index 7fd8116d7..1b4f2275e 100644 --- a/doc.go +++ b/doc.go @@ -69,9 +69,16 @@ // If the specified graphics library is not available, RunGame returns an error. // This can take one of the following value: // -// "auto": Ebiten chooses the graphics library automatically. This is the default value. -// "opengl": OpenGL, OpenGL ES, or WebGL. -// "metal": Metal. This works only on macOS or iOS. +// "auto": Ebiten chooses the graphics library automatically. This is the default value. +// "opengl": OpenGL, OpenGL ES, or WebGL. +// "directx": DirectX. This works only on Windows. +// "metal": Metal. This works only on macOS or iOS. +// +// `EBITEN_DIRECTX` environment variable specifies various parameters for DirectX. +// You can specify multiple values separated by a comma. The default value is empty (i.e. no parameters). +// +// "warp": Use WARP (i.e. software rendering). +// "debug": Use a debug layer. 
// // Build tags // diff --git a/internal/graphicsdriver/directx/api_windows.go b/internal/graphicsdriver/directx/api_windows.go new file mode 100644 index 000000000..064efa685 --- /dev/null +++ b/internal/graphicsdriver/directx/api_windows.go @@ -0,0 +1,2085 @@ +// Copyright 2022 The Ebiten Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package directx + +import ( + "fmt" + "math" + "reflect" + "runtime" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +func boolToUintptr(v bool) uintptr { // Maps true to 1 and false to 0. + if v { + return 1 + } + return 0 +} + +// Reference: +// * https://github.com/microsoft/DirectX-Headers +// * https://github.com/microsoft/win32metadata +// * https://raw.githubusercontent.com/microsoft/win32metadata/master/generation/WinSDK/RecompiledIdlHeaders/um/d3d12.h + +const ( // Untyped constants; the names follow the C headers listed under Reference above. + _D3D12_APPEND_ALIGNED_ELEMENT = 0xffffffff + _D3D12_DEFAULT_DEPTH_BIAS = 0 + _D3D12_DEFAULT_DEPTH_BIAS_CLAMP = 0.0 + _D3D12_DEFAULT_STENCIL_READ_MASK = 0xff + _D3D12_DEFAULT_STENCIL_WRITE_MASK = 0xff + _D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS = 0.0 + _D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND = 0xffffffff + _D3D12_MAX_DEPTH = 1.0 + _D3D12_MIN_DEPTH = 0.0 + _D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION = 16384 + _D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES = 0xffffffff +) + +type _D3D_FEATURE_LEVEL int32 + +const ( + _D3D_FEATURE_LEVEL_11_0 _D3D_FEATURE_LEVEL = 0xb000 +) + +type _D3D_PRIMITIVE_TOPOLOGY int32 + +const ( + _D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST _D3D_PRIMITIVE_TOPOLOGY = 4 
+) + +type _D3D_ROOT_SIGNATURE_VERSION int32 + +const ( + _D3D_ROOT_SIGNATURE_VERSION_1_0 _D3D_ROOT_SIGNATURE_VERSION = 0x1 +) + +type _D3D12_BLEND int32 + +const ( + _D3D12_BLEND_ZERO _D3D12_BLEND = 1 + _D3D12_BLEND_ONE _D3D12_BLEND = 2 + _D3D12_BLEND_SRC_COLOR _D3D12_BLEND = 3 + _D3D12_BLEND_INV_SRC_COLOR _D3D12_BLEND = 4 + _D3D12_BLEND_SRC_ALPHA _D3D12_BLEND = 5 + _D3D12_BLEND_INV_SRC_ALPHA _D3D12_BLEND = 6 + _D3D12_BLEND_DEST_ALPHA _D3D12_BLEND = 7 + _D3D12_BLEND_INV_DEST_ALPHA _D3D12_BLEND = 8 + _D3D12_BLEND_DEST_COLOR _D3D12_BLEND = 9 + _D3D12_BLEND_INV_DEST_COLOR _D3D12_BLEND = 10 + _D3D12_BLEND_SRC_ALPHA_SAT _D3D12_BLEND = 11 + _D3D12_BLEND_BLEND_FACTOR _D3D12_BLEND = 14 + _D3D12_BLEND_INV_BLEND_FACTOR _D3D12_BLEND = 15 + _D3D12_BLEND_SRC1_COLOR _D3D12_BLEND = 16 + _D3D12_BLEND_INV_SRC1_COLOR _D3D12_BLEND = 17 + _D3D12_BLEND_SRC1_ALPHA _D3D12_BLEND = 18 + _D3D12_BLEND_INV_SRC1_ALPHA _D3D12_BLEND = 19 +) + +type _D3D12_BLEND_OP int32 + +const ( + _D3D12_BLEND_OP_ADD _D3D12_BLEND_OP = 1 + _D3D12_BLEND_OP_SUBTRACT _D3D12_BLEND_OP = 2 + _D3D12_BLEND_OP_REV_SUBTRACT _D3D12_BLEND_OP = 3 + _D3D12_BLEND_OP_MIN _D3D12_BLEND_OP = 4 + _D3D12_BLEND_OP_MAX _D3D12_BLEND_OP = 5 +) + +type _D3D12_CLEAR_FLAGS int32 + +const ( + _D3D12_CLEAR_FLAG_DEPTH _D3D12_CLEAR_FLAGS = 0x1 + _D3D12_CLEAR_FLAG_STENCIL _D3D12_CLEAR_FLAGS = 0x2 +) + +type _D3D12_COLOR_WRITE_ENABLE int32 + +const ( + _D3D12_COLOR_WRITE_ENABLE_RED _D3D12_COLOR_WRITE_ENABLE = 1 + _D3D12_COLOR_WRITE_ENABLE_GREEN _D3D12_COLOR_WRITE_ENABLE = 2 + _D3D12_COLOR_WRITE_ENABLE_BLUE _D3D12_COLOR_WRITE_ENABLE = 4 + _D3D12_COLOR_WRITE_ENABLE_ALPHA _D3D12_COLOR_WRITE_ENABLE = 8 + _D3D12_COLOR_WRITE_ENABLE_ALL _D3D12_COLOR_WRITE_ENABLE = _D3D12_COLOR_WRITE_ENABLE_RED | _D3D12_COLOR_WRITE_ENABLE_GREEN | _D3D12_COLOR_WRITE_ENABLE_BLUE | _D3D12_COLOR_WRITE_ENABLE_ALPHA +) + +type _D3D12_COMMAND_LIST_TYPE int32 + +const ( + _D3D12_COMMAND_LIST_TYPE_DIRECT _D3D12_COMMAND_LIST_TYPE = 0 +) + +type _D3D12_COMMAND_QUEUE_FLAGS int32 
+ +const ( + _D3D12_COMMAND_QUEUE_FLAG_NONE _D3D12_COMMAND_QUEUE_FLAGS = 0 +) + +type _D3D12_COMPARISON_FUNC int32 + +const ( + _D3D12_COMPARISON_FUNC_NEVER _D3D12_COMPARISON_FUNC = 1 + _D3D12_COMPARISON_FUNC_LESS _D3D12_COMPARISON_FUNC = 2 + _D3D12_COMPARISON_FUNC_EQUAL _D3D12_COMPARISON_FUNC = 3 + _D3D12_COMPARISON_FUNC_LESS_EQUAL _D3D12_COMPARISON_FUNC = 4 + _D3D12_COMPARISON_FUNC_GREATER _D3D12_COMPARISON_FUNC = 5 + _D3D12_COMPARISON_FUNC_NOT_EQUAL _D3D12_COMPARISON_FUNC = 6 + _D3D12_COMPARISON_FUNC_GREATER_EQUAL _D3D12_COMPARISON_FUNC = 7 + _D3D12_COMPARISON_FUNC_ALWAYS _D3D12_COMPARISON_FUNC = 8 +) + +type _D3D12_CONSERVATIVE_RASTERIZATION_MODE int32 + +const ( + _D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF _D3D12_CONSERVATIVE_RASTERIZATION_MODE = 0 + _D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON _D3D12_CONSERVATIVE_RASTERIZATION_MODE = 1 +) + +type _D3D12_CPU_PAGE_PROPERTY int32 + +const ( + _D3D12_CPU_PAGE_PROPERTY_UNKNOWN _D3D12_CPU_PAGE_PROPERTY = 0 +) + +type _D3D12_CULL_MODE int32 + +const ( + _D3D12_CULL_MODE_NONE _D3D12_CULL_MODE = 1 + _D3D12_CULL_MODE_FRONT _D3D12_CULL_MODE = 2 + _D3D12_CULL_MODE_BACK _D3D12_CULL_MODE = 3 +) + +type _D3D12_DEBUG_FEATURE int32 + +const ( + _D3D12_DEBUG_FEATURE_NONE _D3D12_DEBUG_FEATURE = 0 + _D3D12_DEBUG_FEATURE_ALLOW_BEHAVIOR_CHANGING_DEBUG_AIDS _D3D12_DEBUG_FEATURE = 0x1 + _D3D12_DEBUG_FEATURE_CONSERVATIVE_RESOURCE_STATE_TRACKING _D3D12_DEBUG_FEATURE = 0x2 + _D3D12_DEBUG_FEATURE_DISABLE_VIRTUALIZED_BUNDLES_VALIDATION _D3D12_DEBUG_FEATURE = 0x4 +) + +type _D3D12_DEPTH_WRITE_MASK int32 + +const ( + _D3D12_DEPTH_WRITE_MASK_ZERO _D3D12_DEPTH_WRITE_MASK = 0 + _D3D12_DEPTH_WRITE_MASK_ALL _D3D12_DEPTH_WRITE_MASK = 1 +) + +type _D3D12_DESCRIPTOR_HEAP_TYPE int32 + +const ( + _D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV _D3D12_DESCRIPTOR_HEAP_TYPE = iota + _D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER + _D3D12_DESCRIPTOR_HEAP_TYPE_RTV + _D3D12_DESCRIPTOR_HEAP_TYPE_DSV + _D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES +) + +type _D3D12_DESCRIPTOR_HEAP_FLAGS 
int32 + +const ( + _D3D12_DESCRIPTOR_HEAP_FLAG_NONE _D3D12_DESCRIPTOR_HEAP_FLAGS = 0 + _D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE _D3D12_DESCRIPTOR_HEAP_FLAGS = 0x1 +) + +type _D3D12_DESCRIPTOR_RANGE_TYPE int32 + +const ( + _D3D12_DESCRIPTOR_RANGE_TYPE_SRV _D3D12_DESCRIPTOR_RANGE_TYPE = iota + _D3D12_DESCRIPTOR_RANGE_TYPE_UAV + _D3D12_DESCRIPTOR_RANGE_TYPE_CBV + _D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER +) + +type _D3D12_DSV_DIMENSION int32 + +const ( + _D3D12_DSV_DIMENSION_UNKNOWN _D3D12_DSV_DIMENSION = 0 + _D3D12_DSV_DIMENSION_TEXTURE1D _D3D12_DSV_DIMENSION = 1 + _D3D12_DSV_DIMENSION_TEXTURE1DARRAY _D3D12_DSV_DIMENSION = 2 + _D3D12_DSV_DIMENSION_TEXTURE2D _D3D12_DSV_DIMENSION = 3 + _D3D12_DSV_DIMENSION_TEXTURE2DARRAY _D3D12_DSV_DIMENSION = 4 + _D3D12_DSV_DIMENSION_TEXTURE2DMS _D3D12_DSV_DIMENSION = 5 + _D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY _D3D12_DSV_DIMENSION = 6 +) + +type _D3D12_DSV_FLAGS int32 + +const ( + _D3D12_DSV_FLAG_NONE _D3D12_DSV_FLAGS = 0 + _D3D12_DSV_FLAG_READ_ONLY_DEPTH _D3D12_DSV_FLAGS = 0x1 + _D3D12_DSV_FLAG_READ_ONLY_STENCIL _D3D12_DSV_FLAGS = 0x2 +) + +type _D3D12_FENCE_FLAGS int32 + +const ( + _D3D12_FENCE_FLAG_NONE _D3D12_FENCE_FLAGS = 0 +) + +type _D3D12_FILL_MODE int32 + +const ( + _D3D12_FILL_MODE_WIREFRAME _D3D12_FILL_MODE = 2 + _D3D12_FILL_MODE_SOLID _D3D12_FILL_MODE = 3 +) + +type _D3D12_FILTER int32 + +const ( + _D3D12_FILTER_MIN_MAG_MIP_POINT _D3D12_FILTER = 0 +) + +type _D3D12_HEAP_FLAGS int32 + +const ( + _D3D12_HEAP_FLAG_NONE _D3D12_HEAP_FLAGS = 0 +) + +type _D3D12_HEAP_TYPE int32 + +const ( + _D3D12_HEAP_TYPE_DEFAULT _D3D12_HEAP_TYPE = 1 + _D3D12_HEAP_TYPE_UPLOAD _D3D12_HEAP_TYPE = 2 + _D3D12_HEAP_TYPE_READBACK _D3D12_HEAP_TYPE = 3 + _D3D12_HEAP_TYPE_CUSTOM _D3D12_HEAP_TYPE = 4 +) + +type _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE int32 + +const ( + _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE = 0 + _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE = 1 + 
_D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE = 2 +) + +type _D3D12_INPUT_CLASSIFICATION int32 + +const ( + _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA _D3D12_INPUT_CLASSIFICATION = 0 + _D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA _D3D12_INPUT_CLASSIFICATION = 1 +) + +type _D3D12_LOGIC_OP int32 + +const ( + _D3D12_LOGIC_OP_CLEAR _D3D12_LOGIC_OP = iota + _D3D12_LOGIC_OP_SET + _D3D12_LOGIC_OP_COPY + _D3D12_LOGIC_OP_COPY_INVERTED + _D3D12_LOGIC_OP_NOOP + _D3D12_LOGIC_OP_INVERT + _D3D12_LOGIC_OP_AND + _D3D12_LOGIC_OP_NAND + _D3D12_LOGIC_OP_OR + _D3D12_LOGIC_OP_NOR + _D3D12_LOGIC_OP_XOR + _D3D12_LOGIC_OP_EQUIV + _D3D12_LOGIC_OP_AND_REVERSE + _D3D12_LOGIC_OP_AND_INVERTED + _D3D12_LOGIC_OP_OR_REVERSE + _D3D12_LOGIC_OP_OR_INVERTED +) + +type _D3D12_MEMORY_POOL int32 + +const ( + _D3D12_MEMORY_POOL_UNKNOWN _D3D12_MEMORY_POOL = 0 +) + +type _D3D12_PIPELINE_STATE_FLAGS int32 + +const ( + _D3D12_PIPELINE_STATE_FLAG_NONE _D3D12_PIPELINE_STATE_FLAGS = 0 + _D3D12_PIPELINE_STATE_FLAG_TOOL_DEBUG _D3D12_PIPELINE_STATE_FLAGS = 0x1 +) + +type _D3D12_PRIMITIVE_TOPOLOGY_TYPE int32 + +const ( + _D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED _D3D12_PRIMITIVE_TOPOLOGY_TYPE = 0 + _D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT _D3D12_PRIMITIVE_TOPOLOGY_TYPE = 1 + _D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE _D3D12_PRIMITIVE_TOPOLOGY_TYPE = 2 + _D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE _D3D12_PRIMITIVE_TOPOLOGY_TYPE = 3 + _D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH _D3D12_PRIMITIVE_TOPOLOGY_TYPE = 4 +) + +type _D3D12_RESOURCE_BARRIER_FLAGS int32 + +const ( + _D3D12_RESOURCE_BARRIER_FLAG_NONE _D3D12_RESOURCE_BARRIER_FLAGS = 0 +) + +type _D3D12_RESOURCE_BARRIER_TYPE int32 + +const ( + _D3D12_RESOURCE_BARRIER_TYPE_TRANSITION _D3D12_RESOURCE_BARRIER_TYPE = 0 +) + +type _D3D12_RESOURCE_DIMENSION int32 + +const ( + _D3D12_RESOURCE_DIMENSION_UNKNOWN _D3D12_RESOURCE_DIMENSION = 0 + _D3D12_RESOURCE_DIMENSION_BUFFER _D3D12_RESOURCE_DIMENSION = 1 + _D3D12_RESOURCE_DIMENSION_TEXTURE1D 
_D3D12_RESOURCE_DIMENSION = 2 + _D3D12_RESOURCE_DIMENSION_TEXTURE2D _D3D12_RESOURCE_DIMENSION = 3 + _D3D12_RESOURCE_DIMENSION_TEXTURE3D _D3D12_RESOURCE_DIMENSION = 4 +) + +type _D3D12_RESOURCE_FLAGS int32 + +const ( + _D3D12_RESOURCE_FLAG_NONE _D3D12_RESOURCE_FLAGS = 0 + _D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET _D3D12_RESOURCE_FLAGS = 0x1 + _D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL _D3D12_RESOURCE_FLAGS = 0x2 + _D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS _D3D12_RESOURCE_FLAGS = 0x4 + _D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE _D3D12_RESOURCE_FLAGS = 0x8 + _D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER _D3D12_RESOURCE_FLAGS = 0x10 + _D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS _D3D12_RESOURCE_FLAGS = 0x20 + _D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY _D3D12_RESOURCE_FLAGS = 0x40 +) + +type _D3D12_RESOURCE_STATES int32 + +const ( + _D3D12_RESOURCE_STATE_RENDER_TARGET _D3D12_RESOURCE_STATES = 0x4 + _D3D12_RESOURCE_STATE_DEPTH_WRITE _D3D12_RESOURCE_STATES = 0x10 + _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE _D3D12_RESOURCE_STATES = 0x80 + _D3D12_RESOURCE_STATE_COPY_DEST _D3D12_RESOURCE_STATES = 0x400 + _D3D12_RESOURCE_STATE_COPY_SOURCE _D3D12_RESOURCE_STATES = 0x800 + _D3D12_RESOURCE_STATE_GENERIC_READ _D3D12_RESOURCE_STATES = 0x1 | 0x2 | 0x40 | 0x80 | 0x200 | 0x800 + _D3D12_RESOURCE_STATE_PRESENT _D3D12_RESOURCE_STATES = 0 +) + +type _D3D12_ROOT_PARAMETER_TYPE int32 + +const ( + _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE _D3D12_ROOT_PARAMETER_TYPE = iota + _D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS + _D3D12_ROOT_PARAMETER_TYPE_CBV + _D3D12_ROOT_PARAMETER_TYPE_SRV + _D3D12_ROOT_PARAMETER_TYPE_UAV +) + +type _D3D12_ROOT_SIGNATURE_FLAGS int32 + +const ( + _D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT _D3D12_ROOT_SIGNATURE_FLAGS = 0x1 +) + +type _D3D12_RTV_DIMENSION int32 + +const ( + _D3D12_SHADER_COMPONENT_MAPPING_MASK = 0x7 + _D3D12_SHADER_COMPONENT_MAPPING_SHIFT = 3 + _D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES = 1 << 
(_D3D12_SHADER_COMPONENT_MAPPING_SHIFT * 4) + _D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING_0_1_2_3 = (0 & _D3D12_SHADER_COMPONENT_MAPPING_MASK) | + ((1 & _D3D12_SHADER_COMPONENT_MAPPING_MASK) << _D3D12_SHADER_COMPONENT_MAPPING_SHIFT) | + ((2 & _D3D12_SHADER_COMPONENT_MAPPING_MASK) << (_D3D12_SHADER_COMPONENT_MAPPING_SHIFT * 2)) | + ((3 & _D3D12_SHADER_COMPONENT_MAPPING_MASK) << (_D3D12_SHADER_COMPONENT_MAPPING_SHIFT * 3)) | + _D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES + _D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING = _D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING_0_1_2_3 +) + +type _D3D12_SHADER_VISIBILITY int32 + +const ( + _D3D12_SHADER_VISIBILITY_ALL _D3D12_SHADER_VISIBILITY = 0 + _D3D12_SHADER_VISIBILITY_VERTEX _D3D12_SHADER_VISIBILITY = 1 + _D3D12_SHADER_VISIBILITY_HULL _D3D12_SHADER_VISIBILITY = 2 + _D3D12_SHADER_VISIBILITY_DOMAIN _D3D12_SHADER_VISIBILITY = 3 + _D3D12_SHADER_VISIBILITY_GEOMETRY _D3D12_SHADER_VISIBILITY = 4 + _D3D12_SHADER_VISIBILITY_PIXEL _D3D12_SHADER_VISIBILITY = 5 + _D3D12_SHADER_VISIBILITY_AMPLIFICATION _D3D12_SHADER_VISIBILITY = 6 + _D3D12_SHADER_VISIBILITY_MESH _D3D12_SHADER_VISIBILITY = 7 +) + +type _D3D12_SRV_DIMENSION int32 + +const ( + _D3D12_SRV_DIMENSION_UNKNOWN _D3D12_SRV_DIMENSION = 0 + _D3D12_SRV_DIMENSION_BUFFER _D3D12_SRV_DIMENSION = 1 + _D3D12_SRV_DIMENSION_TEXTURE1D _D3D12_SRV_DIMENSION = 2 + _D3D12_SRV_DIMENSION_TEXTURE1DARRAY _D3D12_SRV_DIMENSION = 3 + _D3D12_SRV_DIMENSION_TEXTURE2D _D3D12_SRV_DIMENSION = 4 + _D3D12_SRV_DIMENSION_TEXTURE2DARRAY _D3D12_SRV_DIMENSION = 5 + _D3D12_SRV_DIMENSION_TEXTURE2DMS _D3D12_SRV_DIMENSION = 6 + _D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY _D3D12_SRV_DIMENSION = 7 + _D3D12_SRV_DIMENSION_TEXTURE3D _D3D12_SRV_DIMENSION = 8 + _D3D12_SRV_DIMENSION_TEXTURECUBE _D3D12_SRV_DIMENSION = 9 + _D3D12_SRV_DIMENSION_TEXTURECUBEARRAY _D3D12_SRV_DIMENSION = 10 + _D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE _D3D12_SRV_DIMENSION = 11 +) + +type _D3D12_STATIC_BORDER_COLOR int32 + +const 
( + _D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK _D3D12_STATIC_BORDER_COLOR = 0 +) + +type _D3D12_STENCIL_OP int32 + +const ( + _D3D12_STENCIL_OP_KEEP _D3D12_STENCIL_OP = 1 + _D3D12_STENCIL_OP_ZERO _D3D12_STENCIL_OP = 2 + _D3D12_STENCIL_OP_REPLACE _D3D12_STENCIL_OP = 3 + _D3D12_STENCIL_OP_INCR_SAT _D3D12_STENCIL_OP = 4 + _D3D12_STENCIL_OP_DECR_SAT _D3D12_STENCIL_OP = 5 + _D3D12_STENCIL_OP_INVERT _D3D12_STENCIL_OP = 6 + _D3D12_STENCIL_OP_INCR _D3D12_STENCIL_OP = 7 + _D3D12_STENCIL_OP_DECR _D3D12_STENCIL_OP = 8 +) + +type _D3D12_TEXTURE_ADDRESS_MODE int32 + +const ( + _D3D12_TEXTURE_ADDRESS_MODE_WRAP _D3D12_TEXTURE_ADDRESS_MODE = 1 + _D3D12_TEXTURE_ADDRESS_MODE_MIRROR _D3D12_TEXTURE_ADDRESS_MODE = 2 + _D3D12_TEXTURE_ADDRESS_MODE_CLAMP _D3D12_TEXTURE_ADDRESS_MODE = 3 + _D3D12_TEXTURE_ADDRESS_MODE_BORDER _D3D12_TEXTURE_ADDRESS_MODE = 4 + _D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE _D3D12_TEXTURE_ADDRESS_MODE = 5 +) + +type _D3D12_TEXTURE_COPY_TYPE int32 + +const ( + _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX _D3D12_TEXTURE_COPY_TYPE = 0 + _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT _D3D12_TEXTURE_COPY_TYPE = 1 +) + +type _D3D12_TEXTURE_LAYOUT int32 + +const ( + _D3D12_TEXTURE_LAYOUT_UNKNOWN _D3D12_TEXTURE_LAYOUT = 0 + _D3D12_TEXTURE_LAYOUT_ROW_MAJOR _D3D12_TEXTURE_LAYOUT = 1 + _D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE _D3D12_TEXTURE_LAYOUT = 2 + _D3D12_TEXTURE_LAYOUT_64KB_STANDARD_SWIZZLE _D3D12_TEXTURE_LAYOUT = 3 +) + +type _DXGI_ALPHA_MODE uint32 + +const ( + _DXGI_ALPHA_MODE_UNSPECIFIED _DXGI_ALPHA_MODE = 0 + _DXGI_ALPHA_MODE_PREMULTIPLIED _DXGI_ALPHA_MODE = 1 + _DXGI_ALPHA_MODE_STRAIGHT _DXGI_ALPHA_MODE = 2 + _DXGI_ALPHA_MODE_IGNORE _DXGI_ALPHA_MODE = 3 + _DXGI_ALPHA_MODE_FORCE_DWORD _DXGI_ALPHA_MODE = 0xffffffff +) + +type _DXGI_FORMAT int32 + +const ( + _DXGI_FORMAT_UNKNOWN _DXGI_FORMAT = 0 + _DXGI_FORMAT_R32G32B32A32_FLOAT _DXGI_FORMAT = 2 + _DXGI_FORMAT_R32G32_FLOAT _DXGI_FORMAT = 16 + _DXGI_FORMAT_R8G8B8A8_UNORM _DXGI_FORMAT = 28 + _DXGI_FORMAT_D24_UNORM_S8_UINT 
_DXGI_FORMAT = 45 + _DXGI_FORMAT_R16_UINT _DXGI_FORMAT = 57 + _DXGI_FORMAT_B8G8R8A8_UNORM _DXGI_FORMAT = 87 +) + +type _DXGI_MODE_SCANLINE_ORDER int32 + +type _DXGI_MODE_SCALING int32 + +type _DXGI_SCALING int32 + +type _DXGI_SWAP_EFFECT int32 + +const ( + _DXGI_SWAP_EFFECT_FLIP_DISCARD _DXGI_SWAP_EFFECT = 4 +) + +type _DXGI_USAGE uint32 + +const ( + _DXGI_USAGE_RENDER_TARGET_OUTPUT _DXGI_USAGE = 1 << (1 + 4) +) + +const ( + _DXGI_ADAPTER_FLAG_SOFTWARE = 2 + + _DXGI_CREATE_FACTORY_DEBUG = 0x01 + + _DXGI_ERROR_NOT_FOUND = windows.Errno(0x887A0002) +) + +var ( + _IID_ID3D12CommandAllocator = windows.GUID{0x6102dee4, 0xaf59, 0x4b09, [...]byte{0xb9, 0x99, 0xb4, 0x4d, 0x73, 0xf0, 0x9b, 0x24}} + _IID_ID3D12CommandQueue = windows.GUID{0x0ec870a6, 0x5d7e, 0x4c22, [...]byte{0x8c, 0xfc, 0x5b, 0xaa, 0xe0, 0x76, 0x16, 0xed}} + _IID_ID3D12Debug = windows.GUID{0x344488b7, 0x6846, 0x474b, [...]byte{0xb9, 0x89, 0xf0, 0x27, 0x44, 0x82, 0x45, 0xe0}} + _IID_ID3D12DescriptorHeap = windows.GUID{0x8efb471d, 0x616c, 0x4f49, [...]byte{0x90, 0xf7, 0x12, 0x7b, 0xb7, 0x63, 0xfa, 0x51}} + _IID_ID3D12DebugCommandList = windows.GUID{0x09e0bf36, 0x54ac, 0x484f, [...]byte{0x88, 0x47, 0x4b, 0xae, 0xea, 0xb6, 0x05, 0x3f}} + _IID_ID3D12Device = windows.GUID{0x189819f1, 0x1db6, 0x4b57, [...]byte{0xbe, 0x54, 0x18, 0x21, 0x33, 0x9b, 0x85, 0xf7}} + _IID_ID3D12Fence = windows.GUID{0x0a753dcf, 0xc4d8, 0x4b91, [...]byte{0xad, 0xf6, 0xbe, 0x5a, 0x60, 0xd9, 0x5a, 0x76}} + _IID_ID3D12GraphicsCommandList = windows.GUID{0x5b160d0f, 0xac1b, 0x4185, [...]byte{0x8b, 0xa8, 0xb3, 0xae, 0x42, 0xa5, 0xa4, 0x55}} + _IID_ID3D12PipelineState = windows.GUID{0x765a30f3, 0xf624, 0x4c6f, [...]byte{0xa8, 0x28, 0xac, 0xe9, 0x48, 0x62, 0x24, 0x45}} + _IID_ID3D12Resource1 = windows.GUID{0x9D5E227A, 0x4430, 0x4161, [...]byte{0x88, 0xB3, 0x3E, 0xCA, 0x6B, 0xB1, 0x6E, 0x19}} + _IID_ID3D12RootSignature = windows.GUID{0xc54a6b66, 0x72df, 0x4ee8, [...]byte{0x8b, 0xe5, 0xa9, 0x46, 0xa1, 0x42, 0x92, 0x14}} + + _IID_IDXGIAdapter1 = 
windows.GUID{0x29038f61, 0x3839, 0x4626, [...]byte{0x91, 0xfd, 0x08, 0x68, 0x79, 0x01, 0x1a, 0x05}} + _IID_IDXGIFactory4 = windows.GUID{0x1bc6ea02, 0xef36, 0x464f, [...]byte{0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a}} +) + +type _D3D12_BLEND_DESC struct { + AlphaToCoverageEnable int32 + IndependentBlendEnable int32 + RenderTarget [8]_D3D12_RENDER_TARGET_BLEND_DESC +} + +type _D3D12_BOX struct { + left uint32 + top uint32 + front uint32 + right uint32 + bottom uint32 + back uint32 +} + +type _D3D12_CACHED_PIPELINE_STATE struct { + pCachedBlob uintptr + CachedBlobSizeInBytes uintptr +} + +type _D3D12_CLEAR_VALUE struct { + Format _DXGI_FORMAT + Color [4]float32 // Union +} + +type _D3D12_CONSTANT_BUFFER_VIEW_DESC struct { + BufferLocation _D3D12_GPU_VIRTUAL_ADDRESS + SizeInBytes uint32 +} + +type _D3D12_CPU_DESCRIPTOR_HANDLE struct { + ptr uintptr +} + +func (h *_D3D12_CPU_DESCRIPTOR_HANDLE) Offset(offsetInDescriptors int32, descriptorIncrementSize uint32) { + h.ptr += uintptr(offsetInDescriptors) * uintptr(descriptorIncrementSize) +} + +type _D3D12_DEPTH_STENCIL_VIEW_DESC struct { + Format _DXGI_FORMAT + ViewDimension _D3D12_DSV_DIMENSION + Flags _D3D12_DSV_FLAGS + _ [4]byte // A padding (TODO: This can be different on 32bit) + Texture2D _D3D12_TEX2D_DSV // Union + _ [12 - unsafe.Sizeof(_D3D12_TEX2D_DSV{})]byte // A padding for union +} + +type _D3D12_DEPTH_STENCIL_DESC struct { + DepthEnable int32 + DepthWriteMask _D3D12_DEPTH_WRITE_MASK + DepthFunc _D3D12_COMPARISON_FUNC + StencilEnable int32 + StencilReadMask uint8 + StencilWriteMask uint8 + FrontFace _D3D12_DEPTH_STENCILOP_DESC + BackFace _D3D12_DEPTH_STENCILOP_DESC +} + +type _D3D12_DEPTH_STENCILOP_DESC struct { + StencilFailOp _D3D12_STENCIL_OP + StencilDepthFailOp _D3D12_STENCIL_OP + StencilPassOp _D3D12_STENCIL_OP + StencilFunc _D3D12_COMPARISON_FUNC +} + +type _D3D12_DESCRIPTOR_RANGE struct { + RangeType _D3D12_DESCRIPTOR_RANGE_TYPE + NumDescriptors uint32 + BaseShaderRegister uint32 + RegisterSpace 
uint32 + OffsetInDescriptorsFromTableStart uint32 +} + +type _D3D12_GPU_DESCRIPTOR_HANDLE struct { + ptr uint64 +} + +func (h *_D3D12_GPU_DESCRIPTOR_HANDLE) Offset(offsetInDescriptors int32, descriptorIncrementSize uint32) { + h.ptr += uint64(offsetInDescriptors) * uint64(descriptorIncrementSize) +} + +type _D3D12_GPU_VIRTUAL_ADDRESS uint64 + +type _D3D12_GRAPHICS_PIPELINE_STATE_DESC struct { + pRootSignature *iD3D12RootSignature + VS _D3D12_SHADER_BYTECODE + PS _D3D12_SHADER_BYTECODE + DS _D3D12_SHADER_BYTECODE + HS _D3D12_SHADER_BYTECODE + GS _D3D12_SHADER_BYTECODE + StreamOutput _D3D12_STREAM_OUTPUT_DESC + BlendState _D3D12_BLEND_DESC + SampleMask uint32 + RasterizerState _D3D12_RASTERIZER_DESC + DepthStencilState _D3D12_DEPTH_STENCIL_DESC + InputLayout _D3D12_INPUT_LAYOUT_DESC + IBStripCutValue _D3D12_INDEX_BUFFER_STRIP_CUT_VALUE + PrimitiveTopologyType _D3D12_PRIMITIVE_TOPOLOGY_TYPE + NumRenderTargets uint32 + RTVFormats [8]_DXGI_FORMAT + DSVFormat _DXGI_FORMAT + SampleDesc _DXGI_SAMPLE_DESC + NodeMask uint32 + CachedPSO _D3D12_CACHED_PIPELINE_STATE + Flags _D3D12_PIPELINE_STATE_FLAGS +} + +type _D3D12_HEAP_PROPERTIES struct { + Type _D3D12_HEAP_TYPE + CPUPageProperty _D3D12_CPU_PAGE_PROPERTY + MemoryPoolPreference _D3D12_MEMORY_POOL + CreationNodeMask uint32 + VisibleNodeMask uint32 +} + +type _D3D12_INDEX_BUFFER_VIEW struct { + BufferLocation _D3D12_GPU_VIRTUAL_ADDRESS + SizeInBytes uint32 + Format _DXGI_FORMAT +} + +type _D3D12_INPUT_ELEMENT_DESC struct { + SemanticName *byte + SemanticIndex uint32 + Format _DXGI_FORMAT + InputSlot uint32 + AlignedByteOffset uint32 + InputSlotClass _D3D12_INPUT_CLASSIFICATION + InstanceDataStepRate uint32 +} + +type _D3D12_INPUT_LAYOUT_DESC struct { + pInputElementDescs *_D3D12_INPUT_ELEMENT_DESC + NumElements uint32 +} + +type _D3D12_RANGE struct { + Begin uintptr + End uintptr +} + +type _D3D12_RASTERIZER_DESC struct { + FillMode _D3D12_FILL_MODE + CullMode _D3D12_CULL_MODE + FrontCounterClockwise int32 + DepthBias int32 
+ DepthBiasClamp float32 + SlopeScaledDepthBias float32 + DepthClipEnable int32 + MultisampleEnable int32 + AntialiasedLineEnable int32 + ForcedSampleCount uint32 + ConservativeRaster _D3D12_CONSERVATIVE_RASTERIZATION_MODE +} + +type _D3D12_RECT struct { + left int32 + top int32 + right int32 + bottom int32 +} + +type _D3D12_RESOURCE_BARRIER_Transition struct { + Type _D3D12_RESOURCE_BARRIER_TYPE + Flags _D3D12_RESOURCE_BARRIER_FLAGS + Transition _D3D12_RESOURCE_TRANSITION_BARRIER +} + +type _D3D12_RESOURCE_DESC struct { + Dimension _D3D12_RESOURCE_DIMENSION + Alignment uint64 + Width uint64 + Height uint32 + DepthOrArraySize uint16 + MipLevels uint16 + Format _DXGI_FORMAT + SampleDesc _DXGI_SAMPLE_DESC + Layout _D3D12_TEXTURE_LAYOUT + Flags _D3D12_RESOURCE_FLAGS +} + +type _D3D12_RESOURCE_TRANSITION_BARRIER struct { + pResource *iD3D12Resource1 + Subresource uint32 + StateBefore _D3D12_RESOURCE_STATES + StateAfter _D3D12_RESOURCE_STATES +} + +type _D3D12_ROOT_DESCRIPTOR_TABLE struct { + NumDescriptorRanges uint32 + pDescriptorRanges *_D3D12_DESCRIPTOR_RANGE +} + +type _D3D12_ROOT_PARAMETER struct { + ParameterType _D3D12_ROOT_PARAMETER_TYPE + DescriptorTable _D3D12_ROOT_DESCRIPTOR_TABLE // Union + ShaderVisibility _D3D12_SHADER_VISIBILITY +} + +type _D3D12_ROOT_SIGNATURE_DESC struct { + NumParameters uint32 + pParameters *_D3D12_ROOT_PARAMETER + NumStaticSamplers uint32 + pStaticSamplers *_D3D12_STATIC_SAMPLER_DESC + Flags _D3D12_ROOT_SIGNATURE_FLAGS +} + +type _D3D12_SHADER_BYTECODE struct { + pShaderBytecode uintptr + BytecodeLength uintptr +} + +type _D3D12_SHADER_RESOURCE_VIEW_DESC struct { + Format _DXGI_FORMAT + ViewDimension _D3D12_SRV_DIMENSION + Shader4ComponentMapping uint32 + _ [4]byte // A padding (TODO: This can be different on 32bit) + Texture2D _D3D12_TEX2D_SRV // Union + _ [24 - unsafe.Sizeof(_D3D12_TEX2D_SRV{})]byte // A padding for union +} + +type _D3D12_SO_DECLARATION_ENTRY struct { + Stream uint32 + SemanticName *byte + SemanticIndex uint32 + 
StartComponent byte + ComponentCount byte + OutputSlot byte +} + +type _D3D12_STATIC_SAMPLER_DESC struct { + Filter _D3D12_FILTER + AddressU _D3D12_TEXTURE_ADDRESS_MODE + AddressV _D3D12_TEXTURE_ADDRESS_MODE + AddressW _D3D12_TEXTURE_ADDRESS_MODE + MipLODBias float32 + MaxAnisotropy uint32 + ComparisonFunc _D3D12_COMPARISON_FUNC + BorderColor _D3D12_STATIC_BORDER_COLOR + MinLOD float32 + MaxLOD float32 + ShaderRegister uint32 + RegisterSpace uint32 + ShaderVisibility _D3D12_SHADER_VISIBILITY +} + +type _D3D12_STREAM_OUTPUT_DESC struct { + pSODeclaration *_D3D12_SO_DECLARATION_ENTRY + NumEntries uint32 + pBufferStrides *uint32 + NumStrides uint32 + RasterizedStream uint32 +} + +type _D3D12_TEX2D_DSV struct { + MipSlice uint32 +} + +type _D3D12_TEX2D_SRV struct { + MostDetailedMip uint32 + MipLevels uint32 + PlaneSlice uint32 + ResourceMinLODClamp float32 +} + +type _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint struct { + pResource *iD3D12Resource1 + Type _D3D12_TEXTURE_COPY_TYPE + PlacedFootprint _D3D12_PLACED_SUBRESOURCE_FOOTPRINT +} + +type _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex struct { + pResource *iD3D12Resource1 + Type _D3D12_TEXTURE_COPY_TYPE + SubresourceIndex uint32 + _ [unsafe.Sizeof(_D3D12_PLACED_SUBRESOURCE_FOOTPRINT{}) - unsafe.Sizeof(uint32(0))]byte // A padding for union +} + +type _D3D12_VERTEX_BUFFER_VIEW struct { + BufferLocation _D3D12_GPU_VIRTUAL_ADDRESS + SizeInBytes uint32 + StrideInBytes uint32 +} + +type _D3D12_VIEWPORT struct { + TopLeftX float32 + TopLeftY float32 + Width float32 + Height float32 + MinDepth float32 + MaxDepth float32 +} + +var ( + d3d12 = windows.NewLazySystemDLL("d3d12.dll") + d3dcompiler = windows.NewLazySystemDLL("d3dcompiler_47.dll") + dxgi = windows.NewLazySystemDLL("dxgi.dll") + + procD3D12CreateDevice = d3d12.NewProc("D3D12CreateDevice") + procD3D12GetDebugInterface = d3d12.NewProc("D3D12GetDebugInterface") + procD3D12SerializeRootSignature = d3d12.NewProc("D3D12SerializeRootSignature") + + procD3DCompile = 
d3dcompiler.NewProc("D3DCompile")

	procCreateDXGIFactory2 = dxgi.NewProc("CreateDXGIFactory2")
)

// d3D12CreateDevice calls D3D12CreateDevice.
//
// When ppDevice is nil the only accepted result is S_FALSE; when ppDevice is
// non-nil the only accepted result is S_OK. Any other result is returned as
// an error wrapping the raw result code.
func d3D12CreateDevice(pAdapter unsafe.Pointer, minimumFeatureLevel _D3D_FEATURE_LEVEL, riid *windows.GUID, ppDevice *unsafe.Pointer) error {
	r, _, _ := procD3D12CreateDevice.Call(uintptr(pAdapter), uintptr(minimumFeatureLevel), uintptr(unsafe.Pointer(riid)), uintptr(unsafe.Pointer(ppDevice)))
	if ppDevice == nil && windows.Handle(r) != windows.S_FALSE {
		return fmt.Errorf("directx: D3D12CreateDevice failed: %w", windows.Errno(r))
	}
	if ppDevice != nil && windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: D3D12CreateDevice failed: %w", windows.Errno(r))
	}
	return nil
}

// d3D12GetDebugInterface calls D3D12GetDebugInterface, requesting the
// interface identified by _IID_ID3D12Debug.
func d3D12GetDebugInterface() (*iD3D12Debug, error) {
	var debug *iD3D12Debug
	r, _, _ := procD3D12GetDebugInterface.Call(uintptr(unsafe.Pointer(&_IID_ID3D12Debug)), uintptr(unsafe.Pointer(&debug)))
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: D3D12GetDebugInterface failed: %w", windows.Errno(r))
	}
	return debug, nil
}

// d3D12SerializeRootSignature calls D3D12SerializeRootSignature and returns
// the serialized blob.
//
// If the call fails and an error blob was produced, its text is included in
// the returned error. The error blob is released on every path.
func d3D12SerializeRootSignature(pRootSignature *_D3D12_ROOT_SIGNATURE_DESC, version _D3D_ROOT_SIGNATURE_VERSION) (*iD3DBlob, error) {
	var blob *iD3DBlob
	var errorBlob *iD3DBlob
	r, _, _ := procD3D12SerializeRootSignature.Call(uintptr(unsafe.Pointer(pRootSignature)), uintptr(version), uintptr(unsafe.Pointer(&blob)), uintptr(unsafe.Pointer(&errorBlob)))
	if errorBlob != nil {
		// Release the error blob whether or not the call succeeded so that it
		// cannot leak. The deferred call runs after the error text is formatted.
		defer errorBlob.Release()
	}
	if windows.Handle(r) != windows.S_OK {
		if errorBlob != nil {
			return nil, fmt.Errorf("directx: D3D12SerializeRootSignature failed: %s: %w", errorBlob.String(), windows.Errno(r))
		}
		return nil, fmt.Errorf("directx: D3D12SerializeRootSignature failed: %w", windows.Errno(r))
	}
	return blob, nil
}

// d3DCompile calls D3DCompile on srcData and returns the compiled code blob.
//
// sourceName, entryPoint and target are passed as NUL-terminated byte
// strings. pDefines is passed as a pointer to its first element, or as a nil
// pointer when the slice is empty.
// NOTE(review): D3DCompile presumably expects pDefines to end with a zeroed
// entry; confirm at the call sites.
//
// An empty srcData is rejected with an error. If the call fails and compiler
// messages were produced, they are included in the returned error. The
// message blob is released on every path.
func d3DCompile(srcData []byte, sourceName string, pDefines []_D3D_SHADER_MACRO, pInclude unsafe.Pointer, entryPoint string, target string, flags1 uint32, flags2 uint32) (*iD3DBlob, error) {
	// TODO: Define iD3DInclude for pInclude, but is it possible in Go?

	// &srcData[0] below would panic on an empty slice, so reject it up front.
	if len(srcData) == 0 {
		return nil, fmt.Errorf("directx: D3DCompile failed: source data is empty")
	}

	var defs unsafe.Pointer
	if len(pDefines) > 0 {
		defs = unsafe.Pointer(&pDefines[0])
	}
	sourceNameBytes := append([]byte(sourceName), 0)
	entryPointBytes := append([]byte(entryPoint), 0)
	targetBytes := append([]byte(target), 0)
	var code *iD3DBlob
	var errorMsgs *iD3DBlob
	r, _, _ := procD3DCompile.Call(
		uintptr(unsafe.Pointer(&srcData[0])), uintptr(len(srcData)), uintptr(unsafe.Pointer(&sourceNameBytes[0])),
		uintptr(defs), uintptr(unsafe.Pointer(pInclude)), uintptr(unsafe.Pointer(&entryPointBytes[0])),
		uintptr(unsafe.Pointer(&targetBytes[0])), uintptr(flags1), uintptr(flags2),
		uintptr(unsafe.Pointer(&code)), uintptr(unsafe.Pointer(&errorMsgs)))
	runtime.KeepAlive(srcData)
	runtime.KeepAlive(pDefines)
	runtime.KeepAlive(pInclude)
	runtime.KeepAlive(sourceNameBytes)
	runtime.KeepAlive(entryPointBytes)
	runtime.KeepAlive(targetBytes)
	if errorMsgs != nil {
		// A message blob can be handed back even when compilation succeeds;
		// release it on every path so that it cannot leak.
		defer errorMsgs.Release()
	}
	if windows.Handle(r) != windows.S_OK {
		if errorMsgs != nil {
			return nil, fmt.Errorf("directx: D3DCompile failed: %s: %w", errorMsgs.String(), windows.Errno(r))
		}
		return nil, fmt.Errorf("directx: D3DCompile failed: %w", windows.Errno(r))
	}
	return code, nil
}

// createDXGIFactory2 calls CreateDXGIFactory2, requesting the interface
// identified by _IID_IDXGIFactory4.
func createDXGIFactory2(flags uint32) (*iDXGIFactory4, error) {
	var factory *iDXGIFactory4
	r, _, _ := procCreateDXGIFactory2.Call(uintptr(flags), uintptr(unsafe.Pointer(&_IID_IDXGIFactory4)), uintptr(unsafe.Pointer(&factory)))
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: CreateDXGIFactory2 failed: %w", windows.Errno(r))
	}
	return factory, nil
}

// The structs below are handed to the native API by pointer, so their field
// order and sizes must not be changed independently of the C declarations
// (see the references at the top of this file).

// _D3D_SHADER_MACRO is one element of the pDefines array given to d3DCompile.
type _D3D_SHADER_MACRO struct {
	Name       *byte
	Definition *byte
}

// _D3D12_COMMAND_QUEUE_DESC is the argument of iD3D12Device.CreateCommandQueue.
type _D3D12_COMMAND_QUEUE_DESC struct {
	Type     _D3D12_COMMAND_LIST_TYPE
	Priority int32
	Flags    _D3D12_COMMAND_QUEUE_FLAGS
	NodeMask uint32
}

// _D3D12_DESCRIPTOR_HEAP_DESC is the argument of iD3D12Device.CreateDescriptorHeap.
type _D3D12_DESCRIPTOR_HEAP_DESC struct {
	Type           _D3D12_DESCRIPTOR_HEAP_TYPE
	NumDescriptors uint32
	Flags          _D3D12_DESCRIPTOR_HEAP_FLAGS
	NodeMask       uint32
}

// _D3D12_PLACED_SUBRESOURCE_FOOTPRINT is filled in by
// iD3D12Device.GetCopyableFootprints.
type _D3D12_PLACED_SUBRESOURCE_FOOTPRINT struct {
	Offset    uint64
	Footprint _D3D12_SUBRESOURCE_FOOTPRINT
}

type _D3D12_RENDER_TARGET_BLEND_DESC struct {
	BlendEnable           int32
	LogicOpEnable         int32
	SrcBlend              _D3D12_BLEND
	DestBlend             _D3D12_BLEND
	BlendOp               _D3D12_BLEND_OP
	SrcBlendAlpha         _D3D12_BLEND
	DestBlendAlpha        _D3D12_BLEND
	BlendOpAlpha          _D3D12_BLEND_OP
	LogicOp               _D3D12_LOGIC_OP
	RenderTargetWriteMask uint8
}

// _D3D12_RENDER_TARGET_VIEW_DESC is the pDesc argument of
// iD3D12Device.CreateRenderTargetView.
type _D3D12_RENDER_TARGET_VIEW_DESC struct {
	Format        _DXGI_FORMAT
	ViewDimension _D3D12_RTV_DIMENSION
	// Union. The biggest member is D3D12_BUFFER_RTV, which consists of a
	// UINT64 followed by a UINT: 16 bytes including tail padding.
	_ [2]uint64
}

// _D3D12_SAMPLER_DESC is the pDesc argument of iD3D12Device.CreateSampler.
type _D3D12_SAMPLER_DESC struct {
	Filter         _D3D12_FILTER
	AddressU       _D3D12_TEXTURE_ADDRESS_MODE
	AddressV       _D3D12_TEXTURE_ADDRESS_MODE
	AddressW       _D3D12_TEXTURE_ADDRESS_MODE
	MipLODBias     float32
	MaxAnisotropy  uint32
	ComparisonFunc _D3D12_COMPARISON_FUNC
	BorderColor    [4]float32
	MinLOD         float32
	MaxLOD         float32
}

type _D3D12_SUBRESOURCE_FOOTPRINT struct {
	Format   _DXGI_FORMAT
	Width    uint32
	Height   uint32
	Depth    uint32
	RowPitch uint32
}

// _DXGI_ADAPTER_DESC1 is filled in by iDXGIAdapter1.GetDesc1.
type _DXGI_ADAPTER_DESC1 struct {
	Description           [128]uint16
	VendorId              uint32
	DeviceId              uint32
	SubSysId              uint32
	Revision              uint32
	DedicatedVideoMemory  uint
	DedicatedSystemMemory uint
	SharedSystemMemory    uint
	AdapterLuid           _LUID
	Flags                 uint32
}

type _DXGI_SWAP_CHAIN_FULLSCREEN_DESC struct {
	RefreshRate      _DXGI_RATIONAL
	ScanlineOrdering _DXGI_MODE_SCANLINE_ORDER
	Scaling          _DXGI_MODE_SCALING
	Windowed         int32
}

type _DXGI_RATIONAL struct {
	Numerator   uint32
	Denominator uint32
}

type _DXGI_SAMPLE_DESC struct {
	Count   uint32
	Quality uint32
}

type _DXGI_SWAP_CHAIN_DESC1 struct {
	Width       uint32
	Height      uint32
	Format      _DXGI_FORMAT
	Stereo      int32
	SampleDesc  _DXGI_SAMPLE_DESC
	BufferUsage _DXGI_USAGE
	BufferCount uint32
	Scaling     _DXGI_SCALING
	SwapEffect  _DXGI_SWAP_EFFECT
	AlphaMode   _DXGI_ALPHA_MODE
	Flags       uint32
}

type _LUID struct {
	LowPart  uint32
	HighPart int32
}

// iD3D12CommandAllocator is a COM object whose first word points to its
// vtable.
type iD3D12CommandAllocator struct {
	vtbl *iD3D12CommandAllocator_Vtbl
}

// iD3D12CommandAllocator_Vtbl lists the vtable slots in call-table order; the
// order must not be changed.
type iD3D12CommandAllocator_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData          uintptr
	SetPrivateData          uintptr
	SetPrivateDataInterface uintptr
	SetName                 uintptr
	GetDevice               uintptr
	Reset                   uintptr
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12CommandAllocator) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// Reset invokes the Reset vtable entry and reports any result other than
// S_OK as an error.
func (i *iD3D12CommandAllocator) Reset() error {
	r, _, _ := syscall.Syscall(i.vtbl.Reset, 1, uintptr(unsafe.Pointer(i)), 0, 0)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12CommandAllocator::Reset failed: %w", windows.Errno(r))
	}
	return nil
}

// iD3D12CommandQueue is a COM object whose first word points to its vtable.
type iD3D12CommandQueue struct {
	vtbl *iD3D12CommandQueue_Vtbl
}

// iD3D12CommandQueue_Vtbl lists the vtable slots in call-table order; the
// order must not be changed.
type iD3D12CommandQueue_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData          uintptr
	SetPrivateData          uintptr
	SetPrivateDataInterface uintptr
	SetName                 uintptr
	GetDevice               uintptr
	UpdateTileMappings      uintptr
	CopyTileMappings        uintptr
	ExecuteCommandLists     uintptr
	SetMarker               uintptr
	BeginEvent              uintptr
	EndEvent                uintptr
	Signal                  uintptr
	Wait                    uintptr
	GetTimestampFrequency   uintptr
	GetClockCalibration     uintptr
	GetDesc                 uintptr
}

// ExecuteCommandLists passes the length of ppCommandLists and a pointer to
// its first element to the ExecuteCommandLists vtable entry.
//
// An empty slice is a no-op.
func (i *iD3D12CommandQueue) ExecuteCommandLists(ppCommandLists []*iD3D12GraphicsCommandList) {
	// &ppCommandLists[0] would panic on an empty slice, and there is nothing
	// to execute in that case anyway.
	if len(ppCommandLists) == 0 {
		return
	}
	syscall.Syscall(i.vtbl.ExecuteCommandLists, 3, uintptr(unsafe.Pointer(i)),
		uintptr(len(ppCommandLists)), uintptr(unsafe.Pointer(&ppCommandLists[0])))
	runtime.KeepAlive(ppCommandLists)
}

// Signal invokes the Signal vtable entry with the given fence and value and
// reports any result other than S_OK as an error.
//
// value is converted to a single uintptr, so only its low 32 bits are passed
// where uintptr is 32 bits wide.
func (i *iD3D12CommandQueue) Signal(signal *iD3D12Fence, value uint64) error {
	r, _, _ := syscall.Syscall(i.vtbl.Signal, 3, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(signal)), uintptr(value))
	runtime.KeepAlive(signal)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12CommandQueue::Signal failed: %w", windows.Errno(r))
	}
	return nil
}

func (i *iD3D12CommandQueue) Release() {
	syscall.Syscall(i.vtbl.Release,
1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// iD3D12Debug is a COM object whose first word points to its vtable.
type iD3D12Debug struct {
	vtbl *iD3D12Debug_Vtbl
}

// iD3D12Debug_Vtbl lists the vtable slots in call-table order; the order must
// not be changed.
type iD3D12Debug_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	EnableDebugLayer uintptr
}

// As stores i, reinterpreted as *iD3D12Debug3, into *debug.
//
// This is a plain pointer cast: QueryInterface is not called and no reference
// is added.
// NOTE(review): this presumably relies on the underlying object actually
// implementing ID3D12Debug3; confirm, or switch to QueryInterface.
func (i *iD3D12Debug) As(debug **iD3D12Debug3) {
	*debug = (*iD3D12Debug3)(unsafe.Pointer(i))
}

// EnableDebugLayer invokes the EnableDebugLayer vtable entry.
func (i *iD3D12Debug) EnableDebugLayer() {
	syscall.Syscall(i.vtbl.EnableDebugLayer, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12Debug) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// iD3D12Debug3 is a COM object whose first word points to its vtable.
type iD3D12Debug3 struct {
	vtbl *iD3D12Debug3_Vtbl
}

// iD3D12Debug3_Vtbl lists the vtable slots in call-table order; its first
// four slots have the same names as those of iD3D12Debug_Vtbl.
type iD3D12Debug3_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	EnableDebugLayer                            uintptr
	SetEnableGPUBasedValidation                 uintptr
	SetEnableSynchronizedCommandQueueValidation uintptr
	SetGPUBasedValidationFlags                  uintptr
}

// SetEnableGPUBasedValidation invokes the SetEnableGPUBasedValidation vtable
// entry, passing enable as 1 or 0.
func (i *iD3D12Debug3) SetEnableGPUBasedValidation(enable bool) {
	syscall.Syscall(i.vtbl.SetEnableGPUBasedValidation, 2, uintptr(unsafe.Pointer(i)), boolToUintptr(enable), 0)
}

// iD3D12DebugCommandList is a COM object whose first word points to its
// vtable.
type iD3D12DebugCommandList struct {
	vtbl *iD3D12DebugCommandList_Vtbl
}

// iD3D12DebugCommandList_Vtbl lists the vtable slots in call-table order; the
// order must not be changed.
type iD3D12DebugCommandList_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	AssertResourceState uintptr
	SetFeatureMask      uintptr
	GetFeatureMask      uintptr
}

// SetFeatureMask invokes the SetFeatureMask vtable entry and reports any
// result other than S_OK as an error.
func (i *iD3D12DebugCommandList) SetFeatureMask(mask _D3D12_DEBUG_FEATURE) error {
	r, _, _ := syscall.Syscall(i.vtbl.SetFeatureMask, 2, uintptr(unsafe.Pointer(i)), uintptr(mask), 0)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12DebugCommandList::SetFeatureMask failed: %w", windows.Errno(r))
	}
	return nil
}

// iD3D12DescriptorHeap is a COM object whose first word points to its vtable.
type iD3D12DescriptorHeap struct {
	vtbl *iD3D12DescriptrHeap_Vtbl
}

// iD3D12DescriptrHeap_Vtbl lists the vtable slots in call-table order; the
// order must not be changed.
type iD3D12DescriptrHeap_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData                     uintptr
	SetPrivateData                     uintptr
	SetPrivateDataInterface            uintptr
	SetName                            uintptr
	GetDevice                          uintptr
	GetDesc                            uintptr
GetCPUDescriptorHandleForHeapStart uintptr
	GetGPUDescriptorHandleForHeapStart uintptr
}

// GetCPUDescriptorHandleForHeapStart returns the handle written by the
// GetCPUDescriptorHandleForHeapStart vtable entry.
//
// The handle is received through a pointer passed as a second argument
// instead of through the call's return value; see the link below for why.
func (i *iD3D12DescriptorHeap) GetCPUDescriptorHandleForHeapStart() _D3D12_CPU_DESCRIPTOR_HANDLE {
	// There is a bug in the header file:
	// https://stackoverflow.com/questions/34118929/getcpudescriptorhandleforheapstart-stack-corruption
	var handle _D3D12_CPU_DESCRIPTOR_HANDLE
	syscall.Syscall(i.vtbl.GetCPUDescriptorHandleForHeapStart, 2, uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(&handle)), 0)
	return handle
}

// GetGPUDescriptorHandleForHeapStart returns the handle written by the
// GetGPUDescriptorHandleForHeapStart vtable entry, using the same
// out-pointer convention as GetCPUDescriptorHandleForHeapStart.
func (i *iD3D12DescriptorHeap) GetGPUDescriptorHandleForHeapStart() _D3D12_GPU_DESCRIPTOR_HANDLE {
	// This has the same issue as GetCPUDescriptorHandleForHeapStart.
	var handle _D3D12_GPU_DESCRIPTOR_HANDLE
	syscall.Syscall(i.vtbl.GetGPUDescriptorHandleForHeapStart, 2, uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(&handle)), 0)
	return handle
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12DescriptorHeap) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// iD3D12Device is a COM object whose first word points to its vtable.
type iD3D12Device struct {
	vtbl *iD3D12Device_Vtbl
}

// iD3D12Device_Vtbl lists the vtable slots in call-table order; the order
// must not be changed.
type iD3D12Device_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData                   uintptr
	SetPrivateData                   uintptr
	SetPrivateDataInterface          uintptr
	SetName                          uintptr
	GetNodeCount                     uintptr
	CreateCommandQueue               uintptr
	CreateCommandAllocator           uintptr
	CreateGraphicsPipelineState      uintptr
	CreateComputePipelineState       uintptr
	CreateCommandList                uintptr
	CheckFeatureSupport              uintptr
	CreateDescriptorHeap             uintptr
	GetDescriptorHandleIncrementSize uintptr
	CreateRootSignature              uintptr
	CreateConstantBufferView         uintptr
	CreateShaderResourceView         uintptr
	CreateUnorderedAccessView        uintptr
	CreateRenderTargetView           uintptr
	CreateDepthStencilView           uintptr
	CreateSampler                    uintptr
	CopyDescriptors                  uintptr
	CopyDescriptorsSimple            uintptr
	GetResourceAllocationInfo        uintptr
	GetCustomHeapProperties          uintptr
	CreateCommittedResource          uintptr
	CreateHeap                       uintptr
	CreatePlacedResource             uintptr
CreateReservedResource           uintptr
	CreateSharedHandle               uintptr
	OpenSharedHandle                 uintptr
	OpenSharedHandleByName           uintptr
	MakeResident                     uintptr
	Evict                            uintptr
	CreateFence                      uintptr
	GetDeviceRemovedReason           uintptr
	GetCopyableFootprints            uintptr
	CreateQueryHeap                  uintptr
	SetStablePowerState              uintptr
	CreateCommandSignature           uintptr
	GetResourceTiling                uintptr
	GetAdapterLuid                   uintptr
}

// CreateCommandAllocator invokes the CreateCommandAllocator vtable entry,
// requesting the interface identified by _IID_ID3D12CommandAllocator.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateCommandAllocator(typ _D3D12_COMMAND_LIST_TYPE) (*iD3D12CommandAllocator, error) {
	var commandAllocator *iD3D12CommandAllocator
	r, _, _ := syscall.Syscall6(i.vtbl.CreateCommandAllocator, 4, uintptr(unsafe.Pointer(i)),
		uintptr(typ), uintptr(unsafe.Pointer(&_IID_ID3D12CommandAllocator)), uintptr(unsafe.Pointer(&commandAllocator)),
		0, 0)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateCommandAllocator failed: %w", windows.Errno(r))
	}
	return commandAllocator, nil
}

// CreateCommandList invokes the CreateCommandList vtable entry, requesting
// the interface identified by _IID_ID3D12GraphicsCommandList.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateCommandList(nodeMask uint32, typ _D3D12_COMMAND_LIST_TYPE, pCommandAllocator *iD3D12CommandAllocator, pInitialState *iD3D12PipelineState) (*iD3D12GraphicsCommandList, error) {
	var commandList *iD3D12GraphicsCommandList
	r, _, _ := syscall.Syscall9(i.vtbl.CreateCommandList, 7,
		uintptr(unsafe.Pointer(i)), uintptr(nodeMask), uintptr(typ),
		uintptr(unsafe.Pointer(pCommandAllocator)), uintptr(unsafe.Pointer(pInitialState)), uintptr(unsafe.Pointer(&_IID_ID3D12GraphicsCommandList)),
		uintptr(unsafe.Pointer(&commandList)), 0, 0)
	runtime.KeepAlive(pCommandAllocator)
	runtime.KeepAlive(pInitialState)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateCommandList failed: %w", windows.Errno(r))
	}
	return commandList, nil
}

// CreateCommittedResource invokes the CreateCommittedResource vtable entry,
// requesting the interface identified by _IID_ID3D12Resource1.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateCommittedResource(pHeapProperties *_D3D12_HEAP_PROPERTIES, heapFlags _D3D12_HEAP_FLAGS, pDesc *_D3D12_RESOURCE_DESC, initialResourceState _D3D12_RESOURCE_STATES, pOptimizedClearValue *_D3D12_CLEAR_VALUE) (*iD3D12Resource1, error) {
	var resource *iD3D12Resource1
	r, _, _ := syscall.Syscall9(i.vtbl.CreateCommittedResource, 8,
		uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(pHeapProperties)), uintptr(heapFlags),
		uintptr(unsafe.Pointer(pDesc)), uintptr(initialResourceState), uintptr(unsafe.Pointer(pOptimizedClearValue)),
		uintptr(unsafe.Pointer(&_IID_ID3D12Resource1)), uintptr(unsafe.Pointer(&resource)), 0)
	runtime.KeepAlive(pHeapProperties)
	runtime.KeepAlive(pDesc)
	runtime.KeepAlive(pOptimizedClearValue)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateCommittedResource failed: %w", windows.Errno(r))
	}
	return resource, nil
}

// CreateCommandQueue invokes the CreateCommandQueue vtable entry, requesting
// the interface identified by _IID_ID3D12CommandQueue.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateCommandQueue(desc *_D3D12_COMMAND_QUEUE_DESC) (*iD3D12CommandQueue, error) {
	var commandQueue *iD3D12CommandQueue
	r, _, _ := syscall.Syscall6(i.vtbl.CreateCommandQueue, 4, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(desc)), uintptr(unsafe.Pointer(&_IID_ID3D12CommandQueue)), uintptr(unsafe.Pointer(&commandQueue)),
		0, 0)
	runtime.KeepAlive(desc)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateCommandQueue failed: %w", windows.Errno(r))
	}
	return commandQueue, nil
}

// CreateConstantBufferView invokes the CreateConstantBufferView vtable entry
// with pDesc and the raw value of destDescriptor.
func (i *iD3D12Device) CreateConstantBufferView(pDesc *_D3D12_CONSTANT_BUFFER_VIEW_DESC, destDescriptor _D3D12_CPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall(i.vtbl.CreateConstantBufferView, 3, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pDesc)), uintptr(destDescriptor.ptr))
	runtime.KeepAlive(pDesc)
}

// CreateDescriptorHeap invokes the CreateDescriptorHeap vtable entry,
// requesting the interface identified by _IID_ID3D12DescriptorHeap.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateDescriptorHeap(desc *_D3D12_DESCRIPTOR_HEAP_DESC) (*iD3D12DescriptorHeap, error) {
	var descriptorHeap *iD3D12DescriptorHeap
	r, _, _ := syscall.Syscall6(i.vtbl.CreateDescriptorHeap, 4, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(desc)), uintptr(unsafe.Pointer(&_IID_ID3D12DescriptorHeap)), uintptr(unsafe.Pointer(&descriptorHeap)),
		0, 0)
	runtime.KeepAlive(desc)
	if
windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateDescriptorHeap failed: %w", windows.Errno(r))
	}
	return descriptorHeap, nil
}

// CreateDepthStencilView invokes the CreateDepthStencilView vtable entry with
// the resource, the view description and the raw value of destDescriptor.
func (i *iD3D12Device) CreateDepthStencilView(pResource *iD3D12Resource1, pDesc *_D3D12_DEPTH_STENCIL_VIEW_DESC, destDescriptor _D3D12_CPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall6(i.vtbl.CreateDepthStencilView, 4, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pResource)), uintptr(unsafe.Pointer(pDesc)), destDescriptor.ptr,
		0, 0)
	runtime.KeepAlive(pResource)
	runtime.KeepAlive(pDesc)
}

// CreateFence invokes the CreateFence vtable entry, requesting the interface
// identified by _IID_ID3D12Fence.
// Any result other than S_OK is returned as an error.
//
// initialValue is converted to a single uintptr, so only its low 32 bits are
// passed where uintptr is 32 bits wide (see the TODO below).
func (i *iD3D12Device) CreateFence(initialValue uint64, flags _D3D12_FENCE_FLAGS) (*iD3D12Fence, error) {
	// TODO: Does this work on a 32bit machine?
	var fence *iD3D12Fence
	r, _, _ := syscall.Syscall6(i.vtbl.CreateFence, 5, uintptr(unsafe.Pointer(i)),
		uintptr(initialValue), uintptr(flags), uintptr(unsafe.Pointer(&_IID_ID3D12Fence)), uintptr(unsafe.Pointer(&fence)),
		0)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateFence failed: %w", windows.Errno(r))
	}
	return fence, nil
}

// CreateGraphicsPipelineState invokes the CreateGraphicsPipelineState vtable
// entry, requesting the interface identified by _IID_ID3D12PipelineState.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Device) CreateGraphicsPipelineState(pDesc *_D3D12_GRAPHICS_PIPELINE_STATE_DESC) (*iD3D12PipelineState, error) {
	var pipelineState *iD3D12PipelineState
	r, _, _ := syscall.Syscall6(i.vtbl.CreateGraphicsPipelineState, 4, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pDesc)), uintptr(unsafe.Pointer(&_IID_ID3D12PipelineState)), uintptr(unsafe.Pointer(&pipelineState)),
		0, 0)
	runtime.KeepAlive(pDesc)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateGraphicsPipelineState failed: %w", windows.Errno(r))
	}
	return pipelineState, nil
}

// CreateRenderTargetView invokes the CreateRenderTargetView vtable entry with
// the resource, the view description and the raw value of destDescriptor.
func (i *iD3D12Device) CreateRenderTargetView(pResource *iD3D12Resource1, pDesc *_D3D12_RENDER_TARGET_VIEW_DESC, destDescriptor _D3D12_CPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall6(i.vtbl.CreateRenderTargetView, 4, uintptr(unsafe.Pointer(i)),
uintptr(unsafe.Pointer(pResource)), uintptr(unsafe.Pointer(pDesc)), destDescriptor.ptr,
		0, 0)
	runtime.KeepAlive(pResource)
	runtime.KeepAlive(pDesc)
}

// CreateRootSignature invokes the CreateRootSignature vtable entry,
// requesting the interface identified by _IID_ID3D12RootSignature.
// Any result other than S_OK is returned as an error.
//
// pBlobWithRootSignature and blobLengthInBytes are forwarded unchanged as raw
// integers.
// NOTE(review): they presumably come from iD3DBlob.GetBufferPointer and
// iD3DBlob.GetBufferSize of a blob that is still alive; confirm at the call
// sites.
func (i *iD3D12Device) CreateRootSignature(nodeMask uint32, pBlobWithRootSignature uintptr, blobLengthInBytes uintptr) (*iD3D12RootSignature, error) {
	var signature *iD3D12RootSignature
	r, _, _ := syscall.Syscall6(i.vtbl.CreateRootSignature, 6, uintptr(unsafe.Pointer(i)),
		uintptr(nodeMask), pBlobWithRootSignature, blobLengthInBytes,
		uintptr(unsafe.Pointer(&_IID_ID3D12RootSignature)), uintptr(unsafe.Pointer(&signature)))
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Device::CreateRootSignature failed: %w", windows.Errno(r))
	}
	return signature, nil
}

// CreateSampler invokes the CreateSampler vtable entry with pDesc and the raw
// value of destDescriptor.
func (i *iD3D12Device) CreateSampler(pDesc *_D3D12_SAMPLER_DESC, destDescriptor _D3D12_CPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall(i.vtbl.CreateSampler, 3, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pDesc)), destDescriptor.ptr)
	runtime.KeepAlive(pDesc)
}

// CreateShaderResourceView invokes the CreateShaderResourceView vtable entry
// with the resource, the view description and the raw value of
// destDescriptor.
func (i *iD3D12Device) CreateShaderResourceView(pResource *iD3D12Resource1, pDesc *_D3D12_SHADER_RESOURCE_VIEW_DESC, destDescriptor _D3D12_CPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall6(i.vtbl.CreateShaderResourceView, 4, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pResource)), uintptr(unsafe.Pointer(pDesc)), destDescriptor.ptr,
		0, 0)
	runtime.KeepAlive(pResource)
	runtime.KeepAlive(pDesc)
}

// GetCopyableFootprints invokes the GetCopyableFootprints vtable entry and
// returns the values written through the four out-pointers.
//
// Each out-pointer refers to a single value, not to an array.
// NOTE(review): callers presumably always pass numSubresources == 1; confirm.
// baseOffset is converted to a single uintptr, so only its low 32 bits are
// passed where uintptr is 32 bits wide.
func (i *iD3D12Device) GetCopyableFootprints(pResourceDesc *_D3D12_RESOURCE_DESC, firstSubresource uint32, numSubresources uint32, baseOffset uint64) (layouts _D3D12_PLACED_SUBRESOURCE_FOOTPRINT, numRows uint, rowSizeInBytes uint64, totalBytes uint64) {
	syscall.Syscall9(i.vtbl.GetCopyableFootprints, 9, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pResourceDesc)), uintptr(firstSubresource), uintptr(numSubresources),
		uintptr(baseOffset), uintptr(unsafe.Pointer(&layouts)), uintptr(unsafe.Pointer(&numRows)),
uintptr(unsafe.Pointer(&rowSizeInBytes)), uintptr(unsafe.Pointer(&totalBytes)))
	runtime.KeepAlive(pResourceDesc)
	return
}

// GetDescriptorHandleIncrementSize invokes the
// GetDescriptorHandleIncrementSize vtable entry and returns the call's result
// truncated to uint32.
func (i *iD3D12Device) GetDescriptorHandleIncrementSize(descriptorHeapType _D3D12_DESCRIPTOR_HEAP_TYPE) uint32 {
	r, _, _ := syscall.Syscall(i.vtbl.GetDescriptorHandleIncrementSize, 2, uintptr(unsafe.Pointer(i)),
		uintptr(descriptorHeapType), 0)
	return uint32(r)
}

// GetDeviceRemovedReason invokes the GetDeviceRemovedReason vtable entry.
//
// It returns nil when the result is S_OK; any other result is returned as an
// error wrapping the raw result code.
func (i *iD3D12Device) GetDeviceRemovedReason() error {
	r, _, _ := syscall.Syscall(i.vtbl.GetDeviceRemovedReason, 1, uintptr(unsafe.Pointer(i)), 0, 0)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12Device::GetDeviceRemovedReason failed: %w", windows.Errno(r))
	}
	return nil
}

// iD3D12Fence is a COM object whose first word points to its vtable.
type iD3D12Fence struct {
	vtbl *iD3D12Fence_Vtbl
}

// iD3D12Fence_Vtbl lists the vtable slots in call-table order; the order must
// not be changed.
type iD3D12Fence_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData          uintptr
	SetPrivateData          uintptr
	SetPrivateDataInterface uintptr
	SetName                 uintptr
	GetDevice               uintptr
	GetCompletedValue       uintptr
	SetEventOnCompletion    uintptr
	Signal                  uintptr
}

// GetCompletedValue invokes the GetCompletedValue vtable entry and returns
// the call's result widened to uint64.
//
// The result is taken from a single uintptr, so at most 32 bits are returned
// where uintptr is 32 bits wide (see the TODO below).
func (i *iD3D12Fence) GetCompletedValue() uint64 {
	// TODO: Does this work on a 32bit machine?
	r, _, _ := syscall.Syscall(i.vtbl.GetCompletedValue, 1, uintptr(unsafe.Pointer(i)), 0, 0)
	return uint64(r)
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12Fence) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

func (i *iD3D12Fence) SetEventOnCompletion(value uint64, hEvent windows.Handle) error {
	// TODO: Does this work on a 32bit machine?
r, _, _ := syscall.Syscall(i.vtbl.SetEventOnCompletion, 3, uintptr(unsafe.Pointer(i)),
		uintptr(value), uintptr(hEvent))
	// Any result other than S_OK is reported as an error wrapping the raw code.
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12Fence::SetEventOnCompletion failed: %w", windows.Errno(r))
	}
	return nil
}

// iD3D12GraphicsCommandList is a COM object whose first word points to its
// vtable.
type iD3D12GraphicsCommandList struct {
	vtbl *iD3D12GraphicsCommandList_Vtbl
}

// iD3D12GraphicsCommandList_Vtbl lists the vtable slots in call-table order;
// the order must not be changed.
type iD3D12GraphicsCommandList_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData                     uintptr
	SetPrivateData                     uintptr
	SetPrivateDataInterface            uintptr
	SetName                            uintptr
	GetDevice                          uintptr
	GetType                            uintptr
	Close                              uintptr
	Reset                              uintptr
	ClearState                         uintptr
	DrawInstanced                      uintptr
	DrawIndexedInstanced               uintptr
	Dispatch                           uintptr
	CopyBufferRegion                   uintptr
	CopyTextureRegion                  uintptr
	CopyResource                       uintptr
	CopyTiles                          uintptr
	ResolveSubresource                 uintptr
	IASetPrimitiveTopology             uintptr
	RSSetViewports                     uintptr
	RSSetScissorRects                  uintptr
	OMSetBlendFactor                   uintptr
	OMSetStencilRef                    uintptr
	SetPipelineState                   uintptr
	ResourceBarrier                    uintptr
	ExecuteBundle                      uintptr
	SetDescriptorHeaps                 uintptr
	SetComputeRootSignature            uintptr
	SetGraphicsRootSignature           uintptr
	SetComputeRootDescriptorTable      uintptr
	SetGraphicsRootDescriptorTable     uintptr
	SetComputeRoot32BitConstant        uintptr
	SetGraphicsRoot32BitConstant       uintptr
	SetComputeRoot32BitConstants       uintptr
	SetGraphicsRoot32BitConstants      uintptr
	SetComputeRootConstantBufferView   uintptr
	SetGraphicsRootConstantBufferView  uintptr
	SetComputeRootShaderResourceView   uintptr
	SetGraphicsRootShaderResourceView  uintptr
	SetComputeRootUnorderedAccessView  uintptr
	SetGraphicsRootUnorderedAccessView uintptr
	IASetIndexBuffer                   uintptr
	IASetVertexBuffers                 uintptr
	SOSetTargets                       uintptr
	OMSetRenderTargets                 uintptr
	ClearDepthStencilView              uintptr
	ClearRenderTargetView              uintptr
	ClearUnorderedAccessViewUint       uintptr
	ClearUnorderedAccessViewFloat      uintptr
	DiscardResource                    uintptr
	BeginQuery                         uintptr
	EndQuery                           uintptr
+ ResolveQueryData uintptr + SetPredication uintptr + SetMarker uintptr + BeginEvent uintptr + EndEvent uintptr + ExecuteIndirect uintptr +} + +func (i *iD3D12GraphicsCommandList) ClearDepthStencilView(depthStencilView _D3D12_CPU_DESCRIPTOR_HANDLE, clearFlags _D3D12_CLEAR_FLAGS, depth float32, stencil uint8, numRects uint32, pRects *_D3D12_RECT) { + syscall.Syscall9(i.vtbl.ClearDepthStencilView, 7, uintptr(unsafe.Pointer(i)), + depthStencilView.ptr, uintptr(clearFlags), uintptr(math.Float32bits(depth)), + uintptr(stencil), uintptr(numRects), uintptr(unsafe.Pointer(pRects)), + 0, 0) + runtime.KeepAlive(pRects) +} + +func (i *iD3D12GraphicsCommandList) ClearRenderTargetView(pRenderTargetView _D3D12_CPU_DESCRIPTOR_HANDLE, colorRGBA [4]float32, numRects uint32, pRects *_D3D12_RECT) { + syscall.Syscall6(i.vtbl.ClearRenderTargetView, 5, uintptr(unsafe.Pointer(i)), + pRenderTargetView.ptr, uintptr(unsafe.Pointer(&colorRGBA[0])), uintptr(numRects), uintptr(unsafe.Pointer(pRects)), + 0) + runtime.KeepAlive(pRenderTargetView) +} + +func (i *iD3D12GraphicsCommandList) Close() error { + r, _, _ := syscall.Syscall(i.vtbl.Close, 1, uintptr(unsafe.Pointer(i)), 0, 0) + if windows.Handle(r) != windows.S_OK { + return fmt.Errorf("directx: ID3D12GraphicsCommandList::Close failed: %w", windows.Errno(r)) + } + return nil +} + +func (i *iD3D12GraphicsCommandList) CopyTextureRegion_PlacedFootPrint_SubresourceIndex(pDst *_D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint, dstX uint32, dstY uint32, dstZ uint32, pSrc *_D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex, pSrcBox *_D3D12_BOX) { + syscall.Syscall9(i.vtbl.CopyTextureRegion, 7, uintptr(unsafe.Pointer(i)), + uintptr(unsafe.Pointer(pDst)), uintptr(dstX), uintptr(dstY), + uintptr(dstZ), uintptr(unsafe.Pointer(pSrc)), uintptr(unsafe.Pointer(pSrcBox)), + 0, 0) + runtime.KeepAlive(pDst) + runtime.KeepAlive(pSrc) + runtime.KeepAlive(pSrcBox) +} + +func (i *iD3D12GraphicsCommandList) CopyTextureRegion_SubresourceIndex_PlacedFootPrint(pDst 
*_D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex, dstX uint32, dstY uint32, dstZ uint32, pSrc *_D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint, pSrcBox *_D3D12_BOX) { + syscall.Syscall9(i.vtbl.CopyTextureRegion, 7, uintptr(unsafe.Pointer(i)), + uintptr(unsafe.Pointer(pDst)), uintptr(dstX), uintptr(dstY), + uintptr(dstZ), uintptr(unsafe.Pointer(pSrc)), uintptr(unsafe.Pointer(pSrcBox)), + 0, 0) + runtime.KeepAlive(pDst) + runtime.KeepAlive(pSrc) + runtime.KeepAlive(pSrcBox) +} + +func (i *iD3D12GraphicsCommandList) DrawIndexedInstanced(indexCountPerInstance uint32, instanceCount uint32, startIndexLocation uint32, baseVertexLocation int32, startInstanceLocation uint32) { + syscall.Syscall6(i.vtbl.DrawIndexedInstanced, 6, uintptr(unsafe.Pointer(i)), + uintptr(indexCountPerInstance), uintptr(instanceCount), uintptr(startIndexLocation), uintptr(baseVertexLocation), uintptr(startInstanceLocation)) +} + +func (i *iD3D12GraphicsCommandList) IASetIndexBuffer(pView *_D3D12_INDEX_BUFFER_VIEW) { + syscall.Syscall(i.vtbl.IASetIndexBuffer, 2, uintptr(unsafe.Pointer(i)), + uintptr(unsafe.Pointer(pView)), 0) + runtime.KeepAlive(pView) +} + +func (i *iD3D12GraphicsCommandList) IASetPrimitiveTopology(primitiveTopology _D3D_PRIMITIVE_TOPOLOGY) { + syscall.Syscall(i.vtbl.IASetPrimitiveTopology, 2, uintptr(unsafe.Pointer(i)), + uintptr(primitiveTopology), 0) +} + +func (i *iD3D12GraphicsCommandList) IASetVertexBuffers(startSlot uint32, numViews uint32, pViews *_D3D12_VERTEX_BUFFER_VIEW) { + syscall.Syscall6(i.vtbl.IASetVertexBuffers, 4, uintptr(unsafe.Pointer(i)), + uintptr(startSlot), uintptr(numViews), uintptr(unsafe.Pointer(pViews)), + 0, 0) + runtime.KeepAlive(pViews) +} + +func (i *iD3D12GraphicsCommandList) OMSetRenderTargets(numRenderTargetDescriptors uint32, pRenderTargetDescriptors *_D3D12_CPU_DESCRIPTOR_HANDLE, rtsSingleHandleToDescriptorRange bool, pDepthStencilDescriptor *_D3D12_CPU_DESCRIPTOR_HANDLE) { + syscall.Syscall6(i.vtbl.OMSetRenderTargets, 5, uintptr(unsafe.Pointer(i)), 
uintptr(numRenderTargetDescriptors), uintptr(unsafe.Pointer(pRenderTargetDescriptors)), boolToUintptr(rtsSingleHandleToDescriptorRange), uintptr(unsafe.Pointer(pDepthStencilDescriptor)),
		0)
	runtime.KeepAlive(pRenderTargetDescriptors)
	runtime.KeepAlive(pDepthStencilDescriptor)
}

// OMSetStencilRef invokes the OMSetStencilRef vtable entry with stencilRef.
func (i *iD3D12GraphicsCommandList) OMSetStencilRef(stencilRef uint32) {
	syscall.Syscall(i.vtbl.OMSetStencilRef, 2, uintptr(unsafe.Pointer(i)), uintptr(stencilRef), 0)
}

// QueryInterface invokes the QueryInterface vtable entry with riid and
// ppvObject and reports any result other than S_OK as an error.
func (i *iD3D12GraphicsCommandList) QueryInterface(riid *windows.GUID, ppvObject *unsafe.Pointer) error {
	r, _, _ := syscall.Syscall(i.vtbl.QueryInterface, 3, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(riid)), uintptr(unsafe.Pointer(ppvObject)))
	runtime.KeepAlive(riid)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12GraphicsCommandList::QueryInterface failed: %w", windows.Errno(r))
	}
	return nil
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12GraphicsCommandList) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// Reset invokes the Reset vtable entry with the allocator and the initial
// pipeline state and reports any result other than S_OK as an error.
func (i *iD3D12GraphicsCommandList) Reset(pAllocator *iD3D12CommandAllocator, pInitialState *iD3D12PipelineState) error {
	r, _, _ := syscall.Syscall(i.vtbl.Reset, 3, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pAllocator)), uintptr(unsafe.Pointer(pInitialState)))
	runtime.KeepAlive(pAllocator)
	runtime.KeepAlive(pInitialState)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12GraphicsCommandList::Reset failed: %w", windows.Errno(r))
	}
	return nil
}

// ResourceBarrier invokes the ResourceBarrier vtable entry with the barrier
// count and a pointer to the first barrier.
//
// Only transition barriers can be expressed through this signature.
func (i *iD3D12GraphicsCommandList) ResourceBarrier(numBarriers uint32, pBarriers *_D3D12_RESOURCE_BARRIER_Transition) {
	syscall.Syscall(i.vtbl.ResourceBarrier, 3, uintptr(unsafe.Pointer(i)),
		uintptr(numBarriers), uintptr(unsafe.Pointer(pBarriers)))
	runtime.KeepAlive(pBarriers)
}

func (i *iD3D12GraphicsCommandList) RSSetViewports(numViewports uint32, pViewports *_D3D12_VIEWPORT) {
	syscall.Syscall(i.vtbl.RSSetViewports, 3,
uintptr(unsafe.Pointer(i)),
		uintptr(numViewports), uintptr(unsafe.Pointer(pViewports)))
	runtime.KeepAlive(pViewports)
}

// RSSetScissorRects invokes the RSSetScissorRects vtable entry with the
// rectangle count and a pointer to the first rectangle.
func (i *iD3D12GraphicsCommandList) RSSetScissorRects(numRects uint32, pRects *_D3D12_RECT) {
	syscall.Syscall(i.vtbl.RSSetScissorRects, 3, uintptr(unsafe.Pointer(i)),
		uintptr(numRects), uintptr(unsafe.Pointer(pRects)))
	runtime.KeepAlive(pRects)
}

// SetDescriptorHeaps passes the length of ppDescriptorHeaps and a pointer to
// its first element to the SetDescriptorHeaps vtable entry.
//
// An empty slice is a no-op.
func (i *iD3D12GraphicsCommandList) SetDescriptorHeaps(ppDescriptorHeaps []*iD3D12DescriptorHeap) {
	// &ppDescriptorHeaps[0] would panic on an empty slice.
	if len(ppDescriptorHeaps) == 0 {
		return
	}
	syscall.Syscall(i.vtbl.SetDescriptorHeaps, 3, uintptr(unsafe.Pointer(i)),
		uintptr(len(ppDescriptorHeaps)), uintptr(unsafe.Pointer(&ppDescriptorHeaps[0])))
	runtime.KeepAlive(ppDescriptorHeaps)
}

// SetGraphicsRootDescriptorTable invokes the SetGraphicsRootDescriptorTable
// vtable entry with the root parameter index and the raw value of
// baseDescriptor.
func (i *iD3D12GraphicsCommandList) SetGraphicsRootDescriptorTable(rootParameterIndex uint32, baseDescriptor _D3D12_GPU_DESCRIPTOR_HANDLE) {
	syscall.Syscall(i.vtbl.SetGraphicsRootDescriptorTable, 3, uintptr(unsafe.Pointer(i)),
		uintptr(rootParameterIndex), uintptr(baseDescriptor.ptr))
}

// SetGraphicsRootSignature invokes the SetGraphicsRootSignature vtable entry
// with pRootSignature.
func (i *iD3D12GraphicsCommandList) SetGraphicsRootSignature(pRootSignature *iD3D12RootSignature) {
	syscall.Syscall(i.vtbl.SetGraphicsRootSignature, 2, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pRootSignature)), 0)
	runtime.KeepAlive(pRootSignature)
}

// SetPipelineState invokes the SetPipelineState vtable entry with
// pPipelineState.
func (i *iD3D12GraphicsCommandList) SetPipelineState(pPipelineState *iD3D12PipelineState) {
	syscall.Syscall(i.vtbl.SetPipelineState, 2, uintptr(unsafe.Pointer(i)),
		uintptr(unsafe.Pointer(pPipelineState)), 0)
	runtime.KeepAlive(pPipelineState)
}

// iD3D12PipelineState is a COM object whose first word points to its vtable.
type iD3D12PipelineState struct {
	vtbl *iD3D12PipelineState_Vtbl
}

// iD3D12PipelineState_Vtbl lists the vtable slots in call-table order; the
// order must not be changed.
type iD3D12PipelineState_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData          uintptr
	SetPrivateData          uintptr
	SetPrivateDataInterface uintptr
	SetName                 uintptr
	GetDevice               uintptr
	GetCachedBlob           uintptr
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12PipelineState) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// iD3D12Resource1 is a COM object whose first word points to its vtable.
type iD3D12Resource1 struct {
	vtbl *iD3D12Resource1_Vtbl
}

// iD3D12Resource1_Vtbl lists the vtable slots in call-table order; the order
// must not be changed.
type iD3D12Resource1_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetPrivateData              uintptr
	SetPrivateData              uintptr
	SetPrivateDataInterface     uintptr
	SetName                     uintptr
	GetDevice                   uintptr
	Map                         uintptr
	Unmap                       uintptr
	GetDesc                     uintptr
	GetGPUVirtualAddress        uintptr
	WriteToSubresource          uintptr
	ReadFromSubresource         uintptr
	GetHeapProperties           uintptr
	GetProtectedResourceSession uintptr
}

// GetDesc returns the description written by the GetDesc vtable entry.
//
// The description is received through a pointer passed as a second argument
// instead of through the call's return value.
func (i *iD3D12Resource1) GetDesc() _D3D12_RESOURCE_DESC {
	var resourceDesc _D3D12_RESOURCE_DESC
	syscall.Syscall(i.vtbl.GetDesc, 2, uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(&resourceDesc)), 0)
	return resourceDesc
}

// GetGPUVirtualAddress invokes the GetGPUVirtualAddress vtable entry and
// returns the call's result converted to _D3D12_GPU_VIRTUAL_ADDRESS.
func (i *iD3D12Resource1) GetGPUVirtualAddress() _D3D12_GPU_VIRTUAL_ADDRESS {
	r, _, _ := syscall.Syscall(i.vtbl.GetGPUVirtualAddress, 1, uintptr(unsafe.Pointer(i)), 0, 0)
	return _D3D12_GPU_VIRTUAL_ADDRESS(r)
}

// Map invokes the Map vtable entry and returns the pointer it writes.
// Any result other than S_OK is returned as an error.
func (i *iD3D12Resource1) Map(subresource uint32, pReadRange *_D3D12_RANGE) (unsafe.Pointer, error) {
	var data unsafe.Pointer
	r, _, _ := syscall.Syscall6(i.vtbl.Map, 4, uintptr(unsafe.Pointer(i)),
		uintptr(subresource), uintptr(unsafe.Pointer(pReadRange)), uintptr(unsafe.Pointer(&data)),
		0, 0)
	runtime.KeepAlive(pReadRange)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: ID3D12Resource1::Map failed: %w", windows.Errno(r))
	}
	return data, nil
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3D12Resource1) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// Unmap invokes the Unmap vtable entry and reports any result other than
// S_OK as an error.
func (i *iD3D12Resource1) Unmap(subresource uint32, pWrittenRange *_D3D12_RANGE) error {
	r, _, _ := syscall.Syscall(i.vtbl.Unmap, 3, uintptr(unsafe.Pointer(i)),
		uintptr(subresource), uintptr(unsafe.Pointer(pWrittenRange)))
	runtime.KeepAlive(pWrittenRange)
	if windows.Handle(r) != windows.S_OK {
		return fmt.Errorf("directx: ID3D12Resource1::Unmap failed: %w", windows.Errno(r))
	}
	return nil
}

// iD3DBlob is a COM object whose first word points to its vtable.
type iD3DBlob struct {
	vtbl *iD3DBlob_Vtbl
}

// iD3DBlob_Vtbl lists the vtable slots in call-table order; the order must
// not be changed.
type iD3DBlob_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	GetBufferPointer uintptr
	GetBufferSize    uintptr
}

// GetBufferPointer invokes the GetBufferPointer vtable entry and returns the
// call's result as a raw address.
func (i *iD3DBlob) GetBufferPointer() uintptr {
	r, _, _ := syscall.Syscall(i.vtbl.GetBufferPointer, 1, uintptr(unsafe.Pointer(i)),
		0, 0)
	return r
}

// GetBufferSize invokes the GetBufferSize vtable entry and returns the call's
// result.
func (i *iD3DBlob) GetBufferSize() uintptr {
	r, _, _ := syscall.Syscall(i.vtbl.GetBufferSize, 1, uintptr(unsafe.Pointer(i)),
		0, 0)
	return r
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iD3DBlob) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// String returns the blob's buffer contents as a Go string.
//
// The bytes are copied into Go-managed memory, so the result stays valid
// after the blob is released.
func (i *iD3DBlob) String() string {
	// view aliases the memory owned by the blob. It must not escape this
	// function, because that memory goes away together with the blob.
	var view string
	h := (*reflect.StringHeader)(unsafe.Pointer(&view))
	h.Data = i.GetBufferPointer()
	h.Len = int(i.GetBufferSize())
	// Round-tripping through []byte forces a copy.
	return string([]byte(view))
}

// iDXGIAdapter1 is a COM object whose first word points to its vtable.
type iDXGIAdapter1 struct {
	vtbl *iDXGIAdapter1_Vtbl
}

// iDXGIAdapter1_Vtbl lists the vtable slots in call-table order; the order
// must not be changed.
type iDXGIAdapter1_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	SetPrivateData          uintptr
	SetPrivateDataInterface uintptr
	GetPrivateData          uintptr
	GetParent               uintptr
	EnumOutputs             uintptr
	GetDesc                 uintptr
	CheckInterfaceSupport   uintptr
	GetDesc1                uintptr
}

// Release invokes the Release vtable entry; the returned value is discarded.
func (i *iDXGIAdapter1) Release() {
	syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0)
}

// GetDesc1 invokes the GetDesc1 vtable entry and returns the description it
// writes. Any result other than S_OK is returned as an error.
func (i *iDXGIAdapter1) GetDesc1() (*_DXGI_ADAPTER_DESC1, error) {
	var desc _DXGI_ADAPTER_DESC1
	r, _, _ := syscall.Syscall(i.vtbl.GetDesc1, 2, uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(&desc)), 0)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: IDXGIAdapter1::GetDesc1 failed: %w", windows.Errno(r))
	}
	return &desc, nil
}

// iDXGIFactory4 is a COM object whose first word points to its vtable.
type iDXGIFactory4 struct {
	vtbl *iDXGIFactory4_Vtbl
}

// iDXGIFactory4_Vtbl lists the vtable slots in call-table order; the order
// must not be changed.
type iDXGIFactory4_Vtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr

	SetPrivateData                uintptr
	SetPrivateDataInterface       uintptr
	GetPrivateData                uintptr
	GetParent                     uintptr
	EnumAdapters                  uintptr
	MakeWindowAssociation         uintptr
	GetWindowAssociation          uintptr
	CreateSwapChain               uintptr
	CreateSoftwareAdapter         uintptr
	EnumAdapters1                 uintptr
	IsCurrent                     uintptr
	IsWindowedStereoEnabled       uintptr
	CreateSwapChainForHwnd        uintptr
	CreateSwapChainForCoreWindow  uintptr
	GetSharedResourceAdapterLuid  uintptr
	RegisterStereoStatusWindow    uintptr
	RegisterStereoStatusEvent     uintptr
	UnregisterStereoStatus        uintptr
	RegisterOcclusionStatusWindow uintptr
	RegisterOcclusionStatusEvent  uintptr
	UnregisterOcclusionStatus     uintptr
	CreateSwapChainForComposition uintptr
	GetCreationFlags              uintptr
	EnumAdapterByLuid             uintptr
	EnumWarpAdapter               uintptr
}

// CreateSwapChainForComposition invokes the CreateSwapChainForComposition
// vtable entry and returns the swap chain it writes.
// Any result other than S_OK is returned as an error.
func (i *iDXGIFactory4) CreateSwapChainForComposition(pDevice unsafe.Pointer, pDesc *_DXGI_SWAP_CHAIN_DESC1, pRestrictToOutput *iDXGIOutput) (*iDXGISwapChain1, error) {
	var swapChain *iDXGISwapChain1
	r, _, _ := syscall.Syscall6(i.vtbl.CreateSwapChainForComposition, 5,
		uintptr(unsafe.Pointer(i)), uintptr(pDevice), uintptr(unsafe.Pointer(pDesc)),
		uintptr(unsafe.Pointer(pRestrictToOutput)), uintptr(unsafe.Pointer(&swapChain)), 0)
	runtime.KeepAlive(pDesc)
	runtime.KeepAlive(pRestrictToOutput)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: IDXGIFactory4::CreateSwapChainForComposition failed: %w", windows.Errno(r))
	}
	return swapChain, nil
}

func (i *iDXGIFactory4) CreateSwapChainForHwnd(pDevice unsafe.Pointer, hWnd windows.HWND, pDesc *_DXGI_SWAP_CHAIN_DESC1, pFullscreenDesc *_DXGI_SWAP_CHAIN_FULLSCREEN_DESC, pRestrictToOutput *iDXGIOutput) (*iDXGISwapChain1, error) {
	var swapChain *iDXGISwapChain1
	r, _, _ := syscall.Syscall9(i.vtbl.CreateSwapChainForHwnd, 7,
		uintptr(unsafe.Pointer(i)), uintptr(pDevice), uintptr(hWnd),
		uintptr(unsafe.Pointer(pDesc)), uintptr(unsafe.Pointer(pFullscreenDesc)), uintptr(unsafe.Pointer(pRestrictToOutput)),
		uintptr(unsafe.Pointer(&swapChain)), 0, 0)
	runtime.KeepAlive(pDesc)
	runtime.KeepAlive(pFullscreenDesc)
	runtime.KeepAlive(pRestrictToOutput)
	if windows.Handle(r) != windows.S_OK {
		return nil, fmt.Errorf("directx: IDXGIFactory4::CreateSwapChainForHwnd failed: %w",
windows.Errno(r)) + } + return swapChain, nil +} + +func (i *iDXGIFactory4) EnumAdapters1(adapter uint32) (*iDXGIAdapter1, error) { + var ptr *iDXGIAdapter1 + r, _, _ := syscall.Syscall(i.vtbl.EnumAdapters1, 3, uintptr(unsafe.Pointer(i)), uintptr(adapter), uintptr(unsafe.Pointer(&ptr))) + if windows.Handle(r) != windows.S_OK { + return nil, fmt.Errorf("directx: IDXGIFactory4::EnumAdapters1 failed: %w", windows.Errno(r)) + } + return ptr, nil +} + +func (i *iDXGIFactory4) EnumWarpAdapter() (*iDXGIAdapter1, error) { + var ptr *iDXGIAdapter1 + r, _, _ := syscall.Syscall(i.vtbl.EnumWarpAdapter, 3, uintptr(unsafe.Pointer(i)), uintptr(unsafe.Pointer(&_IID_IDXGIAdapter1)), uintptr(unsafe.Pointer(&ptr))) + if windows.Handle(r) != windows.S_OK { + return nil, fmt.Errorf("directx: IDXGIFactory4::EnumWarpAdapter failed: %w", windows.Errno(r)) + } + return ptr, nil +} + +func (i *iDXGIFactory4) Release() { + syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0) +} + +type iDXGIOutput struct { + vtbl *iDXGIOutput_Vtbl +} + +type iDXGIOutput_Vtbl struct { + QueryInterface uintptr + AddRef uintptr + Release uintptr + + SetPrivateData uintptr + SetPrivateDataInterface uintptr + GetPrivateData uintptr + GetParent uintptr + GetDesc uintptr + GetDisplayModeList uintptr + FindClosestMatchingMode uintptr + WaitForVBlank uintptr + TakeOwnership uintptr + ReleaseOwnership uintptr + GetGammaControlCapabilities uintptr + SetGammaControl uintptr + GetGammaControl uintptr + SetDisplaySurface uintptr + GetDisplaySurfaceData uintptr + GetFrameStatistics uintptr +} + +type iD3D12RootSignature struct { + vtbl *iD3D12RootSignature_Vtbl +} + +type iD3D12RootSignature_Vtbl struct { + QueryInterface uintptr + AddRef uintptr + Release uintptr + + GetPrivateData uintptr + SetPrivateData uintptr + SetPrivateDataInterface uintptr + SetName uintptr + GetDevice uintptr +} + +func (i *iD3D12RootSignature) Release() { + syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0) +} + 
+type iDXGISwapChain1 struct { + vtbl *iDXGISwapChain1_Vtbl +} + +type iDXGISwapChain1_Vtbl struct { + QueryInterface uintptr + AddRef uintptr + Release uintptr + + SetPrivateData uintptr + SetPrivateDataInterface uintptr + GetPrivateData uintptr + GetParent uintptr + GetDevice uintptr + Present uintptr + GetBuffer uintptr + SetFullscreenState uintptr + GetFullscreenState uintptr + GetDesc uintptr + ResizeBuffers uintptr + ResizeTarget uintptr + GetContainingOutput uintptr + GetFrameStatistics uintptr + GetLastPresentCount uintptr + GetDesc1 uintptr + GetFullscreenDesc uintptr + GetHwnd uintptr + GetCoreWindow uintptr + Present1 uintptr + IsTemporaryMonoSupported uintptr + GetRestrictToOutput uintptr + SetBackgroundColor uintptr + GetBackgroundColor uintptr + SetRotation uintptr + GetRotation uintptr +} + +func (i *iDXGISwapChain1) As(swapChain **iDXGISwapChain4) { + *swapChain = (*iDXGISwapChain4)(unsafe.Pointer(i)) +} + +type iDXGISwapChain4 struct { + vtbl *iDXGISwapChain4_Vtbl +} + +type iDXGISwapChain4_Vtbl struct { + QueryInterface uintptr + AddRef uintptr + Release uintptr + + SetPrivateData uintptr + SetPrivateDataInterface uintptr + GetPrivateData uintptr + GetParent uintptr + GetDevice uintptr + Present uintptr + GetBuffer uintptr + SetFullscreenState uintptr + GetFullscreenState uintptr + GetDesc uintptr + ResizeBuffers uintptr + ResizeTarget uintptr + GetContainingOutput uintptr + GetFrameStatistics uintptr + GetLastPresentCount uintptr + GetDesc1 uintptr + GetFullscreenDesc uintptr + GetHwnd uintptr + GetCoreWindow uintptr + Present1 uintptr + IsTemporaryMonoSupported uintptr + GetRestrictToOutput uintptr + SetBackgroundColor uintptr + GetBackgroundColor uintptr + SetRotation uintptr + GetRotation uintptr + + SetSourceSize uintptr + GetSourceSize uintptr + SetMaximumFrameLatency uintptr + GetMaximumFrameLatency uintptr + GetFrameLatencyWaitableObject uintptr + SetMatrixTransform uintptr + GetMatrixTransform uintptr + GetCurrentBackBufferIndex uintptr + 
CheckColorSpaceSupport uintptr + SetColorSpace1 uintptr + ResizeBuffers1 uintptr + SetHDRMetaData uintptr +} + +func (i *iDXGISwapChain4) GetBuffer(buffer uint32) (*iD3D12Resource1, error) { + var resource *iD3D12Resource1 + r, _, _ := syscall.Syscall6(i.vtbl.GetBuffer, 4, uintptr(unsafe.Pointer(i)), + uintptr(buffer), uintptr(unsafe.Pointer(&_IID_ID3D12Resource1)), uintptr(unsafe.Pointer(&resource)), + 0, 0) + if windows.Handle(r) != windows.S_OK { + return nil, fmt.Errorf("directx: IDXGISwapChain4::GetBuffer failed: %w", windows.Errno(r)) + } + return resource, nil +} + +func (i *iDXGISwapChain4) GetCurrentBackBufferIndex() uint32 { + r, _, _ := syscall.Syscall(i.vtbl.GetCurrentBackBufferIndex, 1, uintptr(unsafe.Pointer(i)), 0, 0) + return uint32(r) +} + +func (i *iDXGISwapChain4) Present(syncInterval uint32, flags uint32) error { + r, _, _ := syscall.Syscall(i.vtbl.Present, 3, uintptr(unsafe.Pointer(i)), uintptr(syncInterval), uintptr(flags)) + if windows.Handle(r) != windows.S_OK { + return fmt.Errorf("directx: IDXGISwapChain4::Present failed: %w", windows.Errno(r)) + } + return nil +} + +func (i *iDXGISwapChain4) ResizeBuffers(bufferCount uint32, width uint32, height uint32, newFormat _DXGI_FORMAT, swapChainFlags uint32) error { + r, _, _ := syscall.Syscall6(i.vtbl.ResizeBuffers, 6, + uintptr(unsafe.Pointer(i)), uintptr(bufferCount), uintptr(width), + uintptr(height), uintptr(newFormat), uintptr(swapChainFlags)) + if windows.Handle(r) != windows.S_OK { + return fmt.Errorf("directx: IDXGISwapChain4::ResizeBuffers failed: %w", windows.Errno(r)) + } + return nil +} + +func (i *iDXGISwapChain4) Release() { + syscall.Syscall(i.vtbl.Release, 1, uintptr(unsafe.Pointer(i)), 0, 0) +} diff --git a/internal/graphicsdriver/directx/graphics_windows.go b/internal/graphicsdriver/directx/graphics_windows.go new file mode 100644 index 000000000..4f681962d --- /dev/null +++ b/internal/graphicsdriver/directx/graphics_windows.go @@ -0,0 +1,1709 @@ +// Copyright 2022 The Ebiten 
Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package directx + +import ( + "errors" + "fmt" + "os" + "reflect" + "strings" + "unsafe" + + "golang.org/x/sys/windows" + + "github.com/hajimehoshi/ebiten/v2/internal/graphics" + "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" + "github.com/hajimehoshi/ebiten/v2/internal/shaderir" + "github.com/hajimehoshi/ebiten/v2/internal/shaderir/hlsl" +) + +const frameCount = 2 + +const is64bit = uint64(^uintptr(0)) == ^uint64(0) + +// isDirectXAvailable indicates whether DirectX is available or not. 
+// In 32bit machines, DirectX is not used because +// 1) The functions syscall.Syscall cannot accept 64bit values as one argument +// 2) The struct layouts can be different +var isDirectXAvailable = is64bit && theGraphics.initializeDevice() == nil + +var theGraphics Graphics + +func Get() *Graphics { + if !isDirectXAvailable { + return nil + } + return &theGraphics +} + +var inputElementDescs []_D3D12_INPUT_ELEMENT_DESC + +func init() { + position := []byte("POSITION\000") + texcoord := []byte("TEXCOORD\000") + color := []byte("COLOR\000") + inputElementDescs = []_D3D12_INPUT_ELEMENT_DESC{ + { + SemanticName: &position[0], + SemanticIndex: 0, + Format: _DXGI_FORMAT_R32G32_FLOAT, + InputSlot: 0, + AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, + InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + InstanceDataStepRate: 0, + }, + { + SemanticName: &texcoord[0], + SemanticIndex: 0, + Format: _DXGI_FORMAT_R32G32_FLOAT, + InputSlot: 0, + AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, + InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + InstanceDataStepRate: 0, + }, + { + SemanticName: &color[0], + SemanticIndex: 0, + Format: _DXGI_FORMAT_R32G32B32A32_FLOAT, + InputSlot: 0, + AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, + InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + InstanceDataStepRate: 0, + }, + } +} + +type Graphics struct { + debug *iD3D12Debug + device *iD3D12Device + commandQueue *iD3D12CommandQueue + rtvDescriptorHeap *iD3D12DescriptorHeap + rtvDescriptorSize uint32 + renderTargets [frameCount]*iD3D12Resource1 + + fences [frameCount]*iD3D12Fence + fenceValues [frameCount]uint64 + + // fenceWaitEvent is an event. + // As all the Graphics functions work in a single thread, only one event is enough for multiple fences. + fenceWaitEvent windows.Handle + + // drawCommandAllocators are command allocators for a 3D engine (DrawIndexedInstanced). 
+ // For the word 'engine', see https://docs.microsoft.com/en-us/windows/win32/direct3d12/user-mode-heap-synchronization. + // The term 'draw' is used instead of '3D' in this package. + drawCommandAllocators [frameCount]*iD3D12CommandAllocator + + // copyCommandAllocators are command allocators for a copy engine (CopyTextureRegion). + copyCommandAllocators [frameCount]*iD3D12CommandAllocator + + // drawCommandList is a command list for a 3D engine (DrawIndexedInstanced). + drawCommandList *iD3D12GraphicsCommandList + + // copyCommandList is a command list for a copy engine (CopyTextureRegion). + copyCommandList *iD3D12GraphicsCommandList + + // drawCommandList and copyCommandList are exclusive: if one is not empty, the other must be empty. + + vertices [frameCount][]*iD3D12Resource1 + indices [frameCount][]*iD3D12Resource1 + + factory *iDXGIFactory4 + adapter *iDXGIAdapter1 + swapChain *iDXGISwapChain4 + + window windows.HWND + + frameIndex int + + images map[graphicsdriver.ImageID]*Image + screenImage *Image + nextImageID graphicsdriver.ImageID + disposedImages [frameCount][]*Image + + shaders map[graphicsdriver.ShaderID]*Shader + nextShaderID graphicsdriver.ShaderID + disposedShaders [frameCount][]*Shader + + vsyncEnabled bool + transparent bool + + pipelineStates +} + +func (g *Graphics) initializeDevice() (ferr error) { + var ( + useWARP bool + useDebugLayer bool + ) + for _, t := range strings.Split(os.Getenv("EBITEN_DIRECTX"), ",") { + switch strings.TrimSpace(t) { + case "warp": + useWARP = true + case "debug": + useDebugLayer = true + } + } + + if err := d3d12.Load(); err != nil { + return err + } + + // As g's lifetime is the same as the process's lifetime, debug and other objects are never released + // if this initialization succeeds. + + // The debug interface is optional and might not exist. 
+ if useDebugLayer { + d, err := d3D12GetDebugInterface() + if err != nil { + return err + } + g.debug = d + defer func() { + if ferr != nil { + g.debug.Release() + g.debug = nil + } + }() + g.debug.EnableDebugLayer() + } + + var flag uint32 + if g.debug != nil { + flag = _DXGI_CREATE_FACTORY_DEBUG + } + f, err := createDXGIFactory2(flag) + if err != nil { + return err + } + g.factory = f + defer func() { + if ferr != nil { + g.factory.Release() + g.factory = nil + } + }() + + if useWARP { + a, err := g.factory.EnumWarpAdapter() + if err != nil { + return err + } + + g.adapter = a + defer func() { + if ferr != nil { + g.adapter.Release() + g.adapter = nil + } + }() + } else { + for i := 0; ; i++ { + a, err := g.factory.EnumAdapters1(uint32(i)) + if errors.Is(err, _DXGI_ERROR_NOT_FOUND) { + break + } + if err != nil { + return err + } + + desc, err := a.GetDesc1() + if err != nil { + return err + } + if desc.Flags&_DXGI_ADAPTER_FLAG_SOFTWARE != 0 { + a.Release() + continue + } + if err := d3D12CreateDevice(unsafe.Pointer(a), _D3D_FEATURE_LEVEL_11_0, &_IID_ID3D12Device, nil); err != nil { + a.Release() + continue + } + g.adapter = a + defer func() { + if ferr != nil { + g.adapter.Release() + g.adapter = nil + } + }() + break + } + } + + if g.adapter == nil { + return errors.New("directx: DirectX 12 is not supported") + } + + if err := d3D12CreateDevice(unsafe.Pointer(g.adapter), _D3D_FEATURE_LEVEL_11_0, &_IID_ID3D12Device, (*unsafe.Pointer)(unsafe.Pointer(&g.device))); err != nil { + return err + } + + return nil +} + +func (g *Graphics) Initialize() (ferr error) { + // Create an event for a fence. + e, err := windows.CreateEvent(nil, 0, 0, nil) + if err != nil { + return fmt.Errorf("directx: CreateEvent failed: %w", err) + } + g.fenceWaitEvent = e + + // Create a command queue. 
+ desc := _D3D12_COMMAND_QUEUE_DESC{ + Type: _D3D12_COMMAND_LIST_TYPE_DIRECT, + Flags: _D3D12_COMMAND_QUEUE_FLAG_NONE, + } + c, err := g.device.CreateCommandQueue(&desc) + if err != nil { + return err + } + g.commandQueue = c + defer func() { + if ferr != nil { + g.commandQueue.Release() + g.commandQueue = nil + } + }() + + // Create command allocators. + for i := 0; i < frameCount; i++ { + dca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) + if err != nil { + return err + } + g.drawCommandAllocators[i] = dca + defer func(i int) { + if ferr != nil { + g.drawCommandAllocators[i].Release() + g.drawCommandAllocators[i] = nil + } + }(i) + + cca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) + if err != nil { + return err + } + g.copyCommandAllocators[i] = cca + defer func(i int) { + if ferr != nil { + g.copyCommandAllocators[i].Release() + g.copyCommandAllocators[i] = nil + } + }(i) + } + + // Create frame fences. + for i := 0; i < frameCount; i++ { + f, err := g.device.CreateFence(0, _D3D12_FENCE_FLAG_NONE) + if err != nil { + return err + } + g.fences[i] = f + defer func(i int) { + if ferr != nil { + g.fences[i].Release() + g.fences[i] = nil + } + }(i) + } + + // Create command lists. + dcl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.drawCommandAllocators[0], nil) + if err != nil { + return err + } + g.drawCommandList = dcl + defer func() { + if ferr != nil { + g.drawCommandList.Release() + g.drawCommandList = nil + } + }() + + ccl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.copyCommandAllocators[0], nil) + if err != nil { + return err + } + g.copyCommandList = ccl + defer func() { + if ferr != nil { + g.copyCommandList.Release() + g.copyCommandList = nil + } + }() + + // Close the command list once as this is immediately Reset at Begin. 
+ if err := g.drawCommandList.Close(); err != nil { + return err + } + if err := g.copyCommandList.Close(); err != nil { + return err + } + + // Create a descriptor heap for RTV. + h, err := g.device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + NumDescriptors: frameCount, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }) + if err != nil { + return err + } + g.rtvDescriptorHeap = h + defer func() { + if ferr != nil { + g.rtvDescriptorHeap.Release() + g.rtvDescriptorHeap = nil + } + }() + g.rtvDescriptorSize = g.device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_RTV) + + if err := g.pipelineStates.initialize(g.device); err != nil { + return err + } + + return nil +} + +func createBuffer(device *iD3D12Device, bufferSize uint64, heapType _D3D12_HEAP_TYPE) (*iD3D12Resource1, error) { + state := _D3D12_RESOURCE_STATE_GENERIC_READ + if heapType == _D3D12_HEAP_TYPE_READBACK { + state = _D3D12_RESOURCE_STATE_COPY_DEST + } + + r, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ + Type: heapType, + CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, + CreationNodeMask: 1, + VisibleNodeMask: 1, + }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ + Dimension: _D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: bufferSize, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: _DXGI_FORMAT_UNKNOWN, + SampleDesc: _DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + Layout: _D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: _D3D12_RESOURCE_FLAG_NONE, + }, state, nil) + if err != nil { + return nil, err + } + return r, nil +} + +func (g *Graphics) updateSwapChain(width, height int) error { + if g.window == 0 { + return errors.New("directx: the window handle is not initialized yet") + } + + if g.swapChain == nil { + if err := g.initSwapChain(width, height); err != nil { + return err + } + } else { + if err := g.resizeSwapChain(width, 
height); err != nil { + return err + } + } + + return nil +} + +func (g *Graphics) initSwapChain(width, height int) (ferr error) { + // Create a swap chain. + // + // DXGI_ALPHA_MODE_PREMULTIPLIED doesn't work with a HWND well. + // + // IDXGIFactory::CreateSwapChain: Alpha blended swapchains must be created with CreateSwapChainForComposition, + // or CreateSwapChainForCoreWindow with the DXGI_SWAP_CHAIN_FLAG_FOREGROUND_LAYER flag + s, err := g.factory.CreateSwapChainForHwnd(unsafe.Pointer(g.commandQueue), g.window, &_DXGI_SWAP_CHAIN_DESC1{ + Width: uint32(width), + Height: uint32(height), + Format: _DXGI_FORMAT_B8G8R8A8_UNORM, + BufferUsage: _DXGI_USAGE_RENDER_TARGET_OUTPUT, + BufferCount: frameCount, + SwapEffect: _DXGI_SWAP_EFFECT_FLIP_DISCARD, + SampleDesc: _DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + }, nil, nil) + if err != nil { + return err + } + s.As(&g.swapChain) + defer func() { + if ferr != nil { + g.swapChain.Release() + g.swapChain = nil + } + }() + + // TODO: Call factory.MakeWindowAssociation not to support fullscreen transitions? + // TODO: Get the current buffer index? 
+ + if err := g.createRenderTargetViews(); err != nil { + return err + } + + g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) + + return nil +} + +func (g *Graphics) resizeSwapChain(width, height int) error { + if err := g.flushCommandList(g.copyCommandList); err != nil { + return err + } + if err := g.copyCommandList.Close(); err != nil { + return err + } + if err := g.flushCommandList(g.drawCommandList); err != nil { + return err + } + if err := g.drawCommandList.Close(); err != nil { + return err + } + + for i := 0; i < frameCount; i++ { + if err := g.waitForCommandQueueForFrame(i); err != nil { + return err + } + g.releaseResources(i) + if err := g.releaseCommandAllocators(i); err != nil { + return err + } + } + + for _, r := range g.renderTargets { + r.Release() + } + + if err := g.swapChain.ResizeBuffers(frameCount, uint32(width), uint32(height), _DXGI_FORMAT_B8G8R8A8_UNORM, 0); err != nil { + return err + } + + if err := g.createRenderTargetViews(); err != nil { + return err + } + + g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) + + if err := g.drawCommandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + if err := g.copyCommandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + + return nil +} + +func (g *Graphics) createRenderTargetViews() (ferr error) { + // Create frame resources. + h := g.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + for i := 0; i < frameCount; i++ { + r, err := g.swapChain.GetBuffer(uint32(i)) + if err != nil { + return err + } + g.renderTargets[i] = r + defer func(i int) { + if ferr != nil { + g.renderTargets[i].Release() + g.renderTargets[i] = nil + } + }(i) + + g.device.CreateRenderTargetView(r, nil, h) + h.Offset(1, g.rtvDescriptorSize) + } + + return nil +} + +func (g *Graphics) SetWindow(window uintptr) { + g.window = windows.HWND(window) + // TODO: need to update the swap chain? 
+} + +func (g *Graphics) Begin() error { + g.frameIndex = 0 + // The swap chain is initialized when NewScreenFramebufferImage is called. + // This must be called at the first frame. + if g.swapChain != nil { + g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) + } + + if err := g.drawCommandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + + if err := g.copyCommandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + return nil +} + +func (g *Graphics) End(present bool) error { + // The swap chain might still be nil when Begin-End is invoked not by a frame (e.g., Image.At). + + // As copyCommandList and drawCommandList are exclusive, the order should not matter here. + if err := g.flushCommandList(g.copyCommandList); err != nil { + return err + } + if err := g.copyCommandList.Close(); err != nil { + return err + } + + if present { + g.screenImage.transiteState(g.drawCommandList, _D3D12_RESOURCE_STATE_PRESENT) + } + + if err := g.drawCommandList.Close(); err != nil { + return err + } + g.commandQueue.ExecuteCommandLists([]*iD3D12GraphicsCommandList{g.drawCommandList}) + + // Release vertices and indices buffers when too many ones were created. + // This is needed espciallly for testings, where present is always false. + if len(g.vertices[g.frameIndex]) >= 16 { + if err := g.waitForCommandQueue(); err != nil { + return err + } + g.releaseVerticesAndIndices(g.frameIndex) + } + + g.pipelineStates.resetConstantBuffers(g.frameIndex) + + if present { + if g.swapChain == nil { + return fmt.Errorf("directx: the swap chain is not initialized yet at End") + } + + var syncInterval uint32 + if g.vsyncEnabled { + syncInterval = 1 + } + if err := g.swapChain.Present(syncInterval, 0); err != nil { + return err + } + + // Wait for the previous frame. 
+ fence := g.fences[g.frameIndex] + g.fenceValues[g.frameIndex]++ + if err := g.commandQueue.Signal(fence, g.fenceValues[g.frameIndex]); err != nil { + return err + } + + nextIndex := (g.frameIndex + 1) % frameCount + if err := g.waitForCommandQueueForFrame(nextIndex); err != nil { + return err + } + + g.releaseResources(nextIndex) + g.releaseVerticesAndIndices(nextIndex) + if err := g.releaseCommandAllocators(nextIndex); err != nil { + return err + } + } + return nil +} + +func (g *Graphics) waitForCommandQueueForFrame(frameIndex int) error { + expected := g.fenceValues[frameIndex] + actual := g.fences[frameIndex].GetCompletedValue() + if actual < expected { + if err := g.fences[frameIndex].SetEventOnCompletion(expected, g.fenceWaitEvent); err != nil { + return err + } + if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { + return err + } + } + return nil +} + +func (g *Graphics) releaseResources(frameIndex int) { + for i, img := range g.disposedImages[frameIndex] { + img.disposeImpl() + g.disposedImages[frameIndex][i] = nil + } + g.disposedImages[frameIndex] = g.disposedImages[frameIndex][:0] + + for i, s := range g.disposedShaders[frameIndex] { + s.disposeImpl() + g.disposedShaders[frameIndex][i] = nil + } + g.disposedShaders[frameIndex] = g.disposedShaders[frameIndex][:0] +} + +func (g *Graphics) releaseVerticesAndIndices(frameIndex int) { + for i := range g.vertices[frameIndex] { + g.vertices[frameIndex][i].Release() + g.vertices[frameIndex][i] = nil + } + g.vertices[frameIndex] = g.vertices[frameIndex][:0] + + for i := range g.indices[frameIndex] { + g.indices[frameIndex][i].Release() + g.indices[frameIndex][i] = nil + } + g.indices[frameIndex] = g.indices[frameIndex][:0] +} + +func (g *Graphics) releaseCommandAllocators(frameIndex int) error { + if err := g.drawCommandAllocators[frameIndex].Reset(); err != nil { + return err + } + if err := g.copyCommandAllocators[frameIndex].Reset(); err != nil { + return err + } + + 
return nil +} + +// flushCommandList executes commands in the command list and waits for its completion. +// +// TODO: This is not efficient. Is it possible to make two command lists work in parallel? +func (g *Graphics) flushCommandList(commandList *iD3D12GraphicsCommandList) error { + if err := commandList.Close(); err != nil { + return err + } + + g.commandQueue.ExecuteCommandLists([]*iD3D12GraphicsCommandList{commandList}) + + if err := g.waitForCommandQueue(); err != nil { + return err + } + + switch commandList { + case g.drawCommandList: + if err := commandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + case g.copyCommandList: + if err := commandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { + return err + } + } + + return nil +} + +func (g *Graphics) waitForCommandQueue() error { + f, err := g.device.CreateFence(0, _D3D12_FENCE_FLAG_NONE) + if err != nil { + return err + } + defer f.Release() + + const expected uint64 = 1 + g.commandQueue.Signal(f, expected) + if f.GetCompletedValue() < expected { + if err := f.SetEventOnCompletion(expected, g.fenceWaitEvent); err != nil { + return err + } + if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { + return err + } + } + return nil +} + +func (g *Graphics) SetTransparent(transparent bool) { + g.transparent = transparent +} + +func (g *Graphics) SetVertices(vertices []float32, indices []uint16) (ferr error) { + // Create buffers if necessary. + vidx := len(g.vertices[g.frameIndex]) + if cap(g.vertices[g.frameIndex]) > vidx { + g.vertices[g.frameIndex] = g.vertices[g.frameIndex][:vidx+1] + } else { + g.vertices[g.frameIndex] = append(g.vertices[g.frameIndex], nil) + } + if g.vertices[g.frameIndex][vidx] == nil { + // TODO: Use the default heap for efficienty. See the official example HelloTriangle. 
+ vs, err := createBuffer(g.device, graphics.IndicesNum*graphics.VertexFloatNum*uint64(unsafe.Sizeof(float32(0))), _D3D12_HEAP_TYPE_UPLOAD) + if err != nil { + return err + } + g.vertices[g.frameIndex][vidx] = vs + defer func() { + if ferr != nil { + g.vertices[g.frameIndex][vidx].Release() + g.vertices[g.frameIndex][vidx] = nil + } + }() + } + + iidx := len(g.indices[g.frameIndex]) + if cap(g.indices[g.frameIndex]) > iidx { + g.indices[g.frameIndex] = g.indices[g.frameIndex][:iidx+1] + } else { + g.indices[g.frameIndex] = append(g.indices[g.frameIndex], nil) + } + if g.indices[g.frameIndex][iidx] == nil { + is, err := createBuffer(g.device, graphics.IndicesNum*uint64(unsafe.Sizeof(uint16(0))), _D3D12_HEAP_TYPE_UPLOAD) + if err != nil { + return err + } + g.indices[g.frameIndex][iidx] = is + defer func() { + if ferr != nil { + g.indices[g.frameIndex][iidx].Release() + g.indices[g.frameIndex][iidx] = nil + } + }() + } + + m, err := g.vertices[g.frameIndex][vidx].Map(0, &_D3D12_RANGE{0, 0}) + if err != nil { + return err + } + copyFloat32s(m, vertices) + if err := g.vertices[g.frameIndex][vidx].Unmap(0, nil); err != nil { + return err + } + + m, err = g.indices[g.frameIndex][iidx].Map(0, &_D3D12_RANGE{0, 0}) + if err != nil { + return err + } + copyUint16s(m, indices) + if err := g.indices[g.frameIndex][iidx].Unmap(0, nil); err != nil { + return err + } + + return nil +} + +func (g *Graphics) NewImage(width, height int) (graphicsdriver.Image, error) { + desc := _D3D12_RESOURCE_DESC{ + Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, + Alignment: 0, + Width: uint64(graphics.InternalImageSize(width)), + Height: uint32(graphics.InternalImageSize(height)), + DepthOrArraySize: 1, + MipLevels: 0, + Format: _DXGI_FORMAT_R8G8B8A8_UNORM, + SampleDesc: _DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: _D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, + } + + state := _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + t, err := 
g.device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ + Type: _D3D12_HEAP_TYPE_DEFAULT, // Upload? + CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, + CreationNodeMask: 1, + VisibleNodeMask: 1, + }, _D3D12_HEAP_FLAG_NONE, &desc, state, nil) + if err != nil { + return nil, err + } + + layouts, numRows, _, totalBytes := g.device.GetCopyableFootprints(&desc, 0, 1, 0) + + i := &Image{ + graphics: g, + id: g.genNextImageID(), + width: width, + height: height, + texture: t, + state: state, + layouts: layouts, + numRows: numRows, + totalBytes: totalBytes, + } + g.addImage(i) + return i, nil +} + +func (g *Graphics) NewScreenFramebufferImage(width, height int) (graphicsdriver.Image, error) { + if err := g.updateSwapChain(width, height); err != nil { + return nil, err + } + + i := &Image{ + graphics: g, + id: g.genNextImageID(), + width: width, + height: height, + screen: true, + state: _D3D12_RESOURCE_STATE_PRESENT, + } + g.addImage(i) + return i, nil +} + +func (g *Graphics) addImage(img *Image) { + if g.images == nil { + g.images = map[graphicsdriver.ImageID]*Image{} + } + if _, ok := g.images[img.id]; ok { + panic(fmt.Sprintf("directx: image ID %d was already registered", img.id)) + } + g.images[img.id] = img + if img.screen { + g.screenImage = img + } +} + +func (g *Graphics) removeImage(img *Image) { + delete(g.images, img.id) + g.disposedImages[g.frameIndex] = append(g.disposedImages[g.frameIndex], img) + if img.screen { + g.screenImage = nil + } +} + +func (g *Graphics) addShader(s *Shader) { + if g.shaders == nil { + g.shaders = map[graphicsdriver.ShaderID]*Shader{} + } + if _, ok := g.shaders[s.id]; ok { + panic(fmt.Sprintf("directx: shader ID %d was already registered", s.id)) + } + g.shaders[s.id] = s +} + +func (g *Graphics) removeShader(s *Shader) { + delete(g.shaders, s.id) + g.disposedShaders[g.frameIndex] = append(g.disposedShaders[g.frameIndex], s) +} + +func (g *Graphics) SetVsyncEnabled(enabled 
bool) { + g.vsyncEnabled = enabled +} + +func (g *Graphics) SetFullscreen(fullscreen bool) { +} + +func (g *Graphics) FramebufferYDirection() graphicsdriver.YDirection { + return graphicsdriver.Downward +} + +func (g *Graphics) NDCYDirection() graphicsdriver.YDirection { + return graphicsdriver.Upward +} + +func (g *Graphics) NeedsRestoring() bool { + return false +} + +func (g *Graphics) NeedsClearingScreen() bool { + // TODO: Confirm this is really true. + return true +} + +func (g *Graphics) IsGL() bool { + return false +} + +func (g *Graphics) IsDirectX() bool { + return true +} + +func (g *Graphics) HasHighPrecisionFloat() bool { + return true +} + +func (g *Graphics) MaxImageSize() int { + return _D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION +} + +func (g *Graphics) NewShader(program *shaderir.Program) (graphicsdriver.Shader, error) { + src, offsets := hlsl.Compile(program) + vsh, psh, err := newShader([]byte(src), nil) + if err != nil { + return nil, err + } + + s := &Shader{ + graphics: g, + id: g.genNextShaderID(), + uniformTypes: program.Uniforms, + uniformOffsets: offsets, + vertexShader: vsh, + pixelShader: psh, + } + g.addShader(s) + return s, nil +} + +func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.ShaderImageNum]graphicsdriver.ImageID, offsets [graphics.ShaderImageNum - 1][2]float32, shaderID graphicsdriver.ShaderID, indexLen int, indexOffset int, mode graphicsdriver.CompositeMode, colorM graphicsdriver.ColorM, filter graphicsdriver.Filter, address graphicsdriver.Address, dstRegion, srcRegion graphicsdriver.Region, uniforms [][]float32, evenOdd bool) error { + if err := g.flushCommandList(g.copyCommandList); err != nil { + return err + } + + dst := g.images[dstID] + + var shader *Shader + if shaderID != graphicsdriver.InvalidShaderID { + shader = g.shaders[shaderID] + } + + if err := dst.setAsRenderTarget(g.device, evenOdd); err != nil { + return err + } + + var srcImages [graphics.ShaderImageNum]*Image + for i, srcID := range 
srcs { + src := g.images[srcID] + if src == nil { + continue + } + srcImages[i] = src + src.transiteState(g.drawCommandList, _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) + } + + var flattenUniforms []float32 + if shader == nil { + screenWidth, screenHeight := dst.internalSize() + var srcWidth, srcHeight float32 + if filter != graphicsdriver.FilterNearest { + w, h := srcImages[0].internalSize() + srcWidth = float32(w) + srcHeight = float32(h) + } + var esBody [16]float32 + var esTranslate [4]float32 + colorM.Elements(&esBody, &esTranslate) + scale := float32(0) + if filter == graphicsdriver.FilterScreen { + scale = float32(dst.width) / float32(srcImages[0].width) + } + + flattenUniforms = []float32{ + float32(screenWidth), + float32(screenHeight), + srcWidth, + srcHeight, + esBody[0], + esBody[1], + esBody[2], + esBody[3], + esBody[4], + esBody[5], + esBody[6], + esBody[7], + esBody[8], + esBody[9], + esBody[10], + esBody[11], + esBody[12], + esBody[13], + esBody[14], + esBody[15], + esTranslate[0], + esTranslate[1], + esTranslate[2], + esTranslate[3], + srcRegion.X, + srcRegion.Y, + srcRegion.X + srcRegion.Width, + srcRegion.Y + srcRegion.Height, + scale, + } + } else { + // TODO: This logic is very similar to Metal's. Let's unify them. 
+ dw, dh := dst.internalSize() + us := make([][]float32, graphics.PreservedUniformVariablesNum+len(uniforms)) + us[graphics.TextureDestinationSizeUniformVariableIndex] = []float32{float32(dw), float32(dh)} + usizes := make([]float32, 2*len(srcs)) + for i, src := range srcImages { + if src != nil { + w, h := src.internalSize() + usizes[2*i] = float32(w) + usizes[2*i+1] = float32(h) + } + } + us[graphics.TextureSourceSizesUniformVariableIndex] = usizes + udorigin := []float32{float32(dstRegion.X) / float32(dw), float32(dstRegion.Y) / float32(dh)} + us[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udorigin + udsize := []float32{float32(dstRegion.Width) / float32(dw), float32(dstRegion.Height) / float32(dh)} + us[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udsize + uoffsets := make([]float32, 2*len(offsets)) + for i, offset := range offsets { + uoffsets[2*i] = offset[0] + uoffsets[2*i+1] = offset[1] + } + us[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets + usorigin := []float32{float32(srcRegion.X), float32(srcRegion.Y)} + us[graphics.TextureSourceRegionOriginUniformVariableIndex] = usorigin + ussize := []float32{float32(srcRegion.Width), float32(srcRegion.Height)} + us[graphics.TextureSourceRegionSizeUniformVariableIndex] = ussize + + for i, u := range uniforms { + us[graphics.PreservedUniformVariablesNum+i] = u + } + + flattenUniforms = shader.uniformsToFloat32s(us) + } + + w, h := dst.internalSize() + g.drawCommandList.RSSetViewports(1, &_D3D12_VIEWPORT{ + TopLeftX: 0, + TopLeftY: 0, + Width: float32(w), + Height: float32(h), + MinDepth: _D3D12_MIN_DEPTH, + MaxDepth: _D3D12_MAX_DEPTH, + }) + g.drawCommandList.RSSetScissorRects(1, &_D3D12_RECT{ + left: int32(dstRegion.X), + top: int32(dstRegion.Y), + right: int32(dstRegion.X + dstRegion.Width), + bottom: int32(dstRegion.Y + dstRegion.Height), + }) + + g.drawCommandList.IASetPrimitiveTopology(_D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) + 
g.drawCommandList.IASetVertexBuffers(0, 1, &_D3D12_VERTEX_BUFFER_VIEW{ + BufferLocation: g.vertices[g.frameIndex][len(g.vertices[g.frameIndex])-1].GetGPUVirtualAddress(), + SizeInBytes: graphics.IndicesNum * graphics.VertexFloatNum * uint32(unsafe.Sizeof(float32(0))), + StrideInBytes: graphics.VertexFloatNum * uint32(unsafe.Sizeof(float32(0))), + }) + g.drawCommandList.IASetIndexBuffer(&_D3D12_INDEX_BUFFER_VIEW{ + BufferLocation: g.indices[g.frameIndex][len(g.indices[g.frameIndex])-1].GetGPUVirtualAddress(), + SizeInBytes: graphics.IndicesNum * uint32(unsafe.Sizeof(uint16(0))), + Format: _DXGI_FORMAT_R16_UINT, + }) + + if shader == nil { + key := builtinPipelineStatesKey{ + useColorM: !colorM.IsIdentity(), + compositeMode: mode, + filter: filter, + address: address, + screen: dst.screen, + } + + if evenOdd { + key.stencilMode = prepareStencil + s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err + } + + key.stencilMode = drawWithStencil + s, err = g.pipelineStates.builtinGraphicsPipelineState(g.device, key) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err + } + } else { + key.stencilMode = noStencil + s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err + } + } + + } else { + if evenOdd { + s, err := shader.pipelineState(mode, prepareStencil) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err + } + + s, err = shader.pipelineState(mode, drawWithStencil) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, 
indexOffset); err != nil { + return err + } + } else { + s, err := shader.pipelineState(mode, noStencil) + if err != nil { + return err + } + if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { + return err + } + } + } + + return nil +} + +func (g *Graphics) drawTriangles(pipelineState *iD3D12PipelineState, srcs [graphics.ShaderImageNum]*Image, flattenUniforms []float32, indexLen int, indexOffset int) error { + if err := g.pipelineStates.useGraphicsPipelineState(g.device, g.drawCommandList, g.frameIndex, pipelineState, srcs, flattenUniforms); err != nil { + return err + } + + g.drawCommandList.DrawIndexedInstanced(uint32(indexLen), 1, uint32(indexOffset), 0, 0) + + // Release constant buffers when too many ones were created. + // This is needed espciallly for testings, where present is always false. + if len(g.pipelineStates.constantBuffers[g.frameIndex]) >= 16 { + if err := g.flushCommandList(g.drawCommandList); err != nil { + return err + } + g.pipelineStates.releaseConstantBuffers(g.frameIndex) + } + + return nil +} + +func (g *Graphics) genNextImageID() graphicsdriver.ImageID { + g.nextImageID++ + return g.nextImageID +} + +func (g *Graphics) genNextShaderID() graphicsdriver.ShaderID { + g.nextShaderID++ + return g.nextShaderID +} + +type Image struct { + graphics *Graphics + id graphicsdriver.ImageID + width int + height int + screen bool + + state _D3D12_RESOURCE_STATES + texture *iD3D12Resource1 + stencil *iD3D12Resource1 + layouts _D3D12_PLACED_SUBRESOURCE_FOOTPRINT + numRows uint + totalBytes uint64 + uploadingStagingBuffer *iD3D12Resource1 + readingStagingBuffer *iD3D12Resource1 + rtvDescriptorHeap *iD3D12DescriptorHeap + dsvDescriptorHeap *iD3D12DescriptorHeap +} + +func (i *Image) ID() graphicsdriver.ImageID { + return i.id +} + +func (i *Image) Dispose() { + // Dipose the images later as this image might still be used. 
+ i.graphics.removeImage(i) +} + +func (i *Image) disposeImpl() { + if i.dsvDescriptorHeap != nil { + i.dsvDescriptorHeap.Release() + i.dsvDescriptorHeap = nil + } + if i.rtvDescriptorHeap != nil { + i.rtvDescriptorHeap.Release() + i.rtvDescriptorHeap = nil + } + if i.uploadingStagingBuffer != nil { + i.uploadingStagingBuffer.Release() + i.uploadingStagingBuffer = nil + } + if i.readingStagingBuffer != nil { + i.readingStagingBuffer.Release() + i.readingStagingBuffer = nil + } + if i.stencil != nil { + i.stencil.Release() + i.stencil = nil + } + if i.texture != nil { + i.texture.Release() + i.texture = nil + } +} + +func (*Image) IsInvalidated() bool { + return false +} + +func (i *Image) ensureUploadingStagingBuffer() error { + if i.uploadingStagingBuffer != nil { + return nil + } + var err error + i.uploadingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_UPLOAD) + if err != nil { + return err + } + return nil +} + +func (i *Image) ensureReadingStagingBuffer() error { + if i.readingStagingBuffer != nil { + return nil + } + var err error + i.readingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_READBACK) + if err != nil { + return err + } + return nil +} + +func (i *Image) ReadPixels(buf []byte) error { + if i.screen { + return errors.New("directx: Pixels cannot be called on the screen") + } + + if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { + return err + } + + if err := i.ensureReadingStagingBuffer(); err != nil { + return err + } + + i.transiteState(i.graphics.copyCommandList, _D3D12_RESOURCE_STATE_COPY_SOURCE) + + m, err := i.readingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) + if err != nil { + return err + } + + dst := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ + pResource: i.readingStagingBuffer, + Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + PlacedFootprint: i.layouts, + } + src := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ + pResource: 
i.texture, + Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + SubresourceIndex: 0, + } + i.graphics.copyCommandList.CopyTextureRegion_PlacedFootPrint_SubresourceIndex( + &dst, 0, 0, 0, &src, &_D3D12_BOX{ + left: 0, + top: 0, + front: 0, + right: uint32(i.width), + bottom: uint32(i.height), + back: 1, + }) + + if err := i.graphics.flushCommandList(i.graphics.copyCommandList); err != nil { + return err + } + + var dstBytes []byte + h := (*reflect.SliceHeader)(unsafe.Pointer(&dstBytes)) + h.Data = uintptr(m) + h.Len = int(i.totalBytes) + h.Cap = int(i.totalBytes) + + for j := 0; j < i.height; j++ { + copy(buf[j*i.width*4:(j+1)*i.width*4], dstBytes[j*int(i.layouts.Footprint.RowPitch):]) + } + + if err := i.readingStagingBuffer.Unmap(0, nil); err != nil { + return err + } + + return nil +} + +func (i *Image) ReplacePixels(args []*graphicsdriver.ReplacePixelsArgs) error { + if i.screen { + return errors.New("directx: ReplacePixels cannot be called on the screen") + } + + if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { + return err + } + + if err := i.ensureUploadingStagingBuffer(); err != nil { + return err + } + + i.transiteState(i.graphics.copyCommandList, _D3D12_RESOURCE_STATE_COPY_DEST) + + m, err := i.uploadingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) + if err != nil { + return err + } + + var srcBytes []byte + h := (*reflect.SliceHeader)(unsafe.Pointer(&srcBytes)) + h.Data = uintptr(m) + h.Len = int(i.totalBytes) + h.Cap = int(i.totalBytes) + for _, a := range args { + for j := 0; j < a.Height; j++ { + copy(srcBytes[(a.Y+j)*int(i.layouts.Footprint.RowPitch)+a.X*4:], a.Pixels[j*a.Width*4:(j+1)*a.Width*4]) + } + + dst := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ + pResource: i.texture, + Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + SubresourceIndex: 0, + } + src := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ + pResource: i.uploadingStagingBuffer, + Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + PlacedFootprint: 
i.layouts, + } + i.graphics.copyCommandList.CopyTextureRegion_SubresourceIndex_PlacedFootPrint( + &dst, uint32(a.X), uint32(a.Y), 0, &src, &_D3D12_BOX{ + left: uint32(a.X), + top: uint32(a.Y), + front: 0, + right: uint32(a.X + a.Width), + bottom: uint32(a.Y + a.Height), + back: 1, + }) + } + + if err := i.uploadingStagingBuffer.Unmap(0, nil); err != nil { + return err + } + + return nil +} + +func (i *Image) resource() *iD3D12Resource1 { + if i.screen { + return i.graphics.renderTargets[i.graphics.frameIndex] + } + return i.texture +} + +func (i *Image) transiteState(commandList *iD3D12GraphicsCommandList, newState _D3D12_RESOURCE_STATES) { + if i.state == newState { + return + } + + commandList.ResourceBarrier(1, &_D3D12_RESOURCE_BARRIER_Transition{ + Type: _D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: _D3D12_RESOURCE_BARRIER_FLAG_NONE, + Transition: _D3D12_RESOURCE_TRANSITION_BARRIER{ + pResource: i.resource(), + Subresource: _D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: i.state, + StateAfter: newState, + }, + }) + i.state = newState +} + +func (i *Image) internalSize() (int, int) { + if i.screen { + return i.width, i.height + } + return graphics.InternalImageSize(i.width), graphics.InternalImageSize(i.height) +} + +func (i *Image) setAsRenderTarget(device *iD3D12Device, useStencil bool) error { + i.transiteState(i.graphics.drawCommandList, _D3D12_RESOURCE_STATE_RENDER_TARGET) + + if err := i.ensureRenderTargetView(device); err != nil { + return err + } + + if i.screen { + rtv := i.graphics.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + rtv.Offset(int32(i.graphics.frameIndex), i.graphics.rtvDescriptorSize) + i.graphics.drawCommandList.OMSetRenderTargets(1, &rtv, false, nil) + return nil + } + + rtv := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + var dsv *_D3D12_CPU_DESCRIPTOR_HANDLE + if useStencil { + if err := i.ensureDepthStencilView(device); err != nil { + return err + } + v := 
i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + dsv = &v + + i.graphics.drawCommandList.ClearDepthStencilView(v, _D3D12_CLEAR_FLAG_STENCIL, 0, 0, 0, nil) + i.graphics.drawCommandList.OMSetStencilRef(0) + } + i.graphics.drawCommandList.OMSetRenderTargets(1, &rtv, false, dsv) // TODO: Pass depth-stencil here! + + return nil +} + +func (i *Image) ensureRenderTargetView(device *iD3D12Device) error { + if i.screen { + return nil + } + + if i.rtvDescriptorHeap != nil { + return nil + } + + h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, + NumDescriptors: 1, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }) + if err != nil { + return err + } + i.rtvDescriptorHeap = h + + rtv := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + device.CreateRenderTargetView(i.texture, nil, rtv) + + return nil +} + +func (i *Image) ensureDepthStencilView(device *iD3D12Device) error { + if i.screen { + return nil + } + + if i.dsvDescriptorHeap != nil { + return nil + } + + h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_DSV, + NumDescriptors: 1, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, + NodeMask: 0, + }) + if err != nil { + return err + } + i.dsvDescriptorHeap = h + + dsv := i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + if i.stencil == nil { + s, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ + Type: _D3D12_HEAP_TYPE_DEFAULT, + CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, + CreationNodeMask: 1, + VisibleNodeMask: 1, + }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ + Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, + Alignment: 0, + Width: uint64(graphics.InternalImageSize(i.width)), + Height: uint32(graphics.InternalImageSize(i.height)), + DepthOrArraySize: 1, + MipLevels: 0, + Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, + SampleDesc: _DXGI_SAMPLE_DESC{ + 
Count: 1, + Quality: 0, + }, + Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, + Flags: _D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL, + }, _D3D12_RESOURCE_STATE_DEPTH_WRITE, &_D3D12_CLEAR_VALUE{ + Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, + }) + if err != nil { + return err + } + i.stencil = s + } + device.CreateDepthStencilView(i.stencil, nil, dsv) + + return nil +} + +func copyFloat32s(dst unsafe.Pointer, src []float32) { + var dsts []float32 + h := (*reflect.SliceHeader)(unsafe.Pointer(&dsts)) + h.Data = uintptr(dst) + h.Len = len(src) + h.Cap = len(src) + copy(dsts, src) +} + +func copyUint16s(dst unsafe.Pointer, src []uint16) { + var dsts []uint16 + h := (*reflect.SliceHeader)(unsafe.Pointer(&dsts)) + h.Data = uintptr(dst) + h.Len = len(src) + h.Cap = len(src) + copy(dsts, src) +} + +type stencilMode int + +const ( + prepareStencil stencilMode = iota + drawWithStencil + noStencil +) + +type pipelineStateKey struct { + compositeMode graphicsdriver.CompositeMode + stencilMode stencilMode +} + +type Shader struct { + graphics *Graphics + id graphicsdriver.ShaderID + uniformTypes []shaderir.Type + uniformOffsets []int + vertexShader *iD3DBlob + pixelShader *iD3DBlob + pipelineStates map[pipelineStateKey]*iD3D12PipelineState +} + +func (s *Shader) ID() graphicsdriver.ShaderID { + return s.id +} + +func (s *Shader) Dispose() { + s.graphics.removeShader(s) +} + +func (s *Shader) disposeImpl() { + for c, p := range s.pipelineStates { + p.Release() + delete(s.pipelineStates, c) + } + + if s.pixelShader != nil { + s.pixelShader.Release() + s.pixelShader = nil + } + if s.vertexShader != nil { + s.vertexShader.Release() + s.vertexShader = nil + } +} + +func (s *Shader) pipelineState(compositeMode graphicsdriver.CompositeMode, stencilMode stencilMode) (*iD3D12PipelineState, error) { + key := pipelineStateKey{ + compositeMode: compositeMode, + stencilMode: stencilMode, + } + if state, ok := s.pipelineStates[key]; ok { + return state, nil + } + + state, err := 
s.graphics.pipelineStates.newPipelineState(s.graphics.device, s.vertexShader, s.pixelShader, compositeMode, stencilMode, false) + if err != nil { + return nil, err + } + if s.pipelineStates == nil { + s.pipelineStates = map[pipelineStateKey]*iD3D12PipelineState{} + } + s.pipelineStates[key] = state + return state, nil +} + +func (s *Shader) uniformsToFloat32s(uniforms [][]float32) []float32 { + var fs []float32 + for i, u := range uniforms { + if len(fs) < s.uniformOffsets[i]/4 { + fs = append(fs, make([]float32, s.uniformOffsets[i]/4-len(fs))...) + } + + t := s.uniformTypes[i] + switch t.Main { + case shaderir.Float, shaderir.Vec2, shaderir.Vec3, shaderir.Vec4: + fs = append(fs, u...) + case shaderir.Mat2: + for j := 0; j < 2; j++ { + fs = append(fs, u[2*j:2*(j+1)]...) + if j < 1 { + fs = append(fs, 0, 0) + } + } + case shaderir.Mat3: + for j := 0; j < 3; j++ { + fs = append(fs, u[3*j:3*(j+1)]...) + if j < 2 { + fs = append(fs, 0) + } + } + case shaderir.Mat4: + fs = append(fs, u...) + case shaderir.Array: + // Each element is aligned to the boundary. + switch t.Sub[0].Main { + case shaderir.Float: + for j := 0; j < t.Length; j++ { + fs = append(fs, u[j]) + if j < t.Length-1 { + fs = append(fs, 0, 0, 0) + } + } + case shaderir.Vec2: + for j := 0; j < t.Length; j++ { + fs = append(fs, u[2*j:2*(j+1)]...) + if j < t.Length-1 { + fs = append(fs, 0, 0) + } + } + case shaderir.Vec3: + for j := 0; j < t.Length; j++ { + fs = append(fs, u[3*j:3*(j+1)]...) + if j < t.Length-1 { + fs = append(fs, 0) + } + } + case shaderir.Vec4: + fs = append(fs, u...) + case shaderir.Mat2: + for j := 0; j < t.Length; j++ { + for k := 0; k < 2; k++ { + fs = append(fs, u[2*(2*j+k):2*(2*j+k+1)]...) + if j < t.Length-1 || k < 1 { + fs = append(fs, 0, 0) + } + } + } + case shaderir.Mat3: + for j := 0; j < t.Length; j++ { + for k := 0; k < 3; k++ { + fs = append(fs, u[3*(3*j+k):3*(3*j+k+1)]...) 
+ if j < t.Length-1 || k < 2 { + fs = append(fs, 0) + } + } + } + case shaderir.Mat4: + fs = append(fs, u...) + default: + panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", t.String())) + } + default: + panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", t.String())) + } + } + return fs +} diff --git a/internal/graphicsdriver/directx/pipeline_windows.go b/internal/graphicsdriver/directx/pipeline_windows.go new file mode 100644 index 000000000..e5ef6e285 --- /dev/null +++ b/internal/graphicsdriver/directx/pipeline_windows.go @@ -0,0 +1,686 @@ +// Copyright 2022 The Ebiten Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package directx + +import ( + "fmt" + "math" + "unsafe" + + "github.com/hajimehoshi/ebiten/v2/internal/graphics" + "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" +) + +const numDescriptorsPerFrame = 256 + +func operationToBlend(c graphicsdriver.Operation, alpha bool) _D3D12_BLEND { + switch c { + case graphicsdriver.Zero: + return _D3D12_BLEND_ZERO + case graphicsdriver.One: + return _D3D12_BLEND_ONE + case graphicsdriver.SrcAlpha: + return _D3D12_BLEND_SRC_ALPHA + case graphicsdriver.DstAlpha: + return _D3D12_BLEND_DEST_ALPHA + case graphicsdriver.OneMinusSrcAlpha: + return _D3D12_BLEND_INV_SRC_ALPHA + case graphicsdriver.OneMinusDstAlpha: + return _D3D12_BLEND_INV_DEST_ALPHA + case graphicsdriver.DstColor: + if alpha { + return _D3D12_BLEND_DEST_ALPHA + } + return _D3D12_BLEND_DEST_COLOR + default: + panic(fmt.Sprintf("directx: invalid operation: %d", c)) + } +} + +type builtinPipelineStatesKey struct { + useColorM bool + compositeMode graphicsdriver.CompositeMode + filter graphicsdriver.Filter + address graphicsdriver.Address + stencilMode stencilMode + screen bool +} + +func (k *builtinPipelineStatesKey) defs() ([]_D3D_SHADER_MACRO, error) { + var defs []_D3D_SHADER_MACRO + defval := []byte("1\x00") + if k.useColorM { + name := []byte("USE_COLOR_MATRIX\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + } + + switch k.filter { + case graphicsdriver.FilterNearest: + name := []byte("FILTER_NEAREST\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + case graphicsdriver.FilterLinear: + name := []byte("FILTER_LINEAR\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + case graphicsdriver.FilterScreen: + name := []byte("FILTER_SCREEN\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + default: + return nil, fmt.Errorf("directx: invalid filter: %d", k.filter) + } + + switch k.address { + case graphicsdriver.AddressUnsafe: + name := []byte("ADDRESS_UNSAFE\x00") + defs = 
append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + case graphicsdriver.AddressClampToZero: + name := []byte("ADDRESS_CLAMP_TO_ZERO\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + case graphicsdriver.AddressRepeat: + name := []byte("ADDRESS_REPEAT\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + default: + return nil, fmt.Errorf("directx: invalid address: %d", k.address) + } + + // Termination + defs = append(defs, _D3D_SHADER_MACRO{}) + + return defs, nil +} + +func (k *builtinPipelineStatesKey) source() []byte { + return []byte(`struct PSInput { + float4 position : SV_POSITION; + float2 texcoord : TEXCOORD0; + float4 color : COLOR; +}; + +cbuffer ShaderParameter : register(b0) { + float2 viewport_size; + float2 source_size; + float4x4 color_matrix_body; + float4 color_matrix_translation; + float4 source_region; + + // This member should be the last not to create a new sector. + // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules + float scale; +} + +PSInput VSMain(float2 position : POSITION, float2 tex : TEXCOORD, float4 color : COLOR) { + // In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't + // match. Then, the Y direction must be inverted. + float4x4 projectionMatrix = { + 2.0 / viewport_size.x, 0, 0, -1, + 0, -2.0 / viewport_size.y, 0, 1, + 0, 0, 1, 0, + 0, 0, 0, 1, + }; + + PSInput result; + result.position = mul(projectionMatrix, float4(position, 0, 1)); + result.texcoord = tex; + result.color = float4(color.rgb, 1) * color.a; + return result; +} + +Texture2D tex : register(t0); +SamplerState samp : register(s0); + +float euclideanMod(float x, float y) { + // Assume that y is always positive. 
+ return x - y * floor(x/y); +} + +float2 adjustTexelByAddress(float2 p, float4 source_region) { +#if defined(ADDRESS_CLAMP_TO_ZERO) + return p; +#endif + +#if defined(ADDRESS_REPEAT) + float2 o = float2(source_region[0], source_region[1]); + float2 size = float2(source_region[2] - source_region[0], source_region[3] - source_region[1]); + return float2(euclideanMod((p.x - o.x), size.x) + o.x, euclideanMod((p.y - o.y), size.y) + o.y); +#endif + +#if defined(ADDRESS_UNSAFE) + return p; +#endif +} + +float4 PSMain(PSInput input) : SV_TARGET { +#if defined(FILTER_NEAREST) +# if defined(ADDRESS_UNSAFE) + float4 color = tex.Sample(samp, input.texcoord); +# else + float4 color; + float2 pos = adjustTexelByAddress(input.texcoord, source_region); + if (source_region[0] <= pos.x && + source_region[1] <= pos.y && + pos.x < source_region[2] && + pos.y < source_region[3]) { + color = tex.Sample(samp, pos); + } else { + color = float4(0, 0, 0, 0); + } +# endif // defined(ADDRESS_UNSAFE) +#endif // defined(FILTER_NEAREST) + +#if defined(FILTER_LINEAR) + float2 pos = input.texcoord; + float2 texel_size = 1.0 / source_size; + + // Shift 1/512 [texel] to avoid the tie-breaking issue. + // As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases. 
+ float2 p0 = pos - (texel_size) / 2.0 + (texel_size / 512.0); + float2 p1 = pos + (texel_size) / 2.0 + (texel_size / 512.0); + +# if !defined(ADDRESS_UNSAFE) + p0 = adjustTexelByAddress(p0, source_region); + p1 = adjustTexelByAddress(p1, source_region); +# endif // !defined(ADDRESS_UNSAFE) + + float4 c0 = tex.Sample(samp, p0); + float4 c1 = tex.Sample(samp, float2(p1.x, p0.y)); + float4 c2 = tex.Sample(samp, float2(p0.x, p1.y)); + float4 c3 = tex.Sample(samp, p1); + +# if !defined(ADDRESS_UNSAFE) + if (p0.x < source_region[0]) { + c0 = float4(0, 0, 0, 0); + c2 = float4(0, 0, 0, 0); + } + if (p0.y < source_region[1]) { + c0 = float4(0, 0, 0, 0); + c1 = float4(0, 0, 0, 0); + } + if (source_region[2] <= p1.x) { + c1 = float4(0, 0, 0, 0); + c3 = float4(0, 0, 0, 0); + } + if (source_region[3] <= p1.y) { + c2 = float4(0, 0, 0, 0); + c3 = float4(0, 0, 0, 0); + } +# endif // !defined(ADDRESS_UNSAFE) + + float2 rate = frac(p0 * source_size); + float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y); +#endif // defined(FILTER_LINEAR) + +#if defined(FILTER_SCREEN) + float2 pos = input.texcoord; + float2 texel_size = 1.0 / source_size; + float2 half_scaled_texel_size = texel_size / 2.0 / scale; + + float2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0); + float2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0); + + float4 c0 = tex.Sample(samp, p0); + float4 c1 = tex.Sample(samp, float2(p1.x, p0.y)); + float4 c2 = tex.Sample(samp, float2(p0.x, p1.y)); + float4 c3 = tex.Sample(samp, p1); + // Texels must be in the source rect, so it is not necessary to check that like linear filter. + + float2 rate_center = float2(1.0, 1.0) - half_scaled_texel_size; + float2 rate = clamp(((frac(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); + float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y); +#endif // defined(FILTER_SCREEN) + +#if defined(USE_COLOR_MATRIX) + // Un-premultiply alpha. 
// pipelineStates holds the pipeline-related objects used for drawing: the
// cached pipeline states of the built-in shader, the descriptor heaps bound
// when drawing, and the constant buffers of each frame.
type pipelineStates struct {
	// rootSignature is the root signature set on the command list before
	// drawing. NOTE(review): presumably created on first use by
	// ensureRootSignature — confirm.
	rootSignature *iD3D12RootSignature

	// cache maps a built-in pipeline key to the pipeline state created for it.
	cache map[builtinPipelineStatesKey]*iD3D12PipelineState

	// builtinShaders is a set of the built-in vertex/pixel shaders that are never released.
	builtinShaders []*iD3DBlob

	// shaderDescriptorHeap is the shader-visible CBV/SRV/UAV heap that holds
	// the constant buffer view and the source texture views of each draw.
	shaderDescriptorHeap *iD3D12DescriptorHeap
	// shaderDescriptorSize is the handle increment size for shaderDescriptorHeap.
	shaderDescriptorSize uint32

	// samplerDescriptorHeap is the shader-visible heap holding the single sampler.
	samplerDescriptorHeap *iD3D12DescriptorHeap

	// constantBuffers holds, for each frame, the constant buffers used by the
	// draws of that frame, in draw order.
	constantBuffers [frameCount][]*iD3D12Resource1
}
+ // 5n+0: constants + // 5n+m (1<=4): textures + shaderH, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + NumDescriptors: frameCount * numDescriptorsPerFrame * numConstantBufferAndSourceTextures, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + NodeMask: 0, + }) + if err != nil { + return err + } + p.shaderDescriptorHeap = shaderH + defer func() { + if ferr != nil { + p.shaderDescriptorHeap.Release() + p.shaderDescriptorHeap = nil + } + }() + p.shaderDescriptorSize = device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + + samplerH, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ + Type: _D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, + NumDescriptors: 1, + Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, + NodeMask: 0, + }) + if err != nil { + return err + } + p.samplerDescriptorHeap = samplerH + + device.CreateSampler(&_D3D12_SAMPLER_DESC{ + Filter: _D3D12_FILTER_MIN_MAG_MIP_POINT, + AddressU: _D3D12_TEXTURE_ADDRESS_MODE_WRAP, + AddressV: _D3D12_TEXTURE_ADDRESS_MODE_WRAP, + AddressW: _D3D12_TEXTURE_ADDRESS_MODE_WRAP, + ComparisonFunc: _D3D12_COMPARISON_FUNC_NEVER, + MinLOD: -math.MaxFloat32, + MaxLOD: math.MaxFloat32, + }, p.samplerDescriptorHeap.GetCPUDescriptorHandleForHeapStart()) + + return nil +} + +func (p *pipelineStates) builtinGraphicsPipelineState(device *iD3D12Device, key builtinPipelineStatesKey) (*iD3D12PipelineState, error) { + state, ok := p.cache[key] + if ok { + return state, nil + } + + defs, err := key.defs() + if err != nil { + return nil, err + } + + vsh, psh, err := newShader(key.source(), defs) + if err != nil { + return nil, err + } + // Keep the shaders. These are never released. 
+ p.builtinShaders = append(p.builtinShaders, vsh, psh) + + s, err := p.newPipelineState(device, vsh, psh, key.compositeMode, key.stencilMode, key.screen) + if err != nil { + return nil, err + } + if p.cache == nil { + p.cache = map[builtinPipelineStatesKey]*iD3D12PipelineState{} + } + p.cache[key] = s + return s, nil +} + +func (p *pipelineStates) useGraphicsPipelineState(device *iD3D12Device, commandList *iD3D12GraphicsCommandList, frameIndex int, pipelineState *iD3D12PipelineState, srcs [graphics.ShaderImageNum]*Image, uniforms []float32) error { + idx := len(p.constantBuffers[frameIndex]) + if idx >= numDescriptorsPerFrame*2 { + return fmt.Errorf("directx: too many constant buffers") + } + + if cap(p.constantBuffers[frameIndex]) > idx { + p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:idx+1] + } else { + p.constantBuffers[frameIndex] = append(p.constantBuffers[frameIndex], nil) + } + + const bufferSizeAlignement = 256 + bufferSize := uint32(unsafe.Sizeof(float32(0))) * uint32(len(uniforms)) + if bufferSize > 0 { + bufferSize = ((bufferSize-1)/bufferSizeAlignement + 1) * bufferSizeAlignement + } + + cb := p.constantBuffers[frameIndex][idx] + if cb != nil { + if uint32(cb.GetDesc().Width) < bufferSize { + p.constantBuffers[frameIndex][idx].Release() + p.constantBuffers[frameIndex][idx] = nil + cb = nil + } + } + if cb == nil { + var err error + cb, err = createBuffer(device, uint64(bufferSize), _D3D12_HEAP_TYPE_UPLOAD) + if err != nil { + return err + } + p.constantBuffers[frameIndex][idx] = cb + + h := p.shaderDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + h.Offset(int32(frameIndex*numDescriptorsPerFrame+numConstantBufferAndSourceTextures*idx), p.shaderDescriptorSize) + device.CreateConstantBufferView(&_D3D12_CONSTANT_BUFFER_VIEW_DESC{ + BufferLocation: cb.GetGPUVirtualAddress(), + SizeInBytes: bufferSize, + }, h) + } + + h := p.shaderDescriptorHeap.GetCPUDescriptorHandleForHeapStart() + 
h.Offset(int32(frameIndex*numDescriptorsPerFrame+numConstantBufferAndSourceTextures*idx), p.shaderDescriptorSize) + for _, src := range srcs { + h.Offset(1, p.shaderDescriptorSize) + if src == nil { + continue + } + device.CreateShaderResourceView(src.resource(), &_D3D12_SHADER_RESOURCE_VIEW_DESC{ + Format: _DXGI_FORMAT_R8G8B8A8_UNORM, + ViewDimension: _D3D12_SRV_DIMENSION_TEXTURE2D, + Shader4ComponentMapping: _D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + Texture2D: _D3D12_TEX2D_SRV{ + MipLevels: 1, + }, + }, h) + } + + // Update the constant buffer. + m, err := cb.Map(0, &_D3D12_RANGE{0, 0}) + if err != nil { + return err + } + copyFloat32s(m, uniforms) + + if err := cb.Unmap(0, nil); err != nil { + return err + } + + commandList.SetPipelineState(pipelineState) + + rs, err := p.ensureRootSignature(device) + if err != nil { + return err + } + commandList.SetGraphicsRootSignature(rs) + + commandList.SetDescriptorHeaps([]*iD3D12DescriptorHeap{ + p.shaderDescriptorHeap, + p.samplerDescriptorHeap, + }) + + // Match the indices with rootParams in graphicsPipelineState. 
+ gh := p.shaderDescriptorHeap.GetGPUDescriptorHandleForHeapStart() + gh.Offset(int32(frameIndex*numDescriptorsPerFrame+numConstantBufferAndSourceTextures*idx), p.shaderDescriptorSize) + commandList.SetGraphicsRootDescriptorTable(0, gh) + gh.Offset(1, p.shaderDescriptorSize) + commandList.SetGraphicsRootDescriptorTable(1, gh) + commandList.SetGraphicsRootDescriptorTable(2, p.samplerDescriptorHeap.GetGPUDescriptorHandleForHeapStart()) + + return nil +} + +func (p *pipelineStates) ensureRootSignature(device *iD3D12Device) (rootSignature *iD3D12RootSignature, ferr error) { + if p.rootSignature != nil { + return p.rootSignature, nil + } + + cbv := _D3D12_DESCRIPTOR_RANGE{ + RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_CBV, // b0 + NumDescriptors: 1, + BaseShaderRegister: 0, + RegisterSpace: 0, + OffsetInDescriptorsFromTableStart: _D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + } + srv := _D3D12_DESCRIPTOR_RANGE{ + RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_SRV, // t0 + NumDescriptors: graphics.ShaderImageNum, + BaseShaderRegister: 0, + RegisterSpace: 0, + OffsetInDescriptorsFromTableStart: _D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + } + sampler := _D3D12_DESCRIPTOR_RANGE{ + RangeType: _D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, // s0 + NumDescriptors: 1, + BaseShaderRegister: 0, + RegisterSpace: 0, + OffsetInDescriptorsFromTableStart: _D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + } + + rootParams := [...]_D3D12_ROOT_PARAMETER{ + { + ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{ + NumDescriptorRanges: 1, + pDescriptorRanges: &cbv, + }, + ShaderVisibility: _D3D12_SHADER_VISIBILITY_ALL, + }, + { + ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{ + NumDescriptorRanges: 1, + pDescriptorRanges: &srv, + }, + ShaderVisibility: _D3D12_SHADER_VISIBILITY_PIXEL, + }, + { + ParameterType: _D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE, + DescriptorTable: _D3D12_ROOT_DESCRIPTOR_TABLE{ + 
NumDescriptorRanges: 1, + pDescriptorRanges: &sampler, + }, + ShaderVisibility: _D3D12_SHADER_VISIBILITY_PIXEL, + }, + } + + // Create a root signature. + sig, err := d3D12SerializeRootSignature(&_D3D12_ROOT_SIGNATURE_DESC{ + NumParameters: uint32(len(rootParams)), + pParameters: &rootParams[0], + NumStaticSamplers: 0, + pStaticSamplers: nil, + Flags: _D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT, + }, _D3D_ROOT_SIGNATURE_VERSION_1_0) + if err != nil { + return nil, err + } + defer sig.Release() + + rs, err := device.CreateRootSignature(0, sig.GetBufferPointer(), sig.GetBufferSize()) + if err != nil { + return nil, err + } + defer func() { + if ferr != nil { + rootSignature.Release() + } + }() + + p.rootSignature = rs + + return p.rootSignature, nil +} + +func newShader(source []byte, defs []_D3D_SHADER_MACRO) (vsh, psh *iD3DBlob, ferr error) { + // Create a shader + v, err := d3DCompile(source, "shader", defs, nil, "VSMain", "vs_5_0", 0, 0) + if err != nil { + return nil, nil, err + } + defer func() { + if ferr != nil { + v.Release() + } + }() + + p, err := d3DCompile(source, "shader", defs, nil, "PSMain", "ps_5_0", 0, 0) + if err != nil { + return nil, nil, err + } + defer func() { + if ferr != nil { + p.Release() + } + }() + + return v, p, nil +} + +func (p *pipelineStates) newPipelineState(device *iD3D12Device, vsh, psh *iD3DBlob, compositeMode graphicsdriver.CompositeMode, stencilMode stencilMode, screen bool) (state *iD3D12PipelineState, ferr error) { + rootSignature, err := p.ensureRootSignature(device) + if err != nil { + return nil, err + } + defer func() { + if ferr != nil { + rootSignature.Release() + } + }() + + depthStencilDesc := _D3D12_DEPTH_STENCIL_DESC{ + DepthEnable: 0, + DepthWriteMask: _D3D12_DEPTH_WRITE_MASK_ALL, + DepthFunc: _D3D12_COMPARISON_FUNC_LESS, + StencilEnable: 0, + StencilReadMask: _D3D12_DEFAULT_STENCIL_READ_MASK, + StencilWriteMask: _D3D12_DEFAULT_STENCIL_WRITE_MASK, + FrontFace: _D3D12_DEPTH_STENCILOP_DESC{ + 
StencilFailOp: _D3D12_STENCIL_OP_KEEP, + StencilDepthFailOp: _D3D12_STENCIL_OP_KEEP, + StencilPassOp: _D3D12_STENCIL_OP_KEEP, + StencilFunc: _D3D12_COMPARISON_FUNC_ALWAYS, + }, + BackFace: _D3D12_DEPTH_STENCILOP_DESC{ + StencilFailOp: _D3D12_STENCIL_OP_KEEP, + StencilDepthFailOp: _D3D12_STENCIL_OP_KEEP, + StencilPassOp: _D3D12_STENCIL_OP_KEEP, + StencilFunc: _D3D12_COMPARISON_FUNC_ALWAYS, + }, + } + writeMask := uint8(_D3D12_COLOR_WRITE_ENABLE_ALL) + + switch stencilMode { + case prepareStencil: + depthStencilDesc.StencilEnable = 1 + depthStencilDesc.FrontFace.StencilPassOp = _D3D12_STENCIL_OP_INVERT + depthStencilDesc.BackFace.StencilPassOp = _D3D12_STENCIL_OP_INVERT + writeMask = 0 + case drawWithStencil: + depthStencilDesc.StencilEnable = 1 + depthStencilDesc.FrontFace.StencilFunc = _D3D12_COMPARISON_FUNC_NOT_EQUAL + depthStencilDesc.BackFace.StencilFunc = _D3D12_COMPARISON_FUNC_NOT_EQUAL + } + + rtvFormat := _DXGI_FORMAT_R8G8B8A8_UNORM + if screen { + rtvFormat = _DXGI_FORMAT_B8G8R8A8_UNORM + } + + // Create a pipeline state. 
+ srcOp, dstOp := compositeMode.Operations() + psoDesc := _D3D12_GRAPHICS_PIPELINE_STATE_DESC{ + pRootSignature: rootSignature, + VS: _D3D12_SHADER_BYTECODE{ + pShaderBytecode: vsh.GetBufferPointer(), + BytecodeLength: vsh.GetBufferSize(), + }, + PS: _D3D12_SHADER_BYTECODE{ + pShaderBytecode: psh.GetBufferPointer(), + BytecodeLength: psh.GetBufferSize(), + }, + BlendState: _D3D12_BLEND_DESC{ + AlphaToCoverageEnable: 0, + IndependentBlendEnable: 0, + RenderTarget: [8]_D3D12_RENDER_TARGET_BLEND_DESC{ + { + BlendEnable: 1, + LogicOpEnable: 0, + SrcBlend: operationToBlend(srcOp, false), + DestBlend: operationToBlend(dstOp, false), + BlendOp: _D3D12_BLEND_OP_ADD, + SrcBlendAlpha: operationToBlend(srcOp, true), + DestBlendAlpha: operationToBlend(dstOp, true), + BlendOpAlpha: _D3D12_BLEND_OP_ADD, + LogicOp: _D3D12_LOGIC_OP_NOOP, + RenderTargetWriteMask: writeMask, + }, + }, + }, + SampleMask: math.MaxUint32, + RasterizerState: _D3D12_RASTERIZER_DESC{ + FillMode: _D3D12_FILL_MODE_SOLID, + CullMode: _D3D12_CULL_MODE_NONE, + FrontCounterClockwise: 0, + DepthBias: _D3D12_DEFAULT_DEPTH_BIAS, + DepthBiasClamp: _D3D12_DEFAULT_DEPTH_BIAS_CLAMP, + SlopeScaledDepthBias: _D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, + DepthClipEnable: 0, + MultisampleEnable: 0, + AntialiasedLineEnable: 0, + ForcedSampleCount: 0, + ConservativeRaster: _D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF, + }, + DepthStencilState: depthStencilDesc, + InputLayout: _D3D12_INPUT_LAYOUT_DESC{ + pInputElementDescs: &inputElementDescs[0], + NumElements: uint32(len(inputElementDescs)), + }, + PrimitiveTopologyType: _D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + NumRenderTargets: 1, + RTVFormats: [8]_DXGI_FORMAT{ + rtvFormat, + }, + DSVFormat: _DXGI_FORMAT_D24_UNORM_S8_UINT, + SampleDesc: _DXGI_SAMPLE_DESC{ + Count: 1, + Quality: 0, + }, + } + + s, err := device.CreateGraphicsPipelineState(&psoDesc) + if err != nil { + return nil, err + } + return s, nil +} + +func (p *pipelineStates) releaseConstantBuffers(frameIndex int) { + 
for i := range p.constantBuffers[frameIndex] { + p.constantBuffers[frameIndex][i].Release() + p.constantBuffers[frameIndex][i] = nil + } + p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:0] +} + +func (p *pipelineStates) resetConstantBuffers(frameIndex int) { + p.constantBuffers[frameIndex] = p.constantBuffers[frameIndex][:0] +} diff --git a/internal/graphicsdriver/graphics.go b/internal/graphicsdriver/graphics.go index 719aa81ff..4ac867fc9 100644 --- a/internal/graphicsdriver/graphics.go +++ b/internal/graphicsdriver/graphics.go @@ -54,6 +54,7 @@ type Graphics interface { NeedsRestoring() bool NeedsClearingScreen() bool IsGL() bool + IsDirectX() bool HasHighPrecisionFloat() bool MaxImageSize() int diff --git a/internal/graphicsdriver/metal/graphics_darwin.go b/internal/graphicsdriver/metal/graphics_darwin.go index 4766f0502..847b25a23 100644 --- a/internal/graphicsdriver/metal/graphics_darwin.go +++ b/internal/graphicsdriver/metal/graphics_darwin.go @@ -1046,6 +1046,10 @@ func (g *Graphics) IsGL() bool { return false } +func (g *Graphics) IsDirectX() bool { + return false +} + func (g *Graphics) HasHighPrecisionFloat() bool { return true } diff --git a/internal/graphicsdriver/opengl/graphics.go b/internal/graphicsdriver/opengl/graphics.go index 9512cb2ef..4cd8e2cbb 100644 --- a/internal/graphicsdriver/opengl/graphics.go +++ b/internal/graphicsdriver/opengl/graphics.go @@ -392,6 +392,10 @@ func (g *Graphics) IsGL() bool { return true } +func (g *Graphics) IsDirectX() bool { + return false +} + func (g *Graphics) HasHighPrecisionFloat() bool { return g.context.hasHighPrecisionFloat() } diff --git a/internal/ui/context.go b/internal/ui/context.go index bc07dbfc9..1f0c0a7ae 100644 --- a/internal/ui/context.go +++ b/internal/ui/context.go @@ -61,7 +61,18 @@ func (c *contextImpl) updateFrame(graphicsDriver graphicsdriver.Graphics, outsid } func (c *contextImpl) forceUpdateFrame(graphicsDriver graphicsdriver.Graphics, outsideWidth, outsideHeight float64, 
deviceScaleFactor float64) error { - return c.updateFrameImpl(graphicsDriver, 1, outsideWidth, outsideHeight, deviceScaleFactor) + n := 1 + if graphicsDriver.IsDirectX() { + // On DirectX, both framebuffers in the swap chain should be updated. + // Or, the rendering result becomes unexpected when the window is resized. + n = 2 + } + for i := 0; i < n; i++ { + if err := c.updateFrameImpl(graphicsDriver, 1, outsideWidth, outsideHeight, deviceScaleFactor); err != nil { + return err + } + } + return nil } func (c *contextImpl) updateFrameImpl(graphicsDriver graphicsdriver.Graphics, updateCount int, outsideWidth, outsideHeight float64, deviceScaleFactor float64) error { diff --git a/internal/ui/ui_glfw.go b/internal/ui/ui_glfw.go index ae93b3f5d..9deacf7bc 100644 --- a/internal/ui/ui_glfw.go +++ b/internal/ui/ui_glfw.go @@ -811,19 +811,6 @@ event: } func (u *userInterfaceImpl) init() error { - g, err := chooseGraphicsDriver(&graphicsDriverGetterImpl{}) - if err != nil { - return err - } - u.graphicsDriver = g - if u.graphicsDriver.IsGL() { - glfw.WindowHint(glfw.ClientAPI, glfw.OpenGLAPI) - glfw.WindowHint(glfw.ContextVersionMajor, 2) - glfw.WindowHint(glfw.ContextVersionMinor, 1) - } else { - glfw.WindowHint(glfw.ClientAPI, glfw.NoAPI) - } - glfw.WindowHint(glfw.AutoIconify, glfw.False) decorated := glfw.False @@ -832,13 +819,30 @@ func (u *userInterfaceImpl) init() error { } glfw.WindowHint(glfw.Decorated, decorated) - transparent := glfw.False - if u.isInitScreenTransparent() { - transparent = glfw.True + transparent := u.isInitScreenTransparent() + glfwTransparent := glfw.False + if transparent { + glfwTransparent = glfw.True } - glfw.WindowHint(glfw.TransparentFramebuffer, transparent) + glfw.WindowHint(glfw.TransparentFramebuffer, glfwTransparent) + + g, err := chooseGraphicsDriver(&graphicsDriverGetterImpl{ + transparent: transparent, + }) + if err != nil { + return err + } + u.graphicsDriver = g u.graphicsDriver.SetTransparent(u.isInitScreenTransparent()) + if 
u.graphicsDriver.IsGL() { + glfw.WindowHint(glfw.ClientAPI, glfw.OpenGLAPI) + glfw.WindowHint(glfw.ContextVersionMajor, 2) + glfw.WindowHint(glfw.ContextVersionMinor, 1) + } else { + glfw.WindowHint(glfw.ClientAPI, glfw.NoAPI) + } + // Before creating a window, set it unresizable no matter what u.isInitWindowResizable() is (#1987). // Making the window resizable here doesn't work correctly when switching to enable resizing. resizable := glfw.False diff --git a/internal/ui/ui_glfw_darwin.go b/internal/ui/ui_glfw_darwin.go index 11e40bbc5..282018039 100644 --- a/internal/ui/ui_glfw_darwin.go +++ b/internal/ui/ui_glfw_darwin.go @@ -232,7 +232,9 @@ import ( "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/opengl" ) -type graphicsDriverGetterImpl struct{} +type graphicsDriverGetterImpl struct { + transparent bool +} func (g *graphicsDriverGetterImpl) getAuto() graphicsdriver.Graphics { if m := g.getMetal(); m != nil { diff --git a/internal/ui/ui_glfw_unix.go b/internal/ui/ui_glfw_unix.go index ba915f010..01bb76ad6 100644 --- a/internal/ui/ui_glfw_unix.go +++ b/internal/ui/ui_glfw_unix.go @@ -31,7 +31,9 @@ import ( "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/opengl" ) -type graphicsDriverGetterImpl struct{} +type graphicsDriverGetterImpl struct { + transparent bool +} func (g *graphicsDriverGetterImpl) getAuto() graphicsdriver.Graphics { return g.getOpenGL() diff --git a/internal/ui/ui_glfw_windows.go b/internal/ui/ui_glfw_windows.go index 8d0743e94..0c4a67d13 100644 --- a/internal/ui/ui_glfw_windows.go +++ b/internal/ui/ui_glfw_windows.go @@ -26,12 +26,18 @@ import ( "github.com/hajimehoshi/ebiten/v2/internal/glfw" "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" + "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/directx" "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver/opengl" ) -type graphicsDriverGetterImpl struct{} +type graphicsDriverGetterImpl struct { + transparent bool +} func (g *graphicsDriverGetterImpl) 
getAuto() graphicsdriver.Graphics { + if d := g.getDirectX(); d != nil { + return d + } return g.getOpenGL() } @@ -42,7 +48,13 @@ func (*graphicsDriverGetterImpl) getOpenGL() graphicsdriver.Graphics { return nil } -func (*graphicsDriverGetterImpl) getDirectX() graphicsdriver.Graphics { +func (g *graphicsDriverGetterImpl) getDirectX() graphicsdriver.Graphics { + if g.transparent { + return nil + } + if d := directx.Get(); d != nil { + return d + } return nil } diff --git a/shader_test.go b/shader_test.go index 844a710ed..1a11d36cc 100644 --- a/shader_test.go +++ b/shader_test.go @@ -17,6 +17,7 @@ package ebiten_test import ( "image" "image/color" + "math" "testing" "github.com/hajimehoshi/ebiten/v2" @@ -654,3 +655,74 @@ func Fragment(position vec4, texCoord vec2, color vec4) vec4 { } } } + +func TestShaderTextureAt(t *testing.T) { + const w, h = 16, 16 + + src := ebiten.NewImage(w, h) + src.Fill(color.RGBA{0x10, 0x20, 0x30, 0xff}) + + dst := ebiten.NewImage(w, h) + s, err := ebiten.NewShader([]byte(`package main + +func textureAt(uv vec2) vec4 { + return imageSrc0UnsafeAt(uv) +} + +func Fragment(position vec4, texCoord vec2, color vec4) vec4 { + return textureAt(texCoord) +} +`)) + if err != nil { + t.Fatal(err) + } + + op := &ebiten.DrawRectShaderOptions{} + op.Images[0] = src + dst.DrawRectShader(w, h, s, op) + + for j := 0; j < h; j++ { + for i := 0; i < w; i++ { + got := dst.At(i, j).(color.RGBA) + want := color.RGBA{0x10, 0x20, 0x30, 0xff} + if !sameColors(got, want, 2) { + t.Errorf("dst.At(%d, %d): got: %v, want: %v", i, j, got, want) + } + } + } +} + +func TestShaderAtan2(t *testing.T) { + const w, h = 16, 16 + + src := ebiten.NewImage(w, h) + src.Fill(color.RGBA{0x10, 0x20, 0x30, 0xff}) + + dst := ebiten.NewImage(w, h) + s, err := ebiten.NewShader([]byte(`package main + +func Fragment(position vec4, texCoord vec2, color vec4) vec4 { + y := vec4(1, 1, 1, 1) + x := vec4(1, 1, 1, 1) + return atan2(y, x) +} +`)) + if err != nil { + t.Fatal(err) + } + + op := 
&ebiten.DrawRectShaderOptions{} + op.Images[0] = src + dst.DrawRectShader(w, h, s, op) + + for j := 0; j < h; j++ { + for i := 0; i < w; i++ { + got := dst.At(i, j).(color.RGBA) + v := byte(math.Floor(0xff * math.Pi / 4)) + want := color.RGBA{v, v, v, v} + if !sameColors(got, want, 2) { + t.Errorf("dst.At(%d, %d): got: %v, want: %v", i, j, got, want) + } + } + } +}