// Copyright 2022 The Ebiten Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package directx import ( "errors" "fmt" "os" "strings" "time" "unsafe" "golang.org/x/sys/windows" "github.com/hajimehoshi/ebiten/v2/internal/graphics" "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" "github.com/hajimehoshi/ebiten/v2/internal/microsoftgdk" "github.com/hajimehoshi/ebiten/v2/internal/shaderir" "github.com/hajimehoshi/ebiten/v2/internal/shaderir/hlsl" ) const frameCount = 2 // NewGraphics creates an implementation of graphicsdriver.Graphics for DirectX. // The returned graphics value is nil iff the error is not nil. func NewGraphics() (graphicsdriver.Graphics, error) { g := &Graphics{} if err := g.initialize(); err != nil { return nil, err } return g, nil } var inputElementDescs []_D3D12_INPUT_ELEMENT_DESC func init() { position := []byte("POSITION\000") texcoord := []byte("TEXCOORD\000") color := []byte("COLOR\000") inputElementDescs = []_D3D12_INPUT_ELEMENT_DESC{ { SemanticName: &position[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, { SemanticName: &texcoord[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, { SemanticName: &color[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32B32A32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, } } type Graphics struct { debug *_ID3D12Debug device *_ID3D12Device commandQueue *_ID3D12CommandQueue rtvDescriptorHeap *_ID3D12DescriptorHeap rtvDescriptorSize uint32 renderTargets [frameCount]*_ID3D12Resource framePipelineToken _D3D12XBOX_FRAME_PIPELINE_TOKEN fence *_ID3D12Fence fenceValues [frameCount]uint64 fenceWaitEvent windows.Handle allowTearing bool // drawCommandAllocators are command allocators for a 3D engine (DrawIndexedInstanced). // For the word 'engine', see https://docs.microsoft.com/en-us/windows/win32/direct3d12/user-mode-heap-synchronization. // The term 'draw' is used instead of '3D' in this package. drawCommandAllocators [frameCount]*_ID3D12CommandAllocator // copyCommandAllocators are command allocators for a copy engine (CopyTextureRegion). copyCommandAllocators [frameCount]*_ID3D12CommandAllocator // drawCommandList is a command list for a 3D engine (DrawIndexedInstanced). drawCommandList *_ID3D12GraphicsCommandList needFlushDrawCommandList bool // copyCommandList is a command list for a copy engine (CopyTextureRegion). copyCommandList *_ID3D12GraphicsCommandList needFlushCopyCommandList bool // drawCommandList and copyCommandList are exclusive: if one is not empty, the other must be empty. vertices [frameCount][]*_ID3D12Resource indices [frameCount][]*_ID3D12Resource factory *_IDXGIFactory4 swapChain *_IDXGISwapChain4 window windows.HWND frameIndex int prevBeginFrameIndex int // frameStarted is true since Begin until End with present frameStarted bool images map[graphicsdriver.ImageID]*Image screenImage *Image nextImageID graphicsdriver.ImageID disposedImages [frameCount][]*Image shaders map[graphicsdriver.ShaderID]*Shader nextShaderID graphicsdriver.ShaderID disposedShaders [frameCount][]*Shader vsyncEnabled bool transparent bool // occluded reports whether the screen is invisible or not. occluded bool // lastTime is the last time for rendering. lastTime time.Time newScreenWidth int newScreenHeight int pipelineStates } func (g *Graphics) initialize() (ferr error) { var ( useWARP bool useDebugLayer bool ) env := os.Getenv("EBITENGINE_DIRECTX") if env == "" { // For backward compatibility, read the EBITEN_ version. env = os.Getenv("EBITEN_DIRECTX") } for _, t := range strings.Split(env, ",") { switch strings.TrimSpace(t) { case "warp": // TODO: Is WARP available on Xbox? useWARP = true case "debug": useDebugLayer = true } } // Ebitengine itself doesn't require the features level 12 and 11 should be enough, // but some old cards don't work well (#2447). Specify the level 12 by default. var featureLevel _D3D_FEATURE_LEVEL = _D3D_FEATURE_LEVEL_12_0 if env := os.Getenv("EBITENGINE_DIRECTX_FEATURE_LEVEL"); env != "" { switch env { case "11_0": featureLevel = _D3D_FEATURE_LEVEL_11_0 case "11_1": featureLevel = _D3D_FEATURE_LEVEL_11_1 case "12_0": featureLevel = _D3D_FEATURE_LEVEL_12_0 case "12_1": featureLevel = _D3D_FEATURE_LEVEL_12_1 case "12_2": featureLevel = _D3D_FEATURE_LEVEL_12_2 } } // Initialize not only a device but also other members like a fence. // Even if initializing a device succeeds, initializing a fence might fail (#2142). if microsoftgdk.IsXbox() { if err := g.initializeXbox(useWARP, useDebugLayer); err != nil { return err } } else { if err := g.initializeDesktop(useWARP, useDebugLayer, featureLevel); err != nil { return err } } return nil } func (g *Graphics) initializeDesktop(useWARP bool, useDebugLayer bool, featureLevel _D3D_FEATURE_LEVEL) (ferr error) { if err := d3d12.Load(); err != nil { return err } // As g's lifetime is the same as the process's lifetime, debug and other objects are never released // if this initialization succeeds. // The debug interface is optional and might not exist. if useDebugLayer { d, err := _D3D12GetDebugInterface() if err != nil { return err } g.debug = d defer func() { if ferr != nil { g.debug.Release() g.debug = nil } }() g.debug.EnableDebugLayer() } var flag uint32 if g.debug != nil { flag = _DXGI_CREATE_FACTORY_DEBUG } f, err := _CreateDXGIFactory2(flag) if err != nil { return err } g.factory = f defer func() { if ferr != nil { g.factory.Release() g.factory = nil } }() var adapter *_IDXGIAdapter1 if useWARP { a, err := g.factory.EnumWarpAdapter() if err != nil { return err } defer a.Release() adapter = a } else { for i := 0; ; i++ { a, err := g.factory.EnumAdapters1(uint32(i)) if errors.Is(err, _DXGI_ERROR_NOT_FOUND) { break } if err != nil { return err } defer a.Release() desc, err := a.GetDesc1() if err != nil { return err } if desc.Flags&_DXGI_ADAPTER_FLAG_SOFTWARE != 0 { continue } // Test D3D12CreateDevice without creating an actual device. if _, err := _D3D12CreateDevice(unsafe.Pointer(a), featureLevel, &_IID_ID3D12Device, false); err != nil { continue } adapter = a break } } if adapter == nil { return errors.New("directx: DirectX 12 is not supported") } d, err := _D3D12CreateDevice(unsafe.Pointer(adapter), featureLevel, &_IID_ID3D12Device, true) if err != nil { return err } g.device = (*_ID3D12Device)(d) if f, err := g.factory.QueryInterface(&_IID_IDXGIFactory5); err == nil && f != nil { factory := (*_IDXGIFactory5)(f) defer factory.Release() var allowTearing int32 if err := factory.CheckFeatureSupport(_DXGI_FEATURE_PRESENT_ALLOW_TEARING, unsafe.Pointer(&allowTearing), uint32(unsafe.Sizeof(allowTearing))); err == nil && allowTearing != 0 { g.allowTearing = true } } if err := g.initializeMembers(); err != nil { return err } // GetCopyableFootprints might return an invalid value with Wine (#2114). // To check this early, call NewImage here. i, err := g.NewImage(1, 1) if err != nil { return err } i.Dispose() return nil } func (g *Graphics) initializeXbox(useWARP bool, useDebugLayer bool) (ferr error) { if err := d3d12x.Load(); err != nil { return err } params := &_D3D12XBOX_CREATE_DEVICE_PARAMETERS{ Version: microsoftgdk.D3D12SDKVersion(), GraphicsCommandQueueRingSizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, GraphicsScratchMemorySizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, ComputeScratchMemorySizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, } if useDebugLayer { params.ProcessDebugFlags = _D3D12_PROCESS_DEBUG_FLAG_DEBUG_LAYER_ENABLED } d, err := _D3D12XboxCreateDevice(nil, params, &_IID_ID3D12Device) if err != nil { return err } g.device = (*_ID3D12Device)(d) if err := g.initializeMembers(); err != nil { return err } dd, err := g.device.QueryInterface(&_IID_IDXGIDevice) if err != nil { return err } dxgiDevice := (*_IDXGIDevice)(dd) defer dxgiDevice.Release() dxgiAdapter, err := dxgiDevice.GetAdapter() if err != nil { return err } defer dxgiAdapter.Release() dxgiOutput, err := dxgiAdapter.EnumOutputs(0) if err != nil { return err } defer dxgiOutput.Release() if err := g.device.SetFrameIntervalX(dxgiOutput, _D3D12XBOX_FRAME_INTERVAL_60_HZ, frameCount-1, _D3D12XBOX_FRAME_INTERVAL_FLAG_NONE); err != nil { return err } if err := g.device.ScheduleFrameEventX(_D3D12XBOX_FRAME_EVENT_ORIGIN, 0, nil, _D3D12XBOX_SCHEDULE_FRAME_EVENT_FLAG_NONE); err != nil { return err } return nil } func (g *Graphics) initializeMembers() (ferr error) { // Create an event for a fence. e, err := windows.CreateEventEx(nil, nil, 0, windows.EVENT_MODIFY_STATE|windows.SYNCHRONIZE) if err != nil { return fmt.Errorf("directx: CreateEvent failed: %w", err) } g.fenceWaitEvent = e // Create a command queue. desc := _D3D12_COMMAND_QUEUE_DESC{ Type: _D3D12_COMMAND_LIST_TYPE_DIRECT, Flags: _D3D12_COMMAND_QUEUE_FLAG_NONE, } c, err := g.device.CreateCommandQueue(&desc) if err != nil { return err } g.commandQueue = c defer func() { if ferr != nil { g.commandQueue.Release() g.commandQueue = nil } }() // Create command allocators. for i := 0; i < frameCount; i++ { dca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) if err != nil { return err } g.drawCommandAllocators[i] = dca defer func(i int) { if ferr != nil { g.drawCommandAllocators[i].Release() g.drawCommandAllocators[i] = nil } }(i) cca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) if err != nil { return err } g.copyCommandAllocators[i] = cca defer func(i int) { if ferr != nil { g.copyCommandAllocators[i].Release() g.copyCommandAllocators[i] = nil } }(i) } // Create a frame fence. f, err := g.device.CreateFence(0, _D3D12_FENCE_FLAG_NONE) if err != nil { return err } g.fence = f defer func() { if ferr != nil { g.fence.Release() g.fence = nil } }() g.fenceValues[g.frameIndex]++ // Create command lists. dcl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.drawCommandAllocators[0], nil) if err != nil { return err } g.drawCommandList = dcl defer func() { if ferr != nil { g.drawCommandList.Release() g.drawCommandList = nil } }() ccl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.copyCommandAllocators[0], nil) if err != nil { return err } g.copyCommandList = ccl defer func() { if ferr != nil { g.copyCommandList.Release() g.copyCommandList = nil } }() // Close the command list once as this is immediately Reset at Begin. if err := g.drawCommandList.Close(); err != nil { return err } if err := g.copyCommandList.Close(); err != nil { return err } // Create a descriptor heap for RTV. h, err := g.device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, NumDescriptors: frameCount, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } g.rtvDescriptorHeap = h defer func() { if ferr != nil { g.rtvDescriptorHeap.Release() g.rtvDescriptorHeap = nil } }() g.rtvDescriptorSize = g.device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_RTV) if err := g.pipelineStates.initialize(g.device); err != nil { return err } return nil } func (g *Graphics) Initialize() (err error) { // Initialization should already be done. return nil } func createBuffer(device *_ID3D12Device, bufferSize uint64, heapType _D3D12_HEAP_TYPE) (*_ID3D12Resource, error) { state := _D3D12_RESOURCE_STATE_GENERIC_READ() if heapType == _D3D12_HEAP_TYPE_READBACK { state = _D3D12_RESOURCE_STATE_COPY_DEST } r, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: heapType, CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_BUFFER, Alignment: 0, Width: bufferSize, Height: 1, DepthOrArraySize: 1, MipLevels: 1, Format: _DXGI_FORMAT_UNKNOWN, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_ROW_MAJOR, Flags: _D3D12_RESOURCE_FLAG_NONE, }, state, nil) if err != nil { return nil, err } return r, nil } func (g *Graphics) updateSwapChain(width, height int) error { if g.window == 0 { return errors.New("directx: the window handle is not initialized yet") } if g.swapChain == nil { if microsoftgdk.IsXbox() { if err := g.initSwapChainXbox(width, height); err != nil { return err } } else { if err := g.initSwapChainDesktop(width, height); err != nil { return err } } return nil } if microsoftgdk.IsXbox() { return errors.New("directx: resizing should never happen on Xbox") } g.newScreenWidth = width g.newScreenHeight = height return nil } func (g *Graphics) initSwapChainDesktop(width, height int) (ferr error) { // Create a swap chain. // // DXGI_ALPHA_MODE_PREMULTIPLIED doesn't work with a HWND well. // // IDXGIFactory::CreateSwapChain: Alpha blended swapchains must be created with CreateSwapChainForComposition, // or CreateSwapChainForCoreWindow with the DXGI_SWAP_CHAIN_FLAG_FOREGROUND_LAYER flag desc := &_DXGI_SWAP_CHAIN_DESC1{ Width: uint32(width), Height: uint32(height), Format: _DXGI_FORMAT_B8G8R8A8_UNORM, BufferUsage: _DXGI_USAGE_RENDER_TARGET_OUTPUT, BufferCount: frameCount, SwapEffect: _DXGI_SWAP_EFFECT_FLIP_DISCARD, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, } if g.allowTearing { desc.Flags |= uint32(_DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) } s, err := g.factory.CreateSwapChainForHwnd(unsafe.Pointer(g.commandQueue), g.window, desc, nil, nil) if err != nil { return err } s.As(&g.swapChain) defer func() { if ferr != nil { g.swapChain.Release() g.swapChain = nil } }() // MakeWindowAssociation should be called after swap chain creation. // https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgifactory-makewindowassociation if err := g.factory.MakeWindowAssociation(g.window, _DXGI_MWA_NO_WINDOW_CHANGES|_DXGI_MWA_NO_ALT_ENTER); err != nil { return err } // TODO: Get the current buffer index? if err := g.createRenderTargetViewsDesktop(); err != nil { return err } g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) return nil } func (g *Graphics) initSwapChainXbox(width, height int) (ferr error) { h, err := g.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } for i := 0; i < frameCount; i++ { r, err := g.device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: _D3D12_HEAP_TYPE_DEFAULT, CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_ALLOW_DISPLAY, &_D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, Alignment: 0, Width: uint64(width), Height: uint32(height), DepthOrArraySize: 1, MipLevels: 1, // Use a single mipmap level Format: _DXGI_FORMAT_B8G8R8A8_UNORM, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, Flags: _D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, }, _D3D12_RESOURCE_STATE_PRESENT, &_D3D12_CLEAR_VALUE{ Format: _DXGI_FORMAT_B8G8R8A8_UNORM, }) if err != nil { return err } g.renderTargets[i] = r defer func(i int) { if ferr != nil { g.renderTargets[i].Release() g.renderTargets[i] = nil } }(i) g.device.CreateRenderTargetView(r, &_D3D12_RENDER_TARGET_VIEW_DESC{ Format: _DXGI_FORMAT_B8G8R8A8_UNORM, ViewDimension: _D3D12_RTV_DIMENSION_TEXTURE2D, }, h) h.Offset(1, g.rtvDescriptorSize) } return nil } func (g *Graphics) resizeSwapChainDesktop(width, height int) error { // All resources must be released before ResizeBuffers. if err := g.waitForCommandQueue(); err != nil { return err } g.releaseResources(g.frameIndex) for i := 0; i < frameCount; i++ { g.fenceValues[i] = g.fenceValues[g.frameIndex] } for _, r := range g.renderTargets { r.Release() } var flag uint32 if g.allowTearing { flag |= uint32(_DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) } if err := g.swapChain.ResizeBuffers(frameCount, uint32(width), uint32(height), _DXGI_FORMAT_B8G8R8A8_UNORM, flag); err != nil { return err } if err := g.createRenderTargetViewsDesktop(); err != nil { return err } return nil } func (g *Graphics) createRenderTargetViewsDesktop() (ferr error) { // Create frame resources. h, err := g.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } for i := 0; i < frameCount; i++ { r, err := g.swapChain.GetBuffer(uint32(i)) if err != nil { return err } g.renderTargets[i] = r defer func(i int) { if ferr != nil { g.renderTargets[i].Release() g.renderTargets[i] = nil } }(i) g.device.CreateRenderTargetView(r, nil, h) h.Offset(1, g.rtvDescriptorSize) } return nil } func (g *Graphics) SetWindow(window uintptr) { g.window = windows.HWND(window) // TODO: need to update the swap chain? } func (g *Graphics) Begin() error { if microsoftgdk.IsXbox() && !g.frameStarted { g.framePipelineToken = _D3D12XBOX_FRAME_PIPELINE_TOKEN_NULL if err := g.device.WaitFrameEventX(_D3D12XBOX_FRAME_EVENT_ORIGIN, windows.INFINITE, nil, _D3D12XBOX_WAIT_FRAME_EVENT_FLAG_NONE, &g.framePipelineToken); err != nil { return err } } g.frameStarted = true if g.prevBeginFrameIndex != g.frameIndex { if err := g.drawCommandAllocators[g.frameIndex].Reset(); err != nil { return err } if err := g.copyCommandAllocators[g.frameIndex].Reset(); err != nil { return err } } g.prevBeginFrameIndex = g.frameIndex if err := g.drawCommandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { return err } if err := g.copyCommandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { return err } return nil } func (g *Graphics) End(present bool) error { // The swap chain might still be nil when Begin-End is invoked not by a frame (e.g., Image.At). // As copyCommandList and drawCommandList are exclusive, the order should not matter here. if err := g.flushCommandList(g.copyCommandList); err != nil { return err } if err := g.copyCommandList.Close(); err != nil { return err } // screenImage can be nil in tests. if present && g.screenImage != nil { if rb, ok := g.screenImage.transiteState(_D3D12_RESOURCE_STATE_PRESENT); ok { g.drawCommandList.ResourceBarrier([]_D3D12_RESOURCE_BARRIER_Transition{rb}) } } if err := g.drawCommandList.Close(); err != nil { return err } g.commandQueue.ExecuteCommandLists([]*_ID3D12GraphicsCommandList{g.drawCommandList}) // Release vertices and indices buffers when too many ones were created. // The threshold is an arbitrary number. // This is needed espciallly for testings, where present is always false. if len(g.vertices[g.frameIndex]) >= 16 { if err := g.waitForCommandQueue(); err != nil { return err } g.releaseResources(g.frameIndex) g.releaseVerticesAndIndices(g.frameIndex) } g.pipelineStates.resetConstantBuffers(g.frameIndex) if present { if microsoftgdk.IsXbox() { if err := g.presentXbox(); err != nil { return err } } else { if err := g.presentDesktop(); err != nil { return err } } if g.newScreenWidth != 0 && g.newScreenHeight != 0 { if err := g.resizeSwapChainDesktop(g.newScreenWidth, g.newScreenHeight); err != nil { return err } g.newScreenWidth = 0 g.newScreenHeight = 0 } if err := g.moveToNextFrame(); err != nil { return err } g.releaseResources(g.frameIndex) g.releaseVerticesAndIndices(g.frameIndex) g.frameStarted = false } return nil } func (g *Graphics) presentDesktop() error { if g.swapChain == nil { return fmt.Errorf("directx: the swap chain is not initialized yet at End") } var syncInterval uint32 var flags _DXGI_PRESENT if g.occluded { // The screen is not visible. Test whether we can resume. flags |= _DXGI_PRESENT_TEST } else { // Do actual rendering only when the screen is visible. if g.vsyncEnabled { syncInterval = 1 } else if g.allowTearing { flags |= _DXGI_PRESENT_ALLOW_TEARING } } occluded, err := g.swapChain.Present(syncInterval, uint32(flags)) if err != nil { return err } g.occluded = occluded // Reduce FPS when the screen is invisible. now := time.Now() if g.occluded { if delta := 100*time.Millisecond - now.Sub(g.lastTime); delta > 0 { time.Sleep(delta) } } g.lastTime = now return nil } func (g *Graphics) presentXbox() error { return g.commandQueue.PresentX(1, &_D3D12XBOX_PRESENT_PLANE_PARAMETERS{ Token: g.framePipelineToken, ResourceCount: 1, ppResources: &g.renderTargets[g.frameIndex], }, nil) } func (g *Graphics) moveToNextFrame() error { fv := g.fenceValues[g.frameIndex] if err := g.commandQueue.Signal(g.fence, fv); err != nil { return err } // Update the frame index. if microsoftgdk.IsXbox() { g.frameIndex = (g.frameIndex + 1) % frameCount } else { g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) } if g.fence.GetCompletedValue() < g.fenceValues[g.frameIndex] { if err := g.fence.SetEventOnCompletion(g.fenceValues[g.frameIndex], g.fenceWaitEvent); err != nil { return err } if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { return err } } g.fenceValues[g.frameIndex] = fv + 1 return nil } func (g *Graphics) releaseResources(frameIndex int) { for i, img := range g.disposedImages[frameIndex] { img.disposeImpl() g.disposedImages[frameIndex][i] = nil } g.disposedImages[frameIndex] = g.disposedImages[frameIndex][:0] for i, s := range g.disposedShaders[frameIndex] { s.disposeImpl() g.disposedShaders[frameIndex][i] = nil } g.disposedShaders[frameIndex] = g.disposedShaders[frameIndex][:0] } func (g *Graphics) releaseVerticesAndIndices(frameIndex int) { for i := range g.vertices[frameIndex] { g.vertices[frameIndex][i].Release() g.vertices[frameIndex][i] = nil } g.vertices[frameIndex] = g.vertices[frameIndex][:0] for i := range g.indices[frameIndex] { g.indices[frameIndex][i].Release() g.indices[frameIndex][i] = nil } g.indices[frameIndex] = g.indices[frameIndex][:0] } // flushCommandList executes commands in the command list and waits for its completion. // // TODO: This is not efficient. Is it possible to make two command lists work in parallel? func (g *Graphics) flushCommandList(commandList *_ID3D12GraphicsCommandList) error { switch commandList { case g.drawCommandList: if !g.needFlushDrawCommandList { return nil } g.needFlushDrawCommandList = false case g.copyCommandList: if !g.needFlushCopyCommandList { return nil } g.needFlushCopyCommandList = false } if err := commandList.Close(); err != nil { return err } g.commandQueue.ExecuteCommandLists([]*_ID3D12GraphicsCommandList{commandList}) if err := g.waitForCommandQueue(); err != nil { return err } switch commandList { case g.drawCommandList: if err := g.drawCommandAllocators[g.frameIndex].Reset(); err != nil { return err } if err := commandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { return err } case g.copyCommandList: if err := g.copyCommandAllocators[g.frameIndex].Reset(); err != nil { return err } if err := commandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { return err } } return nil } func (g *Graphics) waitForCommandQueue() error { fv := g.fenceValues[g.frameIndex] if err := g.commandQueue.Signal(g.fence, fv); err != nil { return err } if err := g.fence.SetEventOnCompletion(fv, g.fenceWaitEvent); err != nil { return err } if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { return err } g.fenceValues[g.frameIndex]++ return nil } func (g *Graphics) SetTransparent(transparent bool) { g.transparent = transparent } func (g *Graphics) SetVertices(vertices []float32, indices []uint16) (ferr error) { // Create buffers if necessary. vidx := len(g.vertices[g.frameIndex]) if cap(g.vertices[g.frameIndex]) > vidx { g.vertices[g.frameIndex] = g.vertices[g.frameIndex][:vidx+1] } else { g.vertices[g.frameIndex] = append(g.vertices[g.frameIndex], nil) } if g.vertices[g.frameIndex][vidx] == nil { // TODO: Use the default heap for efficiently. See the official example HelloTriangle. vs, err := createBuffer(g.device, graphics.IndicesCount*graphics.VertexFloatCount*uint64(unsafe.Sizeof(float32(0))), _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } g.vertices[g.frameIndex][vidx] = vs defer func() { if ferr != nil { g.vertices[g.frameIndex][vidx].Release() g.vertices[g.frameIndex][vidx] = nil } }() } iidx := len(g.indices[g.frameIndex]) if cap(g.indices[g.frameIndex]) > iidx { g.indices[g.frameIndex] = g.indices[g.frameIndex][:iidx+1] } else { g.indices[g.frameIndex] = append(g.indices[g.frameIndex], nil) } if g.indices[g.frameIndex][iidx] == nil { is, err := createBuffer(g.device, graphics.IndicesCount*uint64(unsafe.Sizeof(uint16(0))), _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } g.indices[g.frameIndex][iidx] = is defer func() { if ferr != nil { g.indices[g.frameIndex][iidx].Release() g.indices[g.frameIndex][iidx] = nil } }() } m, err := g.vertices[g.frameIndex][vidx].Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } copy(unsafe.Slice((*float32)(unsafe.Pointer(m)), len(vertices)), vertices) g.vertices[g.frameIndex][vidx].Unmap(0, nil) m, err = g.indices[g.frameIndex][iidx].Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } copy(unsafe.Slice((*uint16)(unsafe.Pointer(m)), len(indices)), indices) g.indices[g.frameIndex][iidx].Unmap(0, nil) return nil } func (g *Graphics) NewImage(width, height int) (graphicsdriver.Image, error) { desc := _D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, Alignment: 0, Width: uint64(graphics.InternalImageSize(width)), Height: uint32(graphics.InternalImageSize(height)), DepthOrArraySize: 1, MipLevels: 0, Format: _DXGI_FORMAT_R8G8B8A8_UNORM, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, Flags: _D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, } state := _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE t, err := g.device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: _D3D12_HEAP_TYPE_DEFAULT, // Upload? CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &desc, state, nil) if err != nil { return nil, err } layouts, _, _, totalBytes := g.device.GetCopyableFootprints(&desc, 0, 1, 0) if totalBytes == ^uint64(0) { return nil, fmt.Errorf("directx: GetCopyableFootprints returned an invalid total bytes") } i := &Image{ graphics: g, id: g.genNextImageID(), width: width, height: height, texture: t, states: [frameCount]_D3D12_RESOURCE_STATES{state}, layouts: layouts, totalBytes: totalBytes, } g.addImage(i) return i, nil } func (g *Graphics) NewScreenFramebufferImage(width, height int) (graphicsdriver.Image, error) { if err := g.updateSwapChain(width, height); err != nil { return nil, err } i := &Image{ graphics: g, id: g.genNextImageID(), width: width, height: height, screen: true, states: [frameCount]_D3D12_RESOURCE_STATES{0, 0}, } g.addImage(i) g.screenImage = i return i, nil } func (g *Graphics) addImage(img *Image) { if g.images == nil { g.images = map[graphicsdriver.ImageID]*Image{} } if _, ok := g.images[img.id]; ok { panic(fmt.Sprintf("directx: image ID %d was already registered", img.id)) } g.images[img.id] = img } func (g *Graphics) removeImage(img *Image) { delete(g.images, img.id) g.disposedImages[g.frameIndex] = append(g.disposedImages[g.frameIndex], img) } func (g *Graphics) addShader(s *Shader) { if g.shaders == nil { g.shaders = map[graphicsdriver.ShaderID]*Shader{} } if _, ok := g.shaders[s.id]; ok { panic(fmt.Sprintf("directx: shader ID %d was already registered", s.id)) } g.shaders[s.id] = s } func (g *Graphics) removeShader(s *Shader) { delete(g.shaders, s.id) g.disposedShaders[g.frameIndex] = append(g.disposedShaders[g.frameIndex], s) } func (g *Graphics) SetVsyncEnabled(enabled bool) { g.vsyncEnabled = enabled } func (g *Graphics) SetFullscreen(fullscreen bool) { } func (g *Graphics) NeedsRestoring() bool { return false } func (g *Graphics) NeedsClearingScreen() bool { // TODO: Confirm this is really true. return true } func (g *Graphics) IsGL() bool { return false } func (g *Graphics) IsDirectX() bool { return true } func (g *Graphics) MaxImageSize() int { return _D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION } func (g *Graphics) NewShader(program *shaderir.Program) (graphicsdriver.Shader, error) { src, offsets := hlsl.Compile(program) vsh, psh, err := newShader([]byte(src), nil) if err != nil { return nil, err } s := &Shader{ graphics: g, id: g.genNextShaderID(), uniformTypes: program.Uniforms, uniformOffsets: offsets, vertexShader: vsh, pixelShader: psh, } g.addShader(s) return s, nil } func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.ShaderImageCount]graphicsdriver.ImageID, shaderID graphicsdriver.ShaderID, dstRegions []graphicsdriver.DstRegion, indexOffset int, blend graphicsdriver.Blend, uniforms []uint32, evenOdd bool) error { if shaderID == graphicsdriver.InvalidShaderID { return fmt.Errorf("directx: shader ID is invalid") } if err := g.flushCommandList(g.copyCommandList); err != nil { return err } // Release constant buffers when too many ones will be created. numPipelines := 1 if evenOdd { numPipelines = 2 } if len(g.pipelineStates.constantBuffers[g.frameIndex])+numPipelines > numDescriptorsPerFrame { if err := g.flushCommandList(g.drawCommandList); err != nil { return err } g.pipelineStates.releaseConstantBuffers(g.frameIndex) } dst := g.images[dstID] var resourceBarriers []_D3D12_RESOURCE_BARRIER_Transition if rb, ok := dst.transiteState(_D3D12_RESOURCE_STATE_RENDER_TARGET); ok { resourceBarriers = append(resourceBarriers, rb) } var srcImages [graphics.ShaderImageCount]*Image for i, srcID := range srcs { src := g.images[srcID] if src == nil { continue } srcImages[i] = src if rb, ok := src.transiteState(_D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); ok { resourceBarriers = append(resourceBarriers, rb) } } if len(resourceBarriers) > 0 { g.drawCommandList.ResourceBarrier(resourceBarriers) } if err := dst.setAsRenderTarget(g.drawCommandList, g.device, evenOdd); err != nil { return err } shader := g.shaders[shaderID] adjustedUniforms := shader.adjustUniforms(uniforms, shader) w, h := dst.internalSize() g.needFlushDrawCommandList = true g.drawCommandList.RSSetViewports([]_D3D12_VIEWPORT{ { TopLeftX: 0, TopLeftY: 0, Width: float32(w), Height: float32(h), MinDepth: _D3D12_MIN_DEPTH, MaxDepth: _D3D12_MAX_DEPTH, }, }) g.drawCommandList.IASetPrimitiveTopology(_D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) g.drawCommandList.IASetVertexBuffers(0, []_D3D12_VERTEX_BUFFER_VIEW{ { BufferLocation: g.vertices[g.frameIndex][len(g.vertices[g.frameIndex])-1].GetGPUVirtualAddress(), SizeInBytes: graphics.IndicesCount * graphics.VertexFloatCount * uint32(unsafe.Sizeof(float32(0))), StrideInBytes: graphics.VertexFloatCount * uint32(unsafe.Sizeof(float32(0))), }, }) g.drawCommandList.IASetIndexBuffer(&_D3D12_INDEX_BUFFER_VIEW{ BufferLocation: g.indices[g.frameIndex][len(g.indices[g.frameIndex])-1].GetGPUVirtualAddress(), SizeInBytes: graphics.IndicesCount * uint32(unsafe.Sizeof(uint16(0))), Format: _DXGI_FORMAT_R16_UINT, }) if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, adjustedUniforms, blend, indexOffset, evenOdd); err != nil { return err } return nil } func (g *Graphics) genNextImageID() graphicsdriver.ImageID { g.nextImageID++ return g.nextImageID } func (g *Graphics) genNextShaderID() graphicsdriver.ShaderID { g.nextShaderID++ return g.nextShaderID } type Image struct { graphics *Graphics id graphicsdriver.ImageID width int height int screen bool states [frameCount]_D3D12_RESOURCE_STATES texture *_ID3D12Resource stencil *_ID3D12Resource layouts _D3D12_PLACED_SUBRESOURCE_FOOTPRINT totalBytes uint64 uploadingStagingBuffer *_ID3D12Resource readingStagingBuffer *_ID3D12Resource rtvDescriptorHeap *_ID3D12DescriptorHeap dsvDescriptorHeap *_ID3D12DescriptorHeap } func (i *Image) ID() graphicsdriver.ImageID { return i.id } func (i *Image) Dispose() { // Dipose the images later as this image might still be used. i.graphics.removeImage(i) } func (i *Image) disposeImpl() { if i.dsvDescriptorHeap != nil { i.dsvDescriptorHeap.Release() i.dsvDescriptorHeap = nil } if i.rtvDescriptorHeap != nil { i.rtvDescriptorHeap.Release() i.rtvDescriptorHeap = nil } if i.uploadingStagingBuffer != nil { i.uploadingStagingBuffer.Release() i.uploadingStagingBuffer = nil } if i.readingStagingBuffer != nil { i.readingStagingBuffer.Release() i.readingStagingBuffer = nil } if i.stencil != nil { i.stencil.Release() i.stencil = nil } if i.texture != nil { i.texture.Release() i.texture = nil } } func (*Image) IsInvalidated() bool { return false } func (i *Image) ensureUploadingStagingBuffer() error { if i.uploadingStagingBuffer != nil { return nil } var err error i.uploadingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } return nil } func (i *Image) ensureReadingStagingBuffer() error { if i.readingStagingBuffer != nil { return nil } var err error i.readingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_READBACK) if err != nil { return err } return nil } func (i *Image) ReadPixels(buf []byte, x, y, width, height int) error { if i.screen { return errors.New("directx: Pixels cannot be called on the screen") } if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { return err } if err := i.ensureReadingStagingBuffer(); err != nil { return err } if rb, ok := i.transiteState(_D3D12_RESOURCE_STATE_COPY_SOURCE); ok { i.graphics.copyCommandList.ResourceBarrier([]_D3D12_RESOURCE_BARRIER_Transition{rb}) } m, err := i.readingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } dst := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ pResource: i.readingStagingBuffer, Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, PlacedFootprint: i.layouts, } src := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ pResource: i.texture, Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, SubresourceIndex: 0, } i.graphics.needFlushCopyCommandList = true i.graphics.copyCommandList.CopyTextureRegion_PlacedFootPrint_SubresourceIndex( &dst, uint32(x), uint32(y), 0, &src, &_D3D12_BOX{ left: 0, top: 0, front: 0, right: uint32(width), bottom: uint32(height), back: 1, }) if err := i.graphics.flushCommandList(i.graphics.copyCommandList); err != nil { return err } dstBytes := unsafe.Slice((*byte)(unsafe.Pointer(m)), i.totalBytes) for j := 0; j < height; j++ { copy(buf[j*width*4:(j+1)*width*4], dstBytes[j*int(i.layouts.Footprint.RowPitch):]) } i.readingStagingBuffer.Unmap(0, nil) return nil } func (i *Image) WritePixels(args []*graphicsdriver.WritePixelsArgs) error { if i.screen { return errors.New("directx: WritePixels cannot be called on the screen") } if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { return err } if err := i.ensureUploadingStagingBuffer(); err != nil { return err } if rb, ok := i.transiteState(_D3D12_RESOURCE_STATE_COPY_DEST); ok { i.graphics.copyCommandList.ResourceBarrier([]_D3D12_RESOURCE_BARRIER_Transition{rb}) } m, err := i.uploadingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } i.graphics.needFlushCopyCommandList = true srcBytes := unsafe.Slice((*byte)(unsafe.Pointer(m)), i.totalBytes) for _, a := range args { for j := 0; j < a.Height; j++ { copy(srcBytes[(a.Y+j)*int(i.layouts.Footprint.RowPitch)+a.X*4:], a.Pixels[j*a.Width*4:(j+1)*a.Width*4]) } } for _, a := range args { dst := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ pResource: i.texture, Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, SubresourceIndex: 0, } src := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ pResource: i.uploadingStagingBuffer, Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, PlacedFootprint: i.layouts, } i.graphics.copyCommandList.CopyTextureRegion_SubresourceIndex_PlacedFootPrint( &dst, uint32(a.X), uint32(a.Y), 0, &src, &_D3D12_BOX{ left: uint32(a.X), top: uint32(a.Y), front: 0, right: uint32(a.X + a.Width), bottom: uint32(a.Y + a.Height), back: 1, }) } i.uploadingStagingBuffer.Unmap(0, nil) return nil } func (i *Image) resource() *_ID3D12Resource { if i.screen { return i.graphics.renderTargets[i.graphics.frameIndex] } return i.texture } func (i *Image) state() _D3D12_RESOURCE_STATES { if i.screen { return i.states[i.graphics.frameIndex] } return i.states[0] } func (i *Image) setState(newState _D3D12_RESOURCE_STATES) { if i.screen { i.states[i.graphics.frameIndex] = newState return } i.states[0] = newState } func (i *Image) transiteState(newState _D3D12_RESOURCE_STATES) (_D3D12_RESOURCE_BARRIER_Transition, bool) { if i.state() == newState { return _D3D12_RESOURCE_BARRIER_Transition{}, false } oldState := i.state() i.setState(newState) return _D3D12_RESOURCE_BARRIER_Transition{ Type: _D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, Flags: _D3D12_RESOURCE_BARRIER_FLAG_NONE, Transition: _D3D12_RESOURCE_TRANSITION_BARRIER{ pResource: i.resource(), Subresource: _D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, StateBefore: oldState, StateAfter: newState, }, }, true } func (i *Image) internalSize() (int, int) { if i.screen { return i.width, i.height } return graphics.InternalImageSize(i.width), graphics.InternalImageSize(i.height) } func (i *Image) setAsRenderTarget(drawCommandList *_ID3D12GraphicsCommandList, device *_ID3D12Device, useStencil bool) error { if err := i.ensureRenderTargetView(device); err != nil { return err } if i.screen { if useStencil { return fmt.Errorf("directx: stencils are not available on the screen framebuffer") } rtv, err := i.graphics.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } rtv.Offset(int32(i.graphics.frameIndex), i.graphics.rtvDescriptorSize) drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, nil) return nil } rtv, err := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } if !useStencil { drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, nil) return nil } if err := i.ensureDepthStencilView(device); err != nil { return err } dsv, err := i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } drawCommandList.OMSetStencilRef(0) drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, &dsv) drawCommandList.ClearDepthStencilView(dsv, _D3D12_CLEAR_FLAG_STENCIL, 0, 0, nil) return nil } func (i *Image) ensureRenderTargetView(device *_ID3D12Device) error { if i.screen { return nil } if i.rtvDescriptorHeap != nil { return nil } h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, NumDescriptors: 1, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } i.rtvDescriptorHeap = h rtv, err := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } device.CreateRenderTargetView(i.texture, nil, rtv) return nil } func (i *Image) ensureDepthStencilView(device *_ID3D12Device) error { if i.screen { return fmt.Errorf("directx: stencils are not available on the screen framebuffer") } if i.dsvDescriptorHeap != nil { return nil } h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_DSV, NumDescriptors: 1, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } i.dsvDescriptorHeap = h dsv, err := i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } if i.stencil == nil { s, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: _D3D12_HEAP_TYPE_DEFAULT, CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, Alignment: 0, Width: uint64(graphics.InternalImageSize(i.width)), Height: uint32(graphics.InternalImageSize(i.height)), DepthOrArraySize: 1, MipLevels: 0, Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, Flags: _D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL, }, _D3D12_RESOURCE_STATE_DEPTH_WRITE, &_D3D12_CLEAR_VALUE{ Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, }) if err != nil { return err } i.stencil = s } device.CreateDepthStencilView(i.stencil, nil, dsv) return nil } type stencilMode int const ( prepareStencil stencilMode = iota drawWithStencil noStencil ) type pipelineStateKey struct { blend graphicsdriver.Blend stencilMode stencilMode screen bool } type Shader struct { graphics *Graphics id graphicsdriver.ShaderID uniformTypes []shaderir.Type uniformOffsets []int vertexShader *_ID3DBlob pixelShader *_ID3DBlob pipelineStates map[pipelineStateKey]*_ID3D12PipelineState } func (s *Shader) ID() graphicsdriver.ShaderID { return s.id } func (s *Shader) Dispose() { s.graphics.removeShader(s) } func (s *Shader) disposeImpl() { for c, p := range s.pipelineStates { p.Release() delete(s.pipelineStates, c) } if s.pixelShader != nil { s.pixelShader.Release() s.pixelShader = nil } if s.vertexShader != nil { s.vertexShader.Release() s.vertexShader = nil } } func (s *Shader) pipelineState(blend graphicsdriver.Blend, stencilMode stencilMode, screen bool) (*_ID3D12PipelineState, error) { key := pipelineStateKey{ blend: blend, stencilMode: stencilMode, screen: screen, } if state, ok := s.pipelineStates[key]; ok { return state, nil } state, err := s.graphics.pipelineStates.newPipelineState(s.graphics.device, s.vertexShader, s.pixelShader, blend, stencilMode, screen) if err != nil { return nil, err } if s.pipelineStates == nil { s.pipelineStates = map[pipelineStateKey]*_ID3D12PipelineState{} } s.pipelineStates[key] = state return state, nil } func (s *Shader) adjustUniforms(uniforms []uint32, shader *Shader) []uint32 { var fs []uint32 var idx int for i, typ := range shader.uniformTypes { if len(fs) < s.uniformOffsets[i]/4 { fs = append(fs, make([]uint32, s.uniformOffsets[i]/4-len(fs))...) } n := typ.Uint32Count() switch typ.Main { case shaderir.Float: fs = append(fs, uniforms[idx:idx+1]...) case shaderir.Int: fs = append(fs, uniforms[idx:idx+1]...) case shaderir.Vec2, shaderir.IVec2: fs = append(fs, uniforms[idx:idx+2]...) case shaderir.Vec3, shaderir.IVec3: fs = append(fs, uniforms[idx:idx+3]...) case shaderir.Vec4, shaderir.IVec4: fs = append(fs, uniforms[idx:idx+4]...) case shaderir.Mat2: fs = append(fs, uniforms[idx+0], uniforms[idx+2], 0, 0, uniforms[idx+1], uniforms[idx+3], ) case shaderir.Mat3: fs = append(fs, uniforms[idx+0], uniforms[idx+3], uniforms[idx+6], 0, uniforms[idx+1], uniforms[idx+4], uniforms[idx+7], 0, uniforms[idx+2], uniforms[idx+5], uniforms[idx+8], ) case shaderir.Mat4: if i == graphics.ProjectionMatrixUniformVariableIndex { // In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't // match. Then, the Y direction must be inverted. // Invert the sign bits as float32 values. fs = append(fs, uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12], uniforms[idx+1]^(1<<31), uniforms[idx+5]^(1<<31), uniforms[idx+9]^(1<<31), uniforms[idx+13]^(1<<31), uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14], uniforms[idx+3], uniforms[idx+7], uniforms[idx+11], uniforms[idx+15], ) } else { fs = append(fs, uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12], uniforms[idx+1], uniforms[idx+5], uniforms[idx+9], uniforms[idx+13], uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14], uniforms[idx+3], uniforms[idx+7], uniforms[idx+11], uniforms[idx+15], ) } case shaderir.Array: // Each element is aligned to the boundary. switch typ.Sub[0].Main { case shaderir.Float: for j := 0; j < typ.Length; j++ { fs = append(fs, uniforms[idx+j]) if j < typ.Length-1 { fs = append(fs, 0, 0, 0) } } case shaderir.Int: for j := 0; j < typ.Length; j++ { fs = append(fs, uniforms[idx+j]) if j < typ.Length-1 { fs = append(fs, 0, 0, 0) } } case shaderir.Vec2, shaderir.IVec2: for j := 0; j < typ.Length; j++ { fs = append(fs, uniforms[idx+2*j:idx+2*(j+1)]...) if j < typ.Length-1 { fs = append(fs, 0, 0) } } case shaderir.Vec3, shaderir.IVec3: for j := 0; j < typ.Length; j++ { fs = append(fs, uniforms[idx+3*j:idx+3*(j+1)]...) if j < typ.Length-1 { fs = append(fs, 0) } } case shaderir.Vec4, shaderir.IVec4: fs = append(fs, uniforms[idx:idx+4*typ.Length]...) case shaderir.Mat2: for j := 0; j < typ.Length; j++ { u := uniforms[idx+4*j : idx+4*(j+1)] fs = append(fs, u[0], u[2], 0, 0, u[1], u[3], 0, 0, ) } if typ.Length > 0 { fs = fs[:len(fs)-2] } case shaderir.Mat3: for j := 0; j < typ.Length; j++ { u := uniforms[idx+9*j : idx+9*(j+1)] fs = append(fs, u[0], u[3], u[6], 0, u[1], u[4], u[7], 0, u[2], u[5], u[8], 0, ) } if typ.Length > 0 { fs = fs[:len(fs)-1] } case shaderir.Mat4: for j := 0; j < typ.Length; j++ { u := uniforms[idx+16*j : idx+16*(j+1)] fs = append(fs, u[0], u[4], u[8], u[12], u[1], u[5], u[9], u[13], u[2], u[6], u[10], u[14], u[3], u[7], u[11], u[15], ) } default: panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String())) } default: panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String())) } idx += n } return fs }