// Copyright 2022 The Ebiten Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package directx import ( "errors" "fmt" "os" "reflect" "strings" "unsafe" "golang.org/x/sys/windows" "github.com/hajimehoshi/ebiten/v2/internal/graphics" "github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver" "github.com/hajimehoshi/ebiten/v2/internal/microsoftgdk" "github.com/hajimehoshi/ebiten/v2/internal/shaderir" "github.com/hajimehoshi/ebiten/v2/internal/shaderir/hlsl" ) const frameCount = 2 // NewGraphics creates an implementation of graphicsdriver.Graphcis for DirectX. // The returned graphics value is nil iff the error is not nil. func NewGraphics() (graphicsdriver.Graphics, error) { const is64bit = uint64(^uintptr(0)) == ^uint64(0) // In 32bit machines, DirectX is not used because // 1) The functions syscall.Syscall cannot accept 64bit values as one argument // 2) The struct layouts can be different // TODO: Support DirectX for 32bit machines (#2088). if !is64bit { return nil, fmt.Errorf("directx: DirectX is not available on a 32bit machine") } g := &Graphics{} if err := g.initialize(); err != nil { return nil, err } return g, nil } var inputElementDescs []_D3D12_INPUT_ELEMENT_DESC func init() { position := []byte("POSITION\000") texcoord := []byte("TEXCOORD\000") color := []byte("COLOR\000") inputElementDescs = []_D3D12_INPUT_ELEMENT_DESC{ { SemanticName: &position[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, { SemanticName: &texcoord[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, { SemanticName: &color[0], SemanticIndex: 0, Format: _DXGI_FORMAT_R32G32B32A32_FLOAT, InputSlot: 0, AlignedByteOffset: _D3D12_APPEND_ALIGNED_ELEMENT, InputSlotClass: _D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, InstanceDataStepRate: 0, }, } } type Graphics struct { debug *_ID3D12Debug device *_ID3D12Device commandQueue *_ID3D12CommandQueue rtvDescriptorHeap *_ID3D12DescriptorHeap rtvDescriptorSize uint32 renderTargets [frameCount]*_ID3D12Resource fence *_ID3D12Fence fenceValues [frameCount]uint64 fenceWaitEvent windows.Handle // drawCommandAllocators are command allocators for a 3D engine (DrawIndexedInstanced). // For the word 'engine', see https://docs.microsoft.com/en-us/windows/win32/direct3d12/user-mode-heap-synchronization. // The term 'draw' is used instead of '3D' in this package. drawCommandAllocators [frameCount]*_ID3D12CommandAllocator // copyCommandAllocators are command allocators for a copy engine (CopyTextureRegion). copyCommandAllocators [frameCount]*_ID3D12CommandAllocator // drawCommandList is a command list for a 3D engine (DrawIndexedInstanced). drawCommandList *_ID3D12GraphicsCommandList // copyCommandList is a command list for a copy engine (CopyTextureRegion). copyCommandList *_ID3D12GraphicsCommandList // drawCommandList and copyCommandList are exclusive: if one is not empty, the other must be empty. vertices [frameCount][]*_ID3D12Resource indices [frameCount][]*_ID3D12Resource factory *_IDXGIFactory4 adapter *_IDXGIAdapter1 swapChain *_IDXGISwapChain4 window windows.HWND frameIndex int images map[graphicsdriver.ImageID]*Image screenImage *Image nextImageID graphicsdriver.ImageID disposedImages [frameCount][]*Image shaders map[graphicsdriver.ShaderID]*Shader nextShaderID graphicsdriver.ShaderID disposedShaders [frameCount][]*Shader vsyncEnabled bool transparent bool pipelineStates } func (g *Graphics) initialize() (ferr error) { var ( useWARP bool useDebugLayer bool ) for _, t := range strings.Split(os.Getenv("EBITEN_DIRECTX"), ",") { switch strings.TrimSpace(t) { case "warp": // TODO: Is WARP available on Xbox? useWARP = true case "debug": useDebugLayer = true } } // Initialize not only a device but also other members like a fence. // Even if initializing a device succeeds, initializing a fence might fail (#2142). if microsoftgdk.IsXbox() { if err := g.initializeXbox(useWARP, useDebugLayer); err != nil { return err } } else { if err := g.initializeDesktop(useWARP, useDebugLayer); err != nil { return err } } return nil } func (g *Graphics) initializeDesktop(useWARP bool, useDebugLayer bool) (ferr error) { if err := d3d12.Load(); err != nil { return err } // As g's lifetime is the same as the process's lifetime, debug and other objects are never released // if this initialization succeeds. // The debug interface is optional and might not exist. if useDebugLayer { d, err := _D3D12GetDebugInterface() if err != nil { return err } g.debug = d defer func() { if ferr != nil { g.debug.Release() g.debug = nil } }() g.debug.EnableDebugLayer() } var flag uint32 if g.debug != nil { flag = _DXGI_CREATE_FACTORY_DEBUG } f, err := _CreateDXGIFactory2(flag) if err != nil { return err } g.factory = f defer func() { if ferr != nil { g.factory.Release() g.factory = nil } }() if useWARP { a, err := g.factory.EnumWarpAdapter() if err != nil { return err } g.adapter = a defer func() { if ferr != nil { g.adapter.Release() g.adapter = nil } }() } else { for i := 0; ; i++ { a, err := g.factory.EnumAdapters1(uint32(i)) if errors.Is(err, _DXGI_ERROR_NOT_FOUND) { break } if err != nil { return err } desc, err := a.GetDesc1() if err != nil { return err } if desc.Flags&_DXGI_ADAPTER_FLAG_SOFTWARE != 0 { a.Release() continue } if err := _D3D12CreateDevice(unsafe.Pointer(a), _D3D_FEATURE_LEVEL_11_0, &_IID_ID3D12Device, nil); err != nil { a.Release() continue } g.adapter = a defer func() { if ferr != nil { g.adapter.Release() g.adapter = nil } }() break } } if g.adapter == nil { return errors.New("directx: DirectX 12 is not supported") } if err := _D3D12CreateDevice(unsafe.Pointer(g.adapter), _D3D_FEATURE_LEVEL_11_0, &_IID_ID3D12Device, (*unsafe.Pointer)(unsafe.Pointer(&g.device))); err != nil { return err } if err := g.initializeMembers(); err != nil { return err } return nil } func (g *Graphics) initializeXbox(useWARP bool, useDebugLayer bool) (ferr error) { if err := d3d12x.Load(); err != nil { return err } params := &_D3D12XBOX_CREATE_DEVICE_PARAMETERS{ Version: _D3D12_SDK_VERSION, // TODO: Can we always use the same value? GraphicsCommandQueueRingSizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, GraphicsScratchMemorySizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, ComputeScratchMemorySizeBytes: _D3D12XBOX_DEFAULT_SIZE_BYTES, } if useDebugLayer { params.ProcessDebugFlags = _D3D12_PROCESS_DEBUG_FLAG_DEBUG_LAYER_ENABLED } if err := _D3D12XboxCreateDevice(nil, params, &_IID_ID3D12Device, (*unsafe.Pointer)(unsafe.Pointer(&g.device))); err != nil { return err } if err := g.initializeMembers(); err != nil { return err } return nil } func (g *Graphics) initializeMembers() (ferr error) { // Create an event for a fence. e, err := windows.CreateEvent(nil, 0, 0, nil) if err != nil { return fmt.Errorf("directx: CreateEvent failed: %w", err) } g.fenceWaitEvent = e // Create a command queue. desc := _D3D12_COMMAND_QUEUE_DESC{ Type: _D3D12_COMMAND_LIST_TYPE_DIRECT, Flags: _D3D12_COMMAND_QUEUE_FLAG_NONE, } c, err := g.device.CreateCommandQueue(&desc) if err != nil { return err } g.commandQueue = c defer func() { if ferr != nil { g.commandQueue.Release() g.commandQueue = nil } }() // Create command allocators. for i := 0; i < frameCount; i++ { dca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) if err != nil { return err } g.drawCommandAllocators[i] = dca defer func(i int) { if ferr != nil { g.drawCommandAllocators[i].Release() g.drawCommandAllocators[i] = nil } }(i) cca, err := g.device.CreateCommandAllocator(_D3D12_COMMAND_LIST_TYPE_DIRECT) if err != nil { return err } g.copyCommandAllocators[i] = cca defer func(i int) { if ferr != nil { g.copyCommandAllocators[i].Release() g.copyCommandAllocators[i] = nil } }(i) } // Create a frame fence. f, err := g.device.CreateFence(0, _D3D12_FENCE_FLAG_NONE) if err != nil { return err } g.fence = f defer func() { if ferr != nil { g.fence.Release() g.fence = nil } }() g.fenceValues[g.frameIndex]++ // Create command lists. dcl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.drawCommandAllocators[0], nil) if err != nil { return err } g.drawCommandList = dcl defer func() { if ferr != nil { g.drawCommandList.Release() g.drawCommandList = nil } }() ccl, err := g.device.CreateCommandList(0, _D3D12_COMMAND_LIST_TYPE_DIRECT, g.copyCommandAllocators[0], nil) if err != nil { return err } g.copyCommandList = ccl defer func() { if ferr != nil { g.copyCommandList.Release() g.copyCommandList = nil } }() // Close the command list once as this is immediately Reset at Begin. if err := g.drawCommandList.Close(); err != nil { return err } if err := g.copyCommandList.Close(); err != nil { return err } // Create a descriptor heap for RTV. h, err := g.device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, NumDescriptors: frameCount, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } g.rtvDescriptorHeap = h defer func() { if ferr != nil { g.rtvDescriptorHeap.Release() g.rtvDescriptorHeap = nil } }() g.rtvDescriptorSize = g.device.GetDescriptorHandleIncrementSize(_D3D12_DESCRIPTOR_HEAP_TYPE_RTV) if err := g.pipelineStates.initialize(g.device); err != nil { return err } return nil } func (g *Graphics) Initialize() (err error) { // Initialization should already be done. return nil } func createBuffer(device *_ID3D12Device, bufferSize uint64, heapType _D3D12_HEAP_TYPE) (*_ID3D12Resource, error) { state := _D3D12_RESOURCE_STATE_GENERIC_READ() if heapType == _D3D12_HEAP_TYPE_READBACK { state = _D3D12_RESOURCE_STATE_COPY_DEST } r, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: heapType, CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_BUFFER, Alignment: 0, Width: bufferSize, Height: 1, DepthOrArraySize: 1, MipLevels: 1, Format: _DXGI_FORMAT_UNKNOWN, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_ROW_MAJOR, Flags: _D3D12_RESOURCE_FLAG_NONE, }, state, nil) if err != nil { return nil, err } return r, nil } func (g *Graphics) updateSwapChain(width, height int) error { if g.window == 0 { return errors.New("directx: the window handle is not initialized yet") } if g.swapChain == nil { if err := g.initSwapChain(width, height); err != nil { return err } } else { if err := g.resizeSwapChain(width, height); err != nil { return err } } return nil } func (g *Graphics) initSwapChain(width, height int) (ferr error) { // Create a swap chain. // // DXGI_ALPHA_MODE_PREMULTIPLIED doesn't work with a HWND well. // // IDXGIFactory::CreateSwapChain: Alpha blended swapchains must be created with CreateSwapChainForComposition, // or CreateSwapChainForCoreWindow with the DXGI_SWAP_CHAIN_FLAG_FOREGROUND_LAYER flag s, err := g.factory.CreateSwapChainForHwnd(unsafe.Pointer(g.commandQueue), g.window, &_DXGI_SWAP_CHAIN_DESC1{ Width: uint32(width), Height: uint32(height), Format: _DXGI_FORMAT_B8G8R8A8_UNORM, BufferUsage: _DXGI_USAGE_RENDER_TARGET_OUTPUT, BufferCount: frameCount, SwapEffect: _DXGI_SWAP_EFFECT_FLIP_DISCARD, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, }, nil, nil) if err != nil { return err } s.As(&g.swapChain) defer func() { if ferr != nil { g.swapChain.Release() g.swapChain = nil } }() // MakeWindowAssociation should be called after swap chain creation. // https://docs.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgifactory-makewindowassociation if err := g.factory.MakeWindowAssociation(g.window, _DXGI_MWA_NO_WINDOW_CHANGES|_DXGI_MWA_NO_ALT_ENTER); err != nil { return err } // TODO: Get the current buffer index? if err := g.createRenderTargetViews(); err != nil { return err } g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) return nil } func (g *Graphics) resizeSwapChain(width, height int) error { if err := g.flushCommandList(g.copyCommandList); err != nil { return err } if err := g.copyCommandList.Close(); err != nil { return err } if err := g.flushCommandList(g.drawCommandList); err != nil { return err } if err := g.drawCommandList.Close(); err != nil { return err } if err := g.waitForCommandQueue(); err != nil { return err } g.releaseResources(g.frameIndex) if err := g.resetCommandAllocators(g.frameIndex); err != nil { return err } for i := 0; i < frameCount; i++ { g.fenceValues[i] = g.fenceValues[g.frameIndex] } for _, r := range g.renderTargets { r.Release() } if err := g.swapChain.ResizeBuffers(frameCount, uint32(width), uint32(height), _DXGI_FORMAT_B8G8R8A8_UNORM, 0); err != nil { return err } if err := g.createRenderTargetViews(); err != nil { return err } // TODO: Reset 0 on Xbox g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) if err := g.drawCommandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { return err } if err := g.copyCommandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { return err } return nil } func (g *Graphics) createRenderTargetViews() (ferr error) { // Create frame resources. h, err := g.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } for i := 0; i < frameCount; i++ { r, err := g.swapChain.GetBuffer(uint32(i)) if err != nil { return err } g.renderTargets[i] = r defer func(i int) { if ferr != nil { g.renderTargets[i].Release() g.renderTargets[i] = nil } }(i) g.device.CreateRenderTargetView(r, nil, h) h.Offset(1, g.rtvDescriptorSize) } return nil } func (g *Graphics) SetWindow(window uintptr) { g.window = windows.HWND(window) // TODO: need to update the swap chain? } func (g *Graphics) Begin() error { if err := g.drawCommandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { return err } if err := g.copyCommandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { return err } return nil } func (g *Graphics) End(present bool) error { // The swap chain might still be nil when Begin-End is invoked not by a frame (e.g., Image.At). // As copyCommandList and drawCommandList are exclusive, the order should not matter here. if err := g.flushCommandList(g.copyCommandList); err != nil { return err } if err := g.copyCommandList.Close(); err != nil { return err } if present { g.screenImage.transiteState(g.drawCommandList, _D3D12_RESOURCE_STATE_PRESENT) } if err := g.drawCommandList.Close(); err != nil { return err } g.commandQueue.ExecuteCommandLists([]*_ID3D12GraphicsCommandList{g.drawCommandList}) // Release vertices and indices buffers when too many ones were created. // This is needed espciallly for testings, where present is always false. if len(g.vertices[g.frameIndex]) >= 16 { if err := g.waitForCommandQueue(); err != nil { return err } g.releaseVerticesAndIndices(g.frameIndex) } g.pipelineStates.resetConstantBuffers(g.frameIndex) if present { if g.swapChain == nil { return fmt.Errorf("directx: the swap chain is not initialized yet at End") } var syncInterval uint32 if g.vsyncEnabled { syncInterval = 1 } if err := g.swapChain.Present(syncInterval, 0); err != nil { return err } if err := g.moveToNextFrame(); err != nil { return err } g.releaseResources(g.frameIndex) g.releaseVerticesAndIndices(g.frameIndex) if err := g.resetCommandAllocators(g.frameIndex); err != nil { return err } } return nil } func (g *Graphics) moveToNextFrame() error { fv := g.fenceValues[g.frameIndex] if err := g.commandQueue.Signal(g.fence, fv); err != nil { return err } // Update the frame index. // TODO: The calculation might be different in Xbox. g.frameIndex = int(g.swapChain.GetCurrentBackBufferIndex()) if g.fence.GetCompletedValue() < g.fenceValues[g.frameIndex] { if err := g.fence.SetEventOnCompletion(fv, g.fenceWaitEvent); err != nil { return err } if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { return err } } g.fenceValues[g.frameIndex] = fv + 1 return nil } func (g *Graphics) releaseResources(frameIndex int) { for i, img := range g.disposedImages[frameIndex] { img.disposeImpl() g.disposedImages[frameIndex][i] = nil } g.disposedImages[frameIndex] = g.disposedImages[frameIndex][:0] for i, s := range g.disposedShaders[frameIndex] { s.disposeImpl() g.disposedShaders[frameIndex][i] = nil } g.disposedShaders[frameIndex] = g.disposedShaders[frameIndex][:0] } func (g *Graphics) releaseVerticesAndIndices(frameIndex int) { for i := range g.vertices[frameIndex] { g.vertices[frameIndex][i].Release() g.vertices[frameIndex][i] = nil } g.vertices[frameIndex] = g.vertices[frameIndex][:0] for i := range g.indices[frameIndex] { g.indices[frameIndex][i].Release() g.indices[frameIndex][i] = nil } g.indices[frameIndex] = g.indices[frameIndex][:0] } func (g *Graphics) resetCommandAllocators(frameIndex int) error { if err := g.drawCommandAllocators[frameIndex].Reset(); err != nil { return err } if err := g.copyCommandAllocators[frameIndex].Reset(); err != nil { return err } return nil } // flushCommandList executes commands in the command list and waits for its completion. // // TODO: This is not efficient. Is it possible to make two command lists work in parallel? func (g *Graphics) flushCommandList(commandList *_ID3D12GraphicsCommandList) error { if err := commandList.Close(); err != nil { return err } g.commandQueue.ExecuteCommandLists([]*_ID3D12GraphicsCommandList{commandList}) if err := g.waitForCommandQueue(); err != nil { return err } switch commandList { case g.drawCommandList: if err := commandList.Reset(g.drawCommandAllocators[g.frameIndex], nil); err != nil { return err } case g.copyCommandList: if err := commandList.Reset(g.copyCommandAllocators[g.frameIndex], nil); err != nil { return err } } return nil } func (g *Graphics) waitForCommandQueue() error { fv := g.fenceValues[g.frameIndex] if err := g.commandQueue.Signal(g.fence, fv); err != nil { return err } if err := g.fence.SetEventOnCompletion(fv, g.fenceWaitEvent); err != nil { return err } if _, err := windows.WaitForSingleObject(g.fenceWaitEvent, windows.INFINITE); err != nil { return err } g.fenceValues[g.frameIndex]++ return nil } func (g *Graphics) SetTransparent(transparent bool) { g.transparent = transparent } func (g *Graphics) SetVertices(vertices []float32, indices []uint16) (ferr error) { // Create buffers if necessary. vidx := len(g.vertices[g.frameIndex]) if cap(g.vertices[g.frameIndex]) > vidx { g.vertices[g.frameIndex] = g.vertices[g.frameIndex][:vidx+1] } else { g.vertices[g.frameIndex] = append(g.vertices[g.frameIndex], nil) } if g.vertices[g.frameIndex][vidx] == nil { // TODO: Use the default heap for efficienty. See the official example HelloTriangle. vs, err := createBuffer(g.device, graphics.IndicesNum*graphics.VertexFloatNum*uint64(unsafe.Sizeof(float32(0))), _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } g.vertices[g.frameIndex][vidx] = vs defer func() { if ferr != nil { g.vertices[g.frameIndex][vidx].Release() g.vertices[g.frameIndex][vidx] = nil } }() } iidx := len(g.indices[g.frameIndex]) if cap(g.indices[g.frameIndex]) > iidx { g.indices[g.frameIndex] = g.indices[g.frameIndex][:iidx+1] } else { g.indices[g.frameIndex] = append(g.indices[g.frameIndex], nil) } if g.indices[g.frameIndex][iidx] == nil { is, err := createBuffer(g.device, graphics.IndicesNum*uint64(unsafe.Sizeof(uint16(0))), _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } g.indices[g.frameIndex][iidx] = is defer func() { if ferr != nil { g.indices[g.frameIndex][iidx].Release() g.indices[g.frameIndex][iidx] = nil } }() } m, err := g.vertices[g.frameIndex][vidx].Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } copyFloat32s(m, vertices) g.vertices[g.frameIndex][vidx].Unmap(0, nil) m, err = g.indices[g.frameIndex][iidx].Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } copyUint16s(m, indices) g.indices[g.frameIndex][iidx].Unmap(0, nil) return nil } func (g *Graphics) NewImage(width, height int) (graphicsdriver.Image, error) { desc := _D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, Alignment: 0, Width: uint64(graphics.InternalImageSize(width)), Height: uint32(graphics.InternalImageSize(height)), DepthOrArraySize: 1, MipLevels: 0, Format: _DXGI_FORMAT_R8G8B8A8_UNORM, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, Flags: _D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET, } state := _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE t, err := g.device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: _D3D12_HEAP_TYPE_DEFAULT, // Upload? CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &desc, state, nil) if err != nil { return nil, err } layouts, _, _, totalBytes := g.device.GetCopyableFootprints(&desc, 0, 1, 0) i := &Image{ graphics: g, id: g.genNextImageID(), width: width, height: height, texture: t, states: [frameCount]_D3D12_RESOURCE_STATES{state}, layouts: layouts, totalBytes: totalBytes, } g.addImage(i) return i, nil } func (g *Graphics) NewScreenFramebufferImage(width, height int) (graphicsdriver.Image, error) { if err := g.updateSwapChain(width, height); err != nil { return nil, err } i := &Image{ graphics: g, id: g.genNextImageID(), width: width, height: height, screen: true, states: [frameCount]_D3D12_RESOURCE_STATES{0, 0}, } g.addImage(i) g.screenImage = i return i, nil } func (g *Graphics) addImage(img *Image) { if g.images == nil { g.images = map[graphicsdriver.ImageID]*Image{} } if _, ok := g.images[img.id]; ok { panic(fmt.Sprintf("directx: image ID %d was already registered", img.id)) } g.images[img.id] = img } func (g *Graphics) removeImage(img *Image) { delete(g.images, img.id) g.disposedImages[g.frameIndex] = append(g.disposedImages[g.frameIndex], img) } func (g *Graphics) addShader(s *Shader) { if g.shaders == nil { g.shaders = map[graphicsdriver.ShaderID]*Shader{} } if _, ok := g.shaders[s.id]; ok { panic(fmt.Sprintf("directx: shader ID %d was already registered", s.id)) } g.shaders[s.id] = s } func (g *Graphics) removeShader(s *Shader) { delete(g.shaders, s.id) g.disposedShaders[g.frameIndex] = append(g.disposedShaders[g.frameIndex], s) } func (g *Graphics) SetVsyncEnabled(enabled bool) { g.vsyncEnabled = enabled } func (g *Graphics) SetFullscreen(fullscreen bool) { } func (g *Graphics) FramebufferYDirection() graphicsdriver.YDirection { return graphicsdriver.Downward } func (g *Graphics) NeedsRestoring() bool { return false } func (g *Graphics) NeedsClearingScreen() bool { // TODO: Confirm this is really true. return true } func (g *Graphics) IsGL() bool { return false } func (g *Graphics) IsDirectX() bool { return true } func (g *Graphics) MaxImageSize() int { return _D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION } func (g *Graphics) NewShader(program *shaderir.Program) (graphicsdriver.Shader, error) { src, offsets := hlsl.Compile(program) vsh, psh, err := newShader([]byte(src), nil) if err != nil { return nil, err } s := &Shader{ graphics: g, id: g.genNextShaderID(), uniformTypes: program.Uniforms, uniformOffsets: offsets, vertexShader: vsh, pixelShader: psh, } g.addShader(s) return s, nil } func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.ShaderImageNum]graphicsdriver.ImageID, offsets [graphics.ShaderImageNum - 1][2]float32, shaderID graphicsdriver.ShaderID, indexLen int, indexOffset int, mode graphicsdriver.CompositeMode, colorM graphicsdriver.ColorM, filter graphicsdriver.Filter, address graphicsdriver.Address, dstRegion, srcRegion graphicsdriver.Region, uniforms [][]float32, evenOdd bool) error { if err := g.flushCommandList(g.copyCommandList); err != nil { return err } dst := g.images[dstID] if err := dst.setAsRenderTarget(g.device, evenOdd); err != nil { return err } var shader *Shader if shaderID != graphicsdriver.InvalidShaderID { shader = g.shaders[shaderID] } var srcImages [graphics.ShaderImageNum]*Image for i, srcID := range srcs { src := g.images[srcID] if src == nil { continue } srcImages[i] = src src.transiteState(g.drawCommandList, _D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) } var flattenUniforms []float32 if shader == nil { screenWidth, screenHeight := dst.internalSize() var srcWidth, srcHeight float32 if filter != graphicsdriver.FilterNearest { w, h := srcImages[0].internalSize() srcWidth = float32(w) srcHeight = float32(h) } var esBody [16]float32 var esTranslate [4]float32 colorM.Elements(&esBody, &esTranslate) scale := float32(0) if filter == graphicsdriver.FilterScreen { scale = float32(dst.width) / float32(srcImages[0].width) } flattenUniforms = []float32{ float32(screenWidth), float32(screenHeight), srcWidth, srcHeight, esBody[0], esBody[1], esBody[2], esBody[3], esBody[4], esBody[5], esBody[6], esBody[7], esBody[8], esBody[9], esBody[10], esBody[11], esBody[12], esBody[13], esBody[14], esBody[15], esTranslate[0], esTranslate[1], esTranslate[2], esTranslate[3], srcRegion.X, srcRegion.Y, srcRegion.X + srcRegion.Width, srcRegion.Y + srcRegion.Height, scale, } } else { // TODO: This logic is very similar to Metal's. Let's unify them. dw, dh := dst.internalSize() us := make([][]float32, graphics.PreservedUniformVariablesNum+len(uniforms)) us[graphics.TextureDestinationSizeUniformVariableIndex] = []float32{float32(dw), float32(dh)} usizes := make([]float32, 2*len(srcs)) for i, src := range srcImages { if src != nil { w, h := src.internalSize() usizes[2*i] = float32(w) usizes[2*i+1] = float32(h) } } us[graphics.TextureSourceSizesUniformVariableIndex] = usizes udorigin := []float32{float32(dstRegion.X) / float32(dw), float32(dstRegion.Y) / float32(dh)} us[graphics.TextureDestinationRegionOriginUniformVariableIndex] = udorigin udsize := []float32{float32(dstRegion.Width) / float32(dw), float32(dstRegion.Height) / float32(dh)} us[graphics.TextureDestinationRegionSizeUniformVariableIndex] = udsize uoffsets := make([]float32, 2*len(offsets)) for i, offset := range offsets { uoffsets[2*i] = offset[0] uoffsets[2*i+1] = offset[1] } us[graphics.TextureSourceOffsetsUniformVariableIndex] = uoffsets usorigin := []float32{float32(srcRegion.X), float32(srcRegion.Y)} us[graphics.TextureSourceRegionOriginUniformVariableIndex] = usorigin ussize := []float32{float32(srcRegion.Width), float32(srcRegion.Height)} us[graphics.TextureSourceRegionSizeUniformVariableIndex] = ussize us[graphics.ProjectionMatrixUniformVariableIndex] = []float32{ 2 / float32(dw), 0, 0, 0, 0, -2 / float32(dh), 0, 0, 0, 0, 1, 0, -1, 1, 0, 1, } for i, u := range uniforms { us[graphics.PreservedUniformVariablesNum+i] = u } flattenUniforms = shader.uniformsToFloat32s(us) } w, h := dst.internalSize() g.drawCommandList.RSSetViewports([]_D3D12_VIEWPORT{ { TopLeftX: 0, TopLeftY: 0, Width: float32(w), Height: float32(h), MinDepth: _D3D12_MIN_DEPTH, MaxDepth: _D3D12_MAX_DEPTH, }, }) g.drawCommandList.RSSetScissorRects([]_D3D12_RECT{ { left: int32(dstRegion.X), top: int32(dstRegion.Y), right: int32(dstRegion.X + dstRegion.Width), bottom: int32(dstRegion.Y + dstRegion.Height), }, }) g.drawCommandList.IASetPrimitiveTopology(_D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST) g.drawCommandList.IASetVertexBuffers(0, []_D3D12_VERTEX_BUFFER_VIEW{ { BufferLocation: g.vertices[g.frameIndex][len(g.vertices[g.frameIndex])-1].GetGPUVirtualAddress(), SizeInBytes: graphics.IndicesNum * graphics.VertexFloatNum * uint32(unsafe.Sizeof(float32(0))), StrideInBytes: graphics.VertexFloatNum * uint32(unsafe.Sizeof(float32(0))), }, }) g.drawCommandList.IASetIndexBuffer(&_D3D12_INDEX_BUFFER_VIEW{ BufferLocation: g.indices[g.frameIndex][len(g.indices[g.frameIndex])-1].GetGPUVirtualAddress(), SizeInBytes: graphics.IndicesNum * uint32(unsafe.Sizeof(uint16(0))), Format: _DXGI_FORMAT_R16_UINT, }) if shader == nil { key := builtinPipelineStatesKey{ useColorM: !colorM.IsIdentity(), compositeMode: mode, filter: filter, address: address, screen: dst.screen, } if evenOdd { key.stencilMode = prepareStencil s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } key.stencilMode = drawWithStencil s, err = g.pipelineStates.builtinGraphicsPipelineState(g.device, key) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } } else { key.stencilMode = noStencil s, err := g.pipelineStates.builtinGraphicsPipelineState(g.device, key) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } } } else { if evenOdd { s, err := shader.pipelineState(mode, prepareStencil) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } s, err = shader.pipelineState(mode, drawWithStencil) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } } else { s, err := shader.pipelineState(mode, noStencil) if err != nil { return err } if err := g.drawTriangles(s, srcImages, flattenUniforms, indexLen, indexOffset); err != nil { return err } } } // Release constant buffers when too many ones were created. // This is needed espciallly for testings, where present is always false. if len(g.pipelineStates.constantBuffers[g.frameIndex]) >= 16 { if err := g.flushCommandList(g.drawCommandList); err != nil { return err } g.pipelineStates.releaseConstantBuffers(g.frameIndex) } return nil } func (g *Graphics) drawTriangles(pipelineState *_ID3D12PipelineState, srcs [graphics.ShaderImageNum]*Image, flattenUniforms []float32, indexLen int, indexOffset int) error { if err := g.pipelineStates.useGraphicsPipelineState(g.device, g.drawCommandList, g.frameIndex, pipelineState, srcs, flattenUniforms); err != nil { return err } g.drawCommandList.DrawIndexedInstanced(uint32(indexLen), 1, uint32(indexOffset), 0, 0) return nil } func (g *Graphics) genNextImageID() graphicsdriver.ImageID { g.nextImageID++ return g.nextImageID } func (g *Graphics) genNextShaderID() graphicsdriver.ShaderID { g.nextShaderID++ return g.nextShaderID } type Image struct { graphics *Graphics id graphicsdriver.ImageID width int height int screen bool states [frameCount]_D3D12_RESOURCE_STATES texture *_ID3D12Resource stencil *_ID3D12Resource layouts _D3D12_PLACED_SUBRESOURCE_FOOTPRINT totalBytes uint64 uploadingStagingBuffer *_ID3D12Resource readingStagingBuffer *_ID3D12Resource rtvDescriptorHeap *_ID3D12DescriptorHeap dsvDescriptorHeap *_ID3D12DescriptorHeap } func (i *Image) ID() graphicsdriver.ImageID { return i.id } func (i *Image) Dispose() { // Dipose the images later as this image might still be used. i.graphics.removeImage(i) } func (i *Image) disposeImpl() { if i.dsvDescriptorHeap != nil { i.dsvDescriptorHeap.Release() i.dsvDescriptorHeap = nil } if i.rtvDescriptorHeap != nil { i.rtvDescriptorHeap.Release() i.rtvDescriptorHeap = nil } if i.uploadingStagingBuffer != nil { i.uploadingStagingBuffer.Release() i.uploadingStagingBuffer = nil } if i.readingStagingBuffer != nil { i.readingStagingBuffer.Release() i.readingStagingBuffer = nil } if i.stencil != nil { i.stencil.Release() i.stencil = nil } if i.texture != nil { i.texture.Release() i.texture = nil } } func (*Image) IsInvalidated() bool { return false } func (i *Image) ensureUploadingStagingBuffer() error { if i.uploadingStagingBuffer != nil { return nil } var err error i.uploadingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_UPLOAD) if err != nil { return err } return nil } func (i *Image) ensureReadingStagingBuffer() error { if i.readingStagingBuffer != nil { return nil } var err error i.readingStagingBuffer, err = createBuffer(i.graphics.device, i.totalBytes, _D3D12_HEAP_TYPE_READBACK) if err != nil { return err } return nil } func (i *Image) ReadPixels(buf []byte) error { if i.screen { return errors.New("directx: Pixels cannot be called on the screen") } if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { return err } if err := i.ensureReadingStagingBuffer(); err != nil { return err } i.transiteState(i.graphics.copyCommandList, _D3D12_RESOURCE_STATE_COPY_SOURCE) m, err := i.readingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } dst := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ pResource: i.readingStagingBuffer, Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, PlacedFootprint: i.layouts, } src := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ pResource: i.texture, Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, SubresourceIndex: 0, } i.graphics.copyCommandList.CopyTextureRegion_PlacedFootPrint_SubresourceIndex( &dst, 0, 0, 0, &src, &_D3D12_BOX{ left: 0, top: 0, front: 0, right: uint32(i.width), bottom: uint32(i.height), back: 1, }) if err := i.graphics.flushCommandList(i.graphics.copyCommandList); err != nil { return err } var dstBytes []byte h := (*reflect.SliceHeader)(unsafe.Pointer(&dstBytes)) h.Data = uintptr(m) h.Len = int(i.totalBytes) h.Cap = int(i.totalBytes) for j := 0; j < i.height; j++ { copy(buf[j*i.width*4:(j+1)*i.width*4], dstBytes[j*int(i.layouts.Footprint.RowPitch):]) } i.readingStagingBuffer.Unmap(0, nil) return nil } func (i *Image) ReplacePixels(args []*graphicsdriver.ReplacePixelsArgs) error { if i.screen { return errors.New("directx: ReplacePixels cannot be called on the screen") } if err := i.graphics.flushCommandList(i.graphics.drawCommandList); err != nil { return err } if err := i.ensureUploadingStagingBuffer(); err != nil { return err } i.transiteState(i.graphics.copyCommandList, _D3D12_RESOURCE_STATE_COPY_DEST) m, err := i.uploadingStagingBuffer.Map(0, &_D3D12_RANGE{0, 0}) if err != nil { return err } var srcBytes []byte h := (*reflect.SliceHeader)(unsafe.Pointer(&srcBytes)) h.Data = uintptr(m) h.Len = int(i.totalBytes) h.Cap = int(i.totalBytes) for _, a := range args { for j := 0; j < a.Height; j++ { copy(srcBytes[(a.Y+j)*int(i.layouts.Footprint.RowPitch)+a.X*4:], a.Pixels[j*a.Width*4:(j+1)*a.Width*4]) } dst := _D3D12_TEXTURE_COPY_LOCATION_SubresourceIndex{ pResource: i.texture, Type: _D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, SubresourceIndex: 0, } src := _D3D12_TEXTURE_COPY_LOCATION_PlacedFootPrint{ pResource: i.uploadingStagingBuffer, Type: _D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, PlacedFootprint: i.layouts, } i.graphics.copyCommandList.CopyTextureRegion_SubresourceIndex_PlacedFootPrint( &dst, uint32(a.X), uint32(a.Y), 0, &src, &_D3D12_BOX{ left: uint32(a.X), top: uint32(a.Y), front: 0, right: uint32(a.X + a.Width), bottom: uint32(a.Y + a.Height), back: 1, }) } i.uploadingStagingBuffer.Unmap(0, nil) return nil } func (i *Image) resource() *_ID3D12Resource { if i.screen { return i.graphics.renderTargets[i.graphics.frameIndex] } return i.texture } func (i *Image) state() _D3D12_RESOURCE_STATES { if i.screen { return i.states[i.graphics.frameIndex] } return i.states[0] } func (i *Image) setState(newState _D3D12_RESOURCE_STATES) { if i.screen { i.states[i.graphics.frameIndex] = newState return } i.states[0] = newState } func (i *Image) transiteState(commandList *_ID3D12GraphicsCommandList, newState _D3D12_RESOURCE_STATES) { if i.state() == newState { return } commandList.ResourceBarrier([]_D3D12_RESOURCE_BARRIER_Transition{ { Type: _D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, Flags: _D3D12_RESOURCE_BARRIER_FLAG_NONE, Transition: _D3D12_RESOURCE_TRANSITION_BARRIER{ pResource: i.resource(), Subresource: _D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, StateBefore: i.state(), StateAfter: newState, }, }, }) i.setState(newState) } func (i *Image) internalSize() (int, int) { if i.screen { return i.width, i.height } return graphics.InternalImageSize(i.width), graphics.InternalImageSize(i.height) } func (i *Image) setAsRenderTarget(device *_ID3D12Device, useStencil bool) error { i.transiteState(i.graphics.drawCommandList, _D3D12_RESOURCE_STATE_RENDER_TARGET) if err := i.ensureRenderTargetView(device); err != nil { return err } if i.screen { if useStencil { return fmt.Errorf("directx: stencils are not available on the screen framebuffer") } rtv, err := i.graphics.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } rtv.Offset(int32(i.graphics.frameIndex), i.graphics.rtvDescriptorSize) i.graphics.drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, nil) return nil } rtv, err := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } if !useStencil { i.graphics.drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, nil) return nil } if err := i.ensureDepthStencilView(device); err != nil { return err } dsv, err := i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } i.graphics.drawCommandList.OMSetStencilRef(0) i.graphics.drawCommandList.OMSetRenderTargets([]_D3D12_CPU_DESCRIPTOR_HANDLE{rtv}, false, &dsv) i.graphics.drawCommandList.ClearDepthStencilView(dsv, _D3D12_CLEAR_FLAG_STENCIL, 0, 0, nil) return nil } func (i *Image) ensureRenderTargetView(device *_ID3D12Device) error { if i.screen { return nil } if i.rtvDescriptorHeap != nil { return nil } h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_RTV, NumDescriptors: 1, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } i.rtvDescriptorHeap = h rtv, err := i.rtvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } device.CreateRenderTargetView(i.texture, nil, rtv) return nil } func (i *Image) ensureDepthStencilView(device *_ID3D12Device) error { if i.screen { return fmt.Errorf("directx: stencils are not available on the screen framebuffer") } if i.dsvDescriptorHeap != nil { return nil } h, err := device.CreateDescriptorHeap(&_D3D12_DESCRIPTOR_HEAP_DESC{ Type: _D3D12_DESCRIPTOR_HEAP_TYPE_DSV, NumDescriptors: 1, Flags: _D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask: 0, }) if err != nil { return err } i.dsvDescriptorHeap = h dsv, err := i.dsvDescriptorHeap.GetCPUDescriptorHandleForHeapStart() if err != nil { return err } if i.stencil == nil { s, err := device.CreateCommittedResource(&_D3D12_HEAP_PROPERTIES{ Type: _D3D12_HEAP_TYPE_DEFAULT, CPUPageProperty: _D3D12_CPU_PAGE_PROPERTY_UNKNOWN, MemoryPoolPreference: _D3D12_MEMORY_POOL_UNKNOWN, CreationNodeMask: 1, VisibleNodeMask: 1, }, _D3D12_HEAP_FLAG_NONE, &_D3D12_RESOURCE_DESC{ Dimension: _D3D12_RESOURCE_DIMENSION_TEXTURE2D, Alignment: 0, Width: uint64(graphics.InternalImageSize(i.width)), Height: uint32(graphics.InternalImageSize(i.height)), DepthOrArraySize: 1, MipLevels: 0, Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, SampleDesc: _DXGI_SAMPLE_DESC{ Count: 1, Quality: 0, }, Layout: _D3D12_TEXTURE_LAYOUT_UNKNOWN, Flags: _D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL, }, _D3D12_RESOURCE_STATE_DEPTH_WRITE, &_D3D12_CLEAR_VALUE{ Format: _DXGI_FORMAT_D24_UNORM_S8_UINT, }) if err != nil { return err } i.stencil = s } device.CreateDepthStencilView(i.stencil, nil, dsv) return nil } func copyFloat32s(dst uintptr, src []float32) { var dsts []float32 h := (*reflect.SliceHeader)(unsafe.Pointer(&dsts)) h.Data = dst h.Len = len(src) h.Cap = len(src) copy(dsts, src) } func copyUint16s(dst uintptr, src []uint16) { var dsts []uint16 h := (*reflect.SliceHeader)(unsafe.Pointer(&dsts)) h.Data = dst h.Len = len(src) h.Cap = len(src) copy(dsts, src) } type stencilMode int const ( prepareStencil stencilMode = iota drawWithStencil noStencil ) type pipelineStateKey struct { compositeMode graphicsdriver.CompositeMode stencilMode stencilMode } type Shader struct { graphics *Graphics id graphicsdriver.ShaderID uniformTypes []shaderir.Type uniformOffsets []int vertexShader *_ID3DBlob pixelShader *_ID3DBlob pipelineStates map[pipelineStateKey]*_ID3D12PipelineState } func (s *Shader) ID() graphicsdriver.ShaderID { return s.id } func (s *Shader) Dispose() { s.graphics.removeShader(s) } func (s *Shader) disposeImpl() { for c, p := range s.pipelineStates { p.Release() delete(s.pipelineStates, c) } if s.pixelShader != nil { s.pixelShader.Release() s.pixelShader = nil } if s.vertexShader != nil { s.vertexShader.Release() s.vertexShader = nil } } func (s *Shader) pipelineState(compositeMode graphicsdriver.CompositeMode, stencilMode stencilMode) (*_ID3D12PipelineState, error) { key := pipelineStateKey{ compositeMode: compositeMode, stencilMode: stencilMode, } if state, ok := s.pipelineStates[key]; ok { return state, nil } state, err := s.graphics.pipelineStates.newPipelineState(s.graphics.device, s.vertexShader, s.pixelShader, compositeMode, stencilMode, false) if err != nil { return nil, err } if s.pipelineStates == nil { s.pipelineStates = map[pipelineStateKey]*_ID3D12PipelineState{} } s.pipelineStates[key] = state return state, nil } func (s *Shader) uniformsToFloat32s(uniforms [][]float32) []float32 { var fs []float32 for i, u := range uniforms { if len(fs) < s.uniformOffsets[i]/4 { fs = append(fs, make([]float32, s.uniformOffsets[i]/4-len(fs))...) } t := s.uniformTypes[i] switch t.Main { case shaderir.Float, shaderir.Vec2, shaderir.Vec3, shaderir.Vec4: fs = append(fs, u...) case shaderir.Mat2: fs = append(fs, u[0], u[2], 0, 0, u[1], u[3], ) case shaderir.Mat3: fs = append(fs, u[0], u[3], u[6], 0, u[1], u[4], u[7], 0, u[2], u[5], u[8], ) case shaderir.Mat4: fs = append(fs, u[0], u[4], u[8], u[12], u[1], u[5], u[9], u[13], u[2], u[6], u[10], u[14], u[3], u[7], u[11], u[15], ) case shaderir.Array: // Each element is aligned to the boundary. switch t.Sub[0].Main { case shaderir.Float: for j := 0; j < t.Length; j++ { fs = append(fs, u[j]) if j < t.Length-1 { fs = append(fs, 0, 0, 0) } } case shaderir.Vec2: for j := 0; j < t.Length; j++ { fs = append(fs, u[2*j:2*(j+1)]...) if j < t.Length-1 { fs = append(fs, 0, 0) } } case shaderir.Vec3: for j := 0; j < t.Length; j++ { fs = append(fs, u[3*j:3*(j+1)]...) if j < t.Length-1 { fs = append(fs, 0) } } case shaderir.Vec4: fs = append(fs, u...) case shaderir.Mat2: for j := 0; j < t.Length; j++ { u1 := u[4*j : 4*(j+1)] fs = append(fs, u1[0], u1[2], 0, 0, u1[1], u1[3], 0, 0, ) } if t.Length > 0 { fs = fs[:len(fs)-2] } case shaderir.Mat3: for j := 0; j < t.Length; j++ { u1 := u[9*j : 9*(j+1)] fs = append(fs, u1[0], u1[3], u1[6], 0, u1[1], u1[4], u1[7], 0, u1[2], u1[5], u1[8], 0, ) } if t.Length > 0 { fs = fs[:len(fs)-1] } case shaderir.Mat4: for j := 0; j < t.Length; j++ { u1 := u[16*j : 16*(j+1)] fs = append(fs, u1[0], u1[4], u1[8], u1[12], u1[1], u1[5], u1[9], u1[13], u1[2], u1[6], u1[10], u1[14], u1[3], u1[7], u1[11], u1[15], ) } default: panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", t.String())) } default: panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", t.String())) } } return fs }