// Copyright 2023 The Ebitengine Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package directx

import (
	"fmt"

	"golang.org/x/sync/errgroup"

	"github.com/hajimehoshi/ebiten/v2/internal/graphics"
	"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
)

// vertexShaderCache maps vertex shader source code to the blob compiled from
// it, so that shaders with identical vertex source can share a single blob.
//
// compileShader consults this map before compiling and stores a freshly
// compiled vertex blob in it on success. Storing a blob here does not
// increment its reference count.
// NOTE(review): entries are not removed anywhere in this file; according to
// the comment in compileShader that happens in (*Shader).disposeImpl - confirm.
var vertexShaderCache = make(map[string]*_ID3DBlob)

// compileShader compiles the vertex shader source vs (entry point VSMain,
// target vs_4_0) and the pixel shader source ps (entry point PSMain, target
// ps_4_0) and returns the resulting blobs.
//
// Vertex shader blobs are shared through vertexShaderCache: when vs has been
// compiled before, the cached blob is returned with its reference count
// incremented and only the pixel shader is compiled. Otherwise both shaders
// are compiled concurrently and, on success, the new vertex blob is added to
// the cache.
//
// On failure both returned blobs are nil, the cache is left untouched, and
// every reference acquired by this call (the extra reference on a cached
// vertex blob, or a blob whose compilation succeeded while the other one
// failed) is released before returning.
func compileShader(vs, ps string) (vsh, psh *_ID3DBlob, ferr error) {
	flag := uint32(_D3DCOMPILE_OPTIMIZATION_LEVEL3)

	var wg errgroup.Group

	// Vertex shaders are likely the same. If so, reuse the same _ID3DBlob.
	cached, hit := vertexShaderCache[vs]
	if hit {
		// Increment the reference count not to release this object unexpectedly.
		// The value will be removed when the count reached 0.
		// See (*Shader).disposeImpl.
		cached.AddRef()
		vsh = cached
	} else {
		wg.Go(func() error {
			v, err := _D3DCompile([]byte(vs), "shader", nil, nil, "VSMain", "vs_4_0", flag, 0)
			if err != nil {
				return fmt.Errorf("directx: D3DCompile for VSMain failed, original source: %s, %w", vs, err)
			}
			vsh = v
			return nil
		})
	}
	wg.Go(func() error {
		p, err := _D3DCompile([]byte(ps), "shader", nil, nil, "PSMain", "ps_4_0", flag, 0)
		if err != nil {
			return fmt.Errorf("directx: D3DCompile for PSMain failed, original source: %s, %w", ps, err)
		}
		psh = p
		return nil
	})

	// Wait returns only after every goroutine has finished, so vsh and psh
	// are safe to read from here on.
	if err := wg.Wait(); err != nil {
		// Release while vsh and psh still refer to the blobs. Doing this in a
		// deferred function does not work together with `return nil, nil, err`:
		// the return statement overwrites the named results with nil before
		// deferred functions run, so they would find nothing to release.
		if vsh != nil {
			vsh.Release()
		}
		if psh != nil {
			psh.Release()
		}
		return nil, nil, err
	}

	if !hit {
		// Remember the freshly compiled vertex shader for later calls.
		vertexShaderCache[vs] = vsh
	}
	return vsh, psh, nil
}

// constantBufferSize returns the number of 4-byte slots needed to hold the
// uniform variables described by uniformTypes when the i-th variable starts at
// uniformOffsets[i].
//
// uniformOffsets[i] is divided by 4 to obtain the variable's first slot (so
// the offsets are presumably byte offsets - confirm with the caller). If that
// slot lies beyond the size accumulated so far, the size is advanced to it
// before the variable's own slots are added.
//
// Scalars and vectors take one slot per component. A Mat2 takes 6 slots, a
// Mat3 11 and a Mat4 16. Array elements are spaced by a fixed stride (4 slots
// for scalars and vectors, 8/12/16 for Mat2/Mat3/Mat4) and the last element
// takes only its own unpadded size.
//
// It panics when a type, or the element type of an array, is not one of the
// types handled here.
func constantBufferSize(uniformTypes []shaderir.Type, uniformOffsets []int) int {
	total := 0
	for i, typ := range uniformTypes {
		if start := uniformOffsets[i] / 4; total < start {
			total = start
		}

		switch typ.Main {
		case shaderir.Float, shaderir.Int:
			total++
		case shaderir.Vec2, shaderir.IVec2:
			total += 2
		case shaderir.Vec3, shaderir.IVec3:
			total += 3
		case shaderir.Vec4, shaderir.IVec4:
			total += 4
		case shaderir.Mat2:
			total += 6
		case shaderir.Mat3:
			total += 11
		case shaderir.Mat4:
			total += 16
		case shaderir.Array:
			// Each element is aligned to the boundary: stride is the distance
			// between the starts of two consecutive elements and tail is the
			// size of the final element, which is not followed by padding.
			var stride, tail int
			switch typ.Sub[0].Main {
			case shaderir.Float, shaderir.Int:
				stride, tail = 4, 1
			case shaderir.Vec2, shaderir.IVec2:
				stride, tail = 4, 2
			case shaderir.Vec3, shaderir.IVec3:
				stride, tail = 4, 3
			case shaderir.Vec4, shaderir.IVec4:
				stride, tail = 4, 4
			case shaderir.Mat2:
				stride, tail = 8, 6
			case shaderir.Mat3:
				stride, tail = 12, 11
			case shaderir.Mat4:
				stride, tail = 16, 16
			default:
				panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String()))
			}
			total += stride*(typ.Length-1) + tail
		default:
			panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String()))
		}
	}
	return total
}

// adjustUniforms copies the values in uniforms into a newly built slice that
// follows the padded layout given by uniformTypes and uniformOffsets, and
// returns that slice.
//
// The variables are processed in order. For the i-th variable:
//   - the output is first extended with zeros up to uniformOffsets[i]/4
//     elements if it is shorter than that;
//   - the variable's values are read from uniforms starting at the current
//     read position, which then advances by typ.Uint32Count().
//
// Scalars and vectors are copied as they are. Matrices are written in
// transposed order relative to their order in uniforms, with every group of
// values except the last one padded with zeros to four elements. Array
// elements each start on a four-element boundary, with no padding after the
// last element.
//
// The Mat4 whose index is graphics.ProjectionMatrixUniformVariableIndex
// additionally gets the top bit (the float32 sign bit) of every value in its
// second output row flipped.
//
// It panics when a type, or the element type of an array, is not one of the
// types handled here.
func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms []uint32) []uint32 {
	var out []uint32
	var src int
	for i, typ := range uniformTypes {
		// Zero-fill the gap up to the position where this variable starts.
		if start := uniformOffsets[i] / 4; len(out) < start {
			out = append(out, make([]uint32, start-len(out))...)
		}

		consumed := typ.Uint32Count()
		switch typ.Main {
		case shaderir.Float, shaderir.Int:
			out = append(out, uniforms[src:src+1]...)
		case shaderir.Vec2, shaderir.IVec2:
			out = append(out, uniforms[src:src+2]...)
		case shaderir.Vec3, shaderir.IVec3:
			out = append(out, uniforms[src:src+3]...)
		case shaderir.Vec4, shaderir.IVec4:
			out = append(out, uniforms[src:src+4]...)
		case shaderir.Mat2:
			out = append(out,
				uniforms[src+0], uniforms[src+2], 0, 0,
				uniforms[src+1], uniforms[src+3],
			)
		case shaderir.Mat3:
			out = append(out,
				uniforms[src+0], uniforms[src+3], uniforms[src+6], 0,
				uniforms[src+1], uniforms[src+4], uniforms[src+7], 0,
				uniforms[src+2], uniforms[src+5], uniforms[src+8],
			)
		case shaderir.Mat4:
			// In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't
			// match. Then, the Y direction must be inverted for the projection matrix.
			// This is done by inverting the sign bits as float32 values; for any other matrix the mask is 0
			// and the XOR below leaves the values unchanged.
			var flipY uint32
			if i == graphics.ProjectionMatrixUniformVariableIndex {
				flipY = 1 << 31
			}
			out = append(out,
				uniforms[src+0], uniforms[src+4], uniforms[src+8], uniforms[src+12],
				uniforms[src+1]^flipY, uniforms[src+5]^flipY, uniforms[src+9]^flipY, uniforms[src+13]^flipY,
				uniforms[src+2], uniforms[src+6], uniforms[src+10], uniforms[src+14],
				uniforms[src+3], uniforms[src+7], uniforms[src+11], uniforms[src+15],
			)
		case shaderir.Array:
			// Each element is aligned to the boundary.
			// last is the index of the final element, which gets no trailing padding.
			last := typ.Length - 1
			switch typ.Sub[0].Main {
			case shaderir.Float, shaderir.Int:
				for j := 0; j <= last; j++ {
					out = append(out, uniforms[src+j])
					if j != last {
						out = append(out, 0, 0, 0)
					}
				}
			case shaderir.Vec2, shaderir.IVec2:
				for j := 0; j <= last; j++ {
					out = append(out, uniforms[src+2*j:src+2*j+2]...)
					if j != last {
						out = append(out, 0, 0)
					}
				}
			case shaderir.Vec3, shaderir.IVec3:
				for j := 0; j <= last; j++ {
					out = append(out, uniforms[src+3*j:src+3*j+3]...)
					if j != last {
						out = append(out, 0)
					}
				}
			case shaderir.Vec4, shaderir.IVec4:
				// Already a multiple of four values per element: copy in one go.
				out = append(out, uniforms[src:src+4*typ.Length]...)
			case shaderir.Mat2:
				for j := 0; j <= last; j++ {
					m := uniforms[src+4*j : src+4*j+4]
					out = append(out,
						m[0], m[2], 0, 0,
						m[1], m[3], 0, 0,
					)
				}
				// Drop the padding written after the final element.
				if last >= 0 {
					out = out[:len(out)-2]
				}
			case shaderir.Mat3:
				for j := 0; j <= last; j++ {
					m := uniforms[src+9*j : src+9*j+9]
					out = append(out,
						m[0], m[3], m[6], 0,
						m[1], m[4], m[7], 0,
						m[2], m[5], m[8], 0,
					)
				}
				// Drop the padding written after the final element.
				if last >= 0 {
					out = out[:len(out)-1]
				}
			case shaderir.Mat4:
				for j := 0; j <= last; j++ {
					m := uniforms[src+16*j : src+16*j+16]
					out = append(out,
						m[0], m[4], m[8], m[12],
						m[1], m[5], m[9], m[13],
						m[2], m[6], m[10], m[14],
						m[3], m[7], m[11], m[15],
					)
				}
			default:
				panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String()))
			}
		default:
			panic(fmt.Sprintf("directx: not implemented type for uniform variables: %s", typ.String()))
		}

		src += consumed
	}
	return out
}