mirror of
https://github.com/hajimehoshi/ebiten.git
synced 2024-12-25 11:18:54 +01:00
graphicsdriver/metal: Reduce 'if' from shader programs (#762)
This commit is contained in:
parent
27dbf1bbb7
commit
81101e0f48
@ -32,12 +32,12 @@ import (
|
|||||||
|
|
||||||
const source = `#include <metal_stdlib>
|
const source = `#include <metal_stdlib>
|
||||||
|
|
||||||
#define FILTER_NEAREST ({{.FilterNearest}})
|
#define FILTER_NEAREST {{.FilterNearest}}
|
||||||
#define FILTER_LINEAR ({{.FilterLinear}})
|
#define FILTER_LINEAR {{.FilterLinear}}
|
||||||
#define FILTER_SCREEN ({{.FilterScreen}})
|
#define FILTER_SCREEN {{.FilterScreen}}
|
||||||
|
|
||||||
#define ADDRESS_CLAMP_TO_ZERO ({{.AddressClampToZero}})
|
#define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}}
|
||||||
#define ADDRESS_REPEAT ({{.AddressRepeat}})
|
#define ADDRESS_REPEAT {{.AddressRepeat}}
|
||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
@ -99,30 +99,27 @@ float FloorMod(float x, float y) {
|
|||||||
return x - y * floor(x/y);
|
return x - y * floor(x/y);
|
||||||
}
|
}
|
||||||
|
|
||||||
float2 AdjustTexelByAddress(float2 p, float4 tex_region, uint8_t address) {
|
template<uint8_t address>
|
||||||
switch (address) {
|
float2 AdjustTexelByAddress(float2 p, float4 tex_region) {
|
||||||
case ADDRESS_CLAMP_TO_ZERO: {
|
if (address == ADDRESS_CLAMP_TO_ZERO) {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
case ADDRESS_REPEAT: {
|
if (address == ADDRESS_REPEAT) {
|
||||||
float2 o = float2(tex_region[0], tex_region[1]);
|
float2 o = float2(tex_region[0], tex_region[1]);
|
||||||
float2 size = float2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]);
|
float2 size = float2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]);
|
||||||
return float2(FloorMod((p.x - o.x), size.x) + o.x, FloorMod((p.y - o.y), size.y) + o.y);
|
return float2(FloorMod((p.x - o.x), size.x) + o.x, FloorMod((p.y - o.y), size.y) + o.y);
|
||||||
}
|
}
|
||||||
default:
|
// Not reached.
|
||||||
// Not reached.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
template<uint8_t filter, uint8_t address>
|
||||||
texture2d<float> texture [[texture(0)]],
|
float4 fragmentShader(
|
||||||
constant float4x4& color_matrix_body [[buffer(2)]],
|
VertexOut v,
|
||||||
constant float4& color_matrix_translation [[buffer(3)]],
|
texture2d<float> texture,
|
||||||
constant uint8_t& filter [[buffer(4)]],
|
constant float4x4& color_matrix_body,
|
||||||
constant uint8_t& address [[buffer(5)]],
|
constant float4& color_matrix_translation,
|
||||||
constant float& scale [[buffer(6)]]) {
|
constant float& scale) {
|
||||||
constexpr sampler texture_sampler(filter::nearest);
|
constexpr sampler texture_sampler(filter::nearest);
|
||||||
float2 source_size = 1;
|
float2 source_size = 1;
|
||||||
while (source_size.x < texture.get_width()) {
|
while (source_size.x < texture.get_width()) {
|
||||||
@ -135,9 +132,8 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
|||||||
|
|
||||||
float4 c;
|
float4 c;
|
||||||
|
|
||||||
switch (filter) {
|
if (filter == FILTER_NEAREST) {
|
||||||
case FILTER_NEAREST: {
|
float2 p = AdjustTexelByAddress<address>(v.tex, v.tex_region);
|
||||||
float2 p = AdjustTexelByAddress(v.tex, v.tex_region, address);
|
|
||||||
c = texture.sample(texture_sampler, p);
|
c = texture.sample(texture_sampler, p);
|
||||||
if (p.x < v.tex_region[0] ||
|
if (p.x < v.tex_region[0] ||
|
||||||
p.y < v.tex_region[1] ||
|
p.y < v.tex_region[1] ||
|
||||||
@ -145,15 +141,12 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
|||||||
(v.tex_region[3] - texel_size.y / 512.0) <= p.y) {
|
(v.tex_region[3] - texel_size.y / 512.0) <= p.y) {
|
||||||
c = 0;
|
c = 0;
|
||||||
}
|
}
|
||||||
break;
|
} else if (filter == FILTER_LINEAR) {
|
||||||
}
|
|
||||||
|
|
||||||
case FILTER_LINEAR: {
|
|
||||||
float2 p0 = v.tex - texel_size / 2.0;
|
float2 p0 = v.tex - texel_size / 2.0;
|
||||||
float2 p1 = v.tex + texel_size / 2.0;
|
float2 p1 = v.tex + texel_size / 2.0;
|
||||||
p1 = AdjustTexel(source_size, p0, p1);
|
p1 = AdjustTexel(source_size, p0, p1);
|
||||||
p0 = AdjustTexelByAddress(p0, v.tex_region, address);
|
p0 = AdjustTexelByAddress<address>(p0, v.tex_region);
|
||||||
p1 = AdjustTexelByAddress(p1, v.tex_region, address);
|
p1 = AdjustTexelByAddress<address>(p1, v.tex_region);
|
||||||
|
|
||||||
float4 c0 = texture.sample(texture_sampler, p0);
|
float4 c0 = texture.sample(texture_sampler, p0);
|
||||||
float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y));
|
float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y));
|
||||||
@ -179,10 +172,7 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
|||||||
|
|
||||||
float2 rate = fract(p0 * source_size);
|
float2 rate = fract(p0 * source_size);
|
||||||
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
|
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
|
||||||
break;
|
} else if (filter == FILTER_SCREEN) {
|
||||||
}
|
|
||||||
|
|
||||||
case FILTER_SCREEN: {
|
|
||||||
float2 p0 = v.tex - texel_size / 2.0 / scale;
|
float2 p0 = v.tex - texel_size / 2.0 / scale;
|
||||||
float2 p1 = v.tex + texel_size / 2.0 / scale;
|
float2 p1 = v.tex + texel_size / 2.0 / scale;
|
||||||
p1 = AdjustTexel(source_size, p0, p1);
|
p1 = AdjustTexel(source_size, p0, p1);
|
||||||
@ -195,10 +185,7 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
|||||||
float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale;
|
float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale;
|
||||||
float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
|
float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
|
||||||
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
|
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
|
||||||
break;
|
} else {
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
// Not reached.
|
// Not reached.
|
||||||
discard_fragment();
|
discard_fragment();
|
||||||
return float4(0);
|
return float4(0);
|
||||||
@ -213,15 +200,46 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
|
|||||||
c.rgb *= c.a;
|
c.rgb *= c.a;
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
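// Define FragmentShaderFunc and its helper FragmentShaderFuncCp so that the arguments (e.g. FILTER_NEAREST) are macro-replaced with their numeric values before ## pastes them into the function name.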
|
||||||
|
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
|
||||||
|
|
||||||
|
#define FragmentShaderFunc(filter, address) \
|
||||||
|
FragmentShaderFuncCp(filter, address) //
|
||||||
|
|
||||||
|
#define FragmentShaderFuncCp(filter, address) \
|
||||||
|
fragment float4 FragmentShader_##filter##_##address( \
|
||||||
|
VertexOut v [[stage_in]], \
|
||||||
|
texture2d<float> texture [[texture(0)]], \
|
||||||
|
constant float4x4& color_matrix_body [[buffer(2)]], \
|
||||||
|
constant float4& color_matrix_translation [[buffer(3)]], \
|
||||||
|
constant float& scale [[buffer(4)]]) { \
|
||||||
|
return fragmentShader<filter, address>( \
|
||||||
|
v, texture, color_matrix_body, color_matrix_translation, scale); \
|
||||||
|
}
|
||||||
|
|
||||||
|
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT)
|
||||||
|
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT)
|
||||||
|
|
||||||
|
// The entry-point generator macros are only needed for the instantiations
// above; undefine both of them by their actual names.
#undef FragmentShaderFunc
#undef FragmentShaderFuncCp
|
||||||
`
|
`
|
||||||
|
|
||||||
|
type rpsKey struct {
|
||||||
|
filter graphics.Filter
|
||||||
|
address graphics.Address
|
||||||
|
compositeMode graphics.CompositeMode
|
||||||
|
}
|
||||||
|
|
||||||
type Driver struct {
|
type Driver struct {
|
||||||
window uintptr
|
window uintptr
|
||||||
|
|
||||||
device mtl.Device
|
device mtl.Device
|
||||||
ml ca.MetalLayer
|
ml ca.MetalLayer
|
||||||
screenRPS mtl.RenderPipelineState
|
screenRPS mtl.RenderPipelineState
|
||||||
rpss map[graphics.CompositeMode]mtl.RenderPipelineState
|
rpss map[rpsKey]mtl.RenderPipelineState
|
||||||
cq mtl.CommandQueue
|
cq mtl.CommandQueue
|
||||||
cb mtl.CommandBuffer
|
cb mtl.CommandBuffer
|
||||||
|
|
||||||
@ -386,7 +404,7 @@ func (d *Driver) Reset() error {
|
|||||||
|
|
||||||
// TODO: Release existing rpss
|
// TODO: Release existing rpss
|
||||||
if d.rpss == nil {
|
if d.rpss == nil {
|
||||||
d.rpss = map[graphics.CompositeMode]mtl.RenderPipelineState{}
|
d.rpss = map[rpsKey]mtl.RenderPipelineState{}
|
||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
@ -425,7 +443,8 @@ func (d *Driver) Reset() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
fs, err := lib.MakeFunction("FragmentShader")
|
fs, err := lib.MakeFunction(
|
||||||
|
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -464,24 +483,42 @@ func (d *Driver) Reset() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
|
for _, a := range []graphics.Address{
|
||||||
rpld := mtl.RenderPipelineDescriptor{
|
graphics.AddressClampToZero,
|
||||||
VertexFunction: vs,
|
graphics.AddressRepeat,
|
||||||
FragmentFunction: fs,
|
} {
|
||||||
}
|
for _, f := range []graphics.Filter{
|
||||||
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
|
graphics.FilterNearest,
|
||||||
rpld.ColorAttachments[0].BlendingEnabled = true
|
graphics.FilterLinear,
|
||||||
|
} {
|
||||||
|
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
|
||||||
|
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
rpld := mtl.RenderPipelineDescriptor{
|
||||||
|
VertexFunction: vs,
|
||||||
|
FragmentFunction: fs,
|
||||||
|
}
|
||||||
|
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
|
||||||
|
rpld.ColorAttachments[0].BlendingEnabled = true
|
||||||
|
|
||||||
src, dst := c.Operations()
|
src, dst := c.Operations()
|
||||||
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst)
|
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst)
|
||||||
rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst)
|
rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst)
|
||||||
rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src)
|
rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src)
|
||||||
rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src)
|
rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src)
|
||||||
rps, err := d.device.MakeRenderPipelineState(rpld)
|
rps, err := d.device.MakeRenderPipelineState(rpld)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
d.rpss[rpsKey{
|
||||||
|
filter: f,
|
||||||
|
address: a,
|
||||||
|
compositeMode: c,
|
||||||
|
}] = rps
|
||||||
|
}
|
||||||
}
|
}
|
||||||
d.rpss[c] = rps
|
|
||||||
}
|
}
|
||||||
|
|
||||||
d.cq = d.device.MakeCommandQueue()
|
d.cq = d.device.MakeCommandQueue()
|
||||||
@ -535,7 +572,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
|||||||
if d.dst.screen {
|
if d.dst.screen {
|
||||||
rce.SetRenderPipelineState(d.screenRPS)
|
rce.SetRenderPipelineState(d.screenRPS)
|
||||||
} else {
|
} else {
|
||||||
rce.SetRenderPipelineState(d.rpss[mode])
|
rce.SetRenderPipelineState(d.rpss[rpsKey{
|
||||||
|
filter: filter,
|
||||||
|
address: address,
|
||||||
|
compositeMode: mode,
|
||||||
|
}])
|
||||||
}
|
}
|
||||||
rce.SetViewport(mtl.Viewport{0, 0, float64(w), float64(h), -1, 1})
|
rce.SetViewport(mtl.Viewport{0, 0, float64(w), float64(h), -1, 1})
|
||||||
rce.SetVertexBuffer(d.vb, 0, 0)
|
rce.SetVertexBuffer(d.vb, 0, 0)
|
||||||
@ -547,14 +588,8 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
|||||||
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 2)
|
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 2)
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 3)
|
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 3)
|
||||||
|
|
||||||
f := uint8(filter)
|
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&f), 1, 4)
|
|
||||||
|
|
||||||
a := uint8(address)
|
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&a), 1, 5)
|
|
||||||
|
|
||||||
scale := float32(d.dst.width) / float32(d.src.width)
|
scale := float32(d.dst.width) / float32(d.src.width)
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 6)
|
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 4)
|
||||||
|
|
||||||
if d.src != nil {
|
if d.src != nil {
|
||||||
rce.SetFragmentTexture(d.src.texture, 0)
|
rce.SetFragmentTexture(d.src.texture, 0)
|
||||||
|
Loading…
Reference in New Issue
Block a user