graphicsdriver/metal: Reduce 'if' from shader programs (#762)

This commit is contained in:
Hajime Hoshi 2019-02-13 21:46:39 +09:00
parent 27dbf1bbb7
commit 81101e0f48

View File

@ -32,12 +32,12 @@ import (
const source = `#include <metal_stdlib> const source = `#include <metal_stdlib>
#define FILTER_NEAREST ({{.FilterNearest}}) #define FILTER_NEAREST {{.FilterNearest}}
#define FILTER_LINEAR ({{.FilterLinear}}) #define FILTER_LINEAR {{.FilterLinear}}
#define FILTER_SCREEN ({{.FilterScreen}}) #define FILTER_SCREEN {{.FilterScreen}}
#define ADDRESS_CLAMP_TO_ZERO ({{.AddressClampToZero}}) #define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}}
#define ADDRESS_REPEAT ({{.AddressRepeat}}) #define ADDRESS_REPEAT {{.AddressRepeat}}
using namespace metal; using namespace metal;
@ -99,30 +99,27 @@ float FloorMod(float x, float y) {
return x - y * floor(x/y); return x - y * floor(x/y);
} }
float2 AdjustTexelByAddress(float2 p, float4 tex_region, uint8_t address) { template<uint8_t address>
switch (address) { float2 AdjustTexelByAddress(float2 p, float4 tex_region) {
case ADDRESS_CLAMP_TO_ZERO: { if (address == ADDRESS_CLAMP_TO_ZERO) {
return p; return p;
} }
case ADDRESS_REPEAT: { if (address == ADDRESS_REPEAT) {
float2 o = float2(tex_region[0], tex_region[1]); float2 o = float2(tex_region[0], tex_region[1]);
float2 size = float2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]); float2 size = float2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]);
return float2(FloorMod((p.x - o.x), size.x) + o.x, FloorMod((p.y - o.y), size.y) + o.y); return float2(FloorMod((p.x - o.x), size.x) + o.x, FloorMod((p.y - o.y), size.y) + o.y);
} }
default:
// Not reached. // Not reached.
break;
}
return 0.0; return 0.0;
} }
fragment float4 FragmentShader(VertexOut v [[stage_in]], template<uint8_t filter, uint8_t address>
texture2d<float> texture [[texture(0)]], float4 fragmentShader(
constant float4x4& color_matrix_body [[buffer(2)]], VertexOut v,
constant float4& color_matrix_translation [[buffer(3)]], texture2d<float> texture,
constant uint8_t& filter [[buffer(4)]], constant float4x4& color_matrix_body,
constant uint8_t& address [[buffer(5)]], constant float4& color_matrix_translation,
constant float& scale [[buffer(6)]]) { constant float& scale) {
constexpr sampler texture_sampler(filter::nearest); constexpr sampler texture_sampler(filter::nearest);
float2 source_size = 1; float2 source_size = 1;
while (source_size.x < texture.get_width()) { while (source_size.x < texture.get_width()) {
@ -135,9 +132,8 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
float4 c; float4 c;
switch (filter) { if (filter == FILTER_NEAREST) {
case FILTER_NEAREST: { float2 p = AdjustTexelByAddress<address>(v.tex, v.tex_region);
float2 p = AdjustTexelByAddress(v.tex, v.tex_region, address);
c = texture.sample(texture_sampler, p); c = texture.sample(texture_sampler, p);
if (p.x < v.tex_region[0] || if (p.x < v.tex_region[0] ||
p.y < v.tex_region[1] || p.y < v.tex_region[1] ||
@ -145,15 +141,12 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
(v.tex_region[3] - texel_size.y / 512.0) <= p.y) { (v.tex_region[3] - texel_size.y / 512.0) <= p.y) {
c = 0; c = 0;
} }
break; } else if (filter == FILTER_LINEAR) {
}
case FILTER_LINEAR: {
float2 p0 = v.tex - texel_size / 2.0; float2 p0 = v.tex - texel_size / 2.0;
float2 p1 = v.tex + texel_size / 2.0; float2 p1 = v.tex + texel_size / 2.0;
p1 = AdjustTexel(source_size, p0, p1); p1 = AdjustTexel(source_size, p0, p1);
p0 = AdjustTexelByAddress(p0, v.tex_region, address); p0 = AdjustTexelByAddress<address>(p0, v.tex_region);
p1 = AdjustTexelByAddress(p1, v.tex_region, address); p1 = AdjustTexelByAddress<address>(p1, v.tex_region);
float4 c0 = texture.sample(texture_sampler, p0); float4 c0 = texture.sample(texture_sampler, p0);
float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y)); float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y));
@ -179,10 +172,7 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
float2 rate = fract(p0 * source_size); float2 rate = fract(p0 * source_size);
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
break; } else if (filter == FILTER_SCREEN) {
}
case FILTER_SCREEN: {
float2 p0 = v.tex - texel_size / 2.0 / scale; float2 p0 = v.tex - texel_size / 2.0 / scale;
float2 p1 = v.tex + texel_size / 2.0 / scale; float2 p1 = v.tex + texel_size / 2.0 / scale;
p1 = AdjustTexel(source_size, p0, p1); p1 = AdjustTexel(source_size, p0, p1);
@ -195,10 +185,7 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale; float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale;
float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); c = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
break; } else {
}
default:
// Not reached. // Not reached.
discard_fragment(); discard_fragment();
return float4(0); return float4(0);
@ -213,15 +200,46 @@ fragment float4 FragmentShader(VertexOut v [[stage_in]],
c.rgb *= c.a; c.rgb *= c.a;
return c; return c;
} }
// FragmentShaderFunc(filter, address) defines one concrete fragment entry
// point named FragmentShader_<filter>_<address>, where <filter> and <address>
// are the expanded values of the FILTER_* and ADDRESS_* macros. The generated
// function only binds the shader arguments and forwards them to the
// fragmentShader<filter, address> template.
//
// Two macro levels are required: operands of ## are not macro-expanded, so
// FragmentShaderFunc exists only to expand its arguments before
// FragmentShaderFuncCp pastes them into the function name.
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
#define FragmentShaderFunc(filter, address) \
  FragmentShaderFuncCp(filter, address)

#define FragmentShaderFuncCp(filter, address) \
  fragment float4 FragmentShader_##filter##_##address( \
      VertexOut v [[stage_in]], \
      texture2d<float> texture [[texture(0)]], \
      constant float4x4& color_matrix_body [[buffer(2)]], \
      constant float4& color_matrix_translation [[buffer(3)]], \
      constant float& scale [[buffer(4)]]) { \
    return fragmentShader<filter, address>( \
        v, texture, color_matrix_body, color_matrix_translation, scale); \
  }

// Instantiate every filter/address combination that gets an entry point.
// FILTER_SCREEN is instantiated with ADDRESS_CLAMP_TO_ZERO only.
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT)

// The helper macros are not needed once the entry points are generated.
#undef FragmentShaderFuncCp
#undef FragmentShaderFunc
` `
// rpsKey is the key type of Driver.rpss. It identifies one render pipeline
// state by the combination of filter, address and composite mode it was
// created for.
type rpsKey struct {
// filter is the texture filter; together with address it determines which
// FragmentShader_<filter>_<address> function is used for the pipeline.
filter graphics.Filter
// address is the texture address mode.
address graphics.Address
// compositeMode is the composite mode the pipeline was created for.
compositeMode graphics.CompositeMode
}
type Driver struct { type Driver struct {
window uintptr window uintptr
device mtl.Device device mtl.Device
ml ca.MetalLayer ml ca.MetalLayer
screenRPS mtl.RenderPipelineState screenRPS mtl.RenderPipelineState
rpss map[graphics.CompositeMode]mtl.RenderPipelineState rpss map[rpsKey]mtl.RenderPipelineState
cq mtl.CommandQueue cq mtl.CommandQueue
cb mtl.CommandBuffer cb mtl.CommandBuffer
@ -386,7 +404,7 @@ func (d *Driver) Reset() error {
// TODO: Release existing rpss // TODO: Release existing rpss
if d.rpss == nil { if d.rpss == nil {
d.rpss = map[graphics.CompositeMode]mtl.RenderPipelineState{} d.rpss = map[rpsKey]mtl.RenderPipelineState{}
} }
var err error var err error
@ -425,7 +443,8 @@ func (d *Driver) Reset() error {
if err != nil { if err != nil {
return err return err
} }
fs, err := lib.MakeFunction("FragmentShader") fs, err := lib.MakeFunction(
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero))
if err != nil { if err != nil {
return err return err
} }
@ -464,7 +483,19 @@ func (d *Driver) Reset() error {
} }
} }
for _, a := range []graphics.Address{
graphics.AddressClampToZero,
graphics.AddressRepeat,
} {
for _, f := range []graphics.Filter{
graphics.FilterNearest,
graphics.FilterLinear,
} {
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ { for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a))
if err != nil {
return err
}
rpld := mtl.RenderPipelineDescriptor{ rpld := mtl.RenderPipelineDescriptor{
VertexFunction: vs, VertexFunction: vs,
FragmentFunction: fs, FragmentFunction: fs,
@ -481,7 +512,13 @@ func (d *Driver) Reset() error {
if err != nil { if err != nil {
return err return err
} }
d.rpss[c] = rps d.rpss[rpsKey{
filter: f,
address: a,
compositeMode: c,
}] = rps
}
}
} }
d.cq = d.device.MakeCommandQueue() d.cq = d.device.MakeCommandQueue()
@ -535,7 +572,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
if d.dst.screen { if d.dst.screen {
rce.SetRenderPipelineState(d.screenRPS) rce.SetRenderPipelineState(d.screenRPS)
} else { } else {
rce.SetRenderPipelineState(d.rpss[mode]) rce.SetRenderPipelineState(d.rpss[rpsKey{
filter: filter,
address: address,
compositeMode: mode,
}])
} }
rce.SetViewport(mtl.Viewport{0, 0, float64(w), float64(h), -1, 1}) rce.SetViewport(mtl.Viewport{0, 0, float64(w), float64(h), -1, 1})
rce.SetVertexBuffer(d.vb, 0, 0) rce.SetVertexBuffer(d.vb, 0, 0)
@ -547,14 +588,8 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 2) rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 2)
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 3) rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 3)
f := uint8(filter)
rce.SetFragmentBytes(unsafe.Pointer(&f), 1, 4)
a := uint8(address)
rce.SetFragmentBytes(unsafe.Pointer(&a), 1, 5)
scale := float32(d.dst.width) / float32(d.src.width) scale := float32(d.dst.width) / float32(d.src.width)
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 6) rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 4)
if d.src != nil { if d.src != nil {
rce.SetFragmentTexture(d.src.texture, 0) rce.SetFragmentTexture(d.src.texture, 0)