graphicsdriver: Optimize shader (skip color matrix calculation)

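Skip multiplying by the color matrix when no color matrix is given (colorM is nil), by selecting a shader variant compiled without the color-matrix step.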
This commit is contained in:
Hajime Hoshi 2019-02-16 17:08:53 +09:00
parent 815ed8cda2
commit a8b97c9755
3 changed files with 127 additions and 93 deletions

View File

@ -194,7 +194,7 @@ struct GetColorFromTexel<FILTER_SCREEN, address> {
} }
}; };
template<uint8_t filter, uint8_t address> template<bool useColorM, uint8_t filter, uint8_t address>
float4 FragmentShaderImpl( float4 FragmentShaderImpl(
VertexOut v, VertexOut v,
texture2d<float> texture, texture2d<float> texture,
@ -204,7 +204,9 @@ float4 FragmentShaderImpl(
constant float& scale) { constant float& scale) {
float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale); float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale);
c.rgb /= c.a + (1.0 - sign(c.a)); c.rgb /= c.a + (1.0 - sign(c.a));
c = (color_matrix_body * c) + color_matrix_translation; if (useColorM) {
c = (color_matrix_body * c) + color_matrix_translation;
}
c *= v.color; c *= v.color;
c = clamp(c, 0.0, 1.0); c = clamp(c, 0.0, 1.0);
c.rgb *= c.a; c.rgb *= c.a;
@ -214,31 +216,37 @@ float4 FragmentShaderImpl(
// Define Foo and FooCp macros to force macro replacement. // Define Foo and FooCp macros to force macro replacement.
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899. // See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
#define FragmentShaderFunc(filter, address) \ #define FragmentShaderFunc(useColorM, filter, address) \
FragmentShaderFuncCp(filter, address) FragmentShaderFuncCp(useColorM, filter, address)
#define FragmentShaderFuncCp(filter, address) \ #define FragmentShaderFuncCp(useColorM, filter, address) \
fragment float4 FragmentShader_##filter##_##address( \ fragment float4 FragmentShader_##useColorM##_##filter##_##address( \
VertexOut v [[stage_in]], \ VertexOut v [[stage_in]], \
texture2d<float> texture [[texture(0)]], \ texture2d<float> texture [[texture(0)]], \
constant float2& source_size [[buffer(2)]], \ constant float2& source_size [[buffer(2)]], \
constant float4x4& color_matrix_body [[buffer(3)]], \ constant float4x4& color_matrix_body [[buffer(3)]], \
constant float4& color_matrix_translation [[buffer(4)]], \ constant float4& color_matrix_translation [[buffer(4)]], \
constant float& scale [[buffer(5)]]) { \ constant float& scale [[buffer(5)]]) { \
return FragmentShaderImpl<filter, address>( \ return FragmentShaderImpl<useColorM, filter, address>( \
v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \ v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \
} }
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO) FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO) FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO) FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT) FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT) FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
#undef FragmentShaderFuncName #undef FragmentShaderFuncName
` `
type rpsKey struct { type rpsKey struct {
useColorM bool
filter graphics.Filter filter graphics.Filter
address graphics.Address address graphics.Address
compositeMode graphics.CompositeMode compositeMode graphics.CompositeMode
@ -455,7 +463,7 @@ func (d *Driver) Reset() error {
return err return err
} }
fs, err := lib.MakeFunction( fs, err := lib.MakeFunction(
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero)) fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphics.FilterScreen, graphics.AddressClampToZero))
if err != nil { if err != nil {
return err return err
} }
@ -494,40 +502,47 @@ func (d *Driver) Reset() error {
} }
} }
for _, a := range []graphics.Address{ for _, cm := range []bool{false, true} {
graphics.AddressClampToZero, for _, a := range []graphics.Address{
graphics.AddressRepeat, graphics.AddressClampToZero,
} { graphics.AddressRepeat,
for _, f := range []graphics.Filter{
graphics.FilterNearest,
graphics.FilterLinear,
} { } {
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ { for _, f := range []graphics.Filter{
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a)) graphics.FilterNearest,
if err != nil { graphics.FilterLinear,
return err } {
} for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
rpld := mtl.RenderPipelineDescriptor{ cmi := 0
VertexFunction: vs, if cm {
FragmentFunction: fs, cmi = 1
} }
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d_%d", cmi, f, a))
rpld.ColorAttachments[0].BlendingEnabled = true if err != nil {
return err
}
rpld := mtl.RenderPipelineDescriptor{
VertexFunction: vs,
FragmentFunction: fs,
}
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
rpld.ColorAttachments[0].BlendingEnabled = true
src, dst := c.Operations() src, dst := c.Operations()
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst) rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst)
rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst) rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst)
rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src) rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src)
rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src) rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src)
rps, err := d.device.MakeRenderPipelineState(rpld) rps, err := d.device.MakeRenderPipelineState(rpld)
if err != nil { if err != nil {
return err return err
}
d.rpss[rpsKey{
useColorM: cm,
filter: f,
address: a,
compositeMode: c,
}] = rps
} }
d.rpss[rpsKey{
filter: f,
address: a,
compositeMode: c,
}] = rps
} }
} }
} }
@ -584,6 +599,7 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
rce.SetRenderPipelineState(d.screenRPS) rce.SetRenderPipelineState(d.screenRPS)
} else { } else {
rce.SetRenderPipelineState(d.rpss[rpsKey{ rce.SetRenderPipelineState(d.rpss[rpsKey{
useColorM: colorM != nil,
filter: filter, filter: filter,
address: address, address: address,
compositeMode: mode, compositeMode: mode,
@ -601,9 +617,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
} }
rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2) rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2)
esBody, esTranslate := colorM.UnsafeElements() if colorM != nil {
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3) esBody, esTranslate := colorM.UnsafeElements()
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4) rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3)
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4)
}
scale := float32(d.dst.width) / float32(d.src.width) scale := float32(d.dst.width) / float32(d.src.width)
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5) rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5)

View File

@ -116,8 +116,9 @@ func init() {
} }
type programKey struct { type programKey struct {
filter graphics.Filter useColorM bool
address graphics.Address filter graphics.Filter
address graphics.Address
} }
// openGLState is a state for // openGLState is a state for
@ -200,34 +201,37 @@ func (s *openGLState) reset(context *context) error {
} }
defer context.deleteShader(shaderVertexModelviewNative) defer context.deleteShader(shaderVertexModelviewNative)
for _, a := range []graphics.Address{ for _, c := range []bool{false, true} {
graphics.AddressClampToZero, for _, a := range []graphics.Address{
graphics.AddressRepeat, graphics.AddressClampToZero,
} { graphics.AddressRepeat,
for _, f := range []graphics.Filter{
graphics.FilterNearest,
graphics.FilterLinear,
graphics.FilterScreen,
} { } {
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(f, a)) for _, f := range []graphics.Filter{
if err != nil { graphics.FilterNearest,
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err)) graphics.FilterLinear,
graphics.FilterScreen,
} {
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(c, f, a))
if err != nil {
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err))
}
defer context.deleteShader(shaderFragmentColorMatrixNative)
program, err := context.newProgram([]shader{
shaderVertexModelviewNative,
shaderFragmentColorMatrixNative,
}, theArrayBufferLayout.names())
if err != nil {
return err
}
s.programs[programKey{
useColorM: c,
filter: f,
address: a,
}] = program
} }
defer context.deleteShader(shaderFragmentColorMatrixNative)
program, err := context.newProgram([]shader{
shaderVertexModelviewNative,
shaderFragmentColorMatrixNative,
}, theArrayBufferLayout.names())
if err != nil {
return err
}
s.programs[programKey{
filter: f,
address: a,
}] = program
} }
} }
@ -274,8 +278,9 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
d.context.blendFunc(mode) d.context.blendFunc(mode)
program := d.state.programs[programKey{ program := d.state.programs[programKey{
filter: filter, useColorM: colorM != nil,
address: address, filter: filter,
address: address,
}] }]
if d.state.lastProgram != program { if d.state.lastProgram != program {
d.context.useProgram(program) d.context.useProgram(program)
@ -307,26 +312,24 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
d.state.lastViewportHeight = vh d.state.lastViewportHeight = vh
} }
esBody, esTranslate := colorM.UnsafeElements() if colorM != nil {
esBody, esTranslate := colorM.UnsafeElements()
if !areSameFloat32Array(d.state.lastColorMatrix, esBody) { if !areSameFloat32Array(d.state.lastColorMatrix, esBody) {
d.context.uniformFloats(program, "color_matrix_body", esBody) d.context.uniformFloats(program, "color_matrix_body", esBody)
// ColorM's elements are immutable. It's OK to hold the reference without copying. // ColorM's elements are immutable. It's OK to hold the reference without copying.
d.state.lastColorMatrix = esBody d.state.lastColorMatrix = esBody
} }
if !areSameFloat32Array(d.state.lastColorMatrixTranslation, esTranslate) { if !areSameFloat32Array(d.state.lastColorMatrixTranslation, esTranslate) {
d.context.uniformFloats(program, "color_matrix_translation", esTranslate) d.context.uniformFloats(program, "color_matrix_translation", esTranslate)
// ColorM's elements are immutable. It's OK to hold the reference without copying. // ColorM's elements are immutable. It's OK to hold the reference without copying.
d.state.lastColorMatrixTranslation = esTranslate d.state.lastColorMatrixTranslation = esTranslate
}
} }
sw := graphics.InternalImageSize(srcW) sw := graphics.InternalImageSize(srcW)
sh := graphics.InternalImageSize(srcH) sh := graphics.InternalImageSize(srcH)
if filter == graphics.FilterNearest { if filter != graphics.FilterNearest {
d.state.lastSourceWidth = 0
d.state.lastSourceHeight = 0
} else {
if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh { if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh {
d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)}) d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)})
d.state.lastSourceWidth = sw d.state.lastSourceWidth = sw

View File

@ -61,7 +61,7 @@ func vertexShaderStr() string {
return src return src
} }
func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string { func fragmentShaderStr(useColorM bool, filter graphics.Filter, address graphics.Address) string {
replaces := map[string]string{ replaces := map[string]string{
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero), "{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero),
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat), "{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat),
@ -72,6 +72,11 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
} }
var defs []string var defs []string
if useColorM {
defs = append(defs, "#define USE_COLOR_MATRIX")
}
switch filter { switch filter {
case graphics.FilterNearest: case graphics.FilterNearest:
defs = append(defs, "#define FILTER_NEAREST") defs = append(defs, "#define FILTER_NEAREST")
@ -82,6 +87,7 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
default: default:
panic(fmt.Sprintf("opengl: invalid filter: %d", filter)) panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
} }
switch address { switch address {
case graphics.AddressClampToZero: case graphics.AddressClampToZero:
defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO") defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO")
@ -134,8 +140,11 @@ precision mediump float;
{{.Definitions}} {{.Definitions}}
uniform sampler2D texture; uniform sampler2D texture;
#if defined(USE_COLOR_MATRIX)
uniform mat4 color_matrix_body; uniform mat4 color_matrix_body;
uniform vec4 color_matrix_translation; uniform vec4 color_matrix_translation;
#endif
uniform highp vec2 source_size; uniform highp vec2 source_size;
@ -250,8 +259,12 @@ void main(void) {
// Un-premultiply alpha. // Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing. // When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
color.rgb /= color.a + (1.0 - sign(color.a)); color.rgb /= color.a + (1.0 - sign(color.a));
#if defined(USE_COLOR_MATRIX)
// Apply the color matrix or scale. // Apply the color matrix or scale.
color = (color_matrix_body * color) + color_matrix_translation; color = (color_matrix_body * color) + color_matrix_translation;
#endif
color *= varying_color_scale; color *= varying_color_scale;
color = clamp(color, 0.0, 1.0); color = clamp(color, 0.0, 1.0);
// Premultiply alpha // Premultiply alpha