graphicsdriver: Optimize shader (skip color matrix calculation)

Skip the color matrix multiplication in the fragment shaders when no color matrix is given (colorM is nil).
This commit is contained in:
Hajime Hoshi 2019-02-16 17:08:53 +09:00
parent 815ed8cda2
commit a8b97c9755
3 changed files with 127 additions and 93 deletions

View File

@ -194,7 +194,7 @@ struct GetColorFromTexel<FILTER_SCREEN, address> {
}
};
template<uint8_t filter, uint8_t address>
template<bool useColorM, uint8_t filter, uint8_t address>
float4 FragmentShaderImpl(
VertexOut v,
texture2d<float> texture,
@ -204,7 +204,9 @@ float4 FragmentShaderImpl(
constant float& scale) {
float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale);
c.rgb /= c.a + (1.0 - sign(c.a));
if (useColorM) {
c = (color_matrix_body * c) + color_matrix_translation;
}
c *= v.color;
c = clamp(c, 0.0, 1.0);
c.rgb *= c.a;
@ -214,31 +216,37 @@ float4 FragmentShaderImpl(
// Define Foo and FooCp macros to force macro replacement.
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
#define FragmentShaderFunc(filter, address) \
FragmentShaderFuncCp(filter, address)
#define FragmentShaderFunc(useColorM, filter, address) \
FragmentShaderFuncCp(useColorM, filter, address)
#define FragmentShaderFuncCp(filter, address) \
fragment float4 FragmentShader_##filter##_##address( \
#define FragmentShaderFuncCp(useColorM, filter, address) \
fragment float4 FragmentShader_##useColorM##_##filter##_##address( \
VertexOut v [[stage_in]], \
texture2d<float> texture [[texture(0)]], \
constant float2& source_size [[buffer(2)]], \
constant float4x4& color_matrix_body [[buffer(3)]], \
constant float4& color_matrix_translation [[buffer(4)]], \
constant float& scale [[buffer(5)]]) { \
return FragmentShaderImpl<filter, address>( \
return FragmentShaderImpl<useColorM, filter, address>( \
v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \
}
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_REPEAT)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
// Undefine the helper macros now that every fragment shader entry point has
// been generated. The names must match the #define directives above.
#undef FragmentShaderFunc
#undef FragmentShaderFuncCp
`
type rpsKey struct {
useColorM bool
filter graphics.Filter
address graphics.Address
compositeMode graphics.CompositeMode
@ -455,7 +463,7 @@ func (d *Driver) Reset() error {
return err
}
fs, err := lib.MakeFunction(
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero))
fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphics.FilterScreen, graphics.AddressClampToZero))
if err != nil {
return err
}
@ -494,6 +502,7 @@ func (d *Driver) Reset() error {
}
}
for _, cm := range []bool{false, true} {
for _, a := range []graphics.Address{
graphics.AddressClampToZero,
graphics.AddressRepeat,
@ -503,7 +512,11 @@ func (d *Driver) Reset() error {
graphics.FilterLinear,
} {
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a))
cmi := 0
if cm {
cmi = 1
}
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d_%d", cmi, f, a))
if err != nil {
return err
}
@ -524,6 +537,7 @@ func (d *Driver) Reset() error {
return err
}
d.rpss[rpsKey{
useColorM: cm,
filter: f,
address: a,
compositeMode: c,
@ -531,6 +545,7 @@ func (d *Driver) Reset() error {
}
}
}
}
d.cq = d.device.MakeCommandQueue()
return nil
@ -584,6 +599,7 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
rce.SetRenderPipelineState(d.screenRPS)
} else {
rce.SetRenderPipelineState(d.rpss[rpsKey{
useColorM: colorM != nil,
filter: filter,
address: address,
compositeMode: mode,
@ -601,9 +617,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
}
rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2)
if colorM != nil {
esBody, esTranslate := colorM.UnsafeElements()
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3)
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4)
}
scale := float32(d.dst.width) / float32(d.src.width)
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5)

View File

@ -116,6 +116,7 @@ func init() {
}
// programKey identifies one compiled shader program in the programs map.
// reset builds a program for each combination of the fields below, and
// useProgram looks the program up with the same key.
type programKey struct {
// useColorM selects the fragment shader variant generated with
// USE_COLOR_MATRIX defined; useProgram sets it to colorM != nil.
useColorM bool
// filter is the filter mode passed to fragmentShaderStr for this program.
filter graphics.Filter
// address is the address mode passed to fragmentShaderStr for this program.
address graphics.Address
}
@ -200,6 +201,7 @@ func (s *openGLState) reset(context *context) error {
}
defer context.deleteShader(shaderVertexModelviewNative)
for _, c := range []bool{false, true} {
for _, a := range []graphics.Address{
graphics.AddressClampToZero,
graphics.AddressRepeat,
@ -209,7 +211,7 @@ func (s *openGLState) reset(context *context) error {
graphics.FilterLinear,
graphics.FilterScreen,
} {
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(f, a))
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(c, f, a))
if err != nil {
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err))
}
@ -225,11 +227,13 @@ func (s *openGLState) reset(context *context) error {
}
s.programs[programKey{
useColorM: c,
filter: f,
address: a,
}] = program
}
}
}
s.arrayBuffer = theArrayBufferLayout.newArrayBuffer(context)
@ -274,6 +278,7 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
d.context.blendFunc(mode)
program := d.state.programs[programKey{
useColorM: colorM != nil,
filter: filter,
address: address,
}]
@ -307,8 +312,8 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
d.state.lastViewportHeight = vh
}
if colorM != nil {
esBody, esTranslate := colorM.UnsafeElements()
if !areSameFloat32Array(d.state.lastColorMatrix, esBody) {
d.context.uniformFloats(program, "color_matrix_body", esBody)
// ColorM's elements are immutable. It's OK to hold the reference without copying.
@ -319,14 +324,12 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
// ColorM's elements are immutable. It's OK to hold the reference without copying.
d.state.lastColorMatrixTranslation = esTranslate
}
}
sw := graphics.InternalImageSize(srcW)
sh := graphics.InternalImageSize(srcH)
if filter == graphics.FilterNearest {
d.state.lastSourceWidth = 0
d.state.lastSourceHeight = 0
} else {
if filter != graphics.FilterNearest {
if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh {
d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)})
d.state.lastSourceWidth = sw

View File

@ -61,7 +61,7 @@ func vertexShaderStr() string {
return src
}
func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string {
func fragmentShaderStr(useColorM bool, filter graphics.Filter, address graphics.Address) string {
replaces := map[string]string{
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero),
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat),
@ -72,6 +72,11 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
}
var defs []string
if useColorM {
defs = append(defs, "#define USE_COLOR_MATRIX")
}
switch filter {
case graphics.FilterNearest:
defs = append(defs, "#define FILTER_NEAREST")
@ -82,6 +87,7 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
default:
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
}
switch address {
case graphics.AddressClampToZero:
defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO")
@ -134,8 +140,11 @@ precision mediump float;
{{.Definitions}}
uniform sampler2D texture;
#if defined(USE_COLOR_MATRIX)
uniform mat4 color_matrix_body;
uniform vec4 color_matrix_translation;
#endif
uniform highp vec2 source_size;
@ -250,8 +259,12 @@ void main(void) {
// Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
color.rgb /= color.a + (1.0 - sign(color.a));
#if defined(USE_COLOR_MATRIX)
// Apply the color matrix.
color = (color_matrix_body * color) + color_matrix_translation;
#endif
color *= varying_color_scale;
color = clamp(color, 0.0, 1.0);
// Premultiply alpha