mirror of
https://github.com/hajimehoshi/ebiten.git
synced 2025-01-11 19:48:54 +01:00
graphicsdriver: Optimize shader (skip color matrix calculation)
Skip multiplying by the color matrix when possible.
This commit is contained in:
parent
815ed8cda2
commit
a8b97c9755
@ -194,7 +194,7 @@ struct GetColorFromTexel<FILTER_SCREEN, address> {
|
||||
}
|
||||
};
|
||||
|
||||
template<uint8_t filter, uint8_t address>
|
||||
template<bool useColorM, uint8_t filter, uint8_t address>
|
||||
float4 FragmentShaderImpl(
|
||||
VertexOut v,
|
||||
texture2d<float> texture,
|
||||
@ -204,7 +204,9 @@ float4 FragmentShaderImpl(
|
||||
constant float& scale) {
|
||||
float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale);
|
||||
c.rgb /= c.a + (1.0 - sign(c.a));
|
||||
if (useColorM) {
|
||||
c = (color_matrix_body * c) + color_matrix_translation;
|
||||
}
|
||||
c *= v.color;
|
||||
c = clamp(c, 0.0, 1.0);
|
||||
c.rgb *= c.a;
|
||||
@ -214,31 +216,37 @@ float4 FragmentShaderImpl(
|
||||
// Define each macro as a Foo/FooCp pair so that the arguments are fully macro-expanded before ## pastes them.
|
||||
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
|
||||
|
||||
#define FragmentShaderFunc(filter, address) \
|
||||
FragmentShaderFuncCp(filter, address)
|
||||
#define FragmentShaderFunc(useColorM, filter, address) \
|
||||
FragmentShaderFuncCp(useColorM, filter, address)
|
||||
|
||||
#define FragmentShaderFuncCp(filter, address) \
|
||||
fragment float4 FragmentShader_##filter##_##address( \
|
||||
#define FragmentShaderFuncCp(useColorM, filter, address) \
|
||||
fragment float4 FragmentShader_##useColorM##_##filter##_##address( \
|
||||
VertexOut v [[stage_in]], \
|
||||
texture2d<float> texture [[texture(0)]], \
|
||||
constant float2& source_size [[buffer(2)]], \
|
||||
constant float4x4& color_matrix_body [[buffer(3)]], \
|
||||
constant float4& color_matrix_translation [[buffer(4)]], \
|
||||
constant float& scale [[buffer(5)]]) { \
|
||||
return FragmentShaderImpl<filter, address>( \
|
||||
return FragmentShaderImpl<useColorM, filter, address>( \
|
||||
v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \
|
||||
}
|
||||
|
||||
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT)
|
||||
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT)
|
||||
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_REPEAT)
|
||||
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_REPEAT)
|
||||
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_REPEAT)
|
||||
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
|
||||
|
||||
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
|
||||
|
||||
#undef FragmentShaderFuncName
|
||||
`
|
||||
|
||||
type rpsKey struct {
|
||||
useColorM bool
|
||||
filter graphics.Filter
|
||||
address graphics.Address
|
||||
compositeMode graphics.CompositeMode
|
||||
@ -455,7 +463,7 @@ func (d *Driver) Reset() error {
|
||||
return err
|
||||
}
|
||||
fs, err := lib.MakeFunction(
|
||||
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero))
|
||||
fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphics.FilterScreen, graphics.AddressClampToZero))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -494,6 +502,7 @@ func (d *Driver) Reset() error {
|
||||
}
|
||||
}
|
||||
|
||||
for _, cm := range []bool{false, true} {
|
||||
for _, a := range []graphics.Address{
|
||||
graphics.AddressClampToZero,
|
||||
graphics.AddressRepeat,
|
||||
@ -503,7 +512,11 @@ func (d *Driver) Reset() error {
|
||||
graphics.FilterLinear,
|
||||
} {
|
||||
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
|
||||
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a))
|
||||
cmi := 0
|
||||
if cm {
|
||||
cmi = 1
|
||||
}
|
||||
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d_%d", cmi, f, a))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -524,6 +537,7 @@ func (d *Driver) Reset() error {
|
||||
return err
|
||||
}
|
||||
d.rpss[rpsKey{
|
||||
useColorM: cm,
|
||||
filter: f,
|
||||
address: a,
|
||||
compositeMode: c,
|
||||
@ -531,6 +545,7 @@ func (d *Driver) Reset() error {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
d.cq = d.device.MakeCommandQueue()
|
||||
return nil
|
||||
@ -584,6 +599,7 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
||||
rce.SetRenderPipelineState(d.screenRPS)
|
||||
} else {
|
||||
rce.SetRenderPipelineState(d.rpss[rpsKey{
|
||||
useColorM: colorM != nil,
|
||||
filter: filter,
|
||||
address: address,
|
||||
compositeMode: mode,
|
||||
@ -601,9 +617,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
||||
}
|
||||
rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2)
|
||||
|
||||
if colorM != nil {
|
||||
esBody, esTranslate := colorM.UnsafeElements()
|
||||
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3)
|
||||
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4)
|
||||
}
|
||||
|
||||
scale := float32(d.dst.width) / float32(d.src.width)
|
||||
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5)
|
||||
|
@ -116,6 +116,7 @@ func init() {
|
||||
}
|
||||
|
||||
type programKey struct {
|
||||
useColorM bool
|
||||
filter graphics.Filter
|
||||
address graphics.Address
|
||||
}
|
||||
@ -200,6 +201,7 @@ func (s *openGLState) reset(context *context) error {
|
||||
}
|
||||
defer context.deleteShader(shaderVertexModelviewNative)
|
||||
|
||||
for _, c := range []bool{false, true} {
|
||||
for _, a := range []graphics.Address{
|
||||
graphics.AddressClampToZero,
|
||||
graphics.AddressRepeat,
|
||||
@ -209,7 +211,7 @@ func (s *openGLState) reset(context *context) error {
|
||||
graphics.FilterLinear,
|
||||
graphics.FilterScreen,
|
||||
} {
|
||||
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(f, a))
|
||||
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(c, f, a))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err))
|
||||
}
|
||||
@ -225,11 +227,13 @@ func (s *openGLState) reset(context *context) error {
|
||||
}
|
||||
|
||||
s.programs[programKey{
|
||||
useColorM: c,
|
||||
filter: f,
|
||||
address: a,
|
||||
}] = program
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.arrayBuffer = theArrayBufferLayout.newArrayBuffer(context)
|
||||
|
||||
@ -274,6 +278,7 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
|
||||
d.context.blendFunc(mode)
|
||||
|
||||
program := d.state.programs[programKey{
|
||||
useColorM: colorM != nil,
|
||||
filter: filter,
|
||||
address: address,
|
||||
}]
|
||||
@ -307,8 +312,8 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
|
||||
d.state.lastViewportHeight = vh
|
||||
}
|
||||
|
||||
if colorM != nil {
|
||||
esBody, esTranslate := colorM.UnsafeElements()
|
||||
|
||||
if !areSameFloat32Array(d.state.lastColorMatrix, esBody) {
|
||||
d.context.uniformFloats(program, "color_matrix_body", esBody)
|
||||
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
||||
@ -319,14 +324,12 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
|
||||
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
||||
d.state.lastColorMatrixTranslation = esTranslate
|
||||
}
|
||||
}
|
||||
|
||||
sw := graphics.InternalImageSize(srcW)
|
||||
sh := graphics.InternalImageSize(srcH)
|
||||
|
||||
if filter == graphics.FilterNearest {
|
||||
d.state.lastSourceWidth = 0
|
||||
d.state.lastSourceHeight = 0
|
||||
} else {
|
||||
if filter != graphics.FilterNearest {
|
||||
if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh {
|
||||
d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)})
|
||||
d.state.lastSourceWidth = sw
|
||||
|
@ -61,7 +61,7 @@ func vertexShaderStr() string {
|
||||
return src
|
||||
}
|
||||
|
||||
func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string {
|
||||
func fragmentShaderStr(useColorM bool, filter graphics.Filter, address graphics.Address) string {
|
||||
replaces := map[string]string{
|
||||
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero),
|
||||
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat),
|
||||
@ -72,6 +72,11 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
|
||||
}
|
||||
|
||||
var defs []string
|
||||
|
||||
if useColorM {
|
||||
defs = append(defs, "#define USE_COLOR_MATRIX")
|
||||
}
|
||||
|
||||
switch filter {
|
||||
case graphics.FilterNearest:
|
||||
defs = append(defs, "#define FILTER_NEAREST")
|
||||
@ -82,6 +87,7 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
|
||||
default:
|
||||
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
|
||||
}
|
||||
|
||||
switch address {
|
||||
case graphics.AddressClampToZero:
|
||||
defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO")
|
||||
@ -134,8 +140,11 @@ precision mediump float;
|
||||
{{.Definitions}}
|
||||
|
||||
uniform sampler2D texture;
|
||||
|
||||
#if defined(USE_COLOR_MATRIX)
|
||||
uniform mat4 color_matrix_body;
|
||||
uniform vec4 color_matrix_translation;
|
||||
#endif
|
||||
|
||||
uniform highp vec2 source_size;
|
||||
|
||||
@ -250,8 +259,12 @@ void main(void) {
|
||||
// Un-premultiply alpha.
|
||||
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
|
||||
color.rgb /= color.a + (1.0 - sign(color.a));
|
||||
|
||||
#if defined(USE_COLOR_MATRIX)
|
||||
// Apply the color matrix.
|
||||
color = (color_matrix_body * color) + color_matrix_translation;
|
||||
#endif
|
||||
|
||||
color *= varying_color_scale;
|
||||
color = clamp(color, 0.0, 1.0);
|
||||
// Premultiply alpha
|
||||
|
Loading…
Reference in New Issue
Block a user