mirror of
https://github.com/hajimehoshi/ebiten.git
synced 2025-01-11 19:48:54 +01:00
graphicsdriver: Optimize shader (skip color matrix calculation)
Skip multiplying with a color matrix when possible.
This commit is contained in:
parent
815ed8cda2
commit
a8b97c9755
@ -194,7 +194,7 @@ struct GetColorFromTexel<FILTER_SCREEN, address> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<uint8_t filter, uint8_t address>
|
template<bool useColorM, uint8_t filter, uint8_t address>
|
||||||
float4 FragmentShaderImpl(
|
float4 FragmentShaderImpl(
|
||||||
VertexOut v,
|
VertexOut v,
|
||||||
texture2d<float> texture,
|
texture2d<float> texture,
|
||||||
@ -204,7 +204,9 @@ float4 FragmentShaderImpl(
|
|||||||
constant float& scale) {
|
constant float& scale) {
|
||||||
float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale);
|
float4 c = GetColorFromTexel<filter, address>().Do(v, texture, source_size, scale);
|
||||||
c.rgb /= c.a + (1.0 - sign(c.a));
|
c.rgb /= c.a + (1.0 - sign(c.a));
|
||||||
c = (color_matrix_body * c) + color_matrix_translation;
|
if (useColorM) {
|
||||||
|
c = (color_matrix_body * c) + color_matrix_translation;
|
||||||
|
}
|
||||||
c *= v.color;
|
c *= v.color;
|
||||||
c = clamp(c, 0.0, 1.0);
|
c = clamp(c, 0.0, 1.0);
|
||||||
c.rgb *= c.a;
|
c.rgb *= c.a;
|
||||||
@ -214,31 +216,37 @@ float4 FragmentShaderImpl(
|
|||||||
// Define Foo and FooCp macros to force macro replacement.
|
// Define Foo and FooCp macros to force macro replacement.
|
||||||
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
|
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
|
||||||
|
|
||||||
#define FragmentShaderFunc(filter, address) \
|
#define FragmentShaderFunc(useColorM, filter, address) \
|
||||||
FragmentShaderFuncCp(filter, address)
|
FragmentShaderFuncCp(useColorM, filter, address)
|
||||||
|
|
||||||
#define FragmentShaderFuncCp(filter, address) \
|
#define FragmentShaderFuncCp(useColorM, filter, address) \
|
||||||
fragment float4 FragmentShader_##filter##_##address( \
|
fragment float4 FragmentShader_##useColorM##_##filter##_##address( \
|
||||||
VertexOut v [[stage_in]], \
|
VertexOut v [[stage_in]], \
|
||||||
texture2d<float> texture [[texture(0)]], \
|
texture2d<float> texture [[texture(0)]], \
|
||||||
constant float2& source_size [[buffer(2)]], \
|
constant float2& source_size [[buffer(2)]], \
|
||||||
constant float4x4& color_matrix_body [[buffer(3)]], \
|
constant float4x4& color_matrix_body [[buffer(3)]], \
|
||||||
constant float4& color_matrix_translation [[buffer(4)]], \
|
constant float4& color_matrix_translation [[buffer(4)]], \
|
||||||
constant float& scale [[buffer(5)]]) { \
|
constant float& scale [[buffer(5)]]) { \
|
||||||
return FragmentShaderImpl<filter, address>( \
|
return FragmentShaderImpl<useColorM, filter, address>( \
|
||||||
v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \
|
v, texture, source_size, color_matrix_body, color_matrix_translation, scale); \
|
||||||
}
|
}
|
||||||
|
|
||||||
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||||
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||||
FragmentShaderFunc(FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
|
FragmentShaderFunc(0, FILTER_NEAREST, ADDRESS_REPEAT)
|
||||||
FragmentShaderFunc(FILTER_NEAREST, ADDRESS_REPEAT)
|
FragmentShaderFunc(0, FILTER_LINEAR, ADDRESS_REPEAT)
|
||||||
FragmentShaderFunc(FILTER_LINEAR, ADDRESS_REPEAT)
|
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_REPEAT)
|
||||||
|
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
|
||||||
|
|
||||||
|
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_CLAMP_TO_ZERO)
|
||||||
|
|
||||||
#undef FragmentShaderFuncName
|
#undef FragmentShaderFuncName
|
||||||
`
|
`
|
||||||
|
|
||||||
type rpsKey struct {
|
type rpsKey struct {
|
||||||
|
useColorM bool
|
||||||
filter graphics.Filter
|
filter graphics.Filter
|
||||||
address graphics.Address
|
address graphics.Address
|
||||||
compositeMode graphics.CompositeMode
|
compositeMode graphics.CompositeMode
|
||||||
@ -455,7 +463,7 @@ func (d *Driver) Reset() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
fs, err := lib.MakeFunction(
|
fs, err := lib.MakeFunction(
|
||||||
fmt.Sprintf("FragmentShader_%d_%d", graphics.FilterScreen, graphics.AddressClampToZero))
|
fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphics.FilterScreen, graphics.AddressClampToZero))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -494,40 +502,47 @@ func (d *Driver) Reset() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, a := range []graphics.Address{
|
for _, cm := range []bool{false, true} {
|
||||||
graphics.AddressClampToZero,
|
for _, a := range []graphics.Address{
|
||||||
graphics.AddressRepeat,
|
graphics.AddressClampToZero,
|
||||||
} {
|
graphics.AddressRepeat,
|
||||||
for _, f := range []graphics.Filter{
|
|
||||||
graphics.FilterNearest,
|
|
||||||
graphics.FilterLinear,
|
|
||||||
} {
|
} {
|
||||||
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
|
for _, f := range []graphics.Filter{
|
||||||
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d", f, a))
|
graphics.FilterNearest,
|
||||||
if err != nil {
|
graphics.FilterLinear,
|
||||||
return err
|
} {
|
||||||
}
|
for c := graphics.CompositeModeSourceOver; c <= graphics.CompositeModeMax; c++ {
|
||||||
rpld := mtl.RenderPipelineDescriptor{
|
cmi := 0
|
||||||
VertexFunction: vs,
|
if cm {
|
||||||
FragmentFunction: fs,
|
cmi = 1
|
||||||
}
|
}
|
||||||
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
|
fs, err := lib.MakeFunction(fmt.Sprintf("FragmentShader_%d_%d_%d", cmi, f, a))
|
||||||
rpld.ColorAttachments[0].BlendingEnabled = true
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
rpld := mtl.RenderPipelineDescriptor{
|
||||||
|
VertexFunction: vs,
|
||||||
|
FragmentFunction: fs,
|
||||||
|
}
|
||||||
|
rpld.ColorAttachments[0].PixelFormat = mtl.PixelFormatRGBA8UNorm
|
||||||
|
rpld.ColorAttachments[0].BlendingEnabled = true
|
||||||
|
|
||||||
src, dst := c.Operations()
|
src, dst := c.Operations()
|
||||||
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst)
|
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = conv(dst)
|
||||||
rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst)
|
rpld.ColorAttachments[0].DestinationRGBBlendFactor = conv(dst)
|
||||||
rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src)
|
rpld.ColorAttachments[0].SourceAlphaBlendFactor = conv(src)
|
||||||
rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src)
|
rpld.ColorAttachments[0].SourceRGBBlendFactor = conv(src)
|
||||||
rps, err := d.device.MakeRenderPipelineState(rpld)
|
rps, err := d.device.MakeRenderPipelineState(rpld)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
d.rpss[rpsKey{
|
||||||
|
useColorM: cm,
|
||||||
|
filter: f,
|
||||||
|
address: a,
|
||||||
|
compositeMode: c,
|
||||||
|
}] = rps
|
||||||
}
|
}
|
||||||
d.rpss[rpsKey{
|
|
||||||
filter: f,
|
|
||||||
address: a,
|
|
||||||
compositeMode: c,
|
|
||||||
}] = rps
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -584,6 +599,7 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
|||||||
rce.SetRenderPipelineState(d.screenRPS)
|
rce.SetRenderPipelineState(d.screenRPS)
|
||||||
} else {
|
} else {
|
||||||
rce.SetRenderPipelineState(d.rpss[rpsKey{
|
rce.SetRenderPipelineState(d.rpss[rpsKey{
|
||||||
|
useColorM: colorM != nil,
|
||||||
filter: filter,
|
filter: filter,
|
||||||
address: address,
|
address: address,
|
||||||
compositeMode: mode,
|
compositeMode: mode,
|
||||||
@ -601,9 +617,11 @@ func (d *Driver) Draw(indexLen int, indexOffset int, mode graphics.CompositeMode
|
|||||||
}
|
}
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2)
|
rce.SetFragmentBytes(unsafe.Pointer(&sourceSize[0]), unsafe.Sizeof(sourceSize), 2)
|
||||||
|
|
||||||
esBody, esTranslate := colorM.UnsafeElements()
|
if colorM != nil {
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3)
|
esBody, esTranslate := colorM.UnsafeElements()
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4)
|
rce.SetFragmentBytes(unsafe.Pointer(&esBody[0]), unsafe.Sizeof(esBody[0])*uintptr(len(esBody)), 3)
|
||||||
|
rce.SetFragmentBytes(unsafe.Pointer(&esTranslate[0]), unsafe.Sizeof(esTranslate[0])*uintptr(len(esTranslate)), 4)
|
||||||
|
}
|
||||||
|
|
||||||
scale := float32(d.dst.width) / float32(d.src.width)
|
scale := float32(d.dst.width) / float32(d.src.width)
|
||||||
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5)
|
rce.SetFragmentBytes(unsafe.Pointer(&scale), unsafe.Sizeof(scale), 5)
|
||||||
|
@ -116,8 +116,9 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type programKey struct {
|
type programKey struct {
|
||||||
filter graphics.Filter
|
useColorM bool
|
||||||
address graphics.Address
|
filter graphics.Filter
|
||||||
|
address graphics.Address
|
||||||
}
|
}
|
||||||
|
|
||||||
// openGLState is a state for
|
// openGLState is a state for
|
||||||
@ -200,34 +201,37 @@ func (s *openGLState) reset(context *context) error {
|
|||||||
}
|
}
|
||||||
defer context.deleteShader(shaderVertexModelviewNative)
|
defer context.deleteShader(shaderVertexModelviewNative)
|
||||||
|
|
||||||
for _, a := range []graphics.Address{
|
for _, c := range []bool{false, true} {
|
||||||
graphics.AddressClampToZero,
|
for _, a := range []graphics.Address{
|
||||||
graphics.AddressRepeat,
|
graphics.AddressClampToZero,
|
||||||
} {
|
graphics.AddressRepeat,
|
||||||
for _, f := range []graphics.Filter{
|
|
||||||
graphics.FilterNearest,
|
|
||||||
graphics.FilterLinear,
|
|
||||||
graphics.FilterScreen,
|
|
||||||
} {
|
} {
|
||||||
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(f, a))
|
for _, f := range []graphics.Filter{
|
||||||
if err != nil {
|
graphics.FilterNearest,
|
||||||
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err))
|
graphics.FilterLinear,
|
||||||
|
graphics.FilterScreen,
|
||||||
|
} {
|
||||||
|
shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(c, f, a))
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err))
|
||||||
|
}
|
||||||
|
defer context.deleteShader(shaderFragmentColorMatrixNative)
|
||||||
|
|
||||||
|
program, err := context.newProgram([]shader{
|
||||||
|
shaderVertexModelviewNative,
|
||||||
|
shaderFragmentColorMatrixNative,
|
||||||
|
}, theArrayBufferLayout.names())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.programs[programKey{
|
||||||
|
useColorM: c,
|
||||||
|
filter: f,
|
||||||
|
address: a,
|
||||||
|
}] = program
|
||||||
}
|
}
|
||||||
defer context.deleteShader(shaderFragmentColorMatrixNative)
|
|
||||||
|
|
||||||
program, err := context.newProgram([]shader{
|
|
||||||
shaderVertexModelviewNative,
|
|
||||||
shaderFragmentColorMatrixNative,
|
|
||||||
}, theArrayBufferLayout.names())
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
s.programs[programKey{
|
|
||||||
filter: f,
|
|
||||||
address: a,
|
|
||||||
}] = program
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,8 +278,9 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
|
|||||||
d.context.blendFunc(mode)
|
d.context.blendFunc(mode)
|
||||||
|
|
||||||
program := d.state.programs[programKey{
|
program := d.state.programs[programKey{
|
||||||
filter: filter,
|
useColorM: colorM != nil,
|
||||||
address: address,
|
filter: filter,
|
||||||
|
address: address,
|
||||||
}]
|
}]
|
||||||
if d.state.lastProgram != program {
|
if d.state.lastProgram != program {
|
||||||
d.context.useProgram(program)
|
d.context.useProgram(program)
|
||||||
@ -307,26 +312,24 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM,
|
|||||||
d.state.lastViewportHeight = vh
|
d.state.lastViewportHeight = vh
|
||||||
}
|
}
|
||||||
|
|
||||||
esBody, esTranslate := colorM.UnsafeElements()
|
if colorM != nil {
|
||||||
|
esBody, esTranslate := colorM.UnsafeElements()
|
||||||
if !areSameFloat32Array(d.state.lastColorMatrix, esBody) {
|
if !areSameFloat32Array(d.state.lastColorMatrix, esBody) {
|
||||||
d.context.uniformFloats(program, "color_matrix_body", esBody)
|
d.context.uniformFloats(program, "color_matrix_body", esBody)
|
||||||
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
||||||
d.state.lastColorMatrix = esBody
|
d.state.lastColorMatrix = esBody
|
||||||
}
|
}
|
||||||
if !areSameFloat32Array(d.state.lastColorMatrixTranslation, esTranslate) {
|
if !areSameFloat32Array(d.state.lastColorMatrixTranslation, esTranslate) {
|
||||||
d.context.uniformFloats(program, "color_matrix_translation", esTranslate)
|
d.context.uniformFloats(program, "color_matrix_translation", esTranslate)
|
||||||
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
// ColorM's elements are immutable. It's OK to hold the reference without copying.
|
||||||
d.state.lastColorMatrixTranslation = esTranslate
|
d.state.lastColorMatrixTranslation = esTranslate
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sw := graphics.InternalImageSize(srcW)
|
sw := graphics.InternalImageSize(srcW)
|
||||||
sh := graphics.InternalImageSize(srcH)
|
sh := graphics.InternalImageSize(srcH)
|
||||||
|
|
||||||
if filter == graphics.FilterNearest {
|
if filter != graphics.FilterNearest {
|
||||||
d.state.lastSourceWidth = 0
|
|
||||||
d.state.lastSourceHeight = 0
|
|
||||||
} else {
|
|
||||||
if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh {
|
if d.state.lastSourceWidth != sw || d.state.lastSourceHeight != sh {
|
||||||
d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)})
|
d.context.uniformFloats(program, "source_size", []float32{float32(sw), float32(sh)})
|
||||||
d.state.lastSourceWidth = sw
|
d.state.lastSourceWidth = sw
|
||||||
|
@ -61,7 +61,7 @@ func vertexShaderStr() string {
|
|||||||
return src
|
return src
|
||||||
}
|
}
|
||||||
|
|
||||||
func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string {
|
func fragmentShaderStr(useColorM bool, filter graphics.Filter, address graphics.Address) string {
|
||||||
replaces := map[string]string{
|
replaces := map[string]string{
|
||||||
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero),
|
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero),
|
||||||
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat),
|
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat),
|
||||||
@ -72,6 +72,11 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
|
|||||||
}
|
}
|
||||||
|
|
||||||
var defs []string
|
var defs []string
|
||||||
|
|
||||||
|
if useColorM {
|
||||||
|
defs = append(defs, "#define USE_COLOR_MATRIX")
|
||||||
|
}
|
||||||
|
|
||||||
switch filter {
|
switch filter {
|
||||||
case graphics.FilterNearest:
|
case graphics.FilterNearest:
|
||||||
defs = append(defs, "#define FILTER_NEAREST")
|
defs = append(defs, "#define FILTER_NEAREST")
|
||||||
@ -82,6 +87,7 @@ func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string
|
|||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
|
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
|
||||||
}
|
}
|
||||||
|
|
||||||
switch address {
|
switch address {
|
||||||
case graphics.AddressClampToZero:
|
case graphics.AddressClampToZero:
|
||||||
defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO")
|
defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO")
|
||||||
@ -134,8 +140,11 @@ precision mediump float;
|
|||||||
{{.Definitions}}
|
{{.Definitions}}
|
||||||
|
|
||||||
uniform sampler2D texture;
|
uniform sampler2D texture;
|
||||||
|
|
||||||
|
#if defined(USE_COLOR_MATRIX)
|
||||||
uniform mat4 color_matrix_body;
|
uniform mat4 color_matrix_body;
|
||||||
uniform vec4 color_matrix_translation;
|
uniform vec4 color_matrix_translation;
|
||||||
|
#endif
|
||||||
|
|
||||||
uniform highp vec2 source_size;
|
uniform highp vec2 source_size;
|
||||||
|
|
||||||
@ -250,8 +259,12 @@ void main(void) {
|
|||||||
// Un-premultiply alpha.
|
// Un-premultiply alpha.
|
||||||
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
|
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
|
||||||
color.rgb /= color.a + (1.0 - sign(color.a));
|
color.rgb /= color.a + (1.0 - sign(color.a));
|
||||||
|
|
||||||
|
#if defined(USE_COLOR_MATRIX)
|
||||||
// Apply the color matrix or scale.
|
// Apply the color matrix or scale.
|
||||||
color = (color_matrix_body * color) + color_matrix_translation;
|
color = (color_matrix_body * color) + color_matrix_translation;
|
||||||
|
#endif
|
||||||
|
|
||||||
color *= varying_color_scale;
|
color *= varying_color_scale;
|
||||||
color = clamp(color, 0.0, 1.0);
|
color = clamp(color, 0.0, 1.0);
|
||||||
// Premultiply alpha
|
// Premultiply alpha
|
||||||
|
Loading…
Reference in New Issue
Block a user