From c2c3579cdef1209f87bfb6eeb42e0940d8d2f504 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Tue, 12 Feb 2019 11:00:18 +0900 Subject: [PATCH] graphicsdriver/opengl: Reduce 'if' in shader programs Fixes #812 --- internal/graphicsdriver/opengl/program.go | 71 +++++--- internal/graphicsdriver/opengl/shader.go | 211 +++++++++++----------- 2 files changed, 154 insertions(+), 128 deletions(-) diff --git a/internal/graphicsdriver/opengl/program.go b/internal/graphicsdriver/opengl/program.go index 3a3f5aa67..99be1717c 100644 --- a/internal/graphicsdriver/opengl/program.go +++ b/internal/graphicsdriver/opengl/program.go @@ -107,6 +107,11 @@ func init() { } } +type programKey struct { + filter graphics.Filter + address graphics.Address +} + // openGLState is a state for type openGLState struct { // arrayBuffer is OpenGL's array buffer (vertices data). @@ -115,8 +120,8 @@ type openGLState struct { // elementArrayBuffer is OpenGL's element array buffer (indices data). elementArrayBuffer buffer - // program is OpenGL's program for rendering a texture. - program program + // programs holds OpenGL's programs for rendering a texture, keyed by filter and address. + programs map[programKey]program lastProgram program lastViewportWidth int @@ -161,8 +166,13 @@ func (s *openGLState) reset(context *context) error { // When context lost happens, deleting programs or buffers is not necessary. // However, it is not assumed that reset is called only when context lost happens. // Let's delete them explicitly. 
- if s.program != zeroProgram { - context.deleteProgram(s.program) + if s.programs == nil { + s.programs = map[programKey]program{} + } else { + for k, p := range s.programs { + context.deleteProgram(p) + delete(s.programs, k) + } } // On browsers (at least Chrome), buffers are already detached from the context @@ -176,24 +186,39 @@ func (s *openGLState) reset(context *context) error { } } - shaderVertexModelviewNative, err := context.newShader(vertexShader, shaderStr(shaderVertexModelview)) + shaderVertexModelviewNative, err := context.newShader(vertexShader, vertexShaderStr()) if err != nil { panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err)) } defer context.deleteShader(shaderVertexModelviewNative) - shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, shaderStr(shaderFragmentColorMatrix)) - if err != nil { - panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err)) - } - defer context.deleteShader(shaderFragmentColorMatrixNative) + for _, a := range []graphics.Address{ + graphics.AddressClampToZero, + graphics.AddressRepeat, + } { + for _, f := range []graphics.Filter{ + graphics.FilterNearest, + graphics.FilterLinear, + graphics.FilterScreen, + } { + shaderFragmentColorMatrixNative, err := context.newShader(fragmentShader, fragmentShaderStr(f, a)) + if err != nil { + panic(fmt.Sprintf("graphics: shader compiling error:\n%s", err)) + } + defer context.deleteShader(shaderFragmentColorMatrixNative) - s.program, err = context.newProgram([]shader{ - shaderVertexModelviewNative, - shaderFragmentColorMatrixNative, - }) - if err != nil { - return err + program, err := context.newProgram([]shader{ + shaderVertexModelviewNative, + shaderFragmentColorMatrixNative, + }) + if err != nil { + return err + } + s.programs[programKey{ + filter: f, + address: a, + }] = program + } } s.arrayBuffer = theArrayBufferLayout.newArrayBuffer(context) @@ -238,7 +263,10 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM 
*affine.ColorM, d.context.blendFunc(mode) - program := d.state.program + program := d.state.programs[programKey{ + filter: filter, + address: address, + }] if d.state.lastProgram != program { d.context.useProgram(program) if d.state.lastProgram != zeroProgram { @@ -291,15 +319,6 @@ func (d *Driver) useProgram(mode graphics.CompositeMode, colorM *affine.ColorM, d.state.lastSourceHeight = sh } - if d.state.lastFilter == nil || *d.state.lastFilter != filter { - d.context.uniformInt(program, "filter_type", int(filter)) - d.state.lastFilter = &filter - } - if d.state.lastAddress == nil || *d.state.lastAddress != address { - d.context.uniformInt(program, "address", int(address)) - d.state.lastAddress = &address - } - if filter == graphics.FilterScreen { scale := float32(dstW) / float32(srcW) d.context.uniformFloat(program, "scale", scale) diff --git a/internal/graphicsdriver/opengl/shader.go b/internal/graphicsdriver/opengl/shader.go index 934348c09..57267df54 100644 --- a/internal/graphicsdriver/opengl/shader.go +++ b/internal/graphicsdriver/opengl/shader.go @@ -22,13 +22,6 @@ import ( "github.com/hajimehoshi/ebiten/internal/graphics" ) -type shaderID int - -const ( - shaderVertexModelview shaderID = iota - shaderFragmentColorMatrix -) - // glslReservedKeywords is a set of reserved keywords that cannot be used as an indentifier on some environments. // See https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.pdf. 
var glslReservedKeywords = map[string]struct{}{ @@ -62,26 +55,43 @@ func checkGLSL(src string) { } } -func shaderStr(id shaderID) string { - src := "" - switch id { - case shaderVertexModelview: - src = shaderStrVertex - case shaderFragmentColorMatrix: - replaces := map[string]string{ - "{{.FilterNearest}}": fmt.Sprintf("%d", graphics.FilterNearest), - "{{.FilterLinear}}": fmt.Sprintf("%d", graphics.FilterLinear), - "{{.FilterScreen}}": fmt.Sprintf("%d", graphics.FilterScreen), - "{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero), - "{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat), - } - src = shaderStrFragment - for k, v := range replaces { - src = strings.Replace(src, k, v, -1) - } - default: - panic(fmt.Sprintf("opengl: invalid shader id: %d", id)) +func vertexShaderStr() string { + src := shaderStrVertex + checkGLSL(src) + return src +} + +func fragmentShaderStr(filter graphics.Filter, address graphics.Address) string { + replaces := map[string]string{ + "{{.AddressClampToZero}}": fmt.Sprintf("%d", graphics.AddressClampToZero), + "{{.AddressRepeat}}": fmt.Sprintf("%d", graphics.AddressRepeat), } + src := shaderStrFragment + for k, v := range replaces { + src = strings.Replace(src, k, v, -1) + } + + var defs []string + switch filter { + case graphics.FilterNearest: + defs = append(defs, "#define FILTER_NEAREST") + case graphics.FilterLinear: + defs = append(defs, "#define FILTER_LINEAR") + case graphics.FilterScreen: + defs = append(defs, "#define FILTER_SCREEN") + default: + panic(fmt.Sprintf("opengl: invalid filter: %d", filter)) + } + switch address { + case graphics.AddressClampToZero: + defs = append(defs, "#define ADDRESS_CLAMP_TO_ZERO") + case graphics.AddressRepeat: + defs = append(defs, "#define ADDRESS_REPEAT") + default: + panic(fmt.Sprintf("opengl: invalid address: %d", address)) + } + + src = strings.Replace(src, "{{.Definitions}}", strings.Join(defs, "\n"), -1) checkGLSL(src) return src @@ -121,19 +131,13 @@ 
precision mediump float; #define highp #endif -#define FILTER_NEAREST ({{.FilterNearest}}) -#define FILTER_LINEAR ({{.FilterLinear}}) -#define FILTER_SCREEN ({{.FilterScreen}}) -#define ADDRESS_CLAMP_TO_ZERO ({{.AddressClampToZero}}) -#define ADDRESS_REPEAT ({{.AddressRepeat}}) +{{.Definitions}} uniform sampler2D texture; uniform mat4 color_matrix_body; uniform vec4 color_matrix_translation; -uniform int filter_type; uniform highp vec2 source_size; -uniform int address; #if defined(FILTER_SCREEN) uniform highp float scale; @@ -164,17 +168,16 @@ highp float floorMod(highp float x, highp float y) { return x - y * floor(x/y); } -highp vec2 adjustTexelByAddress(highp vec2 p, highp vec4 tex_region, int address) { - if (address == ADDRESS_CLAMP_TO_ZERO) { - return p; - } - if (address == ADDRESS_REPEAT) { - highp vec2 o = vec2(tex_region[0], tex_region[1]); - highp vec2 size = vec2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]); - return vec2(floorMod((p.x - o.x), size.x) + o.x, floorMod((p.y - o.y), size.y) + o.y); - } - // Not reached. 
- return vec2(0.0); +highp vec2 adjustTexelByAddress(highp vec2 p, highp vec4 tex_region) { +#if defined(ADDRESS_CLAMP_TO_ZERO) + return p; +#endif + +#if defined(ADDRESS_REPEAT) + highp vec2 o = vec2(tex_region[0], tex_region[1]); + highp vec2 size = vec2(tex_region[2] - tex_region[0], tex_region[3] - tex_region[1]); + return vec2(floorMod((p.x - o.x), size.x) + o.x, floorMod((p.y - o.y), size.y) + o.y); +#endif } void main(void) { @@ -183,65 +186,69 @@ void main(void) { vec4 color; - if (filter_type == FILTER_NEAREST) { - pos = adjustTexelByAddress(pos, varying_tex_region, address); - color = texture2D(texture, pos); - if (pos.x < varying_tex_region[0] || - pos.y < varying_tex_region[1] || - (varying_tex_region[2] - texel_size.x / 512.0) <= pos.x || - (varying_tex_region[3] - texel_size.y / 512.0) <= pos.y) { - color = vec4(0, 0, 0, 0); - } - } else if (filter_type == FILTER_LINEAR) { - highp vec2 p0 = pos - texel_size / 2.0; - highp vec2 p1 = pos + texel_size / 2.0; - - p1 = adjustTexel(p0, p1); - p0 = adjustTexelByAddress(p0, varying_tex_region, address); - p1 = adjustTexelByAddress(p1, varying_tex_region, address); - - vec4 c0 = texture2D(texture, p0); - vec4 c1 = texture2D(texture, vec2(p1.x, p0.y)); - vec4 c2 = texture2D(texture, vec2(p0.x, p1.y)); - vec4 c3 = texture2D(texture, p1); - if (p0.x < varying_tex_region[0]) { - c0 = vec4(0, 0, 0, 0); - c2 = vec4(0, 0, 0, 0); - } - if (p0.y < varying_tex_region[1]) { - c0 = vec4(0, 0, 0, 0); - c1 = vec4(0, 0, 0, 0); - } - if ((varying_tex_region[2] - texel_size.x / 512.0) <= p1.x) { - c1 = vec4(0, 0, 0, 0); - c3 = vec4(0, 0, 0, 0); - } - if ((varying_tex_region[3] - texel_size.y / 512.0) <= p1.y) { - c2 = vec4(0, 0, 0, 0); - c3 = vec4(0, 0, 0, 0); - } - - vec2 rate = fract(p0 * source_size); - color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); - } else if (filter_type == FILTER_SCREEN) { - highp vec2 p0 = pos - texel_size / 2.0 / scale; - highp vec2 p1 = pos + texel_size / 2.0 / scale; - - p1 = 
adjustTexel(p0, p1); - - vec4 c0 = texture2D(texture, p0); - vec4 c1 = texture2D(texture, vec2(p1.x, p0.y)); - vec4 c2 = texture2D(texture, vec2(p0.x, p1.y)); - vec4 c3 = texture2D(texture, p1); - // Texels must be in the source rect, so it is not necessary to check that like linear filter. - - vec2 rateCenter = vec2(1.0, 1.0) - texel_size / 2.0 / scale; - vec2 rate = clamp(((fract(p0 * source_size) - rateCenter) * scale) + rateCenter, 0.0, 1.0); - color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); - } else { - // Not reached. - discard; +#if defined(FILTER_NEAREST) + pos = adjustTexelByAddress(pos, varying_tex_region); + color = texture2D(texture, pos); + if (pos.x < varying_tex_region[0] || + pos.y < varying_tex_region[1] || + (varying_tex_region[2] - texel_size.x / 512.0) <= pos.x || + (varying_tex_region[3] - texel_size.y / 512.0) <= pos.y) { + color = vec4(0, 0, 0, 0); } +#endif + +#if defined(FILTER_LINEAR) + highp vec2 p0 = pos - texel_size / 2.0; + highp vec2 p1 = pos + texel_size / 2.0; + + p1 = adjustTexel(p0, p1); + p0 = adjustTexelByAddress(p0, varying_tex_region); + p1 = adjustTexelByAddress(p1, varying_tex_region); + + vec4 c0 = texture2D(texture, p0); + vec4 c1 = texture2D(texture, vec2(p1.x, p0.y)); + vec4 c2 = texture2D(texture, vec2(p0.x, p1.y)); + vec4 c3 = texture2D(texture, p1); + if (p0.x < varying_tex_region[0]) { + c0 = vec4(0, 0, 0, 0); + c2 = vec4(0, 0, 0, 0); + } + if (p0.y < varying_tex_region[1]) { + c0 = vec4(0, 0, 0, 0); + c1 = vec4(0, 0, 0, 0); + } + if ((varying_tex_region[2] - texel_size.x / 512.0) <= p1.x) { + c1 = vec4(0, 0, 0, 0); + c3 = vec4(0, 0, 0, 0); + } + if ((varying_tex_region[3] - texel_size.y / 512.0) <= p1.y) { + c2 = vec4(0, 0, 0, 0); + c3 = vec4(0, 0, 0, 0); + } + + vec2 rate = fract(p0 * source_size); + color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); +#endif + +#if defined(FILTER_SCREEN) + highp vec2 p0 = pos - texel_size / 2.0 / scale; + highp vec2 p1 = pos + texel_size / 2.0 / scale; + 
+ // Keep varying_tex_region referenced so that it is not optimized out; the expression below arithmetically adds zero to p0. + p0 += varying_tex_region.xy - varying_tex_region.xy; + + p1 = adjustTexel(p0, p1); + + vec4 c0 = texture2D(texture, p0); + vec4 c1 = texture2D(texture, vec2(p1.x, p0.y)); + vec4 c2 = texture2D(texture, vec2(p0.x, p1.y)); + vec4 c3 = texture2D(texture, p1); + // Texels must be in the source rect, so the region bounds checks done for the linear filter are not necessary here. + + vec2 rateCenter = vec2(1.0, 1.0) - texel_size / 2.0 / scale; + vec2 rate = clamp(((fract(p0 * source_size) - rateCenter) * scale) + rateCenter, 0.0, 1.0); + color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); +#endif // Un-premultiply alpha if (0.0 < color.a) {