From 63e3c4adea102e90fb7a41e1909acf7b372fbbd0 Mon Sep 17 00:00:00 2001 From: Hajime Hoshi Date: Tue, 30 Aug 2022 01:47:23 +0900 Subject: [PATCH] Revert "internal/graphics: remove FilterScreen and use an original Kage program instead" This reverts these commits * 3259ef3dafda4a457cf9b9c3d680189807c64cc0 * 7c55065490d900ff70829c42a8fd4bb25a7fe1f1 * 5bb70f485efac6d6f2d84fa69c38c8398c943005 Reason: iOS crash. Probably we have to prepare a special rendering pipeline for the final screen. Closes #2278 --- internal/graphicscommand/command.go | 2 + .../directx/graphics_windows.go | 5 + .../directx/pipeline_windows.go | 28 ++++++ internal/graphicsdriver/filter.go | 1 + .../graphicsdriver/metal/graphics_darwin.go | 93 ++++++++++++++++--- .../graphicsdriver/opengl/defaultshader.go | 30 ++++++ internal/graphicsdriver/opengl/graphics.go | 9 ++ internal/graphicsdriver/opengl/program.go | 1 + internal/mipmap/mipmap.go | 6 +- internal/ui/context.go | 63 +------------ 10 files changed, 164 insertions(+), 74 deletions(-) diff --git a/internal/graphicscommand/command.go b/internal/graphicscommand/command.go index 065594a0e..145ba9b3a 100644 --- a/internal/graphicscommand/command.go +++ b/internal/graphicscommand/command.go @@ -345,6 +345,8 @@ func (c *drawTrianglesCommand) String() string { filter = "nearest" case graphicsdriver.FilterLinear: filter = "linear" + case graphicsdriver.FilterScreen: + filter = "screen" default: panic(fmt.Sprintf("graphicscommand: invalid filter: %d", c.filter)) } diff --git a/internal/graphicsdriver/directx/graphics_windows.go b/internal/graphicsdriver/directx/graphics_windows.go index 0beb941b4..9449eadb6 100644 --- a/internal/graphicsdriver/directx/graphics_windows.go +++ b/internal/graphicsdriver/directx/graphics_windows.go @@ -1239,6 +1239,10 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh var esBody [16]float32 var esTranslate [4]float32 colorM.Elements(&esBody, &esTranslate) + scale := float32(0) + if filter == 
graphicsdriver.FilterScreen { + scale = float32(dst.width) / float32(srcImages[0].width) + } flattenUniforms = []float32{ float32(screenWidth), @@ -1269,6 +1273,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh srcRegion.Y, srcRegion.X + srcRegion.Width, srcRegion.Y + srcRegion.Height, + scale, } } else { // TODO: This logic is very similar to Metal's. Let's unify them. diff --git a/internal/graphicsdriver/directx/pipeline_windows.go b/internal/graphicsdriver/directx/pipeline_windows.go index 70a5a4c47..a874bfd6c 100644 --- a/internal/graphicsdriver/directx/pipeline_windows.go +++ b/internal/graphicsdriver/directx/pipeline_windows.go @@ -73,6 +73,9 @@ func (k *builtinPipelineStatesKey) defs() ([]_D3D_SHADER_MACRO, error) { case graphicsdriver.FilterLinear: name := []byte("FILTER_LINEAR\x00") defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) + case graphicsdriver.FilterScreen: + name := []byte("FILTER_SCREEN\x00") + defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]}) default: return nil, fmt.Errorf("directx: invalid filter: %d", k.filter) } @@ -110,6 +113,10 @@ cbuffer ShaderParameter : register(b0) { float4x4 color_matrix_body; float4 color_matrix_translation; float4 source_region; + + // This member should be the last not to create a new sector. 
+ // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules + float scale; } PSInput VSMain(float2 position : POSITION, float2 tex : TEXCOORD, float4 color : COLOR) { @@ -213,6 +220,25 @@ float4 PSMain(PSInput input) : SV_TARGET { float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y); #endif // defined(FILTER_LINEAR) +#if defined(FILTER_SCREEN) + float2 pos = input.texcoord; + float2 texel_size = 1.0 / source_size; + float2 half_scaled_texel_size = texel_size / 2.0 / scale; + + float2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0); + float2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0); + + float4 c0 = tex.Sample(samp, p0); + float4 c1 = tex.Sample(samp, float2(p1.x, p0.y)); + float4 c2 = tex.Sample(samp, float2(p0.x, p1.y)); + float4 c3 = tex.Sample(samp, p1); + // Texels must be in the source rect, so it is not necessary to check that like linear filter. + + float2 rate_center = float2(1.0, 1.0) - half_scaled_texel_size; + float2 rate = clamp(((frac(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); + float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y); +#endif // defined(FILTER_SCREEN) + #if defined(USE_COLOR_MATRIX) // Un-premultiply alpha. // When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing. @@ -226,6 +252,8 @@ float4 PSMain(PSInput input) : SV_TARGET { // Clamp the output. 
color.rgb = min(color.rgb, color.a); return color; +#elif defined(FILTER_SCREEN) + return color; #else return input.color * color; #endif // defined(USE_COLOR_MATRIX) diff --git a/internal/graphicsdriver/filter.go b/internal/graphicsdriver/filter.go index f2143c252..64e24029a 100644 --- a/internal/graphicsdriver/filter.go +++ b/internal/graphicsdriver/filter.go @@ -19,6 +19,7 @@ type Filter int const ( FilterNearest Filter = iota FilterLinear + FilterScreen ) type Address int diff --git a/internal/graphicsdriver/metal/graphics_darwin.go b/internal/graphicsdriver/metal/graphics_darwin.go index 96d1141a8..2a2944ffc 100644 --- a/internal/graphicsdriver/metal/graphics_darwin.go +++ b/internal/graphicsdriver/metal/graphics_darwin.go @@ -33,6 +33,7 @@ const source = `#include #define FILTER_NEAREST {{.FilterNearest}} #define FILTER_LINEAR {{.FilterLinear}} +#define FILTER_SCREEN {{.FilterScreen}} #define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}} #define ADDRESS_REPEAT {{.AddressRepeat}} @@ -183,6 +184,25 @@ struct ColorFromTexel { } }; +template +struct ColorFromTexel { + inline float4 Do(VertexOut v, texture2d texture, constant float2& source_size, constant float4& source_region, float scale) { + const float2 texel_size = 1 / source_size; + + float2 p0 = v.tex - texel_size / 2.0 / scale + (texel_size / 512.0); + float2 p1 = v.tex + texel_size / 2.0 / scale + (texel_size / 512.0); + + float4 c0 = texture.sample(texture_sampler, p0); + float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y)); + float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y)); + float4 c3 = texture.sample(texture_sampler, p1); + + float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale; + float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); + return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); + } +}; + template struct FragmentShaderImpl { inline float4 Do( @@ -207,6 +227,20 @@ struct FragmentShaderImpl { } }; 
+template +struct FragmentShaderImpl { + inline float4 Do( + VertexOut v, + texture2d texture, + constant float2& source_size, + constant float4x4& color_matrix_body, + constant float4& color_matrix_translation, + constant float4& source_region, + constant float& scale) { + return ColorFromTexel().Do(v, texture, source_size, source_region, scale); + } +}; + // Define Foo and FooCp macros to force macro replacement. // See "6.10.3.1 Argument substitution" in ISO/IEC 9899. @@ -239,6 +273,8 @@ FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT) FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_UNSAFE) FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_UNSAFE) +FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_UNSAFE) + #undef FragmentShaderFuncName ` @@ -572,6 +608,7 @@ func (g *Graphics) Initialize() error { replaces := map[string]string{ "{{.FilterNearest}}": fmt.Sprintf("%d", graphicsdriver.FilterNearest), "{{.FilterLinear}}": fmt.Sprintf("%d", graphicsdriver.FilterLinear), + "{{.FilterScreen}}": fmt.Sprintf("%d", graphicsdriver.FilterScreen), "{{.AddressClampToZero}}": fmt.Sprintf("%d", graphicsdriver.AddressClampToZero), "{{.AddressRepeat}}": fmt.Sprintf("%d", graphicsdriver.AddressRepeat), "{{.AddressUnsafe}}": fmt.Sprintf("%d", graphicsdriver.AddressUnsafe), @@ -589,6 +626,27 @@ func (g *Graphics) Initialize() error { if err != nil { return err } + fs, err := lib.MakeFunction( + fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphicsdriver.FilterScreen, graphicsdriver.AddressUnsafe)) + if err != nil { + return err + } + rpld := mtl.RenderPipelineDescriptor{ + VertexFunction: vs, + FragmentFunction: fs, + } + rpld.ColorAttachments[0].PixelFormat = g.view.colorPixelFormat() + rpld.ColorAttachments[0].BlendingEnabled = true + rpld.ColorAttachments[0].DestinationAlphaBlendFactor = mtl.BlendFactorZero + rpld.ColorAttachments[0].DestinationRGBBlendFactor = mtl.BlendFactorZero + rpld.ColorAttachments[0].SourceAlphaBlendFactor = mtl.BlendFactorOne + 
rpld.ColorAttachments[0].SourceRGBBlendFactor = mtl.BlendFactorOne + rpld.ColorAttachments[0].WriteMask = mtl.ColorWriteMaskAll + rps, err := g.view.getMTLDevice().MakeRenderPipelineState(rpld) + if err != nil { + return err + } + g.screenRPS = rps for _, screen := range []bool{false, true} { for _, cm := range []bool{false, true} { @@ -813,19 +871,23 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. rpss := map[stencilMode]mtl.RenderPipelineState{} var uniformVars [][]float32 if shaderID == graphicsdriver.InvalidShaderID { - for _, stencil := range []stencilMode{ - prepareStencil, - drawWithStencil, - noStencil, - } { - rpss[stencil] = g.rpss[rpsKey{ - screen: dst.screen, - useColorM: !colorM.IsIdentity(), - filter: filter, - address: address, - compositeMode: mode, - stencilMode: stencil, - }] + if dst.screen && filter == graphicsdriver.FilterScreen { + rpss[noStencil] = g.screenRPS + } else { + for _, stencil := range []stencilMode{ + prepareStencil, + drawWithStencil, + noStencil, + } { + rpss[stencil] = g.rpss[rpsKey{ + screen: dst.screen, + useColorM: !colorM.IsIdentity(), + filter: filter, + address: address, + compositeMode: mode, + stencilMode: stencil, + }] + } } w, h := dst.internalSize() @@ -838,6 +900,10 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. var esBody [16]float32 var esTranslate [4]float32 colorM.Elements(&esBody, &esTranslate) + scale := float32(0) + if filter == graphicsdriver.FilterScreen { + scale = float32(dst.width) / float32(srcs[0].width) + } uniformVars = [][]float32{ {float32(w), float32(h)}, sourceSize, @@ -849,6 +915,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. 
srcRegion.X + srcRegion.Width, srcRegion.Y + srcRegion.Height, }, + {scale}, } } else { for _, stencil := range []stencilMode{ diff --git a/internal/graphicsdriver/opengl/defaultshader.go b/internal/graphicsdriver/opengl/defaultshader.go index eb3c51364..b63d18259 100644 --- a/internal/graphicsdriver/opengl/defaultshader.go +++ b/internal/graphicsdriver/opengl/defaultshader.go @@ -83,6 +83,8 @@ func fragmentShaderStr(useColorM bool, filter graphicsdriver.Filter, address gra defs = append(defs, "#define FILTER_NEAREST") case graphicsdriver.FilterLinear: defs = append(defs, "#define FILTER_LINEAR") + case graphicsdriver.FilterScreen: + defs = append(defs, "#define FILTER_SCREEN") default: panic(fmt.Sprintf("opengl: invalid filter: %d", filter)) } @@ -149,6 +151,10 @@ uniform vec4 color_matrix_translation; uniform highp vec2 source_size; +#if defined(FILTER_SCREEN) +uniform highp float scale; +#endif + varying highp vec2 varying_tex; varying highp vec4 varying_color_scale; @@ -229,6 +235,27 @@ void main(void) { color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); #endif // defined(FILTER_LINEAR) +#if defined(FILTER_SCREEN) + highp vec2 texel_size = 1.0 / source_size; + highp vec2 half_scaled_texel_size = texel_size / 2.0 / scale; + + highp vec2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0); + highp vec2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0); + + vec4 c0 = texture2D(T0, p0); + vec4 c1 = texture2D(T0, vec2(p1.x, p0.y)); + vec4 c2 = texture2D(T0, vec2(p0.x, p1.y)); + vec4 c3 = texture2D(T0, p1); + // Texels must be in the source rect, so it is not necessary to check that like linear filter. + + vec2 rate_center = vec2(1.0, 1.0) - half_scaled_texel_size; + vec2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0); + gl_FragColor = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y); + + // Assume that a color matrix and color vector values are not used with FILTER_SCREEN. 
+ +#else + # if defined(USE_COLOR_MATRIX) // Un-premultiply alpha. // When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing. @@ -248,6 +275,9 @@ void main(void) { # endif // defined(USE_COLOR_MATRIX) gl_FragColor = color; + +#endif // defined(FILTER_SCREEN) + } ` ) diff --git a/internal/graphicsdriver/opengl/graphics.go b/internal/graphicsdriver/opengl/graphics.go index 4be520ef4..0dd9df33f 100644 --- a/internal/graphicsdriver/opengl/graphics.go +++ b/internal/graphicsdriver/opengl/graphics.go @@ -244,6 +244,15 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics. typ: shaderir.Type{Main: shaderir.Vec2}, }) } + + if filter == graphicsdriver.FilterScreen { + scale := float32(destination.width) / float32(g.images[srcIDs[0]].width) + g.uniformVars = append(g.uniformVars, uniformVariable{ + name: "scale", + value: []float32{scale}, + typ: shaderir.Type{Main: shaderir.Float}, + }) + } } else { shader := g.shaders[shaderID] program = shader.p diff --git a/internal/graphicsdriver/opengl/program.go b/internal/graphicsdriver/opengl/program.go index 128d056f7..45d370633 100644 --- a/internal/graphicsdriver/opengl/program.go +++ b/internal/graphicsdriver/opengl/program.go @@ -191,6 +191,7 @@ func (s *openGLState) reset(context *context) error { for _, f := range []graphicsdriver.Filter{ graphicsdriver.FilterNearest, graphicsdriver.FilterLinear, + graphicsdriver.FilterScreen, } { shaderFragmentColorMatrixNative, err := context.newFragmentShader(fragmentShaderStr(c, f, a)) if err != nil { diff --git a/internal/mipmap/mipmap.go b/internal/mipmap/mipmap.go index be0c7078c..45b7f9c0a 100644 --- a/internal/mipmap/mipmap.go +++ b/internal/mipmap/mipmap.go @@ -65,7 +65,7 @@ func (m *Mipmap) DrawTriangles(srcs [graphics.ShaderImageCount]*Mipmap, vertices level := 0 // TODO: Do we need to check all the sources' states of being volatile? 
- if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile { + if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile && filter != graphicsdriver.FilterScreen { level = math.MaxInt32 for i := 0; i < len(indices)/3; i++ { const n = graphics.VertexFloatCount @@ -226,6 +226,10 @@ func (m *Mipmap) disposeMipmaps() { func mipmapLevelFromDistance(dx0, dy0, dx1, dy1, sx0, sy0, sx1, sy1 float32, filter graphicsdriver.Filter) int { const maxLevel = 6 + if filter == graphicsdriver.FilterScreen { + return 0 + } + d := (dx1-dx0)*(dx1-dx0) + (dy1-dy0)*(dy1-dy0) s := (sx1-sx0)*(sx1-sx0) + (sy1-sy0)*(sy1-sy0) if s == 0 { diff --git a/internal/ui/context.go b/internal/ui/context.go index 616b02720..b97694f18 100644 --- a/internal/ui/context.go +++ b/internal/ui/context.go @@ -30,41 +30,6 @@ import ( "github.com/hajimehoshi/ebiten/v2/internal/hooks" ) -const screenShader = `package main - -var Scale float - -func Fragment(position vec4, texCoord vec2, color vec4) vec4 { - sourceSize := imageSrcTextureSize() - // texelSize is one pixel size in texel sizes. - texelSize := 1 / sourceSize - halfScaledTexelSize := texelSize / 2 / Scale - - // Shift 1/512 [texel] to avoid the tie-breaking issue. - // As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases. - pos := texCoord - p0 := pos - halfScaledTexelSize + (texelSize / 512) - p1 := pos + halfScaledTexelSize + (texelSize / 512) - - // Texels must be in the source rect, so it is not necessary to check. - c0 := imageSrc0UnsafeAt(p0) - c1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y)) - c2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y)) - c3 := imageSrc0UnsafeAt(p1) - - // p is the p1 value in one pixel assuming that the pixel's upper-left is (0, 0) and the lower-right is (1, 1). - p := fract(p1 * sourceSize) - - // rate indicates how much the 4 colors are mixed. rate is in between [0, 1]. - // - // 0 <= p <= 1/Scale: The rate is in between [0, 1] - // 1/Scale < p: Don't care. Adjacent colors (e.g. 
c0 vs c1 in an X direction) should be the same. - rate := clamp(p*Scale, 0, 1) - - return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y) -} -` - type Game interface { NewOffscreenImage(width, height int) *Image Layout(outsideWidth, outsideHeight int) (int, int) @@ -84,8 +49,6 @@ type context struct { outsideWidth float64 outsideHeight float64 - screenShader *Shader - m sync.Mutex } @@ -145,15 +108,6 @@ func (c *context) updateFrameImpl(graphicsDriver graphicsdriver.Graphics, update } }() - // Create a shader for the screen if necessary. - if c.screenShader == nil { - ir, err := graphics.CompileShader([]byte(screenShader)) - if err != nil { - return err - } - c.screenShader = NewShader(ir) - } - // ForceUpdate can be invoked even if the context is not initialized yet (#1591). if w, h := c.layoutGame(outsideWidth, outsideHeight, deviceScaleFactor); w == 0 || h == 0 { return nil @@ -232,16 +186,15 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) { gty += offsetY var filter graphicsdriver.Filter - var screenFilter bool switch { case !theGlobalState.isScreenFilterEnabled(): filter = graphicsdriver.FilterNearest case math.Floor(s) == s: filter = graphicsdriver.FilterNearest case s > 1: - screenFilter = true + filter = graphicsdriver.FilterScreen default: - // screenShader works with >=1 scale, but does not well with <1 scale. + // FilterScreen works with >=1 scale, but does not work well with <1 scale. // Use regular FilterLinear instead so far (#669). 
filter = graphicsdriver.FilterLinear } @@ -260,17 +213,7 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) { is := graphics.QuadIndices() srcs := [graphics.ShaderImageCount]*Image{c.offscreen} - var shader *Shader - var uniforms [][]float32 - if screenFilter { - shader = c.screenShader - dstWidth := c.screen.width - srcWidth := c.offscreen.width - uniforms = shader.ConvertUniforms(map[string]interface{}{ - "Scale": float32(dstWidth) / float32(srcWidth), - }) - } - c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, shader, uniforms, false, true) + c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, nil, nil, false, true) } func (c *context) layoutGame(outsideWidth, outsideHeight float64, deviceScaleFactor float64) (int, int) {