Revert "internal/graphics: remove FilterScreen and use an original Kage program instead"

This reverts the following commits:

* 3259ef3daf
* 7c55065490
* 5bb70f485e

Reason: the reverted change caused a crash on iOS. We probably have to
prepare a special rendering pipeline for the final screen.

Closes #2278
This commit is contained in:
Hajime Hoshi 2022-08-30 01:47:23 +09:00
parent f7c44f086f
commit 63e3c4adea
10 changed files with 164 additions and 74 deletions

View File

@ -345,6 +345,8 @@ func (c *drawTrianglesCommand) String() string {
filter = "nearest"
case graphicsdriver.FilterLinear:
filter = "linear"
case graphicsdriver.FilterScreen:
filter = "screen"
default:
panic(fmt.Sprintf("graphicscommand: invalid filter: %d", c.filter))
}

View File

@ -1239,6 +1239,10 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh
var esBody [16]float32
var esTranslate [4]float32
colorM.Elements(&esBody, &esTranslate)
scale := float32(0)
if filter == graphicsdriver.FilterScreen {
scale = float32(dst.width) / float32(srcImages[0].width)
}
flattenUniforms = []float32{
float32(screenWidth),
@ -1269,6 +1273,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh
srcRegion.Y,
srcRegion.X + srcRegion.Width,
srcRegion.Y + srcRegion.Height,
scale,
}
} else {
// TODO: This logic is very similar to Metal's. Let's unify them.

View File

@ -73,6 +73,9 @@ func (k *builtinPipelineStatesKey) defs() ([]_D3D_SHADER_MACRO, error) {
case graphicsdriver.FilterLinear:
name := []byte("FILTER_LINEAR\x00")
defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]})
case graphicsdriver.FilterScreen:
name := []byte("FILTER_SCREEN\x00")
defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]})
default:
return nil, fmt.Errorf("directx: invalid filter: %d", k.filter)
}
@ -110,6 +113,10 @@ cbuffer ShaderParameter : register(b0) {
float4x4 color_matrix_body;
float4 color_matrix_translation;
float4 source_region;
// Keep this scalar member last: placed earlier, it would push the next member onto a new 16-byte boundary and add padding.
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
float scale;
}
PSInput VSMain(float2 position : POSITION, float2 tex : TEXCOORD, float4 color : COLOR) {
@ -213,6 +220,25 @@ float4 PSMain(PSInput input) : SV_TARGET {
float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_LINEAR)
#if defined(FILTER_SCREEN)
float2 pos = input.texcoord;
float2 texel_size = 1.0 / source_size;
float2 half_scaled_texel_size = texel_size / 2.0 / scale;
float2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0);
float2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0);
float4 c0 = tex.Sample(samp, p0);
float4 c1 = tex.Sample(samp, float2(p1.x, p0.y));
float4 c2 = tex.Sample(samp, float2(p0.x, p1.y));
float4 c3 = tex.Sample(samp, p1);
// Texels are always inside the source rect, so no bounds check is needed, unlike the linear filter.
float2 rate_center = float2(1.0, 1.0) - half_scaled_texel_size;
float2 rate = clamp(((frac(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_SCREEN)
#if defined(USE_COLOR_MATRIX)
// Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
@ -226,6 +252,8 @@ float4 PSMain(PSInput input) : SV_TARGET {
// Clamp the output.
color.rgb = min(color.rgb, color.a);
return color;
#elif defined(FILTER_SCREEN)
return color;
#else
return input.color * color;
#endif // defined(USE_COLOR_MATRIX)

View File

@ -19,6 +19,7 @@ type Filter int
// Texture filters that can be requested for a draw call.
// The numeric values matter: the Metal driver formats them with %d into the
// shader source macros and into fragment function names.
const (
// FilterNearest is printed as "nearest" by drawTrianglesCommand.String.
FilterNearest Filter = iota
// FilterLinear is printed as "linear" by drawTrianglesCommand.String.
FilterLinear
// FilterScreen is printed as "screen" by drawTrianglesCommand.String.
// drawGame selects it for the final screen when the screen filter is enabled
// and the scale is a non-integer greater than 1.
FilterScreen
)
type Address int

View File

@ -33,6 +33,7 @@ const source = `#include <metal_stdlib>
#define FILTER_NEAREST {{.FilterNearest}}
#define FILTER_LINEAR {{.FilterLinear}}
#define FILTER_SCREEN {{.FilterScreen}}
#define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}}
#define ADDRESS_REPEAT {{.AddressRepeat}}
@ -183,6 +184,25 @@ struct ColorFromTexel<FILTER_LINEAR, address> {
}
};
// ColorFromTexel specialization for FILTER_SCREEN.
// Samples the texture at the four corners of a box centered on v.tex and
// blends the samples with nested mix calls. source_region is accepted but
// not read here.
template<uint8_t address>
struct ColorFromTexel<FILTER_SCREEN, address> {
inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, constant float4& source_region, float scale) {
// Size of one texel in texture coordinates (assumes source_size is the texture size in texels; TODO confirm).
const float2 texel_size = 1 / source_size;
// Box corners: half a texel divided by scale on either side of v.tex.
// The extra 1/512-texel shift is presumably there to avoid tie-breaking at texel boundaries.
float2 p0 = v.tex - texel_size / 2.0 / scale + (texel_size / 512.0);
float2 p1 = v.tex + texel_size / 2.0 / scale + (texel_size / 512.0);
// One sample per corner of the (p0, p1) box.
float4 c0 = texture.sample(texture_sampler, p0);
float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y));
float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y));
float4 c3 = texture.sample(texture_sampler, p1);
// rate is the per-axis blend weight: the fractional texel position of p0,
// rescaled around rate_center by scale and clamped to [0, 1].
float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale;
float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
// Blend horizontally within each row first, then vertically between the rows.
return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
}
};
template<bool useColorM, uint8_t filter, uint8_t address>
struct FragmentShaderImpl {
inline float4 Do(
@ -207,6 +227,20 @@ struct FragmentShaderImpl {
}
};
// FragmentShaderImpl specialization for FILTER_SCREEN, for any useColorM and address.
// It returns the ColorFromTexel<FILTER_SCREEN, address> result as-is:
// color_matrix_body and color_matrix_translation are accepted but never applied,
// so useColorM has no effect on the output of this specialization.
template<bool useColorM, uint8_t address>
struct FragmentShaderImpl<useColorM, FILTER_SCREEN, address> {
inline float4 Do(
VertexOut v,
texture2d<float> texture,
constant float2& source_size,
constant float4x4& color_matrix_body,
constant float4& color_matrix_translation,
constant float4& source_region,
constant float& scale) {
// Forward only the arguments the screen filter needs.
return ColorFromTexel<FILTER_SCREEN, address>().Do(v, texture, source_size, source_region, scale);
}
};
// Define Foo and FooCp macros to force macro replacement.
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
@ -239,6 +273,8 @@ FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_UNSAFE)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_UNSAFE)
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_UNSAFE)
#undef FragmentShaderFuncName
`
@ -572,6 +608,7 @@ func (g *Graphics) Initialize() error {
replaces := map[string]string{
"{{.FilterNearest}}": fmt.Sprintf("%d", graphicsdriver.FilterNearest),
"{{.FilterLinear}}": fmt.Sprintf("%d", graphicsdriver.FilterLinear),
"{{.FilterScreen}}": fmt.Sprintf("%d", graphicsdriver.FilterScreen),
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphicsdriver.AddressClampToZero),
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphicsdriver.AddressRepeat),
"{{.AddressUnsafe}}": fmt.Sprintf("%d", graphicsdriver.AddressUnsafe),
@ -589,6 +626,27 @@ func (g *Graphics) Initialize() error {
if err != nil {
return err
}
fs, err := lib.MakeFunction(
fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphicsdriver.FilterScreen, graphicsdriver.AddressUnsafe))
if err != nil {
return err
}
rpld := mtl.RenderPipelineDescriptor{
VertexFunction: vs,
FragmentFunction: fs,
}
rpld.ColorAttachments[0].PixelFormat = g.view.colorPixelFormat()
rpld.ColorAttachments[0].BlendingEnabled = true
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = mtl.BlendFactorZero
rpld.ColorAttachments[0].DestinationRGBBlendFactor = mtl.BlendFactorZero
rpld.ColorAttachments[0].SourceAlphaBlendFactor = mtl.BlendFactorOne
rpld.ColorAttachments[0].SourceRGBBlendFactor = mtl.BlendFactorOne
rpld.ColorAttachments[0].WriteMask = mtl.ColorWriteMaskAll
rps, err := g.view.getMTLDevice().MakeRenderPipelineState(rpld)
if err != nil {
return err
}
g.screenRPS = rps
for _, screen := range []bool{false, true} {
for _, cm := range []bool{false, true} {
@ -813,19 +871,23 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
rpss := map[stencilMode]mtl.RenderPipelineState{}
var uniformVars [][]float32
if shaderID == graphicsdriver.InvalidShaderID {
for _, stencil := range []stencilMode{
prepareStencil,
drawWithStencil,
noStencil,
} {
rpss[stencil] = g.rpss[rpsKey{
screen: dst.screen,
useColorM: !colorM.IsIdentity(),
filter: filter,
address: address,
compositeMode: mode,
stencilMode: stencil,
}]
if dst.screen && filter == graphicsdriver.FilterScreen {
rpss[noStencil] = g.screenRPS
} else {
for _, stencil := range []stencilMode{
prepareStencil,
drawWithStencil,
noStencil,
} {
rpss[stencil] = g.rpss[rpsKey{
screen: dst.screen,
useColorM: !colorM.IsIdentity(),
filter: filter,
address: address,
compositeMode: mode,
stencilMode: stencil,
}]
}
}
w, h := dst.internalSize()
@ -838,6 +900,10 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
var esBody [16]float32
var esTranslate [4]float32
colorM.Elements(&esBody, &esTranslate)
scale := float32(0)
if filter == graphicsdriver.FilterScreen {
scale = float32(dst.width) / float32(srcs[0].width)
}
uniformVars = [][]float32{
{float32(w), float32(h)},
sourceSize,
@ -849,6 +915,7 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
srcRegion.X + srcRegion.Width,
srcRegion.Y + srcRegion.Height,
},
{scale},
}
} else {
for _, stencil := range []stencilMode{

View File

@ -83,6 +83,8 @@ func fragmentShaderStr(useColorM bool, filter graphicsdriver.Filter, address gra
defs = append(defs, "#define FILTER_NEAREST")
case graphicsdriver.FilterLinear:
defs = append(defs, "#define FILTER_LINEAR")
case graphicsdriver.FilterScreen:
defs = append(defs, "#define FILTER_SCREEN")
default:
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
}
@ -149,6 +151,10 @@ uniform vec4 color_matrix_translation;
uniform highp vec2 source_size;
#if defined(FILTER_SCREEN)
uniform highp float scale;
#endif
varying highp vec2 varying_tex;
varying highp vec4 varying_color_scale;
@ -229,6 +235,27 @@ void main(void) {
color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_LINEAR)
#if defined(FILTER_SCREEN)
highp vec2 texel_size = 1.0 / source_size;
highp vec2 half_scaled_texel_size = texel_size / 2.0 / scale;
highp vec2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0);
highp vec2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0);
vec4 c0 = texture2D(T0, p0);
vec4 c1 = texture2D(T0, vec2(p1.x, p0.y));
vec4 c2 = texture2D(T0, vec2(p0.x, p1.y));
vec4 c3 = texture2D(T0, p1);
// Texels are always inside the source rect, so no bounds check is needed, unlike the linear filter.
vec2 rate_center = vec2(1.0, 1.0) - half_scaled_texel_size;
vec2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
gl_FragColor = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
// Assume that a color matrix and color vector values are not used with FILTER_SCREEN.
#else
# if defined(USE_COLOR_MATRIX)
// Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
@ -248,6 +275,9 @@ void main(void) {
# endif // defined(USE_COLOR_MATRIX)
gl_FragColor = color;
#endif // defined(FILTER_SCREEN)
}
`
)

View File

@ -244,6 +244,15 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
typ: shaderir.Type{Main: shaderir.Vec2},
})
}
if filter == graphicsdriver.FilterScreen {
scale := float32(destination.width) / float32(g.images[srcIDs[0]].width)
g.uniformVars = append(g.uniformVars, uniformVariable{
name: "scale",
value: []float32{scale},
typ: shaderir.Type{Main: shaderir.Float},
})
}
} else {
shader := g.shaders[shaderID]
program = shader.p

View File

@ -191,6 +191,7 @@ func (s *openGLState) reset(context *context) error {
for _, f := range []graphicsdriver.Filter{
graphicsdriver.FilterNearest,
graphicsdriver.FilterLinear,
graphicsdriver.FilterScreen,
} {
shaderFragmentColorMatrixNative, err := context.newFragmentShader(fragmentShaderStr(c, f, a))
if err != nil {

View File

@ -65,7 +65,7 @@ func (m *Mipmap) DrawTriangles(srcs [graphics.ShaderImageCount]*Mipmap, vertices
level := 0
// TODO: Do we need to check all the sources' states of being volatile?
if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile {
if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile && filter != graphicsdriver.FilterScreen {
level = math.MaxInt32
for i := 0; i < len(indices)/3; i++ {
const n = graphics.VertexFloatCount
@ -226,6 +226,10 @@ func (m *Mipmap) disposeMipmaps() {
func mipmapLevelFromDistance(dx0, dy0, dx1, dy1, sx0, sy0, sx1, sy1 float32, filter graphicsdriver.Filter) int {
const maxLevel = 6
if filter == graphicsdriver.FilterScreen {
return 0
}
d := (dx1-dx0)*(dx1-dx0) + (dy1-dy0)*(dy1-dy0)
s := (sx1-sx0)*(sx1-sx0) + (sy1-sy0)*(sy1-sy0)
if s == 0 {

View File

@ -30,41 +30,6 @@ import (
"github.com/hajimehoshi/ebiten/v2/internal/hooks"
)
const screenShader = `package main
var Scale float
func Fragment(position vec4, texCoord vec2, color vec4) vec4 {
sourceSize := imageSrcTextureSize()
// texelSize is one pixel size in texel sizes.
texelSize := 1 / sourceSize
halfScaledTexelSize := texelSize / 2 / Scale
// Shift 1/512 [texel] to avoid the tie-breaking issue.
// As all the vertex positions are aligned to 1/16 [pixel], this shifting should work in most cases.
pos := texCoord
p0 := pos - halfScaledTexelSize + (texelSize / 512)
p1 := pos + halfScaledTexelSize + (texelSize / 512)
// Texels must be in the source rect, so it is not necessary to check.
c0 := imageSrc0UnsafeAt(p0)
c1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))
c2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))
c3 := imageSrc0UnsafeAt(p1)
// p is the p1 value in one pixel assuming that the pixel's upper-left is (0, 0) and the lower-right is (1, 1).
p := fract(p1 * sourceSize)
// rate indicates how much the 4 colors are mixed. rate is in between [0, 1].
//
// 0 <= p <= 1/Scale: The rate is in between [0, 1]
// 1/Scale < p: Don't care. Adjacent colors (e.g. c0 vs c1 in an X direction) should be the same.
rate := clamp(p*Scale, 0, 1)
return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)
}
`
type Game interface {
NewOffscreenImage(width, height int) *Image
Layout(outsideWidth, outsideHeight int) (int, int)
@ -84,8 +49,6 @@ type context struct {
outsideWidth float64
outsideHeight float64
screenShader *Shader
m sync.Mutex
}
@ -145,15 +108,6 @@ func (c *context) updateFrameImpl(graphicsDriver graphicsdriver.Graphics, update
}
}()
// Create a shader for the screen if necessary.
if c.screenShader == nil {
ir, err := graphics.CompileShader([]byte(screenShader))
if err != nil {
return err
}
c.screenShader = NewShader(ir)
}
// ForceUpdate can be invoked even if the context is not initialized yet (#1591).
if w, h := c.layoutGame(outsideWidth, outsideHeight, deviceScaleFactor); w == 0 || h == 0 {
return nil
@ -232,16 +186,15 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) {
gty += offsetY
var filter graphicsdriver.Filter
var screenFilter bool
switch {
case !theGlobalState.isScreenFilterEnabled():
filter = graphicsdriver.FilterNearest
case math.Floor(s) == s:
filter = graphicsdriver.FilterNearest
case s > 1:
screenFilter = true
filter = graphicsdriver.FilterScreen
default:
// screenShader works with >=1 scale, but does not work well with <1 scale.
// FilterScreen works with >=1 scale, but does not work well with <1 scale.
// Use regular FilterLinear instead for now (#669).
filter = graphicsdriver.FilterLinear
}
@ -260,17 +213,7 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) {
is := graphics.QuadIndices()
srcs := [graphics.ShaderImageCount]*Image{c.offscreen}
var shader *Shader
var uniforms [][]float32
if screenFilter {
shader = c.screenShader
dstWidth := c.screen.width
srcWidth := c.offscreen.width
uniforms = shader.ConvertUniforms(map[string]interface{}{
"Scale": float32(dstWidth) / float32(srcWidth),
})
}
c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, shader, uniforms, false, true)
c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, nil, nil, false, true)
}
func (c *context) layoutGame(outsideWidth, outsideHeight float64, deviceScaleFactor float64) (int, int) {