internal/graphics: remove FilterScreen and use an original Kage program instead

Updates #2046
This commit is contained in:
Hajime Hoshi 2022-08-26 16:30:55 +09:00
parent 3b50e57f19
commit 5bb70f485e
10 changed files with 66 additions and 164 deletions

View File

@ -345,8 +345,6 @@ func (c *drawTrianglesCommand) String() string {
filter = "nearest"
case graphicsdriver.FilterLinear:
filter = "linear"
case graphicsdriver.FilterScreen:
filter = "screen"
default:
panic(fmt.Sprintf("graphicscommand: invalid filter: %d", c.filter))
}

View File

@ -1249,10 +1249,6 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh
var esBody [16]float32
var esTranslate [4]float32
colorM.Elements(&esBody, &esTranslate)
scale := float32(0)
if filter == graphicsdriver.FilterScreen {
scale = float32(dst.width) / float32(srcImages[0].width)
}
flattenUniforms = []float32{
float32(screenWidth),
@ -1283,7 +1279,6 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.Sh
srcRegion.Y,
srcRegion.X + srcRegion.Width,
srcRegion.Y + srcRegion.Height,
scale,
}
} else {
// TODO: This logic is very similar to Metal's. Let's unify them.

View File

@ -73,9 +73,6 @@ func (k *builtinPipelineStatesKey) defs() ([]_D3D_SHADER_MACRO, error) {
case graphicsdriver.FilterLinear:
name := []byte("FILTER_LINEAR\x00")
defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]})
case graphicsdriver.FilterScreen:
name := []byte("FILTER_SCREEN\x00")
defs = append(defs, _D3D_SHADER_MACRO{&name[0], &defval[0]})
default:
return nil, fmt.Errorf("directx: invalid filter: %d", k.filter)
}
@ -113,10 +110,6 @@ cbuffer ShaderParameter : register(b0) {
float4x4 color_matrix_body;
float4 color_matrix_translation;
float4 source_region;
// This member should be the last not to create a new sector.
// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules
float scale;
}
PSInput VSMain(float2 position : POSITION, float2 tex : TEXCOORD, float4 color : COLOR) {
@ -220,25 +213,6 @@ float4 PSMain(PSInput input) : SV_TARGET {
float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_LINEAR)
#if defined(FILTER_SCREEN)
float2 pos = input.texcoord;
float2 texel_size = 1.0 / source_size;
float2 half_scaled_texel_size = texel_size / 2.0 / scale;
float2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0);
float2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0);
float4 c0 = tex.Sample(samp, p0);
float4 c1 = tex.Sample(samp, float2(p1.x, p0.y));
float4 c2 = tex.Sample(samp, float2(p0.x, p1.y));
float4 c3 = tex.Sample(samp, p1);
// Texels must be in the source rect, so it is not necessary to check that like linear filter.
float2 rate_center = float2(1.0, 1.0) - half_scaled_texel_size;
float2 rate = clamp(((frac(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
float4 color = lerp(lerp(c0, c1, rate.x), lerp(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_SCREEN)
#if defined(USE_COLOR_MATRIX)
// Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
@ -252,8 +226,6 @@ float4 PSMain(PSInput input) : SV_TARGET {
// Clamp the output.
color.rgb = min(color.rgb, color.a);
return color;
#elif defined(FILTER_SCREEN)
return color;
#else
return input.color * color;
#endif // defined(USE_COLOR_MATRIX)

View File

@ -19,7 +19,6 @@ type Filter int
const (
FilterNearest Filter = iota
FilterLinear
FilterScreen
)
type Address int

View File

@ -33,7 +33,6 @@ const source = `#include <metal_stdlib>
#define FILTER_NEAREST {{.FilterNearest}}
#define FILTER_LINEAR {{.FilterLinear}}
#define FILTER_SCREEN {{.FilterScreen}}
#define ADDRESS_CLAMP_TO_ZERO {{.AddressClampToZero}}
#define ADDRESS_REPEAT {{.AddressRepeat}}
@ -184,25 +183,6 @@ struct ColorFromTexel<FILTER_LINEAR, address> {
}
};
template<uint8_t address>
struct ColorFromTexel<FILTER_SCREEN, address> {
inline float4 Do(VertexOut v, texture2d<float> texture, constant float2& source_size, constant float4& source_region, float scale) {
const float2 texel_size = 1 / source_size;
float2 p0 = v.tex - texel_size / 2.0 / scale + (texel_size / 512.0);
float2 p1 = v.tex + texel_size / 2.0 / scale + (texel_size / 512.0);
float4 c0 = texture.sample(texture_sampler, p0);
float4 c1 = texture.sample(texture_sampler, float2(p1.x, p0.y));
float4 c2 = texture.sample(texture_sampler, float2(p0.x, p1.y));
float4 c3 = texture.sample(texture_sampler, p1);
float2 rate_center = float2(1.0, 1.0) - texel_size / 2.0 / scale;
float2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
}
};
template<bool useColorM, uint8_t filter, uint8_t address>
struct FragmentShaderImpl {
inline float4 Do(
@ -227,20 +207,6 @@ struct FragmentShaderImpl {
}
};
template<bool useColorM, uint8_t address>
struct FragmentShaderImpl<useColorM, FILTER_SCREEN, address> {
inline float4 Do(
VertexOut v,
texture2d<float> texture,
constant float2& source_size,
constant float4x4& color_matrix_body,
constant float4& color_matrix_translation,
constant float4& source_region,
constant float& scale) {
return ColorFromTexel<FILTER_SCREEN, address>().Do(v, texture, source_size, source_region, scale);
}
};
// Define Foo and FooCp macros to force macro replacement.
// See "6.10.3.1 Argument substitution" in ISO/IEC 9899.
@ -273,8 +239,6 @@ FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_REPEAT)
FragmentShaderFunc(1, FILTER_NEAREST, ADDRESS_UNSAFE)
FragmentShaderFunc(1, FILTER_LINEAR, ADDRESS_UNSAFE)
FragmentShaderFunc(0, FILTER_SCREEN, ADDRESS_UNSAFE)
#undef FragmentShaderFuncName
`
@ -608,7 +572,6 @@ func (g *Graphics) Initialize() error {
replaces := map[string]string{
"{{.FilterNearest}}": fmt.Sprintf("%d", graphicsdriver.FilterNearest),
"{{.FilterLinear}}": fmt.Sprintf("%d", graphicsdriver.FilterLinear),
"{{.FilterScreen}}": fmt.Sprintf("%d", graphicsdriver.FilterScreen),
"{{.AddressClampToZero}}": fmt.Sprintf("%d", graphicsdriver.AddressClampToZero),
"{{.AddressRepeat}}": fmt.Sprintf("%d", graphicsdriver.AddressRepeat),
"{{.AddressUnsafe}}": fmt.Sprintf("%d", graphicsdriver.AddressUnsafe),
@ -626,27 +589,6 @@ func (g *Graphics) Initialize() error {
if err != nil {
return err
}
fs, err := lib.MakeFunction(
fmt.Sprintf("FragmentShader_%d_%d_%d", 0, graphicsdriver.FilterScreen, graphicsdriver.AddressUnsafe))
if err != nil {
return err
}
rpld := mtl.RenderPipelineDescriptor{
VertexFunction: vs,
FragmentFunction: fs,
}
rpld.ColorAttachments[0].PixelFormat = g.view.colorPixelFormat()
rpld.ColorAttachments[0].BlendingEnabled = true
rpld.ColorAttachments[0].DestinationAlphaBlendFactor = mtl.BlendFactorZero
rpld.ColorAttachments[0].DestinationRGBBlendFactor = mtl.BlendFactorZero
rpld.ColorAttachments[0].SourceAlphaBlendFactor = mtl.BlendFactorOne
rpld.ColorAttachments[0].SourceRGBBlendFactor = mtl.BlendFactorOne
rpld.ColorAttachments[0].WriteMask = mtl.ColorWriteMaskAll
rps, err := g.view.getMTLDevice().MakeRenderPipelineState(rpld)
if err != nil {
return err
}
g.screenRPS = rps
for _, screen := range []bool{false, true} {
for _, cm := range []bool{false, true} {
@ -871,23 +813,19 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
rpss := map[stencilMode]mtl.RenderPipelineState{}
var uniformVars [][]float32
if shaderID == graphicsdriver.InvalidShaderID {
if dst.screen && filter == graphicsdriver.FilterScreen {
rpss[noStencil] = g.screenRPS
} else {
for _, stencil := range []stencilMode{
prepareStencil,
drawWithStencil,
noStencil,
} {
rpss[stencil] = g.rpss[rpsKey{
screen: dst.screen,
useColorM: !colorM.IsIdentity(),
filter: filter,
address: address,
compositeMode: mode,
stencilMode: stencil,
}]
}
for _, stencil := range []stencilMode{
prepareStencil,
drawWithStencil,
noStencil,
} {
rpss[stencil] = g.rpss[rpsKey{
screen: dst.screen,
useColorM: !colorM.IsIdentity(),
filter: filter,
address: address,
compositeMode: mode,
stencilMode: stencil,
}]
}
w, h := dst.internalSize()
@ -900,10 +838,6 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
var esBody [16]float32
var esTranslate [4]float32
colorM.Elements(&esBody, &esTranslate)
scale := float32(0)
if filter == graphicsdriver.FilterScreen {
scale = float32(dst.width) / float32(srcs[0].width)
}
uniformVars = [][]float32{
{float32(w), float32(h)},
sourceSize,
@ -915,7 +849,6 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
srcRegion.X + srcRegion.Width,
srcRegion.Y + srcRegion.Height,
},
{scale},
}
} else {
for _, stencil := range []stencilMode{

View File

@ -83,8 +83,6 @@ func fragmentShaderStr(useColorM bool, filter graphicsdriver.Filter, address gra
defs = append(defs, "#define FILTER_NEAREST")
case graphicsdriver.FilterLinear:
defs = append(defs, "#define FILTER_LINEAR")
case graphicsdriver.FilterScreen:
defs = append(defs, "#define FILTER_SCREEN")
default:
panic(fmt.Sprintf("opengl: invalid filter: %d", filter))
}
@ -151,10 +149,6 @@ uniform vec4 color_matrix_translation;
uniform highp vec2 source_size;
#if defined(FILTER_SCREEN)
uniform highp float scale;
#endif
varying highp vec2 varying_tex;
varying highp vec4 varying_color_scale;
@ -235,27 +229,6 @@ void main(void) {
color = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
#endif // defined(FILTER_LINEAR)
#if defined(FILTER_SCREEN)
highp vec2 texel_size = 1.0 / source_size;
highp vec2 half_scaled_texel_size = texel_size / 2.0 / scale;
highp vec2 p0 = pos - half_scaled_texel_size + (texel_size / 512.0);
highp vec2 p1 = pos + half_scaled_texel_size + (texel_size / 512.0);
vec4 c0 = texture2D(T0, p0);
vec4 c1 = texture2D(T0, vec2(p1.x, p0.y));
vec4 c2 = texture2D(T0, vec2(p0.x, p1.y));
vec4 c3 = texture2D(T0, p1);
// Texels must be in the source rect, so it is not necessary to check that like linear filter.
vec2 rate_center = vec2(1.0, 1.0) - half_scaled_texel_size;
vec2 rate = clamp(((fract(p0 * source_size) - rate_center) * scale) + rate_center, 0.0, 1.0);
gl_FragColor = mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y);
// Assume that a color matrix and color vector values are not used with FILTER_SCREEN.
#else
# if defined(USE_COLOR_MATRIX)
// Un-premultiply alpha.
// When the alpha is 0, 1.0 - sign(alpha) is 1.0, which means division does nothing.
@ -275,9 +248,6 @@ void main(void) {
# endif // defined(USE_COLOR_MATRIX)
gl_FragColor = color;
#endif // defined(FILTER_SCREEN)
}
`
)

View File

@ -244,15 +244,6 @@ func (g *Graphics) DrawTriangles(dstID graphicsdriver.ImageID, srcIDs [graphics.
typ: shaderir.Type{Main: shaderir.Vec2},
})
}
if filter == graphicsdriver.FilterScreen {
scale := float32(destination.width) / float32(g.images[srcIDs[0]].width)
g.uniformVars = append(g.uniformVars, uniformVariable{
name: "scale",
value: []float32{scale},
typ: shaderir.Type{Main: shaderir.Float},
})
}
} else {
shader := g.shaders[shaderID]
program = shader.p

View File

@ -191,7 +191,6 @@ func (s *openGLState) reset(context *context) error {
for _, f := range []graphicsdriver.Filter{
graphicsdriver.FilterNearest,
graphicsdriver.FilterLinear,
graphicsdriver.FilterScreen,
} {
shaderFragmentColorMatrixNative, err := context.newFragmentShader(fragmentShaderStr(c, f, a))
if err != nil {

View File

@ -65,7 +65,7 @@ func (m *Mipmap) DrawTriangles(srcs [graphics.ShaderImageCount]*Mipmap, vertices
level := 0
// TODO: Do we need to check all the sources' states of being volatile?
if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile && filter != graphicsdriver.FilterScreen {
if !canSkipMipmap && srcs[0] != nil && !srcs[0].volatile {
level = math.MaxInt32
for i := 0; i < len(indices)/3; i++ {
const n = graphics.VertexFloatCount
@ -226,10 +226,6 @@ func (m *Mipmap) disposeMipmaps() {
func mipmapLevelFromDistance(dx0, dy0, dx1, dy1, sx0, sy0, sx1, sy1 float32, filter graphicsdriver.Filter) int {
const maxLevel = 6
if filter == graphicsdriver.FilterScreen {
return 0
}
d := (dx1-dx0)*(dx1-dx0) + (dy1-dy0)*(dy1-dy0)
s := (sx1-sx0)*(sx1-sx0) + (sy1-sy0)*(sy1-sy0)
if s == 0 {

View File

@ -30,6 +30,33 @@ import (
"github.com/hajimehoshi/ebiten/v2/internal/hooks"
)
// screenShader is the Kage source of the fragment shader that is used to draw
// the offscreen image onto the screen when the screen filter applies.
//
// The Scale uniform is the ratio of the destination width to the source width.
// The shader samples source image 0 at four points around texCoord, each
// displaced by half a texel divided by Scale, and blends the four colors with
// weights derived from the fractional texel position of the first sample.
const screenShader = `package main
var Scale float
func Fragment(position vec4, texCoord vec2, color vec4) vec4 {
sourceSize := imageSrcTextureSize()
texelSize := 1 / sourceSize
halfScaledTexelSize := texelSize / 2 / Scale
// Shift 1/512 [texel] to avoid the tie-breaking issue.
// As all the vertex positions are aligned to 1/16 [pixel], this shiting should work in most cases.
pos := texCoord
p0 := pos - halfScaledTexelSize + (texelSize / 512)
p1 := pos + halfScaledTexelSize + (texelSize / 512)
// Texels must be in the source rect, so it is not necessary to check.
c0 := imageSrc0UnsafeAt(p0)
c1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))
c2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))
c3 := imageSrc0UnsafeAt(p1)
rateCenter := vec2(1, 1) - halfScaledTexelSize
rate := clamp(((fract(p0 * sourceSize) - rateCenter) * Scale) + rateCenter, 0, 1)
return mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)
}
`
type Game interface {
NewOffscreenImage(width, height int) *Image
Layout(outsideWidth, outsideHeight int) (int, int)
@ -49,6 +76,8 @@ type context struct {
outsideWidth float64
outsideHeight float64
screenShader *Shader
m sync.Mutex
}
@ -108,6 +137,15 @@ func (c *context) updateFrameImpl(graphicsDriver graphicsdriver.Graphics, update
}
}()
// Create a shader for the screen if necessary.
if c.screenShader == nil {
ir, err := graphics.CompileShader([]byte(screenShader))
if err != nil {
return err
}
c.screenShader = NewShader(ir)
}
// ForceUpdate can be invoked even if the context is not initialized yet (#1591).
if w, h := c.layoutGame(outsideWidth, outsideHeight, deviceScaleFactor); w == 0 || h == 0 {
return nil
@ -186,15 +224,16 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) {
gty += offsetY
var filter graphicsdriver.Filter
var screenFilter bool
switch {
case !theGlobalState.isScreenFilterEnabled():
filter = graphicsdriver.FilterNearest
case math.Floor(s) == s:
filter = graphicsdriver.FilterNearest
case s > 1:
filter = graphicsdriver.FilterScreen
screenFilter = true
default:
// FilterScreen works with >=1 scale, but does not well with <1 scale.
// screenShader works with >=1 scale, but does not work well with <1 scale.
// Use regular FilterLinear instead so far (#669).
filter = graphicsdriver.FilterLinear
}
@ -213,7 +252,17 @@ func (c *context) drawGame(graphicsDriver graphicsdriver.Graphics) {
is := graphics.QuadIndices()
srcs := [graphics.ShaderImageCount]*Image{c.offscreen}
c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, nil, nil, false, true)
var shader *Shader
var uniforms [][]float32
if screenFilter {
shader = c.screenShader
dstWidth := c.screen.width
srcWidth := c.offscreen.width
uniforms = shader.ConvertUniforms(map[string]interface{}{
"Scale": float32(dstWidth) / float32(srcWidth),
})
}
c.screen.DrawTriangles(srcs, vs, is, affine.ColorMIdentity{}, graphicsdriver.CompositeModeCopy, filter, graphicsdriver.AddressUnsafe, dstRegion, graphicsdriver.Region{}, [graphics.ShaderImageCount - 1][2]float32{}, shader, uniforms, false, true)
}
func (c *context) layoutGame(outsideWidth, outsideHeight float64, deviceScaleFactor float64) (int, int) {