

index 83948ab4..29c04fde 100644

--- a/

+++ b/ diff --git a/Makefile.am b/Makefile.amindex 83948ab4..29c04fde 100644--- a/ Makefile.am +++ b/ Makefile.am @@ -23,6 +23,10 @@ ACLOCAL_AMFLAGS = -I m4 ${ACLOCAL_FLAGS} SUBDIRS = uxa src man MAINTAINERCLEANFILES = ChangeLog INSTALL +if HAVE_X11 +SUBDIRS += test +endif + .PHONY: ChangeLog INSTALL INSTALL:

index 9449e567..b4b693e2 100644

--- a/

+++ b/
diff --git a/configure.ac b/configure.ac
index 9449e567..b4b693e2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -67,6 +67,9 @@ if test x"$udev" = xyes; then
 	AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection])
 fi
 
+PKG_CHECK_MODULES(X11, [x11 xrender xext pixman-1], [x11=yes], [x11=no])
+AM_CONDITIONAL(HAVE_X11, test "x$x11" = xyes)
+
 AH_TOP([#include "xorg-server.h"])
 
 # Define a configure option for an alternate module directory
@@ -89,11 +92,23 @@ AC_ARG_ENABLE(kms-only, AS_HELP_STRING([--enable-kms-only],
               [KMS_ONLY="$enableval"],
               [KMS_ONLY=no])
 
+AC_ARG_ENABLE(sna,
+	      AS_HELP_STRING([--enable-sna],
+			     [Enable SandyBridge's New Acceleration (SNA) [[options=default|gen2|gen3|gen4|gen5|gen6]]]),
+	      [SNA="$enableval"],
+	      [SNA=no])
+
+AC_ARG_ENABLE(vmap,
+	      AS_HELP_STRING([--enable-vmap],
+			     [Enable use of vmap [[default=no]]]),
+	      [VMAP="$enableval"],
+	      [VMAP=no])
+
 AC_ARG_ENABLE(debug,
               AS_HELP_STRING([--enable-debug],
-                             [Enables internal debugging [[default=yes]]]),
+                             [Enables internal debugging [[default=no]]]),
               [DEBUG="$enableval"],
-              [DEBUG=yes])
+              [DEBUG=no])
 
 # Store the list of server defined optional extensions in REQUIRED_MODULES
 XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
@@ -165,9 +180,59 @@ if test "x$KMS_ONLY" = xyes; then
 	AC_DEFINE(KMS_ONLY,1,[Assume KMS support])
 fi
 
+AM_CONDITIONAL(USE_VMAP, test "x$VMAP" = xyes)
+if test "x$VMAP" = xyes; then
+	AC_DEFINE(USE_VMAP,1,[Assume VMAP support])
+fi
+
+AM_CONDITIONAL(SNA, test "x$SNA" != xno)
+# Each backend is built when $SNA contains (case-insensitively) yes, all, or that generation's name.
+AM_CONDITIONAL(SNA_GEN2, [echo $SNA | grep -E -qsi '(yes)|(all)|(gen2)'])
+AM_CONDITIONAL(SNA_GEN3, [echo $SNA | grep -E -qsi '(yes)|(all)|(pnv)|(gen3)'])
+AM_CONDITIONAL(SNA_GEN4, [echo $SNA | grep -E -qsi '(yes)|(all)|(brw)|(gen4)'])
+AM_CONDITIONAL(SNA_GEN5, [echo $SNA | grep -E -qsi '(yes)|(all)|(ilk)|(gen5)'])
+AM_CONDITIONAL(SNA_GEN6, [echo $SNA | grep -E -qsi '(yes)|(all)|(snb)|(gen6)'])
+AC_MSG_CHECKING([whether to include SNA support])
+sna_drivers="no"
+if test "x$SNA" != xno; then
+	sna_drivers=""
+	AC_DEFINE(SNA,1,[Enable SandyBridge's New Acceleration])
+	if echo $SNA | grep -E -qsi '(yes)|(default)'; then
+		AC_DEFINE(SNA_DEFAULT,1,[Enable SandyBridge's New Acceleration by default])
+		sna_drivers="default $sna_drivers"
+	fi
+	if echo $SNA | grep -E -qsi '(yes)|(all)|(gen2)'; then
+		AC_DEFINE(SNA_GEN2,1,[Enable SandyBridge's New Acceleration for GEN2])
+		sna_drivers="i8xx $sna_drivers"
+	fi
+	if echo $SNA | grep -E -qsi '(yes)|(all)|(pnv)|(gen3)'; then
+		AC_DEFINE(SNA_GEN3,1,[Enable SandyBridge's New Acceleration for PNV])
+		sna_drivers="pnv $sna_drivers"
+	fi
+	if echo $SNA | grep -E -qsi '(yes)|(all)|(brw)|(gen4)'; then
+		AC_DEFINE(SNA_GEN4,1,[Enable SandyBridge's New Acceleration for BRW])
+		sna_drivers="brw $sna_drivers"
+	fi
+	if echo $SNA | grep -E -qsi '(yes)|(all)|(ilk)|(gen5)'; then
+		AC_DEFINE(SNA_GEN5,1,[Enable SandyBridge's New Acceleration for ILK])
+		sna_drivers="ilk $sna_drivers"
+	fi
+	if echo $SNA | grep -E -qsi '(yes)|(all)|(snb)|(gen6)'; then
+		AC_DEFINE(SNA_GEN6,1,[Enable SandyBridge's New Acceleration for SNB])
+		sna_drivers="snb $sna_drivers"
+	fi
+fi
+AC_MSG_RESULT([$sna_drivers])
+
+AM_CONDITIONAL(DEBUG, test "x$DEBUG" = xyes)
 if test "x$DEBUG" = xno; then
 	AC_DEFINE(NDEBUG,1,[Disable internal debugging])
 fi
+if test "x$DEBUG" = xyes; then
+	AC_DEFINE(HAS_EXTRA_DEBUG,1,[Enable additional debugging])
+fi
+
+AC_CHECK_HEADERS([sys/timerfd.h])
 
 DRIVER_NAME=intel
 AC_SUBST([DRIVER_NAME])
@@ -184,7 +249,9 @@ AC_CONFIG_FILES([
 	src/legacy/Makefile
 	src/legacy/i810/Makefile
 	src/legacy/i810/xvmc/Makefile
+	src/sna/Makefile
 	man/Makefile
 	src/render_program/Makefile
+	test/Makefile
 ])
 AC_OUTPUT

index 85e2b2ea..ab46db2a 100644

--- a/

+++ b/ diff --git a/man/intel.man b/man/intel.manindex 85e2b2ea..ab46db2a 100644--- a/ man/intel.man +++ b/ man/intel.man @@ -210,6 +210,14 @@ User should provide absolute path to libIntelXvMC.so in XvMCConfig file. .IP Default: Disabled. .TP +.BI "Option \*qThrottle\*q \*q" boolean \*q +This option controls whether the driver periodically waits for pending +drawing operations to complete. Throttling ensures that the GPU does not +lag too far behind the CPU and thus avoids noticeable delays in user responsiveness, at +the cost of throughput performance. +.IP +Default: enabled. +.TP .BI "Option \*qHotPlug\*q \*q" boolean \*q This option controls whether the driver automatically notifies applications when monitors are connected or disconnected.

index abb03c3f..a7f219c1 100644

--- a/

+++ b/ diff --git a/src/Makefile.am b/src/Makefile.amindex abb03c3f..a7f219c1 100644--- a/ src/Makefile.am +++ b/ src/Makefile.am @@ -35,6 +35,11 @@ intel_drv_ladir = @moduledir@/drivers intel_drv_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ -ldrm_intel ../uxa/libuxa.la legacy/liblegacy.la intel_drv_la_LIBADD += @PCIACCESS_LIBS@ +if SNA +SUBDIRS += sna +intel_drv_la_LIBADD += sna/libsna.la +endif + NULL:=# intel_drv_la_SOURCES = \

index 9468e72f..9b1da491 100644

--- a/

+++ b/ diff --git a/src/intel_module.c b/src/intel_module.cindex 9468e72f..9b1da491 100644--- a/ src/intel_module.c +++ b/ src/intel_module.c @@ -36,6 +36,7 @@ #include "intel.h" #include "intel_driver.h" #include "legacy/legacy.h" +#include "sna/sna_module.h" #include <xf86drmMode.h> @@ -320,22 +321,49 @@ static Bool intel_pci_probe(DriverPtr driver, scrn->name = INTEL_NAME; scrn->Probe = NULL; -#if KMS_ONLY - intel_init_scrn(scrn); -#else switch (DEVICE_ID(device)) { +#if !KMS_ONLY case PCI_CHIP_I810: case PCI_CHIP_I810_DC100: case PCI_CHIP_I810_E: case PCI_CHIP_I815: lg_i810_init(scrn); break; +#endif +#if SNA + case 0: +#if SNA_GEN3 + case PCI_CHIP_PINEVIEW_M: + case PCI_CHIP_PINEVIEW_G: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q35_G: + case PCI_CHIP_Q33_G: +#endif +#if SNA_GEN5 + case PCI_CHIP_IRONLAKE_D_G: + case PCI_CHIP_IRONLAKE_M_G: +#endif +#if SNA_GEN6 + case PCI_CHIP_SANDYBRIDGE_GT1: + case PCI_CHIP_SANDYBRIDGE_GT2: + case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_M_GT1: + case PCI_CHIP_SANDYBRIDGE_M_GT2: + case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_S_GT: +#endif + sna_init_scrn(scrn); + break; +#endif default: +#if SNA_DEFAULT + sna_init_scrn(scrn); +#else intel_init_scrn(scrn); +#endif break; } -#endif } return scrn != NULL; } @@ -360,20 +388,46 @@ static XF86ModuleVersionInfo intel_version = { static const OptionInfoRec * intel_available_options(int chipid, int busid) { -#if KMS_ONLY - return intel_uxa_available_options(chipid, busid); -#else switch (chipid) { +#if !KMS_ONLY case PCI_CHIP_I810: case PCI_CHIP_I810_DC100: case PCI_CHIP_I810_E: case PCI_CHIP_I815: return lg_i810_available_options(chipid, busid); +#endif +#if SNA + case 0: +#if SNA_GEN3 + case PCI_CHIP_PINEVIEW_M: + case PCI_CHIP_PINEVIEW_G: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q35_G: + case PCI_CHIP_Q33_G: +#endif +#if SNA_GEN5 + case PCI_CHIP_IRONLAKE_D_G: + case PCI_CHIP_IRONLAKE_M_G: +#endif +#if SNA_GEN6 + case PCI_CHIP_SANDYBRIDGE_GT1: + case 
PCI_CHIP_SANDYBRIDGE_GT2: + case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_M_GT1: + case PCI_CHIP_SANDYBRIDGE_M_GT2: + case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_S_GT: +#endif + return sna_available_options(chipid, busid); +#endif default: +#if SNA_DEFAULT + return sna_available_options(chipid, busid); +#else return intel_uxa_available_options(chipid, busid); - } #endif + } } static DriverRec intel = {

new file mode 100644

index 00000000..f65b281b

--- /dev/null

+++ b/
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
new file mode 100644
index 00000000..f65b281b
--- /dev/null
+++ b/src/sna/Makefile.am
@@ -0,0 +1,118 @@
+#  Copyright 2005 Adam Jackson.
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  on the rights to use, copy, modify, merge, publish, distribute, sub
+#  license, and/or sell copies of the Software, and to permit persons to whom
+#  the Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice (including the next
+#  paragraph) shall be included in all copies or substantial portions of the
+#  Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+#  ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+#  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+#  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
+	-I$(top_srcdir)/src -I$(top_srcdir)/uxa -I$(top_srcdir)/src/render_program
+
+noinst_LTLIBRARIES = libsna.la
+libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@
+
+NULL:=#
+
+# Sources common to every build of libsna.la.
+libsna_la_SOURCES = \
+	blt.c \
+	kgem.c \
+	kgem.h \
+	sna.h \
+	sna_accel.c \
+	sna_blt.c \
+	sna_composite.c \
+	sna_damage.c \
+	sna_damage.h \
+	sna_display.c \
+	sna_driver.c \
+	sna_driver.h \
+	sna_glyphs.c \
+	sna_gradient.c \
+	sna_io.c \
+	sna_render.c \
+	sna_render.h \
+	sna_render_inline.h \
+	sna_reg.h \
+	sna_stream.c \
+	sna_trapezoids.c \
+	sna_tiling.c \
+	sna_transform.c \
+	sna_video.c \
+	sna_video.h \
+	sna_video_overlay.c \
+	sna_video_textured.c \
+	$(NULL)
+
+# Per-generation render sources; each pair is added only when its SNA_GENn conditional is set.
+if SNA_GEN2
+libsna_la_SOURCES += \
+	gen2_render.c \
+	gen2_render.h \
+	$(NULL)
+endif
+if SNA_GEN3
+libsna_la_SOURCES += \
+	gen3_render.c \
+	gen3_render.h \
+	$(NULL)
+endif
+if SNA_GEN4
+libsna_la_SOURCES += \
+	gen4_render.c \
+	gen4_render.h \
+	$(NULL)
+endif
+if SNA_GEN5
+libsna_la_SOURCES += \
+	gen5_render.c \
+	gen5_render.h \
+	$(NULL)
+endif
+if SNA_GEN6
+libsna_la_SOURCES += \
+	gen6_render.c \
+	gen6_render.h \
+	$(NULL)
+endif
+
+if DRI
+libsna_la_SOURCES += \
+	sna_dri.c \
+	$(NULL)
+libsna_la_LIBADD += \
+	$(DRI_LIBS) \
+	$(NULL)
+endif
+
+if XVMC
+libsna_la_SOURCES += \
+	sna_video_hwmc.h \
+	sna_video_hwmc.c \
+	$(NULL)
+endif
+
+# Extra kgem_debug sources, added only when the DEBUG conditional is set.
+if DEBUG
+libsna_la_SOURCES += \
+	kgem_debug.c \
+	kgem_debug.h \
+	kgem_debug_gen3.c \
+	kgem_debug_gen4.c \
+	kgem_debug_gen5.c \
+	kgem_debug_gen6.c \
+	$(NULL)
+endif

new file mode 100644

index 00000000..fd847de3

--- /dev/null

+++ b/ diff --git a/src/sna/README b/src/sna/READMEnew file mode 100644index 00000000..fd847de3--- /dev/null+++ b/ src/sna/README @@ -0,0 +1,30 @@ +SandyBridge's New Acceleration +------------------------------ + +The guiding principle behind the design is to avoid GPU context switches. +On SandyBridge (and beyond), these are especially pernicious because the +RENDER and BLT engines are now on different rings and require +synchronisation of the various execution units when switching contexts. +They were not cheap on earlier generations either, but with the increasing +complexity of the GPU, avoiding such serialisations is important. + +Furthermore, we try very hard to avoid migrating between the CPU and GPU. +Every pixmap (apart from temporary "scratch" surfaces which we intend to +use on the GPU) is created in system memory. All operations are then done +upon this shadow copy until we are forced to move it onto the GPU. Such +migration can only be first triggered by: setting the pixmap as the +scanout (we obviously need a GPU buffer here), using the pixmap as a DRI +buffer (the client expects to perform hardware acceleration and we do not +want to disappoint) and lastly using the pixmap as a RENDER target. This +last is chosen because when we know we are going to perform hardware +acceleration and will continue to do so without fallbacks, using the GPU +is much, much faster than the CPU. The heuristic I chose therefore was +that if the application uses RENDER, e.g. cairo, then it will only be +using those paths and not intermixing core drawing operations and so is +unlikely to trigger a fallback. + +The complicating case is front-buffer rendering. So in order to accommodate +using RENDER on an application whilst running xterm without a composite +manager redirecting all the pixmaps to backing surfaces, we have to +perform damage tracking to avoid excess migration of portions of the +buffer.

new file mode 100644

index 00000000..ac20372e

--- /dev/null

+++ b/
diff --git a/src/sna/blt.c b/src/sna/blt.c
new file mode 100644
index 00000000..ac20372e
--- /dev/null
+++ b/src/sna/blt.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sna.h"
+
+#if DEBUG_BLT
+#undef DBG
+#define DBG(x) ErrorF x
+#else
+#define NDEBUG 1
+#endif
+
+/*
+ * Copy a width x height pixel rectangle between two linear buffers.
+ *
+ * src_stride and dst_stride are the row pitches in bytes; (src_x, src_y)
+ * and (dst_x, dst_y) are the top-left corners in pixels.  bpp is given in
+ * bits and only whole bytes are copied (bpp / 8).  width and height must
+ * be non-zero.
+ */
+void
+memcpy_blt(const void *src, void *dst, int bpp,
+	   uint16_t src_stride, uint16_t dst_stride,
+	   int16_t src_x, int16_t src_y,
+	   int16_t dst_x, int16_t dst_y,
+	   uint16_t width, uint16_t height)
+{
+	const uint8_t *src_bytes;
+	uint8_t *dst_bytes;
+	size_t row_bytes;
+
+	assert(width && height);
+	assert(bpp >= 8);
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+	     __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+	/* With assert() compiled out, height == 0 would wrap the row loop. */
+	if (width == 0 || height == 0)
+		return;
+
+	bpp /= 8;
+
+	/* Do not scale the 16-bit 'width' in place: 16384 pixels at 32bpp
+	 * is 65536 bytes, which would truncate to 0.
+	 */
+	row_bytes = (size_t)width * bpp;
+
+	src_bytes = (const uint8_t *)src +
+		(ptrdiff_t)src_stride * src_y + (ptrdiff_t)src_x * bpp;
+	dst_bytes = (uint8_t *)dst +
+		(ptrdiff_t)dst_stride * dst_y + (ptrdiff_t)dst_x * bpp;
+
+	/* Rows are contiguous in both buffers: copy the block in one go. */
+	if (row_bytes == src_stride && row_bytes == dst_stride) {
+		memcpy(dst_bytes, src_bytes, row_bytes * height);
+		return;
+	}
+
+	do {
+		memcpy(dst_bytes, src_bytes, row_bytes);
+		src_bytes += src_stride;
+		dst_bytes += dst_stride;
+	} while (--height);
+}

new file mode 100644

index 00000000..896f7308

--- /dev/null

+++ b/ diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.cnew file mode 100644index 00000000..896f7308--- /dev/null+++ b/ src/sna/gen2_render.c @@ -0,0 +1,1237 @@ +/* + * Copyright © 2006,2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include "gen2_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_BATCH_F(v) batch_emit_float(sna, v) +#define OUT_VERTEX(v) batch_emit_float(sna, v) + +static const struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen2_blend_op[] = { + /* Clear */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, + /* Src */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, + /* Dst */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, + /* Over */ + {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, + /* In */ + {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, + /* InReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, + /* Out */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, + /* OutReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ + {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, + /* Xor */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, +}; + +static const struct formatinfo { + int fmt; + uint32_t card_fmt; +} i8xx_tex_formats[] = { + {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, + {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, + {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, + {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, + {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, + {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, +}, i85x_tex_formats[] = { + 
{PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, + {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, +}; + +static inline uint32_t +gen2_buf_tiling(uint32_t tiling) +{ + uint32_t v = 0; + switch (tiling) { + case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; + case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; + case I915_TILING_NONE: break; + } + return v; +} + +static uint32_t +gen2_get_dst_format(uint32_t format) +{ +#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) + switch (format) { + default: + assert(0); + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return COLR_BUF_ARGB8888 | BIAS; + case PICT_r5g6b5: + return COLR_BUF_RGB565 | BIAS; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + return COLR_BUF_ARGB1555 | BIAS; + case PICT_a8: + return COLR_BUF_8BIT | BIAS; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return COLR_BUF_ARGB4444 | BIAS; + } +#undef BIAS +} + +static Bool +gen2_check_dst_format(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t +gen2_get_card_format(struct sna *sna, uint32_t format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { + if (i8xx_tex_formats[i].fmt == format) + return i8xx_tex_formats[i].card_fmt; + } + + if (!(IS_I830(sna) || IS_845G(sna))) { + for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { + if (i85x_tex_formats[i].fmt == format) + return i85x_tex_formats[i].card_fmt; + } + } + + assert(0); + return 0; +} + +static Bool +gen2_check_card_format(struct sna *sna, uint32_t format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { + if (i8xx_tex_formats[i].fmt == format) + return TRUE; + } + + if (!(IS_I830(sna) || IS_845G(sna))) { + for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { + if (i85x_tex_formats[i].fmt == format) + return TRUE; + } + } + + return FALSE; 
+} + +static uint32_t +gen2_sampler_tiling_bits(uint32_t tiling) +{ + uint32_t bits = 0; + switch (tiling) { + default: + assert(0); + case I915_TILING_Y: + bits |= TM0S1_TILE_WALK; + case I915_TILING_X: + bits |= TM0S1_TILED_SURFACE; + case I915_TILING_NONE: + break; + } + return bits; +} + +static Bool +gen2_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static Bool +gen2_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return TRUE; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +static void +gen2_emit_texture(struct sna *sna, + const struct sna_composite_channel *channel, + int unit) +{ + uint32_t filter; + uint32_t wrap_mode; + uint32_t texcoordtype; + + if (channel->is_affine) + texcoordtype = TEXCOORDTYPE_CARTESIAN; + else + texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; + + switch (channel->repeat) { + default: + assert(0); + case RepeatNone: + wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + break; + case RepeatNormal: + wrap_mode = TEXCOORDMODE_WRAP; + break; + case RepeatPad: + wrap_mode = TEXCOORDMODE_CLAMP; + break; + case RepeatReflect: + wrap_mode = TEXCOORDMODE_MIRROR; + break; + } + + switch (channel->filter) { + default: + assert(0); + case PictFilterNearest: + filter = (FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | + FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT); + break; + case PictFilterBilinear: + filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | + FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT); + break; + } + filter |= MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT; + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_MAP(unit) | 4); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + channel->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + 0)); + OUT_BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) | + 
((channel->width - 1) << TM0S1_WIDTH_SHIFT) | + gen2_get_card_format(sna, channel->pict_format) | + gen2_sampler_tiling_bits(channel->bo->tiling)); + OUT_BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + OUT_BATCH(filter); + OUT_BATCH(0); /* default color */ + OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | + texcoordtype | + ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode)); + /* map texel stream */ + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + if (unit == 0) + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_KEEP) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + else + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + (unit << 16) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(unit) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(unit)); +} + +static void +gen2_get_blend_factors(const struct sna_composite_op *op, + uint32_t *c_out, + uint32_t *a_out) +{ + uint32_t cblend, ablend; + + /* If component alpha is active in the mask and the blend operation + * uses the source alpha, then we know we don't need the source + * value (otherwise we would have hit a fallback earlier), so we + * provide the source alpha (src.A * mask.X) as output color. + * Conversely, if CA is set and we don't need the source alpha, then + * we produce the source value (src.X * mask.X) and the source alpha + * is unused.. Otherwise, we provide the non-CA source value + * (src.X * mask.A). + * + * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8 + * pictures, but we need to implement it for 830/845 and there's no + * harm done in leaving it in. 
+ */ + cblend = + TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE | + TB0C_OUTPUT_WRITE_CURRENT; + ablend = + TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE | + TB0A_OUTPUT_WRITE_CURRENT; + + /* Get the source picture's channels into TBx_ARG1 */ + if ((op->has_component_alpha && gen2_blend_op[op->op].src_alpha) || + op->dst.format == PICT_a8) { + /* Producing source alpha value, so the first set of channels + * is src.A instead of src.X. We also do this if the destination + * is a8, in which case src.G is what's written, and the other + * channels are ignored. + */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA; + } else { + if (PICT_FORMAT_RGB(op->src.pict_format) != 0) + cblend |= TB0C_ARG1_SEL_TEXEL0; + else + cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + } + + if (op->mask.bo) { + cblend |= TB0C_ARG2_SEL_TEXEL1; + if (op->dst.format == PICT_a8 || op->has_component_alpha) + cblend |= TB0C_ARG2_REPLICATE_ALPHA; + ablend |= TB0A_ARG2_SEL_TEXEL1; + } else { + cblend |= TB0C_ARG2_SEL_ONE; + ablend |= TB0A_ARG2_SEL_ONE; + } + + *c_out = cblend; + *a_out = ablend; +} + +static uint32_t gen2_get_blend_cntl(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t sblend, dblend; + + sblend = gen2_blend_op[op].src_blend; + dblend = gen2_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that + * we'll treat it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) { + if (sblend == BLENDFACTOR_DST_ALPHA) + sblend = BLENDFACTOR_ONE; + else if (sblend == BLENDFACTOR_INV_DST_ALPHA) + sblend = BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is + * the mask channels multiplied by the source picture's alpha. 
+ */ + if (has_component_alpha && gen2_blend_op[op].src_alpha) { + if (dblend == BLENDFACTOR_SRC_ALPHA) + dblend = BLENDFACTOR_SRC_COLR; + else if (dblend == BLENDFACTOR_INV_SRC_ALPHA) + dblend = BLENDFACTOR_INV_SRC_COLR; + } + + return (sblend << S8_SRC_BLEND_FACTOR_SHIFT | + dblend << S8_DST_BLEND_FACTOR_SHIFT); +} + +static void gen2_emit_invariant(struct sna *sna) +{ + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3)); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_FOG_MODE_CMD); + OUT_BATCH(FOGFUNC_ENABLE | + FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(0) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(0)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(1) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(1) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(1)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(2) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(2) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(2)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(3) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(3) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(3)); + + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2)); + 
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); + + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); + OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); + + OUT_BATCH(_3DSTATE_W_STATE_CMD); + OUT_BATCH(MAGIC_W_STATE_DWORD1); + OUT_BATCH_F(1.0); + + OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD); + OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ + + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | + TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + + /* copy from mesa */ + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | + DISABLE_INDPT_ALPHA_BLEND | + ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD); + + OUT_BATCH(_3DSTATE_FOG_COLOR_CMD | + FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MODES_1_CMD | + ENABLE_COLR_BLND_FUNC | + BLENDFUNC_ADD | + ENABLE_SRC_BLND_FACTOR | + SRC_BLND_FACT(BLENDFACTOR_ONE) | + ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); + OUT_BATCH(_3DSTATE_MODES_2_CMD | + ENABLE_GLOBAL_DEPTH_BIAS | + GLOBAL_DEPTH_BIAS(0) | + ENABLE_ALPHA_TEST_FUNC | + ALPHA_TEST_FUNC(0) | /* always */ + ALPHA_REF_VALUE(0)); + OUT_BATCH(_3DSTATE_MODES_3_CMD | + ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */ + ENABLE_ALPHA_SHADE_MODE | ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_FOG_SHADE_MODE | FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + 
ENABLE_SPEC_SHADE_MODE | SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_COLOR_SHADE_MODE | COLOR_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_CULL_MODE | CULLMODE_NONE); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff)); + + OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD | + ENABLE_STENCIL_PARMS | + STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */ + ENABLE_STENCIL_TEST_FUNC | STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */ + ENABLE_STENCIL_REF_VALUE | STENCIL_REF_VALUE(0)); + + OUT_BATCH(_3DSTATE_MODES_5_CMD | + FLUSH_TEXTURE_CACHE | + ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | + ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */ + ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1)); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | + DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | + DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | + DISABLE_FOG | + DISABLE_ALPHA_TEST | + ENABLE_COLOR_BLEND | + DISABLE_DEPTH_TEST); + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | + DISABLE_STENCIL_WRITE | + ENABLE_TEX_CACHE | + DISABLE_DITHER | + ENABLE_COLOR_MASK | + ENABLE_COLOR_WRITE | + DISABLE_DEPTH_WRITE); + + OUT_BATCH(_3DSTATE_STIPPLE); + + /* Set default blend state */ + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | + TEXOP_LAST_STAGE | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | 
+		  TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE);
+
+	OUT_BATCH(_3DSTATE_AA_CMD |
+		  AA_LINE_ECAAR_WIDTH_ENABLE |
+		  AA_LINE_ECAAR_WIDTH_1_0 |
+		  AA_LINE_REGION_WIDTH_ENABLE |
+		  AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE);
+
+	sna->render_state.gen2.need_invariant = FALSE;
+}
+
+/* Switch the batch to render mode and submit it first if fewer than 50
+ * dwords, 3 relocations or 3 exec slots remain; re-emit the invariant
+ * state whenever it has been flagged as needed.
+ */
+static void
+gen2_get_batch(struct sna *sna,
+	       const struct sna_composite_op *op)
+{
+	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+	if (!kgem_check_batch(&sna->kgem, 50)) {
+		DBG(("%s: flushing batch: size %d > %d\n",
+		     __FUNCTION__, 50,
+		     sna->kgem.surface-sna->kgem.nbatch));
+		kgem_submit(&sna->kgem);
+	}
+
+	if (sna->kgem.nreloc + 3 > KGEM_RELOC_SIZE(&sna->kgem)) {
+		DBG(("%s: flushing batch: reloc %d >= %d\n",
+		     __FUNCTION__,
+		     sna->kgem.nreloc + 3,
+		     (int)KGEM_RELOC_SIZE(&sna->kgem)));
+		kgem_submit(&sna->kgem);
+	}
+
+	if (sna->kgem.nexec + 3 > KGEM_EXEC_SIZE(&sna->kgem)) {
+		/* Report the same quantity that the test above compares. */
+		DBG(("%s: flushing batch: exec %d >= %d\n",
+		     __FUNCTION__,
+		     sna->kgem.nexec + 3,
+		     (int)KGEM_EXEC_SIZE(&sna->kgem)));
+		kgem_submit(&sna->kgem);
+	}
+
+	if (sna->render_state.gen2.need_invariant)
+		gen2_emit_invariant(sna);
+}
+
+/* Emit the per-operation state: destination buffer and format, drawing
+ * rectangle, vertex layout, blend control, texture blend stage 0 and the
+ * source (and optional mask) texture.
+ */
+static void gen2_emit_composite_state(struct sna *sna,
+				      const struct sna_composite_op *op)
+{
+	uint32_t texcoordfmt;
+	uint32_t cblend, ablend;
+
+	gen2_get_batch(sna, op);
+
+	OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+	OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+		  gen2_buf_tiling(op->dst.bo->tiling) |
+		  BUF_3D_PITCH(op->dst.bo->pitch));
+	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+				 op->dst.bo,
+				 I915_GEM_DOMAIN_RENDER << 16 |
+				 I915_GEM_DOMAIN_RENDER,
+				 0));
+
+	OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+	OUT_BATCH(gen2_get_dst_format(op->dst.format));
+
+	OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+	OUT_BATCH(0);
+	OUT_BATCH(0);	/* ymin, xmin */
+	OUT_BATCH(DRAW_YMAX(op->dst.height - 1) |
+		  DRAW_XMAX(op->dst.width - 1));
+	OUT_BATCH(0);	/* yorig, xorig */
+
+	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+		  I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
+	/* One texcoord set for the source, a second only with a mask. */
+	OUT_BATCH((1 + (op->mask.bo != NULL)) << 12);
+	OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
+	OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+		  gen2_get_blend_cntl(op->op,
+				      op->has_component_alpha,
+				      op->dst.format) |
+		  S8_ENABLE_COLOR_BUFFER_WRITE);
+
+	OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND);
+
+	gen2_get_blend_factors(op, &cblend, &ablend);
+	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+		  LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+	OUT_BATCH(cblend);
+	OUT_BATCH(ablend);
+
+	OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP |
+		  DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS |
+		  DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST |
+		  ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST);
+	/* We have to explicitly say we don't want write disabled */
+	OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK |
+		  DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE |
+		  DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE);
+
+	texcoordfmt = 0;
+	if (op->src.is_affine)
+		texcoordfmt |= TEXCOORDFMT_2D << 0;
+	else
+		texcoordfmt |= TEXCOORDFMT_3D << 0;
+	if (op->mask.bo) {
+		if (op->mask.is_affine)
+			texcoordfmt |= TEXCOORDFMT_2D << 2;
+		else
+			texcoordfmt |= TEXCOORDFMT_3D << 2;
+	}
+	OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt);
+
+	gen2_emit_texture(sna, &op->src, 0);
+	if (op->mask.bo)
+		gen2_emit_texture(sna, &op->mask, 1);
+}
+
+/* Emit the destination position of one vertex. */
+static inline void
+gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY)
+{
+	OUT_VERTEX(dstX);
+	OUT_VERTEX(dstY);
+}
+
+/* Emit the texture coordinate of one vertex for the given channel: the
+ * channel offset is applied, the point is run through the channel
+ * transform and s/t are multiplied by the channel scale.  Affine
+ * channels emit two floats, all others three (s, t, w).
+ */
+static void
+gen2_emit_composite_texcoord(struct sna *sna,
+			     const struct sna_composite_channel *channel,
+			     int16_t x, int16_t y)
+{
+	float s = 0, t = 0, w = 1;
+
+	x += channel->offset[0];
+	y += channel->offset[1];
+
+	if (channel->is_affine) {
+		sna_get_transformed_coordinates(x, y,
+						channel->transform,
+						&s, &t);
+		OUT_VERTEX(s * channel->scale[0]);
+		OUT_VERTEX(t * channel->scale[1]);
+	} else {
+		sna_get_transformed_coordinates_3d(x, y,
+						   channel->transform,
+						   &s, &t, &w);
+		OUT_VERTEX(s * channel->scale[0]);
+		OUT_VERTEX(t * channel->scale[1]);
+		OUT_VERTEX(w);
+	}
+}
+
+/* Emit one complete vertex: destination position, source texcoord and,
+ * only when the operation has a mask, the mask texcoord.
+ */
+static void
+gen2_emit_composite_vertex(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   int16_t srcX, int16_t srcY,
+			   int16_t mskX, int16_t mskY,
+			   int16_t dstX, int16_t dstY)
+{
+	gen2_emit_composite_dstcoord(sna, dstX, dstY);
+	gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY);
+	/* gen2_emit_composite_state() only declares a second texcoord set
+	 * when mask.bo is set, and the mask channel is never initialised
+	 * for mask-less operations, so the vertex must match that layout.
+	 */
+	if (op->mask.bo)
+		gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY);
+}
+
+/* Emit the three vertices of one rectangle (bottom-right, bottom-left,
+ * top-left) after translating the destination by op->dst.x/y.
+ */
+static void
+gen2_emit_composite_primitive(struct sna *sna,
+			      const struct sna_composite_op *op,
+			      int16_t srcX, int16_t srcY,
+			      int16_t mskX, int16_t mskY,
+			      int16_t dstX, int16_t dstY,
+			      int16_t w, int16_t h)
+{
+	dstX += op->dst.x;
+	dstY += op->dst.y;
+
+	gen2_emit_composite_vertex(sna, op,
+				   srcX + w, srcY + h,
+				   mskX + w, mskY + h,
+				   dstX + w, dstY + h);
+	gen2_emit_composite_vertex(sna, op,
+				   srcX, srcY + h,
+				   mskX, mskY + h,
+				   dstX, dstY + h);
+	gen2_emit_composite_vertex(sna, op,
+				   srcX, srcY,
+				   mskX, mskY,
+				   dstX, dstY);
+}
+
+/* Second pass for component-alpha operations flagged with
+ * need_magic_ca_pass: reprogram S8 and the blend stage for PictOpAdd,
+ * then replay the primitive recorded at vertex_offset by copying it to
+ * the end of the batch.
+ */
+static void gen2_magic_ca_pass(struct sna *sna,
+			       const struct sna_composite_op *op)
+{
+	uint32_t ablend, cblend;
+
+	if (!op->need_magic_ca_pass)
+		return;
+
+	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 2);
+	OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD |
+		  gen2_get_blend_cntl(PictOpAdd,
+				      op->has_component_alpha,
+				      op->dst.format) |
+		  S8_ENABLE_COLOR_BUFFER_WRITE);
+
+	gen2_get_blend_factors(op, &cblend, &ablend);
+	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+		  LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+	OUT_BATCH(cblend);
+	OUT_BATCH(ablend);
+
+	/* NOTE(review): the copy assumes 3 dwords per vertex, whereas the
+	 * emitters above write 4-8 floats per vertex -- confirm.
+	 */
+	memcpy(sna->kgem.batch + sna->kgem.nbatch,
+	       sna->kgem.batch + sna->render_state.gen2.vertex_offset,
+	       (1 + 3*sna->render.vertex_index)*sizeof(uint32_t));
+	sna->kgem.nbatch += 1 + 3*sna->render.vertex_index;
+}
+
+/* Close the open inline primitive, if any: patch its count into the
+ * header dword, run the optional component-alpha pass and reset the
+ * vertex bookkeeping.
+ */
+static void gen2_vertex_flush(struct sna *sna)
+{
+	if (sna->render.vertex_index == 0)
+		return;
+
+	sna->kgem.batch[sna->render_state.gen2.vertex_offset] |=
+		sna->render.vertex_index - 1;
+
+	if (sna->render.op)
+		gen2_magic_ca_pass(sna, sna->render.op);
+
+	sna->render_state.gen2.vertex_offset = 0;
+	sna->render.vertex_index = 0;
+}
+
+/* Reserve room for up to 'want' rectangles in the current batch.
+ * Returns how many fit (possibly fewer than requested), or 0 when not
+ * even one does and the caller must start a new batch.  Opens the inline
+ * rectangle-list primitive if none is open.
+ */
+inline static int gen2_get_rectangles(struct sna *sna,
+				      const struct sna_composite_op *op,
+				      int want)
+{
+	struct gen2_render_state *state = &sna->render_state.gen2;
+	int rem = batch_space(sna), size, need;
+
+	/* NOTE(review): op->floats_per_vertex is not assigned anywhere in
+	 * the visible part of this file -- confirm the caller sets it.
+	 */
+	need = 0;
+	size = 3*op->floats_per_vertex;
+	if (op->need_magic_ca_pass)
+		need += 5, size *= 2;
+
+	need += size;
+	if (state->vertex_offset == 0)
+		need += 2;
+
+	if (rem < need)
+		return 0;
+
+	if (state->vertex_offset == 0) {
+		state->vertex_offset = sna->kgem.nbatch;
+		OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
+		rem--;
+	}
+
+	if (want * size > rem)
+		want = rem / size;
+
+	sna->render.vertex_index += 3*want;
+	return want;
+}
+
+/* sna_composite_op::blt hook: composite a single rectangle, re-emitting
+ * the state first if the batch had no room left.
+ */
+fastcall static void
+gen2_render_composite_blt(struct sna *sna,
+			  const struct sna_composite_op *op,
+			  const struct sna_composite_rectangles *r)
+{
+	if (!gen2_get_rectangles(sna, op, 1)) {
+		gen2_emit_composite_state(sna, op);
+		gen2_get_rectangles(sna, op, 1);
+	}
+
+	gen2_emit_composite_primitive(sna, op,
+				      r->src.x, r->src.y,
+				      r->mask.x, r->mask.y,
+				      r->dst.x, r->dst.y,
+				      r->width, r->height);
+}
+
+/* sna_composite_op::boxes hook: composite nbox boxes, using each box
+ * origin for the source, mask and destination alike, in as many batches
+ * as needed.
+ */
+static void
+gen2_render_composite_boxes(struct sna *sna,
+			    const struct sna_composite_op *op,
+			    const BoxRec *box, int nbox)
+{
+	do {
+		int nbox_this_time;
+
+		nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+		if (nbox_this_time == 0) {
+			gen2_emit_composite_state(sna, op);
+			nbox_this_time = gen2_get_rectangles(sna, op, nbox);
+		}
+		nbox -= nbox_this_time;
+
+		do {
+			gen2_emit_composite_primitive(sna, op,
+						      box->x1, box->y1,
+						      box->x1, box->y1,
+						      box->x1, box->y1,
+						      box->x2 - box->x1,
+						      box->y2 - box->y1);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
+/* sna_composite_op::done hook: flush pending vertices, finish any
+ * redirection and drop the references held on the source and mask bos.
+ */
+static void gen2_render_composite_done(struct sna *sna,
+				       const struct sna_composite_op *op)
+{
+	gen2_vertex_flush(sna);
+	sna->render.op = NULL;
+	_kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+	sna_render_composite_redirect_done(sna, op);
+
+	if (op->src.bo)
+		kgem_bo_destroy(&sna->kgem, op->src.bo);
+	if (op->mask.bo)
+		kgem_bo_destroy(&sna->kgem, op->mask.bo);
+}
+
+/* Set the channel up as a 1x1 repeating a8r8g8b8 texture of the given
+ * colour.  Returns FALSE if no solid bo could be obtained.
+ */
+static Bool
+gen2_composite_solid_init(struct sna *sna,
+			  struct sna_composite_channel *channel,
+			  uint32_t color)
+{
+	channel->filter = PictFilterNearest;
+	channel->repeat = RepeatNormal;
+	channel->is_affine = TRUE;
+	channel->is_solid  = TRUE;
+	channel->transform = NULL;
+	channel->width  = 1;
+	channel->height = 1;
+	channel->pict_format = PICT_a8r8g8b8;
+
+	channel->bo = sna_render_get_solid(sna, color);
+
+	channel->scale[0]  = channel->scale[1]  = 1;
+	channel->offset[0] = channel->offset[1] = 0;
+	return channel->bo != NULL;
+}
+
+/* Prepare a channel from a Picture.  Solid pictures become solid
+ * channels; drawable-less pictures and unsupported repeat/filter modes
+ * go through the fixup path, unsupported formats are converted and
+ * pixmaps larger than 8192 in either dimension are extracted.  The
+ * result is passed through from the helper that handled the picture;
+ * the caller treats -1 as failure, 0 as "substitute a clear solid" and
+ * 1 as success.
+ */
+static int
+gen2_composite_picture(struct sna *sna,
+		       PicturePtr picture,
+		       struct sna_composite_channel *channel,
+		       int x, int y,
+		       int w, int h,
+		       int dst_x, int dst_y)
+{
+	PixmapPtr pixmap;
+	uint32_t color;
+	int16_t dx, dy;
+
+	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
+	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
+
+	channel->is_solid = FALSE;
+	channel->card_format = -1;
+
+	if (sna_picture_is_solid(picture, &color))
+		return gen2_composite_solid_init(sna, channel, color);
+
+	if (picture->pDrawable == NULL)
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, w, h, dst_x, dst_y);
+
+	if (!gen2_check_repeat(picture))
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, w, h, dst_x, dst_y);
+
+	if (!gen2_check_filter(picture))
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, w, h, dst_x, dst_y);
+
+	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
+	channel->filter = picture->filter;
+
+	pixmap = get_drawable_pixmap(picture->pDrawable);
+	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
+
+	x += dx + picture->pDrawable->x;
+	y += dy + picture->pDrawable->y;
+
+	channel->is_affine = sna_transform_is_affine(picture->transform);
+	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
+		/* Fold the translation into the sample origin instead. */
+		DBG(("%s: integer translation (%d, %d), removing\n",
+		     __FUNCTION__, dx, dy));
+		x += dx;
+		y += dy;
+		channel->transform = NULL;
+		channel->filter = PictFilterNearest;
+	} else
+		channel->transform = picture->transform;
+
+	if (!gen2_check_card_format(sna, picture->format))
+		return sna_render_picture_convert(sna, picture, channel, pixmap,
+						  x, y, w, h, dst_x, dst_y);
+
+	channel->pict_format = picture->format;
+	if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+		return sna_render_picture_extract(sna, picture, channel,
+						  x, y, w, h, dst_x, dst_y);
+
+	return sna_render_pixmap_bo(sna, channel, pixmap,
+				    x, y, w, h, dst_x, dst_y);
+}
+
+/* Fill in op->dst from the destination picture, forcing its pixmap onto
+ * the GPU.  Returns FALSE if that is not possible.
+ */
+static Bool
+gen2_composite_set_target(struct sna *sna,
+			  struct sna_composite_op *op,
+			  PicturePtr dst)
+{
+	struct sna_pixmap *priv;
+
+	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+	op->dst.format = dst->format;
+	op->dst.width  = op->dst.pixmap->drawable.width;
+	op->dst.height = op->dst.pixmap->drawable.height;
+
+	priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+	if (priv == NULL)
+		return FALSE;
+
+	op->dst.bo = priv->gpu_bo;
+	if (!priv->gpu_only)
+		op->damage = &priv->gpu_damage;
+
+	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
+			    &op->dst.x, &op->dst.y);
+	return TRUE;
+}
+
+/* Decide whether the BLT path should be attempted before the 3D pipe:
+ * yes when already in BLT mode, when the operation exceeds 2048 in
+ * either dimension, when the source is solid, or when is_cpu() reports
+ * true for the source drawable.
+ */
+static Bool
+try_blt(struct sna *sna,
+	PicturePtr dst,
+	PicturePtr source,
+	int width, int height)
+{
+	uint32_t color;
+
+	if (sna->kgem.mode == KGEM_BLT) {
+		DBG(("%s: already performing BLT\n", __FUNCTION__));
+		return TRUE;
+	}
+
+	if (width > 2048 || height > 2048) {
+		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__, width, height));
+		return TRUE;
+	}
+
+	/* If it is a solid, try to use the BLT paths */
+	if (sna_picture_is_solid(source, &color))
+		return TRUE;
+
+	if (!source->pDrawable)
+		return FALSE;
+
+	return is_cpu(source->pDrawable);
+}
+
+/* sna_render::composite hook.  Prepares 'tmp' for compositing src
+ * (optionally through mask) onto dst with the given Render operator and
+ * emits the initial state.  Returns TRUE when tmp is ready (or the
+ * BLT/tiling helper accepted the operation) and FALSE when the caller
+ * must fall back; on FALSE every bo acquired here has been released.
+ */
+static Bool
+gen2_render_composite(struct sna *sna,
+		      uint8_t op,
+		      PicturePtr src,
+		      PicturePtr mask,
+		      PicturePtr dst,
+		      int16_t src_x, int16_t src_y,
+		      int16_t mask_x, int16_t mask_y,
+		      int16_t dst_x, int16_t dst_y,
+		      int16_t width, int16_t height,
+		      struct sna_composite_op *tmp)
+{
+	DBG(("%s()\n", __FUNCTION__));
+
+	/* Try to use the BLT engine unless it implies a
+	 * 3D -> 2D context switch.
+	 */
+	if (mask == NULL &&
+	    try_blt(sna, dst, src, width, height) &&
+	    sna_blt_composite(sna,
+			      op, src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height,
+			      tmp))
+		return TRUE;
+
+	if (op >= ARRAY_SIZE(gen2_blend_op)) {
+		DBG(("%s: fallback due to unhandled blend op: %d\n",
+		     __FUNCTION__, op));
+		return FALSE;
+	}
+
+	if (!gen2_check_dst_format(dst->format)) {
+		DBG(("%s: fallback due to unhandled dst format: %x\n",
+		     __FUNCTION__, dst->format));
+		return FALSE;
+	}
+
+	if (need_tiling(sna, width, height))
+		return sna_tiling_composite(sna,
+					    op, src, mask, dst,
+					    src_x,  src_y,
+					    mask_x, mask_y,
+					    dst_x,  dst_y,
+					    width,  height,
+					    tmp);
+
+	memset(&tmp->u.gen2, 0, sizeof(tmp->u.gen2));
+
+	if (!gen2_composite_set_target(sna, tmp, dst)) {
+		DBG(("%s: unable to set render target\n",
+		     __FUNCTION__));
+		return FALSE;
+	}
+
+	tmp->op = op;
+	if (tmp->dst.width > 2048 ||
+	    tmp->dst.height > 2048 ||
+	    tmp->dst.bo->pitch > 8192) {
+		if (!sna_render_composite_redirect(sna, tmp,
+						   dst_x, dst_y, width, height))
+			return FALSE;
+	}
+
+	switch (gen2_composite_picture(sna, src, &tmp->src,
+				       src_x, src_y,
+				       width, height,
+				       dst_x, dst_y)) {
+	case -1:
+		goto cleanup_dst;
+	case 0:
+		gen2_composite_solid_init(sna, &tmp->src, 0);
+		/* fall through */
+	case 1:
+		break;
+	}
+
+	if (mask) {
+		switch (gen2_composite_picture(sna, mask, &tmp->mask,
+					       mask_x, mask_y,
+					       width, height,
+					       dst_x, dst_y)) {
+		case -1:
+			goto cleanup_src;
+		case 0:
+			gen2_composite_solid_init(sna, &tmp->mask, 0);
+			/* fall through */
+		case 1:
+			break;
+		}
+
+		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
+			/* Check if it's component alpha that relies on a source alpha
+			 * and on the source value.  We can only get one of those
+			 * into the single source value that we get to blend with.
+			 */
+			tmp->has_component_alpha = TRUE;
+			if (gen2_blend_op[op].src_alpha &&
+			    (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) {
+				/* Both channels are set up by now; release
+				 * them rather than returning directly.
+				 */
+				if (op != PictOpOver)
+					goto cleanup_mask;
+
+				tmp->need_magic_ca_pass = TRUE;
+				tmp->op = PictOpOutReverse;
+			}
+		}
+	}
+
+	tmp->blt   = gen2_render_composite_blt;
+	tmp->boxes = gen2_render_composite_boxes;
+	tmp->done  = gen2_render_composite_done;
+
+	if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
+		kgem_submit(&sna->kgem);
+	if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
+		kgem_submit(&sna->kgem);
+	if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
+		kgem_submit(&sna->kgem);
+
+	if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+		kgem_emit_flush(&sna->kgem);
+
+	gen2_emit_composite_state(sna, tmp);
+
+	sna->render.op = tmp;
+	return TRUE;
+
+cleanup_mask:
+	if (tmp->mask.bo)
+		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
+cleanup_src:
+	if (tmp->src.bo)
+		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+cleanup_dst:
+	if (tmp->redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
+	return FALSE;
+}
+
+/* sna_render::reset hook: force the invariant state to be re-emitted
+ * and forget any open primitive.
+ */
+static void
+gen2_render_reset(struct sna *sna)
+{
+	sna->render_state.gen2.need_invariant = TRUE;
+	sna->render_state.gen2.vertex_offset = 0;
+}
+
+/* sna_render::flush hook: close the open primitive, if any. */
+static void
+gen2_render_flush(struct sna *sna)
+{
+	gen2_vertex_flush(sna);
+}
+
+/* sna_render::context_switch hook: nothing to do on gen2. */
+static void
+gen2_render_context_switch(struct sna *sna,
+			   int new_mode)
+{
+}
+
+/* sna_render::fini hook: nothing to release on gen2. */
+static void
+gen2_render_fini(struct sna *sna)
+{
+}
+
+/* Install the gen2 render backend into sna->render.  Always returns
+ * TRUE.
+ */
+Bool gen2_render_init(struct sna *sna)
+{
+	struct sna_render *render = &sna->render;
+
+	gen2_render_reset(sna);
+
+	/* Use the BLT (and overlay) for everything except when forced to
+	 * use the texture combiners.
+	 */
+	render->composite = gen2_render_composite;
+
+	/* XXX Y-tiling copies */
+
+	render->reset = gen2_render_reset;
+	render->flush = gen2_render_flush;
+	render->context_switch = gen2_render_context_switch;
+	render->fini = gen2_render_fini;
+
+	render->max_3d_size = 2048;
+	return TRUE;
+}

new file mode 100644

index 00000000..945cd846

--- /dev/null

+++ b/ diff --git a/src/sna/gen2_render.h b/src/sna/gen2_render.hnew file mode 100644index 00000000..945cd846--- /dev/null+++ b/ src/sna/gen2_render.h @@ -0,0 +1,785 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GEN2_RENDER_H +#define GEN2_RENDER_H + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) +#define AA_LINE_ENABLE ((1<<1) | 1) +#define AA_LINE_DISABLE (1<<1) + +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) + +#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ + ((0x90+(stage))<<16)) + +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | 
(0x1d<<24) | (0x98<<16)) + +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define DEPTH_IS_Z 0 +#define DEPTH_IS_W (1<<6) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 0 +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24)) +#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22)) +#define ENABLE_LOGIC_OP ((1<<23)|(1<<22)) +#define DISABLE_LOGIC_OP (1<<23) +#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_TEST (1<<21) +#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10)) +#define DISABLE_DEPTH_BIAS (1<<11) +#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8)) +#define ENABLE_SPEC_ADD ((1<<9)|(1<<8)) +#define DISABLE_SPEC_ADD (1<<9) +#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6)) +#define ENABLE_FOG ((1<<7)|(1<<6)) +#define DISABLE_FOG (1<<7) +#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4)) +#define ENABLE_ALPHA_TEST 
((1<<5)|(1<<4)) +#define DISABLE_ALPHA_TEST (1<<5) +#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2)) +#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2)) +#define DISABLE_COLOR_BLEND (1<<3) +#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1) +#define ENABLE_DEPTH_TEST ((1<<1)|1) +#define DISABLE_DEPTH_TEST (1<<1) + +/* _3DSTATE_ENABLES_2, p138 */ +#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24)) +#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_WRITE (1<<21) +#define ENABLE_TEX_CACHE ((1<<17)|(1<<16)) +#define DISABLE_TEX_CACHE (1<<17) +#define ENABLE_DITHER ((1<<9)|(1<<8)) +#define DISABLE_DITHER (1<<9) +#define ENABLE_COLOR_MASK (1<<10) +#define WRITEMASK_ALPHA (1<<7) +#define WRITEMASK_ALPHA_SHIFT 7 +#define WRITEMASK_RED (1<<6) +#define WRITEMASK_RED_SHIFT 6 +#define WRITEMASK_GREEN (1<<5) +#define WRITEMASK_GREEN_SHIFT 5 +#define WRITEMASK_BLUE (1<<4) +#define WRITEMASK_BLUE_SHIFT 4 +#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7)) +#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2)) +#define DISABLE_COLOR_WRITE (1<<3) +#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3 +#define ENABLE_DEPTH_WRITE ((1<<1)|1) +#define DISABLE_DEPTH_WRITE (1<<1) + +/* _3DSTATE_FOG_COLOR, p139 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p140 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FOGFUNC_ENABLE (1<<31) +#define FOGFUNC_VERTEX 0 +#define FOGFUNC_PIXEL_EXP (1<<28) +#define FOGFUNC_PIXEL_EXP2 (2<<28) +#define FOGFUNC_PIXEL_LINEAR (3<<28) +#define FOGSRC_INDEX_Z (1<<27) +#define FOGSRC_INDEX_W ((1<<27)|(1<<25)) +#define FOG_LINEAR_CONST (1<<24) +#define FOG_CONST_1(x) ((x)<<4) +#define ENABLE_FOG_DENSITY (1<<23) +/* Dword 2 */ +#define FOG_CONST_2(x) (x) +/* Dword 3 */ +#define FOG_DENSITY(x) (x) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */ +#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) 
+#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22)) +#define DISABLE_INDPT_ALPHA_BLEND (1<<23) +#define ALPHA_BLENDFUNC_MASK 0x3f0000 +#define ENABLE_ALPHA_BLENDFUNC (1<<21) +#define ABLENDFUNC_ADD 0 +#define ABLENDFUNC_SUB (1<<16) +#define ABLENDFUNC_RVSE_SUB (2<<16) +#define ABLENDFUNC_MIN (3<<16) +#define ABLENDFUNC_MAX (4<<16) +#define SRC_DST_ABLEND_MASK 0xfff +#define ENABLE_SRC_ABLEND_FACTOR (1<<11) +#define SRC_ABLEND_FACT(x) ((x)<<6) +#define ENABLE_DST_ABLEND_FACTOR (1<<5) +#define DST_ABLEND_FACT(x) (x) + +#define BLENDFACTOR_ZERO 0x01 +#define BLENDFACTOR_ONE 0x02 +#define BLENDFACTOR_SRC_COLR 0x03 +#define BLENDFACTOR_INV_SRC_COLR 0x04 +#define BLENDFACTOR_SRC_ALPHA 0x05 +#define BLENDFACTOR_INV_SRC_ALPHA 0x06 +#define BLENDFACTOR_DST_ALPHA 0x07 +#define BLENDFACTOR_INV_DST_ALPHA 0x08 +#define BLENDFACTOR_DST_COLR 0x09 +#define BLENDFACTOR_INV_DST_COLR 0x0a +#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACTOR_CONST_COLOR 0x0c +#define BLENDFACTOR_INV_CONST_COLOR 0x0d +#define BLENDFACTOR_CONST_ALPHA 0x0e +#define BLENDFACTOR_INV_CONST_ALPHA 0x0f +#define BLENDFACTOR_MASK 0x0f + +/* _3DSTATE_MAP_BLEND_ARG, p152 */ +#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20)) + +#define TEXPIPE_COLOR 0 +#define TEXPIPE_ALPHA (1<<18) +#define TEXPIPE_KILL (2<<18) +#define TEXBLEND_ARG0 0 +#define TEXBLEND_ARG1 (1<<15) +#define TEXBLEND_ARG2 (2<<15) +#define TEXBLEND_ARG3 (3<<15) +#define TEXBLENDARG_MODIFY_PARMS (1<<6) +#define TEXBLENDARG_REPLICATE_ALPHA (1<<5) +#define TEXBLENDARG_INV_ARG (1<<4) +#define TEXBLENDARG_ONE 0 +#define TEXBLENDARG_FACTOR 0x01 +#define TEXBLENDARG_ACCUM 0x02 +#define TEXBLENDARG_DIFFUSE 0x03 +#define TEXBLENDARG_SPEC 0x04 +#define TEXBLENDARG_CURRENT 0x05 +#define TEXBLENDARG_TEXEL0 0x06 +#define TEXBLENDARG_TEXEL1 0x07 +#define TEXBLENDARG_TEXEL2 0x08 +#define TEXBLENDARG_TEXEL3 0x09 +#define TEXBLENDARG_FACTOR_N 0x0e + +/* _3DSTATE_MAP_BLEND_OP, p155 */ +#define _3DSTATE_MAP_BLEND_OP_CMD(stage) 
(CMD_3D|(0x0d<<24)|((stage)<<20)) +#if 0 +# define TEXPIPE_COLOR 0 +# define TEXPIPE_ALPHA (1<<18) +# define TEXPIPE_KILL (2<<18) +#endif +#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17) +#define TEXOP_OUTPUT_CURRENT 0 +#define TEXOP_OUTPUT_ACCUM (1<<15) +#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11)) +#define DISABLE_TEX_CNTRL_STAGE (1<<12) +#define TEXOP_SCALE_SHIFT 9 +#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT) +#define TEXOP_MODIFY_PARMS (1<<8) +#define TEXOP_LAST_STAGE (1<<7) +#define TEXBLENDOP_KILLPIXEL 0x02 +#define TEXBLENDOP_ARG1 0x01 +#define TEXBLENDOP_ARG2 0x02 +#define TEXBLENDOP_MODULATE 0x03 +#define TEXBLENDOP_ADD 0x06 +#define TEXBLENDOP_ADDSIGNED 0x07 +#define TEXBLENDOP_BLEND 0x08 +#define TEXBLENDOP_BLEND_AND_ADD 0x09 +#define TEXBLENDOP_SUBTRACT 0x0a +#define TEXBLENDOP_DOT3 0x0b +#define TEXBLENDOP_DOT4 0x0c +#define TEXBLENDOP_MODULATE_AND_ADD 0x0d +#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e +#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f + +/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */ +/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */ + +#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16)) +#define DISABLE_TEX_TRANSFORM (1<<28) +#define TEXTURE_SET(x) (x<<29) + +#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define DISABLE_PERSPECTIVE_DIVIDE (1<<29) + +/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */ +#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16)) +#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12)) +#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8)) +#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4)) +#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1) + +#define TEXBIND_SET3(x) ((x)<<12) +#define TEXBIND_SET2(x) ((x)<<8) +#define TEXBIND_SET1(x) ((x)<<4) +#define TEXBIND_SET0(x) (x) + +#define TEXCOORDSRC_KEEP 0 +#define TEXCOORDSRC_DEFAULT 0x01 
+#define TEXCOORDSRC_VTXSET_0 0x08 +#define TEXCOORDSRC_VTXSET_1 0x09 +#define TEXCOORDSRC_VTXSET_2 0x0a +#define TEXCOORDSRC_VTXSET_3 0x0b +#define TEXCOORDSRC_VTXSET_4 0x0c +#define TEXCOORDSRC_VTXSET_5 0x0d +#define TEXCOORDSRC_VTXSET_6 0x0e +#define TEXCOORDSRC_VTXSET_7 0x0f + +#define MAP_UNIT(unit) ((unit)<<16) +#define MAP_UNIT_MASK (0x7<<16) + +/* _3DSTATE_MAP_COORD_SETS, p164 */ +#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19)) +#define TEXCOORD_SET(n) ((n)<<16) +#define ENABLE_TEXCOORD_PARAMS (1<<15) +#define TEXCOORDS_ARE_NORMAL (1<<14) +#define TEXCOORDS_ARE_IN_TEXELUNITS 0 +#define TEXCOORDTYPE_CARTESIAN 0 +#define TEXCOORDTYPE_HOMOGENEOUS (1<<11) +#define TEXCOORDTYPE_VECTOR (2<<11) +#define TEXCOORDTYPE_MASK (0x7<<11) +#define ENABLE_ADDR_V_CNTL (1<<7) +#define ENABLE_ADDR_U_CNTL (1<<3) +#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4) +#define TEXCOORD_ADDR_U_MODE(x) (x) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP 2 +#define TEXCOORDMODE_WRAP_SHORTEST 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORD_ADDR_V_MASK 0x70 +#define TEXCOORD_ADDR_U_MASK 0x7 + +/* _3DSTATE_MAP_CUBE, p168 TODO */ +#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19)) +#define CUBE_NEGX_ENABLE (1<<5) +#define CUBE_POSX_ENABLE (1<<4) +#define CUBE_NEGY_ENABLE (1<<3) +#define CUBE_POSY_ENABLE (1<<2) +#define CUBE_NEGZ_ENABLE (1<<1) +#define CUBE_POSZ_ENABLE (1<<0) + +#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3) +#define TEXMAP_INDEX(x) ((x)<<28) +#define MAP_SURFACE_8BIT (1<<24) +#define MAP_SURFACE_16BIT (2<<24) +#define MAP_SURFACE_32BIT (3<<24) +#define MAP_FORMAT_2D (0) +#define MAP_FORMAT_3D_CUBE (1<<11) + +/* _3DSTATE_MODES_1, p190 */ +#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24)) +#define BLENDFUNC_MASK 0x3f0000 +#define ENABLE_COLR_BLND_FUNC (1<<21) +#define BLENDFUNC_ADD 0 +#define BLENDFUNC_SUB (1<<16) +#define BLENDFUNC_RVRSE_SUB (2<<16) +#define BLENDFUNC_MIN (3<<16) +#define 
BLENDFUNC_MAX (4<<16) +#define SRC_DST_BLND_MASK 0xfff +#define ENABLE_SRC_BLND_FACTOR (1<<11) +#define ENABLE_DST_BLND_FACTOR (1<<5) +#define SRC_BLND_FACT(x) ((x)<<6) +#define DST_BLND_FACT(x) (x) + +/* _3DSTATE_MODES_2, p192 */ +#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24)) +#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22) +#define GLOBAL_DEPTH_BIAS(x) ((x)<<14) +#define ENABLE_ALPHA_TEST_FUNC (1<<13) +#define ENABLE_ALPHA_REF_VALUE (1<<8) +#define ALPHA_TEST_FUNC(x) ((x)<<9) +#define ALPHA_REF_VALUE(x) (x) + +#define ALPHA_TEST_REF_MASK 0x3fff + +/* _3DSTATE_MODES_3, p193 */ +#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24)) +#define DEPTH_TEST_FUNC_MASK 0x1f0000 +#define ENABLE_DEPTH_TEST_FUNC (1<<20) +/* Uses COMPAREFUNC */ +#define DEPTH_TEST_FUNC(x) ((x)<<16) +#define ENABLE_ALPHA_SHADE_MODE (1<<11) +#define ENABLE_FOG_SHADE_MODE (1<<9) +#define ENABLE_SPEC_SHADE_MODE (1<<7) +#define ENABLE_COLOR_SHADE_MODE (1<<5) +#define ALPHA_SHADE_MODE(x) ((x)<<10) +#define FOG_SHADE_MODE(x) ((x)<<8) +#define SPEC_SHADE_MODE(x) ((x)<<6) +#define COLOR_SHADE_MODE(x) ((x)<<4) +#define CULLMODE_MASK 0xf +#define ENABLE_CULL_MODE (1<<3) +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_CW 2 +#define CULLMODE_CCW 3 + +#define SHADE_MODE_LINEAR 0 +#define SHADE_MODE_FLAT 0x1 + +/* _3DSTATE_MODES_4, p195 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21)) +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) 
/*
 * Remaining _3DSTATE_MODES_4 stencil-mask fields, followed by the
 * MODES_5, rasterization-rule, scissor, stencil-test, vertex-format,
 * vertex-transform and W-state command encodings.
 *
 * Constants that set bit 31 are written with an unsigned literal:
 * left-shifting a signed 1 into the sign bit is undefined behaviour
 * (C11 6.5.7), whereas the unsigned form yields the intended bit pattern.
 */
#define ENABLE_STENCIL_TEST_MASK	(1<<17)
#define STENCIL_TEST_MASK(x)		((x)<<8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
#define ENABLE_STENCIL_WRITE_MASK	(1<<16)
#define STENCIL_WRITE_MASK(x)		((x)&0xff)

/* _3DSTATE_MODES_5, p196 */
#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24))
#define ENABLE_SPRITE_POINT_TEX		(1<<23)
#define SPRITE_POINT_TEX_ON		(1<<22)
#define SPRITE_POINT_TEX_OFF		0
#define FLUSH_RENDER_CACHE		(1<<18)
#define FLUSH_TEXTURE_CACHE		(1<<16)
#define FIXED_LINE_WIDTH_MASK		0xfc00
#define ENABLE_FIXED_LINE_WIDTH		(1<<15)
#define FIXED_LINE_WIDTH(x)		((x)<<10)
#define FIXED_POINT_WIDTH_MASK		0x3ff
#define ENABLE_FIXED_POINT_WIDTH	(1<<9)
#define FIXED_POINT_WIDTH(x)		(x)

/* _3DSTATE_RASTERIZATION_RULES, p198 */
#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE	(1<<15)
#define OGL_POINT_RASTER_RULE		(1<<13)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
#define ENABLE_TRI_STRIP_PROVOKE_VRTX	(1<<2)
#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
#define TRI_FAN_PROVOKE_VRTX(x)		((x)<<3)
#define TRI_STRIP_PROVOKE_VRTX(x)	(x)

/* _3DSTATE_SCISSOR_ENABLE, p200 */
#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT		((1<<1) | 1)
#define DISABLE_SCISSOR_RECT		(1<<1)

/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x)		((x)<<16)
#define SCISSOR_RECT_0_XMIN(x)		(x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
#define SCISSOR_RECT_0_XMAX(x)		(x)

/* _3DSTATE_STENCIL_TEST, p202 */
#define _3DSTATE_STENCIL_TEST_CMD	(CMD_3D|(0x09<<24))
#define ENABLE_STENCIL_PARMS		(1<<23)
#define STENCIL_OPS_MASK		(0xffc000)
#define STENCIL_FAIL_OP(x)		((x)<<20)
#define STENCIL_PASS_DEPTH_FAIL_OP(x)	((x)<<17)
#define STENCIL_PASS_DEPTH_PASS_OP(x)	((x)<<14)

#define ENABLE_STENCIL_TEST_FUNC_MASK	((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
#define ENABLE_STENCIL_TEST_FUNC	(1<<13)
/* Uses COMPAREFUNC */
#define STENCIL_TEST_FUNC(x)		((x)<<9)
#define STENCIL_REF_VALUE_MASK		((1<<8)|0xff)
#define ENABLE_STENCIL_REF_VALUE	(1<<8)
#define STENCIL_REF_VALUE(x)		(x)

/* _3DSTATE_VERTEX_FORMAT, p204 */
#define _3DSTATE_VFT0_CMD		(CMD_3D|(0x05<<24))
#define VFT0_POINT_WIDTH		(1<<12)
#define VFT0_TEX_COUNT_MASK		(7<<8)
#define VFT0_TEX_COUNT_SHIFT		8
#define VFT0_TEX_COUNT(x)		((x)<<8)
#define VFT0_SPEC			(1<<7)
#define VFT0_DIFFUSE			(1<<6)
#define VFT0_DEPTH_OFFSET		(1<<5)
#define VFT0_XYZ			(1<<1)
#define VFT0_XYZW			(2<<1)
#define VFT0_XY				(3<<1)
#define VFT0_XYW			(4<<1)
#define VFT0_XYZW_MASK			(7<<1)

/* _3DSTATE_VERTEX_FORMAT_2, p206 */
#define _3DSTATE_VERTEX_FORMAT_2_CMD	(CMD_3D|(0x0a<<24))
/* Two bits of texture-coordinate format per texture unit, unit 0 lowest. */
#define VFT1_TEX7_FMT(x)		((x)<<14)
#define VFT1_TEX6_FMT(x)		((x)<<12)
#define VFT1_TEX5_FMT(x)		((x)<<10)
#define VFT1_TEX4_FMT(x)		((x)<<8)
#define VFT1_TEX3_FMT(x)		((x)<<6)
#define VFT1_TEX2_FMT(x)		((x)<<4)
#define VFT1_TEX1_FMT(x)		((x)<<2)
#define VFT1_TEX0_FMT(x)		(x)
#define VFT1_TEX0_MASK			3
#define VFT1_TEX1_SHIFT			2
#define TEXCOORDFMT_2D			0
#define TEXCOORDFMT_3D			1
#define TEXCOORDFMT_4D			2
#define TEXCOORDFMT_1D			3

/* New stuff picked up along the way */

#define MLC_LOD_BIAS_MASK		((1<<7)-1)

/* _3DSTATE_VERTEX_TRANSFORM, p207 */
#define _3DSTATE_VERTEX_TRANS_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
#define _3DSTATE_VERTEX_TRANS_MTX_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
/* Dword 1 */
/* Bit 31 is set here, hence the unsigned literals (see header comment). */
#define ENABLE_VIEWPORT_TRANSFORM	((1u<<31)|(1u<<30))
#define DISABLE_VIEWPORT_TRANSFORM	(1u<<31)
#define ENABLE_PERSP_DIVIDE		((1<<29)|(1<<28))
#define DISABLE_PERSP_DIVIDE		(1<<29)
#define VRTX_TRANS_LOAD_MATRICES	0x7421
#define VRTX_TRANS_NO_LOAD_MATRICES	0x0000
/* Dword 2 -> 7 are matrix elements */

/* _3DSTATE_W_STATE, p209 */
#define _3DSTATE_W_STATE_CMD		(CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
/* Dword 1 */
#define MAGIC_W_STATE_DWORD1		0x00000008
/* Dword 2 */
#define WFAR_VALUE(x)			(x)

/* Stipple command, carried over from the i810, apparently:
 */
#define _3DSTATE_STIPPLE		(CMD_3D|(0x1d<<24)|(0x83<<16))
#define ST1_ENABLE			(1<<16)
#define ST1_MASK			(0xffff)

/*
 * _3DSTATE_LOAD_STATE_IMMEDIATE_1 and the S3_ and S8_ bit fields defined
 * alongside it.  I1_LOAD_S(n) yields the bit at position n+4.
 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1	(CMD_3D|(0x1d<<24)|(0x04<<16))
#define I1_LOAD_S(n)			(1<<((n)+4))
#define S3_POINT_WIDTH_SHIFT		23
#define S3_LINE_WIDTH_SHIFT		19
#define S3_ALPHA_SHADE_MODE_SHIFT	18
#define S3_FOG_SHADE_MODE_SHIFT		17
#define S3_SPEC_SHADE_MODE_SHIFT	16
#define S3_COLOR_SHADE_MODE_SHIFT	15
#define S3_CULL_MODE_SHIFT		13
#define S3_CULLMODE_BOTH		(0)
#define S3_CULLMODE_NONE		(1<<13)
#define S3_CULLMODE_CW			(2<<13)
#define S3_CULLMODE_CCW			(3<<13)
#define S3_POINT_WIDTH_PRESENT		(1<<12)
#define S3_SPEC_FOG_PRESENT		(1<<11)
#define S3_DIFFUSE_PRESENT		(1<<10)
#define S3_DEPTH_OFFSET_PRESENT		(1<<9)
#define S3_POSITION_SHIFT		6
#define S3_VERTEXHAS_XYZ		(1<<6)
#define S3_VERTEXHAS_XYZW		(2<<6)
#define S3_VERTEXHAS_XY			(3<<6)
#define S3_VERTEXHAS_XYW		(4<<6)
#define S3_ENABLE_SPEC_ADD		(1<<5)
#define S3_ENABLE_FOG			(1<<4)
#define S3_ENABLE_LOCAL_DEPTH_BIAS	(1<<3)
#define S3_ENABLE_SPRITE_POINT		(1<<1)
#define S3_ENABLE_ANTIALIASING		1
/*
 * Bit 31: use an unsigned literal, because shifting a signed 1 into the
 * sign bit is undefined behaviour (C11 6.5.7).
 */
#define S8_ENABLE_ALPHA_TEST		(1u<<31)
#define S8_ALPHA_TEST_FUNC_SHIFT	28
#define S8_ALPHA_REFVALUE_SHIFT		20
#define S8_ENABLE_DEPTH_TEST		(1<<19)
#define S8_DEPTH_TEST_FUNC_SHIFT	16
#define S8_ENABLE_COLOR_BLEND		(1<<15)
#define S8_COLOR_BLEND_FUNC_SHIFT	12
#define S8_BLENDFUNC_ADD		(0)
#define S8_BLENDFUNC_SUB		(1<<12)
#define S8_BLENDFUNC_RVRSE_SUB		(2<<12)
#define S8_BLENDFUNC_MIN		(3<<12)
#define S8_BLENDFUNC_MAX		(4<<12)
#define S8_SRC_BLEND_FACTOR_SHIFT	8
#define S8_DST_BLEND_FACTOR_SHIFT	4
#define S8_ENABLE_DEPTH_BUFFER_WRITE	(1<<3)
#define S8_ENABLE_COLOR_BUFFER_WRITE	(1<<2)

/*
 * _3DSTATE_LOAD_STATE_IMMEDIATE_2 and its load-select bits: texture maps
 * start at bit 11, texture blend stages at bit 7.
 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_2	(CMD_3D|(0x1d<<24)|(0x03<<16))
#define LOAD_TEXTURE_MAP(x)		(1<<((x)+11))
#define LOAD_TEXTURE_BLEND_STAGE(x)	(1<<((x)+7))
#define LOAD_GLOBAL_COLOR_FACTOR	(1<<6)

/*
 * Texture map state (TM0S0..TM0S3), map texel-stream / map filter commands
 * and texture blend stage (TB0C colour, TB0A alpha) encodings.
 *
 * Constants that reach bit 31 are written with an unsigned literal:
 * left-shifting a signed value into the sign bit is undefined behaviour
 * (C11 6.5.7), whereas the unsigned form yields the intended bit pattern.
 */
#define TM0S0_ADDRESS_MASK		0xfffffffc
#define TM0S0_USE_FENCE			(1<<1)

#define TM0S1_HEIGHT_SHIFT		21
#define TM0S1_WIDTH_SHIFT		10
#define TM0S1_PALETTE_SELECT		(1<<9)
#define TM0S1_MAPSURF_FORMAT_MASK	(0x7 << 6)
#define TM0S1_MAPSURF_FORMAT_SHIFT	6
#define MAPSURF_8BIT_INDEXED		(0<<6)
#define MAPSURF_8BIT			(1<<6)
#define MAPSURF_16BIT			(2<<6)
#define MAPSURF_32BIT			(3<<6)
#define MAPSURF_411			(4<<6)
#define MAPSURF_422			(5<<6)
#define MAPSURF_COMPRESSED		(6<<6)
#define MAPSURF_4BIT_INDEXED		(7<<6)
#define TM0S1_MT_FORMAT_MASK		(0x7 << 3)
#define TM0S1_MT_FORMAT_SHIFT		3
/* The MT_* values below are grouped by the MAPSURF_* type named in each trailing comment. */
#define MT_4BIT_IDX_ARGB8888		(7<<3)	/* SURFACE_4BIT_INDEXED */
#define MT_8BIT_IDX_RGB565		(0<<3)	/* SURFACE_8BIT_INDEXED */
#define MT_8BIT_IDX_ARGB1555		(1<<3)
#define MT_8BIT_IDX_ARGB4444		(2<<3)
#define MT_8BIT_IDX_AY88		(3<<3)
#define MT_8BIT_IDX_ABGR8888		(4<<3)
#define MT_8BIT_IDX_BUMP_88DVDU		(5<<3)
#define MT_8BIT_IDX_BUMP_655LDVDU	(6<<3)
#define MT_8BIT_IDX_ARGB8888		(7<<3)
#define MT_8BIT_I8			(0<<3)	/* SURFACE_8BIT */
#define MT_8BIT_L8			(1<<3)
#define MT_8BIT_A8			(4<<3)
#define MT_16BIT_RGB565			(0<<3)	/* SURFACE_16BIT */
#define MT_16BIT_ARGB1555		(1<<3)
#define MT_16BIT_ARGB4444		(2<<3)
#define MT_16BIT_AY88			(3<<3)
#define MT_16BIT_DIB_ARGB1555_8888	(4<<3)
#define MT_16BIT_BUMP_88DVDU		(5<<3)
#define MT_16BIT_BUMP_655LDVDU		(6<<3)
#define MT_16BIT_DIB_RGB565_8888	(7<<3)
#define MT_32BIT_ARGB8888		(0<<3)	/* SURFACE_32BIT */
#define MT_32BIT_ABGR8888		(1<<3)
#define MT_32BIT_XRGB8888		(2<<3)
#define MT_32BIT_XBGR8888		(3<<3)
#define MT_32BIT_BUMP_XLDVDU_8888	(6<<3)
#define MT_32BIT_DIB_8888		(7<<3)
#define MT_411_YUV411			(0<<3)	/* SURFACE_411 */
#define MT_422_YCRCB_SWAPY		(0<<3)	/* SURFACE_422 */
#define MT_422_YCRCB_NORMAL		(1<<3)
#define MT_422_YCRCB_SWAPUV		(2<<3)
#define MT_422_YCRCB_SWAPUVY		(3<<3)
#define MT_COMPRESS_DXT1		(0<<3)	/* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT2_3		(1<<3)
#define MT_COMPRESS_DXT4_5		(2<<3)
#define MT_COMPRESS_FXT1		(3<<3)
#define TM0S1_COLORSPACE_CONVERSION	(1 << 2)
#define TM0S1_TILED_SURFACE		(1 << 1)
#define TM0S1_TILE_WALK			(1 << 0)

#define TM0S2_PITCH_SHIFT		21
#define TM0S2_CUBE_FACE_ENA_SHIFT	15
#define TM0S2_CUBE_FACE_ENA_MASK	(1<<15)
#define TM0S2_MAP_FORMAT		(1<<14)
#define TM0S2_MAP_2D			(0<<14)
#define TM0S2_MAP_3D_CUBE		(1<<14)
#define TM0S2_VERTICAL_LINE_STRIDE	(1<<13)
/* "VERITCAL" is misspelt; the name is kept as-is so existing users still compile. */
#define TM0S2_VERITCAL_LINE_STRIDE_OFF	(1<<12)
#define TM0S2_OUTPUT_CHAN_SHIFT		10
#define TM0S2_OUTPUT_CHAN_MASK		(3<<10)

/* 0x3<<30 does not fit in a signed int, hence the unsigned literal. */
#define TM0S3_MIP_FILTER_MASK		(0x3u<<30)
#define TM0S3_MIP_FILTER_SHIFT		30
#define MIPFILTER_NONE			0
#define MIPFILTER_NEAREST		1
#define MIPFILTER_LINEAR		3
#define TM0S3_MAG_FILTER_MASK		(0x3<<28)
#define TM0S3_MAG_FILTER_SHIFT		28
#define TM0S3_MIN_FILTER_MASK		(0x3<<26)
#define TM0S3_MIN_FILTER_SHIFT		26
#define FILTER_NEAREST			0
#define FILTER_LINEAR			1
#define FILTER_ANISOTROPIC		2

#define TM0S3_LOD_BIAS_SHIFT		17
#define TM0S3_LOD_BIAS_MASK		(0x1ff<<17)
#define TM0S3_MAX_MIP_SHIFT		9
#define TM0S3_MAX_MIP_MASK		(0xff<<9)
#define TM0S3_MIN_MIP_SHIFT		3
#define TM0S3_MIN_MIP_MASK		(0x3f<<3)
#define TM0S3_KILL_PIXEL		(1<<2)
#define TM0S3_KEYED_FILTER		(1<<1)
#define TM0S3_CHROMA_KEY		(1<<0)

/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
#define _3DSTATE_MAP_TEX_STREAM_CMD	(CMD_3D|(0x1c<<24)|(0x05<<19))
#define DISABLE_TEX_STREAM_BUMP		(1<<12)
#define ENABLE_TEX_STREAM_BUMP		((1<<12)|(1<<11))
#define TEX_MODIFY_UNIT_0		0
#define TEX_MODIFY_UNIT_1		(1<<8)
#define ENABLE_TEX_STREAM_COORD_SET	(1<<7)
#define TEX_STREAM_COORD_SET(x)		((x)<<4)
#define ENABLE_TEX_STREAM_MAP_IDX	(1<<3)
#define TEX_STREAM_MAP_IDX(x)		(x)

#define FLUSH_MAP_CACHE			(1<<0)

#define _3DSTATE_MAP_FILTER_CMD		(CMD_3D|(0x1c<<24)|(0x02<<19))
#define FILTER_TEXMAP_INDEX(x)		((x) << 16)
#define MAG_MODE_FILTER_ENABLE		(1 << 5)
#define MIN_MODE_FILTER_ENABLE		(1 << 2)
#define MAG_MAPFILTER_NEAREST		(0 << 3)
#define MAG_MAPFILTER_LINEAR		(1 << 3)
#define MAG_MAPFILTER_ANISOTROPIC	(2 << 3)
#define MIN_MAPFILTER_NEAREST		(0)
#define MIN_MAPFILTER_LINEAR		(1)
#define MIN_MAPFILTER_ANISOTROPIC	(2)
#define ENABLE_KEYS			(1<<15)
#define DISABLE_COLOR_KEY		0
#define DISABLE_CHROMA_KEY		0
#define DISABLE_KILL_PIXEL		0
#define ENABLE_MIP_MODE_FILTER		(1 << 9)
/* Repeats the MIPFILTER_* values defined with TM0S3 above; an identical redefinition is permitted. */
#define MIPFILTER_NONE			0
#define MIPFILTER_NEAREST		1
#define MIPFILTER_LINEAR		3

/* TB0C_*: texture blend stage, colour fields.  The *_XXX names are empty placeholders. */
#define TB0C_LAST_STAGE			(1u << 31)	/* bit 31: unsigned literal */
#define TB0C_RESULT_SCALE_1X		(0 << 29)
#define TB0C_RESULT_SCALE_2X		(1 << 29)
#define TB0C_RESULT_SCALE_4X		(2 << 29)
#define TB0C_OP_MODULE			(3 << 25)
#define TB0C_OUTPUT_WRITE_CURRENT	(0 << 24)
#define TB0C_OUTPUT_WRITE_ACCUM		(1 << 24)
#define TB0C_ARG3_REPLICATE_ALPHA	(1<<23)
#define TB0C_ARG3_INVERT		(1<<22)
#define TB0C_ARG3_SEL_XXX
#define TB0C_ARG2_REPLICATE_ALPHA	(1<<17)
#define TB0C_ARG2_INVERT		(1<<16)
#define TB0C_ARG2_SEL_ONE		(0 << 12)
#define TB0C_ARG2_SEL_FACTOR		(1 << 12)
#define TB0C_ARG2_SEL_TEXEL0		(6 << 12)
#define TB0C_ARG2_SEL_TEXEL1		(7 << 12)
#define TB0C_ARG2_SEL_TEXEL2		(8 << 12)
#define TB0C_ARG2_SEL_TEXEL3		(9 << 12)
#define TB0C_ARG1_REPLICATE_ALPHA	(1<<11)
#define TB0C_ARG1_INVERT		(1<<10)
#define TB0C_ARG1_SEL_ONE		(0 << 6)
#define TB0C_ARG1_SEL_TEXEL0		(6 << 6)
#define TB0C_ARG1_SEL_TEXEL1		(7 << 6)
#define TB0C_ARG1_SEL_TEXEL2		(8 << 6)
#define TB0C_ARG1_SEL_TEXEL3		(9 << 6)
#define TB0C_ARG0_REPLICATE_ALPHA	(1<<5)
#define TB0C_ARG0_SEL_XXX

/* TB0A_*: texture blend stage, alpha fields.  The *_XXX names are empty placeholders. */
#define TB0A_CTR_STAGE_ENABLE		(1u<<31)	/* bit 31: unsigned literal */
#define TB0A_RESULT_SCALE_1X		(0 << 29)
#define TB0A_RESULT_SCALE_2X		(1 << 29)
#define TB0A_RESULT_SCALE_4X		(2 << 29)
#define TB0A_OP_MODULE			(3 << 25)
#define TB0A_OUTPUT_WRITE_CURRENT	(0<<24)
#define TB0A_OUTPUT_WRITE_ACCUM		(1<<24)
#define TB0A_CTR_STAGE_SEL_BITS_XXX
#define TB0A_ARG3_SEL_XXX
#define TB0A_ARG3_INVERT		(1<<17)
#define TB0A_ARG2_INVERT		(1<<16)
#define TB0A_ARG2_SEL_ONE		(0 << 12)
#define TB0A_ARG2_SEL_TEXEL0		(6 << 12)
#define TB0A_ARG2_SEL_TEXEL1		(7 << 12)
#define TB0A_ARG2_SEL_TEXEL2		(8 << 12)
#define TB0A_ARG2_SEL_TEXEL3		(9 << 12)
#define TB0A_ARG1_INVERT		(1<<10)
#define TB0A_ARG1_SEL_ONE		(0 << 6)
#define TB0A_ARG1_SEL_TEXEL0		(6 << 6)
#define TB0A_ARG1_SEL_TEXEL1		(7 << 6)
#define TB0A_ARG1_SEL_TEXEL2		(8 << 6)
#define TB0A_ARG1_SEL_TEXEL3		(9 << 6)

#endif /* GEN2_RENDER_H */

new file mode 100644

index 00000000..203de08f

--- /dev/null

+++ b/ diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.cnew file mode 100644index 00000000..203de08f--- /dev/null+++ b/ src/sna/gen3_render.c @@ -0,0 +1,3694 @@ +/* + * Copyright © 2010-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_reg.h" +#include "sna_video.h" + +#include "gen3_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_COMPOSITE 0 +#define NO_COMPOSITE_SPANS 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 + +enum { + SHADER_NONE = 0, + SHADER_ZERO, + SHADER_CONSTANT, + SHADER_LINEAR, + SHADER_RADIAL, + SHADER_TEXTURE, + SHADER_OPACITY, +}; + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_BATCH_F(v) batch_emit_float(sna, v) +#define OUT_VERTEX(v) vertex_emit(sna, v) + +enum gen3_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +static const struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen3_blend_op[] = { + /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO}, + /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO}, + /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE}, + /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA}, + /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE}, + /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO}, + /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA}, + /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO}, + /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA}, + /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA}, + /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE}, +}; + +static const struct formatinfo { + int fmt, xfmt; + uint32_t card_fmt; + Bool rb_reversed; +} gen3_tex_formats[] = { + {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, FALSE}, + {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, 
FALSE}, + {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, FALSE}, + {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, FALSE}, + {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, FALSE}, + {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, FALSE}, + {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, FALSE}, + {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, FALSE}, + {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, TRUE}, + {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, FALSE}, + {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, TRUE}, + {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, FALSE}, + {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, TRUE}, +}; + +#define xFixedToDouble(f) pixman_fixed_to_double(f) + +static inline uint32_t gen3_buf_tiling(uint32_t tiling) +{ + uint32_t v = 0; + switch (tiling) { + case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; + case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; + case I915_TILING_NONE: break; + } + return v; +} + +static inline Bool +gen3_check_pitch_3d(struct kgem_bo *bo) +{ + return bo->pitch <= 8192; +} + +static uint32_t gen3_get_blend_cntl(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t sblend = gen3_blend_op[op].src_blend; + uint32_t dblend = gen3_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll + * treat it as always 1. + */ + if (gen3_blend_op[op].dst_alpha) { + if (PICT_FORMAT_A(dst_format) == 0) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_ONE; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_ZERO; + } + + /* gen3 engine reads 8bit color buffer into green channel + * in cases like color buffer blending etc., and also writes + * back green channel. So with dst_alpha blend we should use + * color factor. See spec on "8-bit rendering". 
+ */ + if (dst_format == PICT_a8) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_DST_COLR; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_INV_DST_COLR; + } + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is the + * mask channels multiplied by the source picture's alpha. + */ + if (has_component_alpha && gen3_blend_op[op].src_alpha) { + if (dblend == BLENDFACT_SRC_ALPHA) + dblend = BLENDFACT_SRC_COLR; + else if (dblend == BLENDFACT_INV_SRC_ALPHA) + dblend = BLENDFACT_INV_SRC_COLR; + } + + return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | + sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT | + dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); +} + +static Bool gen3_check_dst_format(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + case PICT_r5g6b5: + case PICT_b5g6r5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a1b5g5r5: + case PICT_x1b5g5r5: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + case PICT_a4b4g4r4: + case PICT_x4b4g4r4: + return TRUE; + default: + return FALSE; + } +} + +static Bool gen3_dst_rb_reversed(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return FALSE; + default: + return TRUE; + } +} + +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) + +static uint32_t gen3_get_dst_format(uint32_t format) +{ +#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)) + switch (format) { + default: + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case 
PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return BIAS | COLR_BUF_ARGB8888; + case PICT_r5g6b5: + case PICT_b5g6r5: + return BIAS | COLR_BUF_RGB565; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a1b5g5r5: + case PICT_x1b5g5r5: + return BIAS | COLR_BUF_ARGB1555; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + return BIAS | COLR_BUF_ARGB2AAA; + case PICT_a8: + return BIAS | COLR_BUF_8BIT; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + case PICT_a4b4g4r4: + case PICT_x4b4g4r4: + return BIAS | COLR_BUF_ARGB4444; + } +#undef BIAS +} + +static uint32_t gen3_texture_repeat(uint32_t repeat) +{ +#define REPEAT(x) \ + (SS3_NORMALIZED_COORDS | \ + TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ + TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT) + switch (repeat) { + default: + case RepeatNone: + return REPEAT(CLAMP_BORDER); + case RepeatNormal: + return REPEAT(WRAP); + case RepeatPad: + return REPEAT(CLAMP_EDGE); + case RepeatReflect: + return REPEAT(MIRROR); + } +#undef REPEAT +} + +static uint32_t gen3_gradient_repeat(uint32_t repeat) +{ +#define REPEAT(x) \ + (SS3_NORMALIZED_COORDS | \ + TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ + TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) + switch (repeat) { + default: + case RepeatNone: + return REPEAT(CLAMP_BORDER); + case RepeatNormal: + return REPEAT(WRAP); + case RepeatPad: + return REPEAT(CLAMP_EDGE); + case RepeatReflect: + return REPEAT(MIRROR); + } +#undef REPEAT +} + +static Bool gen3_check_repeat(uint32_t repeat) +{ + switch (repeat) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t gen3_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + case PictFilterBilinear: + return (FILTER_LINEAR 
<< SS2_MAG_FILTER_SHIFT | + FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + } +} + +static bool gen3_check_filter(uint32_t filter) +{ + switch (filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static inline void +gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); +} + +fastcall static void +gen3_emit_composite_primitive_constant(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); +} + +fastcall static void +gen3_emit_composite_primitive_identity_gradient(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t dst_x, dst_y; + int16_t src_x, src_y; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(src_x + r->width); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y); +} + +fastcall static void +gen3_emit_composite_primitive_affine_gradient(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PictTransform *transform = op->src.transform; + int16_t dst_x, dst_y; + int16_t src_x, src_y; + float sx, sy; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = 
r->src.y + op->src.offset[1]; + + sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(sx); + OUT_VERTEX(sy); + + sna_get_transformed_coordinates(src_x, src_y + r->height, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(sx); + OUT_VERTEX(sy); + + sna_get_transformed_coordinates(src_x, src_y, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(sx); + OUT_VERTEX(sy); +} + +fastcall static void +gen3_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[2] = v[6] + w * op->src.scale[0]; + + v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = v[11] + h * op->src.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PictTransform *transform = op->src.transform; + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + int src_x = r->src.x + (int)op->src.offset[0]; + int src_y = r->src.y + (int)op->src.offset[1]; + float sx, sy; + + _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + transform, + &sx, &sy); + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); + + _sna_get_transformed_coordinates(src_x, src_y + r->height, + transform, + &sx, 
&sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); + + _sna_get_transformed_coordinates(src_x, src_y, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); +} + +fastcall static void +gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; + v[2] = v[6] + w * op->mask.scale[0]; + + v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; + v[7] = v[3] = v[11] + h * op->mask.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float dst_x, dst_y; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 18; + + v[0] = dst_x + w; + v[1] = dst_y + h; + v[2] = (src_x + w) * op->src.scale[0]; + v[3] = (src_y + h) * op->src.scale[1]; + v[4] = (msk_x + w) * op->mask.scale[0]; + v[5] = (msk_y + h) * op->mask.scale[1]; + + v[6] = dst_x; + v[7] = v[1]; + v[8] = src_x * op->src.scale[0]; + v[9] = v[3]; + v[10] = msk_x * op->mask.scale[0]; + v[11] =v[5]; + + v[12] 
= v[6]; + v[13] = dst_y; + v[14] = v[8]; + v[15] = src_y * op->src.scale[1]; + v[16] = v[10]; + v[17] = msk_y * op->mask.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t src_x, src_y; + float dst_x, dst_y; + float msk_x, msk_y; + float w, h; + float *v; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 18; + + v[0] = dst_x + w; + v[1] = dst_y + h; + sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + op->src.transform, + &v[2], &v[3]); + v[2] *= op->src.scale[0]; + v[3] *= op->src.scale[1]; + v[4] = (msk_x + w) * op->mask.scale[0]; + v[5] = (msk_y + h) * op->mask.scale[1]; + + v[6] = dst_x; + v[7] = v[1]; + sna_get_transformed_coordinates(src_x, src_y + r->height, + op->src.transform, + &v[8], &v[9]); + v[8] *= op->src.scale[0]; + v[9] *= op->src.scale[1]; + v[10] = msk_x * op->mask.scale[0]; + v[11] =v[5]; + + v[12] = v[6]; + v[13] = dst_y; + sna_get_transformed_coordinates(src_x, src_y, + op->src.transform, + &v[14], &v[15]); + v[14] *= op->src.scale[0]; + v[15] *= op->src.scale[1]; + v[16] = v[10]; + v[17] = msk_y * op->mask.scale[1]; +} + +static void +gen3_emit_composite_texcoord(struct sna *sna, + const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + float s = 0, t = 0, w = 1; + + switch (channel->gen3.type) { + case SHADER_OPACITY: + case SHADER_NONE: + case SHADER_ZERO: + case SHADER_CONSTANT: + break; + + case SHADER_LINEAR: + case SHADER_RADIAL: + case SHADER_TEXTURE: + x += channel->offset[0]; + y += channel->offset[1]; + if (channel->is_affine) { + 
sna_get_transformed_coordinates(x, y, + channel->transform, + &s, &t); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + } else { + sna_get_transformed_coordinates_3d(x, y, + channel->transform, + &s, &t, &w); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + OUT_VERTEX(0); + OUT_VERTEX(w); + } + break; + } +} + +static void +gen3_emit_composite_vertex(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t maskX, int16_t maskY, + int16_t dstX, int16_t dstY) +{ + gen3_emit_composite_dstcoord(sna, dstX, dstY); + gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY); + gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY); +} + +fastcall static void +gen3_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + gen3_emit_composite_vertex(sna, op, + r->src.x + r->width, + r->src.y + r->height, + r->mask.x + r->width, + r->mask.y + r->height, + op->dst.x + r->dst.x + r->width, + op->dst.y + r->dst.y + r->height); + gen3_emit_composite_vertex(sna, op, + r->src.x, + r->src.y + r->height, + r->mask.x, + r->mask.y + r->height, + op->dst.x + r->dst.x, + op->dst.y + r->dst.y + r->height); + gen3_emit_composite_vertex(sna, op, + r->src.x, + r->src.y, + r->mask.x, + r->mask.y, + op->dst.x + r->dst.x, + op->dst.y + r->dst.y); +} + +static inline void +gen3_2d_perspective(struct sna *sna, int in, int out) +{ + gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W)); + gen3_fs_mul(out, + gen3_fs_operand(in, X, Y, ZERO, ONE), + gen3_fs_operand_reg(out)); +} + +static inline void +gen3_linear_coord(struct sna *sna, + const struct sna_composite_channel *channel, + int in, int out) +{ + int c = channel->gen3.constants; + + if (!channel->is_affine) { + gen3_2d_perspective(sna, in, FS_U0); + in = FS_U0; + } + + gen3_fs_mov(out, gen3_fs_operand_zero()); + gen3_fs_dp3(out, MASK_X, + gen3_fs_operand(in, X, Y, ONE, ZERO), + 
gen3_fs_operand_reg(c)); +} + +static void +gen3_radial_coord(struct sna *sna, + const struct sna_composite_channel *channel, + int in, int out) +{ + int c = channel->gen3.constants; + + if (!channel->is_affine) { + gen3_2d_perspective(sna, in, FS_U0); + in = FS_U0; + } + + switch (channel->gen3.mode) { + case RADIAL_ONE: + /* + pdx = (x - c1x) / dr, pdy = (y - c1y) / dr; + r² = pdx*pdx + pdy*pdy + t = r²/sqrt(r²) - r1/dr; + */ + gen3_fs_mad(FS_U0, MASK_X | MASK_Y, + gen3_fs_operand(in, X, Y, ZERO, ZERO), + gen3_fs_operand(c, Z, Z, ZERO, ZERO), + gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO)); + gen3_fs_dp2add(FS_U0, MASK_X, + gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), + gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), + gen3_fs_operand_zero()); + gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X)); + gen3_fs_mad(out, 0, + gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO), + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(c, W, ZERO, ZERO, ZERO)); + break; + + case RADIAL_TWO: + /* + pdx = x - c1x, pdy = y - c1y; + A = dx² + dy² - dr² + B = -2*(pdx*dx + pdy*dy + r1*dr); + C = pdx² + pdy² - r1²; + det = B*B - 4*A*C; + t = (-B + sqrt (det)) / (2 * A) + */ + + /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */ + gen3_fs_add(FS_U0, + gen3_fs_operand(in, X, Y, ZERO, ZERO), + gen3_fs_operand(c, X, Y, Z, ZERO)); + /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */ + gen3_fs_dp3(FS_U0, MASK_W, + gen3_fs_operand(FS_U0, X, Y, ONE, ZERO), + gen3_fs_operand(c+1, X, Y, Z, ZERO)); + /* u1.x = pdx² + pdy² - r1²; [C] */ + gen3_fs_dp3(FS_U1, MASK_X, + gen3_fs_operand(FS_U0, X, Y, Z, ZERO), + gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO)); + /* u1.x = C, u1.y = B, u1.z=-4*A; */ + gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W)); + gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W)); + /* u1.x = B² - 4*A*C */ + gen3_fs_dp2add(FS_U1, MASK_X, + gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO), + gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO), + 
gen3_fs_operand_zero()); + /* out.x = -B + sqrt (B² - 4*A*C), */ + gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X)); + gen3_fs_mad(out, MASK_X, + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO), + gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO)); + /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */ + gen3_fs_mul(out, + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO)); + break; + } +} + +static void +gen3_composite_emit_shader(struct sna *sna, + const struct sna_composite_op *op, + uint8_t blend) +{ + Bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0; + const struct sna_composite_channel *src, *mask; + struct gen3_render_state *state = &sna->render_state.gen3; + uint32_t shader_offset, id; + int src_reg, mask_reg; + int t, length; + + src = &op->src; + mask = &op->mask; + if (mask->gen3.type == SHADER_NONE) + mask = NULL; + + if (mask && src->is_opaque && + gen3_blend_op[blend].src_alpha && + op->has_component_alpha) { + src = mask; + mask = NULL; + } + + id = (src->gen3.type | + src->is_affine << 4 | + src->alpha_fixup << 5 | + src->rb_reversed << 6); + if (mask) { + id |= (mask->gen3.type << 8 | + mask->is_affine << 12 | + gen3_blend_op[blend].src_alpha << 13 | + op->has_component_alpha << 14 | + mask->alpha_fixup << 15 | + mask->rb_reversed << 16); + } + id |= dst_is_alpha << 24; + id |= op->rb_reversed << 25; + + if (id == state->last_shader) + return; + + state->last_shader = id; + + shader_offset = sna->kgem.nbatch++; + t = 0; + switch (src->gen3.type) { + case SHADER_NONE: + case SHADER_OPACITY: + assert(0); + case SHADER_ZERO: + break; + case SHADER_CONSTANT: + gen3_fs_dcl(FS_T8); + src_reg = FS_T8; + break; + case SHADER_TEXTURE: + case SHADER_RADIAL: + case SHADER_LINEAR: + gen3_fs_dcl(FS_S0); + gen3_fs_dcl(FS_T0); + t++; + break; + } + + if (mask == NULL) { + if (src->gen3.type == SHADER_ZERO) { + gen3_fs_mov(FS_OC, gen3_fs_operand_zero()); + goto done; + } 
+ if (src->alpha_fixup && dst_is_alpha) { + gen3_fs_mov(FS_OC, gen3_fs_operand_one()); + goto done; + } + /* No mask, so load directly to output color */ + if (src->gen3.type != SHADER_CONSTANT) { + if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed) + src_reg = FS_R0; + else + src_reg = FS_OC; + } + switch (src->gen3.type) { + case SHADER_LINEAR: + gen3_linear_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(src_reg, FS_S0, FS_R0); + break; + + case SHADER_RADIAL: + gen3_radial_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(src_reg, FS_S0, FS_R0); + break; + + case SHADER_TEXTURE: + if (src->is_affine) + gen3_fs_texld(src_reg, FS_S0, FS_T0); + else + gen3_fs_texldp(src_reg, FS_S0, FS_T0); + break; + + case SHADER_NONE: + case SHADER_CONSTANT: + case SHADER_ZERO: + break; + } + + if (src_reg != FS_OC) { + if (src->alpha_fixup)