

index e7423bea1424..f5948c48b9a2 100644

--- a/

+++ b/ diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txtindex e7423bea1424..f5948c48b9a2 100644--- a/ Documentation/devicetree/bindings/display/msm/dsi.txt +++ b/ Documentation/devicetree/bindings/display/msm/dsi.txt @@ -44,9 +44,34 @@ Optional properties: - pinctrl-names: the pin control state names; should contain "default" - pinctrl-0: the default pinctrl state (active) - pinctrl-n: the "sleep" pinctrl state -- port: DSI controller output port. This contains one endpoint subnode, with its - remote-endpoint set to the phandle of the connected panel's endpoint. - See Documentation/devicetree/bindings/graph.txt for device graph info. +- port: DSI controller output port, containing one endpoint subnode. + + DSI Endpoint properties: + - remote-endpoint: set to phandle of the connected panel's endpoint. + See Documentation/devicetree/bindings/graph.txt for device graph info. + - qcom,data-lane-map: this describes how the logical DSI lanes are mapped + to the physical lanes on the given platform. The value contained in + index n describes what logical data lane is mapped to the physical data + lane n (DATAn, where n lies between 0 and 3). + + For example: + + qcom,data-lane-map = <3 0 1 2>; + + The above mapping describes that the logical data lane DATA3 is mapped to + the physical data lane DATA0, logical DATA0 to physical DATA1, logic DATA1 + to phys DATA2 and logic DATA2 to phys DATA3. + + There are only a limited number of physical to logical mappings possible: + + "0123": Logic 0->Phys 0; Logic 1->Phys 1; Logic 2->Phys 2; Logic 3->Phys 3; + "3012": Logic 3->Phys 0; Logic 0->Phys 1; Logic 1->Phys 2; Logic 2->Phys 3; + "2301": Logic 2->Phys 0; Logic 3->Phys 1; Logic 0->Phys 2; Logic 1->Phys 3; + "1230": Logic 1->Phys 0; Logic 2->Phys 1; Logic 3->Phys 2; Logic 0->Phys 3; + "0321": Logic 0->Phys 0; Logic 3->Phys 1; Logic 2->Phys 2; Logic 1->Phys 3; + "1032": Logic 1->Phys 0; Logic 0->Phys 1; Logic 3->Phys 2; Logic 2->Phys 3; + "2103": Logic 2->Phys 0; Logic 1->Phys 1; Logic 0->Phys 2; Logic 3->Phys 3; + "3210": Logic 3->Phys 0; Logic 2->Phys 1; Logic 1->Phys 2; Logic 0->Phys 3; DSI PHY: Required properties: @@ -131,6 +156,7 @@ Example: port { dsi0_out: endpoint { remote-endpoint = <&panel_in>; + lanes = <0 1 2 3>; }; }; };

index 379ee2ea9a3d..b63f614e0c04 100644

--- a/

+++ b/ diff --git a/Documentation/devicetree/bindings/display/msm/hdmi.txt b/Documentation/devicetree/bindings/display/msm/hdmi.txtindex 379ee2ea9a3d..b63f614e0c04 100644--- a/ Documentation/devicetree/bindings/display/msm/hdmi.txt +++ b/ Documentation/devicetree/bindings/display/msm/hdmi.txt @@ -11,6 +11,7 @@ Required properties: - reg: Physical base address and length of the controller's registers - reg-names: "core_physical" - interrupts: The interrupt signal from the hdmi block. +- power-domains: Should be <&mmcc MDSS_GDSC>. - clocks: device clocks See ../clocks/clock-bindings.txt for details. - qcom,hdmi-tx-ddc-clk-gpio: ddc clk pin @@ -18,6 +19,8 @@ Required properties: - qcom,hdmi-tx-hpd-gpio: hpd pin - core-vdda-supply: phandle to supply regulator - hdmi-mux-supply: phandle to mux regulator +- phys: the phandle for the HDMI PHY device +- phy-names: the name of the corresponding PHY device Optional properties: - qcom,hdmi-tx-mux-en-gpio: hdmi mux enable pin @@ -27,15 +30,38 @@ Optional properties: - pinctrl-0: the default pinctrl state (active) - pinctrl-1: the "sleep" pinctrl state +HDMI PHY: +Required properties: +- compatible: Could be the following + * "qcom,hdmi-phy-8660" + * "qcom,hdmi-phy-8960" + * "qcom,hdmi-phy-8974" + * "qcom,hdmi-phy-8084" + * "qcom,hdmi-phy-8996" +- #phy-cells: Number of cells in a PHY specifier; Should be 0. +- reg: Physical base address and length of the registers of the PHY sub blocks. +- reg-names: The names of register regions. The following regions are required: + * "hdmi_phy" + * "hdmi_pll" + For HDMI PHY on msm8996, these additional register regions are required: + * "hdmi_tx_l0" + * "hdmi_tx_l1" + * "hdmi_tx_l3" + * "hdmi_tx_l4" +- power-domains: Should be <&mmcc MDSS_GDSC>. +- clocks: device clocks + See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details. +- core-vdda-supply: phandle to vdda regulator device node + Example: / { ... - hdmi: qcom,hdmi-tx-8960@4a00000 { + hdmi: hdmi@4a00000 { compatible = "qcom,hdmi-tx-8960"; reg-names = "core_physical"; - reg = <0x04a00000 0x1000>; + reg = <0x04a00000 0x2f0>; interrupts = <GIC_SPI 79 0>; power-domains = <&mmcc MDSS_GDSC>; clock-names = @@ -54,5 +80,21 @@ Example: pinctrl-names = "default", "sleep"; pinctrl-0 = <&hpd_active &ddc_active &cec_active>; pinctrl-1 = <&hpd_suspend &ddc_suspend &cec_suspend>; + + phys = <&hdmi_phy>; + phy-names = "hdmi_phy"; + }; + + hdmi_phy: phy@4a00400 { + compatible = "qcom,hdmi-phy-8960"; + reg-names = "hdmi_phy", + "hdmi_pll"; + reg = <0x4a00400 0x60>, + <0x4a00500 0x100>; + #phy-cells = <0>; + power-domains = <&mmcc MDSS_GDSC>; + clock-names = "slave_iface_clk"; + clocks = <&mmcc HDMI_S_AHB_CLK>; + core-vdda-supply = <&pm8921_hdmi_mvs>; }; };

index 065ad4138799..ddb4c9d097e4 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefileindex 065ad4138799..ddb4c9d097e4 100644--- a/ drivers/gpu/drm/msm/Makefile +++ b/ drivers/gpu/drm/msm/Makefile @@ -12,6 +12,7 @@ msm-y := \ hdmi/hdmi_connector.o \ hdmi/hdmi_hdcp.o \ hdmi/hdmi_i2c.o \ + hdmi/hdmi_phy.o \ hdmi/hdmi_phy_8960.o \ hdmi/hdmi_phy_8x60.o \ hdmi/hdmi_phy_8x74.o \ @@ -52,6 +53,8 @@ msm-y := \ msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o +msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o +msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_phy_8996.o msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \ mdp/mdp4/mdp4_dsi_encoder.o \

index 9e2aceb4ffe6..fee24297fb92 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.hindex 9e2aceb4ffe6..fee24297fb92 100644--- a/ drivers/gpu/drm/msm/adreno/a2xx.xml.h +++ b/ drivers/gpu/drm/msm/adreno/a2xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109916 bytes, from 2016-02-20 18:44:48) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the

index 97dc1c6ec107..27dabd5e57fb 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.hindex 97dc1c6ec107..27dabd5e57fb 100644--- a/ drivers/gpu/drm/msm/adreno/a3xx.xml.h +++ b/ drivers/gpu/drm/msm/adreno/a3xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109916 bytes, from 2016-02-20 18:44:48) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -111,10 +112,14 @@ enum a3xx_vtx_fmt { VFMT_8_8_SNORM = 53, VFMT_8_8_8_SNORM = 54, VFMT_8_8_8_8_SNORM = 55, - VFMT_10_10_10_2_UINT = 60, - VFMT_10_10_10_2_UNORM = 61, - VFMT_10_10_10_2_SINT = 62, - VFMT_10_10_10_2_SNORM = 63, + VFMT_10_10_10_2_UINT = 56, + VFMT_10_10_10_2_UNORM = 57, + VFMT_10_10_10_2_SINT = 58, + VFMT_10_10_10_2_SNORM = 59, + VFMT_2_10_10_10_UINT = 60, + VFMT_2_10_10_10_UNORM = 61, + VFMT_2_10_10_10_SINT = 62, + VFMT_2_10_10_10_SNORM = 63, }; enum a3xx_tex_fmt { @@ -138,10 +143,12 @@ enum a3xx_tex_fmt { TFMT_DXT1 = 36, TFMT_DXT3 = 37, TFMT_DXT5 = 38, + TFMT_2_10_10_10_UNORM = 40, TFMT_10_10_10_2_UNORM = 41, TFMT_9_9_9_E5_FLOAT = 42, TFMT_11_11_10_FLOAT = 43, TFMT_A8_UNORM = 44, + TFMT_L8_UNORM = 45, TFMT_L8_A8_UNORM = 47, TFMT_8_UNORM = 48, TFMT_8_8_UNORM = 49, @@ -183,6 +190,8 @@ enum a3xx_tex_fmt { TFMT_32_SINT = 92, TFMT_32_32_SINT = 93, TFMT_32_32_32_32_SINT = 95, + TFMT_2_10_10_10_UINT = 96, + TFMT_10_10_10_2_UINT = 97, TFMT_ETC2_RG11_SNORM = 112, TFMT_ETC2_RG11_UNORM = 113, TFMT_ETC2_R11_SNORM = 114, @@ -215,6 +224,9 @@ enum a3xx_color_fmt { RB_R8_UINT = 14, RB_R8_SINT = 15, RB_R10G10B10A2_UNORM = 16, + RB_A2R10G10B10_UNORM = 17, + RB_R10G10B10A2_UINT = 18, + RB_A2R10G10B10_UINT = 19, RB_A8_UNORM = 20, RB_R8_UNORM = 21, RB_R16_FLOAT = 24, @@ -244,30 +256,273 @@ enum a3xx_color_fmt { RB_R32G32B32A32_UINT = 59, }; +enum a3xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_AHB_PFPTRANS_WAIT = 3, + CP_AHB_NRTTRANS_WAIT = 6, + CP_CSF_NRT_READ_WAIT = 8, + CP_CSF_I1_FIFO_FULL = 9, + CP_CSF_I2_FIFO_FULL = 10, + CP_CSF_ST_FIFO_FULL = 11, + CP_RESERVED_12 = 12, + CP_CSF_RING_ROQ_FULL = 13, + CP_CSF_I1_ROQ_FULL = 14, + CP_CSF_I2_ROQ_FULL = 15, + CP_CSF_ST_ROQ_FULL = 16, + CP_RESERVED_17 = 17, + CP_MIU_TAG_MEM_FULL = 18, + CP_MIU_NRT_WRITE_STALLED = 22, + CP_MIU_NRT_READ_STALLED = 23, + CP_ME_REGS_RB_DONE_FIFO_FULL = 26, + CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, + CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, + CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, + CP_ME_MICRO_RB_STARVED = 30, + CP_AHB_RBBM_DWORD_SENT = 40, + CP_ME_BUSY_CLOCKS = 41, + CP_ME_WAIT_CONTEXT_AVAIL = 42, + CP_PFP_TYPE0_PACKET = 43, + CP_PFP_TYPE3_PACKET = 44, + CP_CSF_RB_WPTR_NEQ_RPTR = 45, + CP_CSF_I1_SIZE_NEQ_ZERO = 46, + CP_CSF_I2_SIZE_NEQ_ZERO = 47, + CP_CSF_RBI1I2_FETCHING = 48, +}; + +enum a3xx_gras_tse_perfcounter_select { + GRAS_TSEPERF_INPUT_PRIM = 0, + GRAS_TSEPERF_INPUT_NULL_PRIM = 1, + GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, + GRAS_TSEPERF_CLIPPED_PRIM = 3, + GRAS_TSEPERF_NEW_PRIM = 4, + GRAS_TSEPERF_ZERO_AREA_PRIM = 5, + GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, + GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, + GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, + GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, + GRAS_TSEPERF_PRE_CLIP_PRIM = 10, + GRAS_TSEPERF_POST_CLIP_PRIM = 11, + GRAS_TSEPERF_WORKING_CYCLES = 12, + GRAS_TSEPERF_PC_STARVE = 13, + GRAS_TSERASPERF_STALL = 14, +}; + +enum a3xx_gras_ras_perfcounter_select { + GRAS_RASPERF_16X16_TILES = 0, + GRAS_RASPERF_8X8_TILES = 1, + GRAS_RASPERF_4X4_TILES = 2, + GRAS_RASPERF_WORKING_CYCLES = 3, + GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, + GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, + GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, +}; + +enum a3xx_hlsq_perfcounter_select { + HLSQ_PERF_SP_VS_CONSTANT = 0, + HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, + HLSQ_PERF_SP_FS_CONSTANT = 2, + HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, + HLSQ_PERF_TP_STATE = 4, + HLSQ_PERF_QUADS = 5, + HLSQ_PERF_PIXELS = 6, + HLSQ_PERF_VERTICES = 7, + HLSQ_PERF_FS8_THREADS = 8, + HLSQ_PERF_FS16_THREADS = 9, + HLSQ_PERF_FS32_THREADS = 10, + HLSQ_PERF_VS8_THREADS = 11, + HLSQ_PERF_VS16_THREADS = 12, + HLSQ_PERF_SP_VS_DATA_BYTES = 13, + HLSQ_PERF_SP_FS_DATA_BYTES = 14, + HLSQ_PERF_ACTIVE_CYCLES = 15, + HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, + HLSQ_PERF_STALL_CYCLES_SP_VS = 17, + HLSQ_PERF_STALL_CYCLES_SP_FS = 18, + HLSQ_PERF_STALL_CYCLES_UCHE = 19, + HLSQ_PERF_RBBM_LOAD_CYCLES = 20, + HLSQ_PERF_DI_TO_VS_START_SP0 = 21, + HLSQ_PERF_DI_TO_FS_START_SP0 = 22, + HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, + HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, + HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, + HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, + HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, + HLSQ_PERF_UCHE_LATENCY_COUNT = 28, +}; + +enum a3xx_pc_perfcounter_select { + PC_PCPERF_VISIBILITY_STREAMS = 0, + PC_PCPERF_TOTAL_INSTANCES = 1, + PC_PCPERF_PRIMITIVES_PC_VPC = 2, + PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, + PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, + PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, + PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, + PC_PCPERF_VERTICES_TO_VFD = 7, + PC_PCPERF_REUSED_VERTICES = 8, + PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, + PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, + PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, + PC_PCPERF_CYCLES_IS_WORKING = 12, +}; + +enum a3xx_rb_perfcounter_select { + RB_RBPERF_ACTIVE_CYCLES_ANY = 0, + RB_RBPERF_ACTIVE_CYCLES_ALL = 1, + RB_RBPERF_STARVE_CYCLES_BY_SP = 2, + RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, + RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, + RB_RBPERF_STALL_CYCLES_BY_MARB = 5, + RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, + RB_RBPERF_RB_MARB_DATA = 7, + RB_RBPERF_SP_RB_QUAD = 8, + RB_RBPERF_RAS_EARLY_Z_QUADS = 9, + RB_RBPERF_GMEM_CH0_READ = 10, + RB_RBPERF_GMEM_CH1_READ = 11, + RB_RBPERF_GMEM_CH0_WRITE = 12, + RB_RBPERF_GMEM_CH1_WRITE = 13, + RB_RBPERF_CP_CONTEXT_DONE = 14, + RB_RBPERF_CP_CACHE_FLUSH = 15, + RB_RBPERF_CP_ZPASS_DONE = 16, +}; + +enum a3xx_rbbm_perfcounter_select { + RBBM_ALAWYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TEX_BUSY = 12, + RBBM_ANY_USP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_RBBM_STATUS_MASKED = 22, +}; + enum a3xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_UCHE_LOAD_INSTRUCTIONS = 3, + SP_UCHE_STORE_INSTRUCTIONS = 4, + SP_UCHE_ATOMICS = 5, + SP_VS_TEX_INSTRUCTIONS = 6, + SP_VS_CFLOW_INSTRUCTIONS = 7, + SP_VS_EFU_INSTRUCTIONS = 8, + SP_VS_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_TEX_INSTRUCTIONS = 11, SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_EFU_INSTRUCTIONS = 13, SP_FS_FULL_ALU_INSTRUCTIONS = 14, - SP0_ICL1_MISSES = 26, + SP_FS_HALF_ALU_INSTRUCTIONS = 15, + SP_FS_BARY_INSTRUCTIONS = 16, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, SP_ALU_ACTIVE_CYCLES = 29, + SP_EFU_ACTIVE_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_ACTIVE_CYCLES_ANY = 35, + SP_ACTIVE_CYCLES_ALL = 36, +}; + +enum a3xx_tp_perfcounter_select { + TPL1_TPPERF_L1_REQUESTS = 0, + TPL1_TPPERF_TP0_L1_REQUESTS = 1, + TPL1_TPPERF_TP0_L1_MISSES = 2, + TPL1_TPPERF_TP1_L1_REQUESTS = 3, + TPL1_TPPERF_TP1_L1_MISSES = 4, + TPL1_TPPERF_TP2_L1_REQUESTS = 5, + TPL1_TPPERF_TP2_L1_MISSES = 6, + TPL1_TPPERF_TP3_L1_REQUESTS = 7, + TPL1_TPPERF_TP3_L1_MISSES = 8, + TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, + TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, + TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, + TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, + TPL1_TPPERF_BILINEAR_OPS = 13, + TPL1_TPPERF_QUADSQUADS_OFFSET = 14, + TPL1_TPPERF_QUADQUADS_SHADOW = 15, + TPL1_TPPERF_QUADS_ARRAY = 16, + TPL1_TPPERF_QUADS_PROJECTION = 17, + TPL1_TPPERF_QUADS_GRADIENT = 18, + TPL1_TPPERF_QUADS_1D2D = 19, + TPL1_TPPERF_QUADS_3DCUBE = 20, + TPL1_TPPERF_ZERO_LOD = 21, + TPL1_TPPERF_OUTPUT_TEXELS = 22, + TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, + TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, + TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, + TPL1_TPPERF_LATENCY = 26, + TPL1_TPPERF_LATENCY_TRANS = 27, }; -enum a3xx_rop_code { - ROP_CLEAR = 0, - ROP_NOR = 1, - ROP_AND_INVERTED = 2, - ROP_COPY_INVERTED = 3, - ROP_AND_REVERSE = 4, - ROP_INVERT = 5, - ROP_XOR = 6, - ROP_NAND = 7, - ROP_AND = 8, - ROP_EQUIV = 9, - ROP_NOOP = 10, - ROP_OR_INVERTED = 11, - ROP_COPY = 12, - ROP_OR_REVERSE = 13, - ROP_OR = 14, - ROP_SET = 15, +enum a3xx_vfd_perfcounter_select { + VFD_PERF_UCHE_BYTE_FETCHED = 0, + VFD_PERF_UCHE_TRANS = 1, + VFD_PERF_VPC_BYPASS_COMPONENTS = 2, + VFD_PERF_FETCH_INSTRUCTIONS = 3, + VFD_PERF_DECODE_INSTRUCTIONS = 4, + VFD_PERF_ACTIVE_CYCLES = 5, + VFD_PERF_STALL_CYCLES_UCHE = 6, + VFD_PERF_STALL_CYCLES_HLSQ = 7, + VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, + VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, +}; + +enum a3xx_vpc_perfcounter_select { + VPC_PERF_SP_LM_PRIMITIVES = 0, + VPC_PERF_COMPONENTS_FROM_SP = 1, + VPC_PERF_SP_LM_COMPONENTS = 2, + VPC_PERF_ACTIVE_CYCLES = 3, + VPC_PERF_STALL_CYCLES_LM = 4, + VPC_PERF_STALL_CYCLES_RAS = 5, +}; + +enum a3xx_uche_perfcounter_select { + UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, + UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, + UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, + UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, + UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, + UCHE_UCHEPERF_READ_REQUESTS_TP = 8, + UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, + UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, + UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, + UCHE_UCHEPERF_READ_REQUESTS_SP = 12, + UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, + UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, + UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, + UCHE_UCHEPERF_EVICTS = 16, + UCHE_UCHEPERF_FLUSHES = 17, + UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, + UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, + UCHE_UCHEPERF_ACTIVE_CYCLES = 20, }; enum a3xx_rb_blend_opcode { @@ -1429,15 +1684,23 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_ #define REG_A3XX_PC_RESTART_INDEX 0x000021ed #define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 -#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030 #define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) { return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; } #define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100 #define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 #define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000 +#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000 #define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 #define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000 #define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27 @@ -1451,17 +1714,39 @@ static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val) #define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 #define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 -#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0 #define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) { return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; } #define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 -#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 -#define A3XX_HLSQ_CONTROL_1_REG_ZWCOORD 0x02000000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000 +#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK; +} #define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc +#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK; +} +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000 +#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK; +} #define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 #define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) @@ -1478,13 +1763,13 @@ static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val) } #define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff #define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 #define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) { @@ -1498,13 +1783,13 @@ static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) } #define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff #define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) { return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; } -#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000 #define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) { @@ -1518,13 +1803,13 @@ static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) } #define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff #define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) { return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; } -#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 #define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) { @@ -1532,13 +1817,13 @@ static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) } #define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff #define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) { return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; } -#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000 #define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) { @@ -1620,12 +1905,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) } #define REG_A3XX_VFD_CONTROL_1 0x00002241 -#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000000f #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) { return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; } +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK 0x000000f0 +#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT 4 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK; +} +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK 0x00000f00 +#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT 8 +static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK; +} #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) @@ -2008,24 +2305,19 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; } #define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 0x00000008 #define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 #define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) { return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; } -#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 #define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) { return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; } -#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} #define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 #define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) @@ -2033,8 +2325,6 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; } #define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 -#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 -#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000 #define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 #define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) @@ -2075,7 +2365,8 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) { return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; } -#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 +#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000 +#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000 #define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) { @@ -2085,24 +2376,26 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; } static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; } -#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff +#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) { return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; } +#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100 #define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 #define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) { return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; } -#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000 #define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) { return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; } +#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000 #define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 #define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) @@ -2113,25 +2406,25 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; } static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) { return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) { return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) { return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; } -#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) { @@ -2139,6 +2432,12 @@ static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) } #define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} #define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 #define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) @@ -2155,8 +2454,38 @@ static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 #define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} #define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} #define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 @@ -2182,24 +2511,22 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; } #define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008 #define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 #define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) { return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; } -#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00 #define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) { return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; } -#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 -#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 -static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) -{ - return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; -} +#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000 +#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000 +#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000 #define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 #define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) @@ -2235,7 +2562,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) { return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; } -#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000 +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000 #define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) { @@ -2243,6 +2570,12 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) } #define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff +#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK; +} #define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 #define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) @@ -2259,8 +2592,38 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 #define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK; +} +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000 +#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24 +static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK; +} #define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val) +{ + return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK; +} +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0 +#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5 +static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val) +{ + return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK; +} #define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6

index 99de8271dba8..3220b91f559a 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/a4xx.xml.h b/drivers/gpu/drm/msm/adreno/a4xx.xml.hindex 99de8271dba8..3220b91f559a 100644--- a/ drivers/gpu/drm/msm/adreno/a4xx.xml.h +++ b/ drivers/gpu/drm/msm/adreno/a4xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109916 bytes, from 2016-02-20 18:44:48) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -47,11 +48,13 @@ enum a4xx_color_fmt { RB4_R8_UNORM = 2, RB4_R4G4B4A4_UNORM = 8, RB4_R5G5B5A1_UNORM = 10, - RB4_R5G6R5_UNORM = 14, + RB4_R5G6B5_UNORM = 14, RB4_R8G8_UNORM = 15, RB4_R8G8_SNORM = 16, RB4_R8G8_UINT = 17, RB4_R8G8_SINT = 18, + RB4_R16_UNORM = 19, + RB4_R16_SNORM = 20, RB4_R16_FLOAT = 21, RB4_R16_UINT = 22, RB4_R16_SINT = 23, @@ -63,12 +66,16 @@ enum a4xx_color_fmt { RB4_R10G10B10A2_UNORM = 31, RB4_R10G10B10A2_UINT = 34, RB4_R11G11B10_FLOAT = 39, + RB4_R16G16_UNORM = 40, + RB4_R16G16_SNORM = 41, RB4_R16G16_FLOAT = 42, RB4_R16G16_UINT = 43, RB4_R16G16_SINT = 44, RB4_R32_FLOAT = 45, RB4_R32_UINT = 46, RB4_R32_SINT = 47, + RB4_R16G16B16A16_UNORM = 52, + RB4_R16G16B16A16_SNORM = 53, RB4_R16G16B16A16_FLOAT = 54, RB4_R16G16B16A16_UINT = 55, RB4_R16G16B16A16_SINT = 56, @@ -106,6 +113,7 @@ enum a4xx_vtx_fmt { VFMT4_32_32_FIXED = 10, VFMT4_32_32_32_FIXED = 11, VFMT4_32_32_32_32_FIXED = 12, + VFMT4_11_11_10_FLOAT = 13, VFMT4_16_SINT = 16, VFMT4_16_16_SINT = 17, VFMT4_16_16_16_SINT = 18, @@ -146,52 +154,76 @@ enum a4xx_vtx_fmt { VFMT4_8_8_SNORM = 53, VFMT4_8_8_8_SNORM = 54, VFMT4_8_8_8_8_SNORM = 55, - VFMT4_10_10_10_2_UINT = 60, - VFMT4_10_10_10_2_UNORM = 61, - VFMT4_10_10_10_2_SINT = 62, - VFMT4_10_10_10_2_SNORM = 63, + VFMT4_10_10_10_2_UINT = 56, + VFMT4_10_10_10_2_UNORM = 57, + VFMT4_10_10_10_2_SINT = 58, + VFMT4_10_10_10_2_SNORM = 59, + VFMT4_2_10_10_10_UINT = 60, + VFMT4_2_10_10_10_UNORM = 61, + VFMT4_2_10_10_10_SINT = 62, + VFMT4_2_10_10_10_SNORM = 63, }; enum a4xx_tex_fmt { - TFMT4_5_6_5_UNORM = 11, - TFMT4_5_5_5_1_UNORM = 10, - TFMT4_4_4_4_4_UNORM = 8, - TFMT4_X8Z24_UNORM = 71, - TFMT4_10_10_10_2_UNORM = 33, TFMT4_A8_UNORM = 3, - TFMT4_L8_A8_UNORM = 13, TFMT4_8_UNORM = 4, - TFMT4_8_8_UNORM = 14, - TFMT4_8_8_8_8_UNORM = 28, TFMT4_8_SNORM = 5, - TFMT4_8_8_SNORM = 15, - TFMT4_8_8_8_8_SNORM = 29, TFMT4_8_UINT = 6, - TFMT4_8_8_UINT = 16, - TFMT4_8_8_8_8_UINT = 30, TFMT4_8_SINT = 7, + TFMT4_4_4_4_4_UNORM = 8, + TFMT4_5_5_5_1_UNORM = 9, + TFMT4_5_6_5_UNORM = 11, + TFMT4_L8_A8_UNORM = 13, + TFMT4_8_8_UNORM = 14, + TFMT4_8_8_SNORM = 15, + TFMT4_8_8_UINT = 16, TFMT4_8_8_SINT = 17, - TFMT4_8_8_8_8_SINT = 31, + TFMT4_16_UNORM = 18, + TFMT4_16_SNORM = 19, + TFMT4_16_FLOAT = 20, TFMT4_16_UINT = 21, - TFMT4_16_16_UINT = 41, - TFMT4_16_16_16_16_UINT = 54, TFMT4_16_SINT = 22, + TFMT4_8_8_8_8_UNORM = 28, + TFMT4_8_8_8_8_SNORM = 29, + TFMT4_8_8_8_8_UINT = 30, + TFMT4_8_8_8_8_SINT = 31, + TFMT4_9_9_9_E5_FLOAT = 32, + TFMT4_10_10_10_2_UNORM = 33, + TFMT4_10_10_10_2_UINT = 34, + TFMT4_11_11_10_FLOAT = 37, + TFMT4_16_16_UNORM = 38, + TFMT4_16_16_SNORM = 39, + TFMT4_16_16_FLOAT = 40, + TFMT4_16_16_UINT = 41, TFMT4_16_16_SINT = 42, - TFMT4_16_16_16_16_SINT = 55, + TFMT4_32_FLOAT = 43, TFMT4_32_UINT = 44, - TFMT4_32_32_UINT = 57, - TFMT4_32_32_32_32_UINT = 64, TFMT4_32_SINT = 45, - TFMT4_32_32_SINT = 58, - TFMT4_32_32_32_32_SINT = 65, - TFMT4_16_FLOAT = 20, - TFMT4_16_16_FLOAT = 40, + TFMT4_16_16_16_16_UNORM = 51, + TFMT4_16_16_16_16_SNORM = 52, TFMT4_16_16_16_16_FLOAT = 53, - TFMT4_32_FLOAT = 43, + TFMT4_16_16_16_16_UINT = 54, + TFMT4_16_16_16_16_SINT = 55, TFMT4_32_32_FLOAT = 56, + TFMT4_32_32_UINT = 57, + TFMT4_32_32_SINT = 58, + TFMT4_32_32_32_FLOAT = 59, + TFMT4_32_32_32_UINT = 60, + TFMT4_32_32_32_SINT = 61, TFMT4_32_32_32_32_FLOAT = 63, - TFMT4_9_9_9_E5_FLOAT = 32, - TFMT4_11_11_10_FLOAT = 37, + TFMT4_32_32_32_32_UINT = 64, + TFMT4_32_32_32_32_SINT = 65, + TFMT4_X8Z24_UNORM = 71, + TFMT4_DXT1 = 86, + TFMT4_DXT3 = 87, + TFMT4_DXT5 = 88, + TFMT4_RGTC1_UNORM = 90, + TFMT4_RGTC1_SNORM = 91, + TFMT4_RGTC2_UNORM = 94, + TFMT4_RGTC2_SNORM = 95, + TFMT4_BPTC_UFLOAT = 97, + TFMT4_BPTC_FLOAT = 98, + TFMT4_BPTC = 99, TFMT4_ATC_RGB = 100, TFMT4_ATC_RGBA_EXPLICIT = 101, TFMT4_ATC_RGBA_INTERPOLATED = 102, @@ -240,6 +272,545 @@ enum a4xx_tess_spacing { EVEN_SPACING = 3, }; +enum a4xx_ccu_perfcounter_select { + CCU_BUSY_CYCLES = 0, + CCU_RB_DEPTH_RETURN_STALL = 2, + CCU_RB_COLOR_RETURN_STALL = 3, + CCU_DEPTH_BLOCKS = 6, + CCU_COLOR_BLOCKS = 7, + CCU_DEPTH_BLOCK_HIT = 8, + CCU_COLOR_BLOCK_HIT = 9, + CCU_DEPTH_FLAG1_COUNT = 10, + CCU_DEPTH_FLAG2_COUNT = 11, + CCU_DEPTH_FLAG3_COUNT = 12, + CCU_DEPTH_FLAG4_COUNT = 13, + CCU_COLOR_FLAG1_COUNT = 14, + CCU_COLOR_FLAG2_COUNT = 15, + CCU_COLOR_FLAG3_COUNT = 16, + CCU_COLOR_FLAG4_COUNT = 17, + CCU_PARTIAL_BLOCK_READ = 18, +}; + +enum a4xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_BUSY = 1, + CP_PFP_IDLE = 2, + CP_PFP_BUSY_WORKING = 3, + CP_PFP_STALL_CYCLES_ANY = 4, + CP_PFP_STARVE_CYCLES_ANY = 5, + CP_PFP_STARVED_PER_LOAD_ADDR = 6, + CP_PFP_STALLED_PER_STORE_ADDR = 7, + CP_PFP_PC_PROFILE = 8, + CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + CP_PFP_COND_INDIRECT_DISCARDED = 10, + CP_LONG_RESUMPTIONS = 11, + CP_RESUME_CYCLES = 12, + CP_RESUME_TO_BOUNDARY_CYCLES = 13, + CP_LONG_PREEMPTIONS = 14, + CP_PREEMPT_CYCLES = 15, + CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, + CP_ME_FIFO_EMPTY_PFP_IDLE = 17, + CP_ME_FIFO_EMPTY_PFP_BUSY = 18, + CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, + CP_ME_FIFO_FULL_ME_BUSY = 20, + CP_ME_FIFO_FULL_ME_NON_WORKING = 21, + CP_ME_WAITING_FOR_PACKETS = 22, + CP_ME_BUSY_WORKING = 23, + CP_ME_STARVE_CYCLES_ANY = 24, + CP_ME_STARVE_CYCLES_PER_PROFILE = 25, + CP_ME_STALL_CYCLES_PER_PROFILE = 26, + CP_ME_PC_PROFILE = 27, + CP_RCIU_FIFO_EMPTY = 28, + CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, + CP_RCIU_FIFO_FULL = 30, + CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, + CP_RCIU_FIFO_FULL_AHB_MASTER = 32, + CP_RCIU_FIFO_FULL_OTHER = 33, + CP_AHB_IDLE = 34, + CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, + CP_AHB_STALL_ON_GRANT_SPLIT = 36, + CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, + CP_AHB_BUSY_WORKING = 38, + CP_AHB_BUSY_STALL_ON_HRDY = 39, + CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, +}; + +enum a4xx_gras_ras_perfcounter_select { + RAS_SUPER_TILES = 0, + RAS_8X8_TILES = 1, + RAS_4X4_TILES = 2, + RAS_BUSY_CYCLES = 3, + RAS_STALL_CYCLES_BY_RB = 4, + RAS_STALL_CYCLES_BY_VSC = 5, + RAS_STARVE_CYCLES_BY_TSE = 6, + RAS_SUPERTILE_CYCLES = 7, + RAS_TILE_CYCLES = 8, + RAS_FULLY_COVERED_SUPER_TILES = 9, + RAS_FULLY_COVERED_8X8_TILES = 10, + RAS_4X4_PRIM = 11, + RAS_8X4_4X8_PRIM = 12, + RAS_8X8_PRIM = 13, +}; + +enum a4xx_gras_tse_perfcounter_select { + TSE_INPUT_PRIM = 0, + TSE_INPUT_NULL_PRIM = 1, + TSE_TRIVAL_REJ_PRIM = 2, + TSE_CLIPPED_PRIM = 3, + TSE_NEW_PRIM = 4, + TSE_ZERO_AREA_PRIM = 5, + TSE_FACENESS_CULLED_PRIM = 6, + TSE_ZERO_PIXEL_PRIM = 7, + TSE_OUTPUT_NULL_PRIM = 8, + TSE_OUTPUT_VISIBLE_PRIM = 9, + TSE_PRE_CLIP_PRIM = 10, + TSE_POST_CLIP_PRIM = 11, + TSE_BUSY_CYCLES = 12, + TSE_PC_STARVE = 13, + TSE_RAS_STALL = 14, + TSE_STALL_BARYPLANE_FIFO_FULL = 15, + TSE_STALL_ZPLANE_FIFO_FULL = 16, +}; + +enum a4xx_hlsq_perfcounter_select { + HLSQ_SP_VS_STAGE_CONSTANT = 0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, + HLSQ_SP_FS_STAGE_CONSTANT = 2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, + HLSQ_TP_STATE = 4, + HLSQ_QUADS = 5, + HLSQ_PIXELS = 6, + HLSQ_VERTICES = 7, + HLSQ_SP_VS_STAGE_DATA_BYTES = 13, + HLSQ_SP_FS_STAGE_DATA_BYTES = 14, + HLSQ_BUSY_CYCLES = 15, + HLSQ_STALL_CYCLES_SP_STATE = 16, + HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, + HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, + HLSQ_STALL_CYCLES_UCHE = 19, + HLSQ_RBBM_LOAD_CYCLES = 20, + HLSQ_DI_TO_VS_START_SP = 21, + HLSQ_DI_TO_FS_START_SP = 22, + HLSQ_VS_STAGE_START_TO_DONE_SP = 23, + HLSQ_FS_STAGE_START_TO_DONE_SP = 24, + HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, + HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, + HLSQ_UCHE_LATENCY_CYCLES = 27, + HLSQ_UCHE_LATENCY_COUNT = 28, + HLSQ_STARVE_CYCLES_VFD = 29, +}; + +enum a4xx_pc_perfcounter_select { + PC_VIS_STREAMS_LOADED = 0, + PC_VPC_PRIMITIVES = 2, + PC_DEAD_PRIM = 3, + PC_LIVE_PRIM = 4, + PC_DEAD_DRAWCALLS = 5, + PC_LIVE_DRAWCALLS = 6, + PC_VERTEX_MISSES = 7, + PC_STALL_CYCLES_VFD = 9, + PC_STALL_CYCLES_TSE = 10, + PC_STALL_CYCLES_UCHE = 11, + PC_WORKING_CYCLES = 12, + PC_IA_VERTICES = 13, + PC_GS_PRIMITIVES = 14, + PC_HS_INVOCATIONS = 15, + PC_DS_INVOCATIONS = 16, + PC_DS_PRIMITIVES = 17, + PC_STARVE_CYCLES_FOR_INDEX = 20, + PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, + PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, + PC_STALL_CYCLES_TESS = 23, + PC_STARVE_CYCLES_FOR_POSITION = 24, + PC_MODE0_DRAWCALL = 25, + PC_MODE1_DRAWCALL = 26, + PC_MODE2_DRAWCALL = 27, + PC_MODE3_DRAWCALL = 28, + PC_MODE4_DRAWCALL = 29, + PC_PREDICATED_DEAD_DRAWCALL = 30, + PC_STALL_CYCLES_BY_TSE_ONLY = 31, + PC_STALL_CYCLES_BY_VPC_ONLY = 32, + PC_VPC_POS_DATA_TRANSACTION = 33, + PC_BUSY_CYCLES = 34, + PC_STARVE_CYCLES_DI = 35, + PC_STALL_CYCLES_VPC = 36, + TESS_WORKING_CYCLES = 37, + TESS_NUM_CYCLES_SETUP_WORKING = 38, + TESS_NUM_CYCLES_PTGEN_WORKING = 39, + TESS_NUM_CYCLES_CONNGEN_WORKING = 40, + TESS_BUSY_CYCLES = 41, + TESS_STARVE_CYCLES_PC = 42, + TESS_STALL_CYCLES_PC = 43, +}; + +enum a4xx_pwr_perfcounter_select { + PWR_CORE_CLOCK_CYCLES = 0, + PWR_BUSY_CLOCK_CYCLES = 1, +}; + +enum a4xx_rb_perfcounter_select { + RB_BUSY_CYCLES = 0, + RB_BUSY_CYCLES_BINNING = 1, + RB_BUSY_CYCLES_RENDERING = 2, + RB_BUSY_CYCLES_RESOLVE = 3, + RB_STARVE_CYCLES_BY_SP = 4, + RB_STARVE_CYCLES_BY_RAS = 5, + RB_STARVE_CYCLES_BY_MARB = 6, + RB_STALL_CYCLES_BY_MARB = 7, + RB_STALL_CYCLES_BY_HLSQ = 8, + RB_RB_RB_MARB_DATA = 9, + RB_SP_RB_QUAD = 10, + RB_RAS_RB_Z_QUADS = 11, + RB_GMEM_CH0_READ = 12, + RB_GMEM_CH1_READ = 13, + RB_GMEM_CH0_WRITE = 14, + RB_GMEM_CH1_WRITE = 15, + RB_CP_CONTEXT_DONE = 16, + RB_CP_CACHE_FLUSH = 17, + RB_CP_ZPASS_DONE = 18, + RB_STALL_FIFO0_FULL = 19, + RB_STALL_FIFO1_FULL = 20, + RB_STALL_FIFO2_FULL = 21, + RB_STALL_FIFO3_FULL = 22, + RB_RB_HLSQ_TRANSACTIONS = 23, + RB_Z_READ = 24, + RB_Z_WRITE = 25, + RB_C_READ = 26, + RB_C_WRITE = 27, + RB_C_READ_LATENCY = 28, + RB_Z_READ_LATENCY = 29, + RB_STALL_BY_UCHE = 30, + RB_MARB_UCHE_TRANSACTIONS = 31, + RB_CACHE_STALL_MISS = 32, + RB_CACHE_STALL_FIFO_FULL = 33, + RB_8BIT_BLENDER_UNITS_ACTIVE = 34, + RB_16BIT_BLENDER_UNITS_ACTIVE = 35, + RB_SAMPLER_UNITS_ACTIVE = 36, + RB_TOTAL_PASS = 38, + RB_Z_PASS = 39, + RB_Z_FAIL = 40, + RB_S_FAIL = 41, + RB_POWER0 = 42, + RB_POWER1 = 43, + RB_POWER2 = 44, + RB_POWER3 = 45, + RB_POWER4 = 46, + RB_POWER5 = 47, + RB_POWER6 = 48, + RB_POWER7 = 49, +}; + +enum a4xx_rbbm_perfcounter_select { + RBBM_ALWAYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TPL1_BUSY = 12, + RBBM_ANY_SP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_STATUS_MASKED = 22, + RBBM_CP_BUSY_GFX_CORE_IDLE = 23, + RBBM_TESS_BUSY = 24, + RBBM_COM_BUSY = 25, + RBBM_DCOM_BUSY = 32, + RBBM_ANY_CCU_BUSY = 33, + RBBM_DPM_BUSY = 34, +}; + +enum a4xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_GM_LOAD_INSTRUCTIONS = 3, + SP_GM_STORE_INSTRUCTIONS = 4, + SP_GM_ATOMICS = 5, + SP_VS_STAGE_TEX_INSTRUCTIONS = 6, + SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, + SP_VS_STAGE_EFU_INSTRUCTIONS = 8, + SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_STAGE_TEX_INSTRUCTIONS = 11, + SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, + SP_FS_STAGE_EFU_INSTRUCTIONS = 13, + SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_WORKING_CYCLES = 29, + SP_EFU_WORKING_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_BUSY_CYCLES = 35, + SP_HS_INSTRUCTIONS = 36, + SP_DS_INSTRUCTIONS = 37, + SP_GS_INSTRUCTIONS = 38, + SP_CS_INSTRUCTIONS = 39, + SP_SCHEDULER_NON_WORKING = 40, + SP_WAVE_CONTEXTS = 41, + SP_WAVE_CONTEXT_CYCLES = 42, + SP_POWER0 = 43, + SP_POWER1 = 44, + SP_POWER2 = 45, + SP_POWER3 = 46, + SP_POWER4 = 47, + SP_POWER5 = 48, + SP_POWER6 = 49, + SP_POWER7 = 50, + SP_POWER8 = 51, + SP_POWER9 = 52, + SP_POWER10 = 53, + SP_POWER11 = 54, + SP_POWER12 = 55, + SP_POWER13 = 56, + SP_POWER14 = 57, + SP_POWER15 = 58, +}; + +enum a4xx_tp_perfcounter_select { + TP_L1_REQUESTS = 0, + TP_L1_MISSES = 1, + TP_QUADS_OFFSET = 8, + TP_QUAD_SHADOW = 9, + TP_QUADS_ARRAY = 10, + TP_QUADS_GRADIENT = 11, + TP_QUADS_1D2D = 12, + TP_QUADS_3DCUBE = 13, + TP_BUSY_CYCLES = 16, + TP_STALL_CYCLES_BY_ARB = 17, + TP_STATE_CACHE_REQUESTS = 20, + TP_STATE_CACHE_MISSES = 21, + TP_POWER0 = 22, + TP_POWER1 = 23, + TP_POWER2 = 24, + TP_POWER3 = 25, + TP_POWER4 = 26, + TP_POWER5 = 27, + TP_POWER6 = 28, + TP_POWER7 = 29, +}; + +enum a4xx_uche_perfcounter_select { + UCHE_VBIF_READ_BEATS_TP = 0, + UCHE_VBIF_READ_BEATS_VFD = 1, + UCHE_VBIF_READ_BEATS_HLSQ = 2, + UCHE_VBIF_READ_BEATS_MARB = 3, + UCHE_VBIF_READ_BEATS_SP = 4, + UCHE_READ_REQUESTS_TP = 5, + UCHE_READ_REQUESTS_VFD = 6, + UCHE_READ_REQUESTS_HLSQ = 7, + UCHE_READ_REQUESTS_MARB = 8, + UCHE_READ_REQUESTS_SP = 9, + UCHE_WRITE_REQUESTS_MARB = 10, + UCHE_WRITE_REQUESTS_SP = 11, + UCHE_TAG_CHECK_FAILS = 12, + UCHE_EVICTS = 13, + UCHE_FLUSHES = 14, + UCHE_VBIF_LATENCY_CYCLES = 15, + UCHE_VBIF_LATENCY_SAMPLES = 16, + UCHE_BUSY_CYCLES = 17, + UCHE_VBIF_READ_BEATS_PC = 18, + UCHE_READ_REQUESTS_PC = 19, + UCHE_WRITE_REQUESTS_VPC = 20, + UCHE_STALL_BY_VBIF = 21, + UCHE_WRITE_REQUESTS_VSC = 22, + UCHE_POWER0 = 23, + UCHE_POWER1 = 24, + UCHE_POWER2 = 25, + UCHE_POWER3 = 26, + UCHE_POWER4 = 27, + UCHE_POWER5 = 28, + UCHE_POWER6 = 29, + UCHE_POWER7 = 30, +}; + +enum a4xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, + CYCLES_HELD_OFF_ID_0 = 86, + CYCLES_HELD_OFF_ID_1 = 87, + CYCLES_HELD_OFF_ID_2 = 88, + CYCLES_HELD_OFF_ID_3 = 89, + CYCLES_HELD_OFF_ID_4 = 90, + CYCLES_HELD_OFF_ID_5 = 91, + CYCLES_HELD_OFF_ID_6 = 92, + CYCLES_HELD_OFF_ID_7 = 93, + CYCLES_HELD_OFF_ID_8 = 94, + CYCLES_HELD_OFF_ID_9 = 95, + CYCLES_HELD_OFF_ID_10 = 96, + CYCLES_HELD_OFF_ID_11 = 97, + CYCLES_HELD_OFF_ID_12 = 98, + CYCLES_HELD_OFF_ID_13 = 99, + CYCLES_HELD_OFF_ID_14 = 100, + CYCLES_HELD_OFF_ID_15 = 101, + AXI_READ_REQUEST_HELD_OFF = 102, + AXI_WRITE_REQUEST_HELD_OFF = 103, + AXI_REQUEST_HELD_OFF = 104, + AXI_WRITE_DATA_HELD_OFF = 105, + OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, + OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, + OCMEM_AXI_REQUEST_HELD_OFF = 108, + OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, + ELAPSED_CYCLES_DDR = 110, + ELAPSED_CYCLES_OCMEM = 111, +}; + +enum a4xx_vfd_perfcounter_select { + VFD_UCHE_BYTE_FETCHED = 0, + VFD_UCHE_TRANS = 1, + VFD_FETCH_INSTRUCTIONS = 3, + VFD_BUSY_CYCLES = 5, + VFD_STALL_CYCLES_UCHE = 6, + VFD_STALL_CYCLES_HLSQ = 7, + VFD_STALL_CYCLES_VPC_BYPASS = 8, + VFD_STALL_CYCLES_VPC_ALLOC = 9, + VFD_MODE_0_FIBERS = 13, + VFD_MODE_1_FIBERS = 14, + VFD_MODE_2_FIBERS = 15, + VFD_MODE_3_FIBERS = 16, + VFD_MODE_4_FIBERS = 17, + VFD_BFIFO_STALL = 18, + VFD_NUM_VERTICES_TOTAL = 19, + VFD_PACKER_FULL = 20, + VFD_UCHE_REQUEST_FIFO_FULL = 21, + VFD_STARVE_CYCLES_PC = 22, + VFD_STARVE_CYCLES_UCHE = 23, +}; + +enum a4xx_vpc_perfcounter_select { + VPC_SP_LM_COMPONENTS = 2, + VPC_SP0_LM_BYTES = 3, + VPC_SP1_LM_BYTES = 4, + VPC_SP2_LM_BYTES = 5, + VPC_SP3_LM_BYTES = 6, + VPC_WORKING_CYCLES = 7, + VPC_STALL_CYCLES_LM = 8, + VPC_STARVE_CYCLES_RAS = 9, + VPC_STREAMOUT_CYCLES = 10, + VPC_UCHE_TRANSACTIONS = 12, + VPC_STALL_CYCLES_UCHE = 13, + VPC_BUSY_CYCLES = 14, + VPC_STARVE_CYCLES_SP = 15, +}; + +enum a4xx_vsc_perfcounter_select { + VSC_BUSY_CYCLES = 0, + VSC_WORKING_CYCLES = 1, + VSC_STALL_CYCLES_UCHE = 2, + VSC_STARVE_CYCLES_RAS = 3, + VSC_EOT_NUM = 4, +}; + enum a4xx_tex_filter { A4XX_TEX_NEAREST = 0, A4XX_TEX_LINEAR = 1, @@ -326,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) #define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce +#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 + #define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 #define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 @@ -400,8 +977,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 #define A4XX_RB_MRT_CONTROL_BLEND 0x00000010 #define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020 -#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400 -#define A4XX_RB_MRT_CONTROL_B11 0x00000800 +#define A4XX_RB_MRT_CONTROL_ROP_ENABLE 0x00000040 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val) +{ + return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) @@ -490,8 +1072,8 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; } -#define REG_A4XX_RB_BLEND_RED 0x000020f3 -#define A4XX_RB_BLEND_RED_UINT__MASK 0x00007fff +#define REG_A4XX_RB_BLEND_RED 0x000020f0 +#define A4XX_RB_BLEND_RED_UINT__MASK 0x0000ffff #define A4XX_RB_BLEND_RED_UINT__SHIFT 0 static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val) { @@ -504,8 +1086,16 @@ static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val) return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK; } -#define REG_A4XX_RB_BLEND_GREEN 0x000020f4 -#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x00007fff +#define REG_A4XX_RB_BLEND_RED_F32 0x000020f1 +#define A4XX_RB_BLEND_RED_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_RED_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_RED_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_GREEN 0x000020f2 +#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x0000ffff #define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0 static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val) { @@ -518,8 +1108,16 @@ static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val) return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK; } -#define REG_A4XX_RB_BLEND_BLUE 0x000020f5 -#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x00007fff +#define REG_A4XX_RB_BLEND_GREEN_F32 0x000020f3 +#define A4XX_RB_BLEND_GREEN_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_GREEN_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK; +} + +#define REG_A4XX_RB_BLEND_BLUE 0x000020f4 +#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x0000ffff #define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0 static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val) { @@ -532,8 +1130,16 @@ static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val) return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK; } +#define REG_A4XX_RB_BLEND_BLUE_F32 0x000020f5 +#define A4XX_RB_BLEND_BLUE_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_BLUE_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK; +} + #define REG_A4XX_RB_BLEND_ALPHA 0x000020f6 -#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x00007fff +#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x0000ffff #define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0 static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val) { @@ -546,6 +1152,14 @@ static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val) return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK; } +#define REG_A4XX_RB_BLEND_ALPHA_F32 0x000020f7 +#define A4XX_RB_BLEND_ALPHA_F32__MASK 0xffffffff +#define A4XX_RB_BLEND_ALPHA_F32__SHIFT 0 +static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val) +{ + return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK; +} + #define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8 #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0 @@ -568,7 +1182,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val) { return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK; } -#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100 +#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val) @@ -736,6 +1350,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) } #define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000 +#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 #define REG_A4XX_RB_DEPTH_CLEAR 0x00002102 @@ -996,8 +1611,386 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x #define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d +#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098 +#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001 +#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON 0x00100000 + #define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c +#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d + +#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e + +#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f + +#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 + +#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9 + +#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa + +#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 + +#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba + +#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb + +#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc + +#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd + +#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be + +#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf + +#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 + +#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 + +#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a + +#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b + +#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c + +#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d + +#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e + +#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f + +#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 + +#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 + +#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a + +#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b + +#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c + +#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d + +#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e + +#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f + +#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 + +#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 + +#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a + +#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b + +#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c + +#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d + +#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e + +#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f + +#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 + +#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 + +#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a + +#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f + static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } @@ -1046,6 +2039,10 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { r static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; } +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099 + +#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a + #define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 #define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170 @@ -1060,6 +2057,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) #define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 + #define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a #define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d @@ -1099,6 +2104,11 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) #define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f +#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0 +#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000 + +#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8 + #define REG_A4XX_CP_SCRATCH_UMASK 0x00000228 #define REG_A4XX_CP_SCRATCH_ADDR 0x00000229 @@ -1191,6 +2201,20 @@ static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 #define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 +#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 + #define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } @@ -1201,6 +2225,28 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 #define REG_A4XX_SP_MODE_CONTROL 0x00000ec3 +#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9 + +#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca + +#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb + +#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc + +#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd + +#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece + #define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf #define REG_A4XX_SP_SP_CTRL_REG 0x000022c0 @@ -1699,6 +2745,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64 +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 + #define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 #define REG_A4XX_VPC_ATTR 0x00002140 @@ -1811,6 +2863,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0 #define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 + #define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a #define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 @@ -1967,6 +3033,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) #define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a + #define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b #define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 @@ -2021,9 +3101,23 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a + #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f + #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 +#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 +#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 #define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001 @@ -2114,6 +3208,7 @@ static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val) #define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073 #define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004 +#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008 #define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074 #define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff @@ -2285,6 +3380,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 + #define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 #define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 @@ -2295,6 +3404,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d + #define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 @@ -2549,6 +3674,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) #define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 +#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + #define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 #define REG_A4XX_PC_BIN_BASE 0x000021c0 @@ -2564,7 +3701,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val) #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000 -#define REG_A4XX_UNKNOWN_21C5 0x000021c5 +#define REG_A4XX_PC_PRIM_VTX_CNTL2 0x000021c5 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK 0x00000007 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT 0 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK 0x00000038 +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT 3 +static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK; +} +#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE 0x00000040 #define REG_A4XX_PC_RESTART_INDEX 0x000021c6 @@ -2646,20 +3796,6 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val) #define REG_A4XX_UNKNOWN_20EF 0x000020ef -#define REG_A4XX_UNKNOWN_20F0 0x000020f0 - -#define REG_A4XX_UNKNOWN_20F1 0x000020f1 - -#define REG_A4XX_UNKNOWN_20F2 0x000020f2 - -#define REG_A4XX_UNKNOWN_20F7 0x000020f7 -#define A4XX_UNKNOWN_20F7__MASK 0xffffffff -#define A4XX_UNKNOWN_20F7__SHIFT 0 -static inline uint32_t A4XX_UNKNOWN_20F7(float val) -{ - return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK; -} - #define REG_A4XX_UNKNOWN_2152 0x00002152 #define REG_A4XX_UNKNOWN_2153 0x00002153 @@ -2720,6 +3856,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val) { return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK; } +#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000 +#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19 +static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val) +{ + return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK; +} #define REG_A4XX_TEX_SAMP_1 0x00000001 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e @@ -2728,6 +3870,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val { return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK; } +#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF 0x00000010 #define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00 @@ -2796,7 +3939,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val) { return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK; } -#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000 +#define A4XX_TEX_CONST_1_WIDTH__MASK 0x3fff8000 #define A4XX_TEX_CONST_1_WIDTH__SHIFT 15 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val) {

index a53f1be05f75..d0d3c7baa8fe 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.cindex a53f1be05f75..d0d3c7baa8fe 100644--- a/ drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/ drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -102,11 +102,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000); - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); + /* Early A430's have a timing issue with SP/TP power collapse; + disabling HW clock gating prevents it. */ + if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2) + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0); + else + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0); } + static void a4xx_me_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb; @@ -141,7 +147,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) uint32_t *ptr, len; int i, ret; - if (adreno_is_a4xx(adreno_gpu)) { + if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); @@ -150,6 +156,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); + } else if (adreno_is_a430(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); } else { BUG(); } @@ -161,6 +174,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); + if (adreno_is_a430(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); + } + /* Enable the RBBM error reporting bits */ gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001); @@ -183,6 +200,14 @@ static int a4xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); + /* use the first CP counter for timestamp queries.. userspace may set + * this as well but it selects the same counter/countable: + */ + gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT); + + if (adreno_is_a430(adreno_gpu)) + gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); + /* Disable L2 bypass to avoid UCHE out of bounds errors */ gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); @@ -190,6 +215,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); + /* On A430 enable SP regfile sleep for power savings */ + /* TODO downstream does this for !420, so maybe applies for 405 too? */ + if (!adreno_is_a420(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, + 0x00000441); + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, + 0x00000441); + } + a4xx_enable_hwcg(gpu); /* @@ -204,10 +238,6 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); } - ret = adreno_hw_init(gpu); - if (ret) - return ret; - /* setup access protection: */ gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007); @@ -263,6 +293,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0); a4xx_me_init(gpu); + return 0; } @@ -317,6 +348,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu) status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS); DBG("%s: Int status %08x", gpu->name, status); + if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) { + uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS); + printk("CP | Protected mode error| %s | addr=%x

", + reg & (1 << 24) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + } + gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status); msm_gpu_retire(gpu); @@ -512,12 +550,63 @@ static void a4xx_dump(struct msm_gpu *gpu) adreno_dump(gpu); } +static int a4xx_pm_resume(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int ret; + + ret = msm_gpu_pm_resume(gpu); + if (ret) + return ret; + + if (adreno_is_a430(adreno_gpu)) { + unsigned int reg; + /* Set the default register values; set SW_COLLAPSE to 0 */ + gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000); + do { + udelay(5); + reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS); + } while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON)); + } + return 0; +} + +static int a4xx_pm_suspend(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int ret; + + ret = msm_gpu_pm_suspend(gpu); + if (ret) + return ret; + + if (adreno_is_a430(adreno_gpu)) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001); + } + return 0; +} + +static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) +{ + uint32_t hi, lo, tmp; + + tmp = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_HI); + do { + hi = tmp; + lo = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO); + tmp = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_HI); + } while (tmp != hi); + + *value = (((uint64_t)hi) << 32) | lo; + + return 0; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, .hw_init = a4xx_hw_init, - .pm_suspend = msm_gpu_pm_suspend, - .pm_resume = msm_gpu_pm_resume, + .pm_suspend = a4xx_pm_suspend, + .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, .last_fence = adreno_last_fence, .submit = adreno_submit, @@ -529,6 +618,7 @@ static const struct adreno_gpu_funcs funcs = { .show = a4xx_show, #endif }, + .get_timestamp = a4xx_get_timestamp, }; struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)

index c304468cf2bd..e81481d1b7df 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.hindex c304468cf2bd..e81481d1b7df 100644--- a/ drivers/gpu/drm/msm/adreno/adreno_common.xml.h +++ b/ drivers/gpu/drm/msm/adreno/adreno_common.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67771 bytes, from 2015-09-14 20:46:55) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63970 bytes, from 2015-09-14 20:50:12) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109916 bytes, from 2016-02-20 18:44:48) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) +- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -119,6 +120,23 @@ enum adreno_rb_copy_control_mode { RB_COPY_DEPTH_STENCIL = 5, }; +enum a3xx_rop_code { + ROP_CLEAR = 0, + ROP_NOR = 1, + ROP_AND_INVERTED = 2, + ROP_COPY_INVERTED = 3, + ROP_AND_REVERSE = 4, + ROP_INVERT = 5, + ROP_NAND = 7, + ROP_AND = 8, + ROP_EQUIV = 9, + ROP_NOOP = 10, + ROP_OR_INVERTED = 11, + ROP_OR_REVERSE = 13, + ROP_OR = 14, + ROP_SET = 15, +}; + enum a3xx_render_mode { RB_RENDERING_PASS = 0, RB_TILING_PASS = 1,

index 950d27d26b30..5127b75dbf40 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.cindex 950d27d26b30..5127b75dbf40 100644--- a/ drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/ drivers/gpu/drm/msm/adreno/adreno_device.c @@ -69,6 +69,14 @@ static const struct adreno_info gpulist[] = { .pfpfw = "a420_pfp.fw", .gmem = (SZ_1M + SZ_512K), .init = a4xx_gpu_init, + }, { + .rev = ADRENO_REV(4, 3, 0, ANY_ID), + .revn = 430, + .name = "A430", + .pm4fw = "a420_pm4.fw", + .pfpfw = "a420_pfp.fw", + .gmem = (SZ_1M + SZ_512K), + .init = a4xx_gpu_init, }, };

index 0e1d0c57cd3d..4951172ede06 100644

--- a/

+++ b/ diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.cindex 0e1d0c57cd3d..4951172ede06 100644--- a/ drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/ drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -44,6 +44,10 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) case MSM_PARAM_MAX_FREQ: *value = adreno_gpu->base.fast_rate; return 0; + case MSM_PARAM_TIMESTAMP: + if (adreno_gpu->funcs->get_timestamp) + return adreno_gpu->funcs->get_timestamp(gpu, value); + return -EINVAL; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; @@ -71,18 +75,15 @@ int adreno_hw_init(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, /* size is log2(quad-words): */ AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8))); + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); /* Setup ringbuffer address: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova); - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - rbmemptr(adreno_gpu, rptr)); - - /* Setup scratch/timestamp: */ - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR, - rbmemptr(adreno_gpu, fence)); - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1); + if (!adreno_is_a430(adreno_gpu)) + adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, + rbmemptr(adreno_gpu, rptr)); return 0; } @@ -92,6 +93,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) return ring->cur - ring->start; } +/* Use this helper to read rptr, since a430 doesn't update rptr in memory */ +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +{ + if (adreno_is_a430(adreno_gpu)) + return adreno_gpu->memptrs->rptr = adreno_gpu_read( + adreno_gpu, REG_ADRENO_CP_RB_RPTR); + else + 