| <?xml version="1.0" encoding="UTF-8"?> |
| <database xmlns="http://nouveau.freedesktop.org/" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> |
| <import file="freedreno_copyright.xml"/> |
| <import file="adreno/adreno_common.xml"/> |
| |
| <enum name="vgt_event_type" varset="chip"> |
|   <!-- |
|     Event identifiers. A value that carries a variants attribute applies |
|     only to the listed chip generations, so the same number can appear |
|     more than once under a different name for different generations. |
|   --> |
|   <value name="VS_DEALLOC" value="0"/> |
|   <value name="PS_DEALLOC" value="1" variants="A2XX-A6XX"/> |
|   <value name="VS_DONE_TS" value="2"/> |
|   <value name="PS_DONE_TS" value="3"/> |
|   <doc> |
|     Flushes dirty data from UCHE, and also writes a GPU timestamp to |
|     the address if one is provided. |
|   </doc> |
|   <value name="CACHE_FLUSH_TS" value="4"/> |
|   <value name="CONTEXT_DONE" value="5"/> |
|   <value name="CACHE_FLUSH" value="6" variants="A2XX-A4XX"/> |
|   <value name="VIZQUERY_START" value="7" variants="A2XX"/> |
|   <value name="HLSQ_FLUSH" value="7" variants="A3XX-A4XX"/> |
|   <value name="VIZQUERY_END" value="8" variants="A2XX"/> |
|   <value name="SC_WAIT_WC" value="9" variants="A2XX"/> |
|   <value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/> |
|   <value name="START_PRIMITIVE_CTRS" value="11" variants="A6XX"/> |
|   <value name="STOP_PRIMITIVE_CTRS" value="12" variants="A6XX"/> |
|   <!-- Not sure that these 4 events don't have the same meaning as on A5XX+ --> |
|   <value name="RST_PIX_CNT" value="13" variants="A2XX-A4XX"/> |
|   <value name="RST_VTX_CNT" value="14" variants="A2XX-A4XX"/> |
|   <value name="TILE_FLUSH" value="15" variants="A2XX-A4XX"/> |
|   <value name="STAT_EVENT" value="16" variants="A2XX-A4XX"/> |
|   <value name="CACHE_FLUSH_AND_INV_TS_EVENT" value="20" variants="A2XX-A4XX"/> |
|   <doc> |
|     If A6XX_RB_SAMPLE_COUNT_CONTROL.copy is true, writes OQ Z passed |
|     sample counts to RB_SAMPLE_COUNT_ADDR. This writes to main |
|     memory, skipping UCHE. |
|   </doc> |
|   <value name="ZPASS_DONE" value="21"/> |
|   <value name="CACHE_FLUSH_AND_INV_EVENT" value="22" variants="A2XX"/> |
| |
|   <doc> |
|     Writes the GPU timestamp to the address that follows, once RB |
|     access and flushes are complete. |
|   </doc> |
|   <value name="RB_DONE_TS" value="22" variants="A3XX-"/> |
| |
|   <value name="PERFCOUNTER_START" value="23" variants="A2XX-A4XX"/> |
|   <value name="PERFCOUNTER_STOP" value="24" variants="A2XX-A4XX"/> |
|   <value name="VS_FETCH_DONE" value="27"/> |
|   <value name="FACENESS_FLUSH" value="28" variants="A2XX-A4XX"/> |
| |
|   <!-- a5xx events --> |
|   <value name="WT_DONE_TS" value="8" variants="A5XX-"/> |
|   <value name="START_FRAGMENT_CTRS" value="13" variants="A5XX-"/> |
|   <value name="STOP_FRAGMENT_CTRS" value="14" variants="A5XX-"/> |
|   <value name="START_COMPUTE_CTRS" value="15" variants="A5XX-"/> |
|   <value name="STOP_COMPUTE_CTRS" value="16" variants="A5XX-"/> |
|   <value name="FLUSH_SO_0" value="17" variants="A5XX-"/> |
|   <value name="FLUSH_SO_1" value="18" variants="A5XX-"/> |
|   <value name="FLUSH_SO_2" value="19" variants="A5XX-"/> |
|   <value name="FLUSH_SO_3" value="20" variants="A5XX-"/> |
| |
|   <doc> |
|     Invalidates depth attachment data from the CCU. We assume this |
|     happens in the last stage. |
|   </doc> |
|   <value name="PC_CCU_INVALIDATE_DEPTH" value="24" variants="A5XX-"/> |
| |
|   <doc> |
|     Invalidates color attachment data from the CCU. We assume this |
|     happens in the last stage. |
|   </doc> |
|   <value name="PC_CCU_INVALIDATE_COLOR" value="25" variants="A5XX-"/> |
| |
|   <doc> |
|     Flushes the small cache used by CP_EVENT_WRITE::BLIT (which, |
|     along with its registers, would be better named RESOLVE). |
|   </doc> |
|   <value name="PC_CCU_RESOLVE_TS" value="26" variants="A6XX"/> |
| |
|   <doc> |
|     Flushes depth attachment data from the CCU. We assume this |
|     happens in the last stage. |
|   </doc> |
|   <value name="PC_CCU_FLUSH_DEPTH_TS" value="28" variants="A5XX-"/> |
| |
|   <doc> |
|     Flushes color attachment data from the CCU. We assume this |
|     happens in the last stage. |
|   </doc> |
|   <value name="PC_CCU_FLUSH_COLOR_TS" value="29" variants="A5XX-"/> |
| |
|   <doc> |
|     2D blit to resolve GMEM to system memory (skipping CCU) at the |
|     end of a render pass. Compare to CP_BLIT's BLIT_OP_SCALE for |
|     more general blitting. |
|   </doc> |
|   <value name="BLIT" value="30" variants="A5XX-"/> |
| |
|   <doc> |
|     Clears based on GRAS_LRZ_CNTL configuration, could clear |
|     fast-clear buffer or LRZ direction. |
|     LRZ direction is stored at lrz_fc_offset + 0x200, has 1 byte which |
|     could be expressed by enum: |
|       CUR_DIR_DISABLED = 0x0 |
|       CUR_DIR_GE = 0x1 |
|       CUR_DIR_LE = 0x2 |
|       CUR_DIR_UNSET = 0x3 |
|     Clear of direction means setting the direction to CUR_DIR_UNSET. |
|   </doc> |
|   <value name="LRZ_CLEAR" value="37" variants="A5XX-"/> |
| |
|   <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> |
|   <value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/> |
|   <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/> |
|   <value name="UNK_40" value="40" variants="A7XX"/> |
|   <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> |
|   <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> |
|   <value name="UNK_2C" value="44" variants="A5XX-"/> |
|   <value name="UNK_2D" value="45" variants="A5XX-"/> |
| |
|   <!-- a6xx events --> |
|   <doc> |
|     Invalidates UCHE. |
|   </doc> |
|   <value name="CACHE_INVALIDATE" value="49" variants="A6XX"/> |
| |
|   <value name="LABEL" value="63" variants="A6XX-"/> |
| |
|   <!-- note, some of these are the same as a6xx, just named differently --> |
| |
|   <doc> Doesn't seem to do anything </doc> |
|   <value name="DUMMY_EVENT" value="1" variants="A7XX"/> |
|   <value name="CCU_INVALIDATE_DEPTH" value="24" variants="A7XX"/> |
|   <value name="CCU_INVALIDATE_COLOR" value="25" variants="A7XX"/> |
|   <value name="CCU_RESOLVE_CLEAN" value="26" variants="A7XX"/> |
|   <value name="CCU_FLUSH_DEPTH" value="28" variants="A7XX"/> |
|   <value name="CCU_FLUSH_COLOR" value="29" variants="A7XX"/> |
|   <value name="CCU_RESOLVE" value="30" variants="A7XX"/> |
|   <value name="CCU_END_RESOLVE_GROUP" value="31" variants="A7XX"/> |
|   <value name="CCU_CLEAN_DEPTH" value="32" variants="A7XX"/> |
|   <value name="CCU_CLEAN_COLOR" value="33" variants="A7XX"/> |
|   <value name="CACHE_RESET" value="48" variants="A7XX"/> |
|   <value name="CACHE_CLEAN" value="49" variants="A7XX"/> |
|   <!-- TODO: deal with name conflicts with other gens --> |
|   <value name="CACHE_FLUSH7" value="50" variants="A7XX"/> |
|   <value name="CACHE_INVALIDATE7" value="51" variants="A7XX"/> |
| </enum> |
| |
| <enum name="pc_di_primtype"> |
|   <!-- |
|     Primitive type codes. The patch entries are contiguous: |
|     DI_PT_PATCHESn is 0x1f + n for n = 0..31. |
|   --> |
|   <value name="DI_PT_NONE" value="0"/> |
|   <!-- POINTLIST_PSIZE is used on a3xx/a4xx when gl_PointSize is written: --> |
|   <value name="DI_PT_POINTLIST_PSIZE" value="1"/> |
|   <value name="DI_PT_LINELIST" value="2"/> |
|   <value name="DI_PT_LINESTRIP" value="3"/> |
|   <value name="DI_PT_TRILIST" value="4"/> |
|   <value name="DI_PT_TRIFAN" value="5"/> |
|   <value name="DI_PT_TRISTRIP" value="6"/> |
|   <value name="DI_PT_LINELOOP" value="7"/> <!-- a22x, a3xx --> |
|   <value name="DI_PT_RECTLIST" value="8"/> |
|   <value name="DI_PT_POINTLIST" value="9"/> |
|   <value name="DI_PT_LINE_ADJ" value="0xa"/> |
|   <value name="DI_PT_LINESTRIP_ADJ" value="0xb"/> |
|   <value name="DI_PT_TRI_ADJ" value="0xc"/> |
|   <value name="DI_PT_TRISTRIP_ADJ" value="0xd"/> |
| |
|   <value name="DI_PT_PATCHES0" value="0x1f"/> |
|   <value name="DI_PT_PATCHES1" value="0x20"/> |
|   <value name="DI_PT_PATCHES2" value="0x21"/> |
|   <value name="DI_PT_PATCHES3" value="0x22"/> |
|   <value name="DI_PT_PATCHES4" value="0x23"/> |
|   <value name="DI_PT_PATCHES5" value="0x24"/> |
|   <value name="DI_PT_PATCHES6" value="0x25"/> |
|   <value name="DI_PT_PATCHES7" value="0x26"/> |
|   <value name="DI_PT_PATCHES8" value="0x27"/> |
|   <value name="DI_PT_PATCHES9" value="0x28"/> |
|   <value name="DI_PT_PATCHES10" value="0x29"/> |
|   <value name="DI_PT_PATCHES11" value="0x2a"/> |
|   <value name="DI_PT_PATCHES12" value="0x2b"/> |
|   <value name="DI_PT_PATCHES13" value="0x2c"/> |
|   <value name="DI_PT_PATCHES14" value="0x2d"/> |
|   <value name="DI_PT_PATCHES15" value="0x2e"/> |
|   <value name="DI_PT_PATCHES16" value="0x2f"/> |
|   <value name="DI_PT_PATCHES17" value="0x30"/> |
|   <value name="DI_PT_PATCHES18" value="0x31"/> |
|   <value name="DI_PT_PATCHES19" value="0x32"/> |
|   <value name="DI_PT_PATCHES20" value="0x33"/> |
|   <value name="DI_PT_PATCHES21" value="0x34"/> |
|   <value name="DI_PT_PATCHES22" value="0x35"/> |
|   <value name="DI_PT_PATCHES23" value="0x36"/> |
|   <value name="DI_PT_PATCHES24" value="0x37"/> |
|   <value name="DI_PT_PATCHES25" value="0x38"/> |
|   <value name="DI_PT_PATCHES26" value="0x39"/> |
|   <value name="DI_PT_PATCHES27" value="0x3a"/> |
|   <value name="DI_PT_PATCHES28" value="0x3b"/> |
|   <value name="DI_PT_PATCHES29" value="0x3c"/> |
|   <value name="DI_PT_PATCHES30" value="0x3d"/> |
|   <value name="DI_PT_PATCHES31" value="0x3e"/> |
| </enum> |
| |
| <enum name="pc_di_src_sel"> |
|   <!-- Source-select codes, 0..3. --> |
|   <value name="DI_SRC_SEL_DMA" value="0"/> |
|   <value name="DI_SRC_SEL_IMMEDIATE" value="1"/> |
|   <value name="DI_SRC_SEL_AUTO_INDEX" value="2"/> |
|   <value name="DI_SRC_SEL_AUTO_XFB" value="3"/> |
| </enum> |
| |
| <enum name="pc_di_face_cull_sel"> |
|   <!-- Face-cull selector codes, 0..3. --> |
|   <value name="DI_FACE_CULL_NONE" value="0"/> |
|   <value name="DI_FACE_CULL_FETCH" value="1"/> |
|   <value name="DI_FACE_BACKFACE_CULL" value="2"/> |
|   <value name="DI_FACE_FRONTFACE_CULL" value="3"/> |
| </enum> |
| |
| <enum name="pc_di_index_size"> |
|   <!-- |
|     Index-size codes. INDEX_SIZE_IGN and INDEX_SIZE_16_BIT both map to 0, |
|     and INDEX_SIZE_INVALID is declared without a value attribute. |
|   --> |
|   <value name="INDEX_SIZE_IGN" value="0"/> |
|   <value name="INDEX_SIZE_16_BIT" value="0"/> |
|   <value name="INDEX_SIZE_32_BIT" value="1"/> |
|   <value name="INDEX_SIZE_8_BIT" value="2"/> |
|   <value name="INDEX_SIZE_INVALID"/> |
| </enum> |
| |
| <enum name="pc_di_vis_cull_mode"> |
|   <!-- Two-state visibility cull mode: 0 = ignore, 1 = use. --> |
|   <value name="IGNORE_VISIBILITY" value="0"/> |
|   <value name="USE_VISIBILITY" value="1"/> |
| </enum> |
| |
| <enum name="adreno_pm4_packet_type"> |
|   <!-- |
|     Packet type codes as 32-bit constants. CP_TYPE1_PKT and CP_TYPE4_PKT |
|     share the constant 0x40000000. |
|   --> |
|   <value name="CP_TYPE0_PKT" value="0x00000000"/> |
|   <value name="CP_TYPE1_PKT" value="0x40000000"/> |
|   <value name="CP_TYPE2_PKT" value="0x80000000"/> |
|   <value name="CP_TYPE3_PKT" value="0xc0000000"/> |
|   <value name="CP_TYPE4_PKT" value="0x40000000"/> |
|   <value name="CP_TYPE7_PKT" value="0x70000000"/> |
| </enum> |
| |
| <!-- |
| Note that in some cases, the same packet id is recycled on a later |
| generation, so the variants attribute is used to distinguish them. The |
| variants may not be completely accurate; we would probably have to analyze |
| the pfp and me/pm4 firmware to verify that a packet is actually |
| handled on a particular generation. But it is at least enough to |
| disambiguate the packet-ids that were re-used for different |
| packets starting with a5xx. |
| --> |
| <enum name="adreno_pm4_type3_packets" varset="chip"> |
|   <!-- |
|     PM4 type-3 packet opcodes. A variants attribute restricts a value |
|     to the named chip generations, so one opcode number may be listed |
|     several times under different packet names. |
|   --> |
|   <doc>initialize CP's micro-engine</doc> |
|   <value name="CP_ME_INIT" value="0x48"/> |
|   <doc>skip N 32-bit words to get to the next packet</doc> |
|   <value name="CP_NOP" value="0x10"/> |
|   <value name="CP_PREEMPT_ENABLE" value="0x1c" variants="A5XX"/> |
|   <value name="CP_PREEMPT_TOKEN" value="0x1e" variants="A5XX"/> |
|   <doc> |
|     indirect buffer dispatch. prefetch parser uses this packet |
|     type to determine whether to pre-fetch the IB |
|   </doc> |
|   <value name="CP_INDIRECT_BUFFER" value="0x3f"/> |
|   <doc> |
|     Takes the same arguments as CP_INDIRECT_BUFFER, but jumps to |
|     another buffer at the same level. Must be at the end of IB, and |
|     doesn't work with draw state IB's. |
|   </doc> |
|   <value name="CP_INDIRECT_BUFFER_CHAIN" value="0x57" variants="A5XX-"/> |
|   <doc>indirect buffer dispatch. same as IB, but init is pipelined</doc> |
|   <value name="CP_INDIRECT_BUFFER_PFD" value="0x37"/> |
|   <doc> |
|     Waits for the IDLE state of the engine before further drawing. |
|     This is pipelined, so the CP may continue. |
|   </doc> |
|   <value name="CP_WAIT_FOR_IDLE" value="0x26"/> |
|   <doc>wait until a register or memory location is a specific value</doc> |
|   <value name="CP_WAIT_REG_MEM" value="0x3c"/> |
|   <doc>wait until a register location is equal to a specific value</doc> |
|   <value name="CP_WAIT_REG_EQ" value="0x52"/> |
|   <doc>wait until a register location is >= a specific value</doc> |
|   <value name="CP_WAIT_REG_GTE" value="0x53" variants="A2XX-A4XX"/> |
|   <doc>wait until a read completes</doc> |
|   <value name="CP_WAIT_UNTIL_READ" value="0x5c" variants="A2XX-A4XX"/> |
|   <doc>wait until all base/size writes from an IB_PFD packet have completed</doc> |
|   <!-- |
|     NOTE: CP_WAIT_IB_PFD_COMPLETE unimplemented at least since a5xx fw, and |
|     recycled for something new on a7xx |
|   --> |
|   <value name="CP_WAIT_IB_PFD_COMPLETE" value="0x5d" varset="chip" variants="A2XX-A4XX"/> |
|   <doc>register read/modify/write</doc> |
|   <value name="CP_REG_RMW" value="0x21"/> |
|   <doc>Set binning configuration registers</doc> |
|   <value name="CP_SET_BIN_DATA" value="0x2f" variants="A2XX-A4XX"/> |
|   <value name="CP_SET_BIN_DATA5" value="0x2f" variants="A5XX-"/> |
|   <doc>reads register in chip and writes to memory</doc> |
|   <value name="CP_REG_TO_MEM" value="0x3e"/> |
|   <doc>write N 32-bit words to memory</doc> |
|   <value name="CP_MEM_WRITE" value="0x3d"/> |
|   <doc>write CP_PROG_COUNTER value to memory</doc> |
|   <value name="CP_MEM_WRITE_CNTR" value="0x4f"/> |
|   <doc>conditional execution of a sequence of packets</doc> |
|   <value name="CP_COND_EXEC" value="0x44"/> |
|   <doc>conditional write to memory or register</doc> |
|   <value name="CP_COND_WRITE" value="0x45" variants="A2XX-A4XX"/> |
|   <value name="CP_COND_WRITE5" value="0x45" variants="A5XX-"/> |
|   <doc>generate an event that creates a write to memory when completed</doc> |
|   <value name="CP_EVENT_WRITE" value="0x46" variants="A2XX-A6XX"/> |
|   <value name="CP_EVENT_WRITE7" value="0x46" variants="A7XX-"/> |
|   <doc>generate a VS|PS_done event</doc> |
|   <value name="CP_EVENT_WRITE_SHD" value="0x58"/> |
|   <doc>generate a cache flush done event</doc> |
|   <value name="CP_EVENT_WRITE_CFL" value="0x59"/> |
|   <doc>generate a z_pass done event</doc> |
|   <value name="CP_EVENT_WRITE_ZPD" value="0x5b"/> |
|   <doc> |
|     not sure the real name, but this seems to be what is used for |
|     opencl, instead of CP_DRAW_INDX.. |
|   </doc> |
|   <value name="CP_RUN_OPENCL" value="0x31"/> |
|   <doc>initiate fetch of index buffer and draw</doc> |
|   <value name="CP_DRAW_INDX" value="0x22"/> |
|   <doc>draw using supplied indices in packet</doc> |
|   <value name="CP_DRAW_INDX_2" value="0x36" variants="A2XX-A4XX"/> <!-- this is something different on a6xx and unused on a5xx --> |
|   <doc>initiate fetch of index buffer and binIDs and draw</doc> |
|   <value name="CP_DRAW_INDX_BIN" value="0x34" variants="A2XX-A4XX"/> |
|   <doc>initiate fetch of bin IDs and draw using supplied indices</doc> |
|   <value name="CP_DRAW_INDX_2_BIN" value="0x35" variants="A2XX-A4XX"/> |
|   <doc>begin/end initiator for viz query extent processing</doc> |
|   <value name="CP_VIZ_QUERY" value="0x23" variants="A2XX-A4XX"/> |
|   <doc>fetch state sub-blocks and initiate shader code DMAs</doc> |
|   <value name="CP_SET_STATE" value="0x25"/> |
|   <doc>load constant into chip and to memory</doc> |
|   <value name="CP_SET_CONSTANT" value="0x2d" variants="A2XX"/> |
|   <doc>load sequencer instruction memory (pointer-based)</doc> |
|   <value name="CP_IM_LOAD" value="0x27"/> |
|   <doc>load sequencer instruction memory (code embedded in packet)</doc> |
|   <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/> |
|   <doc>load constants from a location in memory</doc> |
|   <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/> |
|   <doc>selective invalidation of state pointers</doc> |
|   <value name="CP_INVALIDATE_STATE" value="0x3b"/> |
|   <doc>dynamically changes shader instruction memory partition</doc> |
|   <value name="CP_SET_SHADER_BASES" value="0x4a" variants="A2XX-A4XX"/> |
|   <doc>sets the 64-bit BIN_MASK register in the PFP</doc> |
|   <value name="CP_SET_BIN_MASK" value="0x50" variants="A2XX-A4XX"/> |
|   <doc>sets the 64-bit BIN_SELECT register in the PFP</doc> |
|   <value name="CP_SET_BIN_SELECT" value="0x51" variants="A2XX-A4XX"/> |
|   <doc>updates the current context, if needed</doc> |
|   <value name="CP_CONTEXT_UPDATE" value="0x5e"/> |
|   <doc>generate interrupt from the command stream</doc> |
|   <value name="CP_INTERRUPT" value="0x40"/> |
|   <doc>copy sequencer instruction memory to system memory</doc> |
|   <value name="CP_IM_STORE" value="0x2c" variants="A2XX"/> |
| |
|   <!-- For a20x --> |
|   <!-- TODO handle variants.. |
|   <doc> |
|     Program an offset that will be added to the BIN_BASE value of |
|     the 3D_DRAW_INDX_BIN packet |
|   </doc> |
|   <value name="CP_SET_BIN_BASE_OFFSET" value="0x4b"/> |
|   --> |
| |
|   <!-- for a22x --> |
|   <doc> |
|     sets draw initiator flags register in PFP, gets bitwise-ORed into |
|     every draw initiator |
|   </doc> |
|   <value name="CP_SET_DRAW_INIT_FLAGS" value="0x4b"/> |
|   <doc>sets the register protection mode</doc> |
|   <value name="CP_SET_PROTECTED_MODE" value="0x5f"/> |
| |
|   <value name="CP_BOOTSTRAP_UCODE" value="0x6f"/> |
| |
|   <!-- for a3xx --> |
|   <doc>load high level sequencer command</doc> |
|   <value name="CP_LOAD_STATE" value="0x30" variants="A3XX"/> |
|   <value name="CP_LOAD_STATE4" value="0x30" variants="A4XX-A5XX"/> |
|   <doc>Conditionally load an IB based on a flag, prefetch enabled</doc> |
|   <value name="CP_COND_INDIRECT_BUFFER_PFE" value="0x3a"/> |
|   <doc>Conditionally load an IB based on a flag, prefetch disabled</doc> |
|   <value name="CP_COND_INDIRECT_BUFFER_PFD" value="0x32" variants="A3XX"/> |
|   <doc>Load a buffer with pre-fetch enabled</doc> |
|   <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/> |
|   <doc>Set bin (?)</doc> |
|   <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/> |
| |
|   <doc>test 2 memory locations to dword values specified</doc> |
|   <value name="CP_TEST_TWO_MEMS" value="0x71"/> |
| |
|   <doc>Write register, ignoring context state for context sensitive registers</doc> |
|   <value name="CP_REG_WR_NO_CTXT" value="0x78"/> |
| |
|   <doc>Record the real-time when this packet is processed by PFP</doc> |
|   <value name="CP_RECORD_PFP_TIMESTAMP" value="0x11"/> |
| |
|   <!-- Used to switch GPU between secure and non-secure modes --> |
|   <value name="CP_SET_SECURE_MODE" value="0x66"/> |
| |
|   <doc>PFP waits until the FIFO between the PFP and the ME is empty</doc> |
|   <value name="CP_WAIT_FOR_ME" value="0x13"/> |
| |
|   <!-- for a4xx --> |
|   <doc> |
|     Used a bit like CP_SET_CONSTANT on a2xx, but can write multiple |
|     groups of registers. Looks like it can be used to create state |
|     objects in GPU memory, and on state change only emit pointer |
|     (via CP_SET_DRAW_STATE), which should be nice for reducing CPU |
|     overhead: |
| |
|     (A4x) save PM4 stream pointers to execute upon a visible draw |
|   </doc> |
|   <value name="CP_SET_DRAW_STATE" value="0x43" variants="A4XX-"/> |
|   <value name="CP_DRAW_INDX_OFFSET" value="0x38"/> |
|   <value name="CP_DRAW_INDIRECT" value="0x28" variants="A4XX-"/> |
|   <value name="CP_DRAW_INDX_INDIRECT" value="0x29" variants="A4XX-"/> |
|   <value name="CP_DRAW_INDIRECT_MULTI" value="0x2a" variants="A6XX-"/> |
|   <value name="CP_DRAW_AUTO" value="0x24"/> |
| |
|   <doc> |
|     Enable or disable predication globally. Also resets the |
|     predicate to "passing" and the local bit to enabled when |
|     enabling global predication. |
|   </doc> |
|   <value name="CP_DRAW_PRED_ENABLE_GLOBAL" value="0x19"/> |
| |
|   <doc> |
|     Enable or disable predication locally. Unlike globally enabling |
|     predication, this packet doesn't touch any other state. |
|     Predication only happens when enabled globally and locally and a |
|     predicate has been set. This should be used for internal draws |
|     which aren't supposed to use the predication state: |
| |
|     CP_DRAW_PRED_ENABLE_LOCAL(0) |
|     ... do draw... |
|     CP_DRAW_PRED_ENABLE_LOCAL(1) |
|   </doc> |
|   <value name="CP_DRAW_PRED_ENABLE_LOCAL" value="0x1a"/> |
| |
|   <doc> |
|     Latch a draw predicate into the internal register. |
|   </doc> |
|   <value name="CP_DRAW_PRED_SET" value="0x4e"/> |
| |
|   <doc> |
|     for A4xx |
|     Write to register with address that does not fit into type-0 pkt |
|   </doc> |
|   <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/> |
| |
|   <doc>copy from ME scratch RAM to a register</doc> |
|   <value name="CP_SCRATCH_TO_REG" value="0x4d"/> |
| |
|   <doc>Copy from REG to ME scratch RAM</doc> |
|   <value name="CP_REG_TO_SCRATCH" value="0x4a"/> |
| |
|   <doc>Wait for memory writes to complete</doc> |
|   <value name="CP_WAIT_MEM_WRITES" value="0x12"/> |
| |
|   <doc>Conditional execution based on register comparison</doc> |
|   <value name="CP_COND_REG_EXEC" value="0x47"/> |
| |
|   <doc>Memory to REG copy</doc> |
|   <value name="CP_MEM_TO_REG" value="0x42"/> |
| |
|   <value name="CP_EXEC_CS_INDIRECT" value="0x41" variants="A4XX-"/> |
|   <value name="CP_EXEC_CS" value="0x33"/> |
| |
|   <doc> |
|     for a5xx |
|   </doc> |
|   <value name="CP_PERFCOUNTER_ACTION" value="0x50" variants="A5XX"/> |
|   <!-- switches SMMU pagetable, used on a5xx+ only --> |
|   <value name="CP_SMMU_TABLE_UPDATE" value="0x53" variants="A5XX-"/> |
|   <!-- for a6xx --> |
|   <doc>Tells CP the current mode of GPU operation</doc> |
|   <value name="CP_SET_MARKER" value="0x65" variants="A6XX-"/> |
|   <doc>Instruct CP to set a few internal CP registers</doc> |
|   <value name="CP_SET_PSEUDO_REG" value="0x56" variants="A6XX-"/> |
|   <!-- |
|     pairs of regid and value.. seems to be used to program some TF |
|     related regs: |
|   --> |
|   <value name="CP_CONTEXT_REG_BUNCH" value="0x5c" variants="A5XX-"/> |
|   <!-- A5XX Enable yield in RB only --> |
|   <value name="CP_YIELD_ENABLE" value="0x1c" variants="A5XX"/> |
|   <doc> |
|     Enables IB2 skipping. If both GLOBAL and LOCAL are 1 and |
|     nothing is left in the visibility stream, then |
|     CP_INDIRECT_BUFFER will be skipped, and draws will early return |
|     from their IB. |
|   </doc> |
|   <value name="CP_SKIP_IB2_ENABLE_GLOBAL" value="0x1d" variants="A5XX-"/> |
|   <value name="CP_SKIP_IB2_ENABLE_LOCAL" value="0x23" variants="A5XX-"/> |
|   <value name="CP_SET_SUBDRAW_SIZE" value="0x35" variants="A5XX-"/> |
|   <value name="CP_WHERE_AM_I" value="0x62" variants="A5XX-"/> |
|   <value name="CP_SET_VISIBILITY_OVERRIDE" value="0x64" variants="A5XX-"/> |
|   <!-- Enable/Disable/Defer A5x global preemption model --> |
|   <value name="CP_PREEMPT_ENABLE_GLOBAL" value="0x69" variants="A5XX"/> |
|   <!-- Enable/Disable A5x local preemption model --> |
|   <value name="CP_PREEMPT_ENABLE_LOCAL" value="0x6a" variants="A5XX"/> |
|   <!-- Yield token on a5xx similar to CP_PREEMPT on a4xx --> |
|   <value name="CP_CONTEXT_SWITCH_YIELD" value="0x6b" variants="A5XX-"/> |
|   <!-- Inform CP about current render mode (needed for a5xx preemption) --> |
|   <value name="CP_SET_RENDER_MODE" value="0x6c" variants="A5XX"/> |
|   <value name="CP_COMPUTE_CHECKPOINT" value="0x6e" variants="A5XX"/> |
|   <!-- check if this works on earlier.. --> |
|   <value name="CP_MEM_TO_MEM" value="0x73" variants="A5XX-"/> |
| |
|   <doc> |
|     General purpose 2D blit engine for image transfers and mipmap |
|     generation. Reads through UCHE, writes through the CCU cache in |
|     the PS stage. |
|   </doc> |
|   <value name="CP_BLIT" value="0x2c" variants="A5XX-"/> |
| |
|   <!-- Test specified bit in specified register and set predicate --> |
|   <value name="CP_REG_TEST" value="0x39" variants="A5XX-"/> |
| |
|   <!-- |
|     Seems to set the mode flags which control which CP_SET_DRAW_STATE |
|     packets are executed, based on their ENABLE_MASK values |
| |
|     CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE |
|     packets w/ ENABLE_MASK & 0x6 to execute immediately |
|   --> |
|   <value name="CP_SET_MODE" value="0x63" variants="A6XX-"/> |
| |
|   <!-- |
|     Seems like there are now separate blocks of state for VS vs FS/CS |
|     (probably this amounts to geometry vs fragments, so that the geometry |
|     stage of the pipeline for the next draw can start while the fragment |
|     stage of the current draw is still running). The format of the payload |
|     of the packets is the same, the only difference is the offsets of the |
|     regs the firmware code that handles the packet writes. |
| |
|     Note that for CL, starting with a6xx, the preferred # of local |
|     threads is no longer the same as the max, implying that the shader |
|     core can now run warps from unrelated shaders (ie. |
|     CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE vs |
|     CL_KERNEL_WORK_GROUP_SIZE) |
|   --> |
|   <value name="CP_LOAD_STATE6_GEOM" value="0x32" variants="A6XX-"/> |
|   <value name="CP_LOAD_STATE6_FRAG" value="0x34" variants="A6XX-"/> |
|   <!-- |
|     Note: For IBO state (Image/SSBOs) which have shared state across |
|     shader stages, for 3d pipeline CP_LOAD_STATE6 is used. But for |
|     compute shaders, CP_LOAD_STATE6_FRAG is used. Possibly they are |
|     interchangeable. |
|   --> |
|   <value name="CP_LOAD_STATE6" value="0x36" variants="A6XX-"/> |
| |
|   <!-- internal packets: --> |
|   <value name="IN_IB_PREFETCH_END" value="0x17" variants="A2XX"/> |
|   <value name="IN_SUBBLK_PREFETCH" value="0x1f" variants="A2XX"/> |
|   <value name="IN_INSTR_PREFETCH" value="0x20" variants="A2XX"/> |
|   <value name="IN_INSTR_MATCH" value="0x47" variants="A2XX"/> |
|   <value name="IN_CONST_PREFETCH" value="0x49" variants="A2XX"/> |
|   <value name="IN_INCR_UPDT_STATE" value="0x55" variants="A2XX"/> |
|   <value name="IN_INCR_UPDT_CONST" value="0x56" variants="A2XX"/> |
|   <value name="IN_INCR_UPDT_INSTR" value="0x57" variants="A2XX"/> |
| |
|   <!-- internal jumptable entries on a6xx+, possibly a5xx: --> |
| |
|   <!-- jmptable entry used to handle type4 packet on a5xx+: --> |
|   <value name="PKT4" value="0x04" variants="A5XX-"/> |
|   <!-- called when ROQ is empty, "returns" from an IB or merged sequence of IBs --> |
|   <value name="IN_IB_END" value="0x0a" variants="A6XX-"/> |
|   <!-- handles IFPC save/restore --> |
|   <value name="IN_GMU_INTERRUPT" value="0x0b" variants="A6XX-"/> |
|   <!-- preemption/context-switch routine --> |
|   <value name="IN_PREEMPT" value="0x0f" variants="A6XX-"/> |
| |
|   <!-- TODO do these exist on A5xx? --> |
|   <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/> |
|   <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX-"/> |
|   <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX-"/> |
|   <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/> |
|   <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/> |
|   <value name="CP_MEMCPY" value="0x75" variants="A6XX-"/> |
|   <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX-"/> |
|   <!-- A750+, set in place of CP_SET_BIN_DATA5_OFFSET but has different values --> |
|   <value name="CP_SET_UNK_BIN_DATA" value="0x2d" variants="A7XX-"/> |
|   <doc> |
|     Write CP_CONTEXT_SWITCH_*_INFO from CP to the following dwords, |
|     and forcibly switch to the indicated context. |
|   </doc> |
|   <value name="CP_CONTEXT_SWITCH" value="0x54" variants="A6XX"/> |
|   <value name="CP_SET_AMBLE" value="0x55" variants="A6XX-"/> |
| |
|   <!-- |
|     Seems to always have the payload: |
|     00000002 00008801 00004010 |
|     or: |
|     00000002 00008801 00004090 |
|     or: |
|     00000002 00008801 00000010 |
|     00000002 00008801 00010010 |
|     00000002 00008801 00d64010 |
|     ... |
|     Note set for compute shaders.. |
|     Is 0x8801 a register offset? |
|     This appears to be a special sort of register write packet |
|     more or less, but the firmware has some special handling.. |
|     Seems like it intercepts/modifies certain register offsets, |
|     but others are treated like a normal PKT4 reg write. I |
|     guess there are some registers that the fw controls certain |
|     bits. |
|   --> |
|   <value name="CP_REG_WRITE" value="0x6d" variants="A6XX"/> |
| |
|   <doc> |
|     These first appear in a650_sqe.bin. They can in theory be used |
|     to loop any sequence of IB1 commands, but in practice they are |
|     used to loop over bins. There is a fixed-size per-iteration |
|     prefix, used to set per-bin state, and then the following IB1 |
|     commands are executed until CP_END_BIN which are always the same |
|     for each iteration and usually contain a list of |
|     CP_INDIRECT_BUFFER calls to IB2 commands which setup state and |
|     execute restore/draw/save commands. This replaces the previous |
|     technique of just repeating the CP_INDIRECT_BUFFER calls and |
|     "unrolling" the loop. |
|   </doc> |
|   <value name="CP_START_BIN" value="0x50" variants="A6XX-"/> |
|   <value name="CP_END_BIN" value="0x51" variants="A6XX-"/> |
| |
|   <doc> Make next dword 1 to disable preemption, 0 to re-enable it. </doc> |
|   <value name="CP_PREEMPT_DISABLE" value="0x6c" variants="A6XX"/> |
| |
|   <value name="CP_WAIT_TIMESTAMP" value="0x14" variants="A7XX-"/> |
|   <value name="CP_GLOBAL_TIMESTAMP" value="0x15" variants="A7XX-"/> <!-- payload 1 dword --> |
|   <value name="CP_LOCAL_TIMESTAMP" value="0x16" variants="A7XX-"/> <!-- payload 1 dword, follows 0x15 --> |
|   <value name="CP_THREAD_CONTROL" value="0x17" variants="A7XX-"/> |
|   <!-- payload 4 dwords, last two could be render target addr (one pkt per MRT), possibly used for GMEM save/restore?--> |
|   <value name="CP_RESOURCE_LIST" value="0x18" variants="A7XX-"/> |
|   <doc> Can clear BV/BR counters, or wait until one catches up to another </doc> |
|   <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/> |
|   <doc> Clears, adds to local, or adds to global timestamp </doc> |
|   <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/> |
|   <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? --> |
|   <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/> |
|   <doc> |
|     Write to a scratch memory that is read by CP_REG_TEST with |
|     SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. |
|     However it uses the same memory space. |
|   </doc> |
|   <value name="CP_MEM_TO_SCRATCH_MEM" value="0x49" variants="A7XX-"/> |
| |
|   <doc> |
|     Executes an array of fixed-size command buffers where each |
|     buffer is assumed to have one draw call, skipping buffers with |
|     non-visible draw calls. |
|   </doc> |
|   <value name="CP_FIXED_STRIDE_DRAW_TABLE" value="0x7f" variants="A7XX-"/> |
| |
|   <doc>Reset various on-chip state used for synchronization</doc> |
|   <value name="CP_RESET_CONTEXT_STATE" value="0x1f" variants="A7XX-"/> |
| </enum> |
| |
| |
| <domain name="CP_LOAD_STATE" width="32"> |
|   <doc>Load state, a3xx (and later?)</doc> |
|   <!-- |
|     Layout: two 32-bit words at offsets 0 and 1; the enums declared |
|     here are the types of their STATE_BLOCK, STATE_TYPE and STATE_SRC |
|     bitfields. |
|   --> |
|   <enum name="adreno_state_block"> |
|     <value name="SB_VERT_TEX" value="0"/> |
|     <value name="SB_VERT_MIPADDR" value="1"/> |
|     <value name="SB_FRAG_TEX" value="2"/> |
|     <value name="SB_FRAG_MIPADDR" value="3"/> |
|     <value name="SB_VERT_SHADER" value="4"/> |
|     <value name="SB_GEOM_SHADER" value="5"/> |
|     <value name="SB_FRAG_SHADER" value="6"/> |
|     <value name="SB_COMPUTE_SHADER" value="7"/> |
|   </enum> |
|   <enum name="adreno_state_type"> |
|     <value name="ST_SHADER" value="0"/> |
|     <value name="ST_CONSTANTS" value="1"/> |
|   </enum> |
|   <enum name="adreno_state_src"> |
|     <value name="SS_DIRECT" value="0"> |
|       <doc>inline with the CP_LOAD_STATE packet</doc> |
|     </value> |
|     <value name="SS_INVALID_ALL_IC" value="2"/> |
|     <value name="SS_INVALID_PART_IC" value="3"/> |
|     <value name="SS_INDIRECT" value="4"> |
|       <doc>in buffer pointed to by EXT_SRC_ADDR</doc> |
|     </value> |
|     <value name="SS_INDIRECT_TCM" value="5"/> |
|     <value name="SS_INDIRECT_STM" value="6"/> |
|   </enum> |
|   <reg32 offset="0" name="0"> |
|     <bitfield name="DST_OFF" low="0" high="15" type="uint"/> |
|     <bitfield name="STATE_SRC" low="16" high="18" type="adreno_state_src"/> |
|     <bitfield name="STATE_BLOCK" low="19" high="21" type="adreno_state_block"/> |
|     <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
|   </reg32> |
|   <reg32 offset="1" name="1"> |
|     <bitfield name="STATE_TYPE" low="0" high="1" type="adreno_state_type"/> |
|     <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
|   </reg32> |
| </domain> |
| |
| <domain name="CP_LOAD_STATE4" width="32" varset="chip"> <!-- |
| same idea as CP_LOAD_STATE, but with its own enums: STATE_BLOCK is four |
| bits wide (18..21), STATE_SRC two bits (16..17), DST_OFF fourteen bits |
| (0..13), and a third word, EXT_SRC_ADDR_HI, is declared for A5XX-. |
| --> |
| <doc>Load state, a4xx+</doc> |
| <enum name="a4xx_state_block"> |
| <!-- |
| unknown: 0x7 and 0xf <- seen in compute shader |
| |
| STATE_BLOCK = 0x6, STATE_TYPE = 0x2 possibly used for preemption? |
| Seen in some GL shaders. Payload is NUM_UNIT dwords, and it contains |
| the gpuaddr of the following shader constants block. DST_OFF seems |
| to specify which shader stage: |
| |
| 16 -> vert |
| 36 -> tcs |
| 56 -> tes |
| 76 -> geom |
| 96 -> frag |
| |
| Example: |
| |
| opcode: CP_LOAD_STATE4 (30) (12 dwords) |
| { DST_OFF = 16 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = 0x6 | NUM_UNIT = 4 } |
| { STATE_TYPE = 0x2 | EXT_SRC_ADDR = 0 } |
| { EXT_SRC_ADDR_HI = 0 } |
| 0000: c0264100 00000000 00000000 00000000 |
| 0000: 70b0000b 01180010 00000002 00000000 c0264100 00000000 00000000 00000000 |
| |
| opcode: CP_LOAD_STATE4 (30) (4 dwords) |
| { DST_OFF = 16 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_VS_SHADER | NUM_UNIT = 4 } |
| { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0xc0264100 } |
| { EXT_SRC_ADDR_HI = 0 } |
| 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 |
| 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 |
| 0000: 00000040 0000000c 00000000 00000000 00000000 00000000 00000000 00000000 |
| |
| STATE_BLOCK = 0x6, STATE_TYPE = 0x1, seen in compute shader. NUM_UNITS * 2 dwords. |
| |
| --> |
| <value name="SB4_VS_TEX" value="0x0"/> |
| <value name="SB4_HS_TEX" value="0x1"/> <!-- aka. TCS --> |
| <value name="SB4_DS_TEX" value="0x2"/> <!-- aka. TES --> |
| <value name="SB4_GS_TEX" value="0x3"/> |
| <value name="SB4_FS_TEX" value="0x4"/> |
| <value name="SB4_CS_TEX" value="0x5"/> |
| <value name="SB4_VS_SHADER" value="0x8"/> |
| <value name="SB4_HS_SHADER" value="0x9"/> |
| <value name="SB4_DS_SHADER" value="0xa"/> |
| <value name="SB4_GS_SHADER" value="0xb"/> |
| <value name="SB4_FS_SHADER" value="0xc"/> |
| <value name="SB4_CS_SHADER" value="0xd"/> |
| <!-- |
| for SSBO, STATE_TYPE=0 appears to be addresses (four dwords each), |
| STATE_TYPE=1 sizes, STATE_TYPE=2 addresses again (two dwords each) |
| |
| Compute has its own dedicated SSBO state, it seems, but the rest |
| of the stages share state |
| --> |
| <value name="SB4_SSBO" value="0xe"/> |
| <value name="SB4_CS_SSBO" value="0xf"/> |
| </enum> |
| <enum name="a4xx_state_type"> |
| <value name="ST4_SHADER" value="0"/> |
| <value name="ST4_CONSTANTS" value="1"/> |
| <value name="ST4_UBO" value="2"/> |
| </enum> |
| <enum name="a4xx_state_src"> <!-- only values 0 and 2 are named here --> |
| <value name="SS4_DIRECT" value="0"/> |
| <value name="SS4_INDIRECT" value="2"/> |
| </enum> |
| <reg32 offset="0" name="0"> <!-- bits 14..15 have no field defined in this packet --> |
| <bitfield name="DST_OFF" low="0" high="13" type="uint"/> |
| <bitfield name="STATE_SRC" low="16" high="17" type="a4xx_state_src"/> |
| <bitfield name="STATE_BLOCK" low="18" high="21" type="a4xx_state_block"/> |
| <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="STATE_TYPE" low="0" high="1" type="a4xx_state_type"/> |
| <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> <!-- third word only exists for A5XX and later --> |
| <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> |
| </reg32> |
| </domain> |
| |
| <!-- looks basically the same as CP_LOAD_STATE4, except that STATE_TYPE |
| lives in bits 14..15 of word 0 instead of in word 1, and words 1..2 are |
| additionally exposed as one 64-bit EXT_SRC_ADDR register --> |
| <domain name="CP_LOAD_STATE6" width="32" varset="chip"> |
| <doc>Load state, a6xx+</doc> |
| <enum name="a6xx_state_block"> |
| <value name="SB6_VS_TEX" value="0x0"/> |
| <value name="SB6_HS_TEX" value="0x1"/> <!-- aka. TCS --> |
| <value name="SB6_DS_TEX" value="0x2"/> <!-- aka. TES --> |
| <value name="SB6_GS_TEX" value="0x3"/> |
| <value name="SB6_FS_TEX" value="0x4"/> |
| <value name="SB6_CS_TEX" value="0x5"/> |
| <value name="SB6_VS_SHADER" value="0x8"/> |
| <value name="SB6_HS_SHADER" value="0x9"/> |
| <value name="SB6_DS_SHADER" value="0xa"/> |
| <value name="SB6_GS_SHADER" value="0xb"/> |
| <value name="SB6_FS_SHADER" value="0xc"/> |
| <value name="SB6_CS_SHADER" value="0xd"/> |
| <value name="SB6_IBO" value="0xe"/> |
| <value name="SB6_CS_IBO" value="0xf"/> |
| </enum> |
| <enum name="a6xx_state_type"> |
| <value name="ST6_SHADER" value="0"/> |
| <value name="ST6_CONSTANTS" value="1"/> |
| <value name="ST6_UBO" value="2"/> |
| <value name="ST6_IBO" value="3"/> |
| </enum> |
| <enum name="a6xx_state_src"> |
| <value name="SS6_DIRECT" value="0"/> |
| <value name="SS6_BINDLESS" value="1"/> <!-- TODO does this exist on a4xx/a5xx? --> |
| <value name="SS6_INDIRECT" value="2"/> |
| <doc> |
| SS6_UBO used by the a6xx vulkan blob with tesselation constants |
| in this case, EXT_SRC_ADDR is (ubo_id shl 16 | offset) |
| to load constants from a UBO loaded with DST_OFF = 14 and offset 0, |
| EXT_SRC_ADDR = 0xe0000 |
| (offset is a guess, should be in bytes given that maxUniformBufferRange=64k) |
| </doc> |
| <value name="SS6_UBO" value="3"/> |
| </enum> |
| <reg32 offset="0" name="0"> <!-- all 32 bits of word 0 are assigned to a field --> |
| <bitfield name="DST_OFF" low="0" high="13" type="uint"/> |
| <bitfield name="STATE_TYPE" low="14" high="15" type="a6xx_state_type"/> |
| <bitfield name="STATE_SRC" low="16" high="17" type="a6xx_state_src"/> |
| <bitfield name="STATE_BLOCK" low="18" high="21" type="a6xx_state_block"/> |
| <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> |
| </reg32> |
| <reg64 offset="1" name="EXT_SRC_ADDR" type="address"/> <!-- 64-bit view of the same two words described by the reg32 pair above --> |
| </domain> |
| |
| <bitset name="vgt_draw_initiator" inline="yes"> <!-- |
| draw initiator word as used by CP_DRAW_INDX and CP_DRAW_INDX_2 (word 1 |
| of both packets). Bit 8 and bits 15..23 have no field defined here. |
| --> |
| <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/> |
| <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> |
| <bitfield name="VIS_CULL" low="9" high="10" type="pc_di_vis_cull_mode"/> |
| <bitfield name="INDEX_SIZE" pos="11" type="pc_di_index_size"/> |
| <bitfield name="NOT_EOP" pos="12" type="boolean"/> |
| <bitfield name="SMALL_INDEX" pos="13" type="boolean"/> |
| <bitfield name="PRE_DRAW_INITIATOR_ENABLE" pos="14" type="boolean"/> |
| <bitfield name="NUM_INSTANCES" low="24" high="31" type="uint"/> |
| </bitset> |
| |
| <!-- changed on a4xx: value set of vgt_draw_initiator_a4xx.INDEX_SIZE (two bits wide there) --> |
| <enum name="a4xx_index_size"> |
| <value name="INDEX4_SIZE_8_BIT" value="0"/> |
| <value name="INDEX4_SIZE_16_BIT" value="1"/> |
| <value name="INDEX4_SIZE_32_BIT" value="2"/> |
| </enum> |
| |
| <enum name="a6xx_patch_type"> <!-- value set of vgt_draw_initiator_a4xx.PATCH_TYPE --> |
| <value name="TESS_QUADS" value="0"/> |
| <value name="TESS_TRIANGLES" value="1"/> |
| <value name="TESS_ISOLINES" value="2"/> |
| </enum> |
| |
| <bitset name="vgt_draw_initiator_a4xx" inline="yes"> <!-- |
| draw initiator word used as word 0 of CP_DRAW_INDX_OFFSET, |
| CP_DRAW_INDIRECT, CP_DRAW_INDX_INDIRECT, CP_DRAW_INDIRECT_MULTI and |
| CP_DRAW_AUTO. Compared to vgt_draw_initiator: VIS_CULL sits at bits |
| 8..9, INDEX_SIZE is two bits wide, and there is no NUM_INSTANCES field. |
| Bits 14..15 and 18..31 have no field defined here. |
| --> |
| <!-- When the 0x20 bit is set, it's the number of patch vertices - 1 --> |
| <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/> |
| <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> |
| <bitfield name="VIS_CULL" low="8" high="9" type="pc_di_vis_cull_mode"/> |
| <bitfield name="INDEX_SIZE" low="10" high="11" type="a4xx_index_size"/> |
| <bitfield name="PATCH_TYPE" low="12" high="13" type="a6xx_patch_type"/> |
| <bitfield name="GS_ENABLE" pos="16" type="boolean"/> |
| <bitfield name="TESS_ENABLE" pos="17" type="boolean"/> |
| </bitset> |
| |
| <domain name="CP_DRAW_INDX" width="32"> <!-- five words: VIZ_QUERY, draw initiator, NUM_INDICES, INDX_BASE, INDX_SIZE --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="VIZ_QUERY" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1" type="vgt_draw_initiator"/> <!-- whole word decoded through the vgt_draw_initiator bitset --> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="INDX_BASE" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_SIZE" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_2" width="32"> <!-- first three words match CP_DRAW_INDX; the indices follow inline instead of INDX_BASE/INDX_SIZE --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="VIZ_QUERY" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1" type="vgt_draw_initiator"/> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- followed by NUM_INDICES indices.. --> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_OFFSET" width="32"> <!-- |
| words 0..3 are shared. Offsets 4 and up are declared twice: inside the |
| A5XX- stripe (64-bit INDX_BASE in words 4..5, MAX_INDICES in word 6) |
| and, after the stripe, without any variant restriction (32-bit |
| INDX_BASE in word 4, INDX_SIZE in word 5). NOTE(review): the latter |
| pair is presumably the pre-A5XX layout; confirm. |
| --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="FIRST_INDX" low="0" high="31"/> |
| </reg32> |
| |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_BASE_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDX_BASE_HI" low="0" high="31"/> |
| </reg32> |
| <reg64 offset="4" name="INDX_BASE" type="address"/> <!-- 64-bit view of the LO/HI pair above --> |
| <reg32 offset="6" name="6"> |
| <!-- max # of elements in index buffer --> |
| <bitfield name="MAX_INDICES" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDX_BASE" low="0" high="31" type="address"/> |
| </reg32> |
| |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> <!-- |
| word 0 is the draw initiator; the INDIRECT address follows as a single |
| word on A4XX, or as a LO/HI pair (also exposed as a reg64) on A5XX-. |
| --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDIRECT" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDIRECT_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="INDIRECT_HI" low="0" high="31"/> |
| </reg32> |
| <reg64 offset="1" name="INDIRECT" type="address"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_INDX_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> <!-- |
| word 0 is the draw initiator. A4XX: 32-bit INDX_BASE, INDX_SIZE (bytes) |
| and 32-bit INDIRECT in words 1..3. A5XX-: 64-bit INDX_BASE in words |
| 1..2, MAX_INDICES (elements) in word 3, 64-bit INDIRECT in words 4..5. |
| --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDX_BASE" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <!-- max # of bytes in index buffer --> |
| <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="INDIRECT" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="INDX_BASE_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="INDX_BASE_HI" low="0" high="31"/> |
| </reg32> |
| <reg64 offset="1" name="INDX_BASE" type="address"/> |
| <reg32 offset="3" name="3"> |
| <!-- max # of elements in index buffer --> |
| <bitfield name="MAX_INDICES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="INDIRECT_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="INDIRECT_HI" low="0" high="31"/> |
| </reg32> |
| <reg64 offset="4" name="INDIRECT" type="address"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_INDIRECT_MULTI" width="32" varset="chip" prefix="chip" variants="A6XX-"> <!-- |
| words 0..2 are common to all forms. OPCODE in word 1 (addvariant="yes") |
| is the varset the stripes below key on, so it decides how the words |
| from offset 3 onwards are laid out. |
| --> |
| <enum name="a6xx_draw_indirect_opcode"> |
| <value name="INDIRECT_OP_NORMAL" value="0x2"/> |
| <value name="INDIRECT_OP_INDEXED" value="0x4"/> |
| <value name="INDIRECT_OP_INDIRECT_COUNT" value="0x6"/> |
| <value name="INDIRECT_OP_INDIRECT_COUNT_INDEXED" value="0x7"/> |
| </enum> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="OPCODE" low="0" high="3" type="a6xx_draw_indirect_opcode" addvariant="yes"/> |
| <doc> |
| DST_OFF same as in CP_LOAD_STATE6 - vec4 VS const at this offset will |
| be updated for each draw to {draw_id, first_vertex, first_instance, 0} |
| value of 0 disables it |
| </doc> |
| <bitfield name="DST_OFF" low="8" high="21" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="DRAW_COUNT" type="uint"/> |
| <!-- non-indexed: 64-bit INDIRECT address, then STRIDE --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_NORMAL"> |
| <reg64 offset="3" name="INDIRECT" type="address"/> |
| <reg32 offset="5" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- indexed: INDEX address and MAX_INDICES come before INDIRECT and STRIDE --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDEXED" prefix="INDEXED"> |
| <reg64 offset="3" name="INDEX" type="address"/> |
| <reg32 offset="5" name="MAX_INDICES" type="uint"/> |
| <reg64 offset="6" name="INDIRECT" type="address"/> |
| <reg32 offset="8" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- indirect count: an extra 64-bit INDIRECT_COUNT address sits between INDIRECT and STRIDE --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT" prefix="INDIRECT"> |
| <reg64 offset="3" name="INDIRECT" type="address"/> |
| <reg64 offset="5" name="INDIRECT_COUNT" type="address"/> |
| <reg32 offset="7" name="STRIDE" type="uint"/> |
| </stripe> |
| <!-- indexed + indirect count: combines the INDEX/MAX_INDICES and INDIRECT_COUNT words --> |
| <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT_INDEXED" prefix="INDIRECT_INDEXED"> |
| <reg64 offset="3" name="INDEX" type="address"/> |
| <reg32 offset="5" name="MAX_INDICES" type="uint"/> |
| <reg64 offset="6" name="INDIRECT" type="address"/> |
| <reg64 offset="8" name="INDIRECT_COUNT" type="address"/> |
| <reg32 offset="10" name="STRIDE" type="uint"/> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_DRAW_AUTO" width="32"> <!-- |
| word 0: draw initiator, word 1: NUM_INSTANCES, words 2..3: 64-bit |
| NUM_VERTICES_BASE address, word 4: NUM_VERTICES_OFFSET, word 5: STRIDE. |
| --> |
| <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg64 offset="2" name="NUM_VERTICES_BASE" type="address"/> |
| <reg32 offset="4" name="4"> |
| <bitfield name="NUM_VERTICES_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="STRIDE" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_ENABLE_GLOBAL" width="32" varset="chip"> <!-- single word; only bit 0 (ENABLE) is defined --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ENABLE" pos="0" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_ENABLE_LOCAL" width="32" varset="chip"> <!-- single word with the same layout as CP_DRAW_PRED_ENABLE_GLOBAL: only bit 0 (ENABLE) is defined --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ENABLE" pos="0" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DRAW_PRED_SET" width="32" varset="chip"> <!-- |
| word 0 selects the predicate source (SRC, bits 4..7) and the test |
| (TEST, bit 8); words 1..2 hold the 64-bit MEM_ADDR. |
| --> |
| <enum name="cp_draw_pred_src"> <!-- only source 5 is named; see the note below for 1-4 --> |
| <!-- |
| Sources 1-4 seem to be about combining reading |
| SO/primitive queries and setting the predicate, which is |
| a DX11-specific optimization (since in DX11 you can only |
| predicate on the result of queries). |
| --> |
| <value name="PRED_SRC_MEM" value="5"> |
| <doc> |
| Read a 64-bit value at the given address and |
| test if it equals/doesn't equal 0. |
| </doc> |
| </value> |
| </enum> |
| <enum name="cp_draw_pred_test"> |
| <value name="NE_0_PASS" value="0"/> |
| <value name="EQ_0_PASS" value="1"/> |
| </enum> |
| <reg32 offset="0" name="0"> <!-- bits 0..3 and 9..31 have no field defined --> |
| <bitfield name="SRC" low="4" high="7" type="cp_draw_pred_src"/> |
| <bitfield name="TEST" pos="8" type="cp_draw_pred_test"/> |
| </reg32> |
| <reg64 offset="1" name="MEM_ADDR" type="address"/> |
| </domain> |
| |
| <domain name="CP_SET_DRAW_STATE" width="32" varset="chip" variants="A4XX-"> <!-- |
| payload is an array of 3-word entries (stride 3, up to 100 of them): |
| word 0 is count/flags/group id, word 1 is ADDR_LO and word 2 is |
| ADDR_HI. NOTE(review): word 2 is only declared for A5XX-, while the |
| stride stays 3 for A4XX as well; confirm the A4XX entry size. |
| --> |
| <array offset="0" stride="3" length="100"> |
| <reg32 offset="0" name="0"> |
| <bitfield name="COUNT" low="0" high="15" type="uint"/> |
| <bitfield name="DIRTY" pos="16" type="boolean"/> |
| <bitfield name="DISABLE" pos="17" type="boolean"/> |
| <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/> |
| <bitfield name="LOAD_IMMED" pos="19" type="boolean"/> |
| <!-- the next three flag bits are only declared for A6XX and later --> |
| <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> |
| <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </array> |
| </domain> |
| |
| <domain name="CP_SET_BIN" width="32"> <!-- word 0 has no fields (see doc); words 1 and 2 each pack an X/Y pair as two 16-bit uints --> |
| <doc>value at offset 0 always seems to be 0x00000000..</doc> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="X1" low="0" high="15" type="uint"/> |
| <bitfield name="Y1" low="16" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="X2" low="0" high="15" type="uint"/> |
| <bitfield name="Y2" low="16" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA" width="32"> <!-- two words, each a full 32-bit address field --> |
| <reg32 offset="0" name="0"> |
| <!-- corresponds to VSC_PIPE[n].DATA_ADDR --> |
| <bitfield name="BIN_DATA_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <!-- seems to correspond to VSC_SIZE_ADDRESS --> |
| <bitfield name="BIN_SIZE_ADDRESS" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA5" width="32"> <!-- |
| word 0 holds VSC_SIZE and VSC_N; words 1..6 are three LO/HI address |
| pairs (BIN_DATA_ADDR, BIN_SIZE_ADDRESS, BIN_PRIM_STRM); the two reg64 |
| entries at offsets 7 and 9 are named only by their offset. |
| --> |
| <reg32 offset="0" name="0"> <!-- bits 0..15 and 27..31 have no field defined --> |
| <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> |
| <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> |
| <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> |
| <bitfield name="VSC_N" low="22" high="26" type="uint"/> |
| </reg32> |
| <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> |
| <reg32 offset="3" name="3"> |
| <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> |
| </reg32> |
| <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> |
| <reg32 offset="5" name="5"> |
| <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> |
| </reg32> |
| <!-- |
| a7xx adds a few more addresses to the end of the pkt |
| --> |
| <reg64 offset="7" name="7"/> |
| <reg64 offset="9" name="9"/> |
| </domain> |
| |
| <domain name="CP_SET_BIN_DATA5_OFFSET" width="32"> <!-- |
| four words: word 0 has the same VSC_SIZE/VSC_N fields as |
| CP_SET_BIN_DATA5, words 1..3 are 32-bit offsets rather than LO/HI |
| address pairs. |
| --> |
| <doc> |
| Like CP_SET_BIN_DATA5, but set the pointers as offsets from the |
| pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful |
| for Vulkan where these values aren't known when the command |
| stream is recorded. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> |
| <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> |
| <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> |
| <bitfield name="VSC_N" low="22" high="26" type="uint"/> |
| </reg32> |
| <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> |
| <reg32 offset="2" name="2"> |
| <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_RMW" width="32"> <!-- |
| three words: word 0 is the destination register plus control bits, |
| words 1 and 2 are SRC0 and SRC1. Whether a source word is an immediate |
| or a register is chosen by SRC0_IS_REG / SRC1_IS_REG. The ampersands in |
| the doc text are written as entities so the file stays well-formed. |
| --> |
| <doc> |
| Modifies DST_REG using two sources that can either be registers |
| or immediates. If SRC1_ADD is set, then do the following: |
| |
| $dst = (($dst &amp; $src0) rot $rotate) + $src1 |
| |
| Otherwise: |
| |
| $dst = (($dst &amp; $src0) rot $rotate) | $src1 |
| |
| Here "rot" means rotate left. |
| </doc> |
| <reg32 offset="0" name="0"> <!-- bits 18..23 have no field defined --> |
| <bitfield name="DST_REG" low="0" high="17" type="hex"/> |
| <bitfield name="ROTATE" low="24" high="28" type="uint"/> |
| <bitfield name="SRC1_ADD" pos="29" type="boolean"/> |
| <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/> |
| <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC0" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC1" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM" width="32"> <!-- |
| word 0: source register, count and two flag bits; word 1: DEST; |
| word 2 (A5XX- only): DEST_HI. |
| --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> <!-- words 0..2 have the same layout as CP_REG_TO_MEM; word 3 names the offset source --> |
| <doc> |
| Like CP_REG_TO_MEM, but the memory address to write to can be |
| offsetted using either one or two registers or scratch |
| registers. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> <!-- bit 18 and bits 20..31 have no field defined --> |
| <bitfield name="OFFSET0" low="0" high="17" type="hex"/> |
| <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> |
| </reg32> |
| <!-- followed by an optional identical OFFSET1 dword --> |
| </domain> |
| |
| <domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32"> <!-- words 0..2 have the same layout as CP_REG_TO_MEM; words 3..4 are an OFFSET_LO/OFFSET_HI pair --> |
| <doc> |
| Like CP_REG_TO_MEM, but the memory address to write to can be |
| offsetted using a DWORD in memory. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="18" high="29" type="uint"/> |
| <bitfield name="64B" pos="30" type="boolean"/> |
| <bitfield name="ACCUMULATE" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DEST" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="DEST_HI" low="0" high="31"/> |
| </reg32> |
| <!-- NOTE(review): presumably the LO/HI halves of the address of the offset DWORD mentioned in the doc; confirm --> |
| <reg32 offset="3" name="3"> |
| <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MEM_TO_REG" width="32"> <!-- |
| word 0: destination register, count and two flag bits; word 1: SRC; |
| word 2 (A5XX- only): SRC_HI. Unlike CP_REG_TO_MEM, CNT starts at bit |
| 19 here and bit 18 has no field defined in this packet. |
| --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- number of registers/dwords copied is max(CNT, 1). --> |
| <bitfield name="CNT" low="19" high="29" type="uint"/> |
| <!-- shift each DWORD left by 2 while copying --> |
| <bitfield name="SHIFT_BY_2" pos="30" type="boolean"/> |
| <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> |
| <bitfield name="UNK31" pos="31" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> |
| <bitfield name="SRC_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MEM_TO_MEM" width="32"> <!-- only the flags word (word 0) is described as a register; bits 3..28 of it have no field defined --> |
| <reg32 offset="0" name="0"> |
| <!-- |
| not sure how many src operands we have, but the low |
| bits negate the n'th src argument. |
| --> |
| <bitfield name="NEG_A" pos="0" type="boolean"/> |
| <bitfield name="NEG_B" pos="1" type="boolean"/> |
| <bitfield name="NEG_C" pos="2" type="boolean"/> |
| |
| <!-- if set treat src/dst as 64bit values --> |
| <bitfield name="DOUBLE" pos="29" type="boolean"/> |
| <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand --> |
| <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/> |
| <!-- some other kind of wait --> |
| <bitfield name="UNK31" pos="31" type="boolean"/> |
| </reg32> |
| <!-- |
| followed by sequence of addresses.. the first is the |
| destination and the rest are N src addresses which are |
| summed (after being negated if NEG_x bit set) allowing |
| to do things like 'result += end - start' (which turns |
| out to be useful for queries and accumulating results |
| across multiple tiles) |
| --> |
| </domain> |
| |
| <domain name="CP_MEMCPY" width="32"> <!-- five words: DWORDS, then SRC_LO/SRC_HI, then DST_LO/DST_HI --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="DWORDS" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="DST_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="DST_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_TO_SCRATCH" width="32"> <!-- single word: REG (bits 0..17), SCRATCH (20..22), CNT (24..26); the remaining bits have no field defined --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| <!-- number of registers/dwords copied is CNT + 1. --> |
| <bitfield name="CNT" low="24" high="26" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SCRATCH_TO_REG" width="32"> <!-- single word with the same REG/SCRATCH/CNT positions as CP_REG_TO_SCRATCH, plus the UNK18 flag at bit 18 --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG" low="0" high="17" type="hex"/> |
| <!-- note: CP_MEM_TO_REG always sets this when writing to the register --> |
| <bitfield name="UNK18" pos="18" type="boolean"/> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| <!-- number of registers/dwords copied is CNT + 1. --> |
| <bitfield name="CNT" low="24" high="26" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SCRATCH_WRITE" width="32"> <!-- word 0 only defines SCRATCH (bits 20..22, same position as in CP_REG_TO_SCRATCH); the data words follow --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="SCRATCH" low="20" high="22" type="uint"/> |
| </reg32> |
| <!-- followed by one or more DWORDs to write to scratch registers --> |
| </domain> |
| |
| <domain name="CP_MEM_WRITE" width="32"> <!-- words 0..1 are the ADDR_LO/ADDR_HI pair; the data words follow --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <!-- followed by the DWORDs to write --> |
| </domain> |
| |
| <enum name="cp_cond_function"> <!-- value set of the FUNCTION field in CP_COND_WRITE, CP_COND_WRITE5 and CP_WAIT_REG_MEM; value 7 is not named --> |
| <value name="WRITE_ALWAYS" value="0"/> |
| <value name="WRITE_LT" value="1"/> |
| <value name="WRITE_LE" value="2"/> |
| <value name="WRITE_EQ" value="3"/> |
| <value name="WRITE_NE" value="4"/> |
| <value name="WRITE_GE" value="5"/> |
| <value name="WRITE_GT" value="6"/> |
| </enum> |
| |
| <domain name="CP_COND_WRITE" width="32"> <!-- |
| six words: control, POLL_ADDR, REF, MASK, WRITE_ADDR, WRITE_DATA. |
| Addresses are single 32-bit words here (compare CP_COND_WRITE5, which |
| uses LO/HI pairs). |
| --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="WRITE_ADDR" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="WRITE_DATA" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <enum name="poll_memory_type"> <!-- value set of the two-bit POLL field in CP_COND_WRITE5 and CP_WAIT_REG_MEM; value 3 is only declared for A7XX and later --> |
| <value name="POLL_REGISTER" value="0"/> |
| <value name="POLL_MEMORY" value="1"/> |
| <value name="POLL_SCRATCH" value="2"/> |
| <value name="POLL_ON_CHIP" value="3" varset="chip" variants="A7XX-"/> |
| </enum> |
| |
| <domain name="CP_COND_WRITE5" width="32"> <!-- |
| eight words; same fields as CP_COND_WRITE, but with LO/HI address |
| pairs, an added SIGNED_COMPARE bit, and a two-bit POLL selector in |
| place of the single POLL_MEMORY bit. |
| --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> |
| <!-- POLL_REGISTER polls a register at POLL_ADDR_LO. --> |
| <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="7" name="7"> |
| <bitfield name="WRITE_DATA" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_MEM_GTE" width="32"> <!-- four words: RESERVED, POLL_ADDR_LO, POLL_ADDR_HI, REF --> |
| <doc> |
| Wait until a memory value is greater than or equal to the |
| reference, using signed comparison. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <!-- Reserved for flags, presumably? Unused in FW --> |
| <bitfield name="RESERVED" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_REG_MEM" width="32"> <!-- |
| six words: words 0..4 have the same fields as words 0..4 of |
| CP_COND_WRITE5 (control, POLL_ADDR_LO/HI, REF, MASK); word 5 is |
| DELAY_LOOP_CYCLES instead of a write address. |
| --> |
| <doc> |
| This uses the same internal comparison as CP_COND_WRITE, |
| but waits until the comparison is true instead. It busy-loops in |
| the CP for the given number of cycles before trying again. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> |
| <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> |
| <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> |
| <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="MASK" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_TWO_REGS" width="32"> <!-- three words: REG0 and REG1 (18 bits each, bits 0..17), then the 32-bit REF --> |
| <doc> |
| Waits for REG0 to not be 0 or REG1 to not equal REF |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="REG0" low="0" high="17" type="hex"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="REG1" low="0" high="17" type="hex"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="REF" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_DISPATCH_COMPUTE" width="32"> <!-- four words: word 0 has no fields defined, words 1..3 are X, Y and Z --> |
| <reg32 offset="0" name="0"/> |
| <reg32 offset="1" name="1"> |
| <bitfield name="X" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="Y" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="Z" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_RENDER_MODE" width="32"> <!-- |
| eight words: MODE, a first LO/HI address pair, a flags word, one word |
| without fields, then length plus LO/HI address of a second buffer. |
| --> |
| <enum name="render_mode_cmd"> |
| <value name="BYPASS" value="1"/> |
| <value name="BINNING" value="2"/> |
| <value name="GMEM" value="3"/> |
| <value name="BLIT2D" value="5"/> |
| <!-- placeholder name.. used when CP_BLIT packets with BLIT_OP_SCALE?? --> |
| <value name="BLIT2DSCALE" value="7"/> |
| <!-- 8 is set before going back to BYPASS when leaving 2D --> |
| <value name="END2D" value="8"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="MODE" low="0" high="8" type="render_mode_cmd"/> |
| <!-- |
| usually 0x1/0x3; 0x5/0x8 are sometimes seen together with unknown |
| registers in the 0x21xx range.. possibly (at least some) a5xx |
| variants have a 2d core? |
| --> |
| </reg32> |
| <!-- first buffer: I think the GPU saves context here in case of a ctx switch? --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <!-- |
| set when in GMEM.. maybe a hint that GMEM contents must be |
| preserved across a ctx switch? |
| --> |
| <bitfield name="VSC_ENABLE" pos="3" type="boolean"/> |
| <bitfield name="GMEM_ENABLE" pos="4" type="boolean"/> |
| </reg32> |
| <reg32 offset="4" name="4"/> |
| <!-- second buffer looks like some cmdstream.. its length is in dwords: --> |
| <reg32 offset="5" name="5"> |
| <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="ADDR_1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="7" name="7"> |
| <bitfield name="ADDR_1_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <!-- this looks fairly similar to CP_SET_RENDER_MODE minus first dword --> |
| <domain name="CP_COMPUTE_CHECKPOINT" width="32"> |
| <!-- |
| Eight-dword packet: first 64-bit buffer address (dwords 0..1), two |
| undescribed dwords (2..3), length and 64-bit address of a second |
| buffer (4..6), and a trailing undescribed dword (7). |
| --> |
| <!-- I think first buffer is for GPU to save context in case of ctx switch? --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dwords 2 and 3 are declared without any described fields --> |
| <reg32 offset="2" name="2"> |
| </reg32> |
| <reg32 offset="3" name="3"/> |
| <!-- second buffer looks like some cmdstream.. length in dwords: --> |
| <reg32 offset="4" name="4"> |
| <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <bitfield name="ADDR_1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="6" name="6"> |
| <bitfield name="ADDR_1_HI" low="0" high="31"/> |
| </reg32> |
| <!-- dword 7 is declared without any described fields --> |
| <reg32 offset="7" name="7"/> |
| </domain> |
| |
| <domain name="CP_PERFCOUNTER_ACTION" width="32"> |
| <!-- |
| Three-dword packet: dword 0 has no described fields, dwords 1..2 hold |
| the low and high halves of a 64-bit address. |
| NOTE(review): the meaning of dword 0 and of the address is not visible here. |
| --> |
| <reg32 offset="0" name="0"> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain varset="chip" name="CP_EVENT_WRITE" width="32"> |
| <!-- |
| Four-dword packet: event selector plus flags (dword 0), a 64-bit |
| address (dwords 1..2) and a final dword with no described fields. |
| --> |
| <reg32 offset="0" name="0"> |
| <!-- event code taken from the chip-dependent vgt_event_type enum --> |
| <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/> |
| <!-- when set, write back timestamp instead of value from packet: --> |
| <bitfield name="TIMESTAMP" pos="30" type="boolean"/> |
| <!-- NOTE(review): presumably requests an interrupt for the event; confirm --> |
| <bitfield name="IRQ" pos="31" type="boolean"/> |
| </reg32> |
| <!-- |
| TODO what is gpuaddr for, seems to be all 0's.. maybe needed for |
| context switch? |
| --> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <!-- ??? --> |
| </reg32> |
| </domain> |
| |
| <domain varset="chip" name="CP_EVENT_WRITE7" width="32"> |
| <!-- |
| Dword 0 selects the event, the optional sample-count and counter |
| actions, and the source/destination of an optional write. The layout |
| of the following dwords depends on WRITE_DST (see the two stripes). |
| --> |
| <enum name="event_write_src"> |
| <!-- Write payload[0] --> |
| <value value="0" name="EV_WRITE_USER_32B"/> |
| <!-- Write payload[0] payload[1] --> |
| <value value="1" name="EV_WRITE_USER_64B"/> |
| <!-- Write (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL) --> |
| <value value="2" name="EV_WRITE_TIMESTAMP_SUM"/> |
| <!-- NOTE(review): presumably writes the always-on counter value; confirm --> |
| <value value="3" name="EV_WRITE_ALWAYSON"/> |
| <!-- Write payload[1] regs starting at payload[0] offset --> |
| <value value="4" name="EV_WRITE_REGS_CONTENT"/> |
| </enum> |
| |
| <!-- destination kind; used as the variant set for the address stripes below --> |
| <enum name="event_write_dst"> |
| <value value="0" name="EV_DST_RAM"/> |
| <value value="1" name="EV_DST_ONCHIP"/> |
| </enum> |
| |
| <reg32 offset="0" name="0"> |
| <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/> |
| <bitfield name="WRITE_SAMPLE_COUNT" pos="12" type="boolean"/> |
| <!-- Write sample count at (iova + 16) --> |
| <bitfield name="SAMPLE_COUNT_END_OFFSET" pos="13" type="boolean"/> |
| <!-- *(iova + 8) = *(iova + 16) - *iova --> |
| <bitfield name="WRITE_SAMPLE_COUNT_DIFF" pos="14" type="boolean"/> |
| |
| <!-- Next 4 flags are valid to set only when concurrent binning is enabled --> |
| <!-- Increment 16b BV counter. Valid only in BV pipe --> |
| <bitfield name="INC_BV_COUNT" pos="16" type="boolean"/> |
| <!-- Increment 16b BR counter. Valid only in BR pipe --> |
| <bitfield name="INC_BR_COUNT" pos="17" type="boolean"/> |
| <bitfield name="CLEAR_RENDER_RESOURCE" pos="18" type="boolean"/> |
| <bitfield name="CLEAR_LRZ_RESOURCE" pos="19" type="boolean"/> |
| |
| <!-- 3-bit source selector; b23 is not described --> |
| <bitfield name="WRITE_SRC" low="20" high="22" type="event_write_src"/> |
| <!-- addvariant: the value of this field selects which stripe below applies --> |
| <bitfield name="WRITE_DST" pos="24" type="event_write_dst" addvariant="yes"/> |
| <!-- Writes into WRITE_DST from WRITE_SRC. RB_DONE_TS requires WRITE_ENABLED. --> |
| <bitfield name="WRITE_ENABLED" pos="27" type="boolean"/> |
| </reg32> |
| |
| <!-- RAM destination: 64-bit address in dwords 1..2, payload in dwords 3..4 --> |
| <stripe varset="event_write_dst" variants="EV_DST_RAM"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="PAYLOAD_0" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="PAYLOAD_1" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| |
| <!-- |
| On-chip destination: 32-bit address in dword 1; dword 2 is not |
| described in this variant, and the payload stays at dwords 3..4. |
| --> |
| <stripe varset="event_write_dst" variants="EV_DST_ONCHIP"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ONCHIP_ADDR_0" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="PAYLOAD_0" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="PAYLOAD_1" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_BLIT" width="32"> |
| <!-- |
| Five-dword packet: operation selector, then four coordinate dwords. |
| Each coordinate dword packs a 14-bit X in b0..b13 and a 14-bit Y in |
| b16..b29 (source corner 1, source corner 2, dest corner 1, dest corner 2). |
| --> |
| <enum name="cp_blit_cmd"> |
| <!-- value 2 is not defined by this enum --> |
| <value value="0" name="BLIT_OP_FILL"/> |
| <value value="1" name="BLIT_OP_COPY"/> |
| <value value="3" name="BLIT_OP_SCALE"/> <!-- used for mipmap generation --> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="OP" low="0" high="3" type="cp_blit_cmd"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="SRC_X1" low="0" high="13" type="uint"/> |
| <bitfield name="SRC_Y1" low="16" high="29" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="SRC_X2" low="0" high="13" type="uint"/> |
| <bitfield name="SRC_Y2" low="16" high="29" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="DST_X1" low="0" high="13" type="uint"/> |
| <bitfield name="DST_Y1" low="16" high="29" type="uint"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <bitfield name="DST_X2" low="0" high="13" type="uint"/> |
| <bitfield name="DST_Y2" low="16" high="29" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_EXEC_CS" width="32"> |
| <!-- |
| Four-dword packet: dword 0 has no described fields; dwords 1..3 hold |
| the 32-bit unsigned NGROUPS_X/Y/Z values. |
| NOTE(review): presumably the workgroup counts per dimension; confirm. |
| --> |
| <reg32 offset="0" name="0"> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="NGROUPS_X" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="NGROUPS_Y" low="0" high="31" type="uint"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="NGROUPS_Z" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_EXEC_CS_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> |
| <!-- |
| Layout differs per chip generation: A4XX uses a single 32-bit ADDR |
| dword, while A5XX and later split the address into ADDR_LO/ADDR_HI, |
| which moves the local-size dword from offset 2 to offset 3. |
| Dword 0 has no described fields in either layout. |
| --> |
| <reg32 offset="0" name="0"> |
| </reg32> |
| <stripe varset="chip" variants="A4XX"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <!-- three 10-bit fields; b0..b1 of this dword are not described --> |
| <!-- localsize is value minus one: --> |
| <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> |
| <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> |
| <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> |
| </reg32> |
| </stripe> |
| <stripe varset="chip" variants="A5XX-"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <!-- same bit layout as the A4XX local-size dword, one offset later --> |
| <!-- localsize is value minus one: --> |
| <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> |
| <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> |
| <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> |
| </reg32> |
| </stripe> |
| </domain> |
| |
| <domain name="CP_SET_MARKER" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc>Tell CP the current operation mode, indicates save and restore procedure</doc> |
| <!-- |
| Marker values. Values up to 0xe fit in the 4-bit MARKER field; the |
| IFPC values (0x100/0x101) have b8 set and are only representable in |
| the 9-bit MODE field. Values 3 and 9..0xb are not defined here. |
| --> |
| <enum name="a6xx_marker"> |
| <value value="1" name="RM6_BYPASS"/> |
| <value value="2" name="RM6_BINNING"/> |
| <value value="4" name="RM6_GMEM"/> |
| <value value="5" name="RM6_ENDVIS"/> |
| <value value="6" name="RM6_RESOLVE"/> |
| <value value="7" name="RM6_YIELD"/> |
| <value value="8" name="RM6_COMPUTE"/> |
| <value value="0xc" name="RM6_BLIT2DSCALE"/> <!-- no-op (at least on current sqe fw) --> |
| |
| <!-- |
| These values come from a6xx_set_marker() in the |
| downstream kernel, and they can only be set by the kernel |
| --> |
| <value value="0xd" name="RM6_IB1LIST_START"/> |
| <value value="0xe" name="RM6_IB1LIST_END"/> |
| <!-- IFPC - inter-frame power collapse --> |
| <value value="0x100" name="RM6_IFPC_ENABLE"/> |
| <value value="0x101" name="RM6_IFPC_DISABLE"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- |
| NOTE: blob driver and some versions of freedreno/turnip set |
| b4, which is unused (at least by current sqe fw), but interferes |
| with parsing if we extend the size of the bitfield to include |
| b8 (only sent by kernel mode driver). Really, the way the |
| parsing works in the firmware, only b0-b3 are considered, but |
| if b8 is set, the low bits are interpreted differently. To |
| model this, without getting confused by spurious b4, this is |
| described as two overlapping bitfields: |
| --> |
| <!-- MODE spans b0..b8 (covers the b8 kernel-only values); MARKER spans b0..b3 only --> |
| <bitfield name="MODE" low="0" high="8" type="a6xx_marker"/> |
| <bitfield name="MARKER" low="0" high="3" type="a6xx_marker"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_PSEUDO_REG" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc>Set internal CP registers, used to indicate context save data addresses</doc> |
| <!-- |
| Identifiers of the pseudo registers that can be written. The value |
| space has gaps (5..7 and 13..15 are not defined here). |
| --> |
| <enum name="pseudo_reg"> |
| <value value="0" name="SMMU_INFO"/> |
| <value value="1" name="NON_SECURE_SAVE_ADDR"/> |
| <value value="2" name="SECURE_SAVE_ADDR"/> |
| <value value="3" name="NON_PRIV_SAVE_ADDR"/> |
| <value value="4" name="COUNTER"/> |
| |
| <!-- |
| On a6xx the registers are set directly and CP_SET_BIN_DATA5_OFFSET reads them, |
| but that doesn't work with concurrent binning because BR will be reading from |
| a different set of streams than BV is writing, so on a7xx we have these |
| pseudo-regs instead, which do the right thing. |
| |
| The corresponding VSC registers exist, and they're written by BV when it |
| encounters CP_SET_PSEUDO_REG. When BR later encounters the same CP_SET_PSEUDO_REG |
| it will only write some private scratch registers which are read by |
| CP_SET_BIN_DATA5_OFFSET. |
| |
| If concurrent binning is disabled then BR also does binning so it will also |
| write the "real" registers in BR. |
| --> |
| <value value="8" name="DRAW_STRM_ADDRESS"/> |
| <value value="9" name="DRAW_STRM_SIZE_ADDRESS"/> |
| <value value="10" name="PRIM_STRM_ADDRESS"/> |
| <value value="11" name="UNK_STRM_ADDRESS"/> |
| <value value="12" name="UNK_STRM_SIZE_ADDRESS"/> |
| |
| <value value="16" name="BINDLESS_BASE_0_ADDR"/> |
| <value value="17" name="BINDLESS_BASE_1_ADDR"/> |
| <value value="18" name="BINDLESS_BASE_2_ADDR"/> |
| <value value="19" name="BINDLESS_BASE_3_ADDR"/> |
| <value value="20" name="BINDLESS_BASE_4_ADDR"/> |
| <value value="21" name="BINDLESS_BASE_5_ADDR"/> |
| <value value="22" name="BINDLESS_BASE_6_ADDR"/> |
| </enum> |
| <!-- |
| The payload is a sequence of three-dword entries (stride 3, declared |
| length 100): the pseudo register id followed by the low and high |
| halves of its 64-bit value. |
| --> |
| <array offset="0" stride="3" length="100"> |
| <reg32 offset="0" name="0"> |
| <!-- 11-bit selector (b0..b10) --> |
| <bitfield name="PSEUDO_REG" low="0" high="10" type="pseudo_reg"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="HI" low="0" high="31"/> |
| </reg32> |
| </array> |
| </domain> |
| |
| <domain name="CP_REG_TEST" width="32" varset="chip" prefix="chip" variants="A6XX-"> |
| <doc> |
| Tests bit in specified register and sets predicate for CP_COND_REG_EXEC. |
| So: |
| |
| opcode: CP_REG_TEST (39) (2 dwords) |
| { REG = 0xc10 | BIT = 0 } |
| 0000: 70b90001 00000c10 |
| opcode: CP_COND_REG_EXEC (47) (3 dwords) |
| 0000: 70c70002 10000000 00000004 |
| opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) |
| |
| Will execute the CP_INDIRECT_BUFFER only if b0 in the register at |
| offset 0x0c10 is 1 |
| </doc> |
| <!-- where the tested value comes from; also the variant set for the b0..b17 field --> |
| <enum name="source_type"> |
| <value value="0" name="SOURCE_REG"/> |
| <!-- Don't confuse with scratch registers, this is a separate memory |
| written into by CP_MEM_TO_SCRATCH_MEM. --> |
| <value value="1" name="SOURCE_SCRATCH_MEM" varset="chip" variants="A7XX-"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- |
| b0..b17 is named REG or SCRATCH_MEM_OFFSET depending on the |
| SOURCE field (b18), which is declared with addvariant. |
| --> |
| <!-- the register to test --> |
| <bitfield name="REG" low="0" high="17" varset="source_type" variants="SOURCE_REG"/> |
| <bitfield name="SCRATCH_MEM_OFFSET" low="0" high="17" varset="source_type" variants="SOURCE_SCRATCH_MEM"/> |
| <bitfield name="SOURCE" pos="18" type="source_type" addvariant="yes"/> |
| <!-- the bit to test --> |
| <bitfield name="BIT" low="20" high="24" type="uint"/> |
| <!-- skip implied CP_WAIT_FOR_ME --> |
| <bitfield name="SKIP_WAIT_FOR_ME" pos="25" type="boolean"/> |
| <!-- the predicate bit to set (new in gen3+) --> |
| <bitfield name="PRED_BIT" low="26" high="30" type="uint"/> |
| <!-- update the predicate reg directly (new in gen3+) --> |
| <bitfield name="PRED_UPDATE" pos="31" type="boolean"/> |
| </reg32> |
| |
| <!-- |
| In PRED_UPDATE mode, the predicate reg is updated directly using two |
| more dwords, ignoring other bits: |
| |
| PRED_REG = (PRED_REG & ~PRED_MASK) | (PRED_VAL & PRED_MASK); |
| --> |
| <reg32 offset="1" name="PRED_MASK" type="hex"/> |
| <reg32 offset="2" name="PRED_VAL" type="hex"/> |
| </domain> |
| |
| <!-- I *think* this existed at least as far back as a4xx --> |
| <domain name="CP_COND_REG_EXEC" width="32"> |
| <!-- |
| Dword 0 carries the comparison MODE in b28..b31; MODE is declared |
| with addvariant, so it decides how the other dword 0 fields and |
| dword 1 (see the stripes) are named. Dword 2 is a DWORDS count |
| declared for all modes. |
| --> |
| <enum name="compare_mode"> |
| <!-- use the predicate bit set by CP_REG_TEST --> |
| <value value="1" name="PRED_TEST"/> |
| <!-- compare two registers directly for equality --> |
| <value value="2" name="REG_COMPARE"/> |
| <!-- test if certain render modes are set via CP_SET_MARKER --> |
| <value value="3" name="RENDER_MODE" varset="chip" variants="A6XX-"/> |
| <!-- compare REG0 for equality with immediate --> |
| <value value="4" name="REG_COMPARE_IMM" varset="chip" variants="A7XX-"/> |
| <!-- test which of BR/BV are enabled --> |
| <value value="5" name="THREAD_MODE" varset="chip" variants="A7XX-"/> |
| </enum> |
| <reg32 offset="0" name="0" varset="compare_mode"> |
| <!-- |
| NOTE(review): REG0 is only declared for the REG_COMPARE variant, |
| although the enum comment for REG_COMPARE_IMM also refers to |
| REG0; confirm whether that variant should be listed here too. |
| --> |
| <bitfield name="REG0" low="0" high="17" variants="REG_COMPARE" type="hex"/> |
| |
| <!-- the predicate bit to test (new in gen3+) --> |
| <bitfield name="PRED_BIT" low="18" high="22" variants="PRED_TEST" type="uint"/> |
| <bitfield name="SKIP_WAIT_FOR_ME" pos="23" varset="chip" variants="A7XX-" type="boolean"/> |
| <!-- With REG_COMPARE instead of register read from ONCHIP memory --> |
| <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/> |
| |
| <!-- |
| Note: these bits have the same meaning, and use the same |
| internal mechanism as the bits in CP_SET_DRAW_STATE. |
| When RENDER_MODE is selected, they're used as |
| a bitmask of which modes pass the test. |
| --> |
| |
| <!-- RM6_BINNING --> |
| <bitfield name="BINNING" pos="25" variants="RENDER_MODE" type="boolean"/> |
| <!-- all others --> |
| <bitfield name="GMEM" pos="26" variants="RENDER_MODE" type="boolean"/> |
| <!-- RM6_BYPASS --> |
| <bitfield name="SYSMEM" pos="27" variants="RENDER_MODE" type="boolean"/> |
| |
| <!-- the same b25..b27 are reused with different names when MODE is THREAD_MODE --> |
| <bitfield name="BV" pos="25" variants="THREAD_MODE" type="boolean"/> |
| <bitfield name="BR" pos="26" variants="THREAD_MODE" type="boolean"/> |
| <bitfield name="LPAC" pos="27" variants="THREAD_MODE" type="boolean"/> |
| |
| <bitfield name="MODE" low="28" high="31" type="compare_mode" addvariant="yes"/> |
| </reg32> |
| |
| <!-- dword 1 per mode: a 24-bit DWORDS count for PRED_TEST, RENDER_MODE and THREAD_MODE --> |
| <stripe varset="compare_mode" variants="PRED_TEST"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DWORDS" low="0" high="23" type="uint"/> |
| </reg32> |
| </stripe> |
| |
| <!-- REG_COMPARE: dword 1 names the second register instead of a count --> |
| <stripe varset="compare_mode" variants="REG_COMPARE"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="REG1" low="0" high="17" type="hex"/> |
| <!-- Instead of register read from ONCHIP memory --> |
| <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/> |
| </reg32> |
| </stripe> |
| |
| <stripe varset="compare_mode" variants="RENDER_MODE"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DWORDS" low="0" high="23" type="uint"/> |
| </reg32> |
| </stripe> |
| |
| <!-- REG_COMPARE_IMM: dword 1 is the 32-bit immediate --> |
| <stripe varset="compare_mode" variants="REG_COMPARE_IMM"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="IMM" low="0" high="31"/> |
| </reg32> |
| </stripe> |
| |
| <stripe varset="compare_mode" variants="THREAD_MODE"> |
| <reg32 offset="1" name="1"> |
| <bitfield name="DWORDS" low="0" high="23" type="uint"/> |
| </reg32> |
| </stripe> |
| |
| <!-- |
| NOTE(review): presumably the dword count for the modes whose dword 1 |
| holds REG1 or IMM rather than DWORDS; confirm. |
| --> |
| <reg32 offset="2" name="2"> |
| <bitfield name="DWORDS" low="0" high="23" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_COND_EXEC" width="32"> |
| <!-- |
| Six-dword packet: two 64-bit addresses (ADDR0 in dwords 0..1, ADDR1 in |
| dwords 2..3), a 32-bit reference value, and the number of dwords that |
| are conditionally executed. |
| --> |
| <doc> |
| Executes the following DWORDs of commands if the dword at ADDR0 |
| is not equal to 0 and the dword at ADDR1 is less than REF |
| (signed comparison). |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR0_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <bitfield name="ADDR1_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="ADDR1_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="4" name="4"> |
| <!-- value the dword at ADDR1 is compared against --> |
| <bitfield name="REF" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="5" name="5"> |
| <!-- count of following command dwords covered by the condition --> |
| <bitfield name="DWORDS" low="0" high="31" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_SET_AMBLE" width="32"> |
| <!-- |
| Three-dword packet: 64-bit IB address (dwords 0..1), then a dword |
| packing the 20-bit IB length and the 2-bit amble slot being set. |
| --> |
| <doc> |
| Used by the userspace and kernel drivers to set various IB's |
| which are executed during context save/restore for handling |
| state that isn't restored by the context switch routine itself. |
| </doc> |
| <enum name="amble_type"> |
| <value name="PREAMBLE_AMBLE_TYPE" value="0"> |
| <doc>Executed unconditionally when switching back to the context.</doc> |
| </value> |
| <value name="BIN_PREAMBLE_AMBLE_TYPE" value="1"> |
| <!-- |
| The marker named below must match a value of the a6xx_marker |
| enum in CP_SET_MARKER; that enum calls value 7 RM6_YIELD. |
| --> |
| <doc> |
| Executed when switching back after switching |
| away during execution of |
| a CP_SET_MARKER packet with RM6_YIELD as the |
| payload *and* skipsaverestore is set. This is |
| expected to restore static register values not |
| saved when skipsaverestore is set. |
| </doc> |
| </value> |
| <value name="POSTAMBLE_AMBLE_TYPE" value="2"> |
| <doc> |
| Executed when switching away from the context, |
| except for context switches initiated via |
| CP_YIELD. |
| </doc> |
| </value> |
| <value name="KMD_AMBLE_TYPE" value="3"> |
| <doc> |
| This can only be set by the RB (i.e. the kernel) |
| and executes with protected mode off, but |
| is otherwise similar to POSTAMBLE_AMBLE_TYPE. |
| </doc> |
| </value> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="ADDR_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <bitfield name="ADDR_HI" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <!-- b0..b19: length of the IB; b20..b21: which amble slot it is assigned to --> |
| <bitfield name="DWORDS" low="0" high="19" type="uint"/> |
| <bitfield name="TYPE" low="20" high="21" type="amble_type"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_REG_WRITE" width="32"> |
| <!-- |
| Three-dword packet: a 4-bit tracker selector in dword 0, followed by |
| two dwords with no described fields. |
| NOTE(review): dwords 1 and 2 are presumably the register offset and |
| the value to write; confirm. |
| --> |
| <!-- each tracker value is a distinct single bit of the 4-bit TRACKER field --> |
| <enum name="reg_tracker"> |
| <doc> |
| Keep shadow copies of these registers and only set them |
| when drawing, avoiding redundant writes: |
| - VPC_CNTL_0 |
| - HLSQ_CONTROL_1_REG |
| - HLSQ_UNKNOWN_B980 |
| </doc> |
| <value name="TRACK_CNTL_REG" value="0x1"/> |
| <doc> |
| Track RB_RENDER_CNTL, and insert a WFI in the following |
| situation: |
| - There is a write that disables binning |
| - There was a draw with binning left enabled, but in |
| BYPASS mode |
| Presumably this is a hang workaround? |
| </doc> |
| <value name="TRACK_RENDER_CNTL" value="0x2"/> |
| <doc> |
| Do a mysterious CP_EVENT_WRITE 0x3f when the low bit of |
| the data to write is 0. Used by the Vulkan blob with |
| PC_MULTIVIEW_CNTL, but this isn't predicated on particular |
| register(s) like the others. |
| </doc> |
| <value name="UNK_EVENT_WRITE" value="0x4"/> |
| <doc> |
| Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and |
| GRAS_LRZ_DEPTH_VIEW with previous values, and if one of |
| the following is true: |
| - GRAS_LRZ_CNTL::GREATER has changed |
| - GRAS_LRZ_CNTL::DIR has changed, the old value is not |
| CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED |
| - GRAS_LRZ_DEPTH_VIEW has changed |
| then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE |
| forced to 1. |
| Only exists in a650_sqe.fw. |
| </doc> |
| <value name="TRACK_LRZ" value="0x8"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="TRACKER" low="0" high="3" type="reg_tracker"/> |
| </reg32> |
| <!-- dwords 1 and 2 are declared without any described fields --> |
| <reg32 offset="1" name="1"/> |
| <reg32 offset="2" name="2"/> |
| </domain> |
| |
| <domain name="CP_SMMU_TABLE_UPDATE" width="32"> |
| <!-- |
| Four-dword packet: TTBR0 split across dword 0 (low 32 bits) and the |
| low 16 bits of dword 1, with ASID in the upper 16 bits of dword 1, |
| followed by CONTEXTIDR and CONTEXTBANK dwords. |
| --> |
| <doc> |
| Note that the SMMU's definition of TTBRn can take different forms |
| depending on the pgtable format. But a5xx+ only uses aarch64 |
| format. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <bitfield name="TTBR0_LO" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <!-- TTBR0_HI and ASID share this dword: b0..b15 and b16..b31 --> |
| <bitfield name="TTBR0_HI" low="0" high="15"/> |
| <bitfield name="ASID" low="16" high="31"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <doc>Unused, does not apply to aarch64 pgtable format</doc> |
| <bitfield name="CONTEXTIDR" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="CONTEXTBANK" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_START_BIN" width="32"> |
| <!-- |
| Five-dword packet: bin count (dword 0), 64-bit prefix address |
| (dwords 1..2), per-bin prefix size (dword 3) and body size (dword 4). |
| --> |
| <reg32 offset="0" name="BIN_COUNT" type="uint"/> |
| <!-- reg64 occupies two dwords, so the next register is at offset 3 --> |
| <reg64 offset="1" name="PREFIX_ADDR" type="address"/> |
| <reg32 offset="3" name="PREFIX_DWORDS"> |
| <doc> |
| Size of prefix for each bin. For each bin index i, the |
| prefix commands at PREFIX_ADDR + i * PREFIX_DWORDS are |
| executed in an IB2 before the IB1 commands following |
| this packet. |
| </doc> |
| </reg32> |
| <reg32 offset="4" name="BODY_DWORDS"> |
| <doc>Number of dwords after this packet until CP_END_BIN</doc> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_WAIT_TIMESTAMP" width="32"> |
| <!-- |
| Dword 0 selects what is compared (WAIT_VALUE_SRC) and where the |
| waited-on value lives (WAIT_DST). The address layout in dwords 1..2 |
| depends on WAIT_DST; SRC_0/SRC_1 follow at dwords 3..4. |
| --> |
| <enum name="ts_wait_value_src"> |
| <!-- Wait for value at memory address to be >= SRC_0 (signed comparison) --> |
| <value value="0" name="TS_WAIT_GE_32B"/> |
| <!-- Wait for value at memory address to be >= SRC_0 (unsigned) --> |
| <value value="1" name="TS_WAIT_GE_64B"/> |
| <!-- |
| NOTE(review): presumably waits against (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL). |
| This comment previously read "Write (...)", which matches the |
| event_write_src enum of CP_EVENT_WRITE7 rather than a wait; confirm. |
| --> |
| <value value="2" name="TS_WAIT_GE_TIMESTAMP_SUM"/> |
| </enum> |
| |
| <!-- location of the waited-on value; variant set for the address stripes below --> |
| <enum name="ts_wait_type"> |
| <value value="0" name="TS_WAIT_RAM"/> |
| <value value="1" name="TS_WAIT_ONCHIP"/> |
| </enum> |
| |
| <reg32 offset="0" name="0"> |
| <bitfield name="WAIT_VALUE_SRC" low="0" high="1" type="ts_wait_value_src"/> |
| <!-- addvariant: the value of this field selects which stripe below applies --> |
| <bitfield name="WAIT_DST" pos="4" type="ts_wait_type" addvariant="yes"/> |
| </reg32> |
| |
| <!-- RAM: 64-bit address occupying dwords 1..2 --> |
| <stripe varset="ts_wait_type" variants="TS_WAIT_RAM"> |
| <reg64 offset="1" name="ADDR" type="address"/> |
| </stripe> |
| |
| <!-- on-chip: 32-bit address in dword 1 only; dword 2 is not described --> |
| <stripe varset="ts_wait_type" variants="TS_WAIT_ONCHIP"> |
| <reg32 offset="1" name="ONCHIP_ADDR_0" low="0" high="31"/> |
| </stripe> |
| |
| <reg32 offset="3" name="SRC_0"/> |
| <!-- NOTE(review): SRC_1 is presumably only meaningful for TS_WAIT_GE_64B; confirm --> |
| <reg32 offset="4" name="SRC_1"/> |
| </domain> |
| |
| <domain name="CP_BV_BR_COUNT_OPS" width="32"> |
| <!-- Two-dword packet: a 4-bit operation selector, then a 16-bit BR_OFFSET value. --> |
| <enum name="pipe_count_op"> |
| <value name="PIPE_CLEAR_BV_BR" value="0x1"/> |
| <value name="PIPE_SET_BR_OFFSET" value="0x2"/> |
| <!-- Wait until BV_counter > BR_counter --> |
| <value name="PIPE_BR_WAIT_FOR_BV" value="0x3"/> |
| <!-- Wait until (BR_counter + BR_OFFSET) > BV_counter --> |
| <value name="PIPE_BV_WAIT_FOR_BR" value="0x4"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield name="OP" low="0" high="3" type="pipe_count_op"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <!-- offset term used in the PIPE_BV_WAIT_FOR_BR comparison above --> |
| <bitfield name="BR_OFFSET" low="0" high="15" type="uint"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MODIFY_TIMESTAMP" width="32"> |
| <!-- |
| Single-dword packet: an 8-bit ADD amount in b0..b7 and a 4-bit |
| operation selector in b28..b31; b8..b27 are not described. |
| --> |
| <enum name="timestamp_op"> |
| <value name="MODIFY_TIMESTAMP_CLEAR" value="0"/> |
| <value name="MODIFY_TIMESTAMP_ADD_GLOBAL" value="1"/> |
| <value name="MODIFY_TIMESTAMP_ADD_LOCAL" value="2"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <!-- NOTE(review): presumably the amount added by the two ADD operations; confirm --> |
| <bitfield name="ADD" low="0" high="7" type="uint"/> |
| <bitfield name="OP" low="28" high="31" type="timestamp_op"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_MEM_TO_SCRATCH_MEM" width="32"> |
| <!-- |
| Four-dword packet: a 6-bit count, a 6-bit scratch offset, and a 64-bit |
| source address split across dwords 2 (SRC) and 3 (SRC_HI). |
| --> |
| <doc> |
| Best guess is that it is a faster way to fetch all the VSC_STATE registers |
| and keep them in a local scratch memory instead of fetching every time |
| when skipping IBs. |
| </doc> |
| <reg32 offset="0" name="0"> |
| <!-- NOTE(review): presumably the number of dwords to copy; confirm --> |
| <bitfield name="CNT" low="0" high="5" type="uint"/> |
| </reg32> |
| <reg32 offset="1" name="1"> |
| <!-- the 6-bit field can encode up to 63, more than the 48 dwords stated below --> |
| <doc>Scratch memory size is 48 dwords</doc> |
| <bitfield name="OFFSET" low="0" high="5" type="uint"/> |
| </reg32> |
| <reg32 offset="2" name="2"> |
| <!-- low half of the source address; the field name has no _LO suffix --> |
| <bitfield name="SRC" low="0" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="SRC_HI" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_THREAD_CONTROL" width="32"> |
| <!-- |
| Single-dword packet: a 2-bit thread selector in b0..b1 plus two |
| flag bits at b27 and b31. |
| --> |
| <enum name="cp_thread"> |
| <!-- value 3 equals the BR and BV values ORed together --> |
| <value name="CP_SET_THREAD_BR" value="1"/> <!-- Render --> |
| <value name="CP_SET_THREAD_BV" value="2"/> <!-- Visibility --> |
| <value name="CP_SET_THREAD_BOTH" value="3"/> |
| </enum> |
| <reg32 offset="0" name="0"> |
| <bitfield low="0" high="1" name="THREAD" type="cp_thread"/> |
| <bitfield pos="27" name="CONCURRENT_BIN_DISABLE" type="boolean"/> |
| <bitfield pos="31" name="SYNC_THREADS" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_FIXED_STRIDE_DRAW_TABLE" width="32"> |
| <!-- |
| Four-dword packet: 64-bit IB_BASE (dwords 0..1), a dword packing |
| IB_SIZE (b0..b11) and STRIDE (b20..b31), and a 32-bit COUNT. |
| --> |
| <!-- declared without type="address", unlike IB_BASE in CP_INDIRECT_BUFFER --> |
| <reg64 offset="0" name="IB_BASE"/> |
| <reg32 offset="2" name="2"> |
| <!-- STRIDE * COUNT --> |
| <bitfield name="IB_SIZE" low="0" high="11"/> |
| <bitfield name="STRIDE" low="20" high="31"/> |
| </reg32> |
| <reg32 offset="3" name="3"> |
| <bitfield name="COUNT" low="0" high="31"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_RESET_CONTEXT_STATE" width="32"> |
| <!-- Single-dword packet made of three independent clear flags in b0..b2. --> |
| <reg32 offset="0" name="0"> |
| <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/> |
| <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/> |
| <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/> |
| </reg32> |
| </domain> |
| |
| <domain name="CP_INDIRECT_BUFFER" width="32" varset="chip" prefix="chip" variants="A5XX-"> |
| <!-- |
| Three-dword packet for A5XX and later: 64-bit IB_BASE address |
| (dwords 0..1) followed by a 20-bit IB_SIZE in dword 2. |
| NOTE(review): IB_SIZE is presumably counted in dwords; confirm. |
| --> |
| <reg64 offset="0" name="IB_BASE" type="address"/> |
| <reg32 offset="2" name="2"> |
| <bitfield name="IB_SIZE" low="0" high="19"/> |
| </reg32> |
| </domain> |
| |
| </database> |
| |