freedreno: slurp in afuc

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6070>
diff --git a/src/freedreno/afuc/Makefile b/src/freedreno/afuc/Makefile
new file mode 100644
index 0000000..12e6f3a
--- /dev/null
+++ b/src/freedreno/afuc/Makefile
@@ -0,0 +1,368 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 3.17
+
+# Default target executed when no arguments are given to make.
+default_target: all
+
+.PHONY : default_target
+
+# Allow only one "make -f Makefile2" at a time, but pass parallelism.
+.NOTPARALLEL:
+
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canonical targets will work.
+.SUFFIXES:
+
+
+# Disable VCS-based implicit rules.
+% : %,v
+
+
+# Disable VCS-based implicit rules.
+% : RCS/%
+
+
+# Disable VCS-based implicit rules.
+% : RCS/%,v
+
+
+# Disable VCS-based implicit rules.
+% : SCCS/s.%
+
+
+# Disable VCS-based implicit rules.
+% : s.%
+
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+
+# Command-line flag to silence nested $(MAKE).
+$(VERBOSE)MAKESILENT = -s
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+
+# A target that is always out of date.
+cmake_force:
+
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E rm -f
+
+# Escaping for special characters.
+EQUALS = =
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/robclark/src/envytools
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/robclark/src/envytools
+
+#=============================================================================
+# Targets provided globally by CMake.
+
+# Special rule for the target install/strip
+install/strip: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+	/usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip
+
+# Special rule for the target install/strip
+install/strip/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing the project stripped..."
+	/usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake
+.PHONY : install/strip/fast
+
+# Special rule for the target install/local
+install/local: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+	/usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local
+
+# Special rule for the target install/local
+install/local/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Installing only the local directory..."
+	/usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake
+.PHONY : install/local/fast
+
+# Special rule for the target edit_cache
+edit_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..."
+	/usr/bin/ccmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : edit_cache
+
+# Special rule for the target edit_cache
+edit_cache/fast: edit_cache
+
+.PHONY : edit_cache/fast
+
+# Special rule for the target test
+test:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running tests..."
+	/usr/bin/ctest --force-new-ctest-process $(ARGS)
+.PHONY : test
+
+# Special rule for the target test
+test/fast: test
+
+.PHONY : test/fast
+
+# Special rule for the target install
+install: preinstall
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+	/usr/bin/cmake -P cmake_install.cmake
+.PHONY : install
+
+# Special rule for the target install
+install/fast: preinstall/fast
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Install the project..."
+	/usr/bin/cmake -P cmake_install.cmake
+.PHONY : install/fast
+
+# Special rule for the target list_install_components
+list_install_components:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Available install components are: \"Unspecified\""
+.PHONY : list_install_components
+
+# Special rule for the target list_install_components
+list_install_components/fast: list_install_components
+
+.PHONY : list_install_components/fast
+
+# Special rule for the target rebuild_cache
+rebuild_cache:
+	@$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
+	/usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : rebuild_cache
+
+# Special rule for the target rebuild_cache
+rebuild_cache/fast: rebuild_cache
+
+.PHONY : rebuild_cache/fast
+
+# The main all target
+all: cmake_check_build_system
+	cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles /home/robclark/src/envytools/afuc/CMakeFiles/progress.marks
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/all
+	$(CMAKE_COMMAND) -E cmake_progress_start /home/robclark/src/envytools/CMakeFiles 0
+.PHONY : all
+
+# The main clean target
+clean:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/clean
+.PHONY : clean
+
+# The main clean target
+clean/fast: clean
+
+.PHONY : clean/fast
+
+# Prepare targets for installation.
+preinstall: all
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
+.PHONY : preinstall
+
+# Prepare targets for installation.
+preinstall/fast:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/preinstall
+.PHONY : preinstall/fast
+
+# clear depends
+depend:
+	cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
+.PHONY : depend
+
+# Convenience name for target.
+afuc/CMakeFiles/asm.dir/rule:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/asm.dir/rule
+.PHONY : afuc/CMakeFiles/asm.dir/rule
+
+# Convenience name for target.
+asm: afuc/CMakeFiles/asm.dir/rule
+
+.PHONY : asm
+
+# fast build rule for target.
+asm/fast:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/build
+.PHONY : asm/fast
+
+# Convenience name for target.
+afuc/CMakeFiles/disasm.dir/rule:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f CMakeFiles/Makefile2 afuc/CMakeFiles/disasm.dir/rule
+.PHONY : afuc/CMakeFiles/disasm.dir/rule
+
+# Convenience name for target.
+disasm: afuc/CMakeFiles/disasm.dir/rule
+
+.PHONY : disasm
+
+# fast build rule for target.
+disasm/fast:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/build
+.PHONY : disasm/fast
+
+asm.o: asm.c.o
+
+.PHONY : asm.o
+
+# target to build an object file
+asm.c.o:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.o
+.PHONY : asm.c.o
+
+asm.i: asm.c.i
+
+.PHONY : asm.i
+
+# target to preprocess a source file
+asm.c.i:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.i
+.PHONY : asm.c.i
+
+asm.s: asm.c.s
+
+.PHONY : asm.s
+
+# target to generate assembly for a file
+asm.c.s:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/asm.c.s
+.PHONY : asm.c.s
+
+disasm.o: disasm.c.o
+
+.PHONY : disasm.o
+
+# target to build an object file
+disasm.c.o:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.o
+.PHONY : disasm.c.o
+
+disasm.i: disasm.c.i
+
+.PHONY : disasm.i
+
+# target to preprocess a source file
+disasm.c.i:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.i
+.PHONY : disasm.c.i
+
+disasm.s: disasm.c.s
+
+.PHONY : disasm.s
+
+# target to generate assembly for a file
+disasm.c.s:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/disasm.dir/build.make afuc/CMakeFiles/disasm.dir/disasm.c.s
+.PHONY : disasm.c.s
+
+lexer.o: lexer.c.o
+
+.PHONY : lexer.o
+
+# target to build an object file
+lexer.c.o:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.o
+.PHONY : lexer.c.o
+
+lexer.i: lexer.c.i
+
+.PHONY : lexer.i
+
+# target to preprocess a source file
+lexer.c.i:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.i
+.PHONY : lexer.c.i
+
+lexer.s: lexer.c.s
+
+.PHONY : lexer.s
+
+# target to generate assembly for a file
+lexer.c.s:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/lexer.c.s
+.PHONY : lexer.c.s
+
+parser.o: parser.c.o
+
+.PHONY : parser.o
+
+# target to build an object file
+parser.c.o:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.o
+.PHONY : parser.c.o
+
+parser.i: parser.c.i
+
+.PHONY : parser.i
+
+# target to preprocess a source file
+parser.c.i:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.i
+.PHONY : parser.c.i
+
+parser.s: parser.c.s
+
+.PHONY : parser.s
+
+# target to generate assembly for a file
+parser.c.s:
+	cd /home/robclark/src/envytools && $(MAKE) $(MAKESILENT) -f afuc/CMakeFiles/asm.dir/build.make afuc/CMakeFiles/asm.dir/parser.c.s
+.PHONY : parser.c.s
+
+# Help Target
+help:
+	@echo "The following are some of the valid targets for this Makefile:"
+	@echo "... all (the default if no target is provided)"
+	@echo "... clean"
+	@echo "... depend"
+	@echo "... edit_cache"
+	@echo "... install"
+	@echo "... install/local"
+	@echo "... install/strip"
+	@echo "... list_install_components"
+	@echo "... rebuild_cache"
+	@echo "... test"
+	@echo "... asm"
+	@echo "... disasm"
+	@echo "... asm.o"
+	@echo "... asm.i"
+	@echo "... asm.s"
+	@echo "... disasm.o"
+	@echo "... disasm.i"
+	@echo "... disasm.s"
+	@echo "... lexer.o"
+	@echo "... lexer.i"
+	@echo "... lexer.s"
+	@echo "... parser.o"
+	@echo "... parser.i"
+	@echo "... parser.s"
+.PHONY : help
+
+
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+	cd /home/robclark/src/envytools && $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/src/freedreno/afuc/README.rst b/src/freedreno/afuc/README.rst
new file mode 100644
index 0000000..e06c939
--- /dev/null
+++ b/src/freedreno/afuc/README.rst
@@ -0,0 +1,317 @@
+=====================
+Adreno Five Microcode
+=====================
+
+.. contents::
+
+.. _afuc-introduction:
+
+Introduction
+============
+
+Adreno GPUs prior to 6xx use two micro-controllers to parse the command-stream,
+setup the hardware for draws (or compute jobs), and do various GPU
+housekeeping.  They are relatively simple (basically glorified
+register writers) and basically all their state is in a collection
+of registers.  Ie. there is no stack, and no memory assigned to
+them; any global state like which bank of context registers is to
+be used in the next draw is stored in a register.
+
+The setup is similar to radeon, in fact Adreno 2xx thru 4xx used
+basically the same instruction set as r600.  There is a "PFP"
+(Prefetch Parser) and "ME" (Micro Engine, also confusingly referred
+to as "PM4").  These make up the "CP" ("Command Parser").  The
+PFP runs ahead of the ME, with some PM4 packets handled entirely
+in the PFP.  Between the PFP and ME is a FIFO ("MEQ").  In the
+generations prior to Adreno 5xx, the PFP and ME had different
+instruction sets.
+
+Starting with Adreno 5xx, a new microcontroller with a unified
+instruction set was introduced, although the overall architecture
+and purpose of the two microcontrollers remains the same.
+
+For lack of a better name, this new instruction set is called
+"Adreno Five MicroCode" or "afuc".  (No idea what Qualcomm calls
+it internally.)
+
+With Adreno 6xx, the separate PFP and ME are replaced with a single
+SQE microcontroller using the same instruction set as 5xx.
+
+.. _afuc-overview:
+
+Instruction Set Overview
+========================
+
+32bit instruction set with basic arithmetic ops that can take
+either two source registers or one src and a 16b immediate.
+
+32 registers, although some are special purpose:
+
+- ``$00`` - always reads zero, otherwise seems to be the PC
+- ``$01`` - current PM4 packet header
+- ``$1c`` - alias ``$rem``, remaining data in packet
+- ``$1d`` - alias ``$addr``
+- ``$1f`` - alias ``$data``
+
+Branch instructions have a delay slot so the following instruction
+is always executed regardless of whether branch is taken or not.
+
+
+.. _afuc-alu:
+
+ALU Instructions
+================
+
+The following instructions are available:
+
+- ``add``   - add
+- ``addhi`` - add + carry (for upper 32b of 64b value)
+- ``sub``   - subtract
+- ``subhi`` - subtract + carry (for upper 32b of 64b value)
+- ``and``   - bitwise AND
+- ``or``    - bitwise OR
+- ``xor``   - bitwise XOR
+- ``not``   - bitwise NOT (no src1)
+- ``shl``   - shift-left
+- ``ushr``  - unsigned shift-right
+- ``ishr``  - signed shift-right
+- ``rot``   - rotate-left (like shift-left with wrap-around)
+- ``mul8``  - multiply low 8b of two src
+- ``min``   - minimum
+- ``max``   - maximum
+- ``cmp``   - compare two values
+
+The ALU instructions can take either two src registers, or a src
+plus 16b immediate as 2nd src, ex::
+
+  add $dst, $src, 0x1234   ; src2 is immed
+  add $dst, $src1, $src2   ; src2 is reg
+
+The ``not`` instruction only takes a single source::
+
+  not $dst, $src
+  not $dst, 0x1234
+
+.. _afuc-alu-cmp:
+
+The ``cmp`` instruction returns:
+
+- ``0x00`` if src1 > src2
+- ``0x2b`` if src1 == src2
+- ``0x1e`` if src1 < src2
+
+See explanation in :ref:`afuc-branch`
+
+
+.. _afuc-branch:
+
+Branch Instructions
+===================
+
+The following branch/jump instructions are available:
+
+- ``brne`` - branch if not equal (or bit not set)
+- ``breq`` - branch if equal (or bit set)
+- ``jump`` - unconditional jump
+
+Both ``brne`` and ``breq`` have two forms, comparing the src register
+against either a small immediate (up to 5 bits) or a specific bit::
+
+  breq $src, b3, #somelabel  ; branch if src & (1 << 3)
+  breq $src, 0x3, #somelabel ; branch if src == 3
+
+The branch instructions are encoded with a 16b relative offset.
+Since ``$00`` always reads back zero, it can be used to construct
+an unconditional relative jump.
+
+The :ref:`cmp <afuc-alu-cmp>` instruction can be paired with the
+bit-test variants of ``brne``/``breq`` to implement gt/ge/lt/le,
+due to the bit pattern it returns, for example::
+
+  cmp $04, $02, $03
+  breq $04, b1, #somelabel
+
+will branch if ``$02`` is less than or equal to ``$03``.
+
+
+.. _afuc-call:
+
+Call/Return
+===========
+
+Simple subroutines can be implemented with ``call``/``ret``.  The
+jump instruction encodes a fixed offset.
+
+  TODO not sure how many levels deep function calls can be nested.
+  There isn't really a stack.  Definitely seems to be multiple
+  levels of fxn call, see in PFP: CP_CONTEXT_SWITCH_YIELD -> f13 ->
+  f22.
+
+
+.. _afuc-control:
+
+Config Instructions
+===================
+
+These seem to read/write config state in other parts of CP.  In at
+least some cases I expect these map to CP registers (but possibly
+not directly??)
+
+- ``cread $dst, [$off + addr], flags``
+- ``cwrite $src, [$off + addr], flags``
+
+In cases where no offset is needed, ``$00`` is frequently used as
+the offset.
+
+For example, the following sequence updates the IB base/size registers::
+
+  ; load CP_INDIRECT_BUFFER parameters from cmdstream:
+  mov $02, $data   ; low 32b of IB target address
+  mov $03, $data   ; high 32b of IB target
+  mov $04, $data   ; IB size in dwords
+
+  ; sanity check # of dwords:
+  breq $04, 0x0, #l23 (#69, 04a2)
+
+  ; this seems something to do with figuring out whether
+  ; we are going from RB->IB1 or IB1->IB2 (ie. so the
+  ; below cwrite instructions update either
+  ; CP_IB1_BASE_LO/HI/BUFSIZE or CP_IB2_BASE_LO/HI/BUFSIZE)
+  and $05, $18, 0x0003
+  shl $05, $05, 0x0002
+
+  ; update CP_IBn_BASE_LO/HI/BUFSIZE:
+  cwrite $02, [$05 + 0x0b0], 0x8
+  cwrite $03, [$05 + 0x0b1], 0x8
+  cwrite $04, [$05 + 0x0b2], 0x8
+
+
+
+.. _afuc-reg-access:
+
+Register Access
+===============
+
+The special registers ``$addr`` and ``$data`` can be used to write GPU
+registers, for example, to write::
+
+  mov $addr, CP_SCRATCH_REG[0x2] ; set register to write
+  mov $data, $03                 ; CP_SCRATCH_REG[0x2]
+  mov $data, $04                 ; CP_SCRATCH_REG[0x3]
+  ...
+
+subsequent writes to ``$data`` will increment the address of the register
+to write, so a sequence of consecutive registers can be written.
+
+To read::
+
+  mov $addr, CP_SCRATCH_REG[0x2]
+  mov $03, $addr
+  mov $04, $addr
+
+Many registers that are updated frequently have two banks, so they can be
+updated without stalling for previous draw to finish.  These banks are
+arranged so bit 11 is zero for bank 0 and 1 for bank 1.  The ME fw (at
+least the version I'm looking at) stores this in ``$17``, so to update
+these registers from ME::
+
+  or $addr, $17, VFD_INDEX_OFFSET
+  mov $data, $03
+  ...
+
+Note that PFP doesn't seem to use this approach, instead it does something
+like::
+
+  mov $0c, CP_SCRATCH_REG[0x7]
+  mov $02, 0x789a   ; value
+  cwrite $0c, [$00 + 0x010], 0x8
+  cwrite $02, [$00 + 0x011], 0x8
+
+Like with the ``$addr``/``$data`` approach, the destination register address
+increments on each write.
+
+.. _afuc-mem:
+
+Memory Access
+=============
+
+There are no load/store instructions, as such.  The microcontrollers
+have only indirect memory access via GPU registers.  There are two
+possible mechanisms.
+
+Read/Write via CP_NRT Registers
+-------------------------------
+
+This seems to be only used by ME.  If PFP were also using it, they would
+race with each other.  It seems to be primarily used for small reads.
+
+- ``CP_ME_NRT_ADDR_LO``/``_HI`` - write to set the address to read or write
+- ``CP_ME_NRT_DATA`` - write to trigger write to address in ``CP_ME_NRT_ADDR``
+
+The address register increments with successive reads or writes.
+
+Memory Write example::
+
+  ; store 64b value in $04+$05 to 64b address in $02+$03
+  mov $addr, CP_ME_NRT_ADDR_LO
+  mov $data, $02
+  mov $data, $03
+  mov $addr, CP_ME_NRT_DATA
+  mov $data, $04
+  mov $data, $05
+
+Memory Read example::
+
+  ; load 64b value from address in $02+$03 into $04+$05
+  mov $addr, CP_ME_NRT_ADDR_LO
+  mov $data, $02
+  mov $data, $03
+  mov $04, $addr
+  mov $05, $addr
+
+
+Read via Control Instructions
+-----------------------------
+
+This is used by PFP whenever it needs to read memory.  Also seems to be
+used by ME for streaming reads (larger amounts of data).  The DMA access
+seems to be done by ROQ.
+
+  TODO might also be possible for write access
+
+  TODO some of the control commands might be synchronizing access
+  between PFP and ME??
+
+An example from ``CP_DRAW_INDIRECT`` packet handler::
+
+  mov $07, 0x0004  ; # of dwords to read from draw-indirect buffer
+  ; load address of indirect buffer from cmdstream:
+  cwrite $data, [$00 + 0x0b8], 0x8
+  cwrite $data, [$00 + 0x0b9], 0x8
+  ; set # of dwords to read:
+  cwrite $07, [$00 + 0x0ba], 0x8
+  ...
+  ; read parameters from draw-indirect buffer:
+  mov $09, $addr
+  mov $07, $addr
+  cread $12, [$00 + 0x040], 0x8
+  ; the start parameter gets written into MEQ, which ME writes
+  ; to VFD_INDEX_OFFSET register:
+  mov $data, $addr
+
+
+A6XX NOTES
+==========
+
+The ``$14`` register holds global flags set by:
+
+  CP_SKIP_IB2_ENABLE_LOCAL - b8
+  CP_SKIP_IB2_ENABLE_GLOBAL - b9
+  CP_SET_MARKER
+    MODE=GMEM - sets b15
+    MODE=BLIT2D - clears b15, b12, b7
+  CP_SET_MODE - b29+b30
+  CP_SET_VISIBILITY_OVERRIDE - b11, b21, b30?
+  CP_SET_DRAW_STATE - checks b29+b30
+
+  CP_COND_REG_EXEC - checks b10, which should be predicate flag?
diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h
new file mode 100644
index 0000000..4f9e9d2
--- /dev/null
+++ b/src/freedreno/afuc/afuc.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _AFUC_H_
+#define _AFUC_H_
+
+/*
+TODO kernel debugfs to inject packet into rb for easier experimentation.  It
+should trigger reloading pfp/me and resetting gpu..
+
+Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs,
+should be restricted to CAP_ADMIN and probably compile option too (default=n).
+if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from
+RB.
+ */
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define PACKED __attribute__((__packed__))
+
+/* The opcode is encoded variable length.  Opcodes less than 0x30
+ * are encoded as 5 bits followed by (rep) flag.  Opcodes >= 0x30
+ * (ie. top two bits are '11') are encoded as 6 bits.  See afuc_get_opc()
+ */
+typedef enum {
+	OPC_NOP    = 0x00,
+
+	OPC_ADD    = 0x01,  /* add immediate */
+	OPC_ADDHI  = 0x02,  /* add immediate (hi 32b of 64b) */
+	OPC_SUB    = 0x03,  /* subtract immediate */
+	OPC_SUBHI  = 0x04,  /* subtract immediate (hi 32b of 64b) */
+	OPC_AND    = 0x05,  /* AND immediate */
+	OPC_OR     = 0x06,  /* OR immediate */
+	OPC_XOR    = 0x07,  /* XOR immediate */
+	OPC_NOT    = 0x08,  /* bitwise not of immed (src1 ignored) */
+	OPC_SHL    = 0x09,  /* shift-left immediate */
+	OPC_USHR   = 0x0a,  /* unsigned shift right by immediate */
+	OPC_ISHR   = 0x0b,  /* signed shift right by immediate */
+	OPC_ROT    = 0x0c,  /* rotate left (left shift with wrap-around) */
+	OPC_MUL8   = 0x0d,  /* 8bit multiply by immediate */
+	OPC_MIN    = 0x0e,  /* minimum */
+	OPC_MAX    = 0x0f,  /* maximum */
+	OPC_CMP    = 0x10,  /* compare src to immed */
+	OPC_MOVI   = 0x11,  /* move immediate */
+
+	/* Return the most-significant bit of src2, or 0 if src2 == 0 (the
+	 * same as if src2 == 1). src1 is ignored. Note that this overlaps
+	 * with STORE6, so it can only be used with the two-source encoding.
+	 */
+	OPC_MSB    = 0x14,
+
+
+	OPC_ALU    = 0x13,  /* ALU instruction with two src registers */
+
+	/* These seem something to do with setting some external state..
+	 * doesn't seem to map *directly* to registers, but I guess that
+	 * is where things end up.  For example, this sequence in the
+	 * CP_INDIRECT_BUFFER handler:
+	 *
+	 *     mov $02, $data   ; low 32b of IB target address
+	 *     mov $03, $data   ; high 32b of IB target
+	 *     mov $04, $data   ; IB size in dwords
+	 *     breq $04, 0x0, #l23 (#69, 04a2)
+	 *     and $05, $18, 0x0003
+	 *     shl $05, $05, 0x0002
+	 *     cwrite $02, [$05 + 0x0b0], 0x8
+	 *     cwrite $03, [$05 + 0x0b1], 0x8
+	 *     cwrite $04, [$05 + 0x0b2], 0x8
+	 *
+	 * Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and
+	 * 0x0b22->0x0b24 (IB2).  Presumably $05 ends up w/ different value
+	 * for RB->IB1 vs IB1->IB2.
+	 */
+	OPC_CWRITE5 = 0x15,
+	OPC_CREAD5  = 0x16,
+
+	/* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
+	 * that let you read/write directly to memory (and bypass the IOMMU?).
+	 */
+	OPC_STORE6  = 0x14,
+	OPC_CWRITE6 = 0x15,
+	OPC_LOAD6   = 0x16,
+	OPC_CREAD6  = 0x17,
+
+	OPC_BRNEI  = 0x30,  /* relative branch (if $src != immed) */
+	OPC_BREQI  = 0x31,  /* relative branch (if $src == immed) */
+	OPC_BRNEB  = 0x32,  /* relative branch (if bit not set) */
+	OPC_BREQB  = 0x33,  /* relative branch (if bit is set) */
+	OPC_RET    = 0x34,  /* return */
+	OPC_CALL   = 0x35,  /* "function" call */
+	OPC_WIN    = 0x36,  /* wait for input (ie. wait for WPTR to advance) */
+	OPC_PREEMPTLEAVE6 = 0x38,  /* try to leave preemption */
+} afuc_opc;
+
+
+typedef union PACKED {   /* one instruction word; every layout below sums to 32 bits */
+	/* addi, subi, andi, ori, xori, etc: */
+	struct PACKED {
+		uint32_t uimm    : 16;
+		uint32_t dst     : 5;
+		uint32_t src     : 5;
+		uint32_t hdr     : 6;
+	} alui;
+	struct PACKED {          /* movi: 16b immediate plus a 5b shift field */
+		uint32_t uimm    : 16;
+		uint32_t dst     : 5;
+		uint32_t shift   : 5;
+		uint32_t hdr     : 6;
+	} movi;
+	struct PACKED {          /* two-src-register ALU form (see OPC_ALU) */
+		uint32_t alu     : 5;     /* presumably selects the ALU operation -- TODO confirm */
+		uint32_t pad     : 6;
+		uint32_t dst     : 5;
+		uint32_t src2    : 5;
+		uint32_t src1    : 5;
+		uint32_t hdr     : 6;
+	} alu;
+	struct PACKED {          /* cread/cwrite */
+		uint32_t uimm    : 12;
+		uint32_t flags   : 4;
+		uint32_t src1    : 5;     /* dst (cread) or src (cwrite) register */
+		uint32_t src2    : 5;     /* read or write address is src2+uimm */
+		uint32_t hdr     : 6;
+	} control;
+	struct PACKED {          /* conditional branches (immediate or bit-test) */
+		int32_t  ioff    : 16;    /* relative offset */
+		uint32_t bit_or_imm : 5;
+		uint32_t src     : 5;
+		uint32_t hdr     : 6;
+	} br;
+	struct PACKED {          /* call */
+		uint32_t uoff    : 26;    /* absolute (unsigned) offset */
+		uint32_t hdr     : 6;
+	} call;
+	struct PACKED {          /* no operands, only the header field */
+		uint32_t pad     : 26;
+		uint32_t hdr     : 6;
+	} waitin;
+	struct PACKED {          /* raw view used by afuc_get_opc()/afuc_set_opc() */
+		uint32_t pad     : 26;
+		uint32_t opc_r   : 6;     /* opcode, or (opcode << 1) | rep for short opcodes */
+	};
+
+} afuc_instr;
+
+static inline void
+afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
+{	/* Decode the raw 6-bit opc_r field of *ai into *opc and the *rep flag. */
+	if (ai->opc_r < 0x30) {       /* short form: opcode in upper 5 bits, rep in bit 0 */
+		*opc = ai->opc_r >> 1;
+		*rep = ai->opc_r & 0x1;
+	} else {                      /* long form: all 6 bits are the opcode, no rep flag */
+		*opc = ai->opc_r;
+		*rep = false;
+	}
+}
+
+static inline void
+afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
+{	/* Encode opc (and rep, for short opcodes) into the raw opc_r field of *ai. */
+	if (opc < 0x30) {             /* short form: (opc << 1) | rep */
+		ai->opc_r = opc << 1;
+		ai->opc_r |= !!rep;       /* !! normalizes rep to exactly 0 or 1 */
+	} else {                      /* long form: opcode stored as-is, rep is ignored */
+		ai->opc_r = opc;
+	}
+}
+
+#endif /* _AFUC_H_ */
diff --git a/src/freedreno/afuc/asm.c b/src/freedreno/afuc/asm.c
new file mode 100644
index 0000000..321d06a
--- /dev/null
+++ b/src/freedreno/afuc/asm.c
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+
+#include "afuc.h"
+#include "rnn.h"
+#include "rnndec.h"
+#include "parser.h"
+#include "asm.h"
+
+int gpuver;
+
+
+static struct rnndeccontext *ctx;
+static struct rnndb *db;
+static struct rnndomain *control_regs;
+struct rnndomain *dom[2];
+
+
+/* bit lame to hard-code max but fw sizes are small */
+static struct asm_instruction instructions[0x2000];
+static unsigned num_instructions;
+
+static struct asm_label labels[0x512];
+static unsigned num_labels;
+
+/**
+ * Reserve the next slot in the instruction table and tag it with the
+ * parser token of the instruction being assembled.
+ *
+ * @tok: parser token (T_OP_*) identifying the instruction
+ *
+ * Returns a pointer to the reserved slot.  Exits with status 2 if the
+ * fixed-size table is full; the check is done before the slot is touched
+ * and does not depend on assert() being compiled in.
+ */
+struct asm_instruction *next_instr(int tok)
+{
+	struct asm_instruction *ai;
+
+	if (num_instructions >= ARRAY_SIZE(instructions)) {
+		fprintf(stderr, "too many instructions (max %u)\n",
+				(unsigned)ARRAY_SIZE(instructions));
+		exit(2);
+	}
+
+	ai = &instructions[num_instructions++];
+	ai->tok = tok;
+	return ai;
+}
+
+/**
+ * Record a label that points at the next instruction to be emitted.
+ *
+ * @str: label name; the pointer is stored, not copied, so it must stay
+ *       valid for as long as labels are being resolved
+ *
+ * Exits with status 2 if the fixed-size label table is full; the check is
+ * done before the slot is touched and does not depend on assert().
+ */
+void decl_label(const char *str)
+{
+	struct asm_label *label;
+
+	if (num_labels >= ARRAY_SIZE(labels)) {
+		fprintf(stderr, "too many labels (max %u)\n",
+				(unsigned)ARRAY_SIZE(labels));
+		exit(2);
+	}
+
+	label = &labels[num_labels++];
+	label->offset = num_instructions;
+	label->label = str;
+}
+
+/**
+ * Look up a declared label by name and return its instruction offset.
+ * The first declaration with a matching name wins.  An unknown label is
+ * fatal: a message is printed to stderr and the process exits with 2.
+ */
+static int resolve_label(const char *str)
+{
+	const struct asm_label *cur = labels;
+	const struct asm_label *const end = labels + num_labels;
+
+	for (; cur != end; cur++)
+		if (strcmp(cur->label, str) == 0)
+			return cur->offset;
+
+	fprintf(stderr, "Undeclared label: %s\n", str);
+	exit(2);
+}
+
+/**
+ * Translate an ALU mnemonic token from the parser into the matching ALU
+ * opcode.  Any other token is a programming error (assert).
+ */
+static afuc_opc tok2alu(int tok)
+{
+	static const struct {
+		int tok;
+		afuc_opc opc;
+	} alu_ops[] = {
+		{ T_OP_ADD,   OPC_ADD   },
+		{ T_OP_ADDHI, OPC_ADDHI },
+		{ T_OP_SUB,   OPC_SUB   },
+		{ T_OP_SUBHI, OPC_SUBHI },
+		{ T_OP_AND,   OPC_AND   },
+		{ T_OP_OR,    OPC_OR    },
+		{ T_OP_XOR,   OPC_XOR   },
+		{ T_OP_NOT,   OPC_NOT   },
+		{ T_OP_SHL,   OPC_SHL   },
+		{ T_OP_USHR,  OPC_USHR  },
+		{ T_OP_ISHR,  OPC_ISHR  },
+		{ T_OP_ROT,   OPC_ROT   },
+		{ T_OP_MUL8,  OPC_MUL8  },
+		{ T_OP_MIN,   OPC_MIN   },
+		{ T_OP_MAX,   OPC_MAX   },
+		{ T_OP_CMP,   OPC_CMP   },
+		{ T_OP_MSB,   OPC_MSB   },
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(alu_ops) / sizeof(alu_ops[0]); n++)
+		if (alu_ops[n].tok == tok)
+			return alu_ops[n].opc;
+
+	assert(0);
+	return -1;
+}
+
+/* Write one 4-byte word to the output; a failed or short write is fatal. */
+static void write_dword(int outfd, const void *dword)
+{
+	if (write(outfd, dword, 4) != 4) {
+		perror("write");
+		exit(1);
+	}
+}
+
+/**
+ * Second assembler pass: encode every parsed instruction and write the
+ * resulting dwords to outfd.  Labels are resolved here, once all of them
+ * have been declared.
+ *
+ * Output layout: one leading zero dword, then one dword per entry of the
+ * instruction table (literals are written through unchanged).
+ *
+ * Write errors and unknown instruction tokens terminate the process rather
+ * than silently producing a truncated or garbage firmware image.
+ */
+static void emit_instructions(int outfd)
+{
+	const uint32_t zero = 0;
+	int i;
+
+	/* there is an extra 0x00000000 which kernel strips off.. we could
+	 * perhaps use it for versioning.
+	 */
+	write_dword(outfd, &zero);
+
+	for (i = 0; i < num_instructions; i++) {
+		struct asm_instruction *ai = &instructions[i];
+		afuc_instr instr = {0};
+		/* initialized so no path can encode an indeterminate opcode */
+		afuc_opc opc = OPC_NOP;
+
+		/* special case, 2nd dword is patched up w/ # of instructions
+		 * (ie. offset of jmptbl)
+		 */
+		if (i == 1) {
+			assert(ai->is_literal);
+			ai->literal &= ~0xffff;
+			ai->literal |= num_instructions;
+		}
+
+		if (ai->is_literal) {
+			write_dword(outfd, &ai->literal);
+			continue;
+		}
+
+		switch (ai->tok) {
+		case T_OP_NOP:
+			opc = OPC_NOP;
+			/* a6xx uses a non-zero payload for nop */
+			if (gpuver >= 6)
+				instr.pad = 0x1000000;
+			break;
+		case T_OP_ADD:
+		case T_OP_ADDHI:
+		case T_OP_SUB:
+		case T_OP_SUBHI:
+		case T_OP_AND:
+		case T_OP_OR:
+		case T_OP_XOR:
+		case T_OP_NOT:
+		case T_OP_SHL:
+		case T_OP_USHR:
+		case T_OP_ISHR:
+		case T_OP_ROT:
+		case T_OP_MUL8:
+		case T_OP_MIN:
+		case T_OP_MAX:
+		case T_OP_CMP:
+		case T_OP_MSB:
+			if (ai->has_immed) {
+				/* MSB overlaps with STORE */
+				assert(ai->tok != T_OP_MSB);
+				opc = tok2alu(ai->tok);
+				instr.alui.dst = ai->dst;
+				instr.alui.src = ai->src1;
+				instr.alui.uimm = ai->immed;
+			} else {
+				opc = OPC_ALU;
+				instr.alu.dst  = ai->dst;
+				instr.alu.src1 = ai->src1;
+				instr.alu.src2 = ai->src2;
+				instr.alu.alu = tok2alu(ai->tok);
+			}
+			break;
+		case T_OP_MOV:
+			/* move can either be encoded as movi (ie. move w/ immed) or
+			 * an alu instruction
+			 */
+			if (ai->has_immed) {
+				opc = OPC_MOVI;
+				instr.movi.dst = ai->dst;
+				instr.movi.uimm = ai->immed;
+				instr.movi.shift = ai->shift;
+			} else if (ai->label) {
+				/* mov w/ a label is just an alias for an immediate, this
+				 * is useful to load the address of a constant table into
+				 * a register:
+				 */
+				opc = OPC_MOVI;
+				instr.movi.dst = ai->dst;
+				instr.movi.uimm = resolve_label(ai->label);
+				instr.movi.shift = ai->shift;
+			} else {
+				/* encode as: or $dst, $00, $src */
+				opc = OPC_ALU;
+				instr.alu.dst  = ai->dst;
+				instr.alu.src1 = 0x00;      /* $00 reads-back 0 */
+				instr.alu.src2 = ai->src1;
+				instr.alu.alu = OPC_OR;
+			}
+			break;
+		case T_OP_CWRITE:
+		case T_OP_CREAD:
+		case T_OP_STORE:
+		case T_OP_LOAD:
+			if (gpuver >= 6) {
+				if (ai->tok == T_OP_CWRITE) {
+					opc = OPC_CWRITE6;
+				} else if (ai->tok == T_OP_CREAD) {
+					opc = OPC_CREAD6;
+				} else if (ai->tok == T_OP_STORE) {
+					opc = OPC_STORE6;
+				} else if (ai->tok == T_OP_LOAD) {
+					opc = OPC_LOAD6;
+				}
+			} else {
+				if (ai->tok == T_OP_CWRITE) {
+					opc = OPC_CWRITE5;
+				} else if (ai->tok == T_OP_CREAD) {
+					opc = OPC_CREAD5;
+				} else if (ai->tok == T_OP_STORE ||
+					   ai->tok == T_OP_LOAD) {
+					fprintf(stderr, "load and store do not exist on a5xx\n");
+					exit(1);
+				}
+			}
+			instr.control.src1 = ai->src1;
+			instr.control.src2 = ai->src2;
+			instr.control.flags = ai->bit;
+			instr.control.uimm = ai->immed;
+			break;
+		case T_OP_BRNE:
+		case T_OP_BREQ:
+			if (ai->has_immed) {
+				opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI;
+				instr.br.bit_or_imm = ai->immed;
+			} else {
+				opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB;
+				instr.br.bit_or_imm = ai->bit;
+			}
+			instr.br.src = ai->src1;
+			/* branch offsets are relative to the branch itself */
+			instr.br.ioff = resolve_label(ai->label) - i;
+			break;
+		case T_OP_RET:
+			opc = OPC_RET;
+			break;
+		case T_OP_CALL:
+			opc = OPC_CALL;
+			instr.call.uoff = resolve_label(ai->label);
+			break;
+		case T_OP_PREEMPTLEAVE:
+			opc = OPC_PREEMPTLEAVE6;
+			instr.call.uoff = resolve_label(ai->label);
+			break;
+		case T_OP_JUMP:
+			/* encode jump as: brne $00, b0, #label */
+			opc = OPC_BRNEB;
+			instr.br.bit_or_imm = 0;
+			instr.br.src = 0x00;       /* $00 reads-back 0.. compare to 0 */
+			instr.br.ioff = resolve_label(ai->label) - i;
+			break;
+		case T_OP_WAITIN:
+			opc = OPC_WIN;
+			break;
+		default:
+			/* must not depend on assert() being compiled in */
+			fprintf(stderr, "unhandled instruction token: %d\n", ai->tok);
+			exit(1);
+		}
+
+		afuc_set_opc(&instr, opc, ai->rep);
+
+		write_dword(outfd, &instr);
+	}
+
+}
+
+/**
+ * Return the value of the first valid entry of the enum whose name matches
+ * exactly, or -1 if there is none.
+ */
+static int find_enum_val(struct rnnenum *en, const char *name)
+{
+	int n;
+
+	for (n = 0; n < en->valsnum; n++) {
+		if (!en->vals[n]->valvalid)
+			continue;
+		if (strcmp(en->vals[n]->name, name) != 0)
+			continue;
+		return en->vals[n]->value;
+	}
+
+	return -1;
+}
+
+/**
+ * Return the offset of the first element of the domain whose name matches
+ * exactly, or -1 if there is none.
+ */
+static int find_reg(struct rnndomain *dom, const char *name)
+{
+	int n = 0;
+
+	while (n < dom->subelemsnum) {
+		if (strcmp(dom->subelems[n]->name, name) == 0)
+			return dom->subelems[n]->offset;
+		n++;
+	}
+
+	return -1;
+}
+
+/**
+ * Translate a control register reference of the form "@NAME" into its
+ * offset in the control register domain.
+ *
+ * @name: token text including the leading '@'
+ *
+ * Exits with status 2 if the name is unknown, or if the control register
+ * domain could not be looked up at startup (in which case no name can be
+ * resolved and dereferencing the domain would crash).
+ */
+unsigned parse_control_reg(const char *name)
+{
+	int val = -1;
+
+	/* skip leading "@" */
+	if (control_regs)
+		val = find_reg(control_regs, name + 1);
+
+	if (val < 0) {
+		/* diagnostics go to stderr, like the other assembler errors */
+		fprintf(stderr, "invalid control reg: %s\n", name);
+		exit(2);
+	}
+	return (unsigned)val;
+}
+
+/**
+ * Build and write the 0x80-entry jump table that follows the instructions.
+ *
+ * Each label whose name is a known PM4 type3 packet name, or has the form
+ * UNKN<id>, fills the table slot for that packet id with the label's
+ * instruction offset.  All other labels are ordinary branch targets and
+ * are ignored here.
+ *
+ * Ids outside the table are reported and skipped instead of being written
+ * past the end of the on-stack table.  A write error is fatal.
+ */
+static void emit_jumptable(int outfd)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	uint32_t jmptable[0x80] = {0};
+	const int num_entries = sizeof(jmptable) / sizeof(jmptable[0]);
+	int i;
+
+	for (i = 0; i < num_labels; i++) {
+		struct asm_label *label = &labels[i];
+		/* the enum may be missing if the register database was not loaded */
+		int id = en ? find_enum_val(en, label->label) : -1;
+
+		/* if it doesn't match a known PM4 packet-id, try to match UNKN%d: */
+		if (id < 0) {
+			if (sscanf(label->label, "UNKN%d", &id) != 1) {
+				/* if still not found, must not belong in jump-table: */
+				continue;
+			}
+		}
+
+		if (id < 0 || id >= num_entries) {
+			fprintf(stderr, "jump table id out of range for label %s: %d\n",
+					label->label, id);
+			continue;
+		}
+
+		jmptable[id] = label->offset;
+	}
+
+	if (write(outfd, jmptable, sizeof(jmptable)) != (ssize_t)sizeof(jmptable)) {
+		perror("write");
+		exit(1);
+	}
+}
+
+/* Print the assembler's command line synopsis to stderr and exit(2). */
+static void usage(void)
+{
+	static const char help[] =
+		"Usage:\n"
+		"\tasm [-g GPUVER] filename.asm filename.fw\n"
+		"\t\t-g - specify GPU version (5, etc)\n";
+
+	fputs(help, stderr);
+	exit(2);
+}
+
+/**
+ * Assembler entry point: asm [-g GPUVER] filename.asm filename.fw
+ *
+ * Parses the input with the generated parser, then writes the encoded
+ * instructions followed by the jump table to the output file.  If -g is
+ * not given, the GPU generation is guessed from the input file name.
+ *
+ * Returns 0 on success and the parser's status on a parse failure; usage
+ * and file errors exit with status 2 via usage().
+ */
+int main(int argc, char **argv)
+{
+	FILE *in;
+	char *file, *outfile, *name, *control_reg_name;
+	int c, ret, outfd;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:")) != -1) {
+		switch (c) {
+			case 'g':
+				gpuver = atoi(optarg);
+				break;
+			default:
+				usage();
+		}
+	}
+
+	/* both the input and the output file name are required; without this
+	 * check argv would be read at and past its terminating NULL
+	 */
+	if (optind + 2 > argc) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+	outfile = argv[optind + 1];
+
+	/* open the input first so that a bad input name does not truncate an
+	 * existing output file
+	 */
+	in = fopen(file, "r");
+	if (!in) {
+		fprintf(stderr, "could not open \"%s\"\n", file);
+		usage();
+	}
+
+	outfd = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (outfd < 0) {
+		fprintf(stderr, "could not open \"%s\"\n", outfile);
+		usage();
+	}
+
+	yyset_in(in);
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		name = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		name = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	/* the register database must be loaded before parsing, since the
+	 * lexer resolves @control_reg names through parse_control_reg()
+	 */
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, name);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	ret = yyparse();
+	if (ret) {
+		fprintf(stderr, "parse failed: %d\n", ret);
+		return ret;
+	}
+
+	emit_instructions(outfd);
+	emit_jumptable(outfd);
+
+	close(outfd);
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/asm.h b/src/freedreno/afuc/asm.h
new file mode 100644
index 0000000..03fb150
--- /dev/null
+++ b/src/freedreno/afuc/asm.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _ASM_H_
+#define _ASM_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "afuc.h"
+
+extern int gpuver;
+
+/**
+ * Intermediate representation for an instruction, before final encoding.
+ * This mostly exists because we need to resolve label offset's in a 2nd
+ * pass, but also so that parser.y doesn't really need to care so much
+ * about the different encodings for 2src regs vs 1src+immed, or mnemonics
+ */
+struct asm_instruction {
+	int tok;                /* parser token (T_OP_*) naming the instruction */
+	int dst;                /* destination register number */
+	int src1;               /* first source register number */
+	int src2;               /* second source register number */
+	int immed;              /* immediate operand, meaningful if has_immed */
+	int shift;              /* shift amount encoded for mov-immediate */
+	int bit;                /* bit index for bit-test branches; also used as
+	                         * the flags field of control instructions */
+	uint32_t literal;       /* raw dword, emitted as-is if is_literal */
+	const char *label;      /* name of the referenced label, or NULL */
+
+	bool has_immed : 1;     /* selects the immediate encoding of an op */
+	bool has_shift : 1;     /* presumably set by the parser when a shift is given */
+	bool has_bit   : 1;     /* presumably set by the parser when a bit is given */
+	bool is_literal : 1;    /* this entry is a literal dword, not an opcode */
+	bool rep        : 1;    /* (rep) flag, passed on to afuc_set_opc() */
+};
+
+/* A declared label: a name bound to an index into the instruction table. */
+struct asm_label {
+	unsigned offset;        /* index of the instruction the label precedes */
+	const char *label;      /* label name (pointer is stored, not copied) */
+};
+
+struct asm_instruction *next_instr(int tok);
+void decl_label(const char *str);
+
+
+/**
+ * Translate a register token into its register number.
+ *
+ * @str: token text including the leading '$': either one of the named
+ *       registers ($rem, $addr, $addr2, $data) or '$' followed by a
+ *       hexadecimal register number
+ *
+ * Returns the register number (0x00..0x1f).  Anything else, including a
+ * number that does not fit the 5-bit register fields of the instruction
+ * encoding, is reported and terminates the process with status 2 rather
+ * than being silently truncated later.
+ */
+static inline uint32_t
+parse_reg(const char *str)
+{
+	char *retstr;
+	long int ret;
+
+	if (!strcmp(str, "$rem"))
+		return 0x1c;
+	else if (!strcmp(str, "$addr"))
+		return 0x1d;
+	else if (!strcmp(str, "$addr2"))
+		return 0x1e;
+	else if (!strcmp(str, "$data"))
+		return 0x1f;
+
+	ret = strtol(str + 1, &retstr, 16);
+
+	/* reject: no digits at all, trailing junk, or out of range */
+	if (retstr == str + 1 || *retstr != '\0' || ret < 0 || ret > 0x1f) {
+		fprintf(stderr, "invalid register: %s\n", str);
+		exit(2);
+	}
+
+	return ret;
+}
+
+/**
+ * Parse a literal dword token of the form "[hexdigits]".
+ *
+ * @str: token text including the surrounding brackets
+ *
+ * Returns the 32-bit value.  The conversion is unsigned so that values
+ * with the top bit set are not clamped where long is 32 bits wide.  An
+ * empty, malformed or wider-than-32-bit literal is reported and terminates
+ * the process with status 2.
+ */
+static inline uint32_t
+parse_literal(const char *str)
+{
+	char *retstr;
+	unsigned long ret;
+
+	ret = strtoul(str + 1, &retstr, 16);
+
+	if (retstr == str + 1 || *retstr != ']' || ret > 0xffffffffUL) {
+		fprintf(stderr, "invalid literal: %s\n", str);
+		exit(2);
+	}
+
+	return (uint32_t)ret;
+}
+
+/**
+ * Parse a bit token: the first character is skipped and the rest is read
+ * as a decimal number.
+ */
+static inline uint32_t
+parse_bit(const char *str)
+{
+	const char *digits = &str[1];
+	long bit = strtol(digits, NULL, 10);
+
+	return bit;
+}
+
+unsigned parse_control_reg(const char *name);
+
+/**
+ * Strip the trailing ':' off a label declaration.
+ *
+ * @str: token text of the declaration, e.g. "foo:"
+ *
+ * Returns a newly allocated copy of the text without its last character;
+ * the caller owns it.  An empty string is returned unchanged (there is
+ * nothing to strip).  Running out of memory is fatal.
+ */
+static inline const char *
+parse_label_decl(const char *str)
+{
+	char *s = strdup(str);
+	size_t len;
+
+	if (!s) {
+		fprintf(stderr, "out of memory\n");
+		exit(2);
+	}
+
+	len = strlen(s);
+	if (len > 0)
+		s[len - 1] = '\0';
+	return s;
+}
+
+void yyset_in (FILE *  _in_str );
+
+
+#endif /* _ASM_H_ */
diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c
new file mode 100644
index 0000000..ea9f34c
--- /dev/null
+++ b/src/freedreno/afuc/disasm.c
@@ -0,0 +1,829 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+
+#include "afuc.h"
+#include "rnn.h"
+#include "rnndec.h"
+
+static int gpuver;
+
+
+static struct rnndeccontext *ctx;
+static struct rnndb *db;
+static struct rnndomain *control_regs;
+struct rnndomain *dom[2];
+const char *variant;
+
+/* non-verbose mode should output something suitable to feed back into
+ * assembler.. verbose mode has additional output useful for debugging
+ * (like unexpected bits that are set)
+ */
+static bool verbose = false;
+
+/**
+ * If the value looks like the offset of a known GPU register, print the
+ * register name as a trailing "; NAME" comment.  Values below 0x100 are
+ * never annotated.  The generation specific domain is tried before the
+ * common one.
+ */
+static void print_gpu_reg(uint32_t regbase)
+{
+	struct rnndomain *match;
+	struct rnndecaddrinfo *ai;
+
+	if (regbase < 0x100)
+		return;
+
+	if (rnndec_checkaddr(ctx, dom[0], regbase, 0))
+		match = dom[0];
+	else if (rnndec_checkaddr(ctx, dom[1], regbase, 0))
+		match = dom[1];
+	else
+		return;
+
+	if (!match)
+		return;
+
+	ai = rnndec_decodeaddr(ctx, match, regbase, 0);
+	if (!ai)
+		return;
+
+	printf("\t; %s", ai->name);
+	free(ai->name);
+	free(ai);
+}
+
+/**
+ * printf() wrapper that brackets the formatted text with a color escape:
+ * first the string c, then the formatted output, then the context's color
+ * reset string.
+ */
+static void printc(const char *c, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	printf("%s", c);
+	vprintf(fmt, ap);
+	printf("%s", ctx->colors->reset);
+	va_end(ap);
+}
+
+#define printerr(fmt, ...) printc(ctx->colors->err, fmt, ##__VA_ARGS__)
+#define printlbl(fmt, ...) printc(ctx->colors->btarg, fmt, ##__VA_ARGS__)
+
+/**
+ * Print a register operand: the four highest register numbers have
+ * symbolic names, everything else is printed as $ plus two hex digits.
+ *
+ * Open questions carried over from the reverse engineering notes:
+ *  - reading $00 seems to yield a literal zero
+ *  - reading $1c seems to give the remaining length of the packet
+ *  - $01 is the current packet header; writing it triggers parsing the
+ *    header and jumping to the appropriate handler
+ */
+static void print_reg(unsigned reg)
+{
+	switch (reg) {
+	case 0x1c:
+		/* remaining dwords in packet */
+		printf("$rem");
+		break;
+	case 0x1d:
+		printf("$addr");
+		break;
+	case 0x1e:
+		/* XXX meaning not fully understood */
+		printf("$addr2");
+		break;
+	case 0x1f:
+		printf("$data");
+		break;
+	default:
+		printf("$%02x", reg);
+		break;
+	}
+}
+
+/* Print a source register operand; currently identical to print_reg(). */
+static void print_src(unsigned reg)
+{
+	print_reg(reg);
+}
+
+/* Print a destination register operand; currently identical to print_reg(). */
+static void print_dst(unsigned reg)
+{
+	print_reg(reg);
+}
+
+/**
+ * Print the mnemonic (followed by a space) for an ALU opcode.  For an
+ * opcode without a known mnemonic, print the raw instruction word in the
+ * error color followed by an "aluXX" comment instead.
+ */
+static void print_alu_name(afuc_opc opc, uint32_t instr)
+{
+	static const struct {
+		afuc_opc opc;
+		const char *text;
+	} mnemonics[] = {
+		{ OPC_ADD,   "add "   },
+		{ OPC_ADDHI, "addhi " },
+		{ OPC_SUB,   "sub "   },
+		{ OPC_SUBHI, "subhi " },
+		{ OPC_AND,   "and "   },
+		{ OPC_OR,    "or "    },
+		{ OPC_XOR,   "xor "   },
+		{ OPC_NOT,   "not "   },
+		{ OPC_SHL,   "shl "   },
+		{ OPC_USHR,  "ushr "  },
+		{ OPC_ISHR,  "ishr "  },
+		{ OPC_ROT,   "rot "   },
+		{ OPC_MUL8,  "mul8 "  },
+		{ OPC_MIN,   "min "   },
+		{ OPC_MAX,   "max "   },
+		{ OPC_CMP,   "cmp "   },
+		{ OPC_MSB,   "msb "   },
+	};
+	unsigned n;
+
+	for (n = 0; n < sizeof(mnemonics) / sizeof(mnemonics[0]); n++) {
+		if (mnemonics[n].opc == opc) {
+			printf("%s", mnemonics[n].text);
+			return;
+		}
+	}
+
+	printerr("[%08x]", instr);
+	printf("  ; alu%02x ", opc);
+}
+
+/**
+ * Look up the name of a PM4 type3 packet by id.  Entries that are tagged
+ * with a variants string are only considered if that string mentions the
+ * GPU variant being disassembled.  Returns NULL if nothing matches or the
+ * enum is not in the database.
+ */
+static char *getpm4(uint32_t id)
+{
+	struct rnnenum *en = rnn_findenum(ctx->db, "adreno_pm4_type3_packets");
+	int n;
+
+	if (!en)
+		return NULL;
+
+	for (n = 0; n < en->valsnum; n++) {
+		const char *variants;
+
+		if (!en->vals[n]->valvalid || en->vals[n]->value != id)
+			continue;
+
+		variants = en->vals[n]->varinfo.variantsstr;
+		if (variants && !strstr(variants, variant))
+			continue;
+
+		return en->vals[n]->name;
+	}
+
+	return NULL;
+}
+
+/**
+ * Compute the odd-parity bit of val: 1 if val has an even number of bits
+ * set, 0 otherwise.
+ */
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+	unsigned shift;
+
+	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+	 * Fold the word down onto its low nibble (shifts of 16, 8 and 4)..
+	 */
+	for (shift = 16; shift >= 4; shift >>= 1)
+		val ^= val >> shift;
+	val &= 0xf;
+
+	/* ..and look the nibble up in the 16-entry parity table 0x6996, which
+	 * is inverted because we want odd parity.
+	 */
+	return (~0x6996 >> val) & 1;
+}
+
+static struct {
+	uint32_t offset;
+	uint32_t num_jump_labels;
+	uint32_t jump_labels[256];
+} jump_labels[1024];
+int num_jump_labels;
+
+/**
+ * Record that jump table slot n points at the given instruction offset.
+ * Slots that share an offset are collected under one entry, so that all
+ * of their names can be printed at that offset.
+ */
+static void add_jump_table_entry(uint32_t n, uint32_t offset)
+{
+	int slot = 0;
+
+	if (n > 128) /* can't possibly be a PM4 PKT3.. */
+		return;
+
+	/* find the entry for this offset, if we have already seen it: */
+	while (slot < num_jump_labels && jump_labels[slot].offset != offset)
+		slot++;
+
+	if (slot == num_jump_labels) {
+		/* first time this offset shows up, start a new entry: */
+		num_jump_labels = slot + 1;
+		jump_labels[slot].offset = offset;
+		jump_labels[slot].num_jump_labels = 0;
+	}
+
+	jump_labels[slot].jump_labels[jump_labels[slot].num_jump_labels++] = n;
+	assert(jump_labels[slot].num_jump_labels < 256);
+}
+
+/**
+ * Return the index of the jump label entry recorded for the given
+ * instruction offset, or -1 if no jump table slot points there.
+ */
+static int get_jump_table_entry(uint32_t offset)
+{
+	int slot = 0;
+
+	while (slot < num_jump_labels) {
+		if (jump_labels[slot].offset == offset)
+			return slot;
+		slot++;
+	}
+
+	return -1;
+}
+
+static uint32_t label_offsets[0x512];
+static int num_label_offsets;
+
+/**
+ * Find the index of the branch label for an instruction offset.
+ *
+ * @offset: instruction offset the label points at
+ * @create: if true, a label is added when none exists yet
+ *
+ * Returns the label index, or -1 if there is no such label and create is
+ * false.  The offsets come from the firmware being disassembled, so the
+ * table is checked for space before appending; running out is fatal.
+ */
+static int label_idx(uint32_t offset, bool create)
+{
+	const int max_labels = sizeof(label_offsets) / sizeof(label_offsets[0]);
+	int i;
+	for (i = 0; i < num_label_offsets; i++)
+		if (offset == label_offsets[i])
+			return i;
+	if (!create)
+		return -1;
+	if (i >= max_labels) {
+		fprintf(stderr, "too many branch labels (max %d)\n", max_labels);
+		exit(1);
+	}
+	label_offsets[i] = offset;
+	num_label_offsets = i+1;
+	return i;
+}
+
+/**
+ * Return the name to print for a branch target.
+ *
+ * With allow_jt set, a target that is also a jump table entry with a known
+ * PM4 packet name is referred to by that name.  Otherwise the generic
+ * "lNNN" name of the branch label is used; NULL means there is no label at
+ * this offset.  The generic name lives in a static buffer that is
+ * overwritten by the next call.
+ */
+static const char *
+label_name(uint32_t offset, bool allow_jt)
+{
+	static char name[8];
+	int idx;
+
+	idx = allow_jt ? get_jump_table_entry(offset) : -1;
+	if (idx >= 0) {
+		uint32_t k;
+
+		for (k = 0; k < jump_labels[idx].num_jump_labels; k++) {
+			char *pm4 = getpm4(jump_labels[idx].jump_labels[k]);
+			if (pm4)
+				return pm4;
+		}
+		/* no packet with a known name points here.. maybe we should
+		 * return UNKN%d to at least make it clear that this is some
+		 * sort of jump-table entry?
+		 */
+	}
+
+	idx = label_idx(offset, false);
+	if (idx < 0)
+		return NULL;
+
+	sprintf(name, "l%03d", idx);
+	return name;
+}
+
+
+static uint32_t fxn_offsets[0x512];
+static int num_fxn_offsets;
+
+/**
+ * Find the index of the function label for a call target.
+ *
+ * @offset: instruction offset of the call target
+ * @create: if true, a function label is added when none exists yet
+ *
+ * Returns the function index, or -1 if there is no such function and
+ * create is false.  The offsets come from the firmware being disassembled,
+ * so the table is checked for space before appending; running out is
+ * fatal.
+ */
+static int fxn_idx(uint32_t offset, bool create)
+{
+	const int max_fxns = sizeof(fxn_offsets) / sizeof(fxn_offsets[0]);
+	int i;
+	for (i = 0; i < num_fxn_offsets; i++)
+		if (offset == fxn_offsets[i])
+			return i;
+	if (!create)
+		return -1;
+	if (i >= max_fxns) {
+		fprintf(stderr, "too many call targets (max %d)\n", max_fxns);
+		exit(1);
+	}
+	fxn_offsets[i] = offset;
+	num_fxn_offsets = i+1;
+	return i;
+}
+
+/**
+ * Return the generated "fxnNN" name of the function starting at offset, or
+ * NULL if no call targets that offset.  The name lives in a static buffer
+ * that is overwritten by the next call.
+ */
+static const char *
+fxn_name(uint32_t offset)
+{
+	static char name[8];
+	const int idx = fxn_idx(offset, false);
+
+	if (idx >= 0) {
+		sprintf(name, "fxn%02d", idx);
+		return name;
+	}
+
+	return NULL;
+}
+
+/**
+ * Print a control register operand: "@NAME" if the id is known to the
+ * control register domain, otherwise the raw id in hex.
+ *
+ * The decode result is checked before use, the same way print_gpu_reg()
+ * does, and an id that cannot be decoded falls back to the hex form.
+ */
+static void print_control_reg(uint32_t id)
+{
+	struct rnndecaddrinfo *info = NULL;
+
+	if (rnndec_checkaddr(ctx, control_regs, id, 0))
+		info = rnndec_decodeaddr(ctx, control_regs, id, 0);
+
+	if (info) {
+		printf("@%s", info->name);
+		free(info->name);
+		free(info);
+	} else {
+		printf("0x%03x", id);
+	}
+}
+
+/**
+ * Disassemble a firmware image and print it to stdout.
+ *
+ * @buf:        firmware dwords, starting at the first instruction slot
+ *              (the caller has already skipped the leading dword)
+ * @sizedwords: number of dwords available in buf
+ *
+ * The low 16 bits of the second dword give the offset of the jump table,
+ * which is also the number of instruction dwords.  The jump table has 0x80
+ * entries, one per PM4 type3 packet id.
+ *
+ * Three passes are made: the jump table is parsed, then branch and call
+ * targets are collected so labels can be generated, then the instructions
+ * are printed.  In verbose mode the jump table is dumped at the end.
+ *
+ * An image too small to hold its header or its jump table is rejected
+ * instead of being read out of bounds.
+ */
+static void disasm(uint32_t *buf, int sizedwords)
+{
+	uint32_t *instrs = buf;
+	int jmptbl_start;
+	uint32_t *jmptbl;
+	afuc_opc opc;
+	bool rep;
+	int i;
+
+	if (sizedwords < 2) {
+		fprintf(stderr, "firmware too small: %d dwords\n", sizedwords);
+		exit(1);
+	}
+
+	jmptbl_start = instrs[1] & 0xffff;
+	if (jmptbl_start + 0x80 > sizedwords) {
+		fprintf(stderr, "jump table at %04x does not fit in %d dwords\n",
+				jmptbl_start, sizedwords);
+		exit(1);
+	}
+	jmptbl = &buf[jmptbl_start];
+
+	/* parse jumptable: */
+	for (i = 0; i < 0x80; i++) {
+		unsigned offset = jmptbl[i];
+		unsigned n = i;// + CP_NOP;
+		add_jump_table_entry(n, offset);
+	}
+
+	/* do a pre-pass to find instructions that are potential branch targets,
+	 * and add labels for them:
+	 */
+	for (i = 0; i < jmptbl_start; i++) {
+		afuc_instr *instr = (void *)&instrs[i];
+
+		afuc_get_opc(instr, &opc, &rep);
+
+		switch (opc) {
+		case OPC_BRNEI:
+		case OPC_BREQI:
+		case OPC_BRNEB:
+		case OPC_BREQB:
+			label_idx(i + instr->br.ioff, true);
+			break;
+		case OPC_PREEMPTLEAVE6:
+			if (gpuver >= 6)
+				label_idx(instr->call.uoff, true);
+			break;
+		case OPC_CALL:
+			fxn_idx(instr->call.uoff, true);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* print instructions: */
+	for (i = 0; i < jmptbl_start; i++) {
+		int jump_label_idx;
+		afuc_instr *instr = (void *)&instrs[i];
+		const char *fname, *lname;
+		afuc_opc opc;
+		bool rep;
+
+		afuc_get_opc(instr, &opc, &rep);
+
+		lname = label_name(i, false);
+		fname = fxn_name(i);
+		jump_label_idx = get_jump_table_entry(i);
+
+		/* packet handler entry points: one label line per packet id */
+		if (jump_label_idx >= 0) {
+			int j;
+			printf("\n");
+			for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
+				uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
+				char *name = getpm4(jump_label);
+				if (name) {
+					printlbl("%s", name);
+				} else {
+					printlbl("UNKN%d", jump_label);
+				}
+				printf(":\n");
+			}
+		}
+
+		if (fname) {
+			printlbl("%s", fname);
+			printf(":\n");
+		}
+
+		if (lname) {
+			printlbl(" %s", lname);
+			printf(":");
+		} else {
+			printf("      ");
+		}
+
+
+		if (verbose) {
+			printf("\t%04x: %08x  ", i, instrs[i]);
+		} else {
+			printf("  ");
+		}
+
+		switch (opc) {
+		case OPC_NOP: {
+			/* a6xx changed the default immediate, and apparently 0
+			 * is illegal now.
+			 */
+			const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
+			if (instrs[i] != nop) {
+				printerr("[%08x]", instrs[i]);
+				printf("  ; ");
+			}
+			if (rep)
+				printf("(rep)");
+			printf("nop");
+			print_gpu_reg(instrs[i]);
+
+			break;
+		}
+		case OPC_ADD:
+		case OPC_ADDHI:
+		case OPC_SUB:
+		case OPC_SUBHI:
+		case OPC_AND:
+		case OPC_OR:
+		case OPC_XOR:
+		case OPC_NOT:
+		case OPC_SHL:
+		case OPC_USHR:
+		case OPC_ISHR:
+		case OPC_ROT:
+		case OPC_MUL8:
+		case OPC_MIN:
+		case OPC_MAX:
+		case OPC_CMP: {
+			bool src1 = true;
+
+			/* 'not' takes a single operand (the immediate) */
+			if (opc == OPC_NOT)
+				src1 = false;
+
+			if (rep)
+				printf("(rep)");
+
+			print_alu_name(opc, instrs[i]);
+			print_dst(instr->alui.dst);
+			printf(", ");
+			if (src1) {
+				print_src(instr->alui.src);
+				printf(", ");
+			}
+			printf("0x%04x", instr->alui.uimm);
+			print_gpu_reg(instr->alui.uimm);
+
+			/* print out unexpected bits: */
+			if (verbose) {
+				if (instr->alui.src && !src1)
+					printerr("  (src=%02x)", instr->alui.src);
+			}
+
+			break;
+		}
+		case OPC_MOVI: {
+			if (rep)
+				printf("(rep)");
+			printf("mov ");
+			print_dst(instr->movi.dst);
+			printf(", 0x%04x", instr->movi.uimm);
+			if (instr->movi.shift)
+				printf(" << %u", instr->movi.shift);
+
+			/* using mov w/ << 16 is popular way to construct a pkt7
+			 * header to send (for ex, from PFP to ME), so check that
+			 * case first
+			 */
+			if ((instr->movi.shift == 16) &&
+					((instr->movi.uimm & 0xff00) == 0x7000)) {
+				unsigned opc, p;
+
+				opc = instr->movi.uimm & 0x7f;
+				p = _odd_parity_bit(opc);
+
+				/* So, you'd think that checking the parity bit would be
+				 * a good way to rule out false positives, but seems like
+				 * ME doesn't really care.. at least it would filter out
+				 * things that look like actual legit packets between
+				 * PFP and ME..
+				 */
+				if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
+					const char *name = getpm4(opc);
+					printf("\t; ");
+					if (name)
+						printlbl("%s", name);
+					else
+						printlbl("UNKN%u", opc);
+					break;
+				}
+			}
+
+			print_gpu_reg(instr->movi.uimm << instr->movi.shift);
+
+			break;
+		}
+		case OPC_ALU: {
+			bool src1 = true;
+
+			/* single operand ops only use src2 */
+			if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
+				src1 = false;
+
+			if (instr->alu.pad)
+				printf("[%08x]  ; ", instrs[i]);
+
+			if (rep)
+				printf("(rep)");
+
+			/* special case mnemonics:
+			 *   reading $00 seems to always yield zero, and so:
+			 *      or $dst, $00, $src -> mov $dst, $src
+			 *   Maybe add one for negate too, ie.
+			 *      sub $dst, $00, $src ???
+			 */
+			if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
+				printf("mov ");
+				src1 = false;
+			} else {
+				print_alu_name(instr->alu.alu, instrs[i]);
+			}
+
+			print_dst(instr->alu.dst);
+			if (src1) {
+				printf(", ");
+				print_src(instr->alu.src1);
+			}
+			printf(", ");
+			print_src(instr->alu.src2);
+
+			/* print out unexpected bits: */
+			if (verbose) {
+				if (instr->alu.pad)
+					printerr("  (pad=%03x)", instr->alu.pad);
+				if (instr->alu.src1 && !src1)
+					printerr("  (src1=%02x)", instr->alu.src1);
+			}
+			break;
+		}
+		case OPC_CWRITE6:
+		case OPC_CREAD6:
+		case OPC_STORE6:
+		case OPC_LOAD6: {
+			if (rep)
+				printf("(rep)");
+
+			bool is_control_reg = true;
+			if (gpuver >= 6) {
+				switch (opc) {
+				case OPC_CWRITE6:
+					printf("cwrite ");
+					break;
+				case OPC_CREAD6:
+					printf("cread ");
+					break;
+				case OPC_STORE6:
+					is_control_reg = false;
+					printf("store ");
+					break;
+				case OPC_LOAD6:
+					is_control_reg = false;
+					printf("load ");
+					break;
+				default:
+					assert(!"unreachable");
+				}
+			} else {
+				switch (opc) {
+				case OPC_CWRITE5:
+					printf("cwrite ");
+					break;
+				case OPC_CREAD5:
+					printf("cread ");
+					break;
+				default:
+					fprintf(stderr, "A6xx control opcode on A5xx?\n");
+					exit(1);
+				}
+			}
+
+			print_src(instr->control.src1);
+			printf(", [");
+			print_src(instr->control.src2);
+			printf(" + ");
+			if (is_control_reg && instr->control.flags != 0x4)
+				print_control_reg(instr->control.uimm);
+			else
+				printf("0x%03x", instr->control.uimm);
+			printf("], 0x%x", instr->control.flags);
+			break;
+		}
+		case OPC_BRNEI:
+		case OPC_BREQI:
+		case OPC_BRNEB:
+		case OPC_BREQB: {
+			unsigned off = i + instr->br.ioff;
+
+			assert(!rep);
+
+			/* Since $00 reads back zero, it can be used as src for
+			 * unconditional branches.  (This only really makes sense
+			 * for the BREQB.. or possible BRNEI if imm==0.)
+			 *
+			 * If bit=0 then branch is taken if *all* bits are zero.
+			 * Otherwise it is taken if bit (bit-1) is clear.
+			 *
+			 * Note the instruction after a jump/branch is executed
+			 * regardless of whether branch is taken, so use nop or
+			 * take that into account in code.
+			 */
+			if (instr->br.src || (opc != OPC_BRNEB)) {
+				bool immed = false;
+
+				if (opc == OPC_BRNEI) {
+					printf("brne ");
+					immed = true;
+				} else if (opc == OPC_BREQI) {
+					printf("breq ");
+					immed = true;
+				} else if (opc == OPC_BRNEB) {
+					printf("brne ");
+				} else if (opc == OPC_BREQB) {
+					printf("breq ");
+				}
+				print_src(instr->br.src);
+				if (immed) {
+					printf(", 0x%x,", instr->br.bit_or_imm);
+				} else {
+					printf(", b%u,", instr->br.bit_or_imm);
+				}
+			} else {
+				printf("jump");
+				if (verbose && instr->br.bit_or_imm) {
+					printerr("  (src=%03x, bit=%03x) ",
+						instr->br.src, instr->br.bit_or_imm);
+				}
+			}
+
+			printf(" #");
+			printlbl("%s", label_name(off, true));
+			if (verbose)
+				printf(" (#%d, %04x)", instr->br.ioff, off);
+			break;
+		}
+		case OPC_CALL:
+			assert(!rep);
+			printf("call #");
+			printlbl("%s", fxn_name(instr->call.uoff));
+			if (verbose) {
+				printf(" (%04x)", instr->call.uoff);
+				if (instr->br.bit_or_imm || instr->br.src) {
+					printerr("  (src=%03x, bit=%03x) ",
+						instr->br.src, instr->br.bit_or_imm);
+				}
+			}
+			break;
+		case OPC_RET:
+			assert(!rep);
+			if (instr->pad)
+				printf("[%08x]  ; ", instrs[i]);
+			printf("ret");
+			break;
+		case OPC_WIN:
+			assert(!rep);
+			if (instr->waitin.pad)
+				printf("[%08x]  ; ", instrs[i]);
+			printf("waitin");
+			if (verbose && instr->waitin.pad)
+				printerr("  (pad=%x)", instr->waitin.pad);
+			break;
+		case OPC_PREEMPTLEAVE6:
+			if (gpuver < 6) {
+				printf("[%08x]  ; op38", instrs[i]);
+			}
+			printf("preemptleave #");
+			printlbl("%s", label_name(instr->call.uoff, true));
+			break;
+		default:
+			printerr("[%08x]", instrs[i]);
+			printf("  ; op%02x ", opc);
+			print_dst(instr->alui.dst);
+			printf(", ");
+			print_src(instr->alui.src);
+			print_gpu_reg(instrs[i] & 0xffff);
+			break;
+		}
+		printf("\n");
+	}
+
+	/* print jumptable: */
+	if (verbose) {
+		printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
+		printf("; JUMP TABLE\n");
+		/* all 0x80 entries, matching the parse loop above */
+		for (i = 0; i < 0x80; i++) {
+			int n = i;// + CP_NOP;
+			uint32_t offset = jmptbl[i];
+			char *name = getpm4(n);
+			printf("%3d %02x: ", n, n);
+			printf("%04x", offset);
+			if (name) {
+				printf("   ; %s", name);
+			} else {
+				printf("   ; UNKN%d", n);
+			}
+			printf("\n");
+		}
+	}
+}
+
+#define CHUNKSIZE 4096
+
+/**
+ * Read a whole file into a newly allocated buffer.
+ *
+ * @path: file to read
+ * @sz:   set to the number of bytes read, or to 0 on failure
+ *
+ * Returns the buffer (the caller frees it), or NULL if the file cannot be
+ * opened or read, or memory runs out.  The file is read until read()
+ * reports end of file, so a short read does not truncate the result, and
+ * the descriptor is closed on every path.
+ */
+static char * readfile(const char *path, int *sz)
+{
+	char *buf = NULL;
+	int fd, ret, n = 0;
+
+	*sz = 0;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	while (1) {
+		/* keep the old pointer so it can still be freed on failure */
+		char *newbuf = realloc(buf, n + CHUNKSIZE);
+		if (!newbuf)
+			goto fail;
+		buf = newbuf;
+
+		ret = read(fd, buf + n, CHUNKSIZE);
+		if (ret < 0)
+			goto fail;
+		if (ret == 0)
+			break;
+		n += ret;
+	}
+
+	close(fd);
+	*sz = n;
+	return buf;
+
+fail:
+	free(buf);
+	close(fd);
+	return NULL;
+}
+
+/* Print the disassembler's command line synopsis to stderr and exit(2). */
+static void usage(void)
+{
+	static const char help[] =
+		"Usage:\n"
+		"\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
+		"\t\t-g - specify GPU version (5, etc)\n"
+		"\t\t-c - use colors\n"
+		"\t\t-v - verbose output\n";
+
+	fputs(help, stderr);
+	exit(2);
+}
+
+/**
+ * Disassembler entry point: disasm [-g GPUVER] [-v] [-c] file
+ *
+ * Loads the register database, reads the firmware file and prints its
+ * disassembly to stdout.  If -g is not given, the GPU generation is
+ * guessed from the file name.
+ *
+ * Returns 0 on success and 1 if the firmware cannot be read or is too
+ * small to contain a header; usage errors exit with status 2 via usage().
+ */
+int main(int argc, char **argv)
+{
+	uint32_t *buf;
+	char *file, *control_reg_name;
+	bool colors = false;
+	int sz, c;
+
+	/* Argument parsing: */
+	while ((c = getopt (argc, argv, "g:vc")) != -1) {
+		switch (c) {
+			case 'g':
+				gpuver = atoi(optarg);
+				break;
+			case 'v':
+				verbose = true;
+				break;
+			case 'c':
+				colors = true;
+				break;
+			default:
+				usage();
+		}
+	}
+
+	if (optind >= argc) {
+		fprintf(stderr, "no file specified!\n");
+		usage();
+	}
+
+	file = argv[optind];
+
+	/* if gpu version not specified, infer from filename: */
+	if (!gpuver) {
+		if (strstr(file, "a5")) {
+			gpuver = 5;
+		} else if (strstr(file, "a6")) {
+			gpuver = 6;
+		}
+	}
+
+	switch (gpuver) {
+	case 6:
+		printf("; a6xx microcode\n");
+		variant = "A6XX";
+		control_reg_name = "A6XX_CONTROL_REG";
+		break;
+	case 5:
+		printf("; a5xx microcode\n");
+		variant = "A5XX";
+		control_reg_name = "A5XX_CONTROL_REG";
+		break;
+	default:
+		fprintf(stderr, "unknown GPU version!\n");
+		usage();
+	}
+
+	rnn_init();
+	db = rnn_newdb();
+
+	ctx = rnndec_newcontext(db);
+	ctx->colors = colors ? &envy_def_colors : &envy_null_colors;
+
+	rnn_parsefile(db, "adreno.xml");
+	dom[0] = rnn_finddomain(db, variant);
+	dom[1] = rnn_finddomain(db, "AXXX");
+	control_regs = rnn_finddomain(db, control_reg_name);
+
+	buf = (uint32_t *)readfile(file, &sz);
+	if (!buf) {
+		fprintf(stderr, "could not read \"%s\"\n", file);
+		return 1;
+	}
+
+	/* the leading dword plus the two dwords the disassembler reads
+	 * unconditionally (buf[1] here, the one after it in disasm())
+	 */
+	if (sz < 3 * 4) {
+		fprintf(stderr, "\"%s\" is too small to be a firmware file\n", file);
+		free(buf);
+		return 1;
+	}
+
+	printf("; Disassembling microcode: %s\n", file);
+	printf("; Version: %08x\n\n", buf[1]);
+	disasm(&buf[1], sz/4 - 1);
+
+	free(buf);
+
+	return 0;
+}
diff --git a/src/freedreno/afuc/lexer.l b/src/freedreno/afuc/lexer.l
new file mode 100644
index 0000000..aacc947
--- /dev/null
+++ b/src/freedreno/afuc/lexer.l
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#include <stdlib.h>
+#include "parser.h"
+#include "asm.h"
+
+#define TOKEN(t) (yylval.tok = t)
+extern YYSTYPE yylval;
+
+%}
+
+%option noyywrap
+
+%%
+"\n"                              yylineno++;
+[ \t]                             ; /* ignore whitespace */
+";"[^\n]*                         ; /* ignore comments; the newline rule above counts the line */
+"0"|[1-9][0-9]*                   yylval.num = strtoul(yytext, NULL, 0);    return T_INT;
+"0x"[0-9a-fA-F]+                  yylval.num = strtoul(yytext, NULL, 0);    return T_HEX;
+                                  /* registers, bits, control regs, labels and [literals]: */
+"$"[0-9a-fA-F][0-9a-fA-F]         yylval.num = parse_reg(yytext); return T_REGISTER;
+"$"[a-zA-Z][a-zA-Z0-9]*           yylval.num = parse_reg(yytext); return T_REGISTER;
+"b"[0-9][0-9]*                    yylval.num = parse_bit(yytext); return T_BIT;
+"@"[a-zA-Z_][a-zA-Z0-9_]*         yylval.num = parse_control_reg(yytext); return T_CONTROL_REG;
+"#"[a-zA-Z_][a-zA-Z0-9_]*         yylval.str = strdup(yytext+1);  return T_LABEL_REF; /* label reference */
+[a-zA-Z_][a-zA-Z0-9_]*":"         yylval.str = parse_label_decl(yytext); return T_LABEL_DECL; /* label declaration */
+"["[0-9a-fA-F][0-9a-fA-F]*"]"     yylval.num = parse_literal(yytext); return T_LITERAL;
+
+                                  /* instructions: */
+"nop"                             return TOKEN(T_OP_NOP);
+"add"                             return TOKEN(T_OP_ADD);
+"addhi"                           return TOKEN(T_OP_ADDHI);
+"sub"                             return TOKEN(T_OP_SUB);
+"subhi"                           return TOKEN(T_OP_SUBHI);
+"and"                             return TOKEN(T_OP_AND);
+"or"                              return TOKEN(T_OP_OR);
+"xor"                             return TOKEN(T_OP_XOR);
+"not"                             return TOKEN(T_OP_NOT);
+"shl"                             return TOKEN(T_OP_SHL);
+"ushr"                            return TOKEN(T_OP_USHR);
+"ishr"                            return TOKEN(T_OP_ISHR);
+"rot"                             return TOKEN(T_OP_ROT);
+"mul8"                            return TOKEN(T_OP_MUL8);
+"min"                             return TOKEN(T_OP_MIN);
+"max"                             return TOKEN(T_OP_MAX);
+"cmp"                             return TOKEN(T_OP_CMP);
+"msb"                             return TOKEN(T_OP_MSB);
+"mov"                             return TOKEN(T_OP_MOV);
+"cwrite"                          return TOKEN(T_OP_CWRITE);
+"cread"                           return TOKEN(T_OP_CREAD);
+"store"                           return TOKEN(T_OP_STORE);
+"load"                            return TOKEN(T_OP_LOAD);
+"brne"                            return TOKEN(T_OP_BRNE);
+"breq"                            return TOKEN(T_OP_BREQ);
+"ret"                             return TOKEN(T_OP_RET);
+"call"                            return TOKEN(T_OP_CALL);
+"jump"                            return TOKEN(T_OP_JUMP);
+"waitin"                          return TOKEN(T_OP_WAITIN);
+"preemptleave"                    return TOKEN(T_OP_PREEMPTLEAVE);
+"<<"                              return TOKEN(T_LSHIFT);
+"(rep)"                           return TOKEN(T_REP);
+                                  /* single-character punctuation is returned as itself: */
+","                               return ',';
+"["                               return '[';
+"]"                               return ']';
+"+"                               return '+';
+                                  /* anything else: report it and stop scanning */
+.                                 fprintf(stderr, "error at line %d: Unknown token: %s\n", yyget_lineno(), yytext); yyterminate();
+
+%%
diff --git a/src/freedreno/afuc/meson.build b/src/freedreno/afuc/meson.build
new file mode 100644
index 0000000..8a62a33
--- /dev/null
+++ b/src/freedreno/afuc/meson.build
@@ -0,0 +1,69 @@
+# Copyright © 2020 Google, Inc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Generate parser.c (@OUTPUT0@) and parser.h (@OUTPUT1@) from parser.y.
+afuc_parser = custom_target(
+  'parser.[ch]',
+  output : ['parser.c', 'parser.h'],
+  input : 'parser.y',
+  command : [prog_bison, '@INPUT@', '--defines=@OUTPUT1@',
+             '--output=@OUTPUT0@'],
+)
+
+# Generate lexer.c from the flex scanner description lexer.l.
+afuc_lexer = custom_target(
+  'lexer.c',
+  output : 'lexer.c',
+  input : 'lexer.l',
+  # the generated scanner is written to the path passed with -o
+  command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@'],
+)
+
+# afuc assembler: asm.c plus the flex/bison outputs produced by the
+# afuc_lexer and afuc_parser targets, linked against libfreedreno_rnn.
+# It is built by default only when with_tools contains 'freedreno', and it
+# is never installed.
+asm = executable(
+  'asm',
+  [
+    'asm.c',
+    # generated by the custom targets above
+    afuc_lexer, afuc_parser,
+  ],
+  include_directories : [inc_freedreno_rnn],
+  link_with : [libfreedreno_rnn],
+  dependencies : [],
+  build_by_default : with_tools.contains('freedreno'),
+  install : false,
+)
+
+# afuc disassembler, built from the single source disasm.c and linked
+# against libfreedreno_rnn.
+# It is built by default only when with_tools contains 'freedreno', and it
+# is never installed.
+disasm = executable(
+  'disasm',
+  ['disasm.c'],
+  include_directories : [inc_freedreno_rnn],
+  link_with : [libfreedreno_rnn],
+  dependencies : [],
+  build_by_default : with_tools.contains('freedreno'),
+  install : false,
+)
diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y
new file mode 100644
index 0000000..9f82286
--- /dev/null
+++ b/src/freedreno/afuc/parser.y
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+%{
+#define YYDEBUG 0
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "asm.h"
+
+
+int yyget_lineno(void);
+
+#ifdef YYDEBUG
+int yydebug;
+#endif
+
+extern int yylex(void);
+typedef void *YY_BUFFER_STATE;
+extern YY_BUFFER_STATE yy_scan_string(const char *);
+extern void yy_delete_buffer(YY_BUFFER_STATE);
+
+int yyparse(void);
+
+void yyerror(const char *error);
+void yyerror(const char *error)
+{
+	fprintf(stderr, "error at line %d: %s\n", yyget_lineno(), error);
+}
+
+static struct asm_instruction *instr;   /* current instruction */
+
+/* Fetch a fresh instruction for token 'tok' and make it the current one. */
+static void new_instr(int tok)
+{
+	instr = next_instr(tok);
+}
+
+/* Set the destination field of the current instruction. */
+static void dst(int reg)
+{
+	instr->dst = reg;
+}
+
+/* Set the first source field of the current instruction. */
+static void src1(int reg)
+{
+	instr->src1 = reg;
+}
+
+/* Set the second source field of the current instruction. */
+static void src2(int reg)
+{
+	instr->src2 = reg;
+}
+
+/* Store an immediate operand and flag that one is present. */
+static void immed(int value)
+{
+	instr->immed = value;
+	instr->has_immed = true;
+}
+
+/* Store a shift amount and flag that one is present. */
+static void shift(int amount)
+{
+	instr->shift = amount;
+	instr->has_shift = true;
+}
+
+/* Store a bit operand and flag that one is present. */
+static void bit(int index)
+{
+	instr->bit = index;
+	instr->has_bit = true;
+}
+
+/* Store a literal value and flag the current instruction as a literal. */
+static void literal(uint32_t value)
+{
+	instr->literal = value;
+	instr->is_literal = true;
+}
+
+/* Attach a label name to the current instruction (pointer is not copied). */
+static void label(const char *name)
+{
+	instr->label = name;
+}
+
+%}
+
+%union {
+	int tok;
+	uint32_t num;
+	const char *str;
+}
+
+%{
+/* YYPRINT hook: writes only the numeric token type; the value is ignored. */
+static void print_token(FILE *out, int toktype, YYSTYPE value)
+{
+	fprintf(out, "\ntype: %d\n", toktype);
+}
+#define YYPRINT(file, type, value) print_token(file, type, value)
+%}
+
+%token <num> T_INT
+%token <num> T_HEX
+%token <num> T_CONTROL_REG
+%token <str> T_LABEL_DECL
+%token <str> T_LABEL_REF
+%token <num> T_LITERAL
+%token <num> T_BIT
+%token <num> T_REGISTER
+
+%token <tok> T_OP_NOP
+%token <tok> T_OP_ADD
+%token <tok> T_OP_ADDHI
+%token <tok> T_OP_SUB
+%token <tok> T_OP_SUBHI
+%token <tok> T_OP_AND
+%token <tok> T_OP_OR
+%token <tok> T_OP_XOR
+%token <tok> T_OP_NOT
+%token <tok> T_OP_SHL
+%token <tok> T_OP_USHR
+%token <tok> T_OP_ISHR
+%token <tok> T_OP_ROT
+%token <tok> T_OP_MUL8
+%token <tok> T_OP_MIN
+%token <tok> T_OP_MAX
+%token <tok> T_OP_CMP
+%token <tok> T_OP_MSB
+%token <tok> T_OP_MOV
+%token <tok> T_OP_CWRITE
+%token <tok> T_OP_CREAD
+%token <tok> T_OP_STORE
+%token <tok> T_OP_LOAD
+%token <tok> T_OP_BRNE
+%token <tok> T_OP_BREQ
+%token <tok> T_OP_RET
+%token <tok> T_OP_CALL
+%token <tok> T_OP_JUMP
+%token <tok> T_OP_WAITIN
+%token <tok> T_OP_PREEMPTLEAVE
+%token <tok> T_LSHIFT
+%token <tok> T_REP
+
+%type <num> reg
+%type <num> immediate
+
+%error-verbose
+
+%start instrs
+
+%%
+
+instrs:            instrs instr_or_label   /* left recursion: parser stack stays bounded for any input length */
+|                  instr_or_label
+
+instr_or_label:    instr_r
+|                  T_REP instr_r    { instr->rep = true; }   /* "(rep)" prefix: flag the instruction just built */
+|                  branch_instr
+|                  other_instr
+|                  T_LABEL_DECL   { decl_label($1); }
+
+/* instructions that can optionally have (rep) flag: */
+instr_r:           alu_instr
+|                  config_instr
+
+/* need to special case:
+ * - not (single src, possibly an immediate)
+ * - msb (single src, must be reg)
+ * - mov (single src, plus possibly a shift)
+ * from the other ALU instructions:
+ */
+
+alu_msb_instr:     T_OP_MSB reg ',' reg        { new_instr($1); dst($2); src2($4); }
+/* not: the single source is either a register (stored as src2) or an immediate */
+alu_not_instr:     T_OP_NOT reg ',' reg        { new_instr($1); dst($2); src2($4); }
+|                  T_OP_NOT reg ',' immediate  { new_instr($1); dst($2); immed($4); }
+/* mov: source is a register (src1), an immediate or a label reference; the latter two accept "<< immediate" */
+alu_mov_instr:     T_OP_MOV reg ',' reg        { new_instr($1); dst($2); src1($4); }
+|                  T_OP_MOV reg ',' immediate T_LSHIFT immediate {
+                       new_instr($1); dst($2); immed($4); shift($6);
+}
+|                  T_OP_MOV reg ',' immediate  { new_instr($1); dst($2); immed($4); }
+|                  T_OP_MOV reg ',' T_LABEL_REF T_LSHIFT immediate {
+                       new_instr($1); dst($2); label($4); shift($6);
+}
+|                  T_OP_MOV reg ',' T_LABEL_REF { new_instr($1); dst($2); label($4); }
+/* two-source opcodes: the instruction is created when the opcode is reduced, operands are filled in by alu_2src_instr */
+alu_2src_op:       T_OP_ADD       { new_instr($1); }
+|                  T_OP_ADDHI     { new_instr($1); }
+|                  T_OP_SUB       { new_instr($1); }
+|                  T_OP_SUBHI     { new_instr($1); }
+|                  T_OP_AND       { new_instr($1); }
+|                  T_OP_OR        { new_instr($1); }
+|                  T_OP_XOR       { new_instr($1); }
+|                  T_OP_SHL       { new_instr($1); }
+|                  T_OP_USHR      { new_instr($1); }
+|                  T_OP_ISHR      { new_instr($1); }
+|                  T_OP_ROT       { new_instr($1); }
+|                  T_OP_MUL8      { new_instr($1); }
+|                  T_OP_MIN       { new_instr($1); }
+|                  T_OP_MAX       { new_instr($1); }
+|                  T_OP_CMP       { new_instr($1); }
+/* the second source is either a register (src2) or an immediate */
+alu_2src_instr:    alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); src2($6); }
+|                  alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); }
+
+alu_instr:         alu_2src_instr
+|                  alu_msb_instr
+|                  alu_not_instr
+|                  alu_mov_instr
+
+config_op:         T_OP_CWRITE    { new_instr($1); }
+|                  T_OP_CREAD     { new_instr($1); }
+|                  T_OP_LOAD      { new_instr($1); }
+|                  T_OP_STORE     { new_instr($1); }
+/* operand form: reg, [reg + immediate], immediate -- stored as src1, src2, immed and bit respectively */
+config_instr:      config_op reg ',' '[' reg '+' immediate ']' ',' immediate {
+                       src1($2); src2($5); immed($7); bit($10);
+}
+
+branch_op:         T_OP_BRNE      { new_instr($1); }
+|                  T_OP_BREQ      { new_instr($1); }
+/* branches compare a register against either a bit (T_BIT) or an immediate and name a label */
+branch_instr:      branch_op reg ',' T_BIT ',' T_LABEL_REF     { src1($2); bit($4); label($6); }
+|                  branch_op reg ',' immediate ',' T_LABEL_REF { src1($2); immed($4); label($6); }
+
+other_instr:       T_OP_CALL T_LABEL_REF { new_instr($1); label($2); }
+|                  T_OP_PREEMPTLEAVE T_LABEL_REF { new_instr($1); label($2); }
+|                  T_OP_RET              { new_instr($1); }
+|                  T_OP_JUMP T_LABEL_REF { new_instr($1); label($2); }
+|                  T_OP_WAITIN           { new_instr($1); }
+|                  T_OP_NOP              { new_instr($1); }
+|                  T_LITERAL             { new_instr($1); literal($1); }   /* NOTE(review): the literal's value is also passed as the token id -- confirm next_instr() tolerates that */
+/* a register operand is the T_REGISTER value unchanged (default action) */
+reg:               T_REGISTER
+/* immediates: hex, decimal, a control register value, or a control register value plus a further immediate */
+immediate:         T_HEX
+|                  T_INT
+|                  T_CONTROL_REG
+|                  T_CONTROL_REG '+' immediate { $$ = $1 + $3; }
+
diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build
index 6405a7d..3df6dfb 100644
--- a/src/freedreno/meson.build
+++ b/src/freedreno/meson.build
@@ -35,6 +35,7 @@
 if dep_libxml2.found()
   subdir('rnn')
   subdir('decode')
+  subdir('afuc')
 endif
 
 if with_tools.contains('drm-shim')