| # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s |
| # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s |
| |
| --- |
| # Trivial clause at beginning of program |
| # Input is a single S_LOAD_DWORD_IMM followed by S_ENDPGM. The check lines |
| # expect the two instructions to appear back to back in the output, i.e. |
| # nothing is inserted between them under either invocation. |
| name: trivial_smem_clause_load_smrd4_x1 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1 |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program |
| # Two consecutive S_LOAD_DWORD_IMM with distinct pointers (%sgpr10_sgpr11, |
| # %sgpr12_sgpr13) and distinct destinations (%sgpr0, %sgpr1); no destination |
| # overlaps any pointer. The checks expect the loads to stay adjacent. |
| name: trivial_smem_clause_load_smrd4_x2 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2 |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr1 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program |
| # Three consecutive S_LOAD_DWORD_IMM writing %sgpr0, %sgpr1 and %sgpr2 from |
| # three different pointer pairs; no destination overlaps any pointer. The |
| # checks expect all three loads and S_ENDPGM on consecutive lines. |
| name: trivial_smem_clause_load_smrd4_x3 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3 |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| %sgpr1 = S_LOAD_DWORD_IMM %sgpr6_sgpr7, 0, 0 |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Trivial clause at beginning of program |
| # Four consecutive S_LOAD_DWORD_IMM writing %sgpr0 through %sgpr3 from four |
| # different pointer pairs; no destination overlaps any pointer. The checks |
| # expect all four loads and S_ENDPGM on consecutive lines. |
| name: trivial_smem_clause_load_smrd4_x4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4 |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: %sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0 |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| %sgpr1 = S_LOAD_DWORD_IMM %sgpr8_sgpr9, 0, 0 |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| %sgpr3 = S_LOAD_DWORD_IMM %sgpr16_sgpr17, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Reuse of same input pointer is OK |
| # Both loads read through %sgpr10_sgpr11 and write %sgpr12 and %sgpr13, |
| # neither of which is part of that pointer. The checks expect the two loads |
| # to remain adjacent with nothing inserted. |
| name: trivial_smem_clause_load_smrd4_x2_sameptr |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr |
| ; GCN: %sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr12 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 32-bit load partially clobbers its own ptr reg |
| # The only load writes %sgpr10, the first register of its own pointer pair |
| # %sgpr10_sgpr11. With no other load next to it, the checks expect S_ENDPGM |
| # to follow immediately, i.e. nothing is inserted. |
| name: smrd_load4_overwrite_ptr_lo |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo |
| ; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 32-bit load partially clobbers its own ptr reg |
| # The only load writes %sgpr11, the second register of its own pointer pair |
| # %sgpr10_sgpr11. With no other load next to it, the checks expect S_ENDPGM |
| # to follow immediately, i.e. nothing is inserted. |
| name: smrd_load4_overwrite_ptr_hi |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi |
| ; GCN: %sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr11 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # 64-bit load clobbers its own ptr reg |
| # The only load (S_LOAD_DWORDX2_IMM) writes the full pair %sgpr10_sgpr11 |
| # that it also uses as its pointer. With no other load next to it, the |
| # checks expect S_ENDPGM to follow immediately. |
| name: smrd_load8_overwrite_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: smrd_load8_overwrite_ptr |
| ; GCN: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt |
| # breaks the clause. |
| # |
| # Input: sixteen S_LOAD_DWORD_IMM from %sgpr10_sgpr11 into %sgpr13..%sgpr28, |
| # a seventeenth load from %sgpr30_sgpr31 into %sgpr0, then an S_MOV_B32 that |
| # implicitly reads the first sixteen results. The checks expect all of these |
| # on consecutive lines, i.e. nothing is inserted under either invocation. |
| # NOTE(review): this function contains no waitcnt instruction, and scalar |
| # memory loads are presumably counted by lgkmcnt rather than vmcnt -- the |
| # two-line comment above looks inherited from a vector-memory test; confirm |
| # and reword. |
| |
| name: break_smem_clause_at_max_smem_clause_size_smrd_load4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4 |
| ; GCN: %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0 |
| ; GCN-NEXT: %sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr13 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr14 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr15 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr16 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| |
| %sgpr17 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr18 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr19 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr20 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| |
| %sgpr21 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr22 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr23 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr24 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| |
| %sgpr25 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr26 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr27 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr28 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr30_sgpr31, 0, 0 |
| %sgpr0 = S_MOV_B32 %sgpr0, implicit %sgpr13, implicit %sgpr14, implicit %sgpr15, implicit %sgpr16, implicit %sgpr17, implicit %sgpr18, implicit %sgpr19, implicit %sgpr20, implicit %sgpr21, implicit %sgpr22, implicit %sgpr23, implicit %sgpr24, implicit %sgpr25, implicit %sgpr26, implicit %sgpr27, implicit %sgpr28 |
| S_ENDPGM |
| ... |
| --- |
| # Two adjacent loads where each destination is the first register of that |
| # load's own pointer pair (%sgpr10 from %sgpr10_sgpr11, then %sgpr12 from |
| # %sgpr12_sgpr13). The invocation that uses the XNACK prefix expects an |
| # S_NOP 0 between the loads; the other invocation expects them adjacent. |
| |
| name: break_smem_clause_simple_load_smrd4_lo_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr |
| ; GCN: %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr10 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr12 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Two adjacent loads writing %sgpr0 and %sgpr3 through %sgpr10_sgpr11 and |
| # %sgpr12_sgpr13. Neither destination overlaps either pointer pair, and the |
| # checks expect the loads to stay adjacent with no S_NOP. |
| # NOTE(review): the name suggests a destination that overlaps the high |
| # pointer register, but no such overlap exists in the input -- confirm this |
| # is the intended test. |
| |
| name: break_smem_clause_simple_load_smrd4_hi_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr3 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Two adjacent S_LOAD_DWORDX2_IMM where the second load writes |
| # %sgpr10_sgpr11, the pair the first load used as its pointer. The |
| # invocation that uses the XNACK prefix expects an S_NOP 0 between the |
| # loads; the other invocation expects them adjacent. |
| |
| name: break_smem_clause_simple_load_smrd8_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr |
| ; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # An S_LOAD_DWORDX2_IMM through %sgpr10_sgpr11 followed by an |
| # S_LOAD_DWORDX4_IMM through %sgpr6_sgpr7 that writes %sgpr12..%sgpr15. |
| # Neither destination overlaps either pointer pair, and the checks expect |
| # the loads to stay adjacent with no S_NOP. |
| # NOTE(review): the name says "break", yet nothing is expected to be |
| # inserted -- confirm the destination register choice is intended. |
| |
| name: break_smem_clause_simple_load_smrd16_ptr |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr |
| ; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM %sgpr6_sgpr7, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Same register pattern as a pointer-clobbering pair of X2 loads, but split |
| # across two basic blocks: bb.0 holds the first load and bb.1 starts with a |
| # load that writes %sgpr10_sgpr11, the first load's pointer. The checks |
| # expect bb.0 to list bb.1 as its successor, and the invocation that uses |
| # the XNACK prefix expects an S_NOP 0 as the first instruction of bb.1. |
| |
| name: break_smem_clause_block_boundary_load_smrd8_ptr |
| |
| body: | |
| ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr |
| ; GCN: bb.0: |
| ; GCN: successors: %bb.1(0x80000000) |
| ; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN: bb.1: |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| bb.0: |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0 |
| |
| bb.1: |
| %sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # The load clobbers the pointer of the store, so it needs to break. |
| # |
| # Input: S_STORE_DWORD_IMM of %sgpr16 through %sgpr10_sgpr11, followed by an |
| # S_LOAD_DWORD_IMM into %sgpr12 through %sgpr14_sgpr15. The checks expect |
| # the store and load to stay adjacent with no S_NOP under either invocation. |
| # NOTE(review): as written the load writes %sgpr12, which is not part of the |
| # store pointer %sgpr10_sgpr11, so the first line of this comment does not |
| # describe the input -- confirm the intended destination register. |
| |
| name: break_smem_clause_store_load_into_ptr_smrd4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4 |
| ; GCN: S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| S_STORE_DWORD_IMM %sgpr16, %sgpr10_sgpr11, 0, 0 |
| %sgpr12 = S_LOAD_DWORD_IMM %sgpr14_sgpr15, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # The load clobbers the data of the store, so it needs to break. |
| # FIXME: Would it be better to s_nop and wait later? |
| # |
| # Input: S_STORE_DWORD_IMM of %sgpr8 through %sgpr10_sgpr11, followed by an |
| # S_LOAD_DWORD_IMM that writes %sgpr8 through %sgpr12_sgpr13. |
| # NOTE(review): the check lines expect the store and load to stay adjacent |
| # with no S_NOP under either invocation, which does not match the "needs to |
| # break" statement above -- confirm which of the two is intended. |
| |
| name: break_smem_clause_store_load_into_data_smrd4 |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4 |
| ; GCN: S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| S_STORE_DWORD_IMM %sgpr8, %sgpr10_sgpr11, 0, 0 |
| %sgpr8 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Regular VALU instruction breaks clause, no nop needed |
| # Input: a scalar load, a V_MOV_B32_e32 into %vgpr8, then a second scalar |
| # load. The checks expect exactly these three instructions on consecutive |
| # lines, i.e. no S_NOP is inserted under either invocation. |
| name: valu_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: valu_inst_breaks_smem_clause |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %vgpr8 = V_MOV_B32_e32 0, implicit %exec |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %vgpr8 = V_MOV_B32_e32 0, implicit %exec |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Regular SALU instruction breaks clause, no nop needed |
| # Input: a scalar load, an S_MOV_B32 into %sgpr8, then a second scalar load. |
| # The checks expect exactly these three instructions on consecutive lines, |
| # i.e. no S_NOP is inserted under either invocation. |
| name: salu_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: salu_inst_breaks_smem_clause |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %sgpr8 = S_MOV_B32 0 |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %sgpr8 = S_MOV_B32 0 |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Input: a scalar load, a DS_READ_B32 into %vgpr8, then a second scalar |
| # load. The checks expect exactly these three instructions on consecutive |
| # lines, i.e. no S_NOP is inserted under either invocation. |
| name: ds_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: ds_inst_breaks_smem_clause |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %vgpr8 = DS_READ_B32 %vgpr9, 0, 0, implicit %m0, implicit %exec |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # Input: a scalar load, a FLAT_LOAD_DWORD into %vgpr0, then a second scalar |
| # load. The checks expect exactly these three instructions on consecutive |
| # lines, i.e. no S_NOP is inserted under either invocation. |
| |
| name: flat_inst_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: flat_inst_breaks_smem_clause |
| ; GCN: %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| ; GCN-NEXT: %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr |
| ; GCN-NEXT: %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0 = S_LOAD_DWORD_IMM %sgpr10_sgpr11, 0, 0 |
| %vgpr0 = FLAT_LOAD_DWORD %vgpr0_vgpr1, 0, 0, 0, implicit %exec, implicit %flat_scr |
| %sgpr2 = S_LOAD_DWORD_IMM %sgpr12_sgpr13, 0, 0 |
| S_ENDPGM |
| ... |
| --- |
| # FIXME: Should this be handled? |
| # The first load carries an extra implicit use of %sgpr12_sgpr13, and the |
| # second load writes that same pair. The invocation that uses the XNACK |
| # prefix expects an S_NOP 0 between the loads; the other invocation expects |
| # them adjacent. |
| name: implicit_use_breaks_smem_clause |
| |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: implicit_use_breaks_smem_clause |
| ; GCN: %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13 |
| ; XNACK-NEXT: S_NOP 0 |
| ; GCN-NEXT: %sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0 |
| ; GCN-NEXT: S_ENDPGM |
| %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM %sgpr10_sgpr11, 0, 0, implicit %sgpr12_sgpr13 |
| %sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM %sgpr6_sgpr7, 0, 0 |
| S_ENDPGM |
| ... |