| // Copyright © 2022 Collabora, Ltd. |
| // SPDX-License-Identifier: MIT |
| |
| use crate::ir::*; |
| use crate::legalize::{ |
| src_is_reg, src_is_upred_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers, |
| LegalizeBuilder, |
| }; |
| use bitview::*; |
| |
| use rustc_hash::FxHashMap; |
| use std::ops::Range; |
| |
| /// A per-op trait that implements Volta+ opcode semantics |
| trait SM70Op { |
| fn legalize(&mut self, b: &mut LegalizeBuilder); |
| fn encode(&self, e: &mut SM70Encoder<'_>); |
| } |
| |
| struct SM70Encoder<'a> { |
| sm: u8, |
| ip: usize, |
| labels: &'a FxHashMap<Label, usize>, |
| inst: [u32; 4], |
| } |
| |
| impl BitViewable for SM70Encoder<'_> { |
| fn bits(&self) -> usize { |
| BitView::new(&self.inst).bits() |
| } |
| |
| fn get_bit_range_u64(&self, range: Range<usize>) -> u64 { |
| BitView::new(&self.inst).get_bit_range_u64(range) |
| } |
| } |
| |
| impl BitMutViewable for SM70Encoder<'_> { |
| fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) { |
| BitMutView::new(&mut self.inst).set_bit_range_u64(range, val); |
| } |
| } |
| |
| impl SetFieldU64 for SM70Encoder<'_> { |
| fn set_field_u64(&mut self, range: Range<usize>, val: u64) { |
| BitMutView::new(&mut self.inst).set_field_u64(range, val); |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| /// Maximum encodable UGPR |
| /// |
| /// This may be different from the actual maximum supported by hardware. |
| fn ugpr_max(&self) -> u32 { |
| if self.sm >= 100 { |
| 255 |
| } else { |
| 63 |
| } |
| } |
| |
| fn zero_reg(&self, file: RegFile) -> RegRef { |
| let nr = match file { |
| RegFile::GPR => 255, |
| RegFile::UGPR => self.ugpr_max(), |
| _ => panic!("Not a GPR"), |
| }; |
| RegRef::new(file, nr, 1) |
| } |
| |
| fn true_reg(&self, file: RegFile) -> RegRef { |
| RegRef::new(file, 7, 1) |
| } |
| |
| fn set_opcode(&mut self, opcode: u16) { |
| self.set_field(0..12, opcode); |
| } |
| |
| fn set_reg(&mut self, range: Range<usize>, reg: RegRef) { |
| assert!(range.len() == 8); |
| assert!(reg.file() == RegFile::GPR); |
| self.set_field(range, reg.base_idx()); |
| } |
| |
| fn set_ureg(&mut self, range: Range<usize>, reg: RegRef) { |
| assert!(self.sm >= 73); |
| assert!(range.len() == 8); |
| assert!(reg.file() == RegFile::UGPR); |
| assert!(reg.base_idx() <= self.ugpr_max()); |
| self.set_field(range, reg.base_idx()); |
| } |
| |
| fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) { |
| assert!(range.len() == 3); |
| assert!(reg.base_idx() <= 7); |
| assert!(reg.comps() == 1); |
| self.set_field(range, reg.base_idx()); |
| } |
| |
| fn set_reg_src(&mut self, range: Range<usize>, src: &Src) { |
| assert!(src.is_unmodified()); |
| match src.src_ref { |
| SrcRef::Zero => self.set_reg(range, self.zero_reg(RegFile::GPR)), |
| SrcRef::Reg(reg) => self.set_reg(range, reg), |
| _ => panic!("Not a register"), |
| } |
| } |
| |
| fn set_ureg_src(&mut self, range: Range<usize>, src: &Src) { |
| assert!(src.src_mod.is_none()); |
| match src.src_ref { |
| SrcRef::Zero => self.set_ureg(range, self.zero_reg(RegFile::UGPR)), |
| SrcRef::Reg(reg) => self.set_ureg(range, reg), |
| _ => panic!("Not a register"), |
| } |
| } |
| |
| fn set_pred_dst(&mut self, range: Range<usize>, dst: &Dst) { |
| match dst { |
| Dst::None => self.set_pred_reg(range, self.true_reg(RegFile::Pred)), |
| Dst::Reg(reg) => self.set_pred_reg(range, *reg), |
| _ => panic!("Not a register"), |
| } |
| } |
| |
| fn set_pred_src_file( |
| &mut self, |
| range: Range<usize>, |
| not_bit: usize, |
| src: &Src, |
| file: RegFile, |
| ) { |
| let (not, reg) = match src.src_ref { |
| SrcRef::True => (false, self.true_reg(file)), |
| SrcRef::False => (true, self.true_reg(file)), |
| SrcRef::Reg(reg) => { |
| assert!(reg.file() == file); |
| (false, reg) |
| } |
| _ => panic!("Not a register"), |
| }; |
| self.set_pred_reg(range, reg); |
| self.set_bit(not_bit, not ^ src_mod_is_bnot(src.src_mod)); |
| } |
| |
| fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: &Src) { |
| self.set_pred_src_file(range, not_bit, src, RegFile::Pred); |
| } |
| |
| fn set_upred_src( |
| &mut self, |
| range: Range<usize>, |
| not_bit: usize, |
| src: &Src, |
| ) { |
| self.set_pred_src_file(range, not_bit, src, RegFile::UPred); |
| } |
| |
| fn set_src_cb(&mut self, range: Range<usize>, cx_bit: usize, cb: &CBufRef) { |
| let mut v = BitMutView::new_subset(self, range); |
| v.set_field(6..22, cb.offset); |
| match cb.buf { |
| CBuf::Binding(idx) => { |
| v.set_field(22..27, idx); |
| self.set_bit(cx_bit, false); |
| } |
| CBuf::BindlessUGPR(reg) => { |
| assert!(reg.base_idx() <= 63); |
| assert!(reg.file() == RegFile::UGPR); |
| v.set_field(0..6, reg.base_idx()); |
| self.set_bit(cx_bit, true); |
| } |
| CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"), |
| } |
| } |
| |
| fn set_pred(&mut self, pred: &Pred) { |
| assert!(!pred.is_false()); |
| self.set_pred_reg( |
| 12..15, |
| match pred.pred_ref { |
| PredRef::None => self.true_reg(RegFile::Pred), |
| PredRef::Reg(reg) => reg, |
| PredRef::SSA(_) => panic!("SSA values must be lowered"), |
| }, |
| ); |
| self.set_bit(15, pred.pred_inv); |
| } |
| |
| fn set_dst(&mut self, dst: &Dst) { |
| match dst { |
| Dst::None => self.set_reg(16..24, self.zero_reg(RegFile::GPR)), |
| Dst::Reg(reg) => self.set_reg(16..24, *reg), |
| _ => panic!("Not a register"), |
| } |
| } |
| |
| fn set_udst(&mut self, dst: &Dst) { |
| match dst { |
| Dst::None => self.set_ureg(16..24, self.zero_reg(RegFile::UGPR)), |
| Dst::Reg(reg) => self.set_ureg(16..24, *reg), |
| _ => panic!("Not a register"), |
| } |
| } |
| |
| fn set_bar_reg(&mut self, range: Range<usize>, reg: RegRef) { |
| assert!(range.len() == 4); |
| assert!(reg.file() == RegFile::Bar); |
| assert!(reg.comps() == 1); |
| self.set_field(range, reg.base_idx()); |
| } |
| |
| fn set_bar_dst(&mut self, range: Range<usize>, dst: &Dst) { |
| self.set_bar_reg(range, *dst.as_reg().unwrap()); |
| } |
| |
| fn set_bar_src(&mut self, range: Range<usize>, src: &Src) { |
| assert!(src.is_unmodified()); |
| self.set_bar_reg(range, *src.src_ref.as_reg().unwrap()); |
| } |
| |
| fn set_instr_deps(&mut self, deps: &InstrDeps) { |
| self.set_field(105..109, deps.delay); |
| self.set_bit(109, deps.yld); |
| self.set_field(110..113, deps.wr_bar().unwrap_or(7)); |
| self.set_field(113..116, deps.rd_bar().unwrap_or(7)); |
| self.set_field(116..122, deps.wt_bar_mask); |
| self.set_field(122..126, deps.reuse_mask); |
| } |
| } |
| |
| // |
| // Helpers for encoding of ALU instructions |
| // |
| |
| struct ALURegRef { |
| pub reg: RegRef, |
| pub abs: bool, |
| pub neg: bool, |
| pub swizzle: SrcSwizzle, |
| } |
| |
| struct ALUCBufRef { |
| pub cb: CBufRef, |
| pub abs: bool, |
| pub neg: bool, |
| pub swizzle: SrcSwizzle, |
| } |
| |
| enum ALUSrc { |
| None, |
| Imm32(u32), |
| Reg(ALURegRef), |
| UReg(ALURegRef), |
| CBuf(ALUCBufRef), |
| } |
| |
| fn src_is_zero_or_gpr(src: &Src) -> bool { |
| match src.src_ref { |
| SrcRef::Zero => true, |
| SrcRef::Reg(reg) => reg.file() == RegFile::GPR, |
| _ => false, |
| } |
| } |
| |
| fn src_mod_has_abs(src_mod: SrcMod) -> bool { |
| match src_mod { |
| SrcMod::None | SrcMod::FNeg | SrcMod::INeg | SrcMod::BNot => false, |
| SrcMod::FAbs | SrcMod::FNegAbs => true, |
| } |
| } |
| |
| fn src_mod_has_neg(src_mod: SrcMod) -> bool { |
| match src_mod { |
| SrcMod::None | SrcMod::FAbs => false, |
| SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg | SrcMod::BNot => true, |
| } |
| } |
| |
| fn src_mod_is_bnot(src_mod: SrcMod) -> bool { |
| match src_mod { |
| SrcMod::None => false, |
| SrcMod::BNot => true, |
| _ => panic!("Not an predicate source modifier"), |
| } |
| } |
| |
| fn dst_is_bar(dst: &Dst) -> bool { |
| match dst { |
| Dst::None => false, |
| Dst::SSA(ssa) => ssa.file().unwrap() == RegFile::Bar, |
| Dst::Reg(reg) => reg.file() == RegFile::Bar, |
| } |
| } |
| |
| impl ALUSrc { |
| fn from_src( |
| e: &SM70Encoder<'_>, |
| src: Option<&Src>, |
| op_is_uniform: bool, |
| ) -> ALUSrc { |
| let Some(src) = src else { |
| return ALUSrc::None; |
| }; |
| |
| match &src.src_ref { |
| SrcRef::Zero | SrcRef::Reg(_) => { |
| let reg = match src.src_ref { |
| SrcRef::Zero => { |
| let file = if op_is_uniform { |
| RegFile::UGPR |
| } else { |
| RegFile::GPR |
| }; |
| e.zero_reg(file) |
| } |
| SrcRef::Reg(reg) => reg, |
| _ => panic!("Invalid source ref"), |
| }; |
| assert!(reg.comps() <= 2); |
| let alu_ref = ALURegRef { |
| reg: reg, |
| abs: src_mod_has_abs(src.src_mod), |
| neg: src_mod_has_neg(src.src_mod), |
| swizzle: src.src_swizzle, |
| }; |
| if op_is_uniform { |
| assert!(reg.file() == RegFile::UGPR); |
| ALUSrc::Reg(alu_ref) |
| } else { |
| match reg.file() { |
| RegFile::GPR => ALUSrc::Reg(alu_ref), |
| RegFile::UGPR => ALUSrc::UReg(alu_ref), |
| _ => panic!("Invalid ALU register file"), |
| } |
| } |
| } |
| SrcRef::Imm32(i) => { |
| assert!(src.is_unmodified()); |
| assert!(src.src_swizzle.is_none()); |
| ALUSrc::Imm32(*i) |
| } |
| SrcRef::CBuf(cb) => { |
| let alu_ref = ALUCBufRef { |
| cb: cb.clone(), |
| abs: src_mod_has_abs(src.src_mod), |
| neg: src_mod_has_neg(src.src_mod), |
| swizzle: src.src_swizzle, |
| }; |
| ALUSrc::CBuf(alu_ref) |
| } |
| _ => panic!("Invalid ALU source"), |
| } |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn set_swizzle(&mut self, range: Range<usize>, swizzle: SrcSwizzle) { |
| assert!(range.len() == 2); |
| |
| self.set_field( |
| range, |
| match swizzle { |
| SrcSwizzle::None => 0x00_u8, |
| SrcSwizzle::Xx => 0x02_u8, |
| SrcSwizzle::Yy => 0x03_u8, |
| }, |
| ); |
| } |
| |
| fn set_alu_reg( |
| &mut self, |
| range: Range<usize>, |
| abs_bit: usize, |
| neg_bit: usize, |
| swizzle_range: Range<usize>, |
| file: RegFile, |
| is_fp16_alu: bool, |
| reg: &ALURegRef, |
| ) { |
| match file { |
| RegFile::GPR => self.set_reg(range, reg.reg), |
| RegFile::UGPR => self.set_ureg(range, reg.reg), |
| _ => panic!("Invalid ALU src register file"), |
| } |
| |
| self.set_bit(abs_bit, reg.abs); |
| self.set_bit(neg_bit, reg.neg); |
| |
| if is_fp16_alu { |
| self.set_swizzle(swizzle_range, reg.swizzle); |
| } else { |
| assert!(reg.swizzle == SrcSwizzle::None); |
| } |
| } |
| |
| fn encode_alu_src0( |
| &mut self, |
| src: &ALUSrc, |
| file: RegFile, |
| is_fp16_alu: bool, |
| ) { |
| let reg = match src { |
| ALUSrc::None => return, |
| ALUSrc::Reg(reg) => reg, |
| _ => panic!("Invalid ALU src"), |
| }; |
| self.set_alu_reg(24..32, 73, 72, 74..76, file, is_fp16_alu, reg); |
| } |
| |
| fn encode_alu_src2( |
| &mut self, |
| src: &ALUSrc, |
| file: RegFile, |
| is_fp16_alu: bool, |
| ) { |
| let reg = match src { |
| ALUSrc::None => return, |
| ALUSrc::Reg(reg) => reg, |
| _ => panic!("Invalid ALU src"), |
| }; |
| self.set_alu_reg( |
| 64..72, |
| if is_fp16_alu { 83 } else { 74 }, |
| if is_fp16_alu { 84 } else { 75 }, |
| 81..83, |
| file, |
| is_fp16_alu, |
| reg, |
| ); |
| } |
| |
| fn encode_alu_reg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) { |
| self.set_alu_reg( |
| 32..40, |
| 62, |
| 63, |
| 60..62, |
| RegFile::GPR, |
| is_fp16_alu, |
| reg, |
| ); |
| } |
| |
| fn encode_alu_ureg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) { |
| self.set_ureg(32..40, reg.reg); |
| self.set_bit(62, reg.abs); |
| self.set_bit(63, reg.neg); |
| |
| if is_fp16_alu { |
| self.set_swizzle(60..62, reg.swizzle); |
| } else { |
| assert!(reg.swizzle == SrcSwizzle::None); |
| } |
| |
| self.set_bit(91, true); |
| } |
| |
| fn encode_alu_imm(&mut self, imm: &u32) { |
| self.set_field(32..64, *imm); |
| } |
| |
| fn encode_alu_cb(&mut self, cb: &ALUCBufRef, is_fp16_alu: bool) { |
| self.set_src_cb(32..59, 91, &cb.cb); |
| self.set_bit(62, cb.abs); |
| self.set_bit(63, cb.neg); |
| |
| if is_fp16_alu { |
| self.set_swizzle(60..62, cb.swizzle); |
| } else { |
| assert!(cb.swizzle == SrcSwizzle::None); |
| } |
| } |
| |
| fn encode_alu_base( |
| &mut self, |
| opcode: u16, |
| dst: Option<&Dst>, |
| src0: Option<&Src>, |
| src1: Option<&Src>, |
| src2: Option<&Src>, |
| is_fp16_alu: bool, |
| ) { |
| if let Some(dst) = dst { |
| self.set_dst(dst); |
| } |
| |
| let src0 = ALUSrc::from_src(self, src0, false); |
| let src1 = ALUSrc::from_src(self, src1, false); |
| let src2 = ALUSrc::from_src(self, src2, false); |
| |
| self.encode_alu_src0(&src0, RegFile::GPR, is_fp16_alu); |
| |
| let form = match &src2 { |
| ALUSrc::None | ALUSrc::Reg(_) => { |
| self.encode_alu_src2(&src2, RegFile::GPR, is_fp16_alu); |
| match &src1 { |
| ALUSrc::None => 1_u8, // form |
| ALUSrc::Reg(reg1) => { |
| self.encode_alu_reg(reg1, is_fp16_alu); |
| 1_u8 // form |
| } |
| ALUSrc::UReg(reg1) => { |
| self.encode_alu_ureg(reg1, is_fp16_alu); |
| 6_u8 // form |
| } |
| ALUSrc::Imm32(imm1) => { |
| self.encode_alu_imm(imm1); |
| 4_u8 // form |
| } |
| ALUSrc::CBuf(cb1) => { |
| self.encode_alu_cb(cb1, is_fp16_alu); |
| 5_u8 // form |
| } |
| } |
| } |
| ALUSrc::UReg(reg2) => { |
| self.encode_alu_ureg(reg2, is_fp16_alu); |
| self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu); |
| 7_u8 // form |
| } |
| ALUSrc::Imm32(imm2) => { |
| self.encode_alu_imm(imm2); |
| self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu); |
| 2_u8 // form |
| } |
| ALUSrc::CBuf(cb2) => { |
| // TODO set_src_cx |
| self.encode_alu_cb(cb2, is_fp16_alu); |
| self.encode_alu_src2(&src1, RegFile::GPR, is_fp16_alu); |
| 3_u8 // form |
| } |
| }; |
| |
| self.set_field(0..9, opcode); |
| self.set_field(9..12, form); |
| } |
| |
| fn encode_alu( |
| &mut self, |
| opcode: u16, |
| dst: Option<&Dst>, |
| src0: Option<&Src>, |
| src1: Option<&Src>, |
| src2: Option<&Src>, |
| ) { |
| self.encode_alu_base(opcode, dst, src0, src1, src2, false); |
| } |
| |
| fn encode_fp16_alu( |
| &mut self, |
| opcode: u16, |
| dst: Option<&Dst>, |
| src0: Option<&Src>, |
| src1: Option<&Src>, |
| src2: Option<&Src>, |
| ) { |
| self.encode_alu_base(opcode, dst, src0, src1, src2, true); |
| } |
| |
| fn encode_ualu( |
| &mut self, |
| opcode: u16, |
| dst: Option<&Dst>, |
| src0: Option<&Src>, |
| src1: Option<&Src>, |
| src2: Option<&Src>, |
| ) { |
| if let Some(dst) = dst { |
| self.set_udst(dst); |
| } |
| |
| let src0 = ALUSrc::from_src(self, src0, true); |
| let src1 = ALUSrc::from_src(self, src1, true); |
| let src2 = ALUSrc::from_src(self, src2, true); |
| |
| // All uniform ALU requires bit 91 set |
| self.set_bit(91, true); |
| |
| self.encode_alu_src0(&src0, RegFile::UGPR, false); |
| let form = match &src2 { |
| ALUSrc::None | ALUSrc::Reg(_) => { |
| self.encode_alu_src2(&src2, RegFile::UGPR, false); |
| match &src1 { |
| ALUSrc::None => 1_u8, // form |
| ALUSrc::Reg(reg1) => { |
| self.encode_alu_ureg(reg1, false); |
| 1_u8 // form |
| } |
| ALUSrc::UReg(_) => panic!("UALU never has UReg"), |
| ALUSrc::Imm32(imm1) => { |
| self.encode_alu_imm(imm1); |
| 4_u8 // form |
| } |
| ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"), |
| } |
| } |
| ALUSrc::UReg(_) => panic!("UALU never has UReg"), |
| ALUSrc::Imm32(imm2) => { |
| self.encode_alu_imm(imm2); |
| self.encode_alu_src2(&src1, RegFile::UGPR, false); |
| 2_u8 // form |
| } |
| ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"), |
| }; |
| |
| self.set_field(0..9, opcode); |
| self.set_field(9..12, form); |
| } |
| |
| fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) { |
| assert!(range.len() == 2); |
| self.set_field( |
| range, |
| match rnd_mode { |
| FRndMode::NearestEven => 0_u8, |
| FRndMode::NegInf => 1_u8, |
| FRndMode::PosInf => 2_u8, |
| FRndMode::Zero => 3_u8, |
| }, |
| ); |
| } |
| } |
| |
| // |
| // Legalization helpers |
| // |
| |
| fn op_gpr(op: &impl DstsAsSlice) -> RegFile { |
| if op.is_uniform() { |
| RegFile::UGPR |
| } else { |
| RegFile::GPR |
| } |
| } |
| |
| /// Helper to legalize extended or external instructions |
| /// |
| /// These are instructions which reach out external units such as load/store |
| /// and texture ops. They typically can't take anything but GPRs and are the |
| /// only types of instructions that support vectors. They also can never be |
| /// uniform so we always evict uniform sources. |
| /// |
| fn legalize_ext_instr(op: &mut impl SrcsAsSlice, b: &mut LegalizeBuilder) { |
| let src_types = op.src_types(); |
| for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() { |
| match src_types[i] { |
| SrcType::SSA | SrcType::GPR => match &mut src.src_ref { |
| SrcRef::Zero | SrcRef::True | SrcRef::False => { |
| assert!(src_types[i] != SrcType::SSA); |
| } |
| SrcRef::SSA(ssa) => { |
| b.copy_ssa_ref_if_uniform(ssa); |
| } |
| _ => panic!("Unsupported source reference"), |
| }, |
| SrcType::ALU |
| | SrcType::F16 |
| | SrcType::F16v2 |
| | SrcType::F32 |
| | SrcType::F64 |
| | SrcType::I32 |
| | SrcType::B32 => { |
| panic!("ALU srcs must be legalized explicitly"); |
| } |
| SrcType::Pred => { |
| panic!("Predicates must be legalized explicitly"); |
| } |
| SrcType::Carry => { |
| panic!("Carry is invalid on Volta+"); |
| } |
| SrcType::Bar => (), |
| } |
| } |
| } |
| |
| // |
| // Implementations of SM70Op for each op we support on Volta+ |
| // |
| |
| impl SM70Op for OpFAdd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if src_is_zero_or_gpr(&self.srcs[1]) { |
| e.encode_alu( |
| 0x021, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ) |
| } else { |
| e.encode_alu( |
| 0x021, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&Src::ZERO), |
| Some(&self.srcs[1]), |
| ) |
| }; |
| e.set_bit(77, self.saturate); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| } |
| } |
| |
| impl SM70Op for OpFFma { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x023, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| e.set_bit(76, self.dnz); |
| e.set_bit(77, self.saturate); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| } |
| } |
| |
| impl SM70Op for OpFMnMx { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x009, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&Src::ZERO), |
| ); |
| e.set_pred_src(87..90, 90, &self.min); |
| e.set_bit(80, self.ftz); |
| } |
| } |
| |
| impl SM70Op for OpFMul { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x020, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&Src::ZERO), |
| ); |
| e.set_bit(76, self.dnz); |
| e.set_bit(77, self.saturate); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| e.set_field(84..87, 0x4_u8); // TODO: PDIV |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) { |
| assert!(range.len() == 4); |
| self.set_field( |
| range, |
| match op { |
| FloatCmpOp::OrdLt => 0x01_u8, |
| FloatCmpOp::OrdEq => 0x02_u8, |
| FloatCmpOp::OrdLe => 0x03_u8, |
| FloatCmpOp::OrdGt => 0x04_u8, |
| FloatCmpOp::OrdNe => 0x05_u8, |
| FloatCmpOp::OrdGe => 0x06_u8, |
| FloatCmpOp::UnordLt => 0x09_u8, |
| FloatCmpOp::UnordEq => 0x0a_u8, |
| FloatCmpOp::UnordLe => 0x0b_u8, |
| FloatCmpOp::UnordGt => 0x0c_u8, |
| FloatCmpOp::UnordNe => 0x0d_u8, |
| FloatCmpOp::UnordGe => 0x0e_u8, |
| FloatCmpOp::IsNum => 0x07_u8, |
| FloatCmpOp::IsNan => 0x08_u8, |
| }, |
| ); |
| } |
| |
| fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) { |
| assert!(range.len() == 2); |
| self.set_field( |
| range, |
| match op { |
| PredSetOp::And => 0_u8, |
| PredSetOp::Or => 1_u8, |
| PredSetOp::Xor => 2_u8, |
| }, |
| ); |
| } |
| |
| fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) { |
| assert!(range.len() == 3); |
| self.set_field( |
| range, |
| match op { |
| IntCmpOp::False => 0_u8, |
| IntCmpOp::True => 7_u8, |
| IntCmpOp::Eq => 2_u8, |
| IntCmpOp::Ne => 5_u8, |
| IntCmpOp::Lt => 1_u8, |
| IntCmpOp::Le => 3_u8, |
| IntCmpOp::Gt => 4_u8, |
| IntCmpOp::Ge => 6_u8, |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpFSet { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x00a, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| e.set_float_cmp_op(76..80, self.cmp_op); |
| e.set_bit(80, self.ftz); |
| e.set_field(87..90, 0x7_u8); // TODO: src predicate |
| } |
| } |
| |
| impl SM70Op for OpFSetP { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x00b, |
| None, |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_pred_set_op(74..76, self.set_op); |
| e.set_float_cmp_op(76..80, self.cmp_op); |
| e.set_bit(80, self.ftz); |
| |
| e.set_pred_dst(81..84, &self.dst); |
| e.set_pred_dst(84..87, &Dst::None); // dst1 |
| |
| e.set_pred_src(87..90, 90, &self.accum); |
| } |
| } |
| |
| impl SM70Op for OpFSwzAdd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F32); |
| b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F32); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x822); |
| e.set_dst(&self.dst); |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(64..72, &self.srcs[1]); |
| |
| let mut subop = 0x0_u8; |
| |
| for (i, swz_op) in self.ops.iter().enumerate() { |
| let swz_op = match swz_op { |
| FSwzAddOp::Add => 0, |
| FSwzAddOp::SubRight => 2, |
| FSwzAddOp::SubLeft => 1, |
| FSwzAddOp::MoveLeft => 3, |
| }; |
| |
| subop |= swz_op << ((self.ops.len() - i - 1) * 2); |
| } |
| |
| e.set_field(32..40, subop); |
| |
| e.set_bit(77, false); // NDV |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| } |
| } |
| |
| impl SM70Op for OpMuFu { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu(0x108, Some(&self.dst), None, Some(&self.src), None); |
| e.set_field( |
| 74..80, |
| match self.op { |
| MuFuOp::Cos => 0_u8, |
| MuFuOp::Sin => 1_u8, |
| MuFuOp::Exp2 => 2_u8, |
| MuFuOp::Log2 => 3_u8, |
| MuFuOp::Rcp => 4_u8, |
| MuFuOp::Rsq => 5_u8, |
| MuFuOp::Rcp64H => 6_u8, |
| MuFuOp::Rsq64H => 7_u8, |
| MuFuOp::Sqrt => 8_u8, |
| MuFuOp::Tanh => 9_u8, |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpDAdd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x029, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| None, |
| Some(&self.srcs[1]), |
| ); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| } |
| } |
| |
| impl SM70Op for OpDFma { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F64); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x02b, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| } |
| } |
| |
| impl SM70Op for OpDMul { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x028, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| } |
| } |
| |
| impl SM70Op for OpDSetP { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F64); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if src_is_zero_or_gpr(&self.srcs[1]) { |
| e.encode_alu( |
| 0x02a, |
| None, |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ) |
| } else { |
| e.encode_alu( |
| 0x02a, |
| None, |
| Some(&self.srcs[0]), |
| None, |
| Some(&self.srcs[1]), |
| ) |
| }; |
| |
| e.set_pred_set_op(74..76, self.set_op); |
| e.set_float_cmp_op(76..80, self.cmp_op); |
| |
| e.set_pred_dst(81..84, &self.dst); |
| e.set_pred_dst(84..87, &Dst::None); /* dst1 */ |
| |
| e.set_pred_src(87..90, 90, &self.accum); |
| } |
| } |
| |
| impl SM70Op for OpHAdd2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if src_is_zero_or_gpr(&self.srcs[1]) { |
| e.encode_fp16_alu( |
| 0x030, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ) |
| } else { |
| e.encode_fp16_alu( |
| 0x030, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| None, |
| Some(&self.srcs[1]), |
| ) |
| }; |
| |
| e.set_bit(77, self.saturate); |
| e.set_bit(78, self.f32); |
| e.set_bit(80, self.ftz); |
| e.set_bit(85, false); // .BF16_V2 (SM90+) |
| } |
| } |
| |
| impl SM70Op for OpHFma2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| b.copy_alu_src_if_not_reg(src1, gpr, SrcType::F16v2); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_fp16_alu( |
| 0x031, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_bit(76, self.dnz); |
| e.set_bit(77, self.saturate); |
| e.set_bit(78, self.f32); |
| e.set_bit(79, false); // .RELU (SM86+) |
| e.set_bit(80, self.ftz); |
| e.set_bit(85, false); // .BF16_V2 (SM86+) |
| } |
| } |
| |
| impl SM70Op for OpHMul2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_fp16_alu( |
| 0x032, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_bit(76, self.dnz); |
| e.set_bit(77, self.saturate); |
| e.set_bit(78, false); // .F32 (SM70-SM75) |
| e.set_bit(79, false); // .RELU (SM86+) |
| e.set_bit(80, self.ftz); |
| e.set_bit(85, false); // .BF16_V2 (SM90+) |
| } |
| } |
| |
| impl SM70Op for OpHSet2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if src_is_zero_or_gpr(&self.srcs[1]) { |
| e.encode_fp16_alu( |
| 0x033, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ) |
| } else { |
| e.encode_fp16_alu( |
| 0x033, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| None, |
| Some(&self.srcs[1]), |
| ) |
| }; |
| |
| e.set_bit(65, false); // .BF16_V2 (SM90+) |
| e.set_pred_set_op(69..71, self.set_op); |
| |
| // This differentiate between integer and fp16 output |
| e.set_bit(71, true); // .BF |
| e.set_float_cmp_op(76..80, self.cmp_op); |
| e.set_bit(80, self.ftz); |
| |
| e.set_pred_src(87..90, 90, &self.accum); |
| } |
| } |
| |
| impl SM70Op for OpHSetP2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if src_is_zero_or_gpr(&self.srcs[1]) { |
| e.encode_fp16_alu( |
| 0x034, |
| None, |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ) |
| } else { |
| e.encode_fp16_alu( |
| 0x034, |
| None, |
| Some(&self.srcs[0]), |
| None, |
| Some(&self.srcs[1]), |
| ) |
| }; |
| |
| e.set_bit(65, false); // .BF16_V2 (SM90+) |
| e.set_pred_set_op(69..71, self.set_op); |
| e.set_bit(71, self.horizontal); // .H_AND |
| e.set_float_cmp_op(76..80, self.cmp_op); |
| e.set_bit(80, self.ftz); |
| |
| e.set_pred_dst(81..84, &self.dsts[0]); |
| e.set_pred_dst(84..87, &self.dsts[1]); |
| |
| e.set_pred_src(87..90, 90, &self.accum); |
| } |
| } |
| |
| impl SM70Op for OpHMnMx2 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::F16v2); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(e.sm >= 80); |
| |
| e.encode_fp16_alu( |
| 0x040, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| // This differentiate between integer and fp16 output |
| e.set_bit(78, false); // .F32 (SM86) |
| e.set_bit(80, self.ftz); |
| e.set_bit(81, false); // .NAN |
| e.set_bit(82, false); // .XORSIGN |
| e.set_bit(85, false); // .BF16_V2 |
| |
| e.set_pred_src(87..90, 90, &self.min); |
| } |
| } |
| |
| impl SM70Op for OpBMsk { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.pos, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x09b, |
| Some(&self.dst), |
| Some(&self.pos), |
| Some(&self.width), |
| None, |
| ) |
| } else { |
| e.encode_alu( |
| 0x01b, |
| Some(&self.dst), |
| Some(&self.pos), |
| Some(&self.width), |
| None, |
| ) |
| }; |
| |
| e.set_bit(75, self.wrap); |
| } |
| } |
| |
| impl SM70Op for OpBRev { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu(0x0be, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x101, Some(&self.dst), None, Some(&self.src), None) |
| } |
| } |
| } |
| |
| impl SM70Op for OpFlo { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu(0x0bd, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x100, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_field(74..75, self.return_shift_amount as u8); |
| e.set_field(73..74, self.signed as u8); |
| let not_mod = matches!(self.src.src_mod, SrcMod::BNot); |
| e.set_field(63..64, not_mod) |
| } |
| } |
| |
| impl SM70Op for OpIAbs { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu(0x013, Some(&self.dst), None, Some(&self.src), None) |
| } |
| } |
| |
| impl SM70Op for OpIAdd3 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| swap_srcs_if_not_reg(src2, src1, gpr); |
| if !src0.is_unmodified() && !src1.is_unmodified() { |
| assert!(self.overflow[0].is_none()); |
| assert!(self.overflow[1].is_none()); |
| b.copy_alu_src_and_lower_ineg(src0, gpr, SrcType::I32); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::I32); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::I32); |
| if !self.overflow[0].is_none() || !self.overflow[1].is_none() { |
| b.copy_alu_src_if_ineg_imm(src1, gpr, SrcType::I32); |
| b.copy_alu_src_if_ineg_imm(src2, gpr, SrcType::I32); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| // Hardware requires at least one of these be unmodified |
| assert!(self.srcs[0].is_unmodified() || self.srcs[1].is_unmodified()); |
| |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x090, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| } else { |
| e.encode_alu( |
| 0x010, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| }; |
| |
| e.set_pred_src(87..90, 90, &false.into()); |
| e.set_pred_src(77..80, 80, &false.into()); |
| |
| e.set_pred_dst(81..84, &self.overflow[0]); |
| e.set_pred_dst(84..87, &self.overflow[1]); |
| } |
| } |
| |
| impl SM70Op for OpIAdd3X { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| swap_srcs_if_not_reg(src2, src1, gpr); |
| if !src0.is_unmodified() && !src1.is_unmodified() { |
| let val = b.alloc_ssa(gpr); |
| let old_src0 = std::mem::replace(src0, val.into()); |
| b.push_op(OpIAdd3X { |
| srcs: [Src::ZERO, old_src0, Src::ZERO], |
| overflow: [Dst::None, Dst::None], |
| dst: val.into(), |
| carry: [false.into(), false.into()], |
| }); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::B32); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::B32); |
| if !self.is_uniform() { |
| b.copy_src_if_upred(&mut self.carry[0]); |
| b.copy_src_if_upred(&mut self.carry[1]); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| // Hardware requires at least one of these be unmodified |
| assert!(self.srcs[0].is_unmodified() || self.srcs[1].is_unmodified()); |
| |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x090, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_upred_src(87..90, 90, &self.carry[0]); |
| e.set_upred_src(77..80, 80, &self.carry[1]); |
| } else { |
| e.encode_alu( |
| 0x010, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_pred_src(87..90, 90, &self.carry[0]); |
| e.set_pred_src(77..80, 80, &self.carry[1]); |
| } |
| |
| e.set_bit(74, true); // .X |
| |
| e.set_pred_dst(81..84, &self.overflow[0]); |
| e.set_pred_dst(84..87, &self.overflow[1]); |
| } |
| } |
| |
| impl SM70Op for OpIDp4 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src_type0, src_type1] = &mut self.src_types; |
| let [src0, src1, src2] = &mut self.srcs; |
| if swap_srcs_if_not_reg(src0, src1, gpr) { |
| std::mem::swap(src_type0, src_type1); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| b.copy_alu_src_if_ineg_imm(src1, gpr, SrcType::I32); |
| b.copy_alu_src_if_not_reg(src2, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x026, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_bit( |
| 73, |
| match self.src_types[0] { |
| IntType::U8 => false, |
| IntType::I8 => true, |
| _ => panic!("Invalid DP4 source type"), |
| }, |
| ); |
| e.set_bit( |
| 74, |
| match self.src_types[1] { |
| IntType::U8 => false, |
| IntType::I8 => true, |
| _ => panic!("Invalid DP4 source type"), |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpIMad { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x0a4, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| } else { |
| e.encode_alu( |
| 0x024, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| }; |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_bit(73, self.signed); |
| } |
| } |
| |
| impl SM70Op for OpIMad64 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1, src2] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| b.copy_alu_src_if_both_not_reg(src1, src2, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x0a5, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| } else { |
| e.encode_alu( |
| 0x025, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ) |
| }; |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_bit(73, self.signed); |
| } |
| } |
| |
| impl SM70Op for OpIMnMx { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x017, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| e.set_pred_src(87..90, 90, &self.min); |
| e.set_bit( |
| 73, |
| match self.cmp_type { |
| IntCmpType::U32 => false, |
| IntCmpType::I32 => true, |
| }, |
| ); |
| if e.sm >= 120 { |
| e.set_bit(74, false); // 64-bit |
| e.set_pred_src(77..80, 80, &true.into()); |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_pred_dst(84..87, &Dst::None); |
| } |
| } |
| } |
| |
| impl SM70Op for OpISetP { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.cmp_op = self.cmp_op.flip(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| if !self.is_uniform() { |
| b.copy_src_if_upred(&mut self.low_cmp); |
| b.copy_src_if_upred(&mut self.accum); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x08c, |
| None, |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_upred_src(68..71, 71, &self.low_cmp); |
| e.set_upred_src(87..90, 90, &self.accum); |
| } else { |
| e.encode_alu( |
| 0x00c, |
| None, |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_pred_src(68..71, 71, &self.low_cmp); |
| e.set_pred_src(87..90, 90, &self.accum); |
| } |
| |
| e.set_bit(72, self.ex); |
| |
| e.set_field( |
| 73..74, |
| match self.cmp_type { |
| IntCmpType::U32 => 0_u32, |
| IntCmpType::I32 => 1_u32, |
| }, |
| ); |
| e.set_pred_set_op(74..76, self.set_op); |
| e.set_int_cmp_op(76..79, self.cmp_op); |
| |
| e.set_pred_dst(81..84, &self.dst); |
| e.set_pred_dst(84..87, &Dst::None); // dst1 |
| } |
| } |
| |
| impl SM70Op for OpLea { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.a, gpr, SrcType::ALU); |
| if self.dst_high { |
| b.copy_alu_src_if_both_not_reg( |
| &self.b, |
| &mut self.a_high, |
| gpr, |
| SrcType::ALU, |
| ); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(self.a.src_mod == SrcMod::None); |
| assert!( |
| self.intermediate_mod == SrcMod::None |
| || self.b.src_mod == SrcMod::None |
| ); |
| |
| let zero = 0.into(); |
| let c = if self.dst_high { |
| Some(&self.a_high) |
| } else { |
| // TODO: On Ada and earlier, src2 is ignored if !dst_high. On |
| // Blackwell+, it seems to do something. |
| Some(&zero) |
| }; |
| |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x091, |
| Some(&self.dst), |
| Some(&self.a), |
| Some(&self.b), |
| c, |
| ); |
| } else { |
| e.encode_alu( |
| 0x011, |
| Some(&self.dst), |
| Some(&self.a), |
| Some(&self.b), |
| c, |
| ); |
| } |
| |
| e.set_bit(72, self.intermediate_mod.is_ineg()); |
| e.set_field(75..80, self.shift); |
| e.set_bit(80, self.dst_high); |
| e.set_pred_dst(81..84, &self.overflow); |
| e.set_bit(74, false); // .X |
| } |
| } |
| |
| impl SM70Op for OpLeaX { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.a, gpr, SrcType::ALU); |
| if self.dst_high { |
| b.copy_alu_src_if_both_not_reg( |
| &self.b, |
| &mut self.a_high, |
| gpr, |
| SrcType::ALU, |
| ); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(self.a.src_mod == SrcMod::None); |
| assert!( |
| self.intermediate_mod == SrcMod::None |
| || self.b.src_mod == SrcMod::None |
| ); |
| |
| let c = if self.dst_high { |
| Some(&self.a_high) |
| } else { |
| // TODO: On Ada and earlier, src2 is ignored if !dst_high. On |
| // Blackwell+, it seems to do something. |
| Some(&Src::ZERO) |
| }; |
| |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x091, |
| Some(&self.dst), |
| Some(&self.a), |
| Some(&self.b), |
| c, |
| ); |
| e.set_upred_src(87..90, 90, &self.carry); |
| } else { |
| e.encode_alu( |
| 0x011, |
| Some(&self.dst), |
| Some(&self.a), |
| Some(&self.b), |
| c, |
| ); |
| e.set_pred_src(87..90, 90, &self.carry); |
| } |
| |
| e.set_bit(72, self.intermediate_mod.is_bnot()); |
| e.set_field(75..80, self.shift); |
| e.set_bit(80, self.dst_high); |
| e.set_pred_dst(81..84, &self.overflow); |
| e.set_bit(74, true); // .X |
| } |
| } |
| |
| fn src_as_lop_imm(src: &Src) -> Option<bool> { |
| let x = match src.src_ref { |
| SrcRef::Zero => false, |
| SrcRef::True => true, |
| SrcRef::False => false, |
| SrcRef::Imm32(i) => { |
| if i == 0 { |
| false |
| } else if i == !0 { |
| true |
| } else { |
| return None; |
| } |
| } |
| _ => return None, |
| }; |
| Some(x ^ src.src_mod.is_bnot()) |
| } |
| |
| fn fold_lop_src(src: &Src, x: &mut u8) { |
| if let Some(i) = src_as_lop_imm(src) { |
| *x = if i { !0 } else { 0 }; |
| } |
| if src.src_mod.is_bnot() { |
| *x = !*x; |
| } |
| } |
| |
| impl SM70Op for OpLop3 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| // Fold constants and modifiers if we can |
| self.op = LogicOp3::new_lut(&|mut x, mut y, mut z| { |
| fold_lop_src(&self.srcs[0], &mut x); |
| fold_lop_src(&self.srcs[1], &mut y); |
| fold_lop_src(&self.srcs[2], &mut z); |
| self.op.eval(x, y, z) |
| }); |
| for src in &mut self.srcs { |
| src.src_mod = SrcMod::None; |
| if src_as_lop_imm(src).is_some() { |
| src.src_ref = SrcRef::Zero; |
| } |
| } |
| |
| let [src0, src1, src2] = &mut self.srcs; |
| if !src_is_reg(src0, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src0, src1); |
| self.op = LogicOp3::new_lut(&|x, y, z| self.op.eval(y, x, z)) |
| } |
| if !src_is_reg(src2, gpr) && src_is_reg(src1, gpr) { |
| std::mem::swap(src2, src1); |
| self.op = LogicOp3::new_lut(&|x, y, z| self.op.eval(x, z, y)) |
| } |
| |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| b.copy_alu_src_if_not_reg(src2, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x092, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_upred_src(87..90, 90, &SrcRef::False.into()); |
| } else { |
| e.encode_alu( |
| 0x012, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&self.srcs[2]), |
| ); |
| |
| e.set_pred_src(87..90, 90, &SrcRef::False.into()); |
| } |
| |
| e.set_field(72..80, self.op.lut); |
| e.set_bit(80, false); // .PAND |
| e.set_field(81..84, 7_u32); // pred |
| } |
| } |
| |
| impl SM70Op for OpPopC { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu(0x0bf, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x109, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| |
| let not_mod = matches!(self.src.src_mod, SrcMod::BNot); |
| e.set_field(63..64, not_mod); |
| } |
| } |
| |
| impl SM70Op for OpShf { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.low, gpr, SrcType::ALU); |
| b.copy_alu_src_if_both_not_reg( |
| &self.shift, |
| &mut self.high, |
| gpr, |
| SrcType::ALU, |
| ); |
| self.reduce_shift_imm(); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x099, |
| Some(&self.dst), |
| Some(&self.low), |
| Some(&self.shift), |
| Some(&self.high), |
| ) |
| } else { |
| e.encode_alu( |
| 0x019, |
| Some(&self.dst), |
| Some(&self.low), |
| Some(&self.shift), |
| Some(&self.high), |
| ) |
| }; |
| |
| e.set_field( |
| 73..75, |
| match self.data_type { |
| IntType::I64 => 0_u8, |
| IntType::U64 => 1_u8, |
| IntType::I32 => 2_u8, |
| IntType::U32 => 3_u8, |
| _ => panic!("Invalid shift data type"), |
| }, |
| ); |
| e.set_bit(75, self.wrap); |
| e.set_bit(76, self.right); |
| e.set_bit(80, self.dst_high); |
| } |
| } |
| |
| impl SM70Op for OpF2F { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(!self.integer_rnd); |
| if self.src_type.bits() <= 32 && self.dst_type.bits() <= 32 { |
| e.encode_alu(0x104, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x110, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| |
| if self.high { |
| e.set_field(60..62, 1_u8); // .H1 |
| } |
| |
| e.set_field(75..77, (self.dst_type.bits() / 8).ilog2()); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| e.set_field(84..86, (self.src_type.bits() / 8).ilog2()); |
| } |
| } |
| |
| impl SM70Op for OpF2FP { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| swap_srcs_if_not_reg(src0, src1, gpr); |
| |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x03e, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| Some(&Src::ZERO), |
| ); |
| |
| // .MERGE_C behavior |
| // Use src1 and src2, src0 is unused |
| // src1 get converted and packed in the lower 16 bits of dest. |
| // src2 lower or high 16 bits (decided by .H1 flag) get packed in the upper of dest. |
| e.set_bit(78, false); // TODO: .MERGE_C |
| e.set_bit(72, false); // .H1 (MERGE_C only) |
| e.set_rnd_mode(79..81, self.rnd_mode); |
| } |
| } |
| |
| impl SM70Op for OpF2I { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.src_type.bits() <= 32 && self.dst_type.bits() <= 32 { |
| e.encode_alu(0x105, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x111, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| |
| e.set_bit(72, self.dst_type.is_signed()); |
| e.set_field(75..77, (self.dst_type.bits() / 8).ilog2()); |
| e.set_bit(77, false); // NTZ |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_bit(80, self.ftz); |
| e.set_field(84..86, (self.src_type.bits() / 8).ilog2()); |
| } |
| } |
| |
| impl SM70Op for OpI2F { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.src_type.bits() <= 32 && self.dst_type.bits() <= 32 { |
| e.encode_alu(0x106, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x112, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| |
| e.set_field(60..62, 0_u8); // TODO: subop |
| e.set_bit(74, self.src_type.is_signed()); |
| e.set_field(75..77, (self.dst_type.bits() / 8).ilog2()); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_field(84..86, (self.src_type.bits() / 8).ilog2()); |
| } |
| } |
| |
| impl SM70Op for OpFRnd { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.src_type.bits() <= 32 && self.dst_type.bits() <= 32 { |
| e.encode_alu(0x107, Some(&self.dst), None, Some(&self.src), None) |
| } else { |
| e.encode_alu(0x113, Some(&self.dst), None, Some(&self.src), None) |
| }; |
| |
| e.set_field(84..86, (self.src_type.bits() / 8).ilog2()); |
| e.set_bit(80, self.ftz); |
| e.set_rnd_mode(78..80, self.rnd_mode); |
| e.set_field(75..77, (self.dst_type.bits() / 8).ilog2()); |
| } |
| } |
| |
| impl SM70Op for OpMov { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.set_opcode(0xc82); |
| e.set_udst(&self.dst); |
| |
| // umov is encoded like a non-uniform ALU op |
| let src = ALUSrc::from_src(e, Some(&self.src), true); |
| let form: u8 = match &src { |
| ALUSrc::Reg(reg) => { |
| e.encode_alu_ureg(reg, false); |
| 0x6 // form |
| } |
| ALUSrc::Imm32(imm) => { |
| e.encode_alu_imm(imm); |
| 0x4 // form |
| } |
| _ => panic!("Invalid umov src"), |
| }; |
| e.set_field(9..12, form); |
| } else { |
| e.encode_alu(0x002, Some(&self.dst), None, Some(&self.src), None); |
| e.set_field(72..76, self.quad_lanes); |
| } |
| } |
| } |
| |
| impl SM70Op for OpPrmt { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| let [src0, src1] = &mut self.srcs; |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| b.copy_alu_src_if_not_reg(src1, gpr, SrcType::ALU); |
| self.reduce_sel_imm(); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x96, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.sel), |
| Some(&self.srcs[1]), |
| ) |
| } else { |
| e.encode_alu( |
| 0x16, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.sel), |
| Some(&self.srcs[1]), |
| ) |
| }; |
| |
| e.set_field( |
| 72..75, |
| match self.mode { |
| PrmtMode::Index => 0_u8, |
| PrmtMode::Forward4Extract => 1_u8, |
| PrmtMode::Backward4Extract => 2_u8, |
| PrmtMode::Replicate8 => 3_u8, |
| PrmtMode::EdgeClampLeft => 4_u8, |
| PrmtMode::EdgeClampRight => 5_u8, |
| PrmtMode::Replicate16 => 6_u8, |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpSel { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| if !self.is_uniform() { |
| b.copy_src_if_upred(&mut self.cond); |
| } |
| let [src0, src1] = &mut self.srcs; |
| if swap_srcs_if_not_reg(src0, src1, gpr) { |
| self.cond = self.cond.clone().bnot(); |
| } |
| b.copy_alu_src_if_not_reg(src0, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.encode_ualu( |
| 0x087, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_upred_src(87..90, 90, &self.cond); |
| } else { |
| e.encode_alu( |
| 0x007, |
| Some(&self.dst), |
| Some(&self.srcs[0]), |
| Some(&self.srcs[1]), |
| None, |
| ); |
| |
| e.set_pred_src(87..90, 90, &self.cond); |
| } |
| } |
| } |
| |
| impl SM70Op for OpShfl { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.src, gpr, SrcType::GPR); |
| b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, gpr, SrcType::ALU); |
| b.copy_alu_src_if_not_reg_or_imm(&mut self.c, gpr, SrcType::ALU); |
| self.reduce_lane_c_imm(); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(self.lane.is_unmodified()); |
| assert!(self.c.is_unmodified()); |
| |
| match &self.lane.src_ref { |
| SrcRef::Zero | SrcRef::Reg(_) => match &self.c.src_ref { |
| SrcRef::Zero | SrcRef::Reg(_) => { |
| e.set_opcode(0x389); |
| e.set_reg_src(32..40, &self.lane); |
| e.set_reg_src(64..72, &self.c); |
| } |
| SrcRef::Imm32(imm_c) => { |
| e.set_opcode(0x589); |
| e.set_reg_src(32..40, &self.lane); |
| e.set_field(40..53, *imm_c); |
| } |
| _ => panic!("Invalid instruction form"), |
| }, |
| SrcRef::Imm32(imm_lane) => match &self.c.src_ref { |
| SrcRef::Zero | SrcRef::Reg(_) => { |
| e.set_opcode(0x989); |
| e.set_field(53..58, *imm_lane); |
| e.set_reg_src(64..72, &self.c); |
| } |
| SrcRef::Imm32(imm_c) => { |
| e.set_opcode(0xf89); |
| e.set_field(40..53, *imm_c); |
| e.set_field(53..58, *imm_lane); |
| } |
| _ => panic!("Invalid instruction form"), |
| }, |
| _ => panic!("Invalid instruction form"), |
| }; |
| |
| e.set_dst(&self.dst); |
| e.set_pred_dst(81..84, &self.in_bounds); |
| e.set_reg_src(24..32, &self.src); |
| e.set_field( |
| 58..60, |
| match self.op { |
| ShflOp::Idx => 0_u8, |
| ShflOp::Up => 1_u8, |
| ShflOp::Down => 2_u8, |
| ShflOp::Bfly => 3_u8, |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpPLop3 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| // Fold constants and modifiers if we can |
| for lop in &mut self.ops { |
| *lop = LogicOp3::new_lut(&|mut x, mut y, mut z| { |
| fold_lop_src(&self.srcs[0], &mut x); |
| fold_lop_src(&self.srcs[1], &mut y); |
| fold_lop_src(&self.srcs[2], &mut z); |
| lop.eval(x, y, z) |
| }); |
| } |
| for src in &mut self.srcs { |
| src.src_mod = SrcMod::None; |
| if src_as_lop_imm(src).is_some() { |
| src.src_ref = SrcRef::True; |
| } |
| } |
| |
| if !self.is_uniform() { |
| // The warp form of plop3 allows a single uniform predicate in |
| // src2. If we have a uniform predicate anywhere, try to move it |
| // there. |
| let [src0, src1, src2] = &mut self.srcs; |
| if src_is_upred_reg(src0) && !src_is_upred_reg(src2) { |
| std::mem::swap(src0, src2); |
| for lop in &mut self.ops { |
| *lop = LogicOp3::new_lut(&|x, y, z| lop.eval(z, y, x)) |
| } |
| } |
| if src_is_upred_reg(src1) && !src_is_upred_reg(src2) { |
| std::mem::swap(src1, src2); |
| for lop in &mut self.ops { |
| *lop = LogicOp3::new_lut(&|x, y, z| lop.eval(x, z, y)) |
| } |
| } |
| b.copy_src_if_upred(src0); |
| b.copy_src_if_upred(src1); |
| } |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.set_opcode(0x89c); |
| |
| e.set_upred_src(68..71, 71, &self.srcs[2]); |
| e.set_upred_src(77..80, 80, &self.srcs[1]); |
| e.set_upred_src(87..90, 90, &self.srcs[0]); |
| } else { |
| e.set_opcode(0x81c); |
| |
| if self.srcs[2] |
| .src_ref |
| .as_reg() |
| .is_some_and(|r| r.is_uniform()) |
| { |
| e.set_upred_src(68..71, 71, &self.srcs[2]); |
| e.set_bit(67, true); |
| } else { |
| e.set_pred_src(68..71, 71, &self.srcs[2]); |
| } |
| e.set_pred_src(77..80, 80, &self.srcs[1]); |
| e.set_pred_src(87..90, 90, &self.srcs[0]); |
| } |
| e.set_field(16..24, self.ops[1].lut); |
| e.set_field(64..67, self.ops[0].lut & 0x7); |
| e.set_field(72..77, self.ops[0].lut >> 3); |
| |
| e.set_pred_dst(81..84, &self.dsts[0]); |
| e.set_pred_dst(84..87, &self.dsts[1]); |
| } |
| } |
| |
| impl SM70Op for OpR2UR { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x3c2); |
| e.set_udst(&self.dst); |
| e.set_reg_src(24..32, &self.src); |
| e.set_pred_dst(81..84, &Dst::None); |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn set_tex_cb_ref(&mut self, range: Range<usize>, cb: TexCBufRef) { |
| assert!(range.len() == 19); |
| let mut v = BitMutView::new_subset(self, range); |
| assert!(cb.offset % 4 == 0); |
| v.set_field(0..14, cb.offset / 4); |
| v.set_field(14..19, cb.idx); |
| } |
| |
| fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) { |
| assert!(range.len() == 3); |
| self.set_field( |
| range, |
| match dim { |
| TexDim::_1D => 0_u8, |
| TexDim::Array1D => 4_u8, |
| TexDim::_2D => 1_u8, |
| TexDim::Array2D => 5_u8, |
| TexDim::_3D => 2_u8, |
| TexDim::Cube => 3_u8, |
| TexDim::ArrayCube => 7_u8, |
| }, |
| ); |
| } |
| |
| fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) { |
| assert!(range.len() == 3); |
| if self.sm >= 100 { |
| self.set_field( |
| range, |
| match lod_mode { |
| TexLodMode::Auto => 0_u8, |
| TexLodMode::Bias => 1_u8, |
| TexLodMode::Clamp => 2_u8, |
| // ulb => 0x3 |
| // ulc => 0x4 |
| // lb.ulc => 0x5 |
| TexLodMode::BiasClamp => todo!(), |
| |
| TexLodMode::Zero => 0_u8, |
| TexLodMode::Lod => 1_u8, |
| // ull => 3 |
| }, |
| ); |
| } else { |
| self.set_field( |
| range, |
| match lod_mode { |
| TexLodMode::Auto => 0_u8, |
| TexLodMode::Zero => 1_u8, |
| TexLodMode::Bias => 2_u8, |
| TexLodMode::Lod => 3_u8, |
| TexLodMode::Clamp => 4_u8, |
| TexLodMode::BiasClamp => 5_u8, |
| }, |
| ); |
| } |
| } |
| |
| fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) { |
| assert!(range.len() == 3); |
| self.set_field( |
| range, |
| match dim { |
| ImageDim::_1D => 0_u8, |
| ImageDim::_1DBuffer => 1_u8, |
| ImageDim::_1DArray => 2_u8, |
| ImageDim::_2D => 3_u8, |
| ImageDim::_2DArray => 4_u8, |
| ImageDim::_3D => 5_u8, |
| }, |
| ); |
| } |
| |
| fn set_tex_channel_mask( |
| &mut self, |
| range: Range<usize>, |
| channel_mask: ChannelMask, |
| ) { |
| self.set_field(range, channel_mask.to_bits()); |
| } |
| |
| fn set_image_channel_mask( |
| &mut self, |
| range: Range<usize>, |
| channel_mask: ChannelMask, |
| ) { |
| assert!( |
| channel_mask.to_bits() == 0x1 |
| || channel_mask.to_bits() == 0x3 |
| || channel_mask.to_bits() == 0xf |
| ); |
| self.set_field(range, channel_mask.to_bits()); |
| } |
| } |
| |
| fn legalize_tex_instr(op: &mut impl SrcsAsSlice, b: &mut LegalizeBuilder) { |
| // Texture instructions have one or two sources. When they have two, the |
| // second one is optional and we can set rZ instead. |
| let srcs = op.srcs_as_mut_slice(); |
| assert!(matches!(&srcs[0].src_ref, SrcRef::SSA(_))); |
| if let SrcRef::SSA(ssa) = &mut srcs[0].src_ref { |
| b.copy_ssa_ref_if_uniform(ssa); |
| } |
| if srcs.len() > 1 { |
| debug_assert!(srcs.len() == 2); |
| assert!(matches!(&srcs[1].src_ref, SrcRef::SSA(_) | SrcRef::Zero)); |
| if let SrcRef::SSA(ssa) = &mut srcs[1].src_ref { |
| b.copy_ssa_ref_if_uniform(ssa); |
| } |
| } |
| } |
| |
| impl SM70Op for OpTex { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb60); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| if e.sm >= 100 { |
| e.set_opcode(0xd61); |
| e.set_bit(91, true); |
| } else { |
| e.set_opcode(0x361); |
| e.set_bit(59, true); // .B |
| } |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(32..40, &self.srcs[1]); |
| |
| if e.sm >= 100 { |
| e.set_field(48..56, 0xff_u8); // ureg |
| e.set_bit(59, self.lod_mode.is_explicit_lod()); |
| } |
| |
| e.set_tex_dim(61..64, self.dim); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_bit(76, self.offset_mode == TexOffsetMode::AddOffI); |
| e.set_bit(77, false); // ToDo: NDV |
| e.set_bit(78, self.z_cmpr); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| e.set_tex_lod_mode(87..90, self.lod_mode); |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Op for OpTld { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb66); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| if e.sm >= 100 { |
| e.set_opcode(0xd67); |
| e.set_bit(91, true); |
| } else { |
| e.set_opcode(0x367); |
| e.set_bit(59, true); // .B |
| } |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(32..40, &self.srcs[1]); |
| |
| if e.sm >= 100 { |
| e.set_field(48..56, 0xff_u8); // ureg |
| } |
| |
| e.set_tex_dim(61..64, self.dim); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_bit(76, self.offset_mode == TexOffsetMode::AddOffI); |
| |
| // bit 77: .CL |
| e.set_bit(78, self.is_ms); |
| // bits 79..81: .F16 |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| assert!(self.lod_mode.is_explicit_lod()); |
| e.set_tex_lod_mode(87..90, self.lod_mode); |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Op for OpTld4 { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb63); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| if e.sm >= 100 { |
| e.set_opcode(0xd64); |
| e.set_bit(91, true); |
| } else { |
| e.set_opcode(0x364); |
| e.set_bit(59, true); // .B |
| } |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(32..40, &self.srcs[1]); |
| |
| if e.sm >= 100 { |
| e.set_field(48..56, 0xff_u8); // ureg |
| } |
| |
| e.set_tex_dim(61..64, self.dim); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_field( |
| 76..78, |
| match self.offset_mode { |
| TexOffsetMode::None => 0_u8, |
| TexOffsetMode::AddOffI => 1_u8, |
| TexOffsetMode::PerPx => 2_u8, |
| }, |
| ); |
| // bit 77: .CL |
| e.set_bit(78, self.z_cmpr); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| e.set_field(87..89, self.comp); |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Op for OpTmml { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb69); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| e.set_opcode(0x36a); |
| e.set_bit(59, true); // .B |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(32..40, &self.srcs[1]); |
| |
| e.set_tex_dim(61..64, self.dim); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_bit(77, false); // ToDo: NDV |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Op for OpTxd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb6c); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| if e.sm >= 100 { |
| e.set_opcode(0xd6d); |
| e.set_bit(91, true); |
| } else { |
| e.set_opcode(0x36d); |
| e.set_bit(59, true); // .B |
| } |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_reg_src(24..32, &self.srcs[0]); |
| e.set_reg_src(32..40, &self.srcs[1]); |
| |
| if e.sm >= 100 { |
| e.set_field(48..56, 0xff_u8); // ureg |
| } |
| |
| e.set_tex_dim(61..64, self.dim); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_bit(76, self.offset_mode == TexOffsetMode::AddOffI); |
| e.set_bit(77, false); // ToDo: NDV |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Op for OpTxq { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_tex_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.tex { |
| TexRef::Bound(_) => { |
| panic!("SM70+ doesn't have legacy bound textures"); |
| } |
| TexRef::CBuf(cb) => { |
| assert!(e.sm < 100); |
| e.set_opcode(0xb6f); |
| e.set_tex_cb_ref(40..59, cb); |
| } |
| TexRef::Bindless => { |
| e.set_opcode(0x370); |
| e.set_bit(59, true); // .B |
| } |
| } |
| |
| e.set_dst(&self.dsts[0]); |
| if let Dst::Reg(reg) = self.dsts[1] { |
| e.set_reg(64..72, reg); |
| } else { |
| e.set_field(64..72, 255_u8); |
| } |
| |
| e.set_reg_src(24..32, &self.src); |
| e.set_field( |
| 62..64, |
| match self.query { |
| TexQuery::Dimension => 0_u8, |
| TexQuery::TextureType => 1_u8, |
| TexQuery::SamplerPos => 2_u8, |
| }, |
| ); |
| e.set_tex_channel_mask(72..76, self.channel_mask); |
| e.set_bit(90, self.nodep); |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn set_mem_order(&mut self, order: &MemOrder) { |
| if self.sm < 80 { |
| let scope = match order { |
| MemOrder::Constant => MemScope::System, |
| MemOrder::Weak => MemScope::CTA, |
| MemOrder::Strong(s) => *s, |
| }; |
| self.set_field( |
| 77..79, |
| match scope { |
| MemScope::CTA => 0_u8, |
| // SM => 1_u8, |
| MemScope::GPU => 2_u8, |
| MemScope::System => 3_u8, |
| }, |
| ); |
| self.set_field( |
| 79..81, |
| match order { |
| MemOrder::Constant => 0_u8, |
| MemOrder::Weak => 1_u8, |
| MemOrder::Strong(_) => 2_u8, |
| // MMIO => 3_u8, |
| }, |
| ); |
| } else { |
| self.set_field( |
| 77..81, |
| match order { |
| MemOrder::Constant => 0x4_u8, |
| MemOrder::Weak => 0x0_u8, |
| MemOrder::Strong(MemScope::CTA) => 0x5_u8, |
| MemOrder::Strong(MemScope::GPU) => 0x7_u8, |
| MemOrder::Strong(MemScope::System) => 0xa_u8, |
| }, |
| ); |
| } |
| } |
| |
| fn set_eviction_priority(&mut self, pri: &MemEvictionPriority) { |
| self.set_field( |
| 84..87, |
| match pri { |
| MemEvictionPriority::First => 0_u8, |
| MemEvictionPriority::Normal => 1_u8, |
| MemEvictionPriority::Last => 2_u8, |
| MemEvictionPriority::LastUse => 3_u8, |
| MemEvictionPriority::Unchanged => 4_u8, |
| MemEvictionPriority::NoAllocate => 5_u8, |
| }, |
| ); |
| } |
| |
| fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) { |
| assert!(range.len() == 3); |
| self.set_field( |
| range, |
| match mem_type { |
| MemType::U8 => 0_u8, |
| MemType::I8 => 1_u8, |
| MemType::U16 => 2_u8, |
| MemType::I16 => 3_u8, |
| MemType::B32 => 4_u8, |
| MemType::B64 => 5_u8, |
| MemType::B128 => 6_u8, |
| }, |
| ); |
| } |
| |
| fn set_mem_access(&mut self, access: &MemAccess) { |
| self.set_field( |
| 72..73, |
| match access.space.addr_type() { |
| MemAddrType::A32 => 0_u8, |
| MemAddrType::A64 => 1_u8, |
| }, |
| ); |
| self.set_mem_type(73..76, access.mem_type); |
| self.set_mem_order(&access.order); |
| self.set_eviction_priority(&access.eviction_priority); |
| } |
| } |
| |
| impl SM70Op for OpSuLd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.image_access { |
| ImageAccess::Binary(mem_type) => { |
| e.set_opcode(0x99a); |
| e.set_mem_type(73..76, mem_type); |
| } |
| ImageAccess::Formatted(channel_mask) => { |
| e.set_opcode(0x998); |
| e.set_image_channel_mask(72..76, channel_mask); |
| } |
| } |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.coord); |
| e.set_reg_src(64..72, &self.handle); |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_image_dim(61..64, self.image_dim); |
| e.set_mem_order(&self.mem_order); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| } |
| } |
| |
| impl SM70Op for OpSuSt { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.image_access { |
| ImageAccess::Binary(mem_type) => { |
| e.set_opcode(0x99e); |
| e.set_mem_type(73..76, mem_type); |
| } |
| ImageAccess::Formatted(channel_mask) => { |
| e.set_opcode(0x99c); |
| e.set_image_channel_mask(72..76, channel_mask); |
| } |
| } |
| |
| e.set_reg_src(24..32, &self.coord); |
| e.set_reg_src(32..40, &self.data); |
| e.set_reg_src(64..72, &self.handle); |
| |
| e.set_image_dim(61..64, self.image_dim); |
| e.set_mem_order(&self.mem_order); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| } |
| } |
| |
| impl SM70Op for OpSuAtom { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.dst.is_none() { |
| e.set_opcode(0x3a0); |
| e.set_atom_op(87..90, self.atom_op); |
| } else if let AtomOp::CmpExch(cmp_src) = self.atom_op { |
| e.set_opcode(0x396); |
| assert!(cmp_src == AtomCmpSrc::Packed); |
| } else { |
| e.set_opcode(0x394); |
| e.set_atom_op(87..91, self.atom_op); |
| }; |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.coord); |
| e.set_reg_src(32..40, &self.data); |
| e.set_reg_src(64..72, &self.handle); |
| e.set_pred_dst(81..84, &self.fault); |
| |
| e.set_image_dim(61..64, self.image_dim); |
| e.set_mem_order(&self.mem_order); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| |
| e.set_bit(72, false); // .BA |
| e.set_atom_type(self.atom_type); |
| } |
| } |
| |
| impl SM70Op for OpLd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.access.space { |
| MemSpace::Global(_) => { |
| e.set_opcode(0x381); |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_mem_access(&self.access); |
| } |
| MemSpace::Local => { |
| e.set_opcode(0x983); |
| e.set_field(84..87, 1_u8); |
| |
| e.set_mem_type(73..76, self.access.mem_type); |
| assert!(self.access.order == MemOrder::Strong(MemScope::CTA)); |
| assert!( |
| self.access.eviction_priority |
| == MemEvictionPriority::Normal |
| ); |
| } |
| MemSpace::Shared => { |
| e.set_opcode(0x984); |
| |
| e.set_mem_type(73..76, self.access.mem_type); |
| assert!(self.access.order == MemOrder::Strong(MemScope::CTA)); |
| assert!( |
| self.access.eviction_priority |
| == MemEvictionPriority::Normal |
| ); |
| |
| e.set_bit(87, false); // !.ZD - Returns a predicate? |
| } |
| } |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.addr); |
| e.set_field(40..64, self.offset); |
| } |
| } |
| |
| impl SM70Op for OpLdc { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.offset, gpr, SrcType::GPR); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| let SrcRef::CBuf(cb) = &self.cb.src_ref else { |
| panic!("LDC must take a cbuf source"); |
| }; |
| |
| match cb.buf { |
| CBuf::Binding(idx) => { |
| if self.is_uniform() { |
| if e.sm >= 100 { |
| e.set_opcode(0x7ac); |
| e.set_bit(91, true); |
| e.set_ureg_src(24..32, &self.offset); |
| } else { |
| e.set_opcode(0xab9); |
| e.set_bit(91, false); |
| assert!(self.offset.is_zero()); |
| } |
| e.set_udst(&self.dst); |
| assert!(self.mode == LdcMode::Indexed); |
| } else { |
| e.set_opcode(0xb82); |
| e.set_dst(&self.dst); |
| |
| e.set_reg_src(24..32, &self.offset); |
| e.set_field( |
| 78..80, |
| match self.mode { |
| LdcMode::Indexed => 0_u8, |
| LdcMode::IndexedLinear => 1_u8, |
| LdcMode::IndexedSegmented => 2_u8, |
| LdcMode::IndexedSegmentedLinear => 3_u8, |
| }, |
| ); |
| e.set_bit(91, false); // Bound |
| } |
| e.set_field(54..59, idx); |
| } |
| CBuf::BindlessUGPR(handle) => { |
| if self.is_uniform() { |
| if e.sm >= 100 { |
| e.set_opcode(0xbac); |
| } else { |
| e.set_opcode(0xab9); |
| } |
| e.set_udst(&self.dst); |
| |
| if e.sm >= 120 { |
| e.set_ureg_src(64..72, &self.offset); |
| } else if e.sm >= 100 { |
| // Blackwell A adds the source but it has to be zero |
| assert!(self.offset.is_zero()); |
| e.set_ureg_src(64..72, &self.offset); |
| } else { |
| assert!(self.offset.is_zero()); |
| } |
| } else { |
| e.set_opcode(0x582); |
| e.set_dst(&self.dst); |
| |
| e.set_reg_src(64..72, &self.offset); |
| } |
| |
| e.set_ureg(24..32, handle); |
| assert!(self.mode == LdcMode::Indexed); |
| e.set_bit(91, true); // Bindless |
| } |
| CBuf::BindlessSSA(_) => panic!("SSA values must be lowered"), |
| } |
| |
| if e.sm >= 100 && self.is_uniform() { |
| e.set_field(37..54, cb.offset); |
| } else { |
| e.set_field(38..54, cb.offset); |
| } |
| e.set_mem_type(73..76, self.mem_type); |
| |
| if e.sm >= 120 { |
| e.set_field(80..82, 0_u8); // tex/hdr_unpack |
| } else if e.sm >= 100 { |
| e.set_bit(80, false); // tex_unpack |
| } |
| } |
| } |
| |
| impl SM70Op for OpSt { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.access.space { |
| MemSpace::Global(_) => { |
| e.set_opcode(0x386); |
| e.set_mem_access(&self.access); |
| } |
| MemSpace::Local => { |
| e.set_opcode(0x387); |
| e.set_field(84..87, 1_u8); |
| |
| e.set_mem_type(73..76, self.access.mem_type); |
| assert!(self.access.order == MemOrder::Strong(MemScope::CTA)); |
| assert!( |
| self.access.eviction_priority |
| == MemEvictionPriority::Normal |
| ); |
| } |
| MemSpace::Shared => { |
| e.set_opcode(0x388); |
| |
| e.set_mem_type(73..76, self.access.mem_type); |
| assert!(self.access.order == MemOrder::Strong(MemScope::CTA)); |
| assert!( |
| self.access.eviction_priority |
| == MemEvictionPriority::Normal |
| ); |
| } |
| } |
| |
| e.set_reg_src(24..32, &self.addr); |
| e.set_reg_src(32..40, &self.data); |
| e.set_field(40..64, self.offset); |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) { |
| self.set_field( |
| range, |
| match atom_op { |
| AtomOp::Add => 0_u8, |
| AtomOp::Min => 1_u8, |
| AtomOp::Max => 2_u8, |
| AtomOp::Inc => 3_u8, |
| AtomOp::Dec => 4_u8, |
| AtomOp::And => 5_u8, |
| AtomOp::Or => 6_u8, |
| AtomOp::Xor => 7_u8, |
| AtomOp::Exch => 8_u8, |
| AtomOp::CmpExch(_) => panic!("CmpExch is a separate opcode"), |
| }, |
| ); |
| } |
| |
| fn set_atom_type(&mut self, atom_type: AtomType) { |
| if self.sm >= 90 { |
| // Float/int is differentiated by opcode |
| self.set_field( |
| 73..77, |
| match atom_type { |
| AtomType::F16x2 => 0_u8, |
| // f16x4 => 1 |
| // f16x8 => 2 |
| // bf16x2 => 3 |
| // bf16x4 => 4 |
| // bf16x8 => 5 |
| AtomType::F32 => 9_u8, // .ftz |
| // f32x2.ftz => 10 |
| // f32x4.ftz => 11 |
| // f32x1 => 12 |
| // f32x2 => 13 |
| // f32x4 => 14 |
| AtomType::F64 => 15_u8, |
| |
| AtomType::U32 => 0, |
| AtomType::I32 => 1, |
| AtomType::U64 => 2, |
| AtomType::I64 => 3, |
| // u128 => 4, |
| }, |
| ); |
| } else { |
| self.set_field( |
| 73..76, |
| match atom_type { |
| AtomType::U32 => 0_u8, |
| AtomType::I32 => 1_u8, |
| AtomType::U64 => 2_u8, |
| AtomType::F32 => 3_u8, |
| AtomType::F16x2 => 4_u8, |
| AtomType::I64 => 5_u8, |
| AtomType::F64 => 6_u8, |
| }, |
| ); |
| } |
| } |
| } |
| |
| impl SM70Op for OpAtom { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| match self.mem_space { |
| MemSpace::Global(_) => { |
| if self.dst.is_none() { |
| if e.sm >= 90 && self.atom_type.is_float() { |
| e.set_opcode(0x9a6); |
| } else { |
| e.set_opcode(0x98e); |
| } |
| |
| e.set_reg_src(32..40, &self.data); |
| e.set_atom_op(87..90, self.atom_op); |
| } else if let AtomOp::CmpExch(cmp_src) = self.atom_op { |
| e.set_opcode(0x3a9); |
| |
| assert!(cmp_src == AtomCmpSrc::Separate); |
| e.set_reg_src(32..40, &self.cmpr); |
| e.set_reg_src(64..72, &self.data); |
| e.set_pred_dst(81..84, &Dst::None); |
| } else { |
| if e.sm >= 90 && self.atom_type.is_float() { |
| e.set_opcode(0x3a3); |
| } else { |
| e.set_opcode(0x3a8); |
| } |
| |
| e.set_reg_src(32..40, &self.data); |
| e.set_pred_dst(81..84, &Dst::None); |
| e.set_atom_op(87..91, self.atom_op); |
| } |
| |
| e.set_field( |
| 72..73, |
| match self.mem_space.addr_type() { |
| MemAddrType::A32 => 0_u8, |
| MemAddrType::A64 => 1_u8, |
| }, |
| ); |
| |
| e.set_mem_order(&self.mem_order); |
| e.set_eviction_priority(&self.mem_eviction_priority); |
| } |
| MemSpace::Local => panic!("Atomics do not support local"), |
| MemSpace::Shared => { |
| if let AtomOp::CmpExch(cmp_src) = self.atom_op { |
| e.set_opcode(0x38d); |
| |
| assert!(cmp_src == AtomCmpSrc::Separate); |
| e.set_reg_src(32..40, &self.cmpr); |
| e.set_reg_src(64..72, &self.data); |
| } else { |
| e.set_opcode(0x38c); |
| |
| e.set_reg_src(32..40, &self.data); |
| assert!( |
| self.atom_type != AtomType::U64 |
| || self.atom_op == AtomOp::Exch, |
| "64-bit Shared atomics only support CmpExch or Exch" |
| ); |
| assert!( |
| !self.atom_type.is_float(), |
| "Shared atomics don't support float" |
| ); |
| e.set_atom_op(87..91, self.atom_op); |
| } |
| |
| assert!(self.mem_order == MemOrder::Strong(MemScope::CTA)); |
| assert!( |
| self.mem_eviction_priority == MemEvictionPriority::Normal |
| ); |
| } |
| } |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.addr); |
| e.set_field(40..64, self.addr_offset); |
| e.set_atom_type(self.atom_type); |
| } |
| } |
| |
| impl SM70Op for OpAL2P { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x920); |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.offset); |
| |
| e.set_field(40..50, self.addr); |
| e.set_field(74..76, 0_u8); // comps |
| e.set_bit(79, self.output); |
| } |
| } |
| |
| impl SM70Op for OpALd { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x321); |
| |
| e.set_dst(&self.dst); |
| e.set_reg_src(32..40, &self.vtx); |
| e.set_reg_src(24..32, &self.offset); |
| |
| e.set_field(40..50, self.addr); |
| e.set_field(74..76, self.comps - 1); |
| e.set_field(76..77, self.patch); |
| e.set_field(77..78, self.phys); |
| e.set_field(79..80, self.output); |
| } |
| } |
| |
| impl SM70Op for OpASt { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x322); |
| |
| e.set_reg_src(32..40, &self.data); |
| e.set_reg_src(64..72, &self.vtx); |
| e.set_reg_src(24..32, &self.offset); |
| |
| e.set_field(40..50, self.addr); |
| e.set_field(74..76, self.comps - 1); |
| e.set_field(76..77, self.patch); |
| e.set_field(77..78, self.phys); |
| } |
| } |
| |
| impl SM70Op for OpIpa { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x326); |
| |
| e.set_dst(&self.dst); |
| |
| assert!(self.addr % 4 == 0); |
| e.set_field(64..72, self.addr >> 2); |
| |
| e.set_field( |
| 76..78, |
| match self.loc { |
| InterpLoc::Default => 0_u8, |
| InterpLoc::Centroid => 1_u8, |
| InterpLoc::Offset => 2_u8, |
| }, |
| ); |
| e.set_field( |
| 78..80, |
| match self.freq { |
| InterpFreq::Pass => 0_u8, |
| InterpFreq::Constant => 1_u8, |
| InterpFreq::State => 2_u8, |
| InterpFreq::PassMulW => { |
| panic!("InterpFreq::PassMulW is invalid on SM70+"); |
| } |
| }, |
| ); |
| |
| assert!(self.inv_w.is_zero()); |
| e.set_reg_src(32..40, &self.offset); |
| |
| // TODO: What is this for? |
| e.set_pred_dst(81..84, &Dst::None); |
| } |
| } |
| |
| impl SM70Op for OpLdTram { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x3ad); |
| e.set_dst(&self.dst); |
| e.set_ureg(24..32, e.zero_reg(RegFile::UGPR)); |
| |
| assert!(self.addr % 4 == 0); |
| e.set_field(64..72, self.addr >> 2); |
| |
| e.set_bit(72, self.use_c); |
| |
| // Unknown but required |
| e.set_bit(91, true); |
| } |
| } |
| |
| impl SM70Op for OpCCtl { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| legalize_ext_instr(self, b); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(matches!(self.mem_space, MemSpace::Global(_))); |
| e.set_opcode(0x98f); |
| |
| e.set_reg_src(24..32, &self.addr); |
| e.set_field(32..64, self.addr_offset); |
| |
| e.set_field( |
| 87..91, |
| match self.op { |
| CCtlOp::PF1 => 0_u8, |
| CCtlOp::PF2 => 1_u8, |
| CCtlOp::WB => 2_u8, |
| CCtlOp::IV => 3_u8, |
| CCtlOp::IVAll => 4_u8, |
| CCtlOp::RS => 5_u8, |
| CCtlOp::IVAllP => 6_u8, |
| CCtlOp::WBAll => 7_u8, |
| CCtlOp::WBAllP => 8_u8, |
| op => panic!("Unsupported cache control {op:?}"), |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpMemBar { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x992); |
| |
| e.set_bit(72, false); // !.MMIO |
| e.set_field( |
| 76..79, |
| match self.scope { |
| MemScope::CTA => 0_u8, |
| // SM => 1_u8, |
| MemScope::GPU => 2_u8, |
| MemScope::System => 3_u8, |
| }, |
| ); |
| e.set_bit(80, false); // .SC |
| } |
| } |
| |
| impl SM70Encoder<'_> { |
| fn get_rel_offset(&mut self, label: &Label) -> i64 { |
| let ip = u64::try_from(self.ip).unwrap(); |
| let ip = i64::try_from(ip).unwrap(); |
| |
| let target_ip = *self.labels.get(label).unwrap(); |
| let target_ip = u64::try_from(target_ip).unwrap(); |
| let target_ip = i64::try_from(target_ip).unwrap(); |
| |
| target_ip - ip - 4 |
| } |
| |
| fn set_rel_offset(&mut self, range: Range<usize>, label: &Label) { |
| let rel_offset = self.get_rel_offset(label); |
| |
| self.set_field(range, rel_offset); |
| } |
| |
| fn set_rel_offset2( |
| &mut self, |
| range1: Range<usize>, |
| range2: Range<usize>, |
| label: &Label, |
| ) { |
| let rel_offset = self.get_rel_offset(label); |
| let shift = range1.len(); |
| self.set_field(range1, (rel_offset as u64) & ((1 << shift) - 1)); |
| self.set_field(range2, rel_offset >> shift); |
| } |
| } |
| |
| impl SM70Op for OpBClear { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x355); |
| |
| e.set_dst(&Dst::None); |
| e.set_bar_dst(24..28, &self.dst); |
| |
| e.set_bit(84, true); // .CLEAR |
| } |
| } |
| |
| impl SM70Op for OpBMov { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if dst_is_bar(&self.dst) { |
| e.set_opcode(0x356); |
| |
| e.set_bar_dst(24..28, &self.dst); |
| e.set_reg_src(32..40, &self.src); |
| |
| e.set_bit(84, self.clear); |
| } else { |
| e.set_opcode(0x355); |
| |
| e.set_dst(&self.dst); |
| e.set_bar_src(24..28, &self.src); |
| |
| e.set_bit(84, self.clear); |
| } |
| } |
| } |
| |
| impl SM70Op for OpBreak { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x942); |
| assert!(self.bar_in.src_ref.as_reg() == self.bar_out.as_reg()); |
| e.set_bar_dst(16..20, &self.bar_out); |
| e.set_pred_src(87..90, 90, &self.cond); |
| } |
| } |
| |
| impl SM70Op for OpBSSy { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x945); |
| assert!(self.bar_in.src_ref.as_reg() == self.bar_out.as_reg()); |
| e.set_bar_dst(16..20, &self.bar_out); |
| e.set_rel_offset(34..64, &self.target); |
| e.set_pred_src(87..90, 90, &self.cond); |
| } |
| } |
| |
| impl SM70Op for OpBSync { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x941); |
| e.set_bar_src(16..20, &self.bar); |
| e.set_pred_src(87..90, 90, &self.cond); |
| } |
| } |
| |
| impl SM70Op for OpBra { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x947); |
| if e.sm >= 100 { |
| e.set_rel_offset2(16..24, 34..82, &self.target); |
| } else { |
| e.set_rel_offset(34..82, &self.target); |
| } |
| e.set_field(87..90, 0x7_u8); // TODO: Pred? |
| } |
| } |
| |
| impl SM70Op for OpExit { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x94d); |
| |
| // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3 |
| e.set_field(84..85, false); |
| e.set_field(85..86, false); // .NO_ATEXIT |
| e.set_field(87..90, 0x7_u8); // TODO: Predicate |
| e.set_field(90..91, false); // NOT |
| } |
| } |
| |
| impl SM70Op for OpWarpSync { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu(0x148, None, None, Some(&Src::from(self.mask)), None); |
| e.set_pred_src(87..90, 90, &SrcRef::True.into()); |
| } |
| } |
| |
| impl SM70Op for OpBar { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0xb1d); |
| |
| // e.set_opcode(0x31d); |
| |
| // // src0 == src1 |
| // e.set_reg_src(32..40, SrcRef::Zero.into()); |
| |
| // // 00: RED.POPC |
| // // 01: RED.AND |
| // // 02: RED.OR |
| // e.set_field(74..76, 0_u8); |
| |
| // // 00: SYNC |
| // // 01: ARV |
| // // 02: RED |
| // // 03: SCAN |
| // e.set_field(77..79, 0_u8); |
| |
| // e.set_pred_src(87..90, 90, SrcRef::True.into()); |
| } |
| } |
| |
| impl SM70Op for OpCS2R { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x805); |
| e.set_dst(&self.dst); |
| e.set_field(72..80, self.idx); |
| e.set_bit(80, self.dst.as_reg().unwrap().comps() == 2); // .64 |
| } |
| } |
| |
| impl SM70Op for OpIsberd { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x923); |
| e.set_dst(&self.dst); |
| e.set_reg_src(24..32, &self.idx); |
| } |
| } |
| |
| impl SM70Op for OpKill { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x95b); |
| e.set_pred_src(87..90, 90, &SrcRef::True.into()); |
| } |
| } |
| |
| impl SM70Op for OpNop { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x918); |
| } |
| } |
| |
| impl SM70Op for OpPixLd { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.set_opcode(0x925); |
| e.set_dst(&self.dst); |
| e.set_field( |
| 78..81, |
| match &self.val { |
| PixVal::MsCount => 0_u8, |
| PixVal::CovMask => 1_u8, |
| PixVal::CentroidOffset => 2_u8, |
| PixVal::MyIndex => 3_u8, |
| PixVal::InnerCoverage => 4_u8, |
| other => panic!("Unsupported PixVal: {other}"), |
| }, |
| ); |
| e.set_pred_dst(81..84, &Dst::None); |
| } |
| } |
| |
| impl SM70Op for OpS2R { |
| fn legalize(&mut self, _b: &mut LegalizeBuilder) { |
| // Nothing to do |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| assert!(!self.is_uniform()); |
| e.set_opcode(if self.is_uniform() { 0x9c3 } else { 0x919 }); |
| e.set_dst(&self.dst); |
| e.set_field(72..80, self.idx); |
| } |
| } |
| |
| impl SM70Op for OpOut { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.handle, gpr, SrcType::GPR); |
| b.copy_alu_src_if_not_reg_or_imm(&mut self.stream, gpr, SrcType::ALU); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x124, |
| Some(&self.dst), |
| Some(&self.handle), |
| Some(&self.stream), |
| None, |
| ); |
| |
| e.set_field( |
| 78..80, |
| match self.out_type { |
| OutType::Emit => 1_u8, |
| OutType::Cut => 2_u8, |
| OutType::EmitThenCut => 3_u8, |
| }, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpOutFinal { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| let gpr = op_gpr(self); |
| b.copy_alu_src_if_not_reg(&mut self.handle, gpr, SrcType::GPR); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| e.encode_alu( |
| 0x124, |
| Some(&Dst::None), |
| Some(&self.handle), |
| Some(&Src::ZERO), |
| None, |
| ); |
| } |
| } |
| |
| impl SM70Op for OpVote { |
| fn legalize(&mut self, b: &mut LegalizeBuilder) { |
| b.copy_src_if_upred(&mut self.pred); |
| } |
| |
| fn encode(&self, e: &mut SM70Encoder<'_>) { |
| if self.is_uniform() { |
| e.set_opcode(0x886); |
| e.set_udst(&self.ballot); |
| } else { |
| e.set_opcode(0x806); |
| e.set_dst(&self.ballot); |
| } |
| |
| e.set_field( |
| 72..74, |
| match self.op { |
| VoteOp::All => 0_u8, |
| VoteOp::Any => 1_u8, |
| VoteOp::Eq => 2_u8, |
| }, |
| ); |
| |
| e.set_pred_dst(81..84, &self.vote); |
| e.set_pred_src(87..90, 90, &self.pred); |
| } |
| } |
| |
/// Expands to a `match` over an `Op` expression that yields the payload of
/// every variant listed below.
///
/// Any variant not listed panics with "Unsupported op".  The same
/// expansion serves shared and mutable borrows of the `Op`.
macro_rules! as_sm70_op_match {
    ($instr_op: expr) => {
        match $instr_op {
            // Float ALU
            Op::FAdd(inner) => inner,
            Op::FFma(inner) => inner,
            Op::FMnMx(inner) => inner,
            Op::FMul(inner) => inner,
            Op::FSet(inner) => inner,
            Op::FSetP(inner) => inner,
            Op::FSwzAdd(inner) => inner,
            Op::DAdd(inner) => inner,
            Op::DFma(inner) => inner,
            Op::DMul(inner) => inner,
            Op::DSetP(inner) => inner,
            Op::HAdd2(inner) => inner,
            Op::HFma2(inner) => inner,
            Op::HMul2(inner) => inner,
            Op::HSet2(inner) => inner,
            Op::HSetP2(inner) => inner,
            Op::HMnMx2(inner) => inner,
            Op::MuFu(inner) => inner,
            // Integer ALU
            Op::BMsk(inner) => inner,
            Op::BRev(inner) => inner,
            Op::Flo(inner) => inner,
            Op::IAbs(inner) => inner,
            Op::IAdd3(inner) => inner,
            Op::IAdd3X(inner) => inner,
            Op::IDp4(inner) => inner,
            Op::IMad(inner) => inner,
            Op::IMad64(inner) => inner,
            Op::IMnMx(inner) => inner,
            Op::ISetP(inner) => inner,
            Op::Lea(inner) => inner,
            Op::LeaX(inner) => inner,
            Op::Lop3(inner) => inner,
            Op::PopC(inner) => inner,
            Op::Shf(inner) => inner,
            // Conversions
            Op::F2F(inner) => inner,
            Op::F2FP(inner) => inner,
            Op::F2I(inner) => inner,
            Op::I2F(inner) => inner,
            Op::FRnd(inner) => inner,
            // Moves
            Op::Mov(inner) => inner,
            Op::Prmt(inner) => inner,
            Op::Sel(inner) => inner,
            Op::Shfl(inner) => inner,
            Op::PLop3(inner) => inner,
            Op::R2UR(inner) => inner,
            // Textures
            Op::Tex(inner) => inner,
            Op::Tld(inner) => inner,
            Op::Tld4(inner) => inner,
            Op::Tmml(inner) => inner,
            Op::Txd(inner) => inner,
            Op::Txq(inner) => inner,
            // Surfaces
            Op::SuLd(inner) => inner,
            Op::SuSt(inner) => inner,
            Op::SuAtom(inner) => inner,
            // Memory
            Op::Ld(inner) => inner,
            Op::Ldc(inner) => inner,
            Op::St(inner) => inner,
            Op::Atom(inner) => inner,
            Op::AL2P(inner) => inner,
            Op::ALd(inner) => inner,
            Op::ASt(inner) => inner,
            Op::Ipa(inner) => inner,
            Op::LdTram(inner) => inner,
            Op::CCtl(inner) => inner,
            Op::MemBar(inner) => inner,
            // Control flow
            Op::BClear(inner) => inner,
            Op::BMov(inner) => inner,
            Op::Break(inner) => inner,
            Op::BSSy(inner) => inner,
            Op::BSync(inner) => inner,
            Op::Bra(inner) => inner,
            Op::Exit(inner) => inner,
            Op::WarpSync(inner) => inner,
            // Miscellaneous
            Op::Bar(inner) => inner,
            Op::CS2R(inner) => inner,
            Op::Isberd(inner) => inner,
            Op::Kill(inner) => inner,
            Op::Nop(inner) => inner,
            Op::PixLd(inner) => inner,
            Op::S2R(inner) => inner,
            Op::Out(inner) => inner,
            Op::OutFinal(inner) => inner,
            Op::Vote(inner) => inner,
            _ => panic!("Unsupported op: {}", $instr_op),
        }
    };
}
| |
| fn as_sm70_op(op: &Op) -> &dyn SM70Op { |
| as_sm70_op_match!(op) |
| } |
| |
| fn as_sm70_op_mut(op: &mut Op) -> &mut dyn SM70Op { |
| as_sm70_op_match!(op) |
| } |
| |
| pub fn legalize_sm70_op( |
| _sm: &dyn ShaderModel, |
| b: &mut LegalizeBuilder, |
| op: &mut Op, |
| ) { |
| as_sm70_op_mut(op).legalize(b); |
| } |
| |
| pub fn encode_sm70_shader(sm: &dyn ShaderModel, s: &Shader<'_>) -> Vec<u32> { |
| assert!(s.functions.len() == 1); |
| let func = &s.functions[0]; |
| |
| let mut ip = 0_usize; |
| let mut labels = FxHashMap::default(); |
| for b in &func.blocks { |
| labels.insert(b.label, ip); |
| for instr in &b.instrs { |
| if let Op::Nop(op) = &instr.op { |
| if let Some(label) = op.label { |
| labels.insert(label, ip); |
| } |
| } |
| ip += 4; |
| } |
| } |
| |
| let mut encoded = Vec::new(); |
| for b in &func.blocks { |
| for instr in &b.instrs { |
| let mut e = SM70Encoder { |
| sm: sm.sm(), |
| ip: encoded.len(), |
| labels: &labels, |
| inst: [0_u32; 4], |
| }; |
| as_sm70_op(&instr.op).encode(&mut e); |
| e.set_pred(&instr.pred); |
| e.set_instr_deps(&instr.deps); |
| encoded.extend_from_slice(&e.inst[..]); |
| } |
| } |
| encoded |
| } |