riscv64: [codegen] Implement VisitMin/VisitMax.

Also use instructions from the "Zbb" extension to implement
the integer variants of Min/Max and improve the code emitted
for Ror.

Test: m test-art-host-gtest
Bug: 283082089
Change-Id: I809d53bfebf46a047536c79541802e74faaa287f
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index 31e8baf..c842201 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -459,6 +459,16 @@
   FpBinOp<FRegister, &Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type);
 }
 
+inline void InstructionCodeGeneratorRISCV64::FMin(
+    FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+  FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMax(
+    FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+  FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type);
+}
+
 inline void InstructionCodeGeneratorRISCV64::FEq(
     XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
   FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type);
@@ -616,27 +626,6 @@
   }
 }
 
-void InstructionCodeGeneratorRISCV64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
-  UNUSED(locations);
-  UNUSED(is_min);
-  LOG(FATAL) << "Unimplemented";
-}
-
-void InstructionCodeGeneratorRISCV64::GenerateMinMaxFP(LocationSummary* locations,
-                                                       bool is_min,
-                                                       DataType::Type type) {
-  UNUSED(locations);
-  UNUSED(is_min);
-  UNUSED(type);
-  LOG(FATAL) << "Unimplemented";
-}
-
-void InstructionCodeGeneratorRISCV64::GenerateMinMax(HBinaryOperation* instruction, bool is_min) {
-  UNUSED(instruction);
-  UNUSED(is_min);
-  LOG(FATAL) << "Unimplemented";
-}
-
 void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister(
     HInstruction* instruction,
     Location out,
@@ -1158,7 +1147,9 @@
       locations->SetInAt(0, Location::RequiresRegister());
       HInstruction* right = instruction->InputAt(1);
       bool can_use_imm = false;
-      if (right->IsConstant()) {
+      if (instruction->IsMin() || instruction->IsMax()) {
+        can_use_imm = IsZeroBitPattern(instruction);
+      } else if (right->IsConstant()) {
         int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
         can_use_imm = IsInt<12>(instruction->IsSub() ? -imm : imm);
       }
@@ -1217,8 +1208,7 @@
         } else {
           __ Xor(rd, rs1, rs2);
         }
-      } else {
-        DCHECK(instruction->IsAdd() || instruction->IsSub());
+      } else if (instruction->IsAdd() || instruction->IsSub()) {
         if (type == DataType::Type::kInt32) {
           if (use_imm) {
             __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm);
@@ -1238,6 +1228,13 @@
             __ Sub(rd, rs1, rs2);
           }
         }
+      } else if (instruction->IsMin()) {
+        DCHECK_IMPLIES(use_imm, imm == 0);
+        __ Min(rd, rs1, use_imm ? Zero : rs2);
+      } else {
+        DCHECK(instruction->IsMax());
+        DCHECK_IMPLIES(use_imm, imm == 0);
+        __ Max(rd, rs1, use_imm ? Zero : rs2);
       }
       break;
     }
@@ -1248,9 +1245,13 @@
       FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
       if (instruction->IsAdd()) {
         FAdd(rd, rs1, rs2, type);
-      } else {
-        DCHECK(instruction->IsSub());
+      } else if (instruction->IsSub()) {
         FSub(rd, rs1, rs2, type);
+      } else if (instruction->IsMin()) {
+        FMin(rd, rs1, rs2, type);
+      } else {
+        DCHECK(instruction->IsMax());
+        FMax(rd, rs1, rs2, type);
       }
       break;
     }
@@ -1388,11 +1389,8 @@
           } else if (instruction->IsUShr()) {
             __ Srliw(rd, rs1, shamt);
           } else {
-            ScratchRegisterScope srs(GetAssembler());
-            XRegister tmp = srs.AllocateXRegister();
-            __ Srliw(tmp, rs1, shamt);
-            __ Slliw(rd, rs1, 32 - shamt);
-            __ Or(rd, rd, tmp);
+            DCHECK(instruction->IsRor());
+            __ Roriw(rd, rs1, shamt);
           }
         } else {
           if (instruction->IsShl()) {
@@ -1402,11 +1400,8 @@
           } else if (instruction->IsUShr()) {
             __ Srli(rd, rs1, shamt);
           } else {
-            ScratchRegisterScope srs(GetAssembler());
-            XRegister tmp = srs.AllocateXRegister();
-            __ Srli(tmp, rs1, shamt);
-            __ Slli(rd, rs1, 64 - shamt);
-            __ Or(rd, rd, tmp);
+            DCHECK(instruction->IsRor());
+            __ Rori(rd, rs1, shamt);
           }
         }
       } else {
@@ -1419,13 +1414,8 @@
           } else if (instruction->IsUShr()) {
             __ Srlw(rd, rs1, rs2);
           } else {
-            ScratchRegisterScope srs(GetAssembler());
-            XRegister tmp = srs.AllocateXRegister();
-            XRegister tmp2 = srs.AllocateXRegister();
-            __ Srlw(tmp, rs1, rs2);
-            __ Sub(tmp2, Zero, rs2);  // tmp2 = -rs; we can use this instead of `32 - rs`
-            __ Sllw(rd, rs1, tmp2);   // because only low 5 bits are used for SLLW.
-            __ Or(rd, rd, tmp);
+            DCHECK(instruction->IsRor());
+            __ Rorw(rd, rs1, rs2);
           }
         } else {
           if (instruction->IsShl()) {
@@ -1435,13 +1425,8 @@
           } else if (instruction->IsUShr()) {
             __ Srl(rd, rs1, rs2);
           } else {
-            ScratchRegisterScope srs(GetAssembler());
-            XRegister tmp = srs.AllocateXRegister();
-            XRegister tmp2 = srs.AllocateXRegister();
-            __ Srl(tmp, rs1, rs2);
-            __ Sub(tmp2, Zero, rs2);  // tmp2 = -rs; we can use this instead of `64 - rs`
-            __ Sll(rd, rs1, tmp2);    // because only low 6 bits are used for SLL.
-            __ Or(rd, rd, tmp);
+            DCHECK(instruction->IsRor());
+            __ Ror(rd, rs1, rs2);
           }
         }
       }
@@ -2329,13 +2314,11 @@
 }
 
 void LocationsBuilderRISCV64::VisitMax(HMax* instruction) {
-  UNUSED(instruction);
-  LOG(FATAL) << "Unimplemented";
+  HandleBinaryOp(instruction);
 }
 
 void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) {
-  UNUSED(instruction);
-  LOG(FATAL) << "Unimplemented";
+  HandleBinaryOp(instruction);
 }
 
 void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) {
@@ -2369,13 +2352,11 @@
 }
 
 void LocationsBuilderRISCV64::VisitMin(HMin* instruction) {
-  UNUSED(instruction);
-  LOG(FATAL) << "Unimplemented";
+  HandleBinaryOp(instruction);
 }
 
 void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) {
-  UNUSED(instruction);
-  LOG(FATAL) << "Unimplemented";
+  HandleBinaryOp(instruction);
 }
 
 void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) {
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 85a210c96..69ad922 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -393,10 +393,6 @@
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
 
-  void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
-  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
-  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
-
   // Generate a heap reference load using one register `out`:
   //
   //   out <- *(out + offset)
@@ -477,6 +473,8 @@
   void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type);
   void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
   void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+  void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+  void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
   void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
   void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
   void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);