Merge "String Compression for ARM and ARM64"
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 55e1221..681988d 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4589,7 +4589,9 @@
   }
   // We need a temporary register for the read barrier marking slow
   // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
-  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+  // Also needed for the String compression feature.
+  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
+      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -4602,6 +4604,8 @@
   Location out_loc = locations->Out();
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   Primitive::Type type = instruction->GetType();
+  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+                                        instruction->IsStringCharAt();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
   // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
@@ -4615,10 +4619,31 @@
     case Primitive::kPrimInt: {
       if (index.IsConstant()) {
         int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
-        uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+        if (maybe_compressed_char_at) {
+          Register length = IP;
+          Label uncompressed_load, done;
+          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ cmp(length, ShifterOperand(0));
+          __ b(&uncompressed_load, GE);
+          __ LoadFromOffset(kLoadUnsignedByte,
+                            out_loc.AsRegister<Register>(),
+                            obj,
+                            data_offset + const_index);
+          __ b(&done);
+          __ Bind(&uncompressed_load);
+          __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+                            out_loc.AsRegister<Register>(),
+                            obj,
+                            data_offset + (const_index << 1));
+          __ Bind(&done);
+        } else {
+          uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
 
-        LoadOperandType load_type = GetLoadOperandType(type);
-        __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+          LoadOperandType load_type = GetLoadOperandType(type);
+          __ LoadFromOffset(load_type, out_loc.AsRegister<Register>(), obj, full_offset);
+        }
       } else {
         Register temp = IP;
 
@@ -4634,7 +4659,24 @@
         } else {
           __ add(temp, obj, ShifterOperand(data_offset));
         }
-        codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+        if (maybe_compressed_char_at) {
+          Label uncompressed_load, done;
+          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+          Register length = locations->GetTemp(0).AsRegister<Register>();
+          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ cmp(length, ShifterOperand(0));
+          __ b(&uncompressed_load, GE);
+          __ ldrb(out_loc.AsRegister<Register>(),
+                  Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
+          __ b(&done);
+          __ Bind(&uncompressed_load);
+          __ ldrh(out_loc.AsRegister<Register>(),
+                  Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
+          __ Bind(&done);
+        } else {
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
+        }
       }
       break;
     }
@@ -4734,7 +4776,7 @@
   if (type == Primitive::kPrimNot) {
     // Potential implicit null checks, in the case of reference
     // arrays, are handled in the previous switch statement.
-  } else {
+  } else if (!maybe_compressed_char_at) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 }
@@ -5024,6 +5066,10 @@
   Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ bic(out, out, ShifterOperand(1u << 31));
+  }
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a2a2e42..4f7f36b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2052,7 +2052,8 @@
   Location index = locations->InAt(1);
   Location out = locations->Out();
   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
-
+  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+                                        instruction->IsStringCharAt();
   MacroAssembler* masm = GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
@@ -2070,9 +2071,28 @@
   } else {
     // General case.
     MemOperand source = HeapOperand(obj);
+    Register length;
+    if (maybe_compressed_char_at) {
+      uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+      length = temps.AcquireW();
+      __ Ldr(length, HeapOperand(obj, count_offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
     if (index.IsConstant()) {
-      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-      source = HeapOperand(obj, offset);
+      if (maybe_compressed_char_at) {
+        vixl::aarch64::Label uncompressed_load, done;
+        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        __ Ldrb(Register(OutputCPURegister(instruction)),
+                HeapOperand(obj, offset + Int64ConstantFrom(index)));
+        __ B(&done);
+        __ Bind(&uncompressed_load);
+        __ Ldrh(Register(OutputCPURegister(instruction)),
+                HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
+        __ Bind(&done);
+      } else {
+        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+        source = HeapOperand(obj, offset);
+      }
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
@@ -2090,11 +2110,24 @@
       } else {
         __ Add(temp, obj, offset);
       }
-      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+      if (maybe_compressed_char_at) {
+        vixl::aarch64::Label uncompressed_load, done;
+        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        __ Ldrb(Register(OutputCPURegister(instruction)),
+                HeapOperand(temp, XRegisterFrom(index), LSL, 0));
+        __ B(&done);
+        __ Bind(&uncompressed_load);
+        __ Ldrh(Register(OutputCPURegister(instruction)),
+                HeapOperand(temp, XRegisterFrom(index), LSL, 1));
+        __ Bind(&done);
+      } else {
+        source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+      }
     }
-
-    codegen_->Load(type, OutputCPURegister(instruction), source);
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    if (!maybe_compressed_char_at) {
+      codegen_->Load(type, OutputCPURegister(instruction), source);
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
 
     if (type == Primitive::kPrimNot) {
       static_assert(
@@ -2118,9 +2151,14 @@
 
 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+  vixl::aarch64::Register out = OutputRegister(instruction);
   BlockPoolsScope block_pools(GetVIXLAssembler());
-  __ Ldr(OutputRegister(instruction), HeapOperand(InputRegisterAt(instruction, 0), offset));
+  __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+  }
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
@@ -2312,7 +2350,6 @@
   BoundsCheckSlowPathARM64* slow_path =
       new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
   codegen_->AddSlowPath(slow_path);
-
   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
   __ B(slow_path->GetEntryLabel(), hs);
 }
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 495f3fd..56e4c7a 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -44,6 +44,14 @@
   size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
   Primitive::Type type = instruction->GetType();
 
+  // TODO: Implement reading (length + compression) for String compression feature from
+  // negative offset (count_offset - data_offset). Thumb2Assembler does not support T4
+  // encoding of "LDR (immediate)" at the moment.
+  // Don't move array pointer if it is charAt because we need to take the count first.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    return;
+  }
+
   if (type == Primitive::kPrimLong
       || type == Primitive::kPrimFloat
       || type == Primitive::kPrimDouble) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6d107d5..d0dd650 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,6 +140,13 @@
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  // Don't move the array pointer if it is charAt because we need to take the count first.
+  // TODO: Implement reading (length + compression) for String compression feature from
+  // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary.
+  // Note that "LDR (Immediate)" does not have a "signed offset" encoding.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    return;
+  }
   if (TryExtractArrayAccessAddress(instruction,
                                    instruction->GetArray(),
                                    instruction->GetIndex(),
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index fd2da10..96a6ecb 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1039,6 +1039,11 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  // Need temporary registers for the String compression feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -1053,10 +1058,16 @@
   Register temp0 = locations->GetTemp(0).AsRegister<Register>();
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
+  Register temp3, temp4;
+  if (mirror::kUseStringCompression) {
+    temp3 = locations->GetTemp(3).AsRegister<Register>();
+    temp4 = locations->GetTemp(4).AsRegister<Register>();
+  }
 
   Label loop;
   Label find_char_diff;
   Label end;
+  Label different_compression;
 
   // Get offsets of count and value fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1077,20 +1088,40 @@
   // Reference equality check, return 0 if same reference.
   __ subs(out, str, ShifterOperand(arg));
   __ b(&end, EQ);
-  // Load lengths of this and argument strings.
-  __ ldr(temp2, Address(str, count_offset));
-  __ ldr(temp1, Address(arg, count_offset));
+  if (mirror::kUseStringCompression) {
+    // Load lengths of this and argument strings.
+    __ ldr(temp3, Address(str, count_offset));
+    __ ldr(temp4, Address(arg, count_offset));
+    // Clean out compression flag from lengths.
+    __ bic(temp0, temp3, ShifterOperand(0x80000000));
+    __ bic(IP, temp4, ShifterOperand(0x80000000));
+  } else {
+    // Load lengths of this and argument strings.
+    __ ldr(temp0, Address(str, count_offset));
+    __ ldr(IP, Address(arg, count_offset));
+  }
   // out = length diff.
-  __ subs(out, temp2, ShifterOperand(temp1));
+  __ subs(out, temp0, ShifterOperand(IP));
   // temp0 = min(len(str), len(arg)).
-  __ it(Condition::LT, kItElse);
-  __ mov(temp0, ShifterOperand(temp2), Condition::LT);
-  __ mov(temp0, ShifterOperand(temp1), Condition::GE);
+  __ it(GT);
+  __ mov(temp0, ShifterOperand(IP), GT);
   // Shorter string is empty?
   __ CompareAndBranchIfZero(temp0, &end);
 
+  if (mirror::kUseStringCompression) {
+    // Check if both strings using same compression style to use this comparison loop.
+    __ eors(temp3, temp3, ShifterOperand(temp4));
+    __ b(&different_compression, MI);
+  }
   // Store offset of string value in preparation for comparison loop.
   __ mov(temp1, ShifterOperand(value_offset));
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ cmp(temp4, ShifterOperand(0));
+    __ it(GE);
+    __ add(temp0, temp0, ShifterOperand(temp0), GE);
+  }
 
   // Assertions that must hold in order to compare multiple characters at a time.
   CHECK_ALIGNED(value_offset, 8);
@@ -1100,6 +1131,7 @@
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   DCHECK_EQ(char_size, 2u);
 
+  Label find_char_diff_2nd_cmp;
   // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
   __ Bind(&loop);
   __ ldr(IP, Address(str, temp1));
@@ -1107,43 +1139,113 @@
   __ cmp(IP, ShifterOperand(temp2));
   __ b(&find_char_diff, NE);
   __ add(temp1, temp1, ShifterOperand(char_size * 2));
-  __ sub(temp0, temp0, ShifterOperand(2));
 
   __ ldr(IP, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
   __ cmp(IP, ShifterOperand(temp2));
-  __ b(&find_char_diff, NE);
+  __ b(&find_char_diff_2nd_cmp, NE);
   __ add(temp1, temp1, ShifterOperand(char_size * 2));
-  __ subs(temp0, temp0, ShifterOperand(2));
-
-  __ b(&loop, GT);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ subs(temp0, temp0, ShifterOperand(mirror::kUseStringCompression ? 8 : 4));
+  __ b(&loop, HI);
   __ b(&end);
 
-  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff_2nd_cmp);
+  if (mirror::kUseStringCompression) {
+    __ subs(temp0, temp0, ShifterOperand(4));  // 4 bytes previously compared.
+    __ b(&end, LS);  // Was the second comparison fully beyond the end?
+  } else {
+    // Without string compression, we can start treating temp0 as signed
+    // and rely on the signed comparison below.
+    __ sub(temp0, temp0, ShifterOperand(2));
+  }
+
+  // Find the single character difference.
   __ Bind(&find_char_diff);
   // Get the bit position of the first character that differs.
   __ eor(temp1, temp2, ShifterOperand(IP));
   __ rbit(temp1, temp1);
   __ clz(temp1, temp1);
 
-  // temp0 = number of 16-bit characters remaining to compare.
-  // (it could be < 1 if a difference is found after the first SUB in the comparison loop, and
-  // after the end of the shorter string data).
+  // temp0 = number of characters remaining to compare.
+  // (Without string compression, it could be < 1 if a difference is found by the second CMP
+  // in the comparison loop, and after the end of the shorter string data).
 
-  // (temp1 >> 4) = character where difference occurs between the last two words compared, on the
-  // interval [0,1] (0 for low half-word different, 1 for high half-word different).
+  // Without string compression (temp1 >> 4) = character where difference occurs between the last
+  // two words compared, in the interval [0,1].
+  // (0 for low half-word different, 1 for high half-word different).
+  // With string compression, (temp1 << 3) = byte where the difference occurs,
+  // in the interval [0,3].
 
-  // If temp0 <= (temp1 >> 4), the difference occurs outside the remaining string data, so just
-  // return length diff (out).
-  __ cmp(temp0, ShifterOperand(temp1, LSR, 4));
-  __ b(&end, LE);
+  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+  // the remaining string data, so just return length diff (out).
+  // The comparison is unsigned for string compression, otherwise signed.
+  __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
+  __ b(&end, mirror::kUseStringCompression ? LS : LE);
   // Extract the characters and calculate the difference.
+  Label uncompressed_string, continue_process;
+  if (mirror::kUseStringCompression) {
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&uncompressed_string, GE);
+    __ bic(temp1, temp1, ShifterOperand(0x7));
+    __ b(&continue_process);
+  }
+  __ Bind(&uncompressed_string);
   __ bic(temp1, temp1, ShifterOperand(0xf));
+  __ Bind(&continue_process);
+
   __ Lsr(temp2, temp2, temp1);
   __ Lsr(IP, IP, temp1);
+  Label calculate_difference, uncompressed_string_extract_chars;
+  if (mirror::kUseStringCompression) {
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&uncompressed_string_extract_chars, GE);
+    __ ubfx(temp2, temp2, 0, 8);
+    __ ubfx(IP, IP, 0, 8);
+    __ b(&calculate_difference);
+  }
+  __ Bind(&uncompressed_string_extract_chars);
   __ movt(temp2, 0);
   __ movt(IP, 0);
+  __ Bind(&calculate_difference);
   __ sub(out, IP, ShifterOperand(temp2));
+  __ b(&end);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    Label loop_arg_compressed, loop_this_compressed, find_diff;
+    // Comparison for different compression style.
+    // This part is when THIS is compressed and ARG is not.
+    __ Bind(&different_compression);
+    __ add(temp2, str, ShifterOperand(value_offset));
+    __ add(temp3, arg, ShifterOperand(value_offset));
+    __ cmp(temp4, ShifterOperand(0));
+    __ b(&loop_arg_compressed, LT);
+
+    __ Bind(&loop_this_compressed);
+    __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
+    __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp4));
+    __ b(&find_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(1));
+    __ b(&loop_this_compressed, GT);
+    __ b(&end);
+
+    // This part is when THIS is not compressed and ARG is.
+    __ Bind(&loop_arg_compressed);
+    __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
+    __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp4));
+    __ b(&find_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(1));
+    __ b(&loop_arg_compressed, GT);
+    __ b(&end);
+
+    // Calculate the difference.
+    __ Bind(&find_diff);
+    __ sub(out, IP, ShifterOperand(temp4));
+  }
 
   __ Bind(&end);
 
@@ -1180,7 +1282,7 @@
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
 
-  Label loop;
+  Label loop, preloop;
   Label end;
   Label return_true;
   Label return_false;
@@ -1214,11 +1316,15 @@
   __ ldr(temp, Address(str, count_offset));
   __ ldr(temp1, Address(arg, count_offset));
   // Check if lengths are equal, return false if they're not.
+  // Also compares the compression style; if it differs, return false.
   __ cmp(temp, ShifterOperand(temp1));
   __ b(&return_false, NE);
   // Return true if both strings are empty.
+  if (mirror::kUseStringCompression) {
+    // Length needs to be masked out first because 0 is treated as compressed.
+    __ bic(temp, temp, ShifterOperand(0x80000000));
+  }
   __ cbz(temp, &return_true);
-
   // Reference equality check, return true if same reference.
   __ cmp(str, ShifterOperand(arg));
   __ b(&return_true, EQ);
@@ -1227,10 +1333,19 @@
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
-  __ LoadImmediate(temp1, value_offset);
-
+  if (mirror::kUseStringCompression) {
+    // If not compressed, go directly to the fast compare. Otherwise, preprocess the length.
+    __ cmp(temp1, ShifterOperand(0));
+    __ b(&preloop, GT);
+    // Mask out compression flag and adjust length for compressed string (8-bit)
+    // as if it is a 16-bit data, new_length = (length + 1) / 2.
+    __ add(temp, temp, ShifterOperand(1));
+    __ Lsr(temp, temp, 1);
+    __ Bind(&preloop);
+  }
   // Loop to compare strings 2 characters at a time starting at the front of the string.
   // Ok to do this because strings with an odd length are zero-padded.
+  __ LoadImmediate(temp1, value_offset);
   __ Bind(&loop);
   __ ldr(out, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
@@ -2330,22 +2445,31 @@
   Register src_ptr = locations->GetTemp(1).AsRegister<Register>();
   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
 
-  // src range to copy.
-  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
-  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
-
+  Label done, compressed_string_loop;
   // dst to be copied.
   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
 
   __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
-
-  // Do the copy.
-  Label loop, remainder, done;
-
   // Early out for valid zero-length retrievals.
   __ b(&done, EQ);
 
+  // src range to copy.
+  __ add(src_ptr, srcObj, ShifterOperand(value_offset));
+  Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    // String's length.
+    __ ldr(IP, Address(srcObj, count_offset));
+    __ cmp(IP, ShifterOperand(0));
+    __ b(&compressed_string_preloop, LT);
+  }
+  __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
+
+  // Do the copy.
+  Label loop, remainder;
+
   // Save repairing the value of num_chr on the < 4 character path.
   __ subs(IP, num_chr, ShifterOperand(4));
   __ b(&remainder, LT);
@@ -2374,6 +2498,20 @@
   __ subs(num_chr, num_chr, ShifterOperand(1));
   __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
   __ b(&remainder, GT);
+  __ b(&done);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
+    __ Bind(&compressed_string_preloop);
+    __ add(src_ptr, src_ptr, ShifterOperand(srcBegin));
+    __ Bind(&compressed_string_loop);
+    __ ldrb(IP, Address(src_ptr, c_char_size, Address::PostIndex));
+    __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
+    __ subs(num_chr, num_chr, ShifterOperand(1));
+    __ b(&compressed_string_loop, GT);
+  }
 
   __ Bind(&done);
 }
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ce58657..e2c1802 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1223,6 +1223,11 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  // Need temporary registers for the String compression feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
@@ -1239,10 +1244,16 @@
   Register temp0 = WRegisterFrom(locations->GetTemp(0));
   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   Register temp2 = WRegisterFrom(locations->GetTemp(2));
+  Register temp3, temp5;
+  if (mirror::kUseStringCompression) {
+    temp3 = WRegisterFrom(locations->GetTemp(3));
+    temp5 = WRegisterFrom(locations->GetTemp(4));
+  }
 
   vixl::aarch64::Label loop;
   vixl::aarch64::Label find_char_diff;
   vixl::aarch64::Label end;
+  vixl::aarch64::Label different_compression;
 
   // Get offsets of count and value fields within a string object.
   const int32_t count_offset = mirror::String::CountOffset().Int32Value();
@@ -1263,9 +1274,18 @@
   // Reference equality check, return 0 if same reference.
   __ Subs(out, str, arg);
   __ B(&end, eq);
-  // Load lengths of this and argument strings.
-  __ Ldr(temp0, HeapOperand(str, count_offset));
-  __ Ldr(temp1, HeapOperand(arg, count_offset));
+  if (mirror::kUseStringCompression) {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp3, HeapOperand(str, count_offset));
+    __ Ldr(temp5, HeapOperand(arg, count_offset));
+    // Clean out compression flag from lengths.
+    __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000)));
+    __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000)));
+  } else {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp0, HeapOperand(str, count_offset));
+    __ Ldr(temp1, HeapOperand(arg, count_offset));
+  }
   // Return zero if both strings are empty.
   __ Orr(out, temp0, temp1);
   __ Cbz(out, &end);
@@ -1276,8 +1296,22 @@
   // Shorter string is empty?
   __ Cbz(temp2, &end);
 
+  if (mirror::kUseStringCompression) {
+    // Check if both strings using same compression style to use this comparison loop.
+    __ Eor(temp3.W(), temp3, Operand(temp5));
+    __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression);
+  }
   // Store offset of string value in preparation for comparison loop.
   __ Mov(temp1, value_offset);
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp2 as unsigned.
+    vixl::aarch64::Label let_it_signed;
+    __ Cmp(temp5, Operand(0));
+    __ B(lt, &let_it_signed);
+    __ Add(temp2, temp2, Operand(temp2));
+    __ Bind(&let_it_signed);
+  }
 
   UseScratchRegisterScope scratch_scope(masm);
   Register temp4 = scratch_scope.AcquireX();
@@ -1299,29 +1333,90 @@
   __ Cmp(temp4, temp0);
   __ B(ne, &find_char_diff);
   __ Add(temp1, temp1, char_size * 4);
-  __ Subs(temp2, temp2, 4);
-  __ B(gt, &loop);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4);
+  __ B(hi, &loop);
   __ B(&end);
 
   // Promote temp1 to an X reg, ready for EOR.
   temp1 = temp1.X();
 
-  // Find the single 16-bit character difference.
+  // Find the single character difference.
   __ Bind(&find_char_diff);
   // Get the bit position of the first character that differs.
   __ Eor(temp1, temp0, temp4);
   __ Rbit(temp1, temp1);
   __ Clz(temp1, temp1);
-  // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+  // If the number of chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
-  __ Cmp(temp2, Operand(temp1.W(), LSR, 4));
-  __ B(le, &end);
+  // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
+  // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
+  // unsigned when string compression is disabled.
+  // When it's enabled, the comparison must be unsigned.
+  __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
+  __ B(ls, &end);
   // Extract the characters and calculate the difference.
+  vixl::aarch64::Label uncompressed_string, continue_process;
+  if (mirror:: kUseStringCompression) {
+    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string);
+    __ Bic(temp1, temp1, 0x7);
+    __ B(&continue_process);
+  }
+  __ Bind(&uncompressed_string);
   __ Bic(temp1, temp1, 0xf);
+  __ Bind(&continue_process);
+
   __ Lsr(temp0, temp0, temp1);
   __ Lsr(temp4, temp4, temp1);
+  vixl::aarch64::Label uncompressed_string_extract_chars;
+  if (mirror::kUseStringCompression) {
+    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars);
+    __ And(temp4, temp4, 0xff);
+    __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB));
+    __ B(&end);
+  }
+  __ Bind(&uncompressed_string_extract_chars);
   __ And(temp4, temp4, 0xffff);
   __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
+  __ B(&end);
+
+  if (mirror::kUseStringCompression) {
+    vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff;
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    temp0 = temp0.W();
+    temp1 = temp1.W();
+    // Comparison for different compression style.
+    // This part is when THIS is compressed and ARG is not.
+    __ Bind(&different_compression);
+    __ Add(temp0, str, Operand(value_offset));
+    __ Add(temp1, arg, Operand(value_offset));
+    __ Cmp(temp5, Operand(0));
+    __ B(lt, &loop_arg_compressed);
+
+    __ Bind(&loop_this_compressed);
+    __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex));
+    __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex));
+    __ Cmp(temp3, Operand(temp5));
+    __ B(ne, &find_diff);
+    __ Subs(temp2, temp2, 1);
+    __ B(gt, &loop_this_compressed);
+    __ B(&end);
+
+    // This part is when THIS is not compressed and ARG is.
+    __ Bind(&loop_arg_compressed);
+    __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex));
+    __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex));
+    __ Cmp(temp3, Operand(temp5));
+    __ B(ne, &find_diff);
+    __ Subs(temp2, temp2, 1);
+    __ B(gt, &loop_arg_compressed);
+    __ B(&end);
+
+    // Calculate the difference.
+    __ Bind(&find_diff);
+    __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH));
+  }
 
   __ Bind(&end);
 
@@ -1356,7 +1451,7 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::aarch64::Label loop;
+  vixl::aarch64::Label loop, preloop;
   vixl::aarch64::Label end;
   vixl::aarch64::Label return_true;
   vixl::aarch64::Label return_false;
@@ -1394,22 +1489,37 @@
   __ Ldr(temp, MemOperand(str.X(), count_offset));
   __ Ldr(temp1, MemOperand(arg.X(), count_offset));
   // Check if lengths are equal, return false if they're not.
+  // This also compares the compression flags: if they differ, return false.
   __ Cmp(temp, temp1);
   __ B(&return_false, ne);
-  // Store offset of string value in preparation for comparison loop
-  __ Mov(temp1, value_offset);
   // Return true if both strings are empty.
+  if (mirror::kUseStringCompression) {
+    // Length needs to be masked out first because the MSB holds the compression flag.
+    __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000)));
+  }
   __ Cbz(temp, &return_true);
 
   // Assertions that must hold in order to compare strings 4 characters at a time.
   DCHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
 
+  if (mirror::kUseStringCompression) {
+    // If not compressed, jump directly to the fast comparison; otherwise preprocess the length.
+    __ Cmp(temp1, Operand(0));
+    __ B(&preloop, gt);
+    // For a compressed (8-bit) string, adjust the length so the loop below can compare
+    // it as 16-bit data: new_length = (length + 1) / 2.
+    __ Add(temp, temp, 1);
+    __ Lsr(temp, temp, 1);
+  }
+
   temp1 = temp1.X();
   temp2 = temp2.X();
-
   // Loop to compare strings 4 characters at a time starting at the beginning of the string.
   // Ok to do this because strings are zero-padded to be 8-byte aligned.
+  // Store offset of string value in preparation for comparison loop
+  __ Bind(&preloop);
+  __ Mov(temp1, value_offset);
   __ Bind(&loop);
   __ Ldr(out, MemOperand(str.X(), temp1));
   __ Ldr(temp2, MemOperand(arg.X(), temp1));
@@ -1773,6 +1883,10 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  // An extra temporary register is needed for the string compression feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1800,29 +1914,41 @@
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
   Register num_chr = XRegisterFrom(locations->GetTemp(1));
   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
+  Register tmp3;
+  if (mirror::kUseStringCompression) {
+    tmp3 = WRegisterFrom(locations->GetTemp(3));
+  }
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
   Register tmp2 = temps.AcquireX();
 
-  // src address to copy from.
-  __ Add(src_ptr, srcObj, Operand(value_offset));
-  __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
+  vixl::aarch64::Label done;
+  vixl::aarch64::Label compressed_string_loop;
+  __ Sub(num_chr, srcEnd, srcBegin);
+  // Early out for valid zero-length retrievals.
+  __ Cbz(num_chr, &done);
 
   // dst address start to copy to.
   __ Add(dst_ptr, dstObj, Operand(data_offset));
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
 
-  __ Sub(num_chr, srcEnd, srcBegin);
+  // src address to copy from.
+  __ Add(src_ptr, srcObj, Operand(value_offset));
+  vixl::aarch64::Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    // String's length.
+    __ Ldr(tmp3, MemOperand(srcObj, count_offset));
+    __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop);
+  }
+  __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
   // Do the copy.
   vixl::aarch64::Label loop;
-  vixl::aarch64::Label done;
   vixl::aarch64::Label remainder;
 
-  // Early out for valid zero-length retrievals.
-  __ Cbz(num_chr, &done);
-
   // Save repairing the value of num_chr on the < 8 character path.
   __ Subs(tmp1, num_chr, 8);
   __ B(lt, &remainder);
@@ -1848,6 +1974,20 @@
   __ Subs(num_chr, num_chr, 1);
   __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
   __ B(gt, &remainder);
+  __ B(&done);
+
+  if (mirror::kUseStringCompression) {
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    __ Bind(&compressed_string_preloop);
+    __ Add(src_ptr, src_ptr, Operand(srcBegin));
+    // Copy loop for a compressed source: expand one 8-bit character to 16 bits per iteration.
+    __ Bind(&compressed_string_loop);
+    __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
+    __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
+    __ Subs(num_chr, num_chr, Operand(1));
+    __ B(gt, &compressed_string_loop);
+  }
 
   __ Bind(&done);
 }
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5d53062..cdb4c25 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1710,7 +1710,11 @@
     .cfi_rel_offset lr, 12
     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
     add   r0, #MIRROR_STRING_VALUE_OFFSET
-
+#if (STRING_COMPRESSION_FEATURE)
+    /* r4 holds the count (with the compression flag); r3 holds the actual length */
+    mov   r4, r3
+    bic   r3, #2147483648
+#endif
     /* Clamp start to [0..count] */
     cmp   r2, #0
     it    lt
@@ -1723,6 +1727,10 @@
     mov   r12, r0
 
     /* Build pointer to start of data to compare and pre-bias */
+#if (STRING_COMPRESSION_FEATURE)
+    cmp   r4, #0
+    blt   .Lstring_indexof_compressed
+#endif
     add   r0, r0, r2, lsl #1
     sub   r0, #2
 
@@ -1734,6 +1742,7 @@
      *   r0: start of data to test
      *   r1: char to compare
      *   r2: iteration count
+     *   r4: compression style (used temporarily)
      *   r12: original start of string data
      *   r3, r4, r10, r11 available for loading string data
      */
@@ -1791,6 +1800,22 @@
     sub   r0, r12
     asr   r0, r0, #1
     pop {r4, r10-r11, pc}
+#if (STRING_COMPRESSION_FEATURE)
+.Lstring_indexof_compressed:
+    add   r0, r0, r2
+    sub   r0, #1
+    sub   r2, r3, r2
+.Lstring_indexof_compressed_loop:
+    subs  r2, #1
+    blt   .Lindexof_nomatch
+    ldrb  r3, [r0, #1]!
+    cmp   r3, r1
+    beq   .Lstring_indexof_compressed_matched
+    b     .Lstring_indexof_compressed_loop
+.Lstring_indexof_compressed_matched:
+    sub   r0, r12
+    pop {r4, r10-r11, pc}
+#endif
 END art_quick_indexof
 
     /* Assembly routines used to handle ABI differences. */
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index eee949d..04a3cc6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2378,7 +2378,11 @@
 ENTRY art_quick_indexof
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
     add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
-
+#if (STRING_COMPRESSION_FEATURE)
+    /* w4 holds the count (with the compression flag); w3 holds the actual length */
+    mov   w4, w3
+    and   w3, w3, #2147483647
+#endif
     /* Clamp start to [0..count] */
     cmp   w2, #0
     csel  w2, wzr, w2, lt
@@ -2388,10 +2392,12 @@
     /* Save a copy to compute result */
     mov   x5, x0
 
+#if (STRING_COMPRESSION_FEATURE)
+    tbnz  w4, #31, .Lstring_indexof_compressed
+#endif
     /* Build pointer to start of data to compare and pre-bias */
     add   x0, x0, x2, lsl #1
     sub   x0, x0, #2
-
     /* Compute iteration count */
     sub   w2, w3, w2
 
@@ -2456,6 +2462,26 @@
     sub   x0, x0, x5
     asr   x0, x0, #1
     ret
+#if (STRING_COMPRESSION_FEATURE)
+   /*
+    * Comparing compressed string character-per-character with
+    * input character
+    */
+.Lstring_indexof_compressed:
+    add   x0, x0, x2
+    sub   x0, x0, #1
+    sub   w2, w3, w2
+.Lstring_indexof_compressed_loop:
+    subs  w2, w2, #1
+    b.lt  .Lindexof_nomatch
+    ldrb  w6, [x0, #1]!
+    cmp   w6, w1
+    b.eq  .Lstring_indexof_compressed_matched
+    b     .Lstring_indexof_compressed_loop
+.Lstring_indexof_compressed_matched:
+    sub   x0, x0, x5
+    ret
+#endif
 END art_quick_indexof
 
     /*