Translate GEP instructions on structs to GEP on int8*

http://b/24142721

The X86 data layout aligns 64-bit scalars at 4-byte boundaries, while ARM
aligns them at 8-byte boundaries.  To bring frontend- and backend-generated
code into agreement, this patch adds a pass that translates GEPs on structs
into GEPs with int8* operands and byte offsets.  This pass runs only on X86
targets.

Change-Id: I961b2bc7db9f57f15126f27396db42c9deedfd43
diff --git a/include/bcc/Compiler.h b/include/bcc/Compiler.h
index a0925b8..1097bad 100644
--- a/include/bcc/Compiler.h
+++ b/include/bcc/Compiler.h
@@ -68,7 +68,9 @@
 
     kErrInvalidSource,
 
-    kIllegalGlobalFunction
+    kIllegalGlobalFunction,
+
+    kErrInvalidTargetMachine
   };
 
   static const char *GetErrorString(enum ErrorCode pErrCode);
@@ -116,6 +118,8 @@
   // all RenderScript functions.  Returns error if any external function that is
   // not in this whitelist is callable from the script.
   enum ErrorCode screenGlobalFunctions(Script &pScript);
+
+  void translateGEPs(Script &pScript);
 };
 
 } // end namespace bcc
diff --git a/include/bcc/Config/Config.h b/include/bcc/Config/Config.h
index 1b7e4b2..8293827 100644
--- a/include/bcc/Config/Config.h
+++ b/include/bcc/Config/Config.h
@@ -84,6 +84,9 @@
 // Custom DataLayout string for X86 with i64 and f64 set to match the ARM32
 // alignment requirement of 64-bits.
 #define X86_CUSTOM_DL_STRING "e-m:e-p:32:32-i64:64-f64:64:64-f80:32-n8:16:32-S128"
+// Default DataLayout string for X86.  Present to detect future LLVM datalayout
+// changes so X86_CUSTOM_DL_STRING above can be modified appropriately.
+#define X86_DEFAULT_DL_STRING "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
 
 #if defined(DEFAULT_ARM_CODEGEN)
   #define DEFAULT_TARGET_TRIPLE_STRING DEFAULT_ARM_TRIPLE_STRING
diff --git a/include/bcc/Renderscript/RSTransforms.h b/include/bcc/Renderscript/RSTransforms.h
index 66353a3..04e8460 100644
--- a/include/bcc/Renderscript/RSTransforms.h
+++ b/include/bcc/Renderscript/RSTransforms.h
@@ -47,6 +47,8 @@
 
 llvm::ModulePass * createRSAddDebugInfoPass();
 
+llvm::FunctionPass *createRSX86TranslateGEPPass();
+
 } // end namespace bcc
 
 #endif // BCC_RS_TRANSFORMS_H
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index fe16b71..a9f1df0 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -32,6 +32,7 @@
 #include <llvm/Transforms/Vectorize.h>
 
 #include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
 #include "bcc/Renderscript/RSScript.h"
 #include "bcc/Renderscript/RSTransforms.h"
 #include "bcc/Script.h"
@@ -74,6 +75,8 @@
     return "Error loading input bitcode";
   case kIllegalGlobalFunction:
     return "Use of undefined external function";
+  case kErrInvalidTargetMachine:
+    return "Invalid/unexpected llvm::TargetMachine.";
   }
 
   // This assert should never be reached as the compiler verifies that the
@@ -251,6 +254,15 @@
     return kErrInvalidSource;
   }
 
+  if (getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+    // Fail if the TargetMachine datalayout differs from the one we expect.
+    // This detects changes in the default x86 datalayout so that
+    // X86_CUSTOM_DL_STRING in include/bcc/Config/Config.h can be updated.
+    if (dl.getStringRepresentation().compare(X86_DEFAULT_DL_STRING) != 0) {
+      return kErrInvalidTargetMachine;
+    }
+  }
+
   // Sanitize module's target information.
   module.setTargetTriple(getTargetMachine().getTargetTriple().str());
   module.setDataLayout(getTargetMachine().createDataLayout());
@@ -453,3 +465,11 @@
   return kSuccess;
 
 }
+
+void Compiler::translateGEPs(Script &pScript) {
+  // The module was already materialized in screenGlobalFunctions above.
+  llvm::Module &module = pScript.getSource().getModule();
+  llvm::legacy::PassManager passManager;
+  passManager.add(createRSX86TranslateGEPPass());
+  passManager.run(module);
+}
diff --git a/lib/Renderscript/Android.mk b/lib/Renderscript/Android.mk
index b406be7..e392994 100644
--- a/lib/Renderscript/Android.mk
+++ b/lib/Renderscript/Android.mk
@@ -34,7 +34,8 @@
   RSScreenFunctionsPass.cpp \
   RSStubsWhiteList.cpp \
   RSScriptGroupFusion.cpp \
-  RSX86CallConvPass.cpp
+  RSX86CallConvPass.cpp \
+  RSX86TranslateGEPPass.cpp
 
 #=====================================================================
 # Device Static Library: libbccRenderscript
diff --git a/lib/Renderscript/RSCompilerDriver.cpp b/lib/Renderscript/RSCompilerDriver.cpp
index 0b63ea2..1dfc699 100644
--- a/lib/Renderscript/RSCompilerDriver.cpp
+++ b/lib/Renderscript/RSCompilerDriver.cpp
@@ -23,6 +23,7 @@
 #include <llvm/Support/CommandLine.h>
 #include <llvm/Support/Path.h>
 #include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
 
 #include "bcinfo/BitcodeWrapper.h"
 #include "bcc/Assert.h"
@@ -126,6 +127,19 @@
     return Compiler::kErrInvalidSource;
   }
 
+  // For (32-bit) x86, translate GEPs on structs or arrays of structs to GEPs on
+  // int8* with byte offsets.  This is to ensure that layout of structs with
+  // 64-bit scalar fields matches frontend-generated code that adheres to ARM
+  // data layout.
+  //
+  // The translation is done before RenderScript runtime library is linked
+  // (during LinkRuntime below) to ensure that RenderScript-driver-provided
+  // structs (like Allocation_t) don't get forced into using the ARM layout
+  // rules.
+  if (mCompiler.getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+    mCompiler.translateGEPs(pScript);
+  }
+
   //===--------------------------------------------------------------------===//
   // Link RS script with Renderscript runtime.
   //===--------------------------------------------------------------------===//
diff --git a/lib/Renderscript/RSX86TranslateGEPPass.cpp b/lib/Renderscript/RSX86TranslateGEPPass.cpp
new file mode 100644
index 0000000..75fc2ed
--- /dev/null
+++ b/lib/Renderscript/RSX86TranslateGEPPass.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
+#include "bcc/Support/Log.h"
+#include "bcc/Renderscript/RSTransforms.h"
+
+#include <cstdlib>
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Pass.h>
+#include <llvm/IR/GetElementPtrTypeIterator.h>
+
+namespace { // anonymous namespace
+
+/* This pass translates GEPs that index into structs or arrays of structs to
+ * GEPs with an int8* operand and a byte offset.  This translation is done to
+ * enforce on x86 the ARM alignment rule that 64-bit scalars be 8-byte aligned
+ * for structs with such scalars.  Offsets come from X86_CUSTOM_DL_STRING.
+ */
+class RSX86TranslateGEPPass : public llvm::FunctionPass {
+private:
+  static char ID;
+  llvm::LLVMContext *Context = nullptr;  // set by runOnFunction()
+  const llvm::DataLayout DL;             // built from X86_CUSTOM_DL_STRING
+
+  // Walk a GEP instruction and return true if any type indexed is a struct.
+  bool GEPIndexesStructType(const llvm::GetElementPtrInst *GEP) {
+    for (auto GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      if (llvm::isa<llvm::StructType>(*GTI))
+        return true;
+    }
+    return false;
+  }
+
+  // Helper method to add two llvm::Value parameters; a null accum yields incr.
+  llvm::Value *incrementOffset(llvm::Value *accum, llvm::Value *incr,
+                               llvm::Instruction *InsertBefore) {
+    if (accum == nullptr)
+      return incr;
+    return llvm::BinaryOperator::CreateAdd(accum, incr, "", InsertBefore);
+  }
+
+  // Compute the byte offset (an i32) for a GEP from its base pointer operand.
+  // Based on visitGetElementPtrInst in llvm/lib/Transforms/Scalar/SROA.cpp, but
+  // handles non-constant array indices by emitting, before GEP, instructions
+  // that calculate the offset; subsequent optimizations are relied upon to
+  // fold/combine them.  Returns nullptr if a struct index is not constant;
+  // instructions emitted before that point are left unused.
+  llvm::Value *computeGEPOffset(llvm::GetElementPtrInst *GEP) {
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
+    llvm::Value *Offset = nullptr;
+
+    for (auto GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      llvm::Value *Incr = nullptr;
+      if (llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(*GTI)) {
+        llvm::ConstantInt *OpC = llvm::dyn_cast<llvm::ConstantInt>(GTI.getOperand());
+        if (!OpC) {
+          ALOGE("Operand for struct type is not constant!");
+          bccAssert(false);
+          return nullptr;  // do not dereference OpC if the assert is a no-op
+        }
+        // Offset = Offset + EltOffset for index into a struct
+        const llvm::StructLayout *SL = DL.getStructLayout(STy);
+        Incr = llvm::ConstantInt::get(
+            Int32Ty, SL->getElementOffset(OpC->getZExtValue()));
+      } else {
+        // Offset = Offset + Index * EltSize for an array or vector index.
+        // The index may have any integer width (e.g. i64), but the multiply
+        // needs both operands to be i32: sign-extend or truncate it first.
+        llvm::Value *Index = GTI.getOperand();
+        if (Index->getType() != Int32Ty) {
+          Index = llvm::CastInst::CreateIntegerCast(
+              Index, Int32Ty, /* isSigned */ true, "", GEP);
+        }
+        llvm::Value *EltSize = llvm::ConstantInt::get(
+            Int32Ty, DL.getTypeAllocSize(GTI.getIndexedType()));
+        Incr = llvm::BinaryOperator::CreateMul(Index, EltSize, "", GEP);
+      }
+      Offset = incrementOffset(Offset, Incr, GEP);
+    }
+
+    return Offset;
+  }
+
+  // Redirect all uses of GEP to an equivalent address computed on an int8*
+  // with a byte offset.  GEP itself is left for the caller to erase.  Returns
+  // false, with the uses of GEP untouched, if the offset cannot be computed.
+  bool translateGEP(llvm::GetElementPtrInst *GEP) {
+    llvm::Value *Indices[1] = {computeGEPOffset(GEP)};
+    if (Indices[0] == nullptr)
+      return false;
+
+    // cast GEP pointer operand to int8*
+    llvm::CastInst *Int8Ptr = llvm::CastInst::CreatePointerCast(
+        GEP->getPointerOperand(), llvm::Type::getInt8PtrTy(*Context),
+        "to.int8ptr", GEP);
+
+    // index into the int8* based on the byte offset
+    llvm::GetElementPtrInst *Int8PtrGEP = llvm::GetElementPtrInst::Create(
+        llvm::Type::getInt8Ty(*Context), Int8Ptr, llvm::makeArrayRef(Indices),
+        "int8ptr.indexed", GEP);
+    Int8PtrGEP->setIsInBounds(GEP->isInBounds());
+
+    // cast the indexed int8* back to the type of the original GEP
+    llvm::CastInst *OutCast = llvm::CastInst::CreatePointerCast(
+        Int8PtrGEP, GEP->getType(), "to.orig.geptype", GEP);
+
+    GEP->replaceAllUsesWith(OutCast);
+    return true;
+  }
+
+public:
+  RSX86TranslateGEPPass() : FunctionPass(ID), DL(X86_CUSTOM_DL_STRING) {}
+
+  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+    // This pass is run in isolation in a separate pass manager.  So setting
+    // AnalysisUsage is unnecessary.  Set just for completeness.
+    AU.setPreservesCFG();
+  }
+
+  virtual bool runOnFunction(llvm::Function &F) override {
+    bool changed = false;
+    Context = &F.getParent()->getContext();
+
+    // Collect the GEPs first and translate them afterwards, to avoid
+    // updating/deleting instructions while walking a BasicBlock's instructions.
+    std::vector<llvm::GetElementPtrInst *> GEPsToHandle;
+    for (auto &BB: F) {
+      for (auto &I: BB) {
+        auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(&I);
+        if (GEP && GEPIndexesStructType(GEP))
+          GEPsToHandle.push_back(GEP);
+      }
+    }
+
+    for (auto *GEP: GEPsToHandle) {
+      // A failed translation may still have emitted (unused) instructions.
+      changed = true;
+      if (translateGEP(GEP))
+        GEP->eraseFromParent();  // all uses were redirected
+    }
+
+    return changed;
+  }
+
+  virtual const char *getPassName() const override {
+    return "Translate GEPs on structs, intended for x86 target";
+  }
+};
+
+}
+
+char RSX86TranslateGEPPass::ID = 0;
+
+namespace bcc {
+
+// Returns a newly allocated instance of the x86 GEP translation pass.
+llvm::FunctionPass *createRSX86TranslateGEPPass() {
+  return new RSX86TranslateGEPPass();
+}
+
+} // end namespace bcc