Merge changes from topic 'struct-fix' into nyc-dev

* changes:
  Translate GEP instructions on structs to GEP on int8*
  X86: Fix kernel Input/Output GetElementPtr offset issue
diff --git a/include/bcc/Compiler.h b/include/bcc/Compiler.h
index a0925b8..1097bad 100644
--- a/include/bcc/Compiler.h
+++ b/include/bcc/Compiler.h
@@ -68,7 +68,9 @@
 
     kErrInvalidSource,
 
-    kIllegalGlobalFunction
+    kIllegalGlobalFunction,
+
+    kErrInvalidTargetMachine
   };
 
   static const char *GetErrorString(enum ErrorCode pErrCode);
@@ -116,6 +118,8 @@
   // all RenderScript functions.  Returns error if any external function that is
   // not in this whitelist is callable from the script.
   enum ErrorCode screenGlobalFunctions(Script &pScript);
+
+  void translateGEPs(Script &pScript);
 };
 
 } // end namespace bcc
diff --git a/include/bcc/Config/Config.h b/include/bcc/Config/Config.h
index 5c209fd..8293827 100644
--- a/include/bcc/Config/Config.h
+++ b/include/bcc/Config/Config.h
@@ -81,6 +81,13 @@
 #define DEFAULT_X86_TRIPLE_STRING      "i686-unknown-linux"
 #define DEFAULT_X86_64_TRIPLE_STRING   "x86_64-unknown-linux"
 
+// Custom DataLayout string for X86 with i64 and f64 set to match the ARM32
+// alignment requirement of 64-bits.
+#define X86_CUSTOM_DL_STRING "e-m:e-p:32:32-i64:64-f64:64:64-f80:32-n8:16:32-S128"
+// Default DataLayout string for X86.  Present to detect future LLVM datalayout
+// changes so X86_CUSTOM_DL_STRING above can be modified appropriately.
+#define X86_DEFAULT_DL_STRING "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
 #if defined(DEFAULT_ARM_CODEGEN)
   #define DEFAULT_TARGET_TRIPLE_STRING DEFAULT_ARM_TRIPLE_STRING
 #elif defined(DEFAULT_ARM64_CODEGEN)
diff --git a/include/bcc/Renderscript/RSTransforms.h b/include/bcc/Renderscript/RSTransforms.h
index 66353a3..04e8460 100644
--- a/include/bcc/Renderscript/RSTransforms.h
+++ b/include/bcc/Renderscript/RSTransforms.h
@@ -47,6 +47,8 @@
 
 llvm::ModulePass * createRSAddDebugInfoPass();
 
+llvm::FunctionPass *createRSX86TranslateGEPPass();
+
 } // end namespace bcc
 
 #endif // BCC_RS_TRANSFORMS_H
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index 57fd392..1988da3 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -32,6 +32,7 @@
 #include <llvm/Transforms/Vectorize.h>
 
 #include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
 #include "bcc/Renderscript/RSScript.h"
 #include "bcc/Renderscript/RSTransforms.h"
 #include "bcc/Renderscript/RSUtils.h"
@@ -75,6 +76,8 @@
     return "Error loading input bitcode";
   case kIllegalGlobalFunction:
     return "Use of undefined external function";
+  case kErrInvalidTargetMachine:
+    return "Invalid/unexpected llvm::TargetMachine.";
   }
 
   // This assert should never be reached as the compiler verifies that the
@@ -252,6 +255,15 @@
     return kErrInvalidSource;
   }
 
+  if (getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+    // Fail if the TargetMachine datalayout differs from the one we expect.  A
+    // mismatch means the default x86 layout in LLVM has changed, in which case
+    // X86_CUSTOM_DL_STRING in include/bcc/Config/Config.h must be updated.
+    if (dl.getStringRepresentation().compare(X86_DEFAULT_DL_STRING) != 0) {
+      return kErrInvalidTargetMachine;
+    }
+  }
+
   // Sanitize module's target information.
   module.setTargetTriple(getTargetMachine().getTargetTriple().str());
   module.setDataLayout(getTargetMachine().createDataLayout());
@@ -458,3 +470,11 @@
   return kSuccess;
 
 }
+
+void Compiler::translateGEPs(Script &pScript) {
+  // The module was already materialized by screenGlobalFunctions(), so the
+  // GEP translation pass can run on it directly in its own pass manager.
+  llvm::legacy::PassManager passes;
+  passes.add(createRSX86TranslateGEPPass());
+  passes.run(pScript.getSource().getModule());
+}
diff --git a/lib/Renderscript/Android.mk b/lib/Renderscript/Android.mk
index b406be7..e392994 100644
--- a/lib/Renderscript/Android.mk
+++ b/lib/Renderscript/Android.mk
@@ -34,7 +34,8 @@
   RSScreenFunctionsPass.cpp \
   RSStubsWhiteList.cpp \
   RSScriptGroupFusion.cpp \
-  RSX86CallConvPass.cpp
+  RSX86CallConvPass.cpp \
+  RSX86TranslateGEPPass.cpp
 
 #=====================================================================
 # Device Static Library: libbccRenderscript
diff --git a/lib/Renderscript/RSCompilerDriver.cpp b/lib/Renderscript/RSCompilerDriver.cpp
index d851187..0370e42 100644
--- a/lib/Renderscript/RSCompilerDriver.cpp
+++ b/lib/Renderscript/RSCompilerDriver.cpp
@@ -23,6 +23,7 @@
 #include <llvm/Support/CommandLine.h>
 #include <llvm/Support/Path.h>
 #include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
 
 #include "bcinfo/BitcodeWrapper.h"
 #include "bcc/Assert.h"
@@ -126,6 +127,19 @@
     return Compiler::kErrInvalidSource;
   }
 
+  // For (32-bit) x86, translate GEPs on structs or arrays of structs to GEPs on
+  // int8* with byte offsets.  This is to ensure that layout of structs with
+  // 64-bit scalar fields matches frontend-generated code that adheres to ARM
+  // data layout.
+  //
+  // The translation is done before RenderScript runtime library is linked
+  // (during LinkRuntime below) to ensure that RenderScript-driver-provided
+  // structs (like Allocation_t) don't get forced into using the ARM layout
+  // rules.
+  if (mCompiler.getTargetMachine().getTargetTriple().getArch() == llvm::Triple::x86) {
+    mCompiler.translateGEPs(pScript);
+  }
+
   //===--------------------------------------------------------------------===//
   // Link RS script with Renderscript runtime.
   //===--------------------------------------------------------------------===//
diff --git a/lib/Renderscript/RSKernelExpand.cpp b/lib/Renderscript/RSKernelExpand.cpp
index 4193639..d7e4996 100644
--- a/lib/Renderscript/RSKernelExpand.cpp
+++ b/lib/Renderscript/RSKernelExpand.cpp
@@ -646,8 +646,9 @@
   // ArgIter - iterator pointing to first input of the UNexpanded function
   // NumInputs - number of inputs (NOT number of ARGUMENTS)
   //
-  // InBufPtrs[] - this function sets each array element to point to the first
-  //               cell of the corresponding input allocation
+  // InTypes[] - this function saves the type of each input, for later use by ExpandInputsBody().
+  // InBufPtrs[] - this function sets each array element to point to the first cell / byte
+  //               (byte for x86, cell for other platforms) of the corresponding input allocation
   // InStructTempSlots[] - this function sets each array element either to nullptr
   //                       or to the result of an alloca (for the case where the
   //                       calling convention dictates that a value must be passed
@@ -658,6 +659,7 @@
                                  llvm::MDNode *TBAAPointer,
                                  llvm::Function::arg_iterator ArgIter,
                                  const size_t NumInputs,
+                                 llvm::SmallVectorImpl<llvm::Type *> &InTypes,
                                  llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
                                  llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
     bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
@@ -693,12 +695,25 @@
                                              static_cast<int32_t>(InputIndex)}));
       llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
       llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
-      llvm::Value    *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
+
+      llvm::Value *CastInBufPtr = nullptr;
+      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
+        CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
+      } else {
+        // The disagreement between module and x86 target machine datalayout
+        // causes mismatched input/output data offset between slang reflected
+        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
+        // cast to InType and leave CastInBufPtr as an int8_t*.  The buffer is
+        // later indexed with an explicit byte offset computed based on
+        // X86_CUSTOM_DL_STRING, and the result is bitcast to the actual input type.
+        CastInBufPtr = InBufPtr;
+      }
 
       if (gEnableRsTbaa) {
         InBufPtr->setMetadata("tbaa", TBAAPointer);
       }
 
+      InTypes.push_back(InType);
       InBufPtrs.push_back(CastInBufPtr);
     }
 
@@ -713,6 +728,8 @@
   // Arg_x1 - first X coordinate to be processed by the expanded function
   // TBAAAllocation - metadata for marking loads of input values out of allocations
   // NumInputs -- number of inputs (NOT number of ARGUMENTS)
+  // InTypes[] - this function uses the input types saved by ExpandInputsLoopInvariant()
+  //             to cast the byte pointer InPtr back to its real pointer type (x86 only).
   // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
   // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
   // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
@@ -723,16 +740,32 @@
                         llvm::Value *Arg_x1,
                         llvm::MDNode *TBAAAllocation,
                         const size_t NumInputs,
+                        const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
                         const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
                         const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
                         llvm::Value *IndVar,
                         llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
     llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
 
     for (size_t Index = 0; Index < NumInputs; ++Index) {
-      llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
-      llvm::Value *Input;
 
+      llvm::Value *InPtr = nullptr;
+      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
+        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
+      } else {
+        // Treat x86 input buffer as byte[], get indexed pointer with explicit
+        // byte offset computed using a datalayout based on
+        // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
+        llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
+        llvm::Type *InTy = InTypes[Index];
+        uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
+        llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
+        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
+        InPtr = Builder.CreatePointerCast(InPtr, InTy);
+      }
+
+      llvm::Value *Input;
       llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
 
       if (gEnableRsTbaa) {
@@ -774,6 +807,9 @@
     }
 
     llvm::DataLayout DL(Module);
+    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
+      DL.reset(X86_CUSTOM_DL_STRING);
+    }
 
     llvm::Function *ExpandedFunction =
       createEmptyExpandedForEachKernel(Function->getName());
@@ -904,6 +940,10 @@
 
     // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
     llvm::DataLayout DL(Module);
+    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
+      DL.reset(X86_CUSTOM_DL_STRING);
+    }
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
 
     llvm::Function *ExpandedFunction =
       createEmptyExpandedForEachKernel(Function->getName());
@@ -984,9 +1024,20 @@
         OutBasePtr->setMetadata("tbaa", TBAAPointer);
       }
 
-      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
+      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
+        CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
+      } else {
+        // The disagreement between module and x86 target machine datalayout
+        // causes mismatched input/output data offset between slang reflected
+        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
+        // cast to OutTy and leave CastedOutBasePtr as an int8_t*.  The buffer
+        // is later indexed with an explicit byte offset computed based on
+        // X86_CUSTOM_DL_STRING, and the result is bitcast to the actual output type.
+        CastedOutBasePtr = OutBasePtr;
+      }
     }
 
+    llvm::SmallVector<llvm::Type*,  8> InTypes;
     llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
     llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
 
@@ -1011,7 +1062,7 @@
 
     if (NumInPtrArguments > 0) {
       ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
-                                InBufPtrs, InStructTempSlots);
+                                InTypes, InBufPtrs, InStructTempSlots);
     }
 
     // Populate the actual call to kernel().
@@ -1024,7 +1075,18 @@
     llvm::Value *OutPtr = nullptr;
     if (CastedOutBasePtr) {
       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
-      OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
+
+      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
+        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
+      } else {
+        // Treat x86 output buffer as byte[], get indexed pointer with explicit
+        // byte offset computed using a datalayout based on
+        // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
+        uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
+        llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
+        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
+        OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
+      }
 
       if (PassOutByPointer) {
         RootArgs.push_back(OutPtr);
@@ -1035,7 +1097,7 @@
 
     if (NumInPtrArguments > 0) {
       ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
-                       InBufPtrs, InStructTempSlots, IV, RootArgs);
+                       InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
     }
 
     finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
@@ -1122,6 +1184,9 @@
     ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str());
 
     llvm::DataLayout DL(Module);
+    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
+      DL.reset(X86_CUSTOM_DL_STRING);
+    }
 
     // TBAA Metadata
     llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation;
@@ -1412,15 +1477,16 @@
         ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
                                [](){}, LoopHeader->getTerminator());
 
+    llvm::SmallVector<llvm::Type*,  8> InTypes;
     llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
     llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
     ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
-                              InBufPtrs, InStructTempSlots);
+                              InTypes, InBufPtrs, InStructTempSlots);
 
     // Populate the actual call to the original accumulator.
     llvm::SmallVector<llvm::Value*, 8> RootArgs;
     RootArgs.push_back(Arg_accum);
-    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InBufPtrs, InStructTempSlots,
+    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
                      IndVar, RootArgs);
     finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
     Builder.CreateCall(FnAccumulator, RootArgs);
diff --git a/lib/Renderscript/RSX86TranslateGEPPass.cpp b/lib/Renderscript/RSX86TranslateGEPPass.cpp
new file mode 100644
index 0000000..75fc2ed
--- /dev/null
+++ b/lib/Renderscript/RSX86TranslateGEPPass.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bcc/Assert.h"
+#include "bcc/Config/Config.h"
+#include "bcc/Support/Log.h"
+#include "bcc/Renderscript/RSTransforms.h"
+
+#include <cstdlib>
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Pass.h>
+#include <llvm/IR/GetElementPtrTypeIterator.h>
+
+namespace { // anonymous namespace
+
+/* This pass translates GEPs that index into structs or arrays of structs to
+ * GEPs with an int8* operand and a byte offset.  This translation is done to
+ * enforce on x86 the ARM alignment rule that 64-bit scalars be 8-byte aligned
+ * for structs with such scalars.
+ */
+class RSX86TranslateGEPPass : public llvm::FunctionPass {
+private:
+  static char ID;
+  // Context of the module being processed; set at the top of runOnFunction().
+  llvm::LLVMContext *Context;
+  // Layout built from X86_CUSTOM_DL_STRING; used for all sizes and offsets.
+  const llvm::DataLayout DL;
+
+  // Walk a GEP instruction and return true if any type indexed is a struct.
+  bool GEPIndexesStructType(const llvm::GetElementPtrInst *GEP) {
+    for (llvm::gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      if (llvm::isa<llvm::StructType>(*GTI)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Return accum + incr as an add instruction inserted before InsertBefore.
+  // A null accum means nothing has been accumulated yet, so incr is returned.
+  llvm::Value *incrementOffset(llvm::Value *accum, llvm::Value *incr,
+                               llvm::Instruction *InsertBefore) {
+    if (accum == nullptr)
+      return incr;
+    return llvm::BinaryOperator::CreateAdd(accum, incr, "", InsertBefore);
+  }
+
+  // Compute the byte offset for a GEP from the GEP's base pointer operand.
+  // Based on visitGetElementPtrInst in llvm/lib/Transforms/Scalar/SROA.cpp.
+  // The difference is that this function handles non-constant array indices and
+  // constructs a sequence of instructions to calculate the offset.  These
+  // instructions might not be the most efficient way to calculate this offset,
+  // but we rely on subsequent optimizations to do necessary fold/combine.
+  //
+  // The offset is accumulated as an i32.  An array/vector index of any other
+  // integer width is first sign-extended or truncated to i32, because the
+  // multiply and add created here require operands of identical type.
+  llvm::Value *computeGEPOffset(llvm::GetElementPtrInst *GEP) {
+    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
+    llvm::Value *Offset = nullptr;
+
+    for (llvm::gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+         GTI != GTE; ++GTI) {
+      if (llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(*GTI)) {
+        llvm::ConstantInt *OpC = llvm::dyn_cast<llvm::ConstantInt>(GTI.getOperand());
+        if (!OpC) {
+          ALOGE("Operand for struct type is not constant!");
+          bccAssert(false);
+        }
+
+        // Offset = Offset + EltOffset for index into a struct
+        const llvm::StructLayout *SL = DL.getStructLayout(STy);
+        unsigned EltOffset = SL->getElementOffset(OpC->getZExtValue());
+        Offset = incrementOffset(Offset, llvm::ConstantInt::get(Int32Ty, EltOffset), GEP);
+      } else {
+        // Offset = Offset + Index * EltSize for index into an array or a vector
+        llvm::Value *Index = GTI.getOperand();
+        if (Index->getType() != Int32Ty) {
+          Index = llvm::CastInst::CreateIntegerCast(Index, Int32Ty, true /* isSigned */, "", GEP);
+        }
+        llvm::Value *EltSize = llvm::ConstantInt::get(
+            Int32Ty, DL.getTypeAllocSize(GTI.getIndexedType()));
+        llvm::Value *Incr = llvm::BinaryOperator::CreateMul(Index, EltSize, "", GEP);
+        Offset = incrementOffset(Offset, Incr, GEP);
+      }
+    }
+
+    return Offset;
+  }
+
+  // Replace all uses of GEP with an equivalent int8* GEP at a byte offset, cast
+  // back to the original result type.  GEP itself is left for the caller to erase.
+  void translateGEP(llvm::GetElementPtrInst *GEP) {
+    // cast GEP pointer operand to int8*
+    llvm::CastInst *Int8Ptr = llvm::CastInst::CreatePointerCast(
+        GEP->getPointerOperand(), llvm::Type::getInt8PtrTy(*Context), "to.int8ptr", GEP);
+    llvm::Value *Indices[1] = {computeGEPOffset(GEP)};
+
+    // index into the int8* based on the byte offset
+    llvm::GetElementPtrInst *Int8PtrGEP = llvm::GetElementPtrInst::Create(
+        llvm::Type::getInt8Ty(*Context), Int8Ptr, llvm::makeArrayRef(Indices),
+        "int8ptr.indexed", GEP);
+    Int8PtrGEP->setIsInBounds(GEP->isInBounds());
+
+    // cast the indexed int8* back to the type of the original GEP
+    llvm::CastInst *OutCast = llvm::CastInst::CreatePointerCast(
+        Int8PtrGEP, GEP->getType(), "to.orig.geptype", GEP);
+
+    GEP->replaceAllUsesWith(OutCast);
+  }
+
+public:
+  RSX86TranslateGEPPass() : FunctionPass(ID), Context(nullptr), DL(X86_CUSTOM_DL_STRING) {}
+
+  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+    // This pass is run in isolation in a separate pass manager.  So setting
+    // AnalysisUsage is unnecessary.  Set just for completeness.
+    AU.setPreservesCFG();
+  }
+
+  virtual bool runOnFunction(llvm::Function &F) override {
+    Context = &F.getParent()->getContext();
+
+    // Collect the GEPs to translate first, so that no instruction is inserted or
+    // erased while a BasicBlock's instruction list is being walked.
+    std::vector<llvm::GetElementPtrInst *> GEPsToHandle;
+
+    for (auto &BB: F) {
+      for (auto &I: BB) {
+        if (auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
+          if (GEPIndexesStructType(GEP)) {
+            GEPsToHandle.push_back(GEP);
+          }
+        }
+      }
+    }
+
+    for (auto *GEP: GEPsToHandle) {
+      // Translate GEPs and erase them
+      translateGEP(GEP);
+      GEP->eraseFromParent();
+    }
+
+    // The function changed iff at least one GEP was rewritten.
+    return !GEPsToHandle.empty();
+  }
+
+  virtual const char *getPassName() const override {
+    return "Translate GEPs on structs, intended for x86 target";
+  }
+};
+
+}
+
+char RSX86TranslateGEPPass::ID = 0;  // Storage for the ID passed to the FunctionPass constructor.
+
+namespace bcc {
+
+// Factory for the x86 GEP translation pass; returns a newly allocated pass.
+llvm::FunctionPass *createRSX86TranslateGEPPass() {
+  return new RSX86TranslateGEPPass();
+}
+
+}