resolved conflicts for merge of b9a4701b to klp-dev
Change-Id: I9f7924c489d36913e130fd5e7617e3a96e65f1a1
diff --git a/Android.mk b/Android.mk
old mode 100755
new mode 100644
index 13fe13e..c26a122
--- a/Android.mk
+++ b/Android.mk
@@ -49,11 +49,8 @@
$(call intermediates-dir-for,SHARED_LIBRARIES,libclcore_debug.bc,,)/libclcore_debug.bc
ifeq ($(ARCH_ARM_HAVE_NEON),true)
- # Disable NEON on cortex-a15 temporarily
- ifneq ($(strip $(TARGET_CPU_VARIANT)), cortex-a15)
- libbcc_SHA1_SRCS += \
- $(call intermediates-dir-for,SHARED_LIBRARIES,libclcore_neon.bc,,)/libclcore_neon.bc
- endif
+ libbcc_SHA1_SRCS += \
+ $(call intermediates-dir-for,SHARED_LIBRARIES,libclcore_neon.bc,,)/libclcore_neon.bc
endif
libbcc_GEN_SHA1_STAMP := $(LOCAL_PATH)/tools/build/gen-sha1-stamp.py
@@ -97,10 +94,7 @@
endif
ifeq ($(ARCH_ARM_HAVE_NEON),true)
- # Disable NEON on cortex-a15 temporarily
- ifneq ($(strip $(TARGET_CPU_VARIANT)), cortex-a15)
- LOCAL_REQUIRED_MODULES += libclcore_neon.bc
- endif
+ LOCAL_REQUIRED_MODULES += libclcore_neon.bc
endif
# Generate build information (Build time + Build git revision + Build Semi SHA1)
diff --git a/CleanSpec.mk b/CleanSpec.mk
index 1d4b562..5e023f4 100644
--- a/CleanSpec.mk
+++ b/CleanSpec.mk
@@ -57,6 +57,7 @@
$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libclcore_neon.bc_intermediates)
$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libclcore*.bc_intermediates)
$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libbcinfo_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libclcore*.bc_intermediates)
$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libbc*_intermediates)
$(call add-clean-step, rm -rf $(HOST_OUT)/obj/STATIC_LIBRARIES/libbc*_intermediates)
$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libbc*_intermediates)
diff --git a/bcinfo/Android.mk b/bcinfo/Android.mk
index 1da7d16..cb922ad 100644
--- a/bcinfo/Android.mk
+++ b/bcinfo/Android.mk
@@ -39,11 +39,15 @@
BitcodeWrapper.cpp \
MetadataExtractor.cpp
-libbcinfo_C_INCLUDES := $(LOCAL_PATH)/../include
+libbcinfo_C_INCLUDES := \
+ $(LOCAL_PATH)/../include \
+ $(LOCAL_PATH)/../../slang
+
libbcinfo_STATIC_LIBRARIES := \
libLLVMWrap \
libLLVMBitReader_2_7 \
- libLLVMBitReader_3_0
+ libLLVMBitReader_3_0 \
+ libLLVMBitWriter_3_2
LLVM_ROOT_PATH := external/llvm
diff --git a/bcinfo/BitReader_2_7/BitcodeReader.cpp b/bcinfo/BitReader_2_7/BitcodeReader.cpp
index 40a6586..4d92715 100644
--- a/bcinfo/BitReader_2_7/BitcodeReader.cpp
+++ b/bcinfo/BitReader_2_7/BitcodeReader.cpp
@@ -2985,7 +2985,7 @@
Stream.init(*StreamFile);
unsigned char buf[16];
- if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+ if (Bytes->readBytes(0, 16, buf) == -1)
return Error("Bitcode stream must be at least 16 bytes in length");
if (!isBitcode(buf, buf + 16))
diff --git a/bcinfo/BitReader_3_0/BitcodeReader.cpp b/bcinfo/BitReader_3_0/BitcodeReader.cpp
index 243314b..5ac3ab9 100644
--- a/bcinfo/BitReader_3_0/BitcodeReader.cpp
+++ b/bcinfo/BitReader_3_0/BitcodeReader.cpp
@@ -3324,7 +3324,7 @@
Stream.init(*StreamFile);
unsigned char buf[16];
- if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+ if (Bytes->readBytes(0, 16, buf) == -1)
return Error("Bitcode stream must be at least 16 bytes in length");
if (!isBitcode(buf, buf + 16))
diff --git a/bcinfo/BitcodeTranslator.cpp b/bcinfo/BitcodeTranslator.cpp
index b4755c0..506a12a 100644
--- a/bcinfo/BitcodeTranslator.cpp
+++ b/bcinfo/BitcodeTranslator.cpp
@@ -21,6 +21,8 @@
#include "BitReader_2_7/BitReader_2_7.h"
#include "BitReader_3_0/BitReader_3_0.h"
+#include "BitWriter_3_2/ReaderWriter_3_2.h"
+
#define LOG_TAG "bcinfo"
#include <cutils/log.h>
@@ -141,7 +143,8 @@
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
- llvm::WriteBitcodeToFile(module, OS);
+ // Use the LLVM 3.2 bitcode writer, instead of the top-of-tree version.
+ llvm_3_2::WriteBitcodeToFile(module, OS);
OS.flush();
AndroidBitcodeWrapper wrapper;
diff --git a/bcinfo/MetadataExtractor.cpp b/bcinfo/MetadataExtractor.cpp
index c4b2965..19262b1 100644
--- a/bcinfo/MetadataExtractor.cpp
+++ b/bcinfo/MetadataExtractor.cpp
@@ -20,7 +20,9 @@
#define LOG_TAG "bcinfo"
#include <cutils/log.h>
+#ifdef HAVE_ANDROID_OS
#include <cutils/properties.h>
+#endif
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/ReaderWriter.h"
@@ -248,6 +250,7 @@
mRSFloatPrecision = RS_FP_Relaxed;
}
+#ifdef HAVE_ANDROID_OS
// Provide an override for precsion via adb shell setprop
// adb shell setprop debug.rs.precision rs_fp_full
// adb shell setprop debug.rs.precision rs_fp_relaxed
@@ -267,6 +270,7 @@
mRSFloatPrecision = RS_FP_Full;
}
}
+#endif
return;
}
@@ -327,7 +331,7 @@
bool MetadataExtractor::populateForEachMetadata(
const llvm::NamedMDNode *Names,
const llvm::NamedMDNode *Signatures) {
- if (!Names && !Signatures) {
+ if (!Names && !Signatures && mCompilerVersion == 0) {
// Handle legacy case for pre-ICS bitcode that doesn't contain a metadata
// section for ForEach. We generate a full signature for a "root" function
// which means that we need to set the bottom 5 bits in the mask.
diff --git a/bcinfo/tools/main.cpp b/bcinfo/tools/main.cpp
index 28d29aa..a2cc0ba 100644
--- a/bcinfo/tools/main.cpp
+++ b/bcinfo/tools/main.cpp
@@ -336,7 +336,7 @@
llvm::OwningPtr<llvm::tool_output_file> tof(
new llvm::tool_output_file(outFile.c_str(), errmsg,
- llvm::raw_fd_ostream::F_Binary));
+ llvm::sys::fs::F_Binary));
llvm::OwningPtr<llvm::AssemblyAnnotationWriter> ann;
module->print(tof->os(), ann.get());
diff --git a/include/bcc/Compiler.h b/include/bcc/Compiler.h
index 265f1e3..5167b9a 100644
--- a/include/bcc/Compiler.h
+++ b/include/bcc/Compiler.h
@@ -61,7 +61,6 @@
kErrHookBeforeAddLTOPasses,
kErrHookAfterAddLTOPasses,
- kErrHookBeforeExecuteLTOPasses,
kErrHookAfterExecuteLTOPasses,
kErrHookBeforeAddCodeGenPasses,
@@ -69,7 +68,7 @@
kErrHookBeforeExecuteCodeGenPasses,
kErrHookAfterExecuteCodeGenPasses,
- kMaxErrorCode,
+ kErrInvalidSource
};
static const char *GetErrorString(enum ErrorCode pErrCode);
@@ -89,10 +88,15 @@
enum ErrorCode config(const CompilerConfig &pConfig);
// Compile a script and output the result to a LLVM stream.
- enum ErrorCode compile(Script &pScript, llvm::raw_ostream &pResult);
+ //
+ // @param IRStream If not NULL, the LLVM-IR that is fed to code generation
+ // will be written to IRStream.
+ enum ErrorCode compile(Script &pScript, llvm::raw_ostream &pResult,
+ llvm::raw_ostream *IRStream);
// Compile a script and output the result to a file.
- enum ErrorCode compile(Script &pScript, OutputFile &pResult);
+ enum ErrorCode compile(Script &pScript, OutputFile &pResult,
+ llvm::raw_ostream *IRStream = 0);
const llvm::TargetMachine& getTargetMachine() const
{ return *mTarget; }
diff --git a/include/bcc/Renderscript/RSCompiler.h b/include/bcc/Renderscript/RSCompiler.h
index 537e749..a46d558 100644
--- a/include/bcc/Renderscript/RSCompiler.h
+++ b/include/bcc/Renderscript/RSCompiler.h
@@ -24,7 +24,8 @@
class RSCompiler : public Compiler {
private:
virtual bool beforeAddLTOPasses(Script &pScript, llvm::PassManager &pPM);
- virtual bool beforeExecuteLTOPasses(Script &pScript, llvm::PassManager &pPM);
+ bool addInternalizeSymbolsPass(Script &pScript, llvm::PassManager &pPM);
+ bool addExpandForEachPass(Script &pScript, llvm::PassManager &pPM);
};
} // end namespace bcc
diff --git a/include/bcc/Renderscript/RSCompilerDriver.h b/include/bcc/Renderscript/RSCompilerDriver.h
index 7b54a13..371014c 100644
--- a/include/bcc/Renderscript/RSCompilerDriver.h
+++ b/include/bcc/Renderscript/RSCompilerDriver.h
@@ -22,13 +22,13 @@
#include "bcc/ExecutionEngine/SymbolResolverProxy.h"
#include "bcc/Renderscript/RSInfo.h"
#include "bcc/Renderscript/RSCompiler.h"
+#include "bcc/Renderscript/RSScript.h"
namespace bcc {
class BCCContext;
class CompilerConfig;
class RSExecutable;
-class RSScript;
class RSCompilerDriver {
private:
@@ -42,19 +42,23 @@
// Are we compiling under an RS debug context with additional checks?
bool mDebugContext;
- RSExecutable *loadScriptCache(const char *pOutputPath,
- const RSInfo::DependencyTableTy &pDeps);
+ // Do we merge global variables on ARM using LLVM's optimization pass?
+ // Disabling LLVM's global merge pass allows static globals to be correctly
+ // emitted to ELF. This can result in decreased performance due to increased
+ // register pressure, but it does make the resulting code easier to debug
+ // and work with.
+ bool mEnableGlobalMerge;
// Setup the compiler config for the given script. Return true if mConfig has
// been changed and false if it remains unchanged.
bool setupConfig(const RSScript &pScript);
- RSExecutable *compileScript(RSScript &pScript,
- const char* pScriptName,
- const char *pOutputPath,
- const char *pRuntimePath,
- const RSInfo::DependencyTableTy &pDeps,
- bool pSkipLoad);
+ Compiler::ErrorCode compileScript(RSScript &pScript,
+ const char* pScriptName,
+ const char *pOutputPath,
+ const char *pRuntimePath,
+ const RSInfo::DependencyTableTy &pDeps,
+ bool pSkipLoad, bool pDumpIR = false);
public:
RSCompilerDriver(bool pUseCompilerRT = true);
@@ -78,16 +82,32 @@
mDebugContext = v;
}
- // FIXME: This method accompany with loadScriptCache and compileScript should
+ // This function enables/disables merging of global static variables.
+ // Note that it only takes effect on ARM architectures (other architectures
+ // do not offer this option).
+ void setEnableGlobalMerge(bool v) {
+ mEnableGlobalMerge = v;
+ }
+
+ bool getEnableGlobalMerge() const {
+ return mEnableGlobalMerge;
+ }
+
+ // FIXME: This method, along with loadScript and compileScript, should
// all be const-methods. They're not now because the getAddress() in
// SymbolResolverInterface is not a const-method.
- RSExecutable *build(BCCContext &pContext,
- const char *pCacheDir, const char *pResName,
- const char *pBitcode, size_t pBitcodeSize,
- const char *pRuntimePath,
- RSLinkRuntimeCallback pLinkRuntimeCallback = NULL);
- RSExecutable *build(RSScript &pScript, const char *pOut,
- const char *pRuntimePath);
+ // Returns true if script is successfully compiled.
+ bool build(BCCContext &pContext, const char *pCacheDir, const char *pResName,
+ const char *pBitcode, size_t pBitcodeSize,
+ const char *pRuntimePath,
+ RSLinkRuntimeCallback pLinkRuntimeCallback = NULL,
+ bool pDumpIR = false);
+
+ // Returns true if script is successfully compiled.
+ bool build(RSScript &pScript, const char *pOut, const char *pRuntimePath);
+
+ RSExecutable *loadScript(const char *pCacheDir, const char *pResName,
+ const char *pBitcode, size_t pBitcodeSize);
};
} // end namespace bcc
diff --git a/include/bcc/Renderscript/RSInfo.h b/include/bcc/Renderscript/RSInfo.h
index f76813d..a0a775d 100644
--- a/include/bcc/Renderscript/RSInfo.h
+++ b/include/bcc/Renderscript/RSInfo.h
@@ -21,13 +21,16 @@
#include <utility>
-#include "bcc/Renderscript/RSScript.h"
#include "bcc/Support/Log.h"
#include "bcc/Support/Sha1Util.h"
#include <utils/String8.h>
#include <utils/Vector.h>
+namespace llvm {
+class Module;
+}
+
namespace bcc {
// Forward declarations
@@ -35,6 +38,9 @@
class InputFile;
class OutputFile;
class Source;
+class RSScript;
+
+typedef llvm::Module* (*RSLinkRuntimeCallback) (bcc::RSScript *, llvm::Module *, llvm::Module *);
namespace rsinfo {
@@ -157,7 +163,7 @@
// Return the path of the RS info file corresponded to the given output
// executable file.
- static android::String8 GetPath(const FileBase &pFile);
+ static android::String8 GetPath(const char *pFilename);
static const char LibBCCPath[];
static const char LibCompilerRTPath[];
diff --git a/include/bcc/Renderscript/RSScript.h b/include/bcc/Renderscript/RSScript.h
index b6c19ef..7f927c7 100644
--- a/include/bcc/Renderscript/RSScript.h
+++ b/include/bcc/Renderscript/RSScript.h
@@ -18,6 +18,7 @@
#define BCC_RS_SCRIPT_H
#include "bcc/Script.h"
+#include "bcc/Renderscript/RSInfo.h"
#include "bcc/Support/Sha1Util.h"
namespace llvm {
@@ -26,12 +27,9 @@
namespace bcc {
-class RSInfo;
class RSScript;
class Source;
-typedef llvm::Module* (*RSLinkRuntimeCallback) (bcc::RSScript *, llvm::Module *, llvm::Module *);
-
class RSScript : public Script {
public:
// This is one-one mapping with the llvm::CodeGenOpt::Level in
@@ -64,6 +62,10 @@
RSScript(Source &pSource);
+ virtual ~RSScript() {
+ delete mInfo;
+ }
+
// Set the associated RSInfo of the script.
void setInfo(const RSInfo *pInfo) {
mInfo = pInfo;
diff --git a/include/bcc/Source.h b/include/bcc/Source.h
index 4aa76c1..9ba860b 100644
--- a/include/bcc/Source.h
+++ b/include/bcc/Source.h
@@ -47,8 +47,6 @@
static Source *CreateFromFile(BCCContext &pContext,
const std::string &pPath);
- static Source *CreateFromFd(BCCContext &pContext, int pFd);
-
// Create a Source object from an existing module. If pNoDelete
// is true, destructor won't call delete on the given module.
static Source *CreateFromModule(BCCContext &pContext,
diff --git a/include/bcc/Support/Properties.h b/include/bcc/Support/Properties.h
new file mode 100644
index 0000000..c82901c
--- /dev/null
+++ b/include/bcc/Support/Properties.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BCC_SUPPORT_PROPERTIES_H
+#define BCC_SUPPORT_PROPERTIES_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#if !defined(RS_SERVER) && defined(HAVE_ANDROID_OS)
+#include <cutils/properties.h>
+#endif
+
+static inline uint32_t getProperty(const char *str) {
+#if !defined(RS_SERVER) && defined(HAVE_ANDROID_OS)
+ char buf[PROPERTY_VALUE_MAX];
+ property_get(str, buf, "0");
+ return atoi(buf);
+#else
+ return 0;
+#endif
+}
+
+#endif // BCC_SUPPORT_PROPERTIES_H
diff --git a/include/bcinfo/MetadataExtractor.h b/include/bcinfo/MetadataExtractor.h
index d1a88d9..dbfd5ae 100644
--- a/include/bcinfo/MetadataExtractor.h
+++ b/include/bcinfo/MetadataExtractor.h
@@ -197,6 +197,66 @@
enum RSFloatPrecision getRSFloatPrecision() const {
return mRSFloatPrecision;
}
+
+ /**
+ * \return whether or not this ForEach function signature has an "In"
+ * parameter.
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureIn(uint32_t sig) {
+ return sig & 0x01;
+ }
+
+ /**
+ * \return whether or not this ForEach function signature has an "Out"
+ * parameter.
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureOut(uint32_t sig) {
+ return sig & 0x02;
+ }
+
+ /**
+ * \return whether or not this ForEach function signature has a "UsrData"
+ * parameter.
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureUsrData(uint32_t sig) {
+ return sig & 0x04;
+ }
+
+ /**
+ * \return whether or not this ForEach function signature has an "X"
+ * parameter.
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureX(uint32_t sig) {
+ return sig & 0x08;
+ }
+
+ /**
+ * \return whether or not this ForEach function signature has a "Y"
+ * parameter.
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureY(uint32_t sig) {
+ return sig & 0x10;
+ }
+
+ /**
+ * \return whether or not this ForEach function signature is a
+ * pass-by-value "Kernel".
+ *
+ * \param sig - ForEach function signature to check.
+ */
+ static bool hasForEachSignatureKernel(uint32_t sig) {
+ return sig & 0x20;
+ }
};
} // namespace bcinfo
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index 458fbc0..3440570 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -25,6 +25,7 @@
#include <llvm/IR/DataLayout.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO.h>
+#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Scalar.h>
#include "bcc/Script.h"
@@ -36,57 +37,51 @@
using namespace bcc;
const char *Compiler::GetErrorString(enum ErrorCode pErrCode) {
- static const char *ErrorString[] = {
- /* kSuccess */
- "Successfully compiled.",
- /* kInvalidConfigNoTarget */
- "Invalid compiler config supplied (getTarget() returns NULL.) "
- "(missing call to CompilerConfig::initialize()?)",
- /* kErrCreateTargetMachine */
- "Failed to create llvm::TargetMachine.",
- /* kErrSwitchTargetMachine */
- "Failed to switch llvm::TargetMachine.",
- /* kErrNoTargetMachine */
- "Failed to compile the script since there's no available TargetMachine."
- " (missing call to Compiler::config()?)",
- /* kErrDataLayoutNoMemory */
- "Out of memory when create DataLayout during compilation.",
- /* kErrMaterialization */
- "Failed to materialize the module.",
- /* kErrInvalidOutputFileState */
- "Supplied output file was invalid (in the error state.)",
- /* kErrPrepareOutput */
- "Failed to prepare file for output.",
- /* kPrepareCodeGenPass */
- "Failed to construct pass list for code-generation.",
-
- /* kErrHookBeforeAddLTOPasses */
- "Error occurred during beforeAddLTOPasses() in subclass.",
- /* kErrHookAfterAddLTOPasses */
- "Error occurred during afterAddLTOPasses() in subclass.",
- /* kErrHookBeforeExecuteLTOPasses */
- "Error occurred during beforeExecuteLTOPasses() in subclass.",
- /* kErrHookAfterExecuteLTOPasses */
- "Error occurred during afterExecuteLTOPasses() in subclass.",
-
- /* kErrHookBeforeAddCodeGenPasses */
- "Error occurred during beforeAddCodeGenPasses() in subclass.",
- /* kErrHookAfterAddCodeGenPasses */
- "Error occurred during afterAddCodeGenPasses() in subclass.",
- /* kErrHookBeforeExecuteCodeGenPasses */
- "Error occurred during beforeExecuteCodeGenPasses() in subclass.",
- /* kErrHookAfterExecuteCodeGenPasses */
- "Error occurred during afterExecuteCodeGenPasses() in subclass.",
-
- /* kMaxErrorCode */
- "(Unknown error code)"
- };
-
- if (pErrCode > kMaxErrorCode) {
- pErrCode = kMaxErrorCode;
+ switch (pErrCode) {
+ case kSuccess:
+ return "Successfully compiled.";
+ case kInvalidConfigNoTarget:
+ return "Invalid compiler config supplied (getTarget() returns NULL.) "
+ "(missing call to CompilerConfig::initialize()?)";
+ case kErrCreateTargetMachine:
+ return "Failed to create llvm::TargetMachine.";
+ case kErrSwitchTargetMachine:
+ return "Failed to switch llvm::TargetMachine.";
+ case kErrNoTargetMachine:
+ return "Failed to compile the script since there's no available "
+ "TargetMachine. (missing call to Compiler::config()?)";
+ case kErrDataLayoutNoMemory:
+ return "Out of memory when create DataLayout during compilation.";
+ case kErrMaterialization:
+ return "Failed to materialize the module.";
+ case kErrInvalidOutputFileState:
+ return "Supplied output file was invalid (in the error state.)";
+ case kErrPrepareOutput:
+ return "Failed to prepare file for output.";
+ case kPrepareCodeGenPass:
+ return "Failed to construct pass list for code-generation.";
+ case kErrHookBeforeAddLTOPasses:
+ return "Error occurred during beforeAddLTOPasses() in subclass.";
+ case kErrHookAfterAddLTOPasses:
+ return "Error occurred during afterAddLTOPasses() in subclass.";
+ case kErrHookAfterExecuteLTOPasses:
+ return "Error occurred during afterExecuteLTOPasses() in subclass.";
+ case kErrHookBeforeAddCodeGenPasses:
+ return "Error occurred during beforeAddCodeGenPasses() in subclass.";
+ case kErrHookAfterAddCodeGenPasses:
+ return "Error occurred during afterAddCodeGenPasses() in subclass.";
+ case kErrHookBeforeExecuteCodeGenPasses:
+ return "Error occurred during beforeExecuteCodeGenPasses() in subclass.";
+ case kErrHookAfterExecuteCodeGenPasses:
+ return "Error occurred during afterExecuteCodeGenPasses() in subclass.";
+ case kErrInvalidSource:
+ return "Error loading input bitcode";
}
- return ErrorString[ static_cast<size_t>(pErrCode) ];
+ // This assert should never be reached as the compiler verifies that the
+ // above switch covers all enum values.
+ assert(false && "Unknown error code encountered");
+ return "";
}
//===----------------------------------------------------------------------===//
@@ -167,108 +162,30 @@
// Add DataLayout to the pass manager.
lto_passes.add(data_layout);
- // Invokde "beforeAddLTOPasses" before adding the first pass.
+ // Invoke "beforeAddLTOPasses" before adding the first pass.
if (!beforeAddLTOPasses(pScript, lto_passes)) {
return kErrHookBeforeAddLTOPasses;
}
- // We now create passes list performing LTO. These are copied from
- // (including comments) llvm::PassManagerBuilder::populateLTOPassManager().
- // Only a subset of these LTO passes are enabled in optimization level 0 as
- // they interfere with interactive debugging.
- //
- // FIXME: Figure out which passes (if any) makes sense for levels 1 and 2.
- //if ( != llvm::CodeGenOpt::None) {
if (mTarget->getOptLevel() == llvm::CodeGenOpt::None) {
lto_passes.add(llvm::createGlobalOptimizerPass());
lto_passes.add(llvm::createConstantMergePass());
} else {
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting
- // function pointers passed as arguments to direct uses of functions.
- lto_passes.add(llvm::createIPSCCPPass());
-
- // Now that we internalized some globals, see if we can hack on them!
- lto_passes.add(llvm::createGlobalOptimizerPass());
-
- // Linking modules together can lead to duplicated global constants, only
- // keep one copy of each constant...
- lto_passes.add(llvm::createConstantMergePass());
-
- // Remove unused arguments from functions...
- lto_passes.add(llvm::createDeadArgEliminationPass());
-
- // Reduce the code after globalopt and ipsccp. Both can open up
- // significant simplification opportunities, and both can propagate
- // functions through function pointers. When this happens, we often have
- // to resolve varargs calls, etc, so let instcombine do this.
- lto_passes.add(llvm::createInstructionCombiningPass());
-
- // Inline small functions
- lto_passes.add(llvm::createFunctionInliningPass());
-
- // Remove dead EH info.
- lto_passes.add(llvm::createPruneEHPass());
-
- // Internalize the globals again after inlining
- lto_passes.add(llvm::createGlobalOptimizerPass());
-
- // Remove dead functions.
- lto_passes.add(llvm::createGlobalDCEPass());
-
- // If we didn't decide to inline a function, check to see if we can
- // transform it to pass arguments by value instead of by reference.
- lto_passes.add(llvm::createArgumentPromotionPass());
-
- // The IPO passes may leave cruft around. Clean up after them.
- lto_passes.add(llvm::createInstructionCombiningPass());
- lto_passes.add(llvm::createJumpThreadingPass());
-
- // Break up allocas
- lto_passes.add(llvm::createScalarReplAggregatesPass());
-
- // Run a few AA driven optimizations here and now, to cleanup the code.
- lto_passes.add(llvm::createFunctionAttrsPass()); // Add nocapture.
- lto_passes.add(llvm::createGlobalsModRefPass()); // IP alias analysis.
-
- // Hoist loop invariants.
- lto_passes.add(llvm::createLICMPass());
-
- // Remove redundancies.
- lto_passes.add(llvm::createGVNPass());
-
- // Remove dead memcpys.
- lto_passes.add(llvm::createMemCpyOptPass());
-
- // Nuke dead stores.
- lto_passes.add(llvm::createDeadStoreEliminationPass());
-
- // Cleanup and simplify the code after the scalar optimizations.
- lto_passes.add(llvm::createInstructionCombiningPass());
-
- lto_passes.add(llvm::createJumpThreadingPass());
-
- // Delete basic blocks, which optimization passes may have killed.
- lto_passes.add(llvm::createCFGSimplificationPass());
-
- // Now that we have optimized the program, discard unreachable functions.
- lto_passes.add(llvm::createGlobalDCEPass());
+ // FIXME: Figure out which passes should be executed.
+ llvm::PassManagerBuilder Builder;
+ Builder.populateLTOPassManager(lto_passes, /*Internalize*/false,
+ /*RunInliner*/true);
}
- // Invokde "afterAddLTOPasses" after pass manager finished its
+ // Invoke "afterAddLTOPasses" after pass manager finished its
// construction.
if (!afterAddLTOPasses(pScript, lto_passes)) {
return kErrHookAfterAddLTOPasses;
}
- // Invokde "beforeExecuteLTOPasses" before executing the passes.
- if (!beforeExecuteLTOPasses(pScript, lto_passes)) {
- return kErrHookBeforeExecuteLTOPasses;
- }
-
lto_passes.run(pScript.getSource().getModule());
- // Invokde "afterExecuteLTOPasses" before returning.
+ // Invoke "afterExecuteLTOPasses" before returning.
if (!afterExecuteLTOPasses(pScript)) {
return kErrHookAfterExecuteLTOPasses;
}
@@ -327,7 +244,8 @@
}
enum Compiler::ErrorCode Compiler::compile(Script &pScript,
- llvm::raw_ostream &pResult) {
+ llvm::raw_ostream &pResult,
+ llvm::raw_ostream *IRStream) {
llvm::Module &module = pScript.getSource().getModule();
enum ErrorCode err;
@@ -352,6 +270,9 @@
return err;
}
+ if (IRStream)
+ *IRStream << module;
+
if ((err = runCodeGen(pScript, pResult)) != kSuccess) {
return err;
}
@@ -360,7 +281,8 @@
}
enum Compiler::ErrorCode Compiler::compile(Script &pScript,
- OutputFile &pResult) {
+ OutputFile &pResult,
+ llvm::raw_ostream *IRStream) {
// Check the state of the specified output file.
if (pResult.hasError()) {
return kErrInvalidOutputFileState;
@@ -373,7 +295,7 @@
}
// Delegate the request.
- enum Compiler::ErrorCode err = compile(pScript, *out);
+ enum Compiler::ErrorCode err = compile(pScript, *out, IRStream);
// Close the output before return.
delete out;
diff --git a/lib/Core/Source.cpp b/lib/Core/Source.cpp
index e0e6886..41397b9 100644
--- a/lib/Core/Source.cpp
+++ b/lib/Core/Source.cpp
@@ -112,34 +112,6 @@
return result;
}
-Source *Source::CreateFromFd(BCCContext &pContext, int pFd) {
- llvm::OwningPtr<llvm::MemoryBuffer> input_data;
-
- llvm::error_code ec =
- llvm::MemoryBuffer::getOpenFile(pFd, /* Filename */"", input_data);
-
- if (ec != llvm::error_code::success()) {
- ALOGE("Failed to load bitcode from file descriptor %d! (%s)",
- pFd, ec.message().c_str());
- return NULL;
- }
-
- llvm::MemoryBuffer *input_memory = input_data.take();
- llvm::Module *module = helper_load_bitcode(pContext.mImpl->mLLVMContext,
- input_memory);
- if (module == NULL) {
- delete input_memory;
- return NULL;
- }
-
- Source *result = CreateFromModule(pContext, *module, /* pNoDelete */false);
- if (result == NULL) {
- delete module;
- }
-
- return result;
-}
-
Source *Source::CreateFromModule(BCCContext &pContext, llvm::Module &pModule,
bool pNoDelete) {
Source *result = new (std::nothrow) Source(pContext, pModule, pNoDelete);
diff --git a/lib/ExecutionEngine/SymbolResolvers.cpp b/lib/ExecutionEngine/SymbolResolvers.cpp
index 3d21e7d..4194a6b 100644
--- a/lib/ExecutionEngine/SymbolResolvers.cpp
+++ b/lib/ExecutionEngine/SymbolResolvers.cpp
@@ -87,6 +87,9 @@
}
DyldSymbolResolver::~DyldSymbolResolver() {
- ::dlclose(mHandle);
+ if (mHandle != NULL) {
+ ::dlclose(mHandle);
+ mHandle = NULL;
+ }
delete [] mError;
}
diff --git a/lib/Renderscript/Android.mk b/lib/Renderscript/Android.mk
index bbd1e64..b0ad839 100644
--- a/lib/Renderscript/Android.mk
+++ b/lib/Renderscript/Android.mk
@@ -68,6 +68,3 @@
include $(LIBBCC_GEN_CONFIG_MK)
include $(LLVM_HOST_BUILD_MK)
include $(BUILD_HOST_STATIC_LIBRARY)
-
-# Build Renderscript runtime (libclcore.bc)
-include $(LOCAL_PATH)/runtime/Android.mk
diff --git a/lib/Renderscript/RSCompiler.cpp b/lib/Renderscript/RSCompiler.cpp
index 618c1c3..9acc455 100644
--- a/lib/Renderscript/RSCompiler.cpp
+++ b/lib/Renderscript/RSCompiler.cpp
@@ -29,7 +29,7 @@
using namespace bcc;
-bool RSCompiler::beforeAddLTOPasses(Script &pScript, llvm::PassManager &pPM) {
+bool RSCompiler::addInternalizeSymbolsPass(Script &pScript, llvm::PassManager &pPM) {
// Add a pass to internalize the symbols that don't need to have global
// visibility.
RSScript &script = static_cast<RSScript &>(pScript);
@@ -87,11 +87,7 @@
return true;
}
-bool RSCompiler::beforeExecuteLTOPasses(Script &pScript,
- llvm::PassManager &pPM) {
- // Execute a pass to expand foreach-able functions
- llvm::PassManager rs_passes;
-
+bool RSCompiler::addExpandForEachPass(Script &pScript, llvm::PassManager &pPM) {
// Script passed to RSCompiler must be a RSScript.
RSScript &script = static_cast<RSScript &>(pScript);
const RSInfo *info = script.getInfo();
@@ -104,14 +100,21 @@
}
// Expand ForEach on CPU path to reduce launch overhead.
- rs_passes.add(createRSForEachExpandPass(info->getExportForeachFuncs(),
- /* pEnableStepOpt */ true));
- if (script.getEmbedInfo()) {
- rs_passes.add(createRSEmbedInfoPass(info));
- }
+ bool pEnableStepOpt = true;
+ pPM.add(createRSForEachExpandPass(info->getExportForeachFuncs(),
+ pEnableStepOpt));
+ if (script.getEmbedInfo())
+ pPM.add(createRSEmbedInfoPass(info));
- // Execute the pass.
- rs_passes.run(module);
+ return true;
+}
+
+bool RSCompiler::beforeAddLTOPasses(Script &pScript, llvm::PassManager &pPM) {
+ if (!addExpandForEachPass(pScript, pPM))
+ return false;
+
+ if (!addInternalizeSymbolsPass(pScript, pPM))
+ return false;
return true;
}
diff --git a/lib/Renderscript/RSCompilerDriver.cpp b/lib/Renderscript/RSCompilerDriver.cpp
index 3d8bc2c..253c4f3 100644
--- a/lib/Renderscript/RSCompilerDriver.cpp
+++ b/lib/Renderscript/RSCompilerDriver.cpp
@@ -16,10 +16,14 @@
#include "bcc/Renderscript/RSCompilerDriver.h"
+#include <llvm/IR/Module.h>
+#include <llvm/Support/CommandLine.h>
#include <llvm/Support/Path.h>
+#include <llvm/Support/raw_ostream.h>
#include "bcinfo/BitcodeWrapper.h"
+#include "bcc/Compiler.h"
#include "bcc/Renderscript/RSExecutable.h"
#include "bcc/Renderscript/RSScript.h"
#include "bcc/Support/CompilerConfig.h"
@@ -32,36 +36,17 @@
#include "bcc/Support/Sha1Util.h"
#include "bcc/Support/OutputFile.h"
+#ifdef HAVE_ANDROID_OS
#include <cutils/properties.h>
+#endif
#include <utils/String8.h>
#include <utils/StopWatch.h>
using namespace bcc;
-namespace {
-
-bool is_force_recompile() {
- char buf[PROPERTY_VALUE_MAX];
-
- // Re-compile if floating point precision has been overridden.
- property_get("debug.rs.precision", buf, "");
- if (buf[0] != '\0') {
- return true;
- }
-
- // Re-compile if debug.rs.forcerecompile is set.
- property_get("debug.rs.forcerecompile", buf, "0");
- if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
- return true;
- } else {
- return false;
- }
-}
-
-} // end anonymous namespace
-
RSCompilerDriver::RSCompilerDriver(bool pUseCompilerRT) :
- mConfig(NULL), mCompiler(), mCompilerRuntime(NULL), mDebugContext(false) {
+ mConfig(NULL), mCompiler(), mCompilerRuntime(NULL), mDebugContext(false),
+ mEnableGlobalMerge(true) {
init::Initialize();
// Chain the symbol resolvers for compiler_rt and RS runtimes.
if (pUseCompilerRT) {
@@ -77,21 +62,38 @@
}
RSExecutable *
-RSCompilerDriver::loadScriptCache(const char *pOutputPath,
- const RSInfo::DependencyTableTy &pDeps) {
- //android::StopWatch load_time("bcc: RSCompilerDriver::loadScriptCache time");
- RSExecutable *result = NULL;
-
- if (is_force_recompile())
+RSCompilerDriver::loadScript(const char *pCacheDir, const char *pResName,
+ const char *pBitcode, size_t pBitcodeSize) {
+ //android::StopWatch load_time("bcc: RSCompilerDriver::loadScript time");
+ if ((pCacheDir == NULL) || (pResName == NULL)) {
+ ALOGE("Missing pCacheDir and/or pResName");
return NULL;
+ }
+
+ if ((pBitcode == NULL) || (pBitcodeSize <= 0)) {
+ ALOGE("No bitcode supplied! (bitcode: %p, size of bitcode: %zu)",
+ pBitcode, pBitcodeSize);
+ return NULL;
+ }
+
+ RSInfo::DependencyTableTy dep_info;
+ uint8_t bitcode_sha1[20];
+ Sha1Util::GetSHA1DigestFromBuffer(bitcode_sha1, pBitcode, pBitcodeSize);
+
+ // {pCacheDir}/{pResName}.o
+ llvm::SmallString<80> output_path(pCacheDir);
+ llvm::sys::path::append(output_path, pResName);
+ llvm::sys::path::replace_extension(output_path, ".o");
+
+ dep_info.push(std::make_pair(output_path.c_str(), bitcode_sha1));
//===--------------------------------------------------------------------===//
- // Acquire the read lock for reading output object file.
+ // Acquire the read lock for reading the Script object file.
//===--------------------------------------------------------------------===//
- FileMutex<FileBase::kReadLock> read_output_mutex(pOutputPath);
+ FileMutex<FileBase::kReadLock> read_output_mutex(output_path.c_str());
if (read_output_mutex.hasError() || !read_output_mutex.lock()) {
- ALOGE("Unable to acquire the read lock for %s! (%s)", pOutputPath,
+ ALOGE("Unable to acquire the read lock for %s! (%s)", output_path.c_str(),
read_output_mutex.getErrorMessage().c_str());
return NULL;
}
@@ -99,25 +101,25 @@
//===--------------------------------------------------------------------===//
// Read the output object file.
//===--------------------------------------------------------------------===//
- InputFile *output_file = new (std::nothrow) InputFile(pOutputPath);
+ InputFile *object_file = new (std::nothrow) InputFile(output_path.c_str());
- if ((output_file == NULL) || output_file->hasError()) {
- // ALOGE("Unable to open the %s for read! (%s)", pOutputPath,
- // output_file->getErrorMessage().c_str());
- delete output_file;
+ if ((object_file == NULL) || object_file->hasError()) {
+ // ALOGE("Unable to open the %s for read! (%s)", output_path.c_str(),
+ // object_file->getErrorMessage().c_str());
+ delete object_file;
return NULL;
}
//===--------------------------------------------------------------------===//
- // Acquire the read lock on output_file for reading its RS info file.
+ // Acquire the read lock on object_file for reading its RS info file.
//===--------------------------------------------------------------------===//
- android::String8 info_path = RSInfo::GetPath(*output_file);
+ android::String8 info_path = RSInfo::GetPath(output_path.c_str());
- if (!output_file->lock()) {
+ if (!object_file->lock()) {
ALOGE("Unable to acquire the read lock on %s for reading %s! (%s)",
- pOutputPath, info_path.string(),
- output_file->getErrorMessage().c_str());
- delete output_file;
+ output_path.c_str(), info_path.string(),
+ object_file->getErrorMessage().c_str());
+ delete object_file;
return NULL;
}
@@ -125,22 +127,22 @@
// Open and load the RS info file.
//===--------------------------------------------------------------------===//
InputFile info_file(info_path.string());
- RSInfo *info = RSInfo::ReadFromFile(info_file, pDeps);
+ RSInfo *info = RSInfo::ReadFromFile(info_file, dep_info);
- // Release the lock on output_file.
- output_file->unlock();
+ // Release the lock on object_file.
+ object_file->unlock();
if (info == NULL) {
- delete output_file;
+ delete object_file;
return NULL;
}
//===--------------------------------------------------------------------===//
// Create the RSExecutable.
//===--------------------------------------------------------------------===//
- result = RSExecutable::Create(*info, *output_file, mResolver);
+ RSExecutable *result = RSExecutable::Create(*info, *object_file, mResolver);
if (result == NULL) {
- delete output_file;
+ delete object_file;
delete info;
return NULL;
}
@@ -148,6 +150,10 @@
return result;
}
+#if defined(DEFAULT_ARM_CODEGEN)
+extern llvm::cl::opt<bool> EnableGlobalMerge;
+#endif
+
bool RSCompilerDriver::setupConfig(const RSScript &pScript) {
bool changed = false;
@@ -169,6 +175,9 @@
return false;
}
mConfig->setOptimizationLevel(script_opt_level);
+#if defined(DEFAULT_ARM_CODEGEN)
+ EnableGlobalMerge = mEnableGlobalMerge;
+#endif
changed = true;
}
@@ -185,15 +194,14 @@
return changed;
}
-RSExecutable *
+Compiler::ErrorCode
RSCompilerDriver::compileScript(RSScript &pScript,
const char* pScriptName,
const char *pOutputPath,
const char *pRuntimePath,
const RSInfo::DependencyTableTy &pDeps,
- bool pSkipLoad) {
+ bool pSkipLoad, bool pDumpIR) {
//android::StopWatch compile_time("bcc: RSCompilerDriver::compileScript time");
- RSExecutable *result = NULL;
RSInfo *info = NULL;
//===--------------------------------------------------------------------===//
@@ -203,7 +211,7 @@
// compiler therefore it should be extracted before compilation.
info = RSInfo::ExtractFromSource(pScript.getSource(), pDeps);
if (info == NULL) {
- return NULL;
+ return Compiler::kErrInvalidSource;
}
//===--------------------------------------------------------------------===//
@@ -218,129 +226,119 @@
//===--------------------------------------------------------------------===//
if (!RSScript::LinkRuntime(pScript, pRuntimePath)) {
ALOGE("Failed to link script '%s' with Renderscript runtime!", pScriptName);
- return NULL;
+ return Compiler::kErrInvalidSource;
}
- // FIXME(srhines): Windows compilation can't use locking like this, but
- // we also don't need to worry about concurrent writers of the same file.
+ {
+ // FIXME(srhines): Windows compilation can't use locking like this, but
+ // we also don't need to worry about concurrent writers of the same file.
#ifndef USE_MINGW
- //===--------------------------------------------------------------------===//
- // Acquire the write lock for writing output object file.
- //===--------------------------------------------------------------------===//
- FileMutex<FileBase::kWriteLock> write_output_mutex(pOutputPath);
+ //===------------------------------------------------------------------===//
+ // Acquire the write lock for writing output object file.
+ //===------------------------------------------------------------------===//
+ FileMutex<FileBase::kWriteLock> write_output_mutex(pOutputPath);
- if (write_output_mutex.hasError() || !write_output_mutex.lock()) {
- ALOGE("Unable to acquire the lock for writing %s! (%s)",
- pOutputPath, write_output_mutex.getErrorMessage().c_str());
- return NULL;
- }
+ if (write_output_mutex.hasError() || !write_output_mutex.lock()) {
+ ALOGE("Unable to acquire the lock for writing %s! (%s)",
+ pOutputPath, write_output_mutex.getErrorMessage().c_str());
+ return Compiler::kErrInvalidSource;
+ }
#endif
- //===--------------------------------------------------------------------===//
- // Open the output file for write.
- //===--------------------------------------------------------------------===//
- unsigned flags = FileBase::kTruncate | FileBase::kBinary;
- if (mDebugContext) {
- // Delete the cache file when we finish up under a debug context.
- flags |= FileBase::kDeleteOnClose;
- }
- OutputFile *output_file = new (std::nothrow) OutputFile(pOutputPath, flags);
+ // Open the output file for write.
+ OutputFile output_file(pOutputPath,
+ FileBase::kTruncate | FileBase::kBinary);
- if ((output_file == NULL) || output_file->hasError()) {
- ALOGE("Unable to open %s for write! (%s)", pOutputPath,
- output_file->getErrorMessage().c_str());
- delete info;
- delete output_file;
- return NULL;
- }
-
- //===--------------------------------------------------------------------===//
- // Setup the config to the compiler.
- //===--------------------------------------------------------------------===//
- bool compiler_need_reconfigure = setupConfig(pScript);
-
- if (mConfig == NULL) {
- ALOGE("Failed to setup config for RS compiler to compile %s!", pOutputPath);
- delete info;
- delete output_file;
- return NULL;
- }
-
- // Compiler need to re-config if it's haven't run the config() yet or the
- // configuration it referenced is changed.
- if (compiler_need_reconfigure) {
- Compiler::ErrorCode err = mCompiler.config(*mConfig);
- if (err != Compiler::kSuccess) {
- ALOGE("Failed to config the RS compiler for %s! (%s)",pOutputPath,
- Compiler::GetErrorString(err));
- delete info;
- delete output_file;
- return NULL;
+ if (output_file.hasError()) {
+ ALOGE("Unable to open %s for write! (%s)", pOutputPath,
+ output_file.getErrorMessage().c_str());
+ return Compiler::kErrInvalidSource;
}
- }
- //===--------------------------------------------------------------------===//
- // Run the compiler.
- //===--------------------------------------------------------------------===//
- Compiler::ErrorCode compile_result = mCompiler.compile(pScript, *output_file);
- if (compile_result != Compiler::kSuccess) {
- ALOGE("Unable to compile the source to file %s! (%s)", pOutputPath,
- Compiler::GetErrorString(compile_result));
- delete info;
- delete output_file;
- return NULL;
+ // Setup the config to the compiler.
+ bool compiler_need_reconfigure = setupConfig(pScript);
+
+ if (mConfig == NULL) {
+ ALOGE("Failed to setup config for RS compiler to compile %s!",
+ pOutputPath);
+ return Compiler::kErrInvalidSource;
+ }
+
+ if (compiler_need_reconfigure) {
+ Compiler::ErrorCode err = mCompiler.config(*mConfig);
+ if (err != Compiler::kSuccess) {
+ ALOGE("Failed to config the RS compiler for %s! (%s)",pOutputPath,
+ Compiler::GetErrorString(err));
+ return Compiler::kErrInvalidSource;
+ }
+ }
+
+ OutputFile *ir_file = NULL;
+ llvm::raw_fd_ostream *IRStream = NULL;
+ if (pDumpIR) {
+ android::String8 path(pOutputPath);
+ path.append(".ll");
+ ir_file = new OutputFile(path.string(), FileBase::kTruncate);
+ IRStream = ir_file->dup();
+ }
+
+ // Run the compiler.
+ Compiler::ErrorCode compile_result = mCompiler.compile(pScript,
+ output_file, IRStream);
+
+ if (ir_file) {
+ ir_file->close();
+ delete ir_file;
+ }
+
+ if (compile_result != Compiler::kSuccess) {
+ ALOGE("Unable to compile the source to file %s! (%s)", pOutputPath,
+ Compiler::GetErrorString(compile_result));
+ return Compiler::kErrInvalidSource;
+ }
}
// No need to produce an RSExecutable in this case.
// TODO: Error handling in this case is nonexistent.
if (pSkipLoad) {
- return NULL;
+ return Compiler::kSuccess;
}
- //===--------------------------------------------------------------------===//
- // Create the RSExecutable.
- //===--------------------------------------------------------------------===//
- result = RSExecutable::Create(*info, *output_file, mResolver);
- if (result == NULL) {
- delete info;
- delete output_file;
- return NULL;
+ {
+ android::String8 info_path = RSInfo::GetPath(pOutputPath);
+ OutputFile info_file(info_path.string(), FileBase::kTruncate);
+
+ if (info_file.hasError()) {
+ ALOGE("Failed to open the info file %s for write! (%s)",
+ info_path.string(), info_file.getErrorMessage().c_str());
+ return Compiler::kErrInvalidSource;
+ }
+
+ FileMutex<FileBase::kWriteLock> write_info_mutex(info_path.string());
+ if (write_info_mutex.hasError() || !write_info_mutex.lock()) {
+ ALOGE("Unable to acquire the lock for writing %s! (%s)",
+ info_path.string(), write_info_mutex.getErrorMessage().c_str());
+ return Compiler::kErrInvalidSource;
+ }
+
+ // Perform the write.
+ if (!info->write(info_file)) {
+ ALOGE("Failed to sync the RS info file %s!", info_path.string());
+ return Compiler::kErrInvalidSource;
+ }
}
- //===--------------------------------------------------------------------===//
- // Dump the disassembly for debug when possible.
- //===--------------------------------------------------------------------===//
-#if USE_DISASSEMBLER
- OutputFile *disassembly_output =
- new (std::nothrow) OutputFile(DEBUG_DISASSEMBLER_FILE,
- FileBase::kAppend);
-
- if (disassembly_output != NULL) {
- result->dumpDisassembly(*disassembly_output);
- delete disassembly_output;
- }
-#endif
-
- //===--------------------------------------------------------------------===//
- // Write out the RS info file.
- //===--------------------------------------------------------------------===//
- // Note that write failure only results in a warning since the source is
- // successfully compiled and loaded.
- if (!result->syncInfo(/* pForce */true)) {
- ALOGW("%s was successfully compiled and loaded but its RS info file failed "
- "to write out!", pOutputPath);
- }
-
- return result;
+ return Compiler::kSuccess;
}
-RSExecutable *RSCompilerDriver::build(BCCContext &pContext,
- const char *pCacheDir,
- const char *pResName,
- const char *pBitcode,
- size_t pBitcodeSize,
- const char *pRuntimePath,
- RSLinkRuntimeCallback pLinkRuntimeCallback) {
+bool RSCompilerDriver::build(BCCContext &pContext,
+ const char *pCacheDir,
+ const char *pResName,
+ const char *pBitcode,
+ size_t pBitcodeSize,
+ const char *pRuntimePath,
+ RSLinkRuntimeCallback pLinkRuntimeCallback,
+ bool pDumpIR) {
// android::StopWatch build_time("bcc: RSCompilerDriver::build time");
//===--------------------------------------------------------------------===//
// Check parameters.
@@ -349,13 +347,13 @@
ALOGE("Invalid parameter passed to RSCompilerDriver::build()! (cache dir: "
"%s, resource name: %s)", ((pCacheDir) ? pCacheDir : "(null)"),
((pResName) ? pResName : "(null)"));
- return NULL;
+ return false;
}
if ((pBitcode == NULL) || (pBitcodeSize <= 0)) {
ALOGE("No bitcode supplied! (bitcode: %p, size of bitcode: %u)",
pBitcode, static_cast<unsigned>(pBitcodeSize));
- return NULL;
+ return false;
}
//===--------------------------------------------------------------------===//
@@ -364,36 +362,16 @@
RSInfo::DependencyTableTy dep_info;
uint8_t bitcode_sha1[20];
Sha1Util::GetSHA1DigestFromBuffer(bitcode_sha1, pBitcode, pBitcodeSize);
- dep_info.push(std::make_pair(pResName, bitcode_sha1));
//===--------------------------------------------------------------------===//
// Construct output path.
- //===--------------------------------------------------------------------===//
- llvm::sys::Path output_path(pCacheDir);
-
- // {pCacheDir}/{pResName}
- if (!output_path.appendComponent(pResName)) {
- ALOGE("Failed to construct output path %s/%s!", pCacheDir, pResName);
- return NULL;
- }
-
// {pCacheDir}/{pResName}.o
- output_path.appendSuffix("o");
-
//===--------------------------------------------------------------------===//
- // Load cache.
- //===--------------------------------------------------------------------===//
- RSExecutable *result = NULL;
+ llvm::SmallString<80> output_path(pCacheDir);
+ llvm::sys::path::append(output_path, pResName);
+ llvm::sys::path::replace_extension(output_path, ".o");
- // Skip loading from the cache if we are using a debug context.
- if (!mDebugContext) {
- result = loadScriptCache(output_path.c_str(), dep_info);
-
- if (result != NULL) {
- // Cache hit
- return result;
- }
- }
+ dep_info.push(std::make_pair(output_path.c_str(), bitcode_sha1));
//===--------------------------------------------------------------------===//
// Load the bitcode and create script.
@@ -401,7 +379,7 @@
Source *source = Source::CreateFromBuffer(pContext, pResName,
pBitcode, pBitcodeSize);
if (source == NULL) {
- return NULL;
+ return false;
}
RSScript *script = new (std::nothrow) RSScript(*source);
@@ -409,7 +387,7 @@
ALOGE("Out of memory when create Script object for '%s'! (output: %s)",
pResName, output_path.c_str());
delete source;
- return NULL;
+ return false;
}
script->setLinkRuntimeCallback(pLinkRuntimeCallback);
@@ -423,26 +401,28 @@
//===--------------------------------------------------------------------===//
// Compile the script
//===--------------------------------------------------------------------===//
- result = compileScript(*script, pResName, output_path.c_str(), pRuntimePath,
- dep_info, false);
+ Compiler::ErrorCode status = compileScript(*script, pResName,
+ output_path.c_str(),
+ pRuntimePath, dep_info, false,
+ pDumpIR);
// Script is no longer used. Free it to get more memory.
delete script;
- if (result == NULL) {
- return NULL;
+ if (status != Compiler::kSuccess) {
+ return false;
}
- return result;
+ return true;
}
-RSExecutable *RSCompilerDriver::build(RSScript &pScript, const char *pOut,
- const char *pRuntimePath) {
+bool RSCompilerDriver::build(RSScript &pScript, const char *pOut,
+ const char *pRuntimePath) {
RSInfo::DependencyTableTy dep_info;
RSInfo *info = RSInfo::ExtractFromSource(pScript.getSource(), dep_info);
if (info == NULL) {
- return NULL;
+ return false;
}
pScript.setInfo(info);
@@ -450,8 +430,12 @@
// offline (host) compilation.
pScript.setEmbedInfo(true);
- RSExecutable *result = compileScript(pScript, pOut, pOut, pRuntimePath,
- dep_info, true);
- return result;
+ Compiler::ErrorCode status = compileScript(pScript, pOut, pOut, pRuntimePath,
+ dep_info, true);
+ if (status != Compiler::kSuccess) {
+ return false;
+ }
+
+ return true;
}
diff --git a/lib/Renderscript/RSExecutable.cpp b/lib/Renderscript/RSExecutable.cpp
index be39f3c..ed06f30 100644
--- a/lib/Renderscript/RSExecutable.cpp
+++ b/lib/Renderscript/RSExecutable.cpp
@@ -129,7 +129,7 @@
return true;
}
- android::String8 info_path = RSInfo::GetPath(*mObjFile);
+ android::String8 info_path = RSInfo::GetPath(mObjFile->getName().c_str());
OutputFile info_file(info_path.string(), FileBase::kTruncate);
if (info_file.hasError()) {
diff --git a/lib/Renderscript/RSForEachExpand.cpp b/lib/Renderscript/RSForEachExpand.cpp
index bf1a199..ca0bb1b 100644
--- a/lib/Renderscript/RSForEachExpand.cpp
+++ b/lib/Renderscript/RSForEachExpand.cpp
@@ -23,16 +23,21 @@
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/MDBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/Pass.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/IR/DataLayout.h>
+#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
+#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include "bcc/Config/Config.h"
#include "bcc/Renderscript/RSInfo.h"
#include "bcc/Support/Log.h"
+#include "bcinfo/MetadataExtractor.h"
+
using namespace bcc;
namespace {
@@ -103,6 +108,12 @@
}
// Get the actual value we should use to step through an allocation.
+ //
+ // Normally the value we use to step through an allocation is given to us by
+ // the driver. However, for certain primitive data types, we can derive an
+ // integer constant for the step value. We use this integer constant whenever
+ // possible to allow further compiler optimizations to take place.
+ //
// DL - Target Data size/layout information.
// T - Type of allocation (should be a pointer).
// OrigStep - Original step increment (root.expand() input from driver).
@@ -123,30 +134,149 @@
}
}
- static bool hasIn(uint32_t Signature) {
- return Signature & 0x01;
+ /// @brief Returns the type of the ForEach stub parameter structure.
+ ///
+ /// Renderscript uses a single structure in which all parameters are passed
+ /// to keep the signature of the expanded function independent of the
+ /// parameters passed to it.
+ llvm::Type *getForeachStubTy() {
+ llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
+ llvm::Type *SizeTy = Int32Ty;
+ /* Defined in frameworks/base/libs/rs/rs_hal.h:
+ *
+ * struct RsForEachStubParamStruct {
+ * const void *in;
+ * void *out;
+ * const void *usr;
+ * size_t usr_len;
+ * uint32_t x;
+ * uint32_t y;
+ * uint32_t z;
+ * uint32_t lod;
+ * enum RsAllocationCubemapFace face;
+ * uint32_t ar[16];
+ * };
+ */
+ llvm::SmallVector<llvm::Type*, 9> StructTys;
+ StructTys.push_back(VoidPtrTy); // const void *in
+ StructTys.push_back(VoidPtrTy); // void *out
+ StructTys.push_back(VoidPtrTy); // const void *usr
+ StructTys.push_back(SizeTy); // size_t usr_len
+ StructTys.push_back(Int32Ty); // uint32_t x
+ StructTys.push_back(Int32Ty); // uint32_t y
+ StructTys.push_back(Int32Ty); // uint32_t z
+ StructTys.push_back(Int32Ty); // uint32_t lod
+ StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
+ StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
+
+ return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
}
- static bool hasOut(uint32_t Signature) {
- return Signature & 0x02;
+ /// @brief Create skeleton of the expanded function.
+ ///
+ /// This creates a function with the following signature:
+ ///
+ /// void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
+ /// uint32_t instep, uint32_t outstep)
+ ///
+ llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
+ llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
+ llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
+
+ llvm::SmallVector<llvm::Type*, 8> ParamTys;
+ ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
+ ParamTys.push_back(Int32Ty); // uint32_t x1
+ ParamTys.push_back(Int32Ty); // uint32_t x2
+ ParamTys.push_back(Int32Ty); // uint32_t instep
+ ParamTys.push_back(Int32Ty); // uint32_t outstep
+
+ llvm::FunctionType *FT =
+ llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
+ llvm::Function *F =
+ llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
+ OldName + ".expand", M);
+
+ llvm::Function::arg_iterator AI = F->arg_begin();
+
+ AI->setName("p");
+ AI++;
+ AI->setName("x1");
+ AI++;
+ AI->setName("x2");
+ AI++;
+ AI->setName("arg_instep");
+ AI++;
+ AI->setName("arg_outstep");
+ AI++;
+
+ assert(AI == F->arg_end());
+
+ llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F);
+ llvm::IRBuilder<> Builder(Begin);
+ Builder.CreateRetVoid();
+
+ return F;
}
- static bool hasUsrData(uint32_t Signature) {
- return Signature & 0x04;
- }
+ /// @brief Create an empty loop
+ ///
+ /// Create a loop of the form:
+ ///
+ /// for (i = LowerBound; i < UpperBound; i++)
+ /// ;
+ ///
+ /// After the loop has been created, the builder is set such that
+ /// instructions can be added to the loop body.
+ ///
+ /// @param Builder The builder to use to build this loop. The current
+ /// position of the builder is the position the loop
+ /// will be inserted.
+ /// @param LowerBound The first value of the loop iterator
+ /// @param UpperBound The maximal value of the loop iterator
+ /// @param LoopIV A reference that will be set to the loop iterator.
+ /// @return The BasicBlock that will be executed after the loop.
+ llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
+ llvm::Value *LowerBound,
+ llvm::Value *UpperBound,
+ llvm::PHINode **LoopIV) {
+ assert(LowerBound->getType() == UpperBound->getType());
- static bool hasX(uint32_t Signature) {
- return Signature & 0x08;
- }
+ llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
+ llvm::Value *Cond, *IVNext;
+ llvm::PHINode *IV;
- static bool hasY(uint32_t Signature) {
- return Signature & 0x10;
- }
+ CondBB = Builder.GetInsertBlock();
+ AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
+ HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent());
- static bool isKernel(uint32_t Signature) {
- return Signature & 0x20;
- }
+ // if (LowerBound < Upperbound)
+ // goto LoopHeader
+ // else
+ // goto AfterBB
+ CondBB->getTerminator()->eraseFromParent();
+ Builder.SetInsertPoint(CondBB);
+ Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
+ Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
+ // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
+ // iv.next = iv + 1
+ // if (iv.next < Upperbound)
+ // goto LoopHeader
+ // else
+ // goto AfterBB
+ Builder.SetInsertPoint(HeaderBB);
+ IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
+ IV->addIncoming(LowerBound, CondBB);
+ IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
+ IV->addIncoming(IVNext, HeaderBB);
+ Cond = Builder.CreateICmpULT(IVNext, UpperBound);
+ Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
+ AfterBB->setName("Exit");
+ Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
+ *LoopIV = IV;
+ return AfterBB;
+ }
public:
RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
@@ -173,57 +303,7 @@
llvm::DataLayout DL(M);
- llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
- llvm::Type *SizeTy = Int32Ty;
-
- /* Defined in frameworks/base/libs/rs/rs_hal.h:
- *
- * struct RsForEachStubParamStruct {
- * const void *in;
- * void *out;
- * const void *usr;
- * size_t usr_len;
- * uint32_t x;
- * uint32_t y;
- * uint32_t z;
- * uint32_t lod;
- * enum RsAllocationCubemapFace face;
- * uint32_t ar[16];
- * };
- */
- llvm::SmallVector<llvm::Type*, 9> StructTys;
- StructTys.push_back(VoidPtrTy); // const void *in
- StructTys.push_back(VoidPtrTy); // void *out
- StructTys.push_back(VoidPtrTy); // const void *usr
- StructTys.push_back(SizeTy); // size_t usr_len
- StructTys.push_back(Int32Ty); // uint32_t x
- StructTys.push_back(Int32Ty); // uint32_t y
- StructTys.push_back(Int32Ty); // uint32_t z
- StructTys.push_back(Int32Ty); // uint32_t lod
- StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
- StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
-
- llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
- StructTys, "RsForEachStubParamStruct")->getPointerTo();
-
- /* Create the function signature for our expanded function.
- * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
- * uint32_t instep, uint32_t outstep)
- */
- llvm::SmallVector<llvm::Type*, 8> ParamTys;
- ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
- ParamTys.push_back(Int32Ty); // uint32_t x1
- ParamTys.push_back(Int32Ty); // uint32_t x2
- ParamTys.push_back(Int32Ty); // uint32_t instep
- ParamTys.push_back(Int32Ty); // uint32_t outstep
-
- llvm::FunctionType *FT =
- llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
- llvm::Function *ExpandedFunc =
- llvm::Function::Create(FT,
- llvm::GlobalValue::ExternalLinkage,
- F->getName() + ".expand", M);
+ llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
// Create and name the actual arguments to this expanded function.
llvm::SmallVector<llvm::Argument*, 8> ArgVec;
@@ -245,54 +325,38 @@
llvm::Value *Arg_instep = ArgVec[3];
llvm::Value *Arg_outstep = ArgVec[4];
- Arg_p->setName("p");
- Arg_x1->setName("x1");
- Arg_x2->setName("x2");
- Arg_instep->setName("arg_instep");
- Arg_outstep->setName("arg_outstep");
-
llvm::Value *InStep = NULL;
llvm::Value *OutStep = NULL;
// Construct the actual function body.
- llvm::BasicBlock *Begin =
- llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
- llvm::IRBuilder<> Builder(Begin);
-
- // uint32_t X = x1;
- llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
- Builder.CreateStore(Arg_x1, AX);
+ llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
// Collect and construct the arguments for the kernel().
// Note that we load any loop-invariant arguments before entering the Loop.
llvm::Function::arg_iterator Args = F->arg_begin();
llvm::Type *InTy = NULL;
- llvm::AllocaInst *AIn = NULL;
- if (hasIn(Signature)) {
+ llvm::Value *InBasePtr = NULL;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
InTy = Args->getType();
- AIn = Builder.CreateAlloca(InTy, 0, "AIn");
InStep = getStepValue(&DL, InTy, Arg_instep);
InStep->setName("instep");
- Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
- Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
+ InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
Args++;
}
llvm::Type *OutTy = NULL;
- llvm::AllocaInst *AOut = NULL;
- if (hasOut(Signature)) {
+ llvm::Value *OutBasePtr = NULL;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
OutTy = Args->getType();
- AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
OutStep = getStepValue(&DL, OutTy, Arg_outstep);
OutStep->setName("outstep");
- Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
- Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
+ OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
Args++;
}
llvm::Value *UsrData = NULL;
- if (hasUsrData(Signature)) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
llvm::Type *UsrDataTy = Args->getType();
UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
@@ -300,27 +364,20 @@
Args++;
}
- if (hasX(Signature)) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
Args++;
}
llvm::Value *Y = NULL;
- if (hasY(Signature)) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
Args++;
}
bccAssert(Args == F->arg_end());
- llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
- llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
-
- // if (x1 < x2) goto Loop; else goto Exit;
- llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
- Builder.CreateCondBr(Cond, Loop, Exit);
-
- // Loop:
- Builder.SetInsertPoint(Loop);
+ llvm::PHINode *IV;
+ createLoop(Builder, Arg_x1, Arg_x2, &IV);
// Populate the actual call to kernel().
llvm::SmallVector<llvm::Value*, 8> RootArgs;
@@ -328,13 +385,32 @@
llvm::Value *InPtr = NULL;
llvm::Value *OutPtr = NULL;
- if (AIn) {
- InPtr = Builder.CreateLoad(AIn, "InPtr");
+ // Calculate the current input and output pointers
+ //
+ // We always calculate the input/output pointers with a GEP operating on i8
+ // values and only cast at the very end to OutTy. This is because the step
+ // between two values is given in bytes.
+ //
+ // TODO: We could further optimize the output by using a GEP operation of
+ // type 'OutTy' in cases where the element type of the allocation allows.
+ if (OutBasePtr) {
+ llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
+ OutOffset = Builder.CreateMul(OutOffset, OutStep);
+ OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
+ OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
+ }
+ if (InBasePtr) {
+ llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
+ InOffset = Builder.CreateMul(InOffset, InStep);
+ InPtr = Builder.CreateGEP(InBasePtr, InOffset);
+ InPtr = Builder.CreatePointerCast(InPtr, InTy);
+ }
+
+ if (InPtr) {
RootArgs.push_back(InPtr);
}
- if (AOut) {
- OutPtr = Builder.CreateLoad(AOut, "OutPtr");
+ if (OutPtr) {
RootArgs.push_back(OutPtr);
}
@@ -342,9 +418,8 @@
RootArgs.push_back(UsrData);
}
- // We always have to load X, since it is used to iterate through the loop.
- llvm::Value *X = Builder.CreateLoad(AX, "X");
- if (hasX(Signature)) {
+ llvm::Value *X = IV;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
RootArgs.push_back(X);
}
@@ -354,96 +429,19 @@
Builder.CreateCall(F, RootArgs);
- if (InPtr) {
- // InPtr += instep
- llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
- Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
- Builder.CreateStore(NewIn, AIn);
- }
-
- if (OutPtr) {
- // OutPtr += outstep
- llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
- Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
- Builder.CreateStore(NewOut, AOut);
- }
-
- // X++;
- llvm::Value *XPlusOne =
- Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
- Builder.CreateStore(XPlusOne, AX);
-
- // If (X < x2) goto Loop; else goto Exit;
- Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
- Builder.CreateCondBr(Cond, Loop, Exit);
-
- // Exit:
- Builder.SetInsertPoint(Exit);
- Builder.CreateRetVoid();
-
return true;
}
/* Expand a pass-by-value kernel.
*/
bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
- bccAssert(isKernel(Signature));
+ bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
// TODO: Refactor this to share functionality with ExpandFunction.
llvm::DataLayout DL(M);
- llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
- llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
- llvm::Type *SizeTy = Int32Ty;
-
- /* Defined in frameworks/base/libs/rs/rs_hal.h:
- *
- * struct RsForEachStubParamStruct {
- * const void *in;
- * void *out;
- * const void *usr;
- * size_t usr_len;
- * uint32_t x;
- * uint32_t y;
- * uint32_t z;
- * uint32_t lod;
- * enum RsAllocationCubemapFace face;
- * uint32_t ar[16];
- * };
- */
- llvm::SmallVector<llvm::Type*, 9> StructTys;
- StructTys.push_back(VoidPtrTy); // const void *in
- StructTys.push_back(VoidPtrTy); // void *out
- StructTys.push_back(VoidPtrTy); // const void *usr
- StructTys.push_back(SizeTy); // size_t usr_len
- StructTys.push_back(Int32Ty); // uint32_t x
- StructTys.push_back(Int32Ty); // uint32_t y
- StructTys.push_back(Int32Ty); // uint32_t z
- StructTys.push_back(Int32Ty); // uint32_t lod
- StructTys.push_back(Int32Ty); // enum RsAllocationCubemapFace
- StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
-
- llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
- StructTys, "RsForEachStubParamStruct")->getPointerTo();
-
- /* Create the function signature for our expanded function.
- * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
- * uint32_t instep, uint32_t outstep)
- */
- llvm::SmallVector<llvm::Type*, 8> ParamTys;
- ParamTys.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
- ParamTys.push_back(Int32Ty); // uint32_t x1
- ParamTys.push_back(Int32Ty); // uint32_t x2
- ParamTys.push_back(Int32Ty); // uint32_t instep
- ParamTys.push_back(Int32Ty); // uint32_t outstep
-
- llvm::FunctionType *FT =
- llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
- llvm::Function *ExpandedFunc =
- llvm::Function::Create(FT,
- llvm::GlobalValue::ExternalLinkage,
- F->getName() + ".expand", M);
+ llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
// Create and name the actual arguments to this expanded function.
llvm::SmallVector<llvm::Argument*, 8> ArgVec;
@@ -465,32 +463,28 @@
llvm::Value *Arg_instep = ArgVec[3];
llvm::Value *Arg_outstep = ArgVec[4];
- Arg_p->setName("p");
- Arg_x1->setName("x1");
- Arg_x2->setName("x2");
- Arg_instep->setName("arg_instep");
- Arg_outstep->setName("arg_outstep");
-
llvm::Value *InStep = NULL;
llvm::Value *OutStep = NULL;
// Construct the actual function body.
- llvm::BasicBlock *Begin =
- llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
- llvm::IRBuilder<> Builder(Begin);
+ llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
- // uint32_t X = x1;
- llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
- Builder.CreateStore(Arg_x1, AX);
+ // Create TBAA meta-data.
+ llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
+
+ llvm::MDBuilder MDHelper(*C);
+ TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
+ TBAAAllocation = MDHelper.createTBAANode("allocation", TBAARenderScript);
+ TBAAPointer = MDHelper.createTBAANode("pointer", TBAARenderScript);
// Collect and construct the arguments for the kernel().
// Note that we load any loop-invariant arguments before entering the Loop.
llvm::Function::arg_iterator Args = F->arg_begin();
llvm::Type *OutTy = NULL;
- llvm::AllocaInst *AOut = NULL;
bool PassOutByReference = false;
- if (hasOut(Signature)) {
+ llvm::LoadInst *OutBasePtr = NULL;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
llvm::Type *OutBaseTy = F->getReturnType();
if (OutBaseTy->isVoidTy()) {
PassOutByReference = true;
@@ -500,73 +494,83 @@
OutTy = OutBaseTy->getPointerTo();
// We don't increment Args, since we are using the actual return type.
}
- AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
OutStep = getStepValue(&DL, OutTy, Arg_outstep);
OutStep->setName("outstep");
- Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
- Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
+ OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
+ OutBasePtr->setMetadata("tbaa", TBAAPointer);
}
llvm::Type *InBaseTy = NULL;
llvm::Type *InTy = NULL;
- llvm::AllocaInst *AIn = NULL;
- if (hasIn(Signature)) {
+ llvm::LoadInst *InBasePtr = NULL;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
InBaseTy = Args->getType();
InTy =InBaseTy->getPointerTo();
- AIn = Builder.CreateAlloca(InTy, 0, "AIn");
InStep = getStepValue(&DL, InTy, Arg_instep);
InStep->setName("instep");
- Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
- Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
+ InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
+ InBasePtr->setMetadata("tbaa", TBAAPointer);
Args++;
}
// No usrData parameter on kernels.
- bccAssert(!hasUsrData(Signature));
+ bccAssert(
+ !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
- if (hasX(Signature)) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
Args++;
}
llvm::Value *Y = NULL;
- if (hasY(Signature)) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
Args++;
}
bccAssert(Args == F->arg_end());
- llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
- llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
-
- // if (x1 < x2) goto Loop; else goto Exit;
- llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
- Builder.CreateCondBr(Cond, Loop, Exit);
-
- // Loop:
- Builder.SetInsertPoint(Loop);
+ llvm::PHINode *IV;
+ createLoop(Builder, Arg_x1, Arg_x2, &IV);
// Populate the actual call to kernel().
llvm::SmallVector<llvm::Value*, 8> RootArgs;
llvm::Value *InPtr = NULL;
- llvm::Value *In = NULL;
llvm::Value *OutPtr = NULL;
+ // Calculate the current input and output pointers
+ //
+ // We always calculate the input/output pointers with a GEP operating on i8
+ // values and only cast at the very end to OutTy. This is because the step
+ // between two values is given in bytes.
+ //
+ // TODO: We could further optimize the output by using a GEP operation of
+ // type 'OutTy' in cases where the element type of the allocation allows.
+ if (OutBasePtr) {
+ llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
+ OutOffset = Builder.CreateMul(OutOffset, OutStep);
+ OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
+ OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
+ }
+ if (InBasePtr) {
+ llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
+ InOffset = Builder.CreateMul(InOffset, InStep);
+ InPtr = Builder.CreateGEP(InBasePtr, InOffset);
+ InPtr = Builder.CreatePointerCast(InPtr, InTy);
+ }
+
if (PassOutByReference) {
- OutPtr = Builder.CreateLoad(AOut, "OutPtr");
RootArgs.push_back(OutPtr);
}
- if (AIn) {
- InPtr = Builder.CreateLoad(AIn, "InPtr");
- In = Builder.CreateLoad(InPtr, "In");
+ if (InPtr) {
+ llvm::LoadInst *In = Builder.CreateLoad(InPtr, "In");
+ In->setMetadata("tbaa", TBAAAllocation);
RootArgs.push_back(In);
}
- // We always have to load X, since it is used to iterate through the loop.
- llvm::Value *X = Builder.CreateLoad(AX, "X");
- if (hasX(Signature)) {
+ llvm::Value *X = IV;
+ if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
RootArgs.push_back(X);
}
@@ -576,58 +580,125 @@
llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
- if (AOut && !PassOutByReference) {
- OutPtr = Builder.CreateLoad(AOut, "OutPtr");
- Builder.CreateStore(RetVal, OutPtr);
+ if (OutPtr && !PassOutByReference) {
+ llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
+ Store->setMetadata("tbaa", TBAAAllocation);
}
- if (InPtr) {
- // InPtr += instep
- llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
- Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
- Builder.CreateStore(NewIn, AIn);
- }
-
- if (OutPtr) {
- // OutPtr += outstep
- llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
- Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
- Builder.CreateStore(NewOut, AOut);
- }
-
- // X++;
- llvm::Value *XPlusOne =
- Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
- Builder.CreateStore(XPlusOne, AX);
-
- // If (X < x2) goto Loop; else goto Exit;
- Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
- Builder.CreateCondBr(Cond, Loop, Exit);
-
- // Exit:
- Builder.SetInsertPoint(Exit);
- Builder.CreateRetVoid();
-
return true;
}
+ /// @brief Checks if pointers to allocation internals are exposed
+ ///
+ /// Verifies whether the script gains access to pointers into a RenderScript
+ /// Allocation, either through the parameters passed to a kernel or through
+ /// calls to the runtime library. Only if we control all loads from and
+ /// stores to allocation data, and all run-time internal accesses are
+ /// annotated with RenderScript TBAA metadata, can we safely use TBAA to
+ /// distinguish between generic and from-allocation pointers.
+ ///
+ /// @return true if such pointers may be exposed, false otherwise.
+ bool allocPointersExposed(llvm::Module &M) {
+ // Old-style kernel functions can expose pointers to elements within
+ // allocations.
+ // TODO: Extend analysis to allow simple cases of old-style kernels.
+ for (RSInfo::ExportForeachFuncListTy::const_iterator
+ func_iter = mFuncs.begin(), func_end = mFuncs.end();
+ func_iter != func_end; func_iter++) {
+ const char *Name = func_iter->first;
+ uint32_t Signature = func_iter->second;
+ if (M.getFunction(Name) &&
+ !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
+ return true;
+ }
+ }
+
+ // Library functions that expose a pointer to an Allocation or that lack
+ // RenderScript tbaa annotations. Not static: must not grow across calls.
+ std::vector<std::string> Funcs;
+
+ // rsGetElementAt(...)
+ Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
+ Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
+ Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
+ // rsSetElementAt()
+ Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
+ Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
+ Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
+ // rsGetElementAtYuv_uchar_Y()
+ Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
+ // rsGetElementAtYuv_uchar_U()
+ Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
+ // rsGetElementAtYuv_uchar_V()
+ Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
+
+ for (std::vector<std::string>::iterator FI = Funcs.begin(),
+ FE = Funcs.end();
+ FI != FE; ++FI) {
+ llvm::Function *F = M.getFunction(*FI);
+
+ if (!F) { // Unknown runtime: conservatively report exposure.
+ ALOGE("Missing run-time function '%s'", FI->c_str());
+ return true;
+ }
+
+ if (!F->use_empty()) { // Any use means the script can reach the data.
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
+ ///
+ /// The TBAA metadata used to annotate loads/stores from RenderScript
+ /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
+ /// root node. LLVM assumes may-alias for all nodes in unrelated alias
+ /// analysis trees. This function makes the RenderScript TBAA a subtree of the
+ /// normal C/C++ TBAA tree, alongside the normal C/C++ types. With the connected
+ /// trees every access to an Allocation is resolved to no-alias if compared to
+ /// a normal C/C++ access.
+ void connectRenderScriptTBAAMetadata(llvm::Module &M) {
+ llvm::MDBuilder MDHelper(*C);
+ llvm::MDNode *TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
+
+ llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA");
+ llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript", TBAARoot);
+
+ TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
+ }
+
virtual bool runOnModule(llvm::Module &M) {
bool Changed = false;
this->M = &M;
C = &M.getContext();
+ bool AllocsExposed = allocPointersExposed(M);
+
for (RSInfo::ExportForeachFuncListTy::const_iterator
func_iter = mFuncs.begin(), func_end = mFuncs.end();
func_iter != func_end; func_iter++) {
const char *name = func_iter->first;
uint32_t signature = func_iter->second;
llvm::Function *kernel = M.getFunction(name);
- if (kernel && isKernel(signature)) {
- Changed |= ExpandKernel(kernel, signature);
+ if (kernel) {
+ if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
+ Changed |= ExpandKernel(kernel, signature);
+ kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
+ } else if (kernel->getReturnType()->isVoidTy()) {
+ Changed |= ExpandFunction(kernel, signature);
+ kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
+ } else {
+ // There are some graphics root functions that are not
+ // expanded, but that will be called directly. For those
+ // functions, we can not set the linkage to internal.
+ }
}
- else if (kernel && kernel->getReturnType()->isVoidTy()) {
- Changed |= ExpandFunction(kernel, signature);
- }
+ }
+
+ if (!AllocsExposed) {
+ connectRenderScriptTBAAMetadata(M);
}
return Changed;
diff --git a/lib/Renderscript/RSInfo.cpp b/lib/Renderscript/RSInfo.cpp
index 496f739..cbb4af9 100644
--- a/lib/Renderscript/RSInfo.cpp
+++ b/lib/Renderscript/RSInfo.cpp
@@ -28,7 +28,9 @@
#include "bcc/Support/FileBase.h"
#include "bcc/Support/Log.h"
+#ifdef HAVE_ANDROID_OS
#include <cutils/properties.h>
+#endif
using namespace bcc;
@@ -86,8 +88,8 @@
#endif // TARGET_BUILD
}
-android::String8 RSInfo::GetPath(const FileBase &pFile) {
- android::String8 result(pFile.getName().c_str());
+android::String8 RSInfo::GetPath(const char *pFilename) {
+ android::String8 result(pFilename);
result.append(".info");
return result;
}
@@ -391,6 +393,7 @@
result = FP_Relaxed;
}
+#ifdef HAVE_ANDROID_OS
// Provide an override for precsion via adb shell setprop
// adb shell setprop debug.rs.precision rs_fp_full
// adb shell setprop debug.rs.precision rs_fp_relaxed
@@ -410,6 +413,7 @@
result = FP_Full;
}
}
+#endif
return result;
}
diff --git a/lib/Renderscript/runtime/Android.mk b/lib/Renderscript/runtime/Android.mk
deleted file mode 100755
index 08cefb6..0000000
--- a/lib/Renderscript/runtime/Android.mk
+++ /dev/null
@@ -1,115 +0,0 @@
-#
-# Copyright (C) 2011-2012 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH := $(call my-dir)
-
-# C/LLVM-IR source files for the library
-clcore_base_files := \
- rs_allocation.c \
- rs_cl.c \
- rs_core.c \
- rs_element.c \
- rs_mesh.c \
- rs_matrix.c \
- rs_program.c \
- rs_sample.c \
- rs_sampler.c \
- convert.ll \
- rsClamp.ll
-
-clcore_files := \
- $(clcore_base_files) \
- math.ll \
- arch/generic.c \
- arch/sqrt.c \
- arch/dot_length.c
-
-clcore_neon_files := \
- $(clcore_base_files) \
- math.ll \
- arch/neon.ll \
- arch/sqrt.c \
- arch/dot_length.c
-
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
- clcore_x86_files := \
- $(clcore_base_files) \
- arch/x86_generic.c \
- arch/x86_clamp.ll \
- arch/x86_math.ll
-
- ifeq ($(ARCH_X86_HAVE_SSE3), true)
- clcore_x86_files += arch/x86_dot_length.ll
- else
- # FIXME: without SSE3, it is still able to get better code through PSHUFD. But,
- # so far, there is no such device with SSE2 only.
- clcore_x86_files += arch/dot_length.c
- endif
-endif
-
-ifeq "REL" "$(PLATFORM_VERSION_CODENAME)"
- RS_VERSION := $(PLATFORM_SDK_VERSION)
-else
- # Increment by 1 whenever this is not a final release build, since we want to
- # be able to see the RS version number change during development.
- # See build/core/version_defaults.mk for more information about this.
- RS_VERSION := "(1 + $(PLATFORM_SDK_VERSION))"
-endif
-
-# Build the base version of the library
-include $(CLEAR_VARS)
-LOCAL_MODULE := libclcore.bc
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-LOCAL_SRC_FILES := $(clcore_files)
-
-include $(LOCAL_PATH)/build_bc_lib.mk
-
-# Build a debug version of the library
-include $(CLEAR_VARS)
-LOCAL_MODULE := libclcore_debug.bc
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-rs_debug_runtime := 1
-LOCAL_SRC_FILES := $(clcore_files)
-
-include $(LOCAL_PATH)/build_bc_lib.mk
-
-# Build an optimized version of the library if the device is SSE2- or above
-# capable.
-ifeq ($(ARCH_X86_HAVE_SSE2),true)
-include $(CLEAR_VARS)
-LOCAL_MODULE := libclcore_x86.bc
-LOCAL_MODULE_TAGS := optional
-LOCAL_MODULE_CLASS := SHARED_LIBRARIES
-LOCAL_SRC_FILES := $(clcore_x86_files)
-
-include $(LOCAL_PATH)/build_bc_lib.mk
-endif
-
-# Build a NEON-enabled version of the library (if possible)
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
-# Disable NEON on cortex-a15 temporarily
-ifneq ($(strip $(TARGET_CPU_VARIANT)), cortex-a15)
- include $(CLEAR_VARS)
- LOCAL_MODULE := libclcore_neon.bc
- LOCAL_MODULE_TAGS := optional
- LOCAL_MODULE_CLASS := SHARED_LIBRARIES
- LOCAL_SRC_FILES := $(clcore_neon_files)
-
- include $(LOCAL_PATH)/build_bc_lib.mk
-endif
-endif
diff --git a/lib/Renderscript/runtime/arch/dot_length.c b/lib/Renderscript/runtime/arch/dot_length.c
deleted file mode 100644
index 94c99b6..0000000
--- a/lib/Renderscript/runtime/arch/dot_length.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rs_types.rsh"
-
-extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
- return lhs * rhs;
-}
-extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
- return lhs.x*rhs.x + lhs.y*rhs.y;
-}
-extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
- return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
-}
-extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
- return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
-}
-
-extern float __attribute__((overloadable)) fabs(float);
-extern float __attribute__((overloadable)) sqrt(float);
-
-extern float __attribute__((overloadable)) length(float v) {
- return fabs(v);
-}
-extern float __attribute__((overloadable)) length(float2 v) {
- return sqrt(v.x*v.x + v.y*v.y);
-}
-extern float __attribute__((overloadable)) length(float3 v) {
- return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
-}
-extern float __attribute__((overloadable)) length(float4 v) {
- return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
-}
-
diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
deleted file mode 100644
index 3724e22..0000000
--- a/lib/Renderscript/runtime/arch/generic.c
+++ /dev/null
@@ -1,936 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rs_types.rsh"
-
-extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
-extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
-extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
-extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
-extern float4 __attribute__((overloadable)) convert_float4(uchar4);
-extern float __attribute__((overloadable)) sqrt(float);
-
-/*
- * CLAMP
- */
-extern float __attribute__((overloadable)) clamp(float amount, float low, float high) {
- return amount < low ? low : (amount > high ? high : amount);
-}
-
-extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high) {
- float2 r;
- r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
- r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high) {
- float3 r;
- r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
- r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
- r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high) {
- float4 r;
- r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
- r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
- r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
- r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);
- return r;
-}
-
-extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high) {
- float2 r;
- r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
- r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high) {
- float3 r;
- r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
- r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
- r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high) {
- float4 r;
- r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
- r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
- r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
- r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);
- return r;
-}
-
-
-/*
- * FMAX
- */
-
-extern float __attribute__((overloadable)) fmax(float v1, float v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
- float2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
- float3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
- float4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
- float2 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
- float3 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- r.z = v1.z > v2 ? v1.z : v2;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
- float4 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- r.z = v1.z > v2 ? v1.z : v2;
- r.w = v1.w > v2 ? v1.w : v2;
- return r;
-}
-
-extern float __attribute__((overloadable)) fmin(float v1, float v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-
-/*
- * FMIN
- */
-extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
- float2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
- float3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
- float4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
- float2 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
- float3 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- r.z = v1.z < v2 ? v1.z : v2;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
- float4 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- r.z = v1.z < v2 ? v1.z : v2;
- r.w = v1.w < v2 ? v1.w : v2;
- return r;
-}
-
-
-/*
- * MAX
- */
-
-extern char __attribute__((overloadable)) max(char v1, char v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
- char2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
- char3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
- char4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern short __attribute__((overloadable)) max(short v1, short v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
- short2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
- short3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
- short4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int __attribute__((overloadable)) max(int v1, int v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
- int2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
- int3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
- int4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
- long2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
- long3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
- long4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
- uchar2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
- uchar3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
- uchar4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
- ushort2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
- ushort3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
- ushort4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
- uint2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
- uint3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
- uint4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
- ulong2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
- ulong3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
- ulong4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float __attribute__((overloadable)) max(float v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
- return fmax(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
- return fmax(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
- return fmax(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
- return fmax(v1, v2);
-}
-
-
-/*
- * MIN
- */
-
-extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
- char2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
- char3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
- char4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
- short2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
- short3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
- short4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
- int2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
- int3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
- int4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
- long2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
- long3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
- long4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
- uchar2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
- uchar3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
- uchar4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
- ushort2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
- ushort3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
- ushort4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
- uint2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
- uint3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
- uint4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
- ulong2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
- ulong3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
- ulong4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float __attribute__((overloadable)) min(float v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
- return fmin(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
- return fmin(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
- return fmin(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
- return fmin(v1, v2);
-}
-
-/*
- * YUV
- */
-
-extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
- short Y = ((short)y) - 16;
- short U = ((short)u) - 128;
- short V = ((short)v) - 128;
-
- short4 p;
- p.r = (Y * 298 + V * 409 + 128) >> 8;
- p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
- p.b = (Y * 298 + U * 516 + 128) >> 8;
- p.a = 255;
- p.r = rsClamp(p.r, (short)0, (short)255);
- p.g = rsClamp(p.g, (short)0, (short)255);
- p.b = rsClamp(p.b, (short)0, (short)255);
-
- return convert_uchar4(p);
-}
-
-static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
-static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
-
-extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
- float4 color = (float)y * 0.003921569f;
- float4 fU = ((float)u) - 128.f;
- float4 fV = ((float)v) - 128.f;
-
- color += fU * yuv_U_values;
- color += fV * yuv_V_values;
- color = clamp(color, 0.f, 1.f);
- return color;
-}
-
-
-/*
- * half_RECIP
- */
-
-extern float __attribute__((overloadable)) half_recip(float v) {
- // FIXME: actual algorithm for generic approximate reciprocal
- return 1.f / v;
-}
-
-extern float2 __attribute__((overloadable)) half_recip(float2 v) {
- float2 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_recip(float3 v) {
- float3 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- r.z = half_recip(r.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_recip(float4 v) {
- float4 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- r.z = half_recip(r.z);
- r.w = half_recip(r.w);
- return r;
-}
-
-
-/*
- * half_SQRT
- */
-
-extern float __attribute__((overloadable)) half_sqrt(float v) {
- return sqrt(v);
-}
-
-extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
- float2 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
- float3 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- r.z = half_sqrt(v.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
- float4 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- r.z = half_sqrt(v.z);
- r.w = half_sqrt(v.w);
- return r;
-}
-
-
-/*
- * half_rsqrt
- */
-
-extern float __attribute__((overloadable)) half_rsqrt(float v) {
- return 1.f / sqrt(v);
-}
-
-extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
- float2 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
- float3 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- r.z = half_rsqrt(v.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
- float4 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- r.z = half_rsqrt(v.z);
- r.w = half_rsqrt(v.w);
- return r;
-}
-
-/**
- * matrix ops
- */
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
- float4 ret;
- ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
- ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
- ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
- ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
- return ret;
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
- float4 ret;
- ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
- ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
- ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
- ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
- return ret;
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
- float4 ret;
- ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
- ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
- ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
- ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
- return ret;
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
- float3 ret;
- ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
- ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
- ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
- return ret;
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
- float3 ret;
- ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
- ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
- ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
- return ret;
-}
-
-/**
- * Pixel Ops
- */
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
-{
- uchar4 c;
- c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
- c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
- c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
- c.w = 255;
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
-{
- uchar4 c;
- c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
- c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
- c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
- c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
-{
- color *= 255.f;
- color += 0.5f;
- color = clamp(color, 0.f, 255.f);
- uchar4 c = {color.x, color.y, color.z, 255};
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
-{
- color *= 255.f;
- color += 0.5f;
- color = clamp(color, 0.f, 255.f);
- uchar4 c = {color.x, color.y, color.z, color.w};
- return c;
-}
-
diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
deleted file mode 100644
index cc63631..0000000
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ /dev/null
@@ -1,1037 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; INTRINSICS ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
-
-declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; HELPERS ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define internal <4 x float> @smear_4f(float %in) nounwind readnone alwaysinline {
- %1 = insertelement <4 x float> undef, float %in, i32 0
- %2 = insertelement <4 x float> %1, float %in, i32 1
- %3 = insertelement <4 x float> %2, float %in, i32 2
- %4 = insertelement <4 x float> %3, float %in, i32 3
- ret <4 x float> %4
-}
-
-define internal <2 x float> @smear_2f(float %in) nounwind readnone alwaysinline {
- %1 = insertelement <2 x float> undef, float %in, i32 0
- %2 = insertelement <2 x float> %1, float %in, i32 1
- ret <2 x float> %2
-}
-
-define internal <4 x i32> @smear_4i32(i32 %in) nounwind readnone alwaysinline {
- %1 = insertelement <4 x i32> undef, i32 %in, i32 0
- %2 = insertelement <4 x i32> %1, i32 %in, i32 1
- %3 = insertelement <4 x i32> %2, i32 %in, i32 2
- %4 = insertelement <4 x i32> %3, i32 %in, i32 3
- ret <4 x i32> %4
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; CLAMP ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %low, <4 x float> %high) nounwind readonly {
- %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %value, <4 x float> %high) nounwind readnone
- %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %low) nounwind readnone
- ret <4 x float> %2
-}
-
-define <4 x float> @_Z5clampDv4_fff(<4 x float> %value, float %low, float %high) nounwind readonly {
- %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
- %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
- %out = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %value, <4 x float> %_low, <4 x float> %_high) nounwind readonly
- ret <4 x float> %out
-}
-
-define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %value, <3 x float> %low, <3 x float> %high) nounwind readonly {
- %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %_low = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %_high = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
- %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
- %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %c
-}
-
-define <3 x float> @_Z5clampDv3_fff(<3 x float> %value, float %low, float %high) nounwind readonly {
- %_value = shufflevector <3 x float> %value, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %_high = tail call <4 x float> @smear_4f(float %high) nounwind readnone
- %_low = tail call <4 x float> @smear_4f(float %low) nounwind readnone
- %a = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %_value, <4 x float> %_high) nounwind readnone
- %b = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %_low) nounwind readnone
- %c = shufflevector <4 x float> %b, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %c
-}
-
-define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %value, <2 x float> %low, <2 x float> %high) nounwind readonly {
- %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %high) nounwind readnone
- %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %1, <2 x float> %low) nounwind readnone
- ret <2 x float> %2
-}
-
-define <2 x float> @_Z5clampDv2_fff(<2 x float> %value, float %low, float %high) nounwind readonly {
- %_high = tail call <2 x float> @smear_2f(float %high) nounwind readnone
- %_low = tail call <2 x float> @smear_2f(float %low) nounwind readnone
- %a = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %value, <2 x float> %_high) nounwind readnone
- %b = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %_low) nounwind readnone
- ret <2 x float> %b
-}
-
-define float @_Z5clampfff(float %value, float %low, float %high) nounwind readonly {
- %1 = fcmp olt float %value, %high
- %2 = select i1 %1, float %value, float %high
- %3 = fcmp ogt float %2, %low
- %4 = select i1 %3, float %2, float %low
- ret float %4
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; FMAX ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
- %1 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
- ret <4 x float> %1
-}
-
-define <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
- %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
- %2 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
- ret <4 x float> %2
-}
-
-define <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
- %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
- %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %4
-}
-
-define <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
- %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
- %3 = tail call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
- %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %c
-}
-
-define <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
- %1 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
- ret <2 x float> %1
-}
-
-define <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
- %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
- %2 = tail call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
- ret <2 x float> %2
-}
-
-define float @_Z4fmaxff(float %v1, float %v2) nounwind readonly {
- %1 = fcmp ogt float %v1, %v2
- %2 = select i1 %1, float %v1, float %v2
- ret float %2
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; FMIN ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readonly {
- %1 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %v2) nounwind readnone
- ret <4 x float> %1
-}
-
-define <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2) nounwind readonly {
- %1 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
- %2 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %v1, <4 x float> %1) nounwind readnone
- ret <4 x float> %2
-}
-
-define <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readonly {
- %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x float> %v2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
- %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %4
-}
-
-define <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2) nounwind readonly {
- %1 = shufflevector <3 x float> %v1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = tail call <4 x float> @smear_4f(float %v2) nounwind readnone
- %3 = tail call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %1, <4 x float> %2) nounwind readnone
- %c = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %c
-}
-
-define <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readonly {
- %1 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %v2) nounwind readnone
- ret <2 x float> %1
-}
-
-define <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2) nounwind readonly {
- %1 = tail call <2 x float> @smear_2f(float %v2) nounwind readnone
- %2 = tail call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %v1, <2 x float> %1) nounwind readnone
- ret <2 x float> %2
-}
-
-define float @_Z4fminff(float %v1, float %v2) nounwind readnone {
- %1 = fcmp olt float %v1, %v2
- %2 = select i1 %1, float %v1, float %v2
- ret float %2
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; MAX ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define signext i8 @_Z3maxcc(i8 signext %v1, i8 signext %v2) nounwind readnone {
- %1 = icmp sgt i8 %v1, %v2
- %2 = select i1 %1, i8 %v1, i8 %v2
- ret i8 %2
-}
-
-define <2 x i8> @_Z3maxDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
- %1 = sext <2 x i8> %v1 to <2 x i32>
- %2 = sext <2 x i8> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <3 x i8> @_Z3maxDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
- %1 = sext <3 x i8> %v1 to <3 x i32>
- %2 = sext <3 x i8> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i8>
- ret <3 x i8> %7
-}
-
-define <4 x i8> @_Z3maxDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
- %1 = sext <4 x i8> %v1 to <4 x i32>
- %2 = sext <4 x i8> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define signext i16 @_Z3maxss(i16 signext %v1, i16 signext %v2) nounwind readnone {
- %1 = icmp sgt i16 %v1, %v2
- %2 = select i1 %1, i16 %v1, i16 %v2
- ret i16 %2
-}
-
-define <2 x i16> @_Z3maxDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
- %1 = sext <2 x i16> %v1 to <2 x i32>
- %2 = sext <2 x i16> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <3 x i16> @_Z3maxDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
- %1 = sext <3 x i16> %v1 to <3 x i32>
- %2 = sext <3 x i16> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i16>
- ret <3 x i16> %7
-}
-
-define <4 x i16> @_Z3maxDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
- %1 = sext <4 x i16> %v1 to <4 x i32>
- %2 = sext <4 x i16> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i16>
- ret <4 x i16> %4
-}
-
-define i32 @_Z3maxii(i32 %v1, i32 %v2) nounwind readnone {
- %1 = icmp sgt i32 %v1, %v2
- %2 = select i1 %1, i32 %v1, i32 %v2
- ret i32 %2
-}
-
-define <2 x i32> @_Z3maxDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
- %1 = tail call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
- ret <2 x i32> %1
-}
-
-define <3 x i32> @_Z3maxDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
- %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x i32 > @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x i32> %4
-}
-
-define <4 x i32> @_Z3maxDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
- %1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
- ret <4 x i32> %1
-}
-
-define i64 @_Z3maxxx(i64 %v1, i64 %v2) nounwind readnone {
- %1 = icmp sgt i64 %v1, %v2
- %2 = select i1 %1, i64 %v1, i64 %v2
- ret i64 %2
-}
-
-; TODO: long vector types
-
-define zeroext i8 @_Z3maxhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
- %1 = icmp ugt i8 %v1, %v2
- %2 = select i1 %1, i8 %v1, i8 %v2
- ret i8 %2
-}
-
-define <2 x i8> @_Z3maxDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
- %1 = zext <2 x i8> %v1 to <2 x i32>
- %2 = zext <2 x i8> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <3 x i8> @_Z3maxDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
- %1 = zext <3 x i8> %v1 to <3 x i32>
- %2 = zext <3 x i8> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i8>
- ret <3 x i8> %7
-}
-
-define <4 x i8> @_Z3maxDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
- %1 = zext <4 x i8> %v1 to <4 x i32>
- %2 = zext <4 x i8> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define zeroext i16 @_Z3maxtt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
- %1 = icmp ugt i16 %v1, %v2
- %2 = select i1 %1, i16 %v1, i16 %v2
- ret i16 %2
-}
-
-define <2 x i16> @_Z3maxDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
- %1 = zext <2 x i16> %v1 to <2 x i32>
- %2 = zext <2 x i16> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <3 x i16> @_Z3maxDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
- %1 = zext <3 x i16> %v1 to <3 x i32>
- %2 = zext <3 x i16> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i16>
- ret <3 x i16> %7
-}
-
-define <4 x i16> @_Z3maxDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
- %1 = zext <4 x i16> %v1 to <4 x i32>
- %2 = zext <4 x i16> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i16>
- ret <4 x i16> %4
-}
-
-define i32 @_Z3maxjj(i32 %v1, i32 %v2) nounwind readnone {
- %1 = icmp ugt i32 %v1, %v2
- %2 = select i1 %1, i32 %v1, i32 %v2
- ret i32 %2
-}
-
-define <2 x i32> @_Z3maxDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
- %1 = tail call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
- ret <2 x i32> %1
-}
-
-define <3 x i32> @_Z3maxDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
- %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x i32 > @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x i32> %4
-}
-
-define <4 x i32> @_Z3maxDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
- %1 = tail call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
- ret <4 x i32> %1
-}
-
-define i64 @_Z3maxyy(i64 %v1, i64 %v2) nounwind readnone {
- %1 = icmp ugt i64 %v1, %v2
- %2 = select i1 %1, i64 %v1, i64 %v2
- ret i64 %2
-}
-
-; TODO: long vector types
-
-define float @_Z3maxff(float %v1, float %v2) nounwind readnone {
- %1 = tail call float @_Z4fmaxff(float %v1, float %v2)
- ret float %1
-}
-
-define <2 x float> @_Z3maxDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
- %1 = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %v1, <2 x float> %v2)
- ret <2 x float> %1
-}
-
-define <2 x float> @_Z3maxDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %v1, float %v2)
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z3maxDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
- %1 = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %v1, <3 x float> %v2)
- ret <3 x float> %1
-}
-
-define <3 x float> @_Z3maxDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %v1, float %v2)
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z3maxDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
- %1 = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %v1, <4 x float> %v2)
- ret <4 x float> %1
-}
-
-define <4 x float> @_Z3maxDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %v1, float %v2)
- ret <4 x float> %1
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; MIN ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define signext i8 @_Z3mincc(i8 signext %v1, i8 signext %v2) nounwind readnone {
- %1 = icmp slt i8 %v1, %v2
- %2 = select i1 %1, i8 %v1, i8 %v2
- ret i8 %2
-}
-
-define <2 x i8> @_Z3minDv2_cS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
- %1 = sext <2 x i8> %v1 to <2 x i32>
- %2 = sext <2 x i8> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <3 x i8> @_Z3minDv3_cS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
- %1 = sext <3 x i8> %v1 to <3 x i32>
- %2 = sext <3 x i8> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i8>
- ret <3 x i8> %7
-}
-
-define <4 x i8> @_Z3minDv4_cS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
- %1 = sext <4 x i8> %v1 to <4 x i32>
- %2 = sext <4 x i8> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define signext i16 @_Z3minss(i16 signext %v1, i16 signext %v2) nounwind readnone {
- %1 = icmp slt i16 %v1, %v2
- %2 = select i1 %1, i16 %v1, i16 %v2
- ret i16 %2
-}
-
-define <2 x i16> @_Z3minDv2_sS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
- %1 = sext <2 x i16> %v1 to <2 x i32>
- %2 = sext <2 x i16> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <3 x i16> @_Z3minDv3_sS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
- %1 = sext <3 x i16> %v1 to <3 x i32>
- %2 = sext <3 x i16> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i16>
- ret <3 x i16> %7
-}
-
-define <4 x i16> @_Z3minDv4_sS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
- %1 = sext <4 x i16> %v1 to <4 x i32>
- %2 = sext <4 x i16> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i16>
- ret <4 x i16> %4
-}
-
-define i32 @_Z3minii(i32 %v1, i32 %v2) nounwind readnone {
- %1 = icmp slt i32 %v1, %v2
- %2 = select i1 %1, i32 %v1, i32 %v2
- ret i32 %2
-}
-
-define <2 x i32> @_Z3minDv2_iS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
- %1 = tail call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
- ret <2 x i32> %1
-}
-
-define <3 x i32> @_Z3minDv3_iS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
- %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x i32 > @llvm.arm.neon.vmins.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x i32> %4
-}
-
-define <4 x i32> @_Z3minDv4_iS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
- %1 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
- ret <4 x i32> %1
-}
-
-define i64 @_Z3minxx(i64 %v1, i64 %v2) nounwind readnone {
- %1 = icmp slt i64 %v1, %v2
- %2 = select i1 %1, i64 %v1, i64 %v2
- ret i64 %2
-}
-
-; TODO: long vector types
-
-define zeroext i8 @_Z3minhh(i8 zeroext %v1, i8 zeroext %v2) nounwind readnone {
- %1 = icmp ult i8 %v1, %v2
- %2 = select i1 %1, i8 %v1, i8 %v2
- ret i8 %2
-}
-
-define <2 x i8> @_Z3minDv2_hS_(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
- %1 = zext <2 x i8> %v1 to <2 x i32>
- %2 = zext <2 x i8> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i8>
- ret <2 x i8> %4
-}
-
-define <3 x i8> @_Z3minDv3_hS_(<3 x i8> %v1, <3 x i8> %v2) nounwind readnone {
- %1 = zext <3 x i8> %v1 to <3 x i32>
- %2 = zext <3 x i8> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i8>
- ret <3 x i8> %7
-}
-
-define <4 x i8> @_Z3minDv4_hS_(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
- %1 = zext <4 x i8> %v1 to <4 x i32>
- %2 = zext <4 x i8> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i8>
- ret <4 x i8> %4
-}
-
-define zeroext i16 @_Z3mintt(i16 zeroext %v1, i16 zeroext %v2) nounwind readnone {
- %1 = icmp ult i16 %v1, %v2
- %2 = select i1 %1, i16 %v1, i16 %v2
- ret i16 %2
-}
-
-define <2 x i16> @_Z3minDv2_tS_(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
- %1 = zext <2 x i16> %v1 to <2 x i32>
- %2 = zext <2 x i16> %v2 to <2 x i32>
- %3 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %1, <2 x i32> %2) nounwind readnone
- %4 = trunc <2 x i32> %3 to <2 x i16>
- ret <2 x i16> %4
-}
-
-define <3 x i16> @_Z3minDv3_tS_(<3 x i16> %v1, <3 x i16> %v2) nounwind readnone {
- %1 = zext <3 x i16> %v1 to <3 x i32>
- %2 = zext <3 x i16> %v2 to <3 x i32>
- %3 = shufflevector <3 x i32> %1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <3 x i32> %2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %3, <4 x i32> %4) nounwind readnone
- %6 = shufflevector <4 x i32> %5, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- %7 = trunc <3 x i32> %6 to <3 x i16>
- ret <3 x i16> %7
-}
-
-define <4 x i16> @_Z3minDv4_tS_(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
- %1 = zext <4 x i16> %v1 to <4 x i32>
- %2 = zext <4 x i16> %v2 to <4 x i32>
- %3 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = trunc <4 x i32> %3 to <4 x i16>
- ret <4 x i16> %4
-}
-
-define i32 @_Z3minjj(i32 %v1, i32 %v2) nounwind readnone {
- %1 = icmp ult i32 %v1, %v2
- %2 = select i1 %1, i32 %v1, i32 %v2
- ret i32 %2
-}
-
-define <2 x i32> @_Z3minDv2_jS_(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
- %1 = tail call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone
- ret <2 x i32> %1
-}
-
-define <3 x i32> @_Z3minDv3_jS_(<3 x i32> %v1, <3 x i32> %v2) nounwind readnone {
- %1 = shufflevector <3 x i32> %v1, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x i32> %v2, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x i32 > @llvm.arm.neon.vminu.v4i32(<4 x i32> %1, <4 x i32> %2) nounwind readnone
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x i32> %4
-}
-
-define <4 x i32> @_Z3minDv4_jS_(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone {
- %1 = tail call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %v1, <4 x i32> %v2) nounwind readnone
- ret <4 x i32> %1
-}
-
-define i64 @_Z3minyy(i64 %v1, i64 %v2) nounwind readnone {
- %1 = icmp ult i64 %v1, %v2
- %2 = select i1 %1, i64 %v1, i64 %v2
- ret i64 %2
-}
-
-; TODO: long vector types
-
-define float @_Z3minff(float %v1, float %v2) nounwind readnone {
- %1 = tail call float @_Z4fminff(float %v1, float %v2)
- ret float %1
-}
-
-define <2 x float> @_Z3minDv2_fS_(<2 x float> %v1, <2 x float> %v2) nounwind readnone {
- %1 = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %v1, <2 x float> %v2)
- ret <2 x float> %1
-}
-
-define <2 x float> @_Z3minDv2_ff(<2 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %v1, float %v2)
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z3minDv3_fS_(<3 x float> %v1, <3 x float> %v2) nounwind readnone {
- %1 = tail call <3 x float> @_Z4fminDv3_fS_(<3 x float> %v1, <3 x float> %v2)
- ret <3 x float> %1
-}
-
-define <3 x float> @_Z3minDv3_ff(<3 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %v1, float %v2)
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z3minDv4_fS_(<4 x float> %v1, <4 x float> %v2) nounwind readnone {
- %1 = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %v1, <4 x float> %v2)
- ret <4 x float> %1
-}
-
-define <4 x float> @_Z3minDv4_ff(<4 x float> %v1, float %v2) nounwind readnone {
- %1 = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %v1, float %v2)
- ret <4 x float> %1
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; YUV ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-@yuv_U = internal constant <4 x i32> <i32 0, i32 -100, i32 516, i32 0>, align 16
-@yuv_V = internal constant <4 x i32> <i32 409, i32 -208, i32 0, i32 0>, align 16
-@yuv_0 = internal constant <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
-@yuv_255 = internal constant <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, align 16
-
-
-define <4 x i8> @_Z18rsYuvToRGBA_uchar4hhh(i8 %pY, i8 %pU, i8 %pV) nounwind readnone alwaysinline {
- %_sy = zext i8 %pY to i32
- %_su = zext i8 %pU to i32
- %_sv = zext i8 %pV to i32
-
- %_sy2 = add i32 -16, %_sy
- %_sy3 = mul i32 298, %_sy2
- %_su2 = add i32 -128, %_su
- %_sv2 = add i32 -128, %_sv
- %_y = tail call <4 x i32> @smear_4i32(i32 %_sy3) nounwind readnone
- %_u = tail call <4 x i32> @smear_4i32(i32 %_su2) nounwind readnone
- %_v = tail call <4 x i32> @smear_4i32(i32 %_sv2) nounwind readnone
-
- %mu = load <4 x i32>* @yuv_U, align 8
- %mv = load <4 x i32>* @yuv_V, align 8
- %_u2 = mul <4 x i32> %_u, %mu
- %_v2 = mul <4 x i32> %_v, %mv
- %_y2 = add <4 x i32> %_y, %_u2
- %_y3 = add <4 x i32> %_y2, %_v2
-
- ; %r1 = tail call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %_y3, <4 x i32> <i32 8, i32 8, i32 8, i32 8>) nounwind readnone
-; %r2 = trunc <4 x i16> %r1 to <4 x i8>
-; ret <4 x i8> %r2
-
- %c0 = load <4 x i32>* @yuv_0, align 8
- %c255 = load <4 x i32>* @yuv_255, align 8
- %r1 = tail call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %_y3, <4 x i32> %c0) nounwind readnone
- %r2 = tail call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %r1, <4 x i32> %c255) nounwind readnone
- %r3 = lshr <4 x i32> %r2, <i32 8, i32 8, i32 8, i32 8>
- %r4 = trunc <4 x i32> %r3 to <4 x i8>
- ret <4 x i8> %r4
-}
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; half_RECIP ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define float @_Z10half_recipf(float %v) {
- %1 = insertelement <2 x float> undef, float %v, i32 0
- %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
- %3 = extractelement <2 x float> %2, i32 0
- ret float %3
-}
-
-define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
- %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
- %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %3
-}
-
-define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
- %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
- ret <4 x float> %1
-}
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; half_SQRT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define float @_Z9half_sqrtf(float %v) {
- %1 = insertelement <2 x float> undef, float %v, i32 0
- %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
- %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
- %4 = extractelement <2 x float> %3, i32 0
- ret float %4
-}
-
-define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
- %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
- %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
- ret <2 x float> %2
-}
-
-define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
- %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
- %3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone
- %4 = shufflevector <4 x float> %3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %4
-}
-
-define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
- %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
- %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
- ret <4 x float> %2
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; half_RSQRT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define float @_Z10half_rsqrtf(float %v) {
- %1 = insertelement <2 x float> undef, float %v, i32 0
- %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
- %3 = extractelement <2 x float> %2, i32 0
- ret float %3
-}
-
-define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
- %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
- %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %3
-}
-
-define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
- %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
- ret <4 x float> %1
-}
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; matrix ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-
-%struct.rs_matrix4x4 = type { [16 x float] }
-%struct.rs_matrix3x3 = type { [9 x float] }
-%struct.rs_matrix2x2 = type { [4 x float] }
-
-define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
- %1 = insertelement <4 x float> undef, float %in, i32 0
- %2 = insertelement <4 x float> %1, float %in, i32 1
- %3 = insertelement <4 x float> %2, float %in, i32 2
- %4 = insertelement <4 x float> %3, float %in, i32 3
- ret <4 x float> %4
-}
-
-
-define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
- %x0 = extractelement <3 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <3 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <3 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to i8*
- %xm = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %px2, i32 4) nounwind
-
- %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
- %py2 = bitcast float* %py to i8*
- %ym = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %py2, i32 4) nounwind
-
- %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 5
- %pz2 = bitcast float* %pz to i8*
- %zm2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %pz2, i32 4) nounwind
- %zm = shufflevector <4 x float> %zm2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = fmul <4 x float> %z, %zm
- %a5 = fadd <4 x float> %a4, %a3
- %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %a6
-}
-
-define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
- %x0 = extractelement <2 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <2 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %a4
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
- %x0 = extractelement <4 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <4 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <4 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
- %w0 = extractelement <4 x float> %in, i32 3
- %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
- %pz2 = bitcast float* %pz to <4 x float>*
- %zm = load <4 x float>* %pz2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = fmul <4 x float> %z, %zm
- %a5 = fadd <4 x float> %a3, %a4
- %a6 = fmul <4 x float> %w, %wm
- %a7 = fadd <4 x float> %a5, %a6
- ret <4 x float> %a7
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
- %x0 = extractelement <3 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <3 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <3 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
- %pz2 = bitcast float* %pz to <4 x float>*
- %zm = load <4 x float>* %pz2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fadd <4 x float> %wm, %a1
- %a3 = fmul <4 x float> %y, %ym
- %a4 = fadd <4 x float> %a2, %a3
- %a5 = fmul <4 x float> %z, %zm
- %a6 = fadd <4 x float> %a4, %a5
- ret <4 x float> %a6
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
- %x0 = extractelement <2 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <2 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fadd <4 x float> %wm, %a1
- %a3 = fmul <4 x float> %y, %ym
- %a4 = fadd <4 x float> %a2, %a3
- ret <4 x float> %a4
-}
-
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; pixel ops ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-
-@fc_255.0 = internal constant <4 x float> <float 255.0, float 255.0, float 255.0, float 255.0>, align 16
-@fc_0.5 = internal constant <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, align 16
-@fc_0 = internal constant <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, align 16
-
-declare <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone
-declare <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
-define <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %color) nounwind readnone {
- %f255 = load <4 x float>* @fc_255.0, align 16
- %f05 = load <4 x float>* @fc_0.5, align 16
- %f0 = load <4 x float>* @fc_0, align 16
- %v1 = fmul <4 x float> %f255, %color
- %v2 = fadd <4 x float> %f05, %v1
- %v3 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %v2, <4 x float> %f0, <4 x float> %f255) nounwind readnone
- %v4 = tail call <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %v3) nounwind readnone
- ret <4 x i8> %v4
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
-define <4 x i8> @_Z17rsPackColorTo8888Dv3_f(<3 x float> %color) nounwind readnone {
- %1 = shufflevector <3 x float> %color, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = insertelement <4 x float> %1, float 1.0, i32 3
- %3 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %2) nounwind readnone
- ret <4 x i8> %3
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
-define <4 x i8> @_Z17rsPackColorTo8888fff(float %r, float %g, float %b) nounwind readnone {
- %1 = insertelement <4 x float> undef, float %r, i32 0
- %2 = insertelement <4 x float> %1, float %g, i32 1
- %3 = insertelement <4 x float> %2, float %b, i32 2
- %4 = insertelement <4 x float> %3, float 1.0, i32 3
- %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
- ret <4 x i8> %5
-}
-
-; uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
-define <4 x i8> @_Z17rsPackColorTo8888ffff(float %r, float %g, float %b, float %a) nounwind readnone {
- %1 = insertelement <4 x float> undef, float %r, i32 0
- %2 = insertelement <4 x float> %1, float %g, i32 1
- %3 = insertelement <4 x float> %2, float %b, i32 2
- %4 = insertelement <4 x float> %3, float %a, i32 3
- %5 = tail call <4 x i8> @_Z17rsPackColorTo8888Dv4_f(<4 x float> %4) nounwind readnone
- ret <4 x i8> %5
-}
-
diff --git a/lib/Renderscript/runtime/arch/sqrt.c b/lib/Renderscript/runtime/arch/sqrt.c
deleted file mode 100755
index f1dac5f..0000000
--- a/lib/Renderscript/runtime/arch/sqrt.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rs_types.rsh"
-
-#define FN_FUNC_FN(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v) { \
- float2 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v) { \
- float3 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v) { \
- float4 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- r.w = fnc(v.w); \
- return r; \
-}
-
-extern float __attribute__((overloadable)) sqrt(float);
-
-FN_FUNC_FN(sqrt)
diff --git a/lib/Renderscript/runtime/arch/x86_clamp.ll b/lib/Renderscript/runtime/arch/x86_clamp.ll
deleted file mode 100755
index 422e9f6..0000000
--- a/lib/Renderscript/runtime/arch/x86_clamp.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
-
-declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
-
-define <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %in, <4 x float> %low, <4 x float> %high) nounwind readnone alwaysinline {
- %1 = tail call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %in, <4 x float> %high) nounwind readnone
- %2 = tail call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %low) nounwind readnone
- ret <4 x float> %2
-}
-
-define <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %in, <3 x float> %low, <3 x float> %high) nounwind readnone alwaysinline {
- %1 = shufflevector <3 x float> %in, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <3 x float> %low, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = shufflevector <3 x float> %high, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %1, <4 x float> %2, <4 x float> %3) nounwind readnone
- %5 = shufflevector <4 x float> %4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %5
-}
-
-define <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %in, <2 x float> %low, <2 x float> %high) nounwind readnone alwaysinline {
- %1 = shufflevector <2 x float> %in, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = shufflevector <2 x float> %low, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = shufflevector <2 x float> %high, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %1, <4 x float> %2, <4 x float> %3) nounwind readnone
- %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- ret <2 x float> %5
-}
-
-define float @_Z5clampfff(float %in, float %low, float %high) nounwind readnone alwaysinline {
- %1 = insertelement <4 x float> undef, float %in, i32 0
- %2 = insertelement <4 x float> undef, float %low, i32 0
- %3 = insertelement <4 x float> undef, float %high, i32 0
- %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %3) nounwind readnone
- %5 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %2) nounwind readnone
- %6 = extractelement <4 x float> %5, i32 0
- ret float %6
-}
-
-define <4 x float> @_Z5clampDv4_fff(<4 x float> %in, float %low, float %high) nounwind readonly {
- %1 = insertelement <4 x float> undef, float %low, i32 0
- %2 = insertelement <4 x float> %1, float %low, i32 1
- %3 = insertelement <4 x float> %2, float %low, i32 2
- %4 = insertelement <4 x float> %3, float %low, i32 3
- %5 = insertelement <4 x float> undef, float %high, i32 0
- %6 = insertelement <4 x float> %5, float %high, i32 1
- %7 = insertelement <4 x float> %6, float %high, i32 2
- %8 = insertelement <4 x float> %7, float %high, i32 3
- %9 = tail call <4 x float> @_Z5clampDv4_fS_S_(<4 x float> %in, <4 x float> %4, <4 x float> %8) nounwind readnone
- ret <4 x float> %9
-}
-
-define <3 x float> @_Z5clampDv3_fff(<3 x float> %in, float %low, float %high) nounwind readonly {
- %1 = insertelement <3 x float> undef, float %low, i32 0
- %2 = insertelement <3 x float> %1, float %low, i32 1
- %3 = insertelement <3 x float> %2, float %low, i32 2
- %4 = insertelement <3 x float> undef, float %high, i32 0
- %5 = insertelement <3 x float> %4, float %high, i32 1
- %6 = insertelement <3 x float> %5, float %high, i32 2
- %7 = tail call <3 x float> @_Z5clampDv3_fS_S_(<3 x float> %in, <3 x float> %3, <3 x float> %6) nounwind readnone
- ret <3 x float> %7
-}
-
-define <2 x float> @_Z5clampDv2_fff(<2 x float> %in, float %low, float %high) nounwind readonly {
- %1 = insertelement <2 x float> undef, float %low, i32 0
- %2 = insertelement <2 x float> %1, float %low, i32 1
- %3 = insertelement <2 x float> undef, float %high, i32 0
- %4 = insertelement <2 x float> %3, float %high, i32 1
- %5 = tail call <2 x float> @_Z5clampDv2_fS_S_(<2 x float> %in, <2 x float> %2, <2 x float> %4) nounwind readnone
- ret <2 x float> %5
-}
diff --git a/lib/Renderscript/runtime/arch/x86_dot_length.ll b/lib/Renderscript/runtime/arch/x86_dot_length.ll
deleted file mode 100644
index 21f2f3e..0000000
--- a/lib/Renderscript/runtime/arch/x86_dot_length.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
-
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
-declare float @llvm.sqrt.f32(float) nounwind readnone
-
-define float @_Z3dotDv4_fS_(<4 x float> %lhs, <4 x float> %rhs) nounwind readnone {
- %1 = fmul <4 x float> %lhs, %rhs
- %2 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %1) nounwind readnone
- %3 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %2) nounwind readnone
- %4 = extractelement <4 x float> %3, i32 0
- ret float %4
-}
-
-define float @_Z3dotDv3_fS_(<3 x float> %lhs, <3 x float> %rhs) nounwind readnone {
- %1 = fmul <3 x float> %lhs, %rhs
- %2 = shufflevector <3 x float> %1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = bitcast <4 x float> %2 to <2 x i64>
- %4 = tail call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %3, i32 32)
- %5 = bitcast <2 x i64> %4 to <4 x float>
- %6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %5, <4 x float> %5) nounwind readnone
- %7 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %6, <4 x float> %6) nounwind readnone
- %8 = extractelement <4 x float> %7, i32 0
- ret float %8
-}
-
-define float @_Z3dotDv2_fS_(<2 x float> %lhs, <2 x float> %rhs) nounwind readnone {
- %1 = fmul <2 x float> %lhs, %rhs
- %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %2) nounwind readnone
- %4 = extractelement <4 x float> %3, i32 0
- ret float %4
-}
-
-define float @_Z3dotff(float %lhs, float %rhs) nounwind readnone {
- %1 = fmul float %lhs, %rhs
- ret float %1
-}
-
-define float @_Z6lengthDv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fmul <4 x float> %in, %in
- %2 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %1) nounwind readnone
- %3 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %2) nounwind readnone
- %4 = extractelement <4 x float> %3, i32 0
- %5 = tail call float @llvm.sqrt.f32(float %4) nounwind readnone
- ret float %5
-}
-
-define float @_Z6lengthDv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fmul <3 x float> %in, %in
- %2 = shufflevector <3 x float> %1, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = bitcast <4 x float> %2 to <2 x i64>
- %4 = tail call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %3, i32 32)
- %5 = bitcast <2 x i64> %4 to <4 x float>
- %6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %5, <4 x float> %5) nounwind readnone
- %7 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %6, <4 x float> %6) nounwind readnone
- %8 = extractelement <4 x float> %7, i32 0
- %9 = tail call float @llvm.sqrt.f32(float %8) nounwind readnone
- ret float %9
-}
-
-define float @_Z6lengthDv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fmul <2 x float> %in, %in
- %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %2) nounwind readnone
- %4 = extractelement <4 x float> %3, i32 0
- %5 = tail call float @llvm.sqrt.f32(float %4) nounwind readnone
- ret float %5
-}
-
-define float @_Z6lengthf(float %in) nounwind readnone alwaysinline {
- ret float %in
-}
-
diff --git a/lib/Renderscript/runtime/arch/x86_generic.c b/lib/Renderscript/runtime/arch/x86_generic.c
deleted file mode 100644
index c46c54a..0000000
--- a/lib/Renderscript/runtime/arch/x86_generic.c
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "rs_types.rsh"
-
-extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
-extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
-extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
-extern float __attribute__((overloadable)) sqrt(float);
-
-/*
- * FMAX
- */
-
-extern float __attribute__((overloadable)) fmax(float v1, float v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
- float2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
- float3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
- float4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
- float2 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
- float3 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- r.z = v1.z > v2 ? v1.z : v2;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
- float4 r;
- r.x = v1.x > v2 ? v1.x : v2;
- r.y = v1.y > v2 ? v1.y : v2;
- r.z = v1.z > v2 ? v1.z : v2;
- r.w = v1.w > v2 ? v1.w : v2;
- return r;
-}
-
-extern float __attribute__((overloadable)) fmin(float v1, float v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-
-/*
- * FMIN
- */
-extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
- float2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
- float3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
- float4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
- float2 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- return r;
-}
-
-extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
- float3 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- r.z = v1.z < v2 ? v1.z : v2;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
- float4 r;
- r.x = v1.x < v2 ? v1.x : v2;
- r.y = v1.y < v2 ? v1.y : v2;
- r.z = v1.z < v2 ? v1.z : v2;
- r.w = v1.w < v2 ? v1.w : v2;
- return r;
-}
-
-
-/*
- * MAX
- */
-
-extern char __attribute__((overloadable)) max(char v1, char v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
- char2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
- char3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
- char4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern short __attribute__((overloadable)) max(short v1, short v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
- short2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
- short3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
- short4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int __attribute__((overloadable)) max(int v1, int v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
- int2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
- int3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
- int4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
- long2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
- long3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
- long4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
- uchar2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
- uchar3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
- uchar4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
- ushort2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
- ushort3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
- ushort4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
- uint2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
- uint3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
- uint4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
- return v1 > v2 ? v1 : v2;
-}
-
-extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
- ulong2 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
- ulong3 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
- ulong4 r;
- r.x = v1.x > v2.x ? v1.x : v2.x;
- r.y = v1.y > v2.y ? v1.y : v2.y;
- r.z = v1.z > v2.z ? v1.z : v2.z;
- r.w = v1.w > v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float __attribute__((overloadable)) max(float v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
- return fmax(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
- return fmax(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
- return fmax(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
- return fmax(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
- return fmax(v1, v2);
-}
-
-
-/*
- * MIN
- */
-
-extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
- char2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
- char3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
- char4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
- short2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
- short3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
- short4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
- int2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
- int3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
- int4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
- long2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
- long3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
- long4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
- uchar2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
- uchar3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
- uchar4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
- ushort2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
- ushort3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
- ushort4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
- uint2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
- uint3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
- uint4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
- return v1 < v2 ? v1 : v2;
-}
-
-extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
- ulong2 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- return r;
-}
-
-extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
- ulong3 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- return r;
-}
-
-extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
- ulong4 r;
- r.x = v1.x < v2.x ? v1.x : v2.x;
- r.y = v1.y < v2.y ? v1.y : v2.y;
- r.z = v1.z < v2.z ? v1.z : v2.z;
- r.w = v1.w < v2.w ? v1.w : v2.w;
- return r;
-}
-
-extern float __attribute__((overloadable)) min(float v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
- return fmin(v1, v2);
-}
-
-extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
- return fmin(v1, v2);
-}
-
-extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
- return fmin(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
- return fmin(v1, v2);
-}
-
-extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
- return fmin(v1, v2);
-}
-
-
-/*
- * YUV
- */
-
-extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
- short Y = ((short)y) - 16;
- short U = ((short)u) - 128;
- short V = ((short)v) - 128;
-
- short4 p;
- p.r = (Y * 298 + V * 409 + 128) >> 8;
- p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
- p.b = (Y * 298 + U * 516 + 128) >> 8;
- p.a = 255;
- p.r = rsClamp(p.r, (short)0, (short)255);
- p.g = rsClamp(p.g, (short)0, (short)255);
- p.b = rsClamp(p.b, (short)0, (short)255);
-
- return convert_uchar4(p);
-}
-
-static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
-static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
-
-extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
- float4 color = (float)y * 0.003921569f;
- float4 fU = ((float)u) - 128.f;
- float4 fV = ((float)v) - 128.f;
-
- color += fU * yuv_U_values;
- color += fV * yuv_V_values;
- color = clamp(color, 0.f, 1.f);
- return color;
-}
-
-
-/*
- * half_RECIP
- */
-
-extern float __attribute__((overloadable)) half_recip(float v) {
- // FIXME: actual algorithm for generic approximate reciprocal
- return 1.f / v;
-}
-
-extern float2 __attribute__((overloadable)) half_recip(float2 v) {
- float2 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_recip(float3 v) {
- float3 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- r.z = half_recip(r.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_recip(float4 v) {
- float4 r;
- r.x = half_recip(r.x);
- r.y = half_recip(r.y);
- r.z = half_recip(r.z);
- r.w = half_recip(r.w);
- return r;
-}
-
-
-/*
- * half_SQRT
- */
-
-extern float __attribute__((overloadable)) half_sqrt(float v) {
- return sqrt(v);
-}
-
-extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
- float2 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
- float3 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- r.z = half_sqrt(v.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
- float4 r;
- r.x = half_sqrt(v.x);
- r.y = half_sqrt(v.y);
- r.z = half_sqrt(v.z);
- r.w = half_sqrt(v.w);
- return r;
-}
-
-
-/*
- * half_rsqrt
- */
-
-extern float __attribute__((overloadable)) half_rsqrt(float v) {
- return 1.f / sqrt(v);
-}
-
-extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
- float2 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- return r;
-}
-
-extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
- float3 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- r.z = half_rsqrt(v.z);
- return r;
-}
-
-extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
- float4 r;
- r.x = half_rsqrt(v.x);
- r.y = half_rsqrt(v.y);
- r.z = half_rsqrt(v.z);
- r.w = half_rsqrt(v.w);
- return r;
-}
-
diff --git a/lib/Renderscript/runtime/arch/x86_math.ll b/lib/Renderscript/runtime/arch/x86_math.ll
deleted file mode 100755
index 60add80..0000000
--- a/lib/Renderscript/runtime/arch/x86_math.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
-
-declare float @llvm.sqrt.f32(float) nounwind readnone
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) nounwind readnone
-declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) nounwind readnone
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone
-declare float @llvm.exp.f32(float) nounwind readonly
-declare float @llvm.pow.f32(float, float) nounwind readonly
-
-define float @_Z4sqrtf(float %in) nounwind readnone alwaysinline {
- %1 = tail call float @llvm.sqrt.f32(float %in) nounwind readnone
- ret float %1
-}
-
-define <2 x float> @_Z4sqrtDv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) nounwind readnone
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z4sqrtDv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> %in) nounwind readnone
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z4sqrtDv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) nounwind readnone
- ret <4 x float> %1
-}
-
-define float @_Z3expf(float %in) nounwind readnone {
- %1 = tail call float @llvm.exp.f32(float %in) nounwind readnone
- ret float %1
-}
-
-define float @_Z3powff(float %v1, float %v2) nounwind readnone {
- %1 = tail call float @llvm.pow.f32(float %v1, float %v2) nounwind readnone
- ret float %1
-}
-
diff --git a/lib/Renderscript/runtime/build_bc_lib.mk b/lib/Renderscript/runtime/build_bc_lib.mk
deleted file mode 100644
index 1d20b7a..0000000
--- a/lib/Renderscript/runtime/build_bc_lib.mk
+++ /dev/null
@@ -1,74 +0,0 @@
-#
-# Copyright (C) 2012 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-include $(BUILD_SYSTEM)/base_rules.mk
-
-BCC_STRIP_ATTR := $(BUILD_OUT_EXECUTABLES)/bcc_strip_attr$(BUILD_EXECUTABLE_SUFFIX)
-
-# We need to pass the +long64 flag to the underlying version of Clang, since
-# we are generating a library for use with Renderscript (64-bit long type,
-# not 32-bit).
-bc_clang_cc1_cflags := -target-feature +long64
-bc_translated_clang_cc1_cflags := $(addprefix -Xclang , $(bc_clang_cc1_cflags))
-
-bc_cflags := -MD \
- -DRS_VERSION=$(RS_VERSION) \
- -std=c99 \
- -c \
- -O3 \
- -fno-builtin \
- -emit-llvm \
- -target armv7-none-linux-gnueabi \
- -fsigned-char \
- $(bc_translated_clang_cc1_cflags)
-
-ifeq ($(rs_debug_runtime),1)
-bc_cflags += -DRS_DEBUG_RUNTIME
-endif
-rs_debug_runtime:=
-
-c_sources := $(filter %.c,$(LOCAL_SRC_FILES))
-ll_sources := $(filter %.ll,$(LOCAL_SRC_FILES))
-
-c_bc_files := $(patsubst %.c,%.bc, \
- $(addprefix $(intermediates)/, $(c_sources)))
-
-ll_bc_files := $(patsubst %.ll,%.bc, \
- $(addprefix $(intermediates)/, $(ll_sources)))
-
-$(c_bc_files): PRIVATE_INCLUDES := \
- frameworks/rs/scriptc \
- external/clang/lib/Headers
-$(c_bc_files): PRIVATE_CFLAGS := $(bc_cflags)
-
-$(c_bc_files): $(intermediates)/%.bc: $(LOCAL_PATH)/%.c $(CLANG)
- @mkdir -p $(dir $@)
- $(hide) $(CLANG) $(addprefix -I, $(PRIVATE_INCLUDES)) $(PRIVATE_CFLAGS) $< -o $@
-
-$(ll_bc_files): $(intermediates)/%.bc: $(LOCAL_PATH)/%.ll $(LLVM_AS)
- @mkdir -p $(dir $@)
- $(hide) $(LLVM_AS) $< -o $@
-
--include $(c_bc_files:%.bc=%.d)
--include $(ll_bc_files:%.bc=%.d)
-
-$(LOCAL_BUILT_MODULE): PRIVATE_BC_FILES := $(c_bc_files) $(ll_bc_files)
-$(LOCAL_BUILT_MODULE): $(c_bc_files) $(ll_bc_files)
-$(LOCAL_BUILT_MODULE): $(LLVM_LINK) $(clcore_LLVM_LD)
-$(LOCAL_BUILT_MODULE): $(LLVM_AS) $(BCC_STRIP_ATTR)
- @mkdir -p $(dir $@)
- $(hide) $(LLVM_LINK) $(PRIVATE_BC_FILES) -o $@.unstripped
- $(hide) $(BCC_STRIP_ATTR) -o $@ $@.unstripped
diff --git a/lib/Renderscript/runtime/build_clcore.sh b/lib/Renderscript/runtime/build_clcore.sh
deleted file mode 100755
index 842245c..0000000
--- a/lib/Renderscript/runtime/build_clcore.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/sh
-
-# Usually, manually running build_clcore.sh shouldn't be needed. build_clcore.mk should
-# kick in automatically during Android build process.
-
-# Generate rs_cl.bc
-# =================
-
-scriptc_path=../../../../base/libs/rs/scriptc
-clang_header_path=../../../../../external/clang/lib/Headers
-
-clang -target armv7-none-linux-gnueabi -I${scriptc_path} -I${clang_header_path} -c -std=c99 -O3 rs_cl.c -emit-llvm -o rs_cl.bc
-
-# Generate rs_core.bc
-# ===================
-
-clang -target armv7-none-linux-gnueabi -I${scriptc_path} -I${clang_header_path} -c -std=c99 -O3 rs_core.c -emit-llvm -o rs_core.bc
-
-# Link everything together
-# ========================
-
-llvm-link rs_cl.bc rs_core.bc -o libclcore.bc
diff --git a/lib/Renderscript/runtime/convert.ll b/lib/Renderscript/runtime/convert.ll
deleted file mode 100644
index f45850d..0000000
--- a/lib/Renderscript/runtime/convert.ll
+++ /dev/null
@@ -1,731 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; FLOAT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = uitofp <2 x i8> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = uitofp <3 x i8> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = uitofp <4 x i8> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = sitofp <2 x i8> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = sitofp <3 x i8> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = sitofp <4 x i8> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = uitofp <2 x i16> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = uitofp <3 x i16> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = uitofp <4 x i16> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = sitofp <2 x i16> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = sitofp <3 x i16> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = sitofp <4 x i16> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = uitofp <2 x i32> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = uitofp <3 x i32> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = uitofp <4 x i32> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = sitofp <2 x i32> %in to <2 x float>
- ret <2 x float> %1
-}
-
-define <3 x float> @_Z14convert_float3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = sitofp <3 x i32> %in to <3 x float>
- ret <3 x float> %1
-}
-
-define <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = sitofp <4 x i32> %in to <4 x float>
- ret <4 x float> %1
-}
-
-define <2 x float> @_Z14convert_float2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- ret <2 x float> %in
-}
-
-define <3 x float> @_Z14convert_float3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- ret <3 x float> %in
-}
-
-define <4 x float> @_Z14convert_float4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- ret <4 x float> %in
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; CHAR ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-define <4 x i8> @_Z13convert_char4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <4 x float> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <3 x float> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <2 x float> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- ret <4 x i8> %in
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- ret <3 x i8> %in
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- ret <2 x i8> %in
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- ret <4 x i8> %in
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- ret <3 x i8> %in
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- ret <2 x i8> %in
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i16> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i16> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i16> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i16> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i16> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i16> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z13convert_char4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z13convert_char3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z13convert_char2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; UCHAR ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x i8> @_Z14convert_uchar4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <4 x float> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <3 x float> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <2 x float> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- ret <4 x i8> %in
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- ret <3 x i8> %in
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- ret <2 x i8> %in
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- ret <4 x i8> %in
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- ret <3 x i8> %in
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- ret <2 x i8> %in
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i16> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i16> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i16> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i16> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i16> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i16> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-define <4 x i8> @_Z14convert_uchar4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i8>
- ret <4 x i8> %1
-}
-
-define <3 x i8> @_Z14convert_uchar3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i8>
- ret <3 x i8> %1
-}
-
-define <2 x i8> @_Z14convert_uchar2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i8>
- ret <2 x i8> %1
-}
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; SHORT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x i16> @_Z14convert_short4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <4 x float> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <3 x float> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <2 x float> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <4 x i8> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <3 x i8> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <2 x i8> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- ret <4 x i16> %in
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- ret <3 x i16> %in
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- ret <2 x i16> %in
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- ret <4 x i16> %in
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- ret <3 x i16> %in
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- ret <2 x i16> %in
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z14convert_short4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z14convert_short3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z14convert_short2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; USHORT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x i16> @_Z15convert_ushort4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <4 x float> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <3 x float> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <2 x float> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- ret <4 x i16> %in
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- ret <3 x i16> %in
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- ret <2 x i16> %in
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- ret <4 x i16> %in
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- ret <3 x i16> %in
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- ret <2 x i16> %in
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-define <4 x i16> @_Z15convert_ushort4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <4 x i32> %in to <4 x i16>
- ret <4 x i16> %1
-}
-
-define <3 x i16> @_Z15convert_ushort3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <3 x i32> %in to <3 x i16>
- ret <3 x i16> %1
-}
-
-define <2 x i16> @_Z15convert_ushort2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- %1 = trunc <2 x i32> %in to <2 x i16>
- ret <2 x i16> %1
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; INT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x i32> @_Z12convert_int4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <4 x float> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <3 x float> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptosi <2 x float> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <4 x i8> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <3 x i8> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = sext <2 x i8> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i16> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i16> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i16> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = sext <4 x i16> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = sext <3 x i16> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = sext <2 x i16> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- ret <4 x i32> %in
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- ret <3 x i32> %in
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- ret <2 x i32> %in
-}
-
-define <4 x i32> @_Z12convert_int4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- ret <4 x i32> %in
-}
-
-define <3 x i32> @_Z12convert_int3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- ret <3 x i32> %in
-}
-
-define <2 x i32> @_Z12convert_int2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- ret <2 x i32> %in
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; UINT ;;;;;;;;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-define <4 x i32> @_Z13convert_uint4Dv4_f(<4 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <4 x float> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_f(<3 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <3 x float> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_f(<2 x float> %in) nounwind readnone alwaysinline {
- %1 = fptoui <2 x float> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_h(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_h(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_h(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_c(<4 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i8> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_c(<3 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i8> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_c(<2 x i8> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i8> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_t(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i16> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_t(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i16> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_t(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i16> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_s(<4 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <4 x i16> %in to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_s(<3 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <3 x i16> %in to <3 x i32>
- ret <3 x i32> %1
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_s(<2 x i16> %in) nounwind readnone alwaysinline {
- %1 = zext <2 x i16> %in to <2 x i32>
- ret <2 x i32> %1
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_j(<4 x i32> %in) nounwind readnone alwaysinline {
- ret <4 x i32> %in
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_j(<3 x i32> %in) nounwind readnone alwaysinline {
- ret <3 x i32> %in
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_j(<2 x i32> %in) nounwind readnone alwaysinline {
- ret <2 x i32> %in
-}
-
-define <4 x i32> @_Z13convert_uint4Dv4_i(<4 x i32> %in) nounwind readnone alwaysinline {
- ret <4 x i32> %in
-}
-
-define <3 x i32> @_Z13convert_uint3Dv3_i(<3 x i32> %in) nounwind readnone alwaysinline {
- ret <3 x i32> %in
-}
-
-define <2 x i32> @_Z13convert_uint2Dv2_i(<2 x i32> %in) nounwind readnone alwaysinline {
- ret <2 x i32> %in
-}
diff --git a/lib/Renderscript/runtime/math.ll b/lib/Renderscript/runtime/math.ll
deleted file mode 100644
index f026d15..0000000
--- a/lib/Renderscript/runtime/math.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-declare float @llvm.sqrt.f32(float)
-declare float @llvm.pow.f32(float, float)
-declare float @llvm.fabs.f32(float)
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-declare <3 x float> @llvm.fabs.v3f32(<3 x float>)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-
-define float @_Z4sqrtf(float %v) nounwind readnone alwaysinline {
- %1 = tail call float @llvm.sqrt.f32(float %v)
- ret float %1
-}
-
-define float @_Z3powf(float %v1, float %v2) nounwind readnone alwaysinline {
- %1 = tail call float @llvm.pow.f32(float %v1, float %v2)
- ret float %1
-}
diff --git a/lib/Renderscript/runtime/matrix.ll b/lib/Renderscript/runtime/matrix.ll
deleted file mode 100644
index c56405d..0000000
--- a/lib/Renderscript/runtime/matrix.ll
+++ /dev/null
@@ -1,176 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-
-%struct.rs_matrix4x4 = type { [16 x float] }
-%struct.rs_matrix3x3 = type { [9 x float] }
-%struct.rs_matrix2x2 = type { [4 x float] }
-
-define internal <4 x float> @smear_f(float %in) nounwind readnone alwaysinline {
- %1 = insertelement <4 x float> undef, float %in, i32 0
- %2 = insertelement <4 x float> %1, float %in, i32 1
- %3 = insertelement <4 x float> %2, float %in, i32 2
- %4 = insertelement <4 x float> %3, float %in, i32 3
- ret <4 x float> %4
-}
-
-
-define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
- %x0 = extractelement <3 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <3 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <3 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pz = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 6
- %pz2 = bitcast float* %pz to <3 x float>*
- %zm2 = load <3 x float>* %pz2, align 4
- %zm = shufflevector <3 x float> %zm2, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = fmul <4 x float> %z, %zm
- %a5 = fadd <4 x float> %a4, %a3
- %a6 = shufflevector <4 x float> %a5, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %a6
-}
-
-define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind readonly {
- %r = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv3_f(%struct.rs_matrix3x3* nocapture %m, <3 x float> %in) nounwind
- ret <3 x float> %r
-}
-
-define <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
- %x0 = extractelement <2 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <2 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix3x3* %m, i32 0, i32 0, i32 3
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = shufflevector <4 x float> %a3, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
- ret <3 x float> %a4
-}
-
-define <3 x float> @_Z16rsMatrixMultiplyP12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind readonly {
- %r = tail call <3 x float> @_Z16rsMatrixMultiplyPK12rs_matrix3x3Dv2_f(%struct.rs_matrix3x3* nocapture %m, <2 x float> %in) nounwind
- ret <3 x float> %r
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
- %x0 = extractelement <4 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <4 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <4 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
- %w0 = extractelement <4 x float> %in, i32 3
- %w = tail call <4 x float> @smear_f(float %w0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
- %pz2 = bitcast float* %pz to <4 x float>*
- %zm = load <4 x float>* %pz2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fmul <4 x float> %y, %ym
- %a3 = fadd <4 x float> %a1, %a2
- %a4 = fmul <4 x float> %z, %zm
- %a5 = fadd <4 x float> %a3, %a4
- %a6 = fmul <4 x float> %w, %wm
- %a7 = fadd <4 x float> %a5, %a6
- ret <4 x float> %a7
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind readonly {
- %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv4_f(%struct.rs_matrix4x4* nocapture %m, <4 x float> %in) nounwind
- ret <4 x float> %r
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
- %x0 = extractelement <3 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <3 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
- %z0 = extractelement <3 x float> %in, i32 2
- %z = tail call <4 x float> @smear_f(float %z0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pz = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 8
- %pz2 = bitcast float* %pz to <4 x float>*
- %zm = load <4 x float>* %pz2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fadd <4 x float> %wm, %a1
- %a3 = fmul <4 x float> %y, %ym
- %a4 = fadd <4 x float> %a2, %a3
- %a5 = fmul <4 x float> %z, %zm
- %a6 = fadd <4 x float> %a4, %a5
- ret <4 x float> %a6
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind readonly {
- %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv3_f(%struct.rs_matrix4x4* nocapture %m, <3 x float> %in) nounwind
- ret <4 x float> %r
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
- %x0 = extractelement <2 x float> %in, i32 0
- %x = tail call <4 x float> @smear_f(float %x0) nounwind readnone
- %y0 = extractelement <2 x float> %in, i32 1
- %y = tail call <4 x float> @smear_f(float %y0) nounwind readnone
-
- %px = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 0
- %px2 = bitcast float* %px to <4 x float>*
- %xm = load <4 x float>* %px2, align 4
- %py = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 4
- %py2 = bitcast float* %py to <4 x float>*
- %ym = load <4 x float>* %py2, align 4
- %pw = getelementptr inbounds %struct.rs_matrix4x4* %m, i32 0, i32 0, i32 12
- %pw2 = bitcast float* %pw to <4 x float>*
- %wm = load <4 x float>* %pw2, align 4
-
- %a1 = fmul <4 x float> %x, %xm
- %a2 = fadd <4 x float> %wm, %a1
- %a3 = fmul <4 x float> %y, %ym
- %a4 = fadd <4 x float> %a2, %a3
- ret <4 x float> %a4
-}
-
-define <4 x float> @_Z16rsMatrixMultiplyP12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind readonly {
- %r = tail call <4 x float> @_Z16rsMatrixMultiplyPK12rs_matrix4x4Dv2_f(%struct.rs_matrix4x4* nocapture %m, <2 x float> %in) nounwind
- ret <4 x float> %r
-}
-
diff --git a/lib/Renderscript/runtime/rsClamp.ll b/lib/Renderscript/runtime/rsClamp.ll
deleted file mode 100644
index eba678a..0000000
--- a/lib/Renderscript/runtime/rsClamp.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
-target triple = "armv7-none-linux-gnueabi"
-
-
-define float @_Z7rsClampfff(float %value, float %low, float %high) nounwind readonly {
- %1 = fcmp olt float %value, %high
- %2 = select i1 %1, float %value, float %high
- %3 = fcmp ogt float %2, %low
- %4 = select i1 %3, float %2, float %low
- ret float %4
-}
-
-define signext i8 @_Z7rsClampccc(i8 signext %value, i8 signext %low, i8 signext %high) nounwind readonly {
- %1 = icmp slt i8 %value, %high
- %2 = select i1 %1, i8 %value, i8 %high
- %3 = icmp sgt i8 %2, %low
- %4 = select i1 %3, i8 %2, i8 %low
- ret i8 %4
-}
-
-define zeroext i8 @_Z7rsClamphhh(i8 zeroext %value, i8 zeroext %low, i8 zeroext %high) nounwind readonly {
- %1 = icmp ult i8 %value, %high
- %2 = select i1 %1, i8 %value, i8 %high
- %3 = icmp ugt i8 %2, %low
- %4 = select i1 %3, i8 %2, i8 %low
- ret i8 %4
-}
-
-define signext i16 @_Z7rsClampsss(i16 signext %value, i16 signext %low, i16 signext %high) nounwind readonly {
- %1 = icmp slt i16 %value, %high
- %2 = select i1 %1, i16 %value, i16 %high
- %3 = icmp sgt i16 %2, %low
- %4 = select i1 %3, i16 %2, i16 %low
- ret i16 %4
-}
-
-define zeroext i16 @_Z7rsClampttt(i16 zeroext %value, i16 zeroext %low, i16 zeroext %high) nounwind readonly {
- %1 = icmp ult i16 %value, %high
- %2 = select i1 %1, i16 %value, i16 %high
- %3 = icmp ugt i16 %2, %low
- %4 = select i1 %3, i16 %2, i16 %low
- ret i16 %4
-}
-
-define i32 @_Z7rsClampiii(i32 %value, i32 %low, i32 %high) nounwind readonly {
- %1 = icmp slt i32 %value, %high
- %2 = select i1 %1, i32 %value, i32 %high
- %3 = icmp sgt i32 %2, %low
- %4 = select i1 %3, i32 %2, i32 %low
- ret i32 %4
-}
-
-define i32 @_Z7rsClampjjj(i32 %value, i32 %low, i32 %high) nounwind readonly {
- %1 = icmp ult i32 %value, %high
- %2 = select i1 %1, i32 %value, i32 %high
- %3 = icmp ugt i32 %2, %low
- %4 = select i1 %3, i32 %2, i32 %low
- ret i32 %4
-}
-
diff --git a/lib/Renderscript/runtime/rs_allocation.c b/lib/Renderscript/runtime/rs_allocation.c
deleted file mode 100644
index 1d0f5b6..0000000
--- a/lib/Renderscript/runtime/rs_allocation.c
+++ /dev/null
@@ -1,310 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-// Opaque Allocation type operations
-extern uint32_t __attribute__((overloadable))
- rsAllocationGetDimX(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- return alloc->mHal.drvState.lod[0].dimX;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsAllocationGetDimY(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- return alloc->mHal.drvState.lod[0].dimY;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsAllocationGetDimZ(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- return alloc->mHal.drvState.lod[0].dimZ;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsAllocationGetDimLOD(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- return alloc->mHal.state.hasMipmaps;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsAllocationGetDimFaces(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- return alloc->mHal.state.hasFaces;
-}
-
-
-extern rs_element __attribute__((overloadable))
- rsAllocationGetElement(rs_allocation a) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- if (alloc == NULL) {
- rs_element nullElem = {0};
- return nullElem;
- }
- Type_t *type = (Type_t *)alloc->mHal.state.type;
- rs_element returnElem = {type->mHal.state.element};
- return returnElem;
-}
-
-// TODO: this needs to be optimized, obviously
-static void memcpy(void* dst, void* src, size_t size) {
- char* dst_c = (char*) dst, *src_c = (char*) src;
- for (; size > 0; size--) {
- *dst_c++ = *src_c++;
- }
-}
-
-#ifdef RS_DEBUG_RUNTIME
-#define ELEMENT_AT(T) \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x); \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x, uint32_t y); \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, const T *val, uint32_t x, uint32_t y, uint32_t z); \
- extern void __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x); \
- extern void __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x, uint32_t y); \
- extern void __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, T *val, uint32_t x, uint32_t y, uint32_t z); \
- \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x) { \
- rsSetElementAt_##T(a, &val, x); \
- } \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y) { \
- rsSetElementAt_##T(a, &val, x, y); \
- } \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y, uint32_t z) { \
- rsSetElementAt_##T(a, &val, x, y, z); \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x) { \
- T tmp; \
- rsGetElementAt_##T(a, &tmp, x); \
- return tmp; \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y) { \
- T tmp; \
- rsGetElementAt_##T(a, &tmp, x, y); \
- return tmp; \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) { \
- T tmp; \
- rsGetElementAt_##T(a, &tmp, x, y, z); \
- return tmp; \
- }
-
-#else
-#define ELEMENT_AT(T) \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- uint8_t *p = (uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- const uint32_t eSize = sizeof(T); \
- *((T*)&p[(eSize * x)]) = val; \
- } \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- uint8_t *p = (uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- const uint32_t eSize = sizeof(T); \
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride; \
- *((T*)&p[(eSize * x) + (y * stride)]) = val; \
- } \
- extern void __attribute__((overloadable)) \
- rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y, uint32_t z) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- uint8_t *p = (uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride; \
- const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY; \
- uint8_t *dp = &p[(sizeof(T) * x) + (y * stride) + (z * stride * dimY)]; \
- ((T*)dp)[0] = val; \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- return *((T*)&p[(sizeof(T) * x)]); \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride; \
- return *((T*)&p[(sizeof(T) * x) + (y * stride)]); \
- } \
- extern T __attribute__((overloadable)) \
- rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) { \
- Allocation_t *alloc = (Allocation_t *)a.p; \
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr; \
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride; \
- const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY; \
- const uint8_t *dp = &p[(sizeof(T) * x) + (y * stride) + (z * stride * dimY)]; \
- return ((const T*)dp)[0]; \
- }
-
-
-
-extern const void * __attribute__((overloadable))
- rsGetElementAt(rs_allocation a, uint32_t x) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- return &p[eSize * x];
-}
-
-extern const void * __attribute__((overloadable))
- rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
- return &p[(eSize * x) + (y * stride)];
-}
-
-extern const void * __attribute__((overloadable))
- rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
- const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
- return &p[(eSize * x) + (y * stride) + (z * stride * dimY)];
-}
-extern void __attribute__((overloadable))
- rsSetElementAt(rs_allocation a, void* ptr, uint32_t x) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- memcpy((void*)&p[eSize * x], ptr, eSize);
-}
-
-extern void __attribute__((overloadable))
- rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
- memcpy((void*)&p[(eSize * x) + (y * stride)], ptr, eSize);
-}
-
-extern void __attribute__((overloadable))
- rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y, uint32_t z) {
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr;
- const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- const uint32_t stride = alloc->mHal.drvState.lod[0].stride;
- const uint32_t dimY = alloc->mHal.drvState.lod[0].dimY;
- memcpy((void*)&p[(eSize * x) + (y * stride) + (z * stride * dimY)], ptr, eSize);
-}
-#endif
-
-ELEMENT_AT(char)
-ELEMENT_AT(char2)
-ELEMENT_AT(char3)
-ELEMENT_AT(char4)
-ELEMENT_AT(uchar)
-ELEMENT_AT(uchar2)
-ELEMENT_AT(uchar3)
-ELEMENT_AT(uchar4)
-ELEMENT_AT(short)
-ELEMENT_AT(short2)
-ELEMENT_AT(short3)
-ELEMENT_AT(short4)
-ELEMENT_AT(ushort)
-ELEMENT_AT(ushort2)
-ELEMENT_AT(ushort3)
-ELEMENT_AT(ushort4)
-ELEMENT_AT(int)
-ELEMENT_AT(int2)
-ELEMENT_AT(int3)
-ELEMENT_AT(int4)
-ELEMENT_AT(uint)
-ELEMENT_AT(uint2)
-ELEMENT_AT(uint3)
-ELEMENT_AT(uint4)
-ELEMENT_AT(long)
-ELEMENT_AT(long2)
-ELEMENT_AT(long3)
-ELEMENT_AT(long4)
-ELEMENT_AT(ulong)
-ELEMENT_AT(ulong2)
-ELEMENT_AT(ulong3)
-ELEMENT_AT(ulong4)
-ELEMENT_AT(float)
-ELEMENT_AT(float2)
-ELEMENT_AT(float3)
-ELEMENT_AT(float4)
-ELEMENT_AT(double)
-ELEMENT_AT(double2)
-ELEMENT_AT(double3)
-ELEMENT_AT(double4)
-
-#undef ELEMENT_AT
-
-
-extern const uchar __attribute__((overloadable))
- rsGetElementAtYuv_uchar_Y(rs_allocation a, uint32_t x, uint32_t y) {
- return rsGetElementAt_uchar(a, x, y);
-}
-
-extern const uchar __attribute__((overloadable))
- rsGetElementAtYuv_uchar_U(rs_allocation a, uint32_t x, uint32_t y) {
-
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint32_t yuvID = alloc->mHal.state.yuv;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
-
- switch(yuvID) {
- case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
- x >>= 1;
- y >>= 1;
- return p[x + (y * stride)];
- case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- x >>= 1;
- y >>= 1;
- return p[(x<<1) + (y * stride)];
- default:
- break;
- }
-
- return 0;
-}
-
-extern const uchar __attribute__((overloadable))
- rsGetElementAtYuv_uchar_V(rs_allocation a, uint32_t x, uint32_t y) {
-
- Allocation_t *alloc = (Allocation_t *)a.p;
- const uint32_t yuvID = alloc->mHal.state.yuv;
-
- switch(yuvID) {
- case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
- {
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[2].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[2].stride;
- x >>= 1;
- y >>= 1;
- return p[x + (y * stride)];
- }
- case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- {
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
- x >>= 1;
- y >>= 1;
- return p[(x<<1) + (y * stride) + 1];
- }
- default:
- break;
- }
-
- return 0;
-}
-
diff --git a/lib/Renderscript/runtime/rs_cl.c b/lib/Renderscript/runtime/rs_cl.c
deleted file mode 100755
index b7f9158..0000000
--- a/lib/Renderscript/runtime/rs_cl.c
+++ /dev/null
@@ -1,1194 +0,0 @@
-#include "rs_types.rsh"
-
-extern float2 __attribute__((overloadable)) convert_float2(int2 c);
-extern float3 __attribute__((overloadable)) convert_float3(int3 c);
-extern float4 __attribute__((overloadable)) convert_float4(int4 c);
-
-extern int2 __attribute__((overloadable)) convert_int2(float2 c);
-extern int3 __attribute__((overloadable)) convert_int3(float3 c);
-extern int4 __attribute__((overloadable)) convert_int4(float4 c);
-
-
-extern float __attribute__((overloadable)) fmin(float v, float v2);
-extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
-extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
-extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
-
-extern float __attribute__((overloadable)) fmax(float v, float v2);
-extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
-extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
-extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
-
-// Float ops, 6.11.2
-
-#define FN_FUNC_FN(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v) { \
- float2 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v) { \
- float3 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v) { \
- float4 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- r.w = fnc(v.w); \
- return r; \
-}
-
-#define IN_FUNC_FN(fnc) \
-extern int2 __attribute__((overloadable)) fnc(float2 v) { \
- int2 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- return r; \
-} \
-extern int3 __attribute__((overloadable)) fnc(float3 v) { \
- int3 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- return r; \
-} \
-extern int4 __attribute__((overloadable)) fnc(float4 v) { \
- int4 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- r.w = fnc(v.w); \
- return r; \
-}
-
-#define FN_FUNC_FN_FN(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
- float2 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
- float3 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
- float4 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- r.w = fnc(v1.w, v2.w); \
- return r; \
-}
-
-#define FN_FUNC_FN_F(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \
- float2 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \
- float3 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- r.z = fnc(v1.z, v2); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \
- float4 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- r.z = fnc(v1.z, v2); \
- r.w = fnc(v1.w, v2); \
- return r; \
-}
-
-#define FN_FUNC_FN_IN(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \
- float2 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \
- float3 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \
- float4 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- r.w = fnc(v1.w, v2.w); \
- return r; \
-}
-
-#define FN_FUNC_FN_I(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \
- float2 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \
- float3 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- r.z = fnc(v1.z, v2); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \
- float4 r; \
- r.x = fnc(v1.x, v2); \
- r.y = fnc(v1.y, v2); \
- r.z = fnc(v1.z, v2); \
- r.w = fnc(v1.w, v2); \
- return r; \
-}
-
-#define FN_FUNC_FN_PFN(fnc) \
-extern float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 *v2) { \
- float2 r; \
- float t[2]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 *v2) { \
- float3 r; \
- float t[3]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- r.z = fnc(v1.z, &t[2]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- v2->z = t[2]; \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) \
- fnc(float4 v1, float4 *v2) { \
- float4 r; \
- float t[4]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- r.z = fnc(v1.z, &t[2]); \
- r.w = fnc(v1.w, &t[3]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- v2->z = t[2]; \
- v2->w = t[3]; \
- return r; \
-}
-
-#define FN_FUNC_FN_PIN(fnc) \
-extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \
- float2 r; \
- int t[2]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \
- float3 r; \
- int t[3]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- r.z = fnc(v1.z, &t[2]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- v2->z = t[2]; \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \
- float4 r; \
- int t[4]; \
- r.x = fnc(v1.x, &t[0]); \
- r.y = fnc(v1.y, &t[1]); \
- r.z = fnc(v1.z, &t[2]); \
- r.w = fnc(v1.w, &t[3]); \
- v2->x = t[0]; \
- v2->y = t[1]; \
- v2->z = t[2]; \
- v2->w = t[3]; \
- return r; \
-}
-
-#define FN_FUNC_FN_FN_FN(fnc) \
-extern float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 v2, float2 v3) { \
- float2 r; \
- r.x = fnc(v1.x, v2.x, v3.x); \
- r.y = fnc(v1.y, v2.y, v3.y); \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 v2, float3 v3) { \
- float3 r; \
- r.x = fnc(v1.x, v2.x, v3.x); \
- r.y = fnc(v1.y, v2.y, v3.y); \
- r.z = fnc(v1.z, v2.z, v3.z); \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) \
- fnc(float4 v1, float4 v2, float4 v3) { \
- float4 r; \
- r.x = fnc(v1.x, v2.x, v3.x); \
- r.y = fnc(v1.y, v2.y, v3.y); \
- r.z = fnc(v1.z, v2.z, v3.z); \
- r.w = fnc(v1.w, v2.w, v3.w); \
- return r; \
-}
-
-#define FN_FUNC_FN_FN_PIN(fnc) \
-extern float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 v2, int2 *v3) { \
- float2 r; \
- int t[2]; \
- r.x = fnc(v1.x, v2.x, &t[0]); \
- r.y = fnc(v1.y, v2.y, &t[1]); \
- v3->x = t[0]; \
- v3->y = t[1]; \
- return r; \
-} \
-extern float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 v2, int3 *v3) { \
- float3 r; \
- int t[3]; \
- r.x = fnc(v1.x, v2.x, &t[0]); \
- r.y = fnc(v1.y, v2.y, &t[1]); \
- r.z = fnc(v1.z, v2.z, &t[2]); \
- v3->x = t[0]; \
- v3->y = t[1]; \
- v3->z = t[2]; \
- return r; \
-} \
-extern float4 __attribute__((overloadable)) \
- fnc(float4 v1, float4 v2, int4 *v3) { \
- float4 r; \
- int t[4]; \
- r.x = fnc(v1.x, v2.x, &t[0]); \
- r.y = fnc(v1.y, v2.y, &t[1]); \
- r.z = fnc(v1.z, v2.z, &t[2]); \
- r.w = fnc(v1.w, v2.w, &t[3]); \
- v3->x = t[0]; \
- v3->y = t[1]; \
- v3->z = t[2]; \
- v3->w = t[3]; \
- return r; \
-}
-
-static const int iposinf = 0x7f800000;
-static const int ineginf = 0xff800000;
-
-static const float posinf() {
- float f = *((float*)&iposinf);
- return f;
-}
-
-static const float neginf() {
- float f = *((float*)&ineginf);
- return f;
-}
-
-static bool isinf(float f) {
- int i = *((int*)(void*)&f);
- return (i == iposinf) || (i == ineginf);
-}
-
-static bool isnan(float f) {
- int i = *((int*)(void*)&f);
- return (((i & 0x7f800000) == 0x7f800000) && (i & 0x007fffff));
-}
-
-static bool isposzero(float f) {
- int i = *((int*)(void*)&f);
- return (i == 0x00000000);
-}
-
-static bool isnegzero(float f) {
- int i = *((int*)(void*)&f);
- return (i == 0x80000000);
-}
-
-static bool iszero(float f) {
- return isposzero(f) || isnegzero(f);
-}
-
-
-extern float __attribute__((overloadable)) acos(float);
-FN_FUNC_FN(acos)
-
-extern float __attribute__((overloadable)) acosh(float);
-FN_FUNC_FN(acosh)
-
-
-extern float __attribute__((overloadable)) acospi(float v) {
- return acos(v) / M_PI;
-}
-FN_FUNC_FN(acospi)
-
-extern float __attribute__((overloadable)) asin(float);
-FN_FUNC_FN(asin)
-
-extern float __attribute__((overloadable)) asinh(float);
-FN_FUNC_FN(asinh)
-
-extern float __attribute__((overloadable)) asinpi(float v) {
- return asin(v) / M_PI;
-}
-FN_FUNC_FN(asinpi)
-
-extern float __attribute__((overloadable)) atan(float);
-FN_FUNC_FN(atan)
-
-extern float __attribute__((overloadable)) atan2(float, float);
-FN_FUNC_FN_FN(atan2)
-
-extern float __attribute__((overloadable)) atanh(float);
-FN_FUNC_FN(atanh)
-
-extern float __attribute__((overloadable)) atanpi(float v) {
- return atan(v) / M_PI;
-}
-FN_FUNC_FN(atanpi)
-
-
-extern float __attribute__((overloadable)) atan2pi(float y, float x) {
- return atan2(y, x) / M_PI;
-}
-FN_FUNC_FN_FN(atan2pi)
-
-extern float __attribute__((overloadable)) cbrt(float);
-FN_FUNC_FN(cbrt)
-
-extern float __attribute__((overloadable)) ceil(float);
-FN_FUNC_FN(ceil)
-
-extern float __attribute__((overloadable)) copysign(float, float);
-FN_FUNC_FN_FN(copysign)
-
-extern float __attribute__((overloadable)) cos(float);
-FN_FUNC_FN(cos)
-
-extern float __attribute__((overloadable)) cosh(float);
-FN_FUNC_FN(cosh)
-
-extern float __attribute__((overloadable)) cospi(float v) {
- return cos(v * M_PI);
-}
-FN_FUNC_FN(cospi)
-
-extern float __attribute__((overloadable)) erfc(float);
-FN_FUNC_FN(erfc)
-
-extern float __attribute__((overloadable)) erf(float);
-FN_FUNC_FN(erf)
-
-extern float __attribute__((overloadable)) exp(float);
-FN_FUNC_FN(exp)
-
-extern float __attribute__((overloadable)) exp2(float);
-FN_FUNC_FN(exp2)
-
-extern float __attribute__((overloadable)) pow(float, float);
-
-extern float __attribute__((overloadable)) exp10(float v) {
- return exp2(v * 3.321928095f);
-}
-FN_FUNC_FN(exp10)
-
-extern float __attribute__((overloadable)) expm1(float);
-FN_FUNC_FN(expm1)
-
-extern float __attribute__((overloadable)) fabs(float v) {
- int i = *((int*)(void*)&v) & 0x7fffffff;
- return *((float*)(void*)&i);
-}
-FN_FUNC_FN(fabs)
-
-extern float __attribute__((overloadable)) fdim(float, float);
-FN_FUNC_FN_FN(fdim)
-
-extern float __attribute__((overloadable)) floor(float);
-FN_FUNC_FN(floor)
-
-extern float __attribute__((overloadable)) fma(float, float, float);
-FN_FUNC_FN_FN_FN(fma)
-
-extern float __attribute__((overloadable)) fmin(float, float);
-
-extern float __attribute__((overloadable)) fmod(float, float);
-FN_FUNC_FN_FN(fmod)
-
-extern float __attribute__((overloadable)) fract(float v, float *iptr) {
- int i = (int)floor(v);
- if (iptr) {
- iptr[0] = i;
- }
- return fmin(v - i, 0x1.fffffep-1f);
-}
-FN_FUNC_FN_PFN(fract)
-
-extern float __attribute__((overloadable)) frexp(float, int *);
-FN_FUNC_FN_PIN(frexp)
-
-extern float __attribute__((overloadable)) hypot(float, float);
-FN_FUNC_FN_FN(hypot)
-
-extern int __attribute__((overloadable)) ilogb(float);
-IN_FUNC_FN(ilogb)
-
-extern float __attribute__((overloadable)) ldexp(float, int);
-FN_FUNC_FN_IN(ldexp)
-FN_FUNC_FN_I(ldexp)
-
-extern float __attribute__((overloadable)) lgamma(float);
-FN_FUNC_FN(lgamma)
-extern float __attribute__((overloadable)) lgamma(float, int*);
-FN_FUNC_FN_PIN(lgamma)
-
-extern float __attribute__((overloadable)) log(float);
-FN_FUNC_FN(log)
-
-extern float __attribute__((overloadable)) log10(float);
-FN_FUNC_FN(log10)
-
-
-extern float __attribute__((overloadable)) log2(float v) {
- return log10(v) * 3.321928095f;
-}
-FN_FUNC_FN(log2)
-
-extern float __attribute__((overloadable)) log1p(float);
-FN_FUNC_FN(log1p)
-
-extern float __attribute__((overloadable)) logb(float);
-FN_FUNC_FN(logb)
-
-extern float __attribute__((overloadable)) mad(float a, float b, float c) {
- return a * b + c;
-}
-extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
- return a * b + c;
-}
-extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
- return a * b + c;
-}
-extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
- return a * b + c;
-}
-
-extern float __attribute__((overloadable)) modf(float, float *);
-FN_FUNC_FN_PFN(modf);
-
-extern float __attribute__((overloadable)) nan(uint v) {
- float f[1];
- uint32_t *ip = (uint32_t *)f;
- *ip = v | 0x7fc00000;
- return f[0];
-}
-
-extern float __attribute__((overloadable)) nextafter(float, float);
-FN_FUNC_FN_FN(nextafter)
-
-FN_FUNC_FN_FN(pow)
-
-extern float __attribute__((overloadable)) pown(float v, int p) {
- return pow(v, (float)p);
-}
-extern float2 __attribute__((overloadable)) pown(float2 v, int2 p) {
- float2 f2 = convert_float2(p);
- return pow(v, f2);
-}
-extern float3 __attribute__((overloadable)) pown(float3 v, int3 p) {
- float3 f3 = convert_float3(p);
- return pow(v, f3);
-}
-extern float4 __attribute__((overloadable)) pown(float4 v, int4 p) {
- float4 f4 = convert_float4(p);
- return pow(v, f4);
-}
-
-extern float __attribute__((overloadable)) powr(float v, float p) {
- return pow(v, p);
-}
-extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
- return pow(v, p);
-}
-extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
- return pow(v, p);
-}
-extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
- return pow(v, p);
-}
-
-extern float __attribute__((overloadable)) remainder(float, float);
-FN_FUNC_FN_FN(remainder)
-
-extern float __attribute__((overloadable)) remquo(float, float, int *);
-FN_FUNC_FN_FN_PIN(remquo)
-
-extern float __attribute__((overloadable)) rint(float);
-FN_FUNC_FN(rint)
-
-extern float __attribute__((overloadable)) rootn(float v, int r) {
- if (r == 0) {
- return nan(0);
- }
-
- if (iszero(v)) {
- if (r < 0) {
- if (r & 1) {
- return copysign(posinf(), v);
- } else {
- return posinf();
- }
- } else {
- if (r & 1) {
- return copysign(0.f, v);
- } else {
- return 0.f;
- }
- }
- }
-
- if (!isinf(v) && !isnan(v) && (v < 0.f)) {
- if (r & 1) {
- return (-1.f * pow(-1.f * v, 1.f / r));
- } else {
- return nan(0);
- }
- }
-
- return pow(v, 1.f / r);
-}
-FN_FUNC_FN_IN(rootn);
-
-extern float __attribute__((overloadable)) round(float);
-FN_FUNC_FN(round)
-
-
-extern float __attribute__((overloadable)) sqrt(float);
-extern float __attribute__((overloadable)) rsqrt(float v) {
- return 1.f / sqrt(v);
-}
-FN_FUNC_FN(rsqrt)
-
-extern float __attribute__((overloadable)) sin(float);
-FN_FUNC_FN(sin)
-
-extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
- *cosptr = cos(v);
- return sin(v);
-}
-extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
- *cosptr = cos(v);
- return sin(v);
-}
-extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
- *cosptr = cos(v);
- return sin(v);
-}
-extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
- *cosptr = cos(v);
- return sin(v);
-}
-
-extern float __attribute__((overloadable)) sinh(float);
-FN_FUNC_FN(sinh)
-
-extern float __attribute__((overloadable)) sinpi(float v) {
- return sin(v * M_PI);
-}
-FN_FUNC_FN(sinpi)
-
-extern float __attribute__((overloadable)) tan(float);
-FN_FUNC_FN(tan)
-
-extern float __attribute__((overloadable)) tanh(float);
-FN_FUNC_FN(tanh)
-
-extern float __attribute__((overloadable)) tanpi(float v) {
- return tan(v * M_PI);
-}
-FN_FUNC_FN(tanpi)
-
-
-extern float __attribute__((overloadable)) tgamma(float);
-FN_FUNC_FN(tgamma)
-
-extern float __attribute__((overloadable)) trunc(float);
-FN_FUNC_FN(trunc)
-
-// Int ops (partial), 6.11.3
-
-#define XN_FUNC_YN(typeout, fnc, typein) \
-extern typeout __attribute__((overloadable)) fnc(typein); \
-extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) { \
- typeout##2 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- return r; \
-} \
-extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) { \
- typeout##3 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- return r; \
-} \
-extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) { \
- typeout##4 r; \
- r.x = fnc(v.x); \
- r.y = fnc(v.y); \
- r.z = fnc(v.z); \
- r.w = fnc(v.w); \
- return r; \
-}
-
-
-#define UIN_FUNC_IN(fnc) \
-XN_FUNC_YN(uchar, fnc, char) \
-XN_FUNC_YN(ushort, fnc, short) \
-XN_FUNC_YN(uint, fnc, int)
-
-#define IN_FUNC_IN(fnc) \
-XN_FUNC_YN(uchar, fnc, uchar) \
-XN_FUNC_YN(char, fnc, char) \
-XN_FUNC_YN(ushort, fnc, ushort) \
-XN_FUNC_YN(short, fnc, short) \
-XN_FUNC_YN(uint, fnc, uint) \
-XN_FUNC_YN(int, fnc, int)
-
-
-#define XN_FUNC_XN_XN_BODY(type, fnc, body) \
-extern type __attribute__((overloadable)) \
- fnc(type v1, type v2) { \
- return body; \
-} \
-extern type##2 __attribute__((overloadable)) \
- fnc(type##2 v1, type##2 v2) { \
- type##2 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- return r; \
-} \
-extern type##3 __attribute__((overloadable)) \
- fnc(type##3 v1, type##3 v2) { \
- type##3 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- return r; \
-} \
-extern type##4 __attribute__((overloadable)) \
- fnc(type##4 v1, type##4 v2) { \
- type##4 r; \
- r.x = fnc(v1.x, v2.x); \
- r.y = fnc(v1.y, v2.y); \
- r.z = fnc(v1.z, v2.z); \
- r.w = fnc(v1.w, v2.w); \
- return r; \
-}
-
-#define IN_FUNC_IN_IN_BODY(fnc, body) \
-XN_FUNC_XN_XN_BODY(uchar, fnc, body) \
-XN_FUNC_XN_XN_BODY(char, fnc, body) \
-XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
-XN_FUNC_XN_XN_BODY(short, fnc, body) \
-XN_FUNC_XN_XN_BODY(uint, fnc, body) \
-XN_FUNC_XN_XN_BODY(int, fnc, body) \
-XN_FUNC_XN_XN_BODY(float, fnc, body)
-
-
-/**
- * abs
- */
-extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
- if (v < 0)
- return -v;
- return v;
-}
-extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
- if (v < 0)
- return -v;
- return v;
-}
-extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
- if (v < 0)
- return -v;
- return v;
-}
-
-/**
- * clz
- */
-extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
- return __builtin_clz(v);
-}
-extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
- return (uint16_t)__builtin_clz(v);
-}
-extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
- return (uint8_t)__builtin_clz(v);
-}
-extern int32_t __attribute__((overloadable)) clz(int32_t v) {
- return (int32_t)__builtin_clz((uint32_t)v);
-}
-extern int16_t __attribute__((overloadable)) clz(int16_t v) {
- return (int16_t)__builtin_clz(v);
-}
-extern int8_t __attribute__((overloadable)) clz(int8_t v) {
- return (int8_t)__builtin_clz(v);
-}
-
-
-UIN_FUNC_IN(abs)
-IN_FUNC_IN(clz)
-
-
-// 6.11.4
-
-
-extern float __attribute__((overloadable)) degrees(float radians) {
- return radians * (180.f / M_PI);
-}
-extern float2 __attribute__((overloadable)) degrees(float2 radians) {
- return radians * (180.f / M_PI);
-}
-extern float3 __attribute__((overloadable)) degrees(float3 radians) {
- return radians * (180.f / M_PI);
-}
-extern float4 __attribute__((overloadable)) degrees(float4 radians) {
- return radians * (180.f / M_PI);
-}
-
-extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
- return start + (stop - start) * amount;
-}
-extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
- return start + (stop - start) * amount;
-}
-extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
- return start + (stop - start) * amount;
-}
-extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
- return start + (stop - start) * amount;
-}
-extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
- return start + (stop - start) * amount;
-}
-extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
- return start + (stop - start) * amount;
-}
-extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
- return start + (stop - start) * amount;
-}
-
-extern float __attribute__((overloadable)) radians(float degrees) {
- return degrees * (M_PI / 180.f);
-}
-extern float2 __attribute__((overloadable)) radians(float2 degrees) {
- return degrees * (M_PI / 180.f);
-}
-extern float3 __attribute__((overloadable)) radians(float3 degrees) {
- return degrees * (M_PI / 180.f);
-}
-extern float4 __attribute__((overloadable)) radians(float4 degrees) {
- return degrees * (M_PI / 180.f);
-}
-
-extern float __attribute__((overloadable)) step(float edge, float v) {
- return (v < edge) ? 0.f : 1.f;
-}
-extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
- float2 r;
- r.x = (v.x < edge.x) ? 0.f : 1.f;
- r.y = (v.y < edge.y) ? 0.f : 1.f;
- return r;
-}
-extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
- float3 r;
- r.x = (v.x < edge.x) ? 0.f : 1.f;
- r.y = (v.y < edge.y) ? 0.f : 1.f;
- r.z = (v.z < edge.z) ? 0.f : 1.f;
- return r;
-}
-extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
- float4 r;
- r.x = (v.x < edge.x) ? 0.f : 1.f;
- r.y = (v.y < edge.y) ? 0.f : 1.f;
- r.z = (v.z < edge.z) ? 0.f : 1.f;
- r.w = (v.w < edge.w) ? 0.f : 1.f;
- return r;
-}
-extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
- float2 r;
- r.x = (v < edge.x) ? 0.f : 1.f;
- r.y = (v < edge.y) ? 0.f : 1.f;
- return r;
-}
-extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
- float3 r;
- r.x = (v < edge.x) ? 0.f : 1.f;
- r.y = (v < edge.y) ? 0.f : 1.f;
- r.z = (v < edge.z) ? 0.f : 1.f;
- return r;
-}
-extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
- float4 r;
- r.x = (v < edge.x) ? 0.f : 1.f;
- r.y = (v < edge.y) ? 0.f : 1.f;
- r.z = (v < edge.z) ? 0.f : 1.f;
- r.w = (v < edge.w) ? 0.f : 1.f;
- return r;
-}
-
-extern float __attribute__((overloadable)) smoothstep(float, float, float);
-extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
-extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
-extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
-extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
-extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
-extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
-
-extern float __attribute__((overloadable)) sign(float v) {
- if (v > 0) return 1.f;
- if (v < 0) return -1.f;
- return v;
-}
-FN_FUNC_FN(sign)
-
-
-// 6.11.5
-extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
- float3 r;
- r.x = lhs.y * rhs.z - lhs.z * rhs.y;
- r.y = lhs.z * rhs.x - lhs.x * rhs.z;
- r.z = lhs.x * rhs.y - lhs.y * rhs.x;
- return r;
-}
-
-extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
- float4 r;
- r.x = lhs.y * rhs.z - lhs.z * rhs.y;
- r.y = lhs.z * rhs.x - lhs.x * rhs.z;
- r.z = lhs.x * rhs.y - lhs.y * rhs.x;
- r.w = 0.f;
- return r;
-}
-
-extern float __attribute__((overloadable)) length(float v);
-extern float __attribute__((overloadable)) length(float2 v);
-extern float __attribute__((overloadable)) length(float3 v);
-extern float __attribute__((overloadable)) length(float4 v);
-
-extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
- return length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
- return length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
- return length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
- return length(lhs - rhs);
-}
-
-extern float __attribute__((overloadable)) normalize(float v) {
- return 1.f;
-}
-extern float2 __attribute__((overloadable)) normalize(float2 v) {
- return v / length(v);
-}
-extern float3 __attribute__((overloadable)) normalize(float3 v) {
- return v / length(v);
-}
-extern float4 __attribute__((overloadable)) normalize(float4 v) {
- return v / length(v);
-}
-
-extern float __attribute__((overloadable)) half_sqrt(float);
-
-extern float __attribute__((overloadable)) fast_length(float v) {
- return fabs(v);
-}
-extern float __attribute__((overloadable)) fast_length(float2 v) {
- return half_sqrt(v.x*v.x + v.y*v.y);
-}
-extern float __attribute__((overloadable)) fast_length(float3 v) {
- return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
-}
-extern float __attribute__((overloadable)) fast_length(float4 v) {
- return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
-}
-
-extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
- return fast_length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
- return fast_length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
- return fast_length(lhs - rhs);
-}
-extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
- return fast_length(lhs - rhs);
-}
-
-extern float __attribute__((overloadable)) half_rsqrt(float);
-
-extern float __attribute__((overloadable)) fast_normalize(float v) {
- return 1.f;
-}
-extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
- return v * half_rsqrt(v.x*v.x + v.y*v.y);
-}
-extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
- return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
-}
-extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
- return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
-}
-
-extern float __attribute__((overloadable)) half_recip(float);
-
-/*
-extern float __attribute__((overloadable)) approx_atan(float x) {
- if (x == 0.f)
- return 0.f;
- if (x < 0.f)
- return -1.f * approx_atan(-1.f * x);
- if (x > 1.f)
- return M_PI_2 - approx_atan(approx_recip(x));
- return x * approx_recip(1.f + 0.28f * x*x);
-}
-FN_FUNC_FN(approx_atan)
-*/
-
-typedef union
-{
- float fv;
- int32_t iv;
-} ieee_float_shape_type;
-
-/* Get a 32 bit int from a float. */
-
-#define GET_FLOAT_WORD(i,d) \
-do { \
- ieee_float_shape_type gf_u; \
- gf_u.fv = (d); \
- (i) = gf_u.iv; \
-} while (0)
-
-/* Set a float from a 32 bit int. */
-
-#define SET_FLOAT_WORD(d,i) \
-do { \
- ieee_float_shape_type sf_u; \
- sf_u.iv = (i); \
- (d) = sf_u.fv; \
-} while (0)
-
-
-
-// Valid -125 to 125
-extern float __attribute__((overloadable)) native_exp2(float v) {
- int32_t iv = (int)v;
- int32_t x = iv + (iv >> 31); // ~floor(v)
- float r = (v - x);
-
- float fo;
- SET_FLOAT_WORD(fo, (x + 127) << 23);
-
- r *= 0.694f; // ~ log(e) / log(2)
- float r2 = r*r;
- float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
- return fo * adj;
-}
-
-extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
- int2 iv = convert_int2(v);
- int2 x = iv + (iv >> (int2)31);//floor(v);
- float2 r = (v - convert_float2(x));
-
- x += 127;
-
- float2 fo = (float2)(x << (int2)23);
-
- r *= 0.694f; // ~ log(e) / log(2)
- float2 r2 = r*r;
- float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
- return fo * adj;
-}
-
-extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
- int4 iv = convert_int4(v);
- int4 x = iv + (iv >> (int4)31);//floor(v);
- float4 r = (v - convert_float4(x));
-
- x += 127;
-
- float4 fo = (float4)(x << (int4)23);
-
- r *= 0.694f; // ~ log(e) / log(2)
- float4 r2 = r*r;
- float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
- return fo * adj;
-}
-
-extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
- float4 t = 1.f;
- t.xyz = v;
- return native_exp2(t).xyz;
-}
-
-
-extern float __attribute__((overloadable)) native_exp(float v) {
- return native_exp2(v * 1.442695041f);
-}
-extern float2 __attribute__((overloadable)) native_exp(float2 v) {
- return native_exp2(v * 1.442695041f);
-}
-extern float3 __attribute__((overloadable)) native_exp(float3 v) {
- return native_exp2(v * 1.442695041f);
-}
-extern float4 __attribute__((overloadable)) native_exp(float4 v) {
- return native_exp2(v * 1.442695041f);
-}
-
-extern float __attribute__((overloadable)) native_exp10(float v) {
- return native_exp2(v * 3.321928095f);
-}
-extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
- return native_exp2(v * 3.321928095f);
-}
-extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
- return native_exp2(v * 3.321928095f);
-}
-extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
- return native_exp2(v * 3.321928095f);
-}
-
-extern float __attribute__((overloadable)) native_log2(float v) {
- int32_t ibits;
- GET_FLOAT_WORD(ibits, v);
-
- int32_t e = (ibits >> 23) & 0xff;
-
- ibits &= 0x7fffff;
- ibits |= 127 << 23;
-
- float ir;
- SET_FLOAT_WORD(ir, ibits);
-
- ir -= 1.5f;
- float ir2 = ir*ir;
- float adj2 = 0.405465108f + // -0.00009f +
- (0.666666667f * ir) -
- (0.222222222f * ir2) +
- (0.098765432f * ir*ir2) -
- (0.049382716f * ir2*ir2) +
- (0.026337449f * ir*ir2*ir2) -
- (0.014631916f * ir2*ir2*ir2);
- adj2 *= (1.f / 0.693147181f);
-
- return (float)(e - 127) + adj2;
-}
-extern float2 __attribute__((overloadable)) native_log2(float2 v) {
- float2 v2 = {native_log2(v.x), native_log2(v.y)};
- return v2;
-}
-extern float3 __attribute__((overloadable)) native_log2(float3 v) {
- float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
- return v2;
-}
-extern float4 __attribute__((overloadable)) native_log2(float4 v) {
- float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
- return v2;
-}
-
-extern float __attribute__((overloadable)) native_log(float v) {
- return native_log2(v) * (1.f / 1.442695041f);
-}
-extern float2 __attribute__((overloadable)) native_log(float2 v) {
- return native_log2(v) * (1.f / 1.442695041f);
-}
-extern float3 __attribute__((overloadable)) native_log(float3 v) {
- return native_log2(v) * (1.f / 1.442695041f);
-}
-extern float4 __attribute__((overloadable)) native_log(float4 v) {
- return native_log2(v) * (1.f / 1.442695041f);
-}
-
-extern float __attribute__((overloadable)) native_log10(float v) {
- return native_log2(v) * (1.f / 3.321928095f);
-}
-extern float2 __attribute__((overloadable)) native_log10(float2 v) {
- return native_log2(v) * (1.f / 3.321928095f);
-}
-extern float3 __attribute__((overloadable)) native_log10(float3 v) {
- return native_log2(v) * (1.f / 3.321928095f);
-}
-extern float4 __attribute__((overloadable)) native_log10(float4 v) {
- return native_log2(v) * (1.f / 3.321928095f);
-}
-
-
-extern float __attribute__((overloadable)) native_powr(float v, float y) {
- float v2 = native_log2(v);
- v2 = fmax(v2, -125.f);
- return native_exp2(v2 * y);
-}
-extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
- float2 v2 = native_log2(v);
- v2 = fmax(v2, -125.f);
- return native_exp2(v2 * y);
-}
-extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
- float3 v2 = native_log2(v);
- v2 = fmax(v2, -125.f);
- return native_exp2(v2 * y);
-}
-extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
- float4 v2 = native_log2(v);
- v2 = fmax(v2, -125.f);
- return native_exp2(v2 * y);
-}
-
-
-#undef FN_FUNC_FN
-#undef IN_FUNC_FN
-#undef FN_FUNC_FN_FN
-#undef FN_FUNC_FN_F
-#undef FN_FUNC_FN_IN
-#undef FN_FUNC_FN_I
-#undef FN_FUNC_FN_PFN
-#undef FN_FUNC_FN_PIN
-#undef FN_FUNC_FN_FN_FN
-#undef FN_FUNC_FN_FN_PIN
-#undef XN_FUNC_YN
-#undef UIN_FUNC_IN
-#undef IN_FUNC_IN
-#undef XN_FUNC_XN_XN_BODY
-#undef IN_FUNC_IN_IN_BODY
diff --git a/lib/Renderscript/runtime/rs_core.c b/lib/Renderscript/runtime/rs_core.c
deleted file mode 100644
index 54fcccb..0000000
--- a/lib/Renderscript/runtime/rs_core.c
+++ /dev/null
@@ -1,204 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/* Function declarations from libRS */
-extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
-
-/* Implementation of Core Runtime */
-
-/*
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
-{
- uchar4 c;
- c.x = (uchar)(r * 255.f + 0.5f);
- c.y = (uchar)(g * 255.f + 0.5f);
- c.z = (uchar)(b * 255.f + 0.5f);
- c.w = 255;
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
-{
- uchar4 c;
- c.x = (uchar)(r * 255.f + 0.5f);
- c.y = (uchar)(g * 255.f + 0.5f);
- c.z = (uchar)(b * 255.f + 0.5f);
- c.w = (uchar)(a * 255.f + 0.5f);
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
-{
- color *= 255.f;
- color += 0.5f;
- uchar4 c = {color.x, color.y, color.z, 255};
- return c;
-}
-
-extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
-{
- color *= 255.f;
- color += 0.5f;
- uchar4 c = {color.x, color.y, color.z, color.w};
- return c;
-}
-*/
-
-extern float4 rsUnpackColor8888(uchar4 c)
-{
- return convert_float4(c) * 0.003921569f;
-}
-
-
-extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
- return __sync_val_compare_and_swap(ptr, expectedValue, newValue);
-}
-
-extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) {
- return __sync_val_compare_and_swap((volatile int32_t *)ptr, (int32_t)expectedValue, (int32_t)newValue);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) {
- return __sync_fetch_and_add(ptr, 1);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) {
- return __sync_fetch_and_sub(ptr, 1);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) {
- return __sync_fetch_and_add(ptr, value);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) {
- return __sync_fetch_and_sub(ptr, value);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) {
- return __sync_fetch_and_and(ptr, value);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) {
- return __sync_fetch_and_or(ptr, value);
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) {
- return __sync_fetch_and_xor(ptr, value);
-}
-
-extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t);
-extern int32_t __attribute__((overloadable)) min(int32_t, int32_t);
-extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t);
-extern int32_t __attribute__((overloadable)) max(int32_t, int32_t);
-
-extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) {
- uint32_t prev, status;
- do {
- prev = *ptr;
- uint32_t n = min(value, prev);
- status = rsAtomicCas((volatile int32_t*) ptr, (int32_t) prev, (int32_t)n);
- } while (status != prev);
- return prev;
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- int32_t n = min(value, prev);
- status = rsAtomicCas(ptr, prev, n);
- } while (status != prev);
- return prev;
-}
-
-extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) {
- uint32_t prev, status;
- do {
- prev = *ptr;
- uint32_t n = max(value, prev);
- status = rsAtomicCas((volatile int32_t*) ptr, (int32_t) prev, (int32_t) n);
- } while (status != prev);
- return prev;
-}
-
-extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) {
- int32_t prev, status;
- do {
- prev = *ptr;
- int32_t n = max(value, prev);
- status = rsAtomicCas(ptr, prev, n);
- } while (status != prev);
- return prev;
-}
-
-
-
-extern int32_t rand();
-#define RAND_MAX 0x7fffffff
-
-
-
-extern float __attribute__((overloadable)) rsRand(float min, float max);/* {
- float r = (float)rand();
- r /= RAND_MAX;
- r = r * (max - min) + min;
- return r;
-}
-*/
-
-extern float __attribute__((overloadable)) rsRand(float max) {
- return rsRand(0.f, max);
- //float r = (float)rand();
- //r *= max;
- //r /= RAND_MAX;
- //return r;
-}
-
-extern int __attribute__((overloadable)) rsRand(int max) {
- return (int)rsRand((float)max);
-}
-
-extern int __attribute__((overloadable)) rsRand(int min, int max) {
- return (int)rsRand((float)min, (float)max);
-}
-
-#define PRIM_DEBUG(T) \
-extern void __attribute__((overloadable)) rsDebug(const char *, const T *); \
-void __attribute__((overloadable)) rsDebug(const char *txt, T val) { \
- rsDebug(txt, &val); \
-}
-
-PRIM_DEBUG(char2)
-PRIM_DEBUG(char3)
-PRIM_DEBUG(char4)
-PRIM_DEBUG(uchar2)
-PRIM_DEBUG(uchar3)
-PRIM_DEBUG(uchar4)
-PRIM_DEBUG(short2)
-PRIM_DEBUG(short3)
-PRIM_DEBUG(short4)
-PRIM_DEBUG(ushort2)
-PRIM_DEBUG(ushort3)
-PRIM_DEBUG(ushort4)
-PRIM_DEBUG(int2)
-PRIM_DEBUG(int3)
-PRIM_DEBUG(int4)
-PRIM_DEBUG(uint2)
-PRIM_DEBUG(uint3)
-PRIM_DEBUG(uint4)
-PRIM_DEBUG(long2)
-PRIM_DEBUG(long3)
-PRIM_DEBUG(long4)
-PRIM_DEBUG(ulong2)
-PRIM_DEBUG(ulong3)
-PRIM_DEBUG(ulong4)
-PRIM_DEBUG(float2)
-PRIM_DEBUG(float3)
-PRIM_DEBUG(float4)
-PRIM_DEBUG(double2)
-PRIM_DEBUG(double3)
-PRIM_DEBUG(double4)
-
-#undef PRIM_DEBUG
-
diff --git a/lib/Renderscript/runtime/rs_element.c b/lib/Renderscript/runtime/rs_element.c
deleted file mode 100644
index 4db5883..0000000
--- a/lib/Renderscript/runtime/rs_element.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/**
-* Element
-*/
-extern uint32_t __attribute__((overloadable))
- rsElementGetSubElementCount(rs_element e) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL) {
- return 0;
- }
- return element->mHal.state.fieldsCount;
-}
-
-extern rs_element __attribute__((overloadable))
- rsElementGetSubElement(rs_element e, uint32_t index) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL || index >= element->mHal.state.fieldsCount) {
- rs_element nullElem = {0};
- return nullElem;
- }
- rs_element returnElem = {element->mHal.state.fields[index]};
- return returnElem;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetSubElementNameLength(rs_element e, uint32_t index) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL || index >= element->mHal.state.fieldsCount) {
- return 0;
- }
- return element->mHal.state.fieldNameLengths[index];
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetSubElementName(rs_element e, uint32_t index, char *name, uint32_t nameLength) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL || index >= element->mHal.state.fieldsCount ||
- nameLength == 0 || name == 0) {
- return 0;
- }
-
- uint32_t numToCopy = element->mHal.state.fieldNameLengths[index];
- if (nameLength < numToCopy) {
- numToCopy = nameLength;
- }
- // Place the null terminator manually, in case of partial string
- numToCopy --;
- name[numToCopy] = '\0';
- const char *nameSource = element->mHal.state.fieldNames[index];
- for (uint32_t i = 0; i < numToCopy; i ++) {
- name[i] = nameSource[i];
- }
- return numToCopy;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetSubElementArraySize(rs_element e, uint32_t index) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL || index >= element->mHal.state.fieldsCount) {
- return 0;
- }
- return element->mHal.state.fieldArraySizes[index];
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetSubElementOffsetBytes(rs_element e, uint32_t index) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL || index >= element->mHal.state.fieldsCount) {
- return 0;
- }
- return element->mHal.state.fieldOffsetBytes[index];
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetBytesSize(rs_element e) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL) {
- return 0;
- }
- return element->mHal.state.elementSizeBytes;
-}
-
-extern rs_data_type __attribute__((overloadable))
- rsElementGetDataType(rs_element e) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL) {
- return RS_TYPE_INVALID;
- }
- return element->mHal.state.dataType;
-}
-
-extern rs_data_kind __attribute__((overloadable))
- rsElementGetDataKind(rs_element e) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL) {
- return RS_KIND_INVALID;
- }
- return element->mHal.state.dataKind;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsElementGetVectorSize(rs_element e) {
- Element_t *element = (Element_t *)e.p;
- if (element == NULL) {
- return 0;
- }
- return element->mHal.state.vectorSize;
-}
diff --git a/lib/Renderscript/runtime/rs_matrix.c b/lib/Renderscript/runtime/rs_matrix.c
deleted file mode 100644
index 3afccc1..0000000
--- a/lib/Renderscript/runtime/rs_matrix.c
+++ /dev/null
@@ -1,314 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/* Function declarations from libRS */
-extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
-
-/* Implementation of Core Runtime */
-
-
-/////////////////////////////////////////////////////
-// Matrix ops
-/////////////////////////////////////////////////////
-
-
-extern void __attribute__((overloadable))
-rsMatrixLoadIdentity(rs_matrix4x4 *m) {
- m->m[0] = 1.f;
- m->m[1] = 0.f;
- m->m[2] = 0.f;
- m->m[3] = 0.f;
- m->m[4] = 0.f;
- m->m[5] = 1.f;
- m->m[6] = 0.f;
- m->m[7] = 0.f;
- m->m[8] = 0.f;
- m->m[9] = 0.f;
- m->m[10] = 1.f;
- m->m[11] = 0.f;
- m->m[12] = 0.f;
- m->m[13] = 0.f;
- m->m[14] = 0.f;
- m->m[15] = 1.f;
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoadIdentity(rs_matrix3x3 *m) {
- m->m[0] = 1.f;
- m->m[1] = 0.f;
- m->m[2] = 0.f;
- m->m[3] = 0.f;
- m->m[4] = 1.f;
- m->m[5] = 0.f;
- m->m[6] = 0.f;
- m->m[7] = 0.f;
- m->m[8] = 1.f;
-}
-extern void __attribute__((overloadable))
-rsMatrixLoadIdentity(rs_matrix2x2 *m) {
- m->m[0] = 1.f;
- m->m[1] = 0.f;
- m->m[2] = 0.f;
- m->m[3] = 1.f;
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix4x4 *m, const float *f) {
- m->m[0] = f[0];
- m->m[1] = f[1];
- m->m[2] = f[2];
- m->m[3] = f[3];
- m->m[4] = f[4];
- m->m[5] = f[5];
- m->m[6] = f[6];
- m->m[7] = f[7];
- m->m[8] = f[8];
- m->m[9] = f[9];
- m->m[10] = f[10];
- m->m[11] = f[11];
- m->m[12] = f[12];
- m->m[13] = f[13];
- m->m[14] = f[14];
- m->m[15] = f[15];
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix3x3 *m, const float *f) {
- m->m[0] = f[0];
- m->m[1] = f[1];
- m->m[2] = f[2];
- m->m[3] = f[3];
- m->m[4] = f[4];
- m->m[5] = f[5];
- m->m[6] = f[6];
- m->m[7] = f[7];
- m->m[8] = f[8];
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix2x2 *m, const float *f) {
- m->m[0] = f[0];
- m->m[1] = f[1];
- m->m[2] = f[2];
- m->m[3] = f[3];
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *s) {
- m->m[0] = s->m[0];
- m->m[1] = s->m[1];
- m->m[2] = s->m[2];
- m->m[3] = s->m[3];
- m->m[4] = s->m[4];
- m->m[5] = s->m[5];
- m->m[6] = s->m[6];
- m->m[7] = s->m[7];
- m->m[8] = s->m[8];
- m->m[9] = s->m[9];
- m->m[10] = s->m[10];
- m->m[11] = s->m[11];
- m->m[12] = s->m[12];
- m->m[13] = s->m[13];
- m->m[14] = s->m[14];
- m->m[15] = s->m[15];
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v) {
- m->m[0] = v->m[0];
- m->m[1] = v->m[1];
- m->m[2] = v->m[2];
- m->m[3] = 0.f;
- m->m[4] = v->m[3];
- m->m[5] = v->m[4];
- m->m[6] = v->m[5];
- m->m[7] = 0.f;
- m->m[8] = v->m[6];
- m->m[9] = v->m[7];
- m->m[10] = v->m[8];
- m->m[11] = 0.f;
- m->m[12] = 0.f;
- m->m[13] = 0.f;
- m->m[14] = 0.f;
- m->m[15] = 1.f;
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v) {
- m->m[0] = v->m[0];
- m->m[1] = v->m[1];
- m->m[2] = 0.f;
- m->m[3] = 0.f;
- m->m[4] = v->m[2];
- m->m[5] = v->m[3];
- m->m[6] = 0.f;
- m->m[7] = 0.f;
- m->m[8] = 0.f;
- m->m[9] = 0.f;
- m->m[10] = 1.f;
- m->m[11] = 0.f;
- m->m[12] = 0.f;
- m->m[13] = 0.f;
- m->m[14] = 0.f;
- m->m[15] = 1.f;
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *s) {
- m->m[0] = s->m[0];
- m->m[1] = s->m[1];
- m->m[2] = s->m[2];
- m->m[3] = s->m[3];
- m->m[4] = s->m[4];
- m->m[5] = s->m[5];
- m->m[6] = s->m[6];
- m->m[7] = s->m[7];
- m->m[8] = s->m[8];
-}
-extern void __attribute__((overloadable))
-rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *s) {
- m->m[0] = s->m[0];
- m->m[1] = s->m[1];
- m->m[2] = s->m[2];
- m->m[3] = s->m[3];
-}
-
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) {
- m->m[row * 4 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) {
- return m->m[row * 4 + col];
-}
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) {
- m->m[row * 3 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) {
- return m->m[row * 3 + col];
-}
-
-extern void __attribute__((overloadable))
-rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) {
- m->m[row * 2 + col] = v;
-}
-
-extern float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) {
- return m->m[row * 2 + col];
-}
-
-extern float2 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix2x2 *m, float2 in) {
- float2 ret;
- ret.x = (m->m[0] * in.x) + (m->m[2] * in.y);
- ret.y = (m->m[1] * in.x) + (m->m[3] * in.y);
- return ret;
-}
-extern float2 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, float2 in) {
- return rsMatrixMultiply((const rs_matrix2x2 *)m, in);
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float4 in) {
- return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float3 in) {
- return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float2 in) {
- return rsMatrixMultiply((const rs_matrix4x4 *)m, in);
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float3 in) {
- return rsMatrixMultiply((const rs_matrix3x3 *)m, in);
-}
-
-extern float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float2 in) {
- return rsMatrixMultiply((const rs_matrix3x3 *)m, in);
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix4x4 *ret, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) {
- for (int i=0 ; i<4 ; i++) {
- float ri0 = 0;
- float ri1 = 0;
- float ri2 = 0;
- float ri3 = 0;
- for (int j=0 ; j<4 ; j++) {
- const float rhs_ij = rsMatrixGet(rhs, i, j);
- ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
- ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
- ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij;
- ri3 += rsMatrixGet(lhs, j, 3) * rhs_ij;
- }
- rsMatrixSet(ret, i, 0, ri0);
- rsMatrixSet(ret, i, 1, ri1);
- rsMatrixSet(ret, i, 2, ri2);
- rsMatrixSet(ret, i, 3, ri3);
- }
-}
-
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) {
- rs_matrix4x4 r;
- rsMatrixLoadMultiply(&r, lhs, rhs);
- rsMatrixLoad(lhs, &r);
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix3x3 *ret, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) {
- for (int i=0 ; i<3 ; i++) {
- float ri0 = 0;
- float ri1 = 0;
- float ri2 = 0;
- for (int j=0 ; j<3 ; j++) {
- const float rhs_ij = rsMatrixGet(rhs, i, j);
- ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
- ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
- ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij;
- }
- rsMatrixSet(ret, i, 0, ri0);
- rsMatrixSet(ret, i, 1, ri1);
- rsMatrixSet(ret, i, 2, ri2);
- }
-}
-
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) {
- rs_matrix3x3 r;
- rsMatrixLoadMultiply(&r, lhs, rhs);
- rsMatrixLoad(lhs, &r);
-}
-
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix2x2 *ret, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) {
- for (int i=0 ; i<2 ; i++) {
- float ri0 = 0;
- float ri1 = 0;
- for (int j=0 ; j<2 ; j++) {
- const float rhs_ij = rsMatrixGet(rhs, i, j);
- ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
- ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
- }
- rsMatrixSet(ret, i, 0, ri0);
- rsMatrixSet(ret, i, 1, ri1);
- }
-}
-
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) {
- rs_matrix2x2 r;
- rsMatrixLoadMultiply(&r, lhs, rhs);
- rsMatrixLoad(lhs, &r);
-}
-
diff --git a/lib/Renderscript/runtime/rs_mesh.c b/lib/Renderscript/runtime/rs_mesh.c
deleted file mode 100644
index bb533bc..0000000
--- a/lib/Renderscript/runtime/rs_mesh.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/**
-* Mesh
-*/
-extern uint32_t __attribute__((overloadable))
- rsgMeshGetVertexAllocationCount(rs_mesh m) {
- Mesh_t *mesh = (Mesh_t *)m.p;
- if (mesh == NULL) {
- return 0;
- }
- return mesh->mHal.state.vertexBuffersCount;
-}
-
-extern uint32_t __attribute__((overloadable))
- rsgMeshGetPrimitiveCount(rs_mesh m) {
- Mesh_t *mesh = (Mesh_t *)m.p;
- if (mesh == NULL) {
- return 0;
- }
- return mesh->mHal.state.primitivesCount;
-}
-
-extern rs_allocation __attribute__((overloadable))
- rsgMeshGetVertexAllocation(rs_mesh m, uint32_t index) {
- Mesh_t *mesh = (Mesh_t *)m.p;
- if (mesh == NULL || index >= mesh->mHal.state.vertexBuffersCount) {
- rs_allocation nullAlloc = {0};
- return nullAlloc;
- }
- rs_allocation returnAlloc = {mesh->mHal.state.vertexBuffers[index]};
- return returnAlloc;
-}
-
-extern rs_allocation __attribute__((overloadable))
- rsgMeshGetIndexAllocation(rs_mesh m, uint32_t index) {
- Mesh_t *mesh = (Mesh_t *)m.p;
- if (mesh == NULL || index >= mesh->mHal.state.primitivesCount) {
- rs_allocation nullAlloc = {0};
- return nullAlloc;
- }
- rs_allocation returnAlloc = {mesh->mHal.state.indexBuffers[index]};
- return returnAlloc;
-}
-
-extern rs_primitive __attribute__((overloadable))
- rsgMeshGetPrimitive(rs_mesh m, uint32_t index) {
- Mesh_t *mesh = (Mesh_t *)m.p;
- if (mesh == NULL || index >= mesh->mHal.state.primitivesCount) {
- return RS_PRIMITIVE_INVALID;
- }
- return mesh->mHal.state.primitives[index];
-}
diff --git a/lib/Renderscript/runtime/rs_program.c b/lib/Renderscript/runtime/rs_program.c
deleted file mode 100644
index 64c656f..0000000
--- a/lib/Renderscript/runtime/rs_program.c
+++ /dev/null
@@ -1,108 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/**
-* Program Store
-*/
-extern rs_depth_func __attribute__((overloadable))
- rsgProgramStoreGetDepthFunc(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return RS_DEPTH_FUNC_INVALID;
- }
- return prog->mHal.state.depthFunc;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsDepthMaskEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.depthWriteEnable;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsColorMaskRedEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.colorRWriteEnable;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsColorMaskGreenEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.colorGWriteEnable;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsColorMaskBlueEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.colorBWriteEnable;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsColorMaskAlphaEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.colorAWriteEnable;
-}
-
-extern rs_blend_src_func __attribute__((overloadable))
- rsgProgramStoreGetBlendSrcFunc(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return RS_BLEND_SRC_INVALID;
- }
- return prog->mHal.state.blendSrc;
-}
-
-extern rs_blend_dst_func __attribute__((overloadable))
- rsgProgramStoreGetBlendDstFunc(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return RS_BLEND_DST_INVALID;
- }
- return prog->mHal.state.blendDst;
-}
-
-extern bool __attribute__((overloadable))
- rsgProgramStoreIsDitherEnabled(rs_program_store ps) {
- ProgramStore_t *prog = (ProgramStore_t *)ps.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.ditherEnable;
-}
-
-/**
-* Program Raster
-*/
-extern bool __attribute__((overloadable))
- rsgProgramRasterIsPointSpriteEnabled(rs_program_raster pr) {
- ProgramRaster_t *prog = (ProgramRaster_t *)pr.p;
- if (prog == NULL) {
- return false;
- }
- return prog->mHal.state.pointSprite;
-}
-
-extern rs_cull_mode __attribute__((overloadable))
- rsgProgramRasterGetCullMode(rs_program_raster pr) {
- ProgramRaster_t *prog = (ProgramRaster_t *)pr.p;
- if (prog == NULL) {
- return RS_CULL_INVALID;
- }
- return prog->mHal.state.cull;
-}
diff --git a/lib/Renderscript/runtime/rs_sample.c b/lib/Renderscript/runtime/rs_sample.c
deleted file mode 100644
index 8bc6966..0000000
--- a/lib/Renderscript/runtime/rs_sample.c
+++ /dev/null
@@ -1,662 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-
-// 565 Conversion bits taken from SkBitmap
-#define SK_R16_BITS 5
-#define SK_G16_BITS 6
-#define SK_B16_BITS 5
-
-#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS)
-#define SK_G16_SHIFT (SK_B16_BITS)
-#define SK_B16_SHIFT 0
-
-#define SK_R16_MASK ((1 << SK_R16_BITS) - 1)
-#define SK_G16_MASK ((1 << SK_G16_BITS) - 1)
-#define SK_B16_MASK ((1 << SK_B16_BITS) - 1)
-
-#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
-#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
-#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
-
-static inline unsigned SkR16ToR32(unsigned r) {
- return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
-}
-
-static inline unsigned SkG16ToG32(unsigned g) {
- return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
-}
-
-static inline unsigned SkB16ToB32(unsigned b) {
- return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
-}
-
-#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c))
-#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c))
-#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c))
-
-static float3 getFrom565(uint16_t color) {
- float3 result;
- result.x = (float)SkPacked16ToR32(color);
- result.y = (float)SkPacked16ToG32(color);
- result.z = (float)SkPacked16ToB32(color);
- return result;
-}
-
-/**
-* Allocation sampling
-*/
-static inline float __attribute__((overloadable))
- getElementAt1(const uint8_t *p, int32_t x) {
- float r = p[x];
- return r;
-}
-
-static inline float2 __attribute__((overloadable))
- getElementAt2(const uint8_t *p, int32_t x) {
- x *= 2;
- float2 r = {p[x], p[x+1]};
- return r;
-}
-
-static inline float3 __attribute__((overloadable))
- getElementAt3(const uint8_t *p, int32_t x) {
- x *= 4;
- float3 r = {p[x], p[x+1], p[x+2]};
- return r;
-}
-
-static inline float4 __attribute__((overloadable))
- getElementAt4(const uint8_t *p, int32_t x) {
- x *= 4;
- const uchar4 *p2 = (const uchar4 *)&p[x];
- return convert_float4(p2[0]);
-}
-
-static inline float3 __attribute__((overloadable))
- getElementAt565(const uint8_t *p, int32_t x) {
- x *= 2;
- float3 r = getFrom565(((const uint16_t *)p)[0]);
- return r;
-}
-
-static inline float __attribute__((overloadable))
- getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
- p += y * stride;
- float r = p[x];
- return r;
-}
-
-static inline float2 __attribute__((overloadable))
- getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
- p += y * stride;
- x *= 2;
- float2 r = {p[x], p[x+1]};
- return r;
-}
-
-static inline float3 __attribute__((overloadable))
- getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
- p += y * stride;
- x *= 4;
- float3 r = {p[x], p[x+1], p[x+2]};
- return r;
-}
-
-static inline float4 __attribute__((overloadable))
- getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
- p += y * stride;
- x *= 4;
- float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
- return r;
-}
-
-static inline float3 __attribute__((overloadable))
- getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
- p += y * stride;
- x *= 2;
- float3 r = getFrom565(((const uint16_t *)p)[0]);
- return r;
-}
-
-
-
-
-
-static float4 __attribute__((overloadable))
- getSample_A(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float p0 = getElementAt1(p, iPixel);
- float p1 = getElementAt1(p, next);
- float r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- float4 ret = {0.f, 0.f, 0.f, r};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_L(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float p0 = getElementAt1(p, iPixel);
- float p1 = getElementAt1(p, next);
- float r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- float4 ret = {r, r, r, 1.f};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_LA(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float2 p0 = getElementAt2(p, iPixel);
- float2 p1 = getElementAt2(p, next);
- float2 r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- float4 ret = {r.x, r.x, r.x, r.y};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_RGB(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float3 p0 = getElementAt3(p, iPixel);
- float3 p1 = getElementAt3(p, next);
- float3 r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- float4 ret = {r.x, r.x, r.z, 1.f};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_565(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float3 p0 = getElementAt565(p, iPixel);
- float3 p1 = getElementAt565(p, next);
- float3 r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- float4 ret = {r.x, r.x, r.z, 1.f};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_RGBA(const uint8_t *p, int32_t iPixel,
- int32_t next, float w0, float w1) {
- float4 p0 = getElementAt4(p, iPixel);
- float4 p1 = getElementAt4(p, next);
- float4 r = p0 * w0 + p1 * w1;
- r *= (1.f / 255.f);
- return r;
-}
-
-
-static float4 __attribute__((overloadable))
- getSample_A(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float p0 = getElementAt1(p, stride, locX, locY);
- float p1 = getElementAt1(p, stride, nextX, locY);
- float p2 = getElementAt1(p, stride, locX, nextY);
- float p3 = getElementAt1(p, stride, nextX, nextY);
- float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- float4 ret = {0.f, 0.f, 0.f, r};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_L(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float p0 = getElementAt1(p, stride, locX, locY);
- float p1 = getElementAt1(p, stride, nextX, locY);
- float p2 = getElementAt1(p, stride, locX, nextY);
- float p3 = getElementAt1(p, stride, nextX, nextY);
- float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- float4 ret = {r, r, r, 1.f};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_LA(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float2 p0 = getElementAt2(p, stride, locX, locY);
- float2 p1 = getElementAt2(p, stride, nextX, locY);
- float2 p2 = getElementAt2(p, stride, locX, nextY);
- float2 p3 = getElementAt2(p, stride, nextX, nextY);
- float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- float4 ret = {r.x, r.x, r.x, r.y};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_RGB(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float4 p0 = getElementAt4(p, stride, locX, locY);
- float4 p1 = getElementAt4(p, stride, nextX, locY);
- float4 p2 = getElementAt4(p, stride, locX, nextY);
- float4 p3 = getElementAt4(p, stride, nextX, nextY);
- float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- float4 ret = {r.x, r.y, r.z, 1.f};
- return ret;
-}
-static float4 __attribute__((overloadable))
- getSample_RGBA(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float4 p0 = getElementAt4(p, stride, locX, locY);
- float4 p1 = getElementAt4(p, stride, nextX, locY);
- float4 p2 = getElementAt4(p, stride, locX, nextY);
- float4 p3 = getElementAt4(p, stride, nextX, nextY);
- float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- return r;
-}
-static float4 __attribute__((overloadable))
- getSample_565(const uint8_t *p, size_t stride,
- int locX, int locY, int nextX, int nextY,
- float w0, float w1, float w2, float w3) {
- float3 p0 = getElementAt565(p, stride, locX, locY);
- float3 p1 = getElementAt565(p, stride, nextX, locY);
- float3 p2 = getElementAt565(p, stride, locX, nextY);
- float3 p3 = getElementAt565(p, stride, nextX, nextY);
- float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
- r *= (1.f / 255.f);
- float4 ret;
- ret.rgb = r;
- ret.w = 1.f;
- return ret;
-}
-
-static float4 __attribute__((overloadable))
- getBilinearSample1D(const Allocation_t *alloc, float2 weights,
- uint32_t iPixel, uint32_t next,
- rs_data_kind dk, rs_data_type dt, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
-
- switch(dk) {
- case RS_KIND_PIXEL_RGBA:
- return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
- case RS_KIND_PIXEL_A:
- return getSample_A(p, iPixel, next, weights.x, weights.y);
- case RS_KIND_PIXEL_RGB:
- if (dt == RS_TYPE_UNSIGNED_5_6_5) {
- return getSample_565(p, iPixel, next, weights.x, weights.y);
- }
- return getSample_RGB(p, iPixel, next, weights.x, weights.y);
- case RS_KIND_PIXEL_L:
- return getSample_L(p, iPixel, next, weights.x, weights.y);
- case RS_KIND_PIXEL_LA:
- return getSample_LA(p, iPixel, next, weights.x, weights.y);
-
- default:
- //__builtin_unreachable();
- break;
- }
-
- //__builtin_unreachable();
- return 0.f;
-}
-
-static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
- if (wrap == RS_SAMPLER_WRAP) {
- coord = coord % size;
- if (coord < 0) {
- coord += size;
- }
- }
- if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
- coord = coord % (size * 2);
- if (coord < 0) {
- coord = (size * 2) + coord;
- }
- if (coord >= size) {
- coord = (size * 2) - coord;
- }
- }
- return (uint32_t)max(0, min(coord, size - 1));
-}
-
-static float4 __attribute__((overloadable))
- getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
- int lx, int ly, int nx, int ny,
- rs_data_kind dk, rs_data_type dt, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
- size_t stride = alloc->mHal.drvState.lod[lod].stride;
-
- switch(dk) {
- case RS_KIND_PIXEL_RGBA:
- return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
- case RS_KIND_PIXEL_A:
- return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
- case RS_KIND_PIXEL_LA:
- return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
- case RS_KIND_PIXEL_RGB:
- if (dt == RS_TYPE_UNSIGNED_5_6_5) {
- return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
- }
- return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
- case RS_KIND_PIXEL_L:
- return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
-
- default:
- //__builtin_unreachable();
- break;
- }
-
- //__builtin_unreachable();
- return 0.f;
-}
-
-static float4 __attribute__((overloadable))
- getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
- rs_data_type dt, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
-
- float4 result = {0.f, 0.f, 0.f, 255.f};
-
- switch(dk) {
- case RS_KIND_PIXEL_RGBA:
- result = getElementAt4(p, iPixel);
- break;
- case RS_KIND_PIXEL_A:
- result.w = getElementAt1(p, iPixel);
- break;
- case RS_KIND_PIXEL_LA:
- result.zw = getElementAt2(p, iPixel);
- result.xy = result.z;
- break;
- case RS_KIND_PIXEL_RGB:
- if (dt == RS_TYPE_UNSIGNED_5_6_5) {
- result.xyz = getElementAt565(p, iPixel);
- } else {
- result.xyz = getElementAt3(p, iPixel);
- }
- break;
- case RS_KIND_PIXEL_L:
- result.xyz = getElementAt1(p, iPixel);
-
- default:
- //__builtin_unreachable();
- break;
- }
-
- return result * 0.003921569f;
-}
-
-static float4 __attribute__((overloadable))
- getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
- rs_data_type dt, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
- size_t stride = alloc->mHal.drvState.lod[lod].stride;
-
- float4 result = {0.f, 0.f, 0.f, 255.f};
-
- switch(dk) {
- case RS_KIND_PIXEL_RGBA:
- result = getElementAt4(p, stride, iPixel.x, iPixel.y);
- break;
- case RS_KIND_PIXEL_A:
- result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
- break;
- case RS_KIND_PIXEL_LA:
- result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
- result.xy = result.z;
- break;
- case RS_KIND_PIXEL_RGB:
- if (dt == RS_TYPE_UNSIGNED_5_6_5) {
- result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
- } else {
- result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
- }
- break;
-
- default:
- //__builtin_unreachable();
- break;
- }
-
- return result * 0.003921569f;
-}
-
-static float4 __attribute__((overloadable))
- sample_LOD_LinearPixel(const Allocation_t *alloc, const Type_t *type,
- rs_data_kind dk, rs_data_type dt,
- rs_sampler s,
- float uv, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
-
- rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
- float pixelUV = uv * (float)(sourceW);
- int32_t iPixel = (int32_t)(pixelUV);
- float frac = pixelUV - (float)iPixel;
-
- if (frac < 0.5f) {
- iPixel -= 1;
- frac += 0.5f;
- } else {
- frac -= 0.5f;
- }
-
- float oneMinusFrac = 1.0f - frac;
-
- float2 weights;
- weights.x = oneMinusFrac;
- weights.y = frac;
-
- uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
- uint32_t location = wrapI(wrapS, iPixel, sourceW);
-
- return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
-}
-
-static float4 __attribute__((overloadable))
- sample_LOD_NearestPixel(const Allocation_t *alloc,
- rs_data_kind dk, rs_data_type dt,
- rs_sampler s,
- float uv, uint32_t lod) {
-
- rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
- int32_t iPixel = (int32_t)(uv * (float)(sourceW));
- uint32_t location = wrapI(wrapS, iPixel, sourceW);
-
- return getNearestSample(alloc, location, dk, dt, lod);
-}
-
-static float4 __attribute__((overloadable))
- sample_LOD_LinearPixel(const Allocation_t *alloc,
- rs_data_kind dk, rs_data_type dt,
- rs_sampler s,
- float2 uv, uint32_t lod) {
-
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
-
- rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- rs_sampler_value wrapT = rsSamplerGetWrapT(s);
-
- int sourceW = alloc->mHal.drvState.lod[lod].dimX;
- int sourceH = alloc->mHal.drvState.lod[lod].dimY;
-
- float pixelU = uv.x * sourceW;
- float pixelV = uv.y * sourceH;
- int iPixelU = pixelU;
- int iPixelV = pixelV;
- float fracU = pixelU - iPixelU;
- float fracV = pixelV - iPixelV;
-
- if (fracU < 0.5f) {
- iPixelU -= 1;
- fracU += 0.5f;
- } else {
- fracU -= 0.5f;
- }
- if (fracV < 0.5f) {
- iPixelV -= 1;
- fracV += 0.5f;
- } else {
- fracV -= 0.5f;
- }
- float oneMinusFracU = 1.0f - fracU;
- float oneMinusFracV = 1.0f - fracV;
-
- float w0 = oneMinusFracU * oneMinusFracV;
- float w1 = fracU * oneMinusFracV;
- float w2 = oneMinusFracU * fracV;
- float w3 = fracU * fracV;
-
- int nx = wrapI(wrapS, iPixelU + 1, sourceW);
- int ny = wrapI(wrapT, iPixelV + 1, sourceH);
- int lx = wrapI(wrapS, iPixelU, sourceW);
- int ly = wrapI(wrapT, iPixelV, sourceH);
-
- return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
-
-}
-
-static float4 __attribute__((overloadable))
- sample_LOD_NearestPixel(const Allocation_t *alloc,
- rs_data_kind dk, rs_data_type dt,
- rs_sampler s,
- float2 uv, uint32_t lod) {
- rs_sampler_value wrapS = rsSamplerGetWrapS(s);
- rs_sampler_value wrapT = rsSamplerGetWrapT(s);
-
- int sourceW = alloc->mHal.drvState.lod[lod].dimX;
- int sourceH = alloc->mHal.drvState.lod[lod].dimY;
-
- float2 dimF;
- dimF.x = (float)(sourceW);
- dimF.y = (float)(sourceH);
- int2 iPixel = convert_int2(uv * dimF);
-
- uint2 location;
- location.x = wrapI(wrapS, iPixel.x, sourceW);
- location.y = wrapI(wrapT, iPixel.y, sourceH);
- return getNearestSample(alloc, location, dk, dt, lod);
-}
-
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
- rs_element elem = rsAllocationGetElement(a);
- rs_data_kind dk = rsElementGetDataKind(elem);
- rs_data_type dt = rsElementGetDataType(elem);
-
- if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) {
- return 0.f;
- }
-
- const Allocation_t *alloc = (const Allocation_t *)a.p;
- const Type_t *type = (const Type_t*)alloc->mHal.state.type;
-
- rs_sampler_value sampleMin = rsSamplerGetMinification(s);
- rs_sampler_value sampleMag = rsSamplerGetMagnification(s);
-
- if (lod <= 0.0f) {
- if (sampleMag == RS_SAMPLER_NEAREST) {
- return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
- }
- return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, 0);
- }
-
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
- uint32_t maxLOD = type->mHal.state.lodCount - 1;
- lod = min(lod, (float)maxLOD);
- uint32_t nearestLOD = (uint32_t)round(lod);
- return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, nearestLOD);
- }
-
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
- uint32_t lod0 = (uint32_t)floor(lod);
- uint32_t lod1 = (uint32_t)ceil(lod);
- uint32_t maxLOD = type->mHal.state.lodCount - 1;
- lod0 = min(lod0, maxLOD);
- lod1 = min(lod1, maxLOD);
- float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod0);
- float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod1);
- float frac = lod - (float)lod0;
- return sample0 * (1.0f - frac) + sample1 * frac;
- }
-
- return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
-}
-
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float location) {
- return rsSample(a, s, location, 0);
-}
-
-
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
-
- const Allocation_t *alloc = (const Allocation_t *)a.p;
-
- rs_element elem = rsAllocationGetElement(a);
- rs_data_kind dk = rsElementGetDataKind(elem);
- rs_data_type dt = rsElementGetDataType(elem);
-
- if (dk == RS_KIND_USER ||
- (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5) ||
- !(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
- return 0.f;
- }
-
- rs_sampler_value sampleMin = rsSamplerGetMinification(s);
- rs_sampler_value sampleMag = rsSamplerGetMagnification(s);
-
- if (lod <= 0.0f) {
- if (sampleMag == RS_SAMPLER_NEAREST) {
- return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
- }
- return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, 0);
- }
-
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
- const Type_t *type = (const Type_t*)alloc->mHal.state.type;
- uint32_t maxLOD = type->mHal.state.lodCount - 1;
- lod = min(lod, (float)maxLOD);
- uint32_t nearestLOD = (uint32_t)round(lod);
- return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, nearestLOD);
- }
-
- if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
- const Type_t *type = (const Type_t*)alloc->mHal.state.type;
- uint32_t lod0 = (uint32_t)floor(lod);
- uint32_t lod1 = (uint32_t)ceil(lod);
- uint32_t maxLOD = type->mHal.state.lodCount - 1;
- lod0 = min(lod0, maxLOD);
- lod1 = min(lod1, maxLOD);
- float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod0);
- float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, s, uv, lod1);
- float frac = lod - (float)lod0;
- return sample0 * (1.0f - frac) + sample1 * frac;
- }
-
- return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
-}
-
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float2 uv) {
-
- const Allocation_t *alloc = (const Allocation_t *)a.p;
-
- rs_element elem = rsAllocationGetElement(a);
- rs_data_kind dk = rsElementGetDataKind(elem);
- rs_data_type dt = rsElementGetDataType(elem);
-
- if (dk == RS_KIND_USER ||
- (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5) ||
- !(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
- return 0.f;
- }
-
- if (rsSamplerGetMagnification(s) == RS_SAMPLER_NEAREST) {
- return sample_LOD_NearestPixel(alloc, dk, dt, s, uv, 0);
- }
- return sample_LOD_LinearPixel(alloc, dk, dt, s, uv, 0);
-}
-
diff --git a/lib/Renderscript/runtime/rs_sampler.c b/lib/Renderscript/runtime/rs_sampler.c
deleted file mode 100644
index 39782de..0000000
--- a/lib/Renderscript/runtime/rs_sampler.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "rs_core.rsh"
-#include "rs_graphics.rsh"
-#include "rs_structs.h"
-
-/**
-* Sampler
-*/
-extern rs_sampler_value __attribute__((overloadable))
- rsSamplerGetMinification(rs_sampler s) {
- Sampler_t *prog = (Sampler_t *)s.p;
- if (prog == NULL) {
- return RS_SAMPLER_INVALID;
- }
- return prog->mHal.state.minFilter;
-}
-
-extern rs_sampler_value __attribute__((overloadable))
- rsSamplerGetMagnification(rs_sampler s) {
- Sampler_t *prog = (Sampler_t *)s.p;
- if (prog == NULL) {
- return RS_SAMPLER_INVALID;
- }
- return prog->mHal.state.magFilter;
-}
-
-extern rs_sampler_value __attribute__((overloadable))
- rsSamplerGetWrapS(rs_sampler s) {
- Sampler_t *prog = (Sampler_t *)s.p;
- if (prog == NULL) {
- return RS_SAMPLER_INVALID;
- }
- return prog->mHal.state.wrapS;
-}
-
-extern rs_sampler_value __attribute__((overloadable))
- rsSamplerGetWrapT(rs_sampler s) {
- Sampler_t *prog = (Sampler_t *)s.p;
- if (prog == NULL) {
- return RS_SAMPLER_INVALID;
- }
- return prog->mHal.state.wrapT;
-}
-
-extern float __attribute__((overloadable))
- rsSamplerGetAnisotropy(rs_sampler s) {
- Sampler_t *prog = (Sampler_t *)s.p;
- if (prog == NULL) {
- return 0.0f;
- }
- return prog->mHal.state.aniso;
-}
diff --git a/lib/Renderscript/runtime/rs_structs.h b/lib/Renderscript/runtime/rs_structs.h
deleted file mode 100644
index 6db4279..0000000
--- a/lib/Renderscript/runtime/rs_structs.h
+++ /dev/null
@@ -1,262 +0,0 @@
-#ifndef _RS_STRUCTS_H_
-#define _RS_STRUCTS_H_
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class Allocation owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsAllocation.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsAllocationGetDimX(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * allocations.
- *
- *****************************************************************************/
-typedef enum {
- RS_ALLOCATION_MIPMAP_NONE = 0,
- RS_ALLOCATION_MIPMAP_FULL = 1,
- RS_ALLOCATION_MIPMAP_ON_SYNC_TO_TEXTURE = 2
-} rs_allocation_mipmap_control;
-
-typedef struct Allocation {
- char __pad[32];
- struct {
- void * drv;
- struct {
- const void *type;
- uint32_t usageFlags;
- rs_allocation_mipmap_control mipmapControl;
- uint32_t yuv;
- uint32_t elementSizeBytes;
- bool hasMipmaps;
- bool hasFaces;
- bool hasReferences;
- void * usrPtr;
- int32_t surfaceTextureID;
- void * wndSurface;
- void * surfaceTexture;
- } state;
-
- struct DrvState {
- struct LodState {
- void * mallocPtr;
- size_t stride;
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- } lod[16/*android::renderscript::Allocation::MAX_LOD*/];
- size_t faceOffset;
- uint32_t lodCount;
- uint32_t faceCount;
- } drvState;
- } mHal;
-} Allocation_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class ProgramStore owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsProgramStore.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsgProgramStoreGetDepthFunc(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * program store.
- *
- *****************************************************************************/
-typedef struct ProgramStore {
- char __pad[40];
- struct {
- struct {
- bool ditherEnable;
- bool colorRWriteEnable;
- bool colorGWriteEnable;
- bool colorBWriteEnable;
- bool colorAWriteEnable;
- rs_blend_src_func blendSrc;
- rs_blend_dst_func blendDst;
- bool depthWriteEnable;
- rs_depth_func depthFunc;
- } state;
- } mHal;
-} ProgramStore_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class ProgramRaster owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsProgramRaster.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsgProgramRasterGetCullMode(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * program raster.
- *
- *****************************************************************************/
-typedef struct ProgramRaster {
- char __pad[36];
- struct {
- void * drv;
- struct {
- bool pointSprite;
- rs_cull_mode cull;
- } state;
- } mHal;
-} ProgramRaster_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class Sampler owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsSampler.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsgProgramRasterGetMagFilter(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * samplers.
- *
- *****************************************************************************/
-typedef struct Sampler {
- char __pad[32];
- struct {
- void *drv;
- struct {
- rs_sampler_value magFilter;
- rs_sampler_value minFilter;
- rs_sampler_value wrapS;
- rs_sampler_value wrapT;
- rs_sampler_value wrapR;
- float aniso;
- } state;
- } mHal;
-} Sampler_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class Element owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsElement.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsElementGetSubElementCount(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * elements.
- *
- *****************************************************************************/
-typedef struct Element {
- char __pad[32];
- struct {
- void *drv;
- struct {
- rs_data_type dataType;
- rs_data_kind dataKind;
- uint32_t vectorSize;
- uint32_t elementSizeBytes;
-
- // Subelements
- const void **fields;
- uint32_t *fieldArraySizes;
- const char **fieldNames;
- uint32_t *fieldNameLengths;
- uint32_t *fieldOffsetBytes;
- uint32_t fieldsCount;
- } state;
- } mHal;
-} Element_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class Type owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsType.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsAllocationGetElement(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * types.
- *
- *****************************************************************************/
-typedef struct Type {
- char __pad[32];
- struct {
- void *drv;
- struct {
- const void * element;
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- uint32_t *lodDimX;
- uint32_t *lodDimY;
- uint32_t *lodDimZ;
- uint32_t *lodOffset;
- uint32_t lodCount;
- bool faces;
- } state;
- } mHal;
-} Type_t;
-
-/*****************************************************************************
- * CAUTION
- *
- * The following structure layout provides a more efficient way to access
- * internal members of the C++ class Mesh owned by librs. Unfortunately,
- * since this class has virtual members, we can't simply use offsetof() or any
- * other compiler trickery to dynamically get the appropriate values at
- * build-time. This layout may need to be updated whenever
- * frameworks/base/libs/rs/rsMesh.h is modified.
- *
- * Having the layout information available in this file allows us to
- * accelerate functionality like rsMeshGetVertexAllocationCount(). Without this
- * information, we would not be able to inline the bitcode, thus resulting in
- * potential runtime performance penalties for tight loops operating on
- * meshes.
- *
- *****************************************************************************/
-typedef struct Mesh {
- char __pad[32];
- struct {
- void *drv;
- struct {
- void **vertexBuffers;
- uint32_t vertexBuffersCount;
-
- // indexBuffers[i] could be NULL, in which case only primitives[i] is used
- void **indexBuffers;
- uint32_t indexBuffersCount;
- rs_primitive *primitives;
- uint32_t primitivesCount;
- } state;
- } mHal;
-} Mesh_t;
-#endif // _RS_CORE_H_
diff --git a/lib/Support/Initialization.cpp b/lib/Support/Initialization.cpp
index b41962c..97e4834 100644
--- a/lib/Support/Initialization.cpp
+++ b/lib/Support/Initialization.cpp
@@ -26,7 +26,8 @@
namespace {
-void llvm_error_handler(void *pUserData, const std::string &pMessage) {
+void llvm_error_handler(void *pUserData, const std::string &pMessage,
+ bool pGenCrashDiag) {
ALOGE("%s", pMessage.c_str());
::exit(1);
}
@@ -46,6 +47,7 @@
#if defined(PROVIDE_ARM_CODEGEN)
LLVMInitializeARMAsmPrinter();
+ LLVMInitializeARMAsmParser();
# if USE_DISASSEMBLER
LLVMInitializeARMDisassembler();
# endif
@@ -56,6 +58,7 @@
#if defined(PROVIDE_MIPS_CODEGEN)
LLVMInitializeMipsAsmPrinter();
+ LLVMInitializeMipsAsmParser();
# if USE_DISASSEMBLER
LLVMInitializeMipsDisassembler();
# endif
@@ -66,6 +69,7 @@
#if defined(PROVIDE_X86_CODEGEN)
LLVMInitializeX86AsmPrinter();
+ LLVMInitializeX86AsmParser();
# if USE_DISASSEMBLER
LLVMInitializeX86Disassembler();
# endif
diff --git a/lib/Support/TargetCompilerConfigs.cpp b/lib/Support/TargetCompilerConfigs.cpp
index 7d3de45..948e836 100644
--- a/lib/Support/TargetCompilerConfigs.cpp
+++ b/lib/Support/TargetCompilerConfigs.cpp
@@ -14,8 +14,12 @@
* limitations under the License.
*/
+#include "bcc/Support/Properties.h"
#include "bcc/Support/TargetCompilerConfigs.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Host.h"
+
// Get ARM version number (i.e., __ARM_ARCH__)
#ifdef __arm__
#include <machine/cpu-features.h>
@@ -45,6 +49,9 @@
void
ARMBaseCompilerConfig::GetFeatureVector(std::vector<std::string> &pAttributes,
bool pInThumbMode, bool pEnableNEON) {
+ llvm::StringMap<bool> Features;
+ llvm::sys::getHostCPUFeatures(Features);
+
#if defined(ARCH_ARM_HAVE_VFP)
pAttributes.push_back("+vfp3");
# if !defined(ARCH_ARM_HAVE_VFP_D32)
@@ -60,17 +67,20 @@
}
}
-#if defined(ARCH_ARM_HAVE_NEON)
- if (pEnableNEON) {
+ if (pEnableNEON && Features.count("neon") && Features["neon"]) {
pAttributes.push_back("+neon");
} else {
pAttributes.push_back("-neon");
pAttributes.push_back("-neonfp");
}
-#else
- pAttributes.push_back("-neon");
- pAttributes.push_back("-neonfp");
-#endif
+
+ if (!getProperty("debug.rs.arm-no-hwdiv")) {
+ if (Features.count("hwdiv-arm") && Features["hwdiv-arm"])
+ pAttributes.push_back("+hwdiv-arm");
+
+ if (Features.count("hwdiv") && Features["hwdiv"])
+ pAttributes.push_back("+hwdiv");
+ }
return;
}
@@ -82,6 +92,9 @@
// Enable NEON by default.
mEnableNEON = true;
+ if (!getProperty("debug.rs.arm-no-tune-for-cpu"))
+ setCPU(llvm::sys::getHostCPUName());
+
std::vector<std::string> attributes;
GetFeatureVector(attributes, mInThumbMode, mEnableNEON);
setFeatureString(attributes);
diff --git a/libbcc-device-build.mk b/libbcc-device-build.mk
index 48fa35f..54340c4 100644
--- a/libbcc-device-build.mk
+++ b/libbcc-device-build.mk
@@ -43,10 +43,7 @@
endif
endif
ifeq ($(ARCH_ARM_HAVE_NEON),true)
- # Disable NEON on cortex-a15 temporarily
- ifneq ($(strip $(TARGET_CPU_VARIANT)), cortex-a15)
- LOCAL_CFLAGS += -DARCH_ARM_HAVE_NEON
- endif
+ LOCAL_CFLAGS += -DARCH_ARM_HAVE_NEON
endif
else
ifeq ($(TARGET_ARCH),mips)
diff --git a/libbcc.mk b/libbcc.mk
index 3b2463f..adea004 100644
--- a/libbcc.mk
+++ b/libbcc.mk
@@ -24,7 +24,7 @@
LLVM_ROOT_PATH := external/llvm
MCLD_ROOT_PATH := frameworks/compile/mclinker
-RSLOADER_ROOT_PATH := frameworks/rs/driver/linkloader
+RSLOADER_ROOT_PATH := frameworks/rs/cpu_ref/linkloader
#=====================================================================
# Related Makefile Paths of libbcc
diff --git a/tests/bccarm b/tests/bccarm
deleted file mode 100755
index 0914e10..0000000
--- a/tests/bccarm
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/python
-#
-# Run a test on the ARM version of bcc.
-
-import unittest
-import subprocess
-import os
-import sys
-
-def compile(args):
- proc = subprocess.Popen(["bcc"] + args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- result = proc.communicate()
- return result
-
-def runCmd(args):
- proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- result = proc.communicate()
- return result[0].strip()
-
-def uname():
- return runCmd(["uname"])
-
-def unameM():
- return runCmd(["uname", "-m"])
-
-def which(item):
- return runCmd(["which", item])
-
-def adb(args):
- return runCmd(["adb"] + args)
-
-def setupArm(file):
- print "Setting up arm"
- adb(["remount"])
- adb(["shell", "rm", "/system/bin/bcc"])
- adb(["shell", "mkdir", "/system/bin/bccdata"])
- adb(["shell", "mkdir", "/system/bin/bccdata/data"])
-
- remoteFileName = os.path.join("/system/bin/bccdata", file)
- adb(["push", file, remoteFileName])
-
- # Copy over compiler
- adb(["sync"])
- return remoteFileName
-
-def compileArm(args):
- remoteArgs = []
- fileName = ""
- for arg in sys.argv[1:]:
- if arg.startswith('-'):
- remoteArgs.append(arg)
- else:
- fileName = arg
-
- remoteFileName = setupArm(fileName)
- remoteArgs.append(remoteFileName)
- remoteCmdLine = ["adb", "shell", "/system/bin/bcc"] + remoteArgs
- proc = subprocess.Popen(remoteCmdLine, stdout=subprocess.PIPE)
- result = proc.communicate()
- return result[0].replace("\r","")
-
-
-def main():
- print compileArm(sys.argv[1:])
-
-if __name__ == '__main__':
- main()
-
-
diff --git a/tests/data/addressOf.bc b/tests/data/addressOf.bc
deleted file mode 100644
index 8b54383..0000000
--- a/tests/data/addressOf.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/array.bc b/tests/data/array.bc
deleted file mode 100644
index 0d389fc..0000000
--- a/tests/data/array.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/assignment.bc b/tests/data/assignment.bc
deleted file mode 100644
index 6cb36ea..0000000
--- a/tests/data/assignment.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/assignmentop.bc b/tests/data/assignmentop.bc
deleted file mode 100644
index f4131f8..0000000
--- a/tests/data/assignmentop.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/b2071670.bc b/tests/data/b2071670.bc
deleted file mode 100644
index e9b495e..0000000
--- a/tests/data/b2071670.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/brackets.bc b/tests/data/brackets.bc
deleted file mode 100644
index da5fc92..0000000
--- a/tests/data/brackets.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/casts.bc b/tests/data/casts.bc
deleted file mode 100644
index d21e54a..0000000
--- a/tests/data/casts.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/char.bc b/tests/data/char.bc
deleted file mode 100644
index 8ba6f9d..0000000
--- a/tests/data/char.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/comma.bc b/tests/data/comma.bc
deleted file mode 100644
index 0e159c1..0000000
--- a/tests/data/comma.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/constants.bc b/tests/data/constants.bc
deleted file mode 100644
index c0699e1..0000000
--- a/tests/data/constants.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/defines.bc b/tests/data/defines.bc
deleted file mode 100644
index 4457f46..0000000
--- a/tests/data/defines.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/double.bc b/tests/data/double.bc
deleted file mode 100644
index 3dc204e..0000000
--- a/tests/data/double.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/expr-ansi.bc b/tests/data/expr-ansi.bc
deleted file mode 100644
index 9b1ddc9..0000000
--- a/tests/data/expr-ansi.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/expr2.bc b/tests/data/expr2.bc
deleted file mode 100644
index 4b6ce5f..0000000
--- a/tests/data/expr2.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/film.bc b/tests/data/film.bc
deleted file mode 100644
index 5be0267..0000000
--- a/tests/data/film.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/float.bc b/tests/data/float.bc
deleted file mode 100644
index 8ec27f3..0000000
--- a/tests/data/float.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/floatdouble.bc b/tests/data/floatdouble.bc
deleted file mode 100644
index 8d30e62..0000000
--- a/tests/data/floatdouble.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/flops.bc b/tests/data/flops.bc
deleted file mode 100644
index 856b222..0000000
--- a/tests/data/flops.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/hello.bc b/tests/data/hello.bc
deleted file mode 100644
index 74c75e7..0000000
--- a/tests/data/hello.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/inc.bc b/tests/data/inc.bc
deleted file mode 100644
index 6763032..0000000
--- a/tests/data/inc.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/iops.bc b/tests/data/iops.bc
deleted file mode 100644
index 7edaca6..0000000
--- a/tests/data/iops.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/missing-main.bc b/tests/data/missing-main.bc
deleted file mode 100644
index 6eacb7f..0000000
--- a/tests/data/missing-main.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/otcc-ansi.bc b/tests/data/otcc-ansi.bc
deleted file mode 100644
index 377fb47..0000000
--- a/tests/data/otcc-ansi.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/otcc.c b/tests/data/otcc.c
deleted file mode 100644
index 433ae2e..0000000
--- a/tests/data/otcc.c
+++ /dev/null
@@ -1,448 +0,0 @@
-#include <stdio.h>
-#define k *(int*)
-#define a if(
-#define c ad()
-#define i else
-#define p while(
-#define x *(char*)
-#define b ==
-#define V =calloc(1,99999)
-#define f ()
-#define J return
-#define l ae(
-#define n e)
-#define u d!=
-#define F int
-#define y (j)
-#define r m=
-#define t +4
-F d,z,C,h,P,K,ac,q,G,v,Q,R,D,L,W,M;
-E(n{
-x D++=e;
-}
-o f{
-a L){
-h=x L++;
-a h b 2){
-L=0;
-h=W;
-}
-}
-i h=fgetc(Q);
-}
-X f{
-J isalnum(h)|h b 95;
-}
-Y f{
-a h b 92){
-o f;
-a h b 110)h=10;
-}
-}
-c{
-F e,j,m;
-p isspace(h)|h b 35){
-a h b 35){
-o f;
-c;
-a d b 536){
-c;
-E(32);
-k d=1;
-k(d t)=D;
-}
-p h!=10){
-E(h);
-o f;
-}
-E(h);
-E(2);
-}
-o f;
-}
-C=0;
-d=h;
-a X f){
-E(32);
-M=D;
-p X f){
-E(h);
-o f;
-}
-a isdigit(d)){
-z=strtol(M,0,0);
-d=2;
-}
-i{
-x D=32;
-d=strstr(R,M-1)-R;
-x D=0;
-d=d*8+256;
-a d>536){
-d=P+d;
-a k d b 1){
-L=k(d t);
-W=h;
-o f;
-c;
-}
-}
-}
-}
-i{
-o f;
-a d b 39){
-d=2;
-Y f;
-z=h;
-o f;
-o f;
-}
-i a d b 47&h b 42){
-o f;
-p h){
-p h!=42)o f;
-o f;
-a h b 47)h=0;
-}
-o f;
-c;
-}
-i{
-e="++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!='g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
-p j=x e++){
-r x e++;
-z=0;
-p(C=x e++-98)<0)z=z*64+C+64;
-a j b d&(m b h|m b 64)){
-a m b h){
-o f;
-d=1;
-}
-break;
-}
-}
-}
-}
-}
-l g){
-p g&&g!=-1){
-x q++=g;
-g=g>>8;
-}
-}
-A(n{
-F g;
-p n{
-g=k e;
-k e=q-e-4;
-e=g;
-}
-}
-s(g,n{
-l g);
-k q=e;
-e=q;
-q=q t;
-J e;
-}
-H(n{
-s(184,n;
-}
-B(n{
-J s(233,n;
-}
-S(j,n{
-l 1032325);
-J s(132+j,n;
-}
-Z(n{
-l 49465);
-H(0);
-l 15);
-l e+144);
-l 192);
-}
-N(j,n{
-l j+131);
-s((e<512)<<7|5,n;
-}
-T y{
-F g,e,m,aa;
-g=1;
-a d b 34){
-H(v);
-p h!=34){
-Y f;
-x v++=h;
-o f;
-}
-x v=0;
-v=v t&-4;
-o f;
-c;
-}
-i{
-aa=C;
-r z;
-e=d;
-c;
-a e b 2){
-H(m);
-}
-i a aa b 2){
-T(0);
-s(185,0);
-a e b 33)Z(m);
-i l m);
-}
-i a e b 40){
-w f;
-c;
-}
-i a e b 42){
-c;
-e=d;
-c;
-c;
-a d b 42){
-c;
-c;
-c;
-c;
-e=0;
-}
-c;
-T(0);
-a d b 61){
-c;
-l 80);
-w f;
-l 89);
-l 392+(e b 256));
-}
-i a n{
-a e b 256)l 139);
-i l 48655);
-q++;
-}
-}
-i a e b 38){
-N(10,k d);
-c;
-}
-i{
-g=k e;
-a!g)g=dlsym(0,M);
-a d b 61&j){
-c;
-w f;
-N(6,g);
-}
-i a u 40){
-N(8,g);
-a C b 11){
-N(0,g);
-l z);
-c;
-}
-}
-}
-}
-a d b 40){
-a g b 1)l 80);
-r s(60545,0);
-c;
-j=0;
-p u 41){
-w f;
-s(2393225,j);
-a d b 44)c;
-j=j t;
-}
-k r j;
-c;
-a!g){
-e=e t;
-k e=s(232,k n;
-}
-i a g b 1){
-s(2397439,j);
-j=j t;
-}
-i{
-s(232,g-q-5);
-}
-a j)s(50305,j);
-}
-}
-O y{
-F e,g,m;
-a j--b 1)T(1);
-i{
-O y;
-r 0;
-p j b C){
-g=d;
-e=z;
-c;
-a j>8){
-r S(e,m);
-O y;
-}
-i{
-l 80);
-O y;
-l 89);
-a j b 4|j b 5){
-Z(n;
-}
-i{
-l n;
-a g b 37)l 146);
-}
-}
-}
-a m&&j>8){
-r S(e,m);
-H(e^1);
-B(5);
-A(m);
-H(n;
-}
-}
-}
-w f{
-O(11);
-}
-U f{
-w f;
-J S(0,0);
-}
-I y{
-F m,g,e;
-a d b 288){
-c;
-c;
-r U f;
-c;
-I y;
-a d b 312){
-c;
-g=B(0);
-A(m);
-I y;
-A(g);
-}
-i{
-A(m);
-}
-}
-i a d b 352|d b 504){
-e=d;
-c;
-c;
-a e b 352){
-g=q;
-r U f;
-}
-i{
-a u 59)w f;
-c;
-g=q;
-r 0;
-a u 59)r U f;
-c;
-a u 41){
-e=B(0);
-w f;
-B(g-q-5);
-A(n;
-g=e t;
-}
-}
-c;
-I(&m);
-B(g-q-5);
-A(m);
-}
-i a d b 123){
-c;
-ab(1);
-p u 125)I y;
-c;
-}
-i{
-a d b 448){
-c;
-a u 59)w f;
-K=B(K);
-}
-i a d b 400){
-c;
-k j=B(k j);
-}
-i a u 59)w f;
-c;
-}
-}
-ab y{
-F m;
-p d b 256|u-1&!j){
-a d b 256){
-c;
-p u 59){
-a j){
-G=G t;
-k d=-G;
-}
-i{
-k d=v;
-v=v t;
-}
-c;
-a d b 44)c;
-}
-c;
-}
-i{
-A(k(d t));
-k d=q;
-c;
-c;
-r 8;
-p u 41){
-k d=m;
-r m t;
-c;
-a d b 44)c;
-}
-c;
-K=G=0;
-l 15042901);
-r s(60545,0);
-I(0);
-A(K);
-l 50121);
-k r G;
-}
-}
-}
-main(g,n{
-Q=stdin;
-a g-->1){
-e=e t;
-Q=fopen(k e,"r");
-}
-D=strcpy(R V," int if else while break return for define main ")+48;
-v V;
-q=ac V;
-P V;
-o f;
-c;
-ab(0);
-mprotect(ac & (~ 4095), (99999 + 4095) & (~ 4095), 7);
-fprintf(stderr, "otcc.c: about to execute compiled code.\n");
-J(*(int(*)f)k(P+592))(g,n;
-}
-
diff --git a/tests/data/pointers.bc b/tests/data/pointers.bc
deleted file mode 100644
index d4e88e4..0000000
--- a/tests/data/pointers.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/pointers2.bc b/tests/data/pointers2.bc
deleted file mode 100644
index b6d1f91..0000000
--- a/tests/data/pointers2.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/returnval-ansi.bc b/tests/data/returnval-ansi.bc
deleted file mode 100644
index ac14ab1..0000000
--- a/tests/data/returnval-ansi.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/returnval.c b/tests/data/returnval.c
deleted file mode 100644
index 1cf5bae..0000000
--- a/tests/data/returnval.c
+++ /dev/null
@@ -1,4 +0,0 @@
-main() {
- return 42;
-}
-
diff --git a/tests/data/short.bc b/tests/data/short.bc
deleted file mode 100644
index eae1d26..0000000
--- a/tests/data/short.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/simplest.bc b/tests/data/simplest.bc
deleted file mode 100644
index 4b6ce5f..0000000
--- a/tests/data/simplest.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/src/addressOf.c b/tests/data/src/addressOf.c
deleted file mode 100644
index e7acde5..0000000
--- a/tests/data/src/addressOf.c
+++ /dev/null
@@ -1,31 +0,0 @@
-void testStruct() {
- struct str {
- float x;
- float y;
- };
-
- struct str base;
- int index = 0;
-
- base.x = 10.0;
- struct str *s = &base;
-
- float *v = &(*s).x;
- float *v2 = &s[index].x;
- printf("testStruct: %g %g %g\n",base.x, *v, *v2);
-}
-
-void testArray() {
- int a[2];
- a[0] = 1;
- a[1] = 2;
- int* p = &a[0];
- int* p2 = a;
- printf("testArray: %d %d %d\n", a[0], *p, *p2);
-}
-
-int main() {
- testStruct();
- testArray();
- return 0;
-}
diff --git a/tests/data/src/array.c b/tests/data/src/array.c
deleted file mode 100644
index ca4a728..0000000
--- a/tests/data/src/array.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// Array allocation tests
-
-void testLocalInt()
-{
- int a[3];
- a[0] = 1;
- a[1] = 2;
- a[2] = a[0] + a[1];
- printf("localInt: %d\n", a[2]);
-}
-
-char a[3];
-double d[3];
-
-void testGlobalChar()
-{
- a[0] = 1;
- a[1] = 2;
- a[2] = a[0] + a[1];
- printf("globalChar: %d\n", a[2]);
-}
-
-void testGlobalDouble()
-{
- d[0] = 1;
- d[1] = 2;
- d[2] = d[0] + d[1];
- printf("globalDouble: %g\n", d[2]);
-}
-
-void testLocalDouble()
-{
- double d[3];
- float m[12];
- m[0] = 1.0f;
- m[1] = 2.0f;
- d[0] = 1.0;
- d[1] = 2.0;
- d[2] = d[0] + d[1];
- m[2] = m[0] + m[1];
- printf("localDouble: %g %g\n", d[2], m[2]);
-}
-
-void vectorAdd(int* a, int* b, float* c, int len) {
- int i;
- for(i = 0; i < len; i++) {
- c[i] = a[i] + b[i];
- }
-}
-
-void testArgs() {
- int a[3], b[3];
- float c[3];
- int i;
- int len = 3;
- for(i = 0; i < len; i++) {
- a[i] = i;
- b[i] = i;
- c[i] = 0;
- }
- vectorAdd(a,b,c, len);
- printf("testArgs:");
- for(i = 0; i < len; i++) {
- printf(" %g", c[i]);
- }
- printf("\n");
-}
-
-void testDecay() {
- char c[4];
- c[0] = 'H';
- c[1] = 'i';
- c[2] = '!';
- c[3] = 0;
- printf("testDecay: %s\n", c);
-}
-
-void test2D() {
- char c[10][20];
- int x;
- int y;
- printf("test2D:\n");
- for(y = 0; y < 10; y++) {
- for(x = 0; x < 20; x++) {
- c[y][x] = 'a' + (15 & (y * 19 + x));
- }
- }
- for(y = 0; y < 10; y++) {
- for(x = 0; x < 20; x++) {
- printf("%c", c[y][x]);
- }
- printf("\n");
- }
-
-}
-
-int main()
-{
- testLocalInt();
- testLocalDouble();
- testGlobalChar();
- testGlobalDouble();
- testArgs();
- testDecay();
- test2D();
- return 0;
-}
diff --git a/tests/data/src/assignment.c b/tests/data/src/assignment.c
deleted file mode 100644
index 4fc7801..0000000
--- a/tests/data/src/assignment.c
+++ /dev/null
@@ -1,9 +0,0 @@
-int main() {
- int a = 0;
- int b = 1;
- a = b = 2; // Test that "b = 2" generates an rvalue.
- if (a = 7) { // Test that a = 7 generates an rvalue.
- b = 3;
- }
- return a;
-}
diff --git a/tests/data/src/assignmentop.c b/tests/data/src/assignmentop.c
deleted file mode 100644
index 649edf9..0000000
--- a/tests/data/src/assignmentop.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// Test assignment operations
-
-void testAssignment() {
- int a = 2;
- a *= 5;
- printf("2 *= 5 %d\n", a);
- a = 20;
- a /= 5;
- printf("20 /= 5 %d\n", a);
- a = 17;
- a %= 5;
- printf("17 %%= 5 %d\n", a);
- a = 17;
- a += 5;
- printf("17 += 5 %d\n", a);
- a = 17;
- a-=5;
- printf("17 -= 5 %d\n", a);
- a = 17;
- a<<=1;
- printf("17<<= 1 %d\n", a);
- a = 17;
- a>>=1;
- printf("17>>= 1 %d\n", a);
- a = 17;
- a&=1;
- printf("17&= 1 %d\n", a);
- a = 17;
- a^=1;
- printf("17^= 1 %d\n", a);
- a = 16;
- a^=1;
- printf("16|= 1 %d\n", a);
-}
-
-int a;
-
-int* f() {
- printf("f()\n");
- return &a;
-}
-
-void testEval() {
- a = 0;
- printf("*f() = *f() + 10;\n");
- *f() = *f() + 10;
- printf("a = %d\n", a);
-}
-
-void testOpEval() {
- a = 0;
- printf("*f() += 10;\n");
- *f() += 10;
- printf("a = %d\n", a);
-}
-
-int main() {
- testAssignment();
- testEval();
- testOpEval();
- return 0;
-}
diff --git a/tests/data/src/b2071670.c b/tests/data/src/b2071670.c
deleted file mode 100644
index 311bc4f..0000000
--- a/tests/data/src/b2071670.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// See http://b/2071670
-
-int main() {
- float f = 10.0f;
- float* floatPointer = &f;
- // The following line used to incorrectly error: "Incompatible pointer or array types"
- int* buffer = (int*) floatPointer;
- return *buffer;
-}
diff --git a/tests/data/src/bellard.otccex.c b/tests/data/src/bellard.otccex.c
deleted file mode 100644
index e8f0989..0000000
--- a/tests/data/src/bellard.otccex.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/* #!/usr/local/bin/otcc */
-/*
- * Sample OTCC C example. You can uncomment the first line and install
- * otcc in /usr/local/bin to make otcc scripts !
- */
-
-/* Any preprocessor directive except #define are ignored. We put this
- include so that a standard C compiler can compile this code too. */
-#include <stdio.h>
-
-/* defines are handled, but macro arguments cannot be given. No
- recursive defines are tolerated */
-#define DEFAULT_BASE 10
-
-/*
- * Only old style K&R prototypes are parsed. Only int arguments are
- * allowed (implicit types).
- *
- * By benchmarking the execution time of this function (for example
- * for fib(35)), you'll notice that OTCC is quite fast because it
- * generates native i386 machine code.
- */
-fib(n)
-{
- if (n <= 2)
- return 1;
- else
- return fib(n-1) + fib(n-2);
-}
-
-/* Identifiers are parsed the same way as C: begins with letter or
- '_', and then letters, '_' or digits */
-fact(n)
-{
- /* local variables can be declared. Only 'int' type is supported */
- int i, r;
- r = 1;
- /* 'while' and 'for' loops are supported */
- for(i=2;i<=n;i++)
- r = r * i;
- return r;
-}
-
-/* Well, we could use printf, but it would be too easy */
-print_num(n, b)
-{
- int tab, p, c;
- /* Numbers can be entered in decimal, hexadecimal ('0x' prefix) and
- octal ('0' prefix) */
- /* more complex programs use malloc */
- tab = malloc(0x100);
- p = tab;
- while (1) {
- c = n % b;
- /* Character constants can be used */
- if (c >= 10)
- c = c + 'a' - 10;
- else
- c = c + '0';
- *(char *)p = c;
- p++;
- n = n / b;
- /* 'break' is supported */
- if (n == 0)
- break;
- }
- while (p != tab) {
- p--;
- printf("%c", *(char *)p);
- }
- free(tab);
-}
-
-/* 'main' takes standard 'argc' and 'argv' parameters */
-main(argc, argv)
-{
- /* no local name space is supported, but local variables ARE
- supported. As long as you do not use a globally defined
- variable name as local variable (which is a bad habbit), you
- won't have any problem */
- int s, n, f, base;
-
- /* && and || operator have the same semantics as C (left to right
- evaluation and early exit) */
- if (argc != 2 && argc != 3) {
- /* '*' operator is supported with explicit casting to 'int *',
- 'char *' or 'int (*)()' (function pointer). Of course, 'int'
- are supposed to be used as pointers too. */
- s = *(int *)argv;
- help(s);
- return 1;
- }
- /* Any libc function can be used because OTCC uses dynamic linking */
- n = atoi(*(int *)(argv + 4));
- base = DEFAULT_BASE;
- if (argc >= 3) {
- base = atoi(*(int *)(argv + 8));
- if (base < 2 || base > 36) {
- /* external variables can be used too (here: 'stderr') */
- fprintf(stderr, "Invalid base\n");
- return 1;
- }
- }
- printf("fib(%d) = ", n);
- print_num(fib(n), base);
- printf("\n");
-
- printf("fact(%d) = ", n);
- if (n > 12) {
- printf("Overflow");
- } else {
- /* why not using a function pointer ? */
- f = &fact;
- print_num((*(int (*)())f)(n), base);
- }
- printf("\n");
- return 0;
-}
-
-/* functions can be used before being defined */
-help(name)
-{
- printf("usage: %s n [base]\n", name);
- printf("Compute fib(n) and fact(n) and output the result in base 'base'\n");
-}
-
diff --git a/tests/data/src/brackets.c b/tests/data/src/brackets.c
deleted file mode 100644
index bab88a2..0000000
--- a/tests/data/src/brackets.c
+++ /dev/null
@@ -1,61 +0,0 @@
-void testBrackets(int* ar, int len) {
- int i;
- int errors = 0;
- for (i = 0; i < len; i++) {
- ar[i] = i;
- }
- for (i = 0; i < len; i++) {
- if (ar[i] != i) {
- printf("error: [%d] %d != %d\n", i, ar[i], i);
- errors++;
- }
- }
- printf("Errors: %d\n", errors);
-}
-
-void testBrackets2D(int** ar2D, int lenX, int lenY) {
- int x, y;
- int errors = 0;
- for (x = 0; x < lenX; x++) {
- for (y = 0; y < lenY; y++) {
- ar2D[x][y] = x * lenY + y;
- }
- }
- for (x = 0; x < lenX; x++) {
- for (y = 0; y < lenY; y++) {
- int expected = x * lenY + y;
- int val = ar2D[x][y];
- if (val != expected) {
- printf("error: [%d][%d] %d != %d\n", x, y, val, expected);
- errors++;
- }
- }
- }
- printf("2D Errors: %d\n", errors);
-}
-
-void testHeap() {
- int* ar = (int*) malloc(100);
- testBrackets(ar, 25);
- free(ar);
-}
-
-void testHeap2D() {
- int lenX = 10;
- int lenY = 5;
- int* ar = (int*) malloc(lenX * lenY * 4);
- int** ar2D = (int**) malloc(lenX * 4);
- int i;
- for(i = 0; i < lenX; i++) {
- ar2D[i] = ar + lenY * i;
- }
- testBrackets2D(ar2D, lenX, lenY);
- free(ar);
- free(ar2D);
-}
-
-int main() {
- testHeap();
- testHeap2D();
- return 0;
-}
diff --git a/tests/data/src/casts.c b/tests/data/src/casts.c
deleted file mode 100644
index d3a49b4..0000000
--- a/tests/data/src/casts.c
+++ /dev/null
@@ -1,85 +0,0 @@
-void test1() {
- int a = 3;
- int* pb = &a;
- int c = *pb;
- printf("Reading from a pointer: %d %d\n", a, c);
- *pb = 4;
- printf("Writing to a pointer: %d\n", a);
- printf("Testing casts: %d %g %g %d\n", 3, (float) 3, 4.5, (int) 4.5);
-}
-
-void test2() {
- int x = 4;
- int px = &x;
- // int z = * px; // An error, expected a pointer type
- int y = * (int*) px;
- printf("Testing reading (int*): %d\n", y);
-}
-
-void test3() {
- int px = (int) malloc(120);
- * (int*) px = 8;
- * (int*) (px + 4) = 9;
- printf("Testing writing (int*): %d %d\n", * (int*) px, * (int*) (px + 4));
- free((void*) px);
-}
-
-void test4() {
- int x = 0x12345678;
- int px = &x;
- int a = * (char*) px;
- int b = * (char*) (px + 1);
- int c = * (char*) (px + 2);
- int d = * (char*) (px + 3);
- printf("Testing reading (char*): 0x%02x 0x%02x 0x%02x 0x%02x\n", a, b, c, d);
-}
-
-void test5() {
- int x = 0xFFFFFFFF;
- int px = &x;
- * (char*) px = 0x21;
- * (char*) (px + 1) = 0x43;
- * (char*) (px + 2) = 0x65;
- * (char*) (px + 3) = 0x87;
- printf("Testing writing (char*): 0x%08x\n", x);
-}
-
-int f(int b) {
- printf("f(%d)\n", b);
- return 7 * b;
-}
-
-void test6() {
- int fp = &f;
- int x = (*(int(*)()) fp)(10);
- printf("Function pointer result: %d\n", x);
-}
-
-void test7() {
- int px = (int) malloc(120);
- * (float*) px = 8.8f;
- * (float*) (px + 4) = 9.9f;
- printf("Testing read/write (float*): %g %g\n", * (float*) px, * (float*) (px + 4));
- free((void*) px);
-}
-
-void test8() {
- int px = (int) malloc(120);
- * (double*) px = 8.8;
- * (double*) (px + 8) = 9.9;
- printf("Testing read/write (double*): %g %g\n", * (double*) px, * (double*) (px + 8));
- free((void*) px);
-}
-
-
-int main() {
- test1();
- test2();
- test3();
- test4();
- test5();
- test6();
- test7();
- test8();
- return 0;
-}
diff --git a/tests/data/src/char.c b/tests/data/src/char.c
deleted file mode 100644
index 8c63ba2..0000000
--- a/tests/data/src/char.c
+++ /dev/null
@@ -1,13 +0,0 @@
-char ga;
-char gb;
-
-int main() {
- char a = 'c';
- char b = a * 3;
- printf("a = %d, b = %d\n", a, b);
- ga = 'd';
- gb = ga * 3;
- printf("ga = %d, gb = %d\n", ga, gb);
- return 0;
-}
-
diff --git a/tests/data/src/comma.c b/tests/data/src/comma.c
deleted file mode 100644
index 496944c..0000000
--- a/tests/data/src/comma.c
+++ /dev/null
@@ -1,35 +0,0 @@
-int testReturn() {
- return 10, 20, 30;
-}
-
-int testArg(int a) {
- return a;
-}
-
-void testComma() {
- int a;
- 0, a = 10,20;
- printf("statement: %d\n", a);
- a = 1;
- if (a = 0, 1) {
- printf("if: a = %d\n", a);
- }
- int b = 0;
- a = 10;
- while(b++,a--) {}
- printf("while: b = %d\n", b);
- b = 0;
- for(b++,a = 0;b++, a < 10; b++, a++) {}
- printf("for: b = %d\n", b);
- b = testReturn();
- printf("return: %d\n", b);
- b = testArg((a,12));
- printf("arg: %d\n", b);
-}
-
-
-
-int main() {
- testComma();
- return 0;
-}
diff --git a/tests/data/src/constants.c b/tests/data/src/constants.c
deleted file mode 100644
index 230109a..0000000
--- a/tests/data/src/constants.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#define FOO 0x10
-
-int main() {
- printf("0 = %d\n", 0);
- printf("010 = %d\n", 010);
- printf("0x10 = %d\n", FOO);
- printf("'\\a' = %d\n", '\a');
- printf("'\\b' = %d\n", '\b');
- printf("'\\f' = %d\n", '\f');
- printf("'\\n' = %d\n", '\n');
- printf("'\\r' = %d\n", '\r');
- printf("'\\t' = %d\n", '\t');
- printf("'\\v' = %d\n", '\v');
- // Undefined
- // printf("'\\z' = %d\n", '\z');
- printf("'\\\\' = %d\n", '\\');
- printf("'\\'' = %d\n", '\'');
- printf("'\\\"' = %d\n", '\"');
- printf("'\\?' = %d\n", '\?');
- printf("'\\0' = %d\n", '\0');
- printf("'\\1' = %d\n", '\1');
- printf("'\\12' = %d\n", '\12');
- printf("'\\123' = %d\n", '\123');
- printf("'\\x0' = %d\n", '\x0');
- printf("'\\x1' = %d\n", '\x1');
- printf("'\\x12' = %d\n", '\x12');
- printf("'\\x123' = %d\n", '\x123');
- printf("'\\x1f' = %d\n", '\x1f');
- printf("'\\x1F' = %d\n", '\x1F');
-}
diff --git a/tests/data/src/defines.c b/tests/data/src/defines.c
deleted file mode 100644
index 6cb6f7e..0000000
--- a/tests/data/src/defines.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// Simple tests of the C preprocessor
-
-#define A 1
-#define A (4 / 2)
-#define B 1 // This is a comment. With a / in it.
-
-int main() {
- return A + B;
-}
diff --git a/tests/data/src/double.c b/tests/data/src/double.c
deleted file mode 100644
index 5bc20a3..0000000
--- a/tests/data/src/double.c
+++ /dev/null
@@ -1,7 +0,0 @@
-double atof(char *nptr);
-
-int main() {
- printf("Value = %g\n", atof("10.42"));
- return 0;
-}
-
diff --git a/tests/data/src/error.c b/tests/data/src/error.c
deleted file mode 100644
index 2e08dcc..0000000
--- a/tests/data/src/error.c
+++ /dev/null
@@ -1,2 +0,0 @@
-void foo;
-
diff --git a/tests/data/src/expr-ansi.c b/tests/data/src/expr-ansi.c
deleted file mode 100644
index d463659..0000000
--- a/tests/data/src/expr-ansi.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Test operators */
-
-void testInc() { int a, b; a = 3; b = a++; printf("3++ = %d %d\n", b, a); }
-void testDec() { int a, b; a = 3; b = a--; printf("3-- = %d %d\n", b, a); }
-void testTimes(){ printf("%d * %d = %d\n", 10, 4, 10 * 4); }
-void testDiv(){ printf("%d / %d = %d\n", 11, 4, 11 / 4); }
-void testMod(){ printf("%d %% %d = %d\n", 11, 4, 11 % 4); }
-void testPlus(){ printf("%d + %d = %d\n", 10, 4, 10 + 4); }
-void testMinus(){ printf("%d - %d = %d\n", 10, 4, 10 - 4); }
-void testShiftLeft(){ printf("%d << %d = %d\n", 10, 4, 10 << 4); }
-void testShiftRight(){ printf("%d >> %d = %d\n", 100, 4, 100 >> 4); }
-void testLess(){ printf("%d < %d = %d\n", 10, 4, 10 < 4); }
-void testLesEqual(){ printf("%d <= %d = %d\n", 10, 4, 10 <= 4); }
-void testGreater(){ printf("%d > %d = %d\n", 10, 4, 10 > 4); }
-void testGreaterEqual(){ printf("%d >= %d = %d\n", 10, 4, 10 >= 4); }
-void testEqualTo(){ printf("%d == %d = %d\n", 10, 4, 10 == 4); }
-void testNotEqualTo(){ printf("%d != %d = %d\n", 10, 4, 10 != 4); }
-void testBitAnd(){ printf("%d & %d = %d\n", 10, 7, 10 & 7); }
-void testBitXor(){ printf("%d ^ %d = %d\n", 10, 7, 10 ^ 7); }
-void testBitOr(){ printf("%d | %d = %d\n", 10, 4, 10 | 4); }
-void testAssignment(){ int a, b; a = 3; b = a; printf("b == %d\n", b); }
-void testLogicalAnd(){ printf("%d && %d = %d\n", 10, 4, 10 && 4); }
-void testLogicalOr(){ printf("%d || %d = %d\n", 10, 4, 10 || 4); }
-void testAddressOf(){ int a; printf("&a is %d\n", &a); }
-void testPointerIndirection(){ int a, b; a = &b; b = 17; printf("*%d = %d =?= %d\n", a, * (int*) a, b); }
-void testNegation(){ printf("-%d = %d\n", 10, -10); }
-void testUnaryPlus(){ printf("+%d = %d\n", 10, +10); }
-void testUnaryNot(){ printf("!%d = %d\n", 10, !10); }
-void testBitNot(){ printf("~%d = %d\n", 10, ~10); }
-
-int main(int a, char** b) {
- testInc();
- testDec();
- testTimes();
- testDiv();
- testMod();
- testPlus();
- testMinus();
- testShiftLeft();
- testShiftRight();
- testLess();
- testLesEqual();
- testGreater();
- testGreaterEqual();
- testEqualTo();
- testNotEqualTo();
- testBitAnd();
- testBinXor();
- testBitOr();
- testAssignment();
- testLogicalAnd();
- testLogicalOr();
- testAddressOf();
- testPointerIndirection();
- testNegation();
- testUnaryPlus();
- testUnaryNot();
- testBitNot();
- return 0;
-}
diff --git a/tests/data/src/expr.c b/tests/data/src/expr.c
deleted file mode 100644
index 4f2d2e7..0000000
--- a/tests/data/src/expr.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Test operators */
-
-testInc() { int a, b; a = 3; b = a++; printf("3++ = %d %d\n", b, a); }
-testDec() { int a, b; a = 3; b = a--; printf("3-- = %d %d\n", b, a); }
-testTimes(){ printf("%d * %d = %d\n", 10, 4, 10 * 4); }
-testDiv(){ printf("%d / %d = %d\n", 11, 4, 11 / 4); }
-testMod(){ printf("%d %% %d = %d\n", 11, 4, 11 % 4); }
-testPlus(){ printf("%d + %d = %d\n", 10, 4, 10 + 4); }
-testMinus(){ printf("%d - %d = %d\n", 10, 4, 10 - 4); }
-testShiftLeft(){ printf("%d << %d = %d\n", 10, 4, 10 << 4); }
-testShiftRight(){ printf("%d >> %d = %d\n", 100, 4, 100 >> 4); }
-testLess(){ printf("%d < %d = %d\n", 10, 4, 10 < 4); }
-testLesEqual(){ printf("%d <= %d = %d\n", 10, 4, 10 <= 4); }
-testGreater(){ printf("%d > %d = %d\n", 10, 4, 10 > 4); }
-testGreaterEqual(){ printf("%d >= %d = %d\n", 10, 4, 10 >= 4); }
-testEqualTo(){ printf("%d == %d = %d\n", 10, 4, 10 == 4); }
-testNotEqualTo(){ printf("%d != %d = %d\n", 10, 4, 10 != 4); }
-testBitAnd(){ printf("%d & %d = %d\n", 10, 7, 10 & 7); }
-testBitXor(){ printf("%d ^ %d = %d\n", 10, 7, 10 ^ 7); }
-testBitOr(){ printf("%d | %d = %d\n", 10, 4, 10 | 4); }
-testAssignment(){ int a, b; a = 3; b = a; printf("b == %d\n", b); }
-testLogicalAnd(){ printf("%d && %d = %d\n", 10, 4, 10 && 4); }
-testLogicalOr(){ printf("%d || %d = %d\n", 10, 4, 10 || 4); }
-testAddressOf(){ int a; printf("&a is %d\n", &a); }
-testPointerIndirection(){ int a, b; a = &b; b = 17; printf("*%d = %d =?= %d\n", a, * (int*) a, b); }
-testNegation(){ printf("-%d = %d\n", 10, -10); }
-testUnaryPlus(){ printf("+%d = %d\n", 10, +10); }
-testUnaryNot(){ printf("!%d = %d\n", 10, !10); }
-testBitNot(){ printf("~%d = %d\n", 10, ~10); }
-
-main(a,b) {
- testInc();
- testDec();
- testTimes();
- testDiv();
- testMod();
- testPlus();
- testMinus();
- testShiftLeft();
- testShiftRight();
- testLess();
- testLesEqual();
- testGreater();
- testGreaterEqual();
- testEqualTo();
- testNotEqualTo();
- testBitAnd();
- testBinXor();
- testBitOr();
- testAssignment();
- testLogicalAnd();
- testLogicalOr();
- testAddressOf();
- testPointerIndirection();
- testNegation();
- testUnaryPlus();
- testUnaryNot();
- testBitNot();
- return 0;
-}
\ No newline at end of file
diff --git a/tests/data/src/expr2.c b/tests/data/src/expr2.c
deleted file mode 100644
index 04b6a38..0000000
--- a/tests/data/src/expr2.c
+++ /dev/null
@@ -1,6 +0,0 @@
-/* Test operators */
-
-main() {
- int a;
- a = a++;
-}
diff --git a/tests/data/src/film.c b/tests/data/src/film.c
deleted file mode 100644
index 00c2d36..0000000
--- a/tests/data/src/film.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// Test logical and bitwise AND and OR
-
-int test(int x, int y) {
- int v = x || y;
- return v;
-}
-
-int test2(int x, int y) {
- if(x | y) {
- return 1;
- } else {
- return 0;
- }
-}
-
-int test3(int x, int y) {
- int v = x && y;
- return v;
-}
-
-int test4(int x, int y) {
- if(x & y) {
- return 1;
- } else {
- return 0;
- }
-}
-
-int main(int index)
-{
- int x,y;
- printf("testing...\n");
- int totalBad = 0;
- for(y = 0; y < 2; y++) {
- for(x = 0; x < 2; x++) {
- int a = test(x,y);
- int b = test2(x,y);
- if (a != b) {
- printf("Results differ: OR x=%d y=%d a=%d b=%d\n", x, y, a, b);
- totalBad++;
- }
- a = test3(x,y);
- b = test4(x,y);
- if (a != b) {
- printf("Results differ: AND x=%d y=%d a=%d b=%d\n", x, y, a, b);
- totalBad++;
- }
- }
- }
- printf("Total bad: %d\n", totalBad);
- return 0;
-}
-
diff --git a/tests/data/src/float.c b/tests/data/src/float.c
deleted file mode 100644
index f48b3d1..0000000
--- a/tests/data/src/float.c
+++ /dev/null
@@ -1,57 +0,0 @@
-int ftoi(float f) {
- return f;
-}
-
-int dtoi(double d) {
- return d;
-}
-
-float itof(int i) {
- return i;
-}
-
-double itod(int i) {
- return i;
-}
-
-float f0, f1;
-double d0, d1;
-
-void testParseConsts() {
- printf("Constants: %g %g %g %g %g %g %g %g %g\n", 0e1, 0E1, 0.f, .01f,
- .01e0f, 1.0e-1, 1.0e1, 1.0e+1,
- .1f);
-}
-void testVars(float arg0, float arg1, double arg2, double arg3) {
- float local0, local1;
- double local2, local3;
- f0 = arg0;
- f1 = arg1;
- d0 = arg2;
- d1 = arg3;
- local0 = arg0;
- local1 = arg1;
- local2 = arg2;
- local3 = arg3;
- printf("globals: %g %g %g %g\n", f0, f1, d0, d1);
- printf("args: %g %g %g %g\n", arg0, arg1, arg2, arg3);
- printf("locals: %g %g %g %g\n", local0, local1, local2, local3);
-
-
- printf("cast rval: %g %g\n", * (float*) & f1, * (double*) & d1);
-
- * (float*) & f0 = 1.1f;
- * (double*) & d0 = 3.3;
- printf("cast lval: %g %g %g %g\n", f0, f1, d0, d1);
-}
-
-int main() {
- testParseConsts();
- printf("int: %d float: %g double: %g\n", 1, 2.2f, 3.3);
- printf(" ftoi(1.4f)=%d\n", ftoi(1.4f));
- printf(" dtoi(2.4)=%d\n", dtoi(2.4));
- printf(" itof(3)=%g\n", itof(3));
- printf(" itod(4)=%g\n", itod(4));
- testVars(1.0f, 2.0f, 3.0, 4.0);
- return 0;
-}
diff --git a/tests/data/src/floatdouble.c b/tests/data/src/floatdouble.c
deleted file mode 100644
index 264c641..0000000
--- a/tests/data/src/floatdouble.c
+++ /dev/null
@@ -1,9 +0,0 @@
-int main()
-{
- // Test coercing values when storing.
- float a = 0.002;
- double b = 0.1f;
- int c = 10.002;
- printf("%g %g %d\n", a, b, c);
- return 0;
-}
diff --git a/tests/data/src/flops.c b/tests/data/src/flops.c
deleted file mode 100644
index 40b1b28..0000000
--- a/tests/data/src/flops.c
+++ /dev/null
@@ -1,158 +0,0 @@
-// Test floating point operations.
-
-void unaryOps() {
- // Unary ops
- printf("-%g = %g\n", 1.1, -1.1);
- printf("!%g = %d\n", 1.2, !1.2);
- printf("!%g = %d\n", 0.0, !0.0);
-}
-
-void binaryOps() {
- printf("double op double:\n");
- printf("%g + %g = %g\n", 1.0, 2.0, 1.0 + 2.0);
- printf("%g - %g = %g\n", 1.0, 2.0, 1.0 - 2.0);
- printf("%g * %g = %g\n", 1.0, 2.0, 1.0 * 2.0);
- printf("%g / %g = %g\n", 1.0, 2.0, 1.0 / 2.0);
-
- printf("float op float:\n");
- printf("%g + %g = %g\n", 1.0f, 2.0f, 1.0f + 2.0f);
- printf("%g - %g = %g\n", 1.0f, 2.0f, 1.0f - 2.0f);
- printf("%g * %g = %g\n", 1.0f, 2.0f, 1.0f * 2.0f);
- printf("%g / %g = %g\n", 1.0f, 2.0f, 1.0f / 2.0f);
-
- printf("double op float:\n");
- printf("%g + %g = %g\n", 1.0, 2.0f, 1.0 + 2.0f);
- printf("%g - %g = %g\n", 1.0, 2.0f, 1.0 - 2.0f);
- printf("%g * %g = %g\n", 1.0, 2.0f, 1.0 * 2.0f);
- printf("%g / %g = %g\n", 1.0, 2.0f, 1.0 / 2.0f);
-
- printf("double op int:\n");
- printf("%g + %d = %g\n", 1.0, 2, 1.0 + 2);
- printf("%g - %d = %g\n", 1.0, 2, 1.0 - 2);
- printf("%g * %d = %g\n", 1.0, 2, 1.0 * 2);
- printf("%g / %d = %g\n", 1.0, 2, 1.0 / 2);
-
- printf("int op double:\n");
- printf("%d + %g = %g\n", 1, 2.0, 1 + 2.0);
- printf("%d - %g = %g\n", 1, 2.0, 1 - 2.0);
- printf("%d * %g = %g\n", 1, 2.0, 1 * 2.0);
- printf("%d / %g = %g\n", 1, 2.0, 1 / 2.0);
-}
-
-void comparisonTestdd(double a, double b) {
- printf("%g op %g: < %d <= %d == %d >= %d > %d != %d\n",
- a, b, a < b, a <= b, a == b, a >= b, a > b, a != b);
-}
-
-void comparisonOpsdd() {
- printf("double op double:\n");
- comparisonTestdd(1.0, 2.0);
- comparisonTestdd(1.0, 1.0);
- comparisonTestdd(2.0, 1.0);
-}
-
-
-void comparisonTestdf(double a, float b) {
- printf("%g op %g: < %d <= %d == %d >= %d > %d != %d\n",
- a, b, a < b, a <= b, a == b, a >= b, a > b, a != b);
-}
-
-void comparisonOpsdf() {
- printf("double op float:\n");
- comparisonTestdf(1.0, 2.0f);
- comparisonTestdf(1.0, 1.0f);
- comparisonTestdf(2.0, 1.0f);
-}
-
-void comparisonTestff(float a, float b) {
- printf("%g op %g: < %d <= %d == %d >= %d > %d != %d\n",
- a, b, a < b, a <= b, a == b, a >= b, a > b, a != b);
-}
-
-void comparisonOpsff() {
- printf("float op float:\n");
- comparisonTestff(1.0f, 2.0f);
- comparisonTestff(1.0f, 1.0f);
- comparisonTestff(2.0f, 1.0f);
-}
-
-void comparisonTestid(int a, double b) {
- printf("%d op %g: < %d <= %d == %d >= %d > %d != %d\n",
- a, b, a < b, a <= b, a == b, a >= b, a > b, a != b);
-}
-
-void comparisonOpsid() {
- printf("int op double:\n");
- comparisonTestid(1, 2.0);
- comparisonTestid(1, 1.0);
- comparisonTestid(2, 1.0);
-}
-void comparisonTestdi(double a, int b) {
- printf("%g op %d: < %d <= %d == %d >= %d > %d != %d\n",
- a, b, a < b, a <= b, a == b, a >= b, a > b, a != b);
-}
-
-void comparisonOpsdi() {
- printf("double op int:\n");
- comparisonTestdi(1.0f, 2);
- comparisonTestdi(1.0f, 1);
- comparisonTestdi(2.0f, 1);
-}
-
-void comparisonOps() {
- comparisonOpsdd();
- comparisonOpsdf();
- comparisonOpsff();
- comparisonOpsid();
- comparisonOpsdi();
-}
-
-int branch(double d) {
- if (d) {
- return 1;
- }
- return 0;
-}
-
-void testBranching() {
- printf("branching: %d %d %d\n", branch(-1.0), branch(0.0), branch(1.0));
-}
-
-void testpassi(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l) {
- printf("testpassi: %d %d %d %d %d %d %d %d %d %d %d %d\n", a, b, c, d, e, f, g, h, i, j, k, l);
-}
-
-void testpassf(float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l) {
- printf("testpassf: %g %g %g %g %g %g %g %g %g %g %g %g\n", a, b, c, d, e, f, g, h, i, j, k, l);
-}
-
-void testpassd(double a, double b, double c, double d, double e, double f, double g, double h, double i, double j, double k, double l) {
- printf("testpassd: %g %g %g %g %g %g %g %g %g %g %g %g\n", a, b, c, d, e, f, g, h, i, j, k, l);
-}
-
-void testpassidf(int i, double d, float f) {
- printf("testpassidf: %d %g %g\n", i, d, f);
-}
-
-void testParameterPassing() {
- float x;
- testpassi(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
- testpassf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
- testpassd(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
- testpassi(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f);
- testpassf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f);
- testpassd(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f);
- testpassi(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0);
- testpassf(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0);
- testpassd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0);
- testpassidf(1, 2.0, 3.0f);
-}
-
-int main() {
- unaryOps();
- binaryOps();
- comparisonOps();
- testBranching();
- testParameterPassing();
- return 0;
-}
diff --git a/tests/data/src/funcargs.c b/tests/data/src/funcargs.c
deleted file mode 100644
index 1dce226..0000000
--- a/tests/data/src/funcargs.c
+++ /dev/null
@@ -1,8 +0,0 @@
-int f(int a,int, int c) {
- return a + c;
-}
-
-int main() {
- return f(1,2,3);
-}
-
diff --git a/tests/data/src/hello.c b/tests/data/src/hello.c
deleted file mode 100644
index 06c9d03..0000000
--- a/tests/data/src/hello.c
+++ /dev/null
@@ -1,4 +0,0 @@
-int main() {
- printf("Hello, world\n");
- return 0;
-}
diff --git a/tests/data/src/inc.c b/tests/data/src/inc.c
deleted file mode 100644
index 14c09d1..0000000
--- a/tests/data/src/inc.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// Check integer operations
-
-int main() {
- int a = 0;
- printf("%d\n", a++);
- printf("%d\n", a++);
- printf("%d\n", a--);
- printf("%d\n", a--);
- printf("%d\n", ++a);
- printf("%d\n", ++a);
- printf("%d\n", --a);
- printf("%d\n", --a);
- return a;
-}
diff --git a/tests/data/src/iops.c b/tests/data/src/iops.c
deleted file mode 100644
index 780e95d..0000000
--- a/tests/data/src/iops.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// Check integer operations
-
-void loops() {
- int y;
- printf("++\n");
- for(y = 0; y < 10; y++) {
- printf("%d\n", y);
- }
- printf("--\n");
- for(y = 10; y >= 0; y--) {
- printf("%d\n", y);
- }
-}
-
-void checkLiterals() {
- printf("Literals: %d %d\n", 1, -1);
-}
-
-int main() {
- checkLiterals();
- loops();
- return 0;
-}
diff --git a/tests/data/src/locals.c b/tests/data/src/locals.c
deleted file mode 100644
index f1ef363..0000000
--- a/tests/data/src/locals.c
+++ /dev/null
@@ -1,71 +0,0 @@
-int a;
-
-int f() {
- int a;
- // Undefined variable b
- // printf("f 0: a = %d b = %d\n", a, b);
- printf("f 0: a = %d\n", a);
- a = 2;
- printf("f 1: a = %d\n", a);
-}
-
-int g(int a) {
- printf("g 0: a = %d\n", a);
- a = 3;
- printf("g 1: a = %d\n", a);
-}
-
-int h(int a) {
- // int a; // gcc 4.3 says error: 'a' redeclared as different kind of symbol
-
- printf("h 0: a = %d\n", a);
- a = 4;
- printf("h 1: a = %d\n", a);
-}
-
-// Already defined global
-// int h() {}
-int globCheck() {
- fprintf(stdout, "globCheck()\n");
-}
-
-int fwdCheck() {
- b();
- // Undefined forward reference
- // c();
-}
-
-int b() {
- printf("b()\n");
-}
-
-int nested() {
- int a;
- printf("nested 0: a = %d\n", a);
- a = 50;
- printf("nested 1: a = %d\n", a);
- {
- int a;
- printf("nested 2: a = %d\n", a);
- a = 51;
- printf("nested 3: a = %d\n", a);
- }
- printf("nested 4: a = %d\n", a);
-}
-
-int main() {
- globCheck();
- fwdCheck();
- printf("main 0: a = %d\n", a);
- a = 5;
- printf("main 1: a = %d\n", a);
- f();
- printf("main 2: a = %d\n", a);
- g(77);
- printf("main 3: a = %d\n", a);
- h(30);
- printf("main 4: a = %d\n", a);
- nested();
- printf("main 5: a = %d\n", a);
- return 0;
-}
diff --git a/tests/data/src/missing-main.c b/tests/data/src/missing-main.c
deleted file mode 100644
index e73eec4..0000000
--- a/tests/data/src/missing-main.c
+++ /dev/null
@@ -1,4 +0,0 @@
-/* No main. */
-
-a() {
-}
\ No newline at end of file
diff --git a/tests/data/src/otcc-ansi.c b/tests/data/src/otcc-ansi.c
deleted file mode 100644
index e1534bd..0000000
--- a/tests/data/src/otcc-ansi.c
+++ /dev/null
@@ -1,469 +0,0 @@
-// #include <stdio.h>
-extern int stdin, stderr, errno;
-int d, z, C, h, P, K, ac, q, G, v, Q, R, D, L, W, M;
-
-void w();
-void ab(int);
-
-void E(int e) {
- *(char*) D++ = e;
-}
-
-void o() {
- if (L) {
- h = *(char*) L++;
- if (h == 2) {
- L = 0;
- h = W;
- }
- } else
- h = fgetc(Q);
-}
-
-int X() {
- return isalnum(h) | h == 95;
-}
-
-void Y() {
- if (h == 92) {
- o();
- if (h == 110)
- h = 10;
- }
-}
-
-void ad() {
- int e, j, m;
- while (isspace(h) | h == 35) {
- if (h == 35) {
- o();
- ad();
- if (d == 536) {
- ad();
- E(32);
- *(int*) d = 1;
- *(int*) (d + 4) = D;
- }
- while (h != 10) {
- E(h);
- o();
- }
- E(h);
- E(2);
- }
- o();
- }
- C = 0;
- d = h;
- if (X()) {
- E(32);
- M = D;
- while (X()) {
- E(h);
- o();
- }
- if (isdigit(d)) {
- z = strtol(M, 0, 0);
- d = 2;
- } else {
- *(char*) D = 32;
- d = strstr(R, M - 1) - R;
- *(char*) D = 0;
- d = d * 8 + 256;
- if (d > 536) {
- d = P + d;
- if (*(int*) d == 1) {
- L = *(int*) (d + 4);
- W = h;
- o();
- ad();
- }
- }
- }
- } else {
- o();
- if (d == 39) {
- d = 2;
- Y();
- z = h;
- o();
- o();
- } else if (d == 47 & h == 42) {
- o();
- while (h) {
- while (h != 42)
- o();
- o();
- if (h == 47)
- h = 0;
- }
- o();
- ad();
- } else {
- e
- = "++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!='g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
- while (j = *(char*) e++) {
- m = *(char*) e++;
- z = 0;
- while ((C = *(char*) e++ - 98) < 0)
- z = z * 64 + C + 64;
- if (j == d & (m == h | m == 64)) {
- if (m == h) {
- o();
- d = 1;
- }
- break;
- }
- }
- }
- }
-}
-
-void ae(int g) {
- while( g&&g!=-1) {
- *(char*) q++=g;
- g=g>>8;
- }
-}
-
-void A(int e) {
- int g;
- while( e) {
- g=*(int*) e;
- *(int*) e=q-e-4;
- e=g;
- }
-}
-
-int s(int g, int e) {
- ae(g);
- *(int*) q = e;
- e = q;
- q = q + 4;
- return e;
-}
-
-void H(int e) {
- s(184,e);
-}
-
-int B(int e) {
- return s(233,e);
-}
-
-int S(int j, int e) {
- ae(1032325);
- return s(132 + j, e);
-}
-
-void Z(int e) {
- ae( 49465);
- H(0);
- ae( 15);
- ae( e+144);
- ae( 192);
-}
-
-void N(int j, int e) {
- ae(j + 131);
- s((e > -512 && e < 512) << 7 | 5, e);
-}
-
-void T (int j) {
- int g,e,m,aa;
- g=1;
- if( d == 34) {
- H(v);
- while( h!=34) {
- Y ();
- *(char*) v++=h;
- o ();
- }
- *(char*) v=0;
- v=v +4&-4;
- o ();
- ad();
- }
- else {
- aa=C;
- m= z;
- e=d;
- ad();
- if( e == 2) {
- H(m);
- }
- else if( aa == 2) {
- T(0);
- s(185,0);
- if( e == 33)Z(m);
- else ae( m);
- }
- else if( e == 40) {
- w ();
- ad();
- }
- else if( e == 42) {
- ad();
- e=d;
- ad();
- ad();
- if( d == 42) {
- ad();
- ad();
- ad();
- ad();
- e=0;
- }
- ad();
- T(0);
- if( d == 61) {
- ad();
- ae( 80);
- w ();
- ae( 89);
- ae( 392+(e == 256));
- }
- else if( e) {
- if( e == 256)ae( 139);
- else ae( 48655);
- q++;
- }
- }
- else if( e == 38) {
- N(10,*(int*) d);
- ad();
- }
- else {
- g=*(int*) e;
- if(!g)g=dlsym(0,M);
- if( d == 61&j) {
- ad();
- w ();
- N(6,g);
- }
- else if( d!= 40) {
- N(8,g);
- if( C == 11) {
- N(0,g);
- ae( z);
- ad();
- }
- }
- }
- }
- if( d == 40) {
- if( g == 1)ae( 80);
- m= s(60545,0);
- ad();
- j=0;
- while( d!= 41) {
- w ();
- s(2393225,j);
- if( d == 44)ad();
- j=j +4;
- }
- *(int*) m= j;
- ad();
- if(!g) {
- e=e +4;
- *(int*) e=s(232,*(int*) e);
- }
- else if( g == 1) {
- s(2397439,j);
- j=j +4;
- }
- else {
- s(232,g-q-5);
- }
- if( j)s(50305,j);
- }
-}
-
-void O (int j) {
- int e,g,m;
- if( j--== 1)T(1);
- else {
- O (j);
- m= 0;
- while( j == C) {
- g=d;
- e=z;
- ad();
- if( j>8) {
- m= S(e,m);
- O (j);
- }
- else {
- ae( 80);
- O (j);
- ae( 89);
- if( j == 4|j == 5) {
- Z(e);
- }
- else {
- ae( e);
- if( g == 37)ae( 146);
- }
- }
- }
- if( m&&j>8) {
- m= S(e,m);
- H(e^1);
- B(5);
- A(m);
- H(e);
- }
- }
-}
-
-void w() {
- O(11);
-}
-
-int U() {
- w();
- return S(0, 0);
-}
-
-void I (int j) {
- int m,g,e;
- if( d == 288) {
- ad();
- ad();
- m= U ();
- ad();
- I (j);
- if( d == 312) {
- ad();
- g=B(0);
- A(m);
- I (j);
- A(g);
- }
- else {
- A(m);
- }
- }
- else if( d == 352|d == 504) {
- e=d;
- ad();
- ad();
- if( e == 352) {
- g=q;
- m= U ();
- }
- else {
- if( d!= 59)w ();
- ad();
- g=q;
- m= 0;
- if( d!= 59)m= U ();
- ad();
- if( d!= 41) {
- e=B(0);
- w ();
- B(g-q-5);
- A(e);
- g=e +4;
- }
- }
- ad();
- I(&m);
- B(g-q-5);
- A(m);
- }
- else if( d == 123) {
- ad();
- ab(1);
- while( d!= 125)I (j);
- ad();
- }
- else {
- if( d == 448) {
- ad();
- if( d!= 59)w ();
- K=B(K);
- }
- else if( d == 400) {
- ad();
- *(int*) j=B(*(int*) j);
- }
- else if( d!= 59)w ();
- ad();
- }
-}
-
-void ab (int j) {
- int m;
- while( d == 256|d!=-1&!j) {
- if( d == 256) {
- ad();
- while( d!= 59) {
- if( j) {
- G=G +4;
- *(int*) d=-G;
- }
- else {
- *(int*) d=v;
- v=v +4;
- }
- ad();
- if( d == 44)ad() ;
- }
- ad();
- }
- else {
- A(*(int*)(d +4));
- *(int*) d=q;
- ad();
- ad();
- m= 8;
- while( d!= 41) {
- *(int*) d=m;
- m= m +4;
- ad();
- if( d == 44)ad();
- }
- ad();
- K=G=0;
- ae( 15042901);
- m= s(60545,0);
- I(0);
- A(K);
- ae( 50121);
- *(int*) m= G;
- }
- }
-}
-
-int run(int g, int e) {
- return (*(int(*)()) *(int*) (P + 592))(g, e);
-}
-
-int main(int g, char** e) {
- int result;
- Q = stdin;
- if (g-- > 1) {
- Q = fopen(e[1], "r");
- if (!Q) {
- fprintf(stderr, "otcc-ansi.c: could not open file %s\n", *(int*) e);
- return -2;
- }
- }
- D = strcpy(R = calloc(1, 99999), " int if else while break return for define main ") + 48;
- v = calloc(1, 99999);
- q = ac = calloc(1, 99999);
- P = calloc(1, 99999);
- o();
- ad();
- ab(0);
- if (mprotect(ac & (~ 4095), (99999 + 4095) & (~ 4095), 7)) {
- printf("Mprotect failed. %d\n", errno);
- return -1;
- }
- fprintf(stderr, "otcc-ansi.c: About to execute compiled code:\n");
- result = run(g, e);
- fprintf(stderr, "atcc-ansi.c: result: %d\n", result);
- return result;
-}
-
diff --git a/tests/data/src/otcc-noinclude.c b/tests/data/src/otcc-noinclude.c
deleted file mode 100644
index 530f9e2..0000000
--- a/tests/data/src/otcc-noinclude.c
+++ /dev/null
@@ -1,446 +0,0 @@
-// #include <stdio.h>
-#define k *(int*)
-#define a if(
-#define c ad()
-#define i else
-#define p while(
-#define x *(char*)
-#define b ==
-#define V =calloc(1,99999)
-#define f ()
-#define J return
-#define l ae(
-#define n e)
-#define u d!=
-#define F int
-#define y (j)
-#define r m=
-#define t +4
-F d,z,C,h,P,K,ac,q,G,v,Q,R,D,L,W,M;
-E(n{
-x D++=e;
-}
-o f{
-a L){
-h=x L++;
-a h b 2){
-L=0;
-h=W;
-}
-}
-i h=fgetc(Q);
-}
-X f{
-J isalnum(h)|h b 95;
-}
-Y f{
-a h b 92){
-o f;
-a h b 110)h=10;
-}
-}
-c{
-F e,j,m;
-p isspace(h)|h b 35){
-a h b 35){
-o f;
-c;
-a d b 536){
-c;
-E(32);
-k d=1;
-k(d t)=D;
-}
-p h!=10){
-E(h);
-o f;
-}
-E(h);
-E(2);
-}
-o f;
-}
-C=0;
-d=h;
-a X f){
-E(32);
-M=D;
-p X f){
-E(h);
-o f;
-}
-a isdigit(d)){
-z=strtol(M,0,0);
-d=2;
-}
-i{
-x D=32;
-d=strstr(R,M-1)-R;
-x D=0;
-d=d*8+256;
-a d>536){
-d=P+d;
-a k d b 1){
-L=k(d t);
-W=h;
-o f;
-c;
-}
-}
-}
-}
-i{
-o f;
-a d b 39){
-d=2;
-Y f;
-z=h;
-o f;
-o f;
-}
-i a d b 47&h b 42){
-o f;
-p h){
-p h!=42)o f;
-o f;
-a h b 47)h=0;
-}
-o f;
-c;
-}
-i{
-e="++#m--%am*@R<^1c/@%[_[H3c%@%[_[H3c+@.B#d-@%:_^BKd<<Z/03e>>`/03e<=0f>=/f<@.f>@1f==&g!='g&&k||#l&@.BCh^@.BSi|@.B+j~@/%Yd!@&d*@b";
-p j=x e++){
-r x e++;
-z=0;
-p(C=x e++-98)<0)z=z*64+C+64;
-a j b d&(m b h|m b 64)){
-a m b h){
-o f;
-d=1;
-}
-break;
-}
-}
-}
-}
-}
-l g){
-p g&&g!=-1){
-x q++=g;
-g=g>>8;
-}
-}
-A(n{
-F g;
-p n{
-g=k e;
-k e=q-e-4;
-e=g;
-}
-}
-s(g,n{
-l g);
-k q=e;
-e=q;
-q=q t;
-J e;
-}
-H(n{
-s(184,n;
-}
-B(n{
-J s(233,n;
-}
-S(j,n{
-l 1032325);
-J s(132+j,n;
-}
-Z(n{
-l 49465);
-H(0);
-l 15);
-l e+144);
-l 192);
-}
-N(j,n{
-l j+131);
-s((e<512)<<7|5,n;
-}
-T y{
-F g,e,m,aa;
-g=1;
-a d b 34){
-H(v);
-p h!=34){
-Y f;
-x v++=h;
-o f;
-}
-x v=0;
-v=v t&-4;
-o f;
-c;
-}
-i{
-aa=C;
-r z;
-e=d;
-c;
-a e b 2){
-H(m);
-}
-i a aa b 2){
-T(0);
-s(185,0);
-a e b 33)Z(m);
-i l m);
-}
-i a e b 40){
-w f;
-c;
-}
-i a e b 42){
-c;
-e=d;
-c;
-c;
-a d b 42){
-c;
-c;
-c;
-c;
-e=0;
-}
-c;
-T(0);
-a d b 61){
-c;
-l 80);
-w f;
-l 89);
-l 392+(e b 256));
-}
-i a n{
-a e b 256)l 139);
-i l 48655);
-q++;
-}
-}
-i a e b 38){
-N(10,k d);
-c;
-}
-i{
-g=k e;
-a!g)g=dlsym(0,M);
-a d b 61&j){
-c;
-w f;
-N(6,g);
-}
-i a u 40){
-N(8,g);
-a C b 11){
-N(0,g);
-l z);
-c;
-}
-}
-}
-}
-a d b 40){
-a g b 1)l 80);
-r s(60545,0);
-c;
-j=0;
-p u 41){
-w f;
-s(2393225,j);
-a d b 44)c;
-j=j t;
-}
-k r j;
-c;
-a!g){
-e=e t;
-k e=s(232,k n;
-}
-i a g b 1){
-s(2397439,j);
-j=j t;
-}
-i{
-s(232,g-q-5);
-}
-a j)s(50305,j);
-}
-}
-O y{
-F e,g,m;
-a j--b 1)T(1);
-i{
-O y;
-r 0;
-p j b C){
-g=d;
-e=z;
-c;
-a j>8){
-r S(e,m);
-O y;
-}
-i{
-l 80);
-O y;
-l 89);
-a j b 4|j b 5){
-Z(n;
-}
-i{
-l n;
-a g b 37)l 146);
-}
-}
-}
-a m&&j>8){
-r S(e,m);
-H(e^1);
-B(5);
-A(m);
-H(n;
-}
-}
-}
-w f{
-O(11);
-}
-U f{
-w f;
-J S(0,0);
-}
-I y{
-F m,g,e;
-a d b 288){
-c;
-c;
-r U f;
-c;
-I y;
-a d b 312){
-c;
-g=B(0);
-A(m);
-I y;
-A(g);
-}
-i{
-A(m);
-}
-}
-i a d b 352|d b 504){
-e=d;
-c;
-c;
-a e b 352){
-g=q;
-r U f;
-}
-i{
-a u 59)w f;
-c;
-g=q;
-r 0;
-a u 59)r U f;
-c;
-a u 41){
-e=B(0);
-w f;
-B(g-q-5);
-A(n;
-g=e t;
-}
-}
-c;
-I(&m);
-B(g-q-5);
-A(m);
-}
-i a d b 123){
-c;
-ab(1);
-p u 125)I y;
-c;
-}
-i{
-a d b 448){
-c;
-a u 59)w f;
-K=B(K);
-}
-i a d b 400){
-c;
-k j=B(k j);
-}
-i a u 59)w f;
-c;
-}
-}
-ab y{
-F m;
-p d b 256|u-1&!j){
-a d b 256){
-c;
-p u 59){
-a j){
-G=G t;
-k d=-G;
-}
-i{
-k d=v;
-v=v t;
-}
-c;
-a d b 44)c;
-}
-c;
-}
-i{
-A(k(d t));
-k d=q;
-c;
-c;
-r 8;
-p u 41){
-k d=m;
-r m t;
-c;
-a d b 44)c;
-}
-c;
-K=G=0;
-l 15042901);
-r s(60545,0);
-I(0);
-A(K);
-l 50121);
-k r G;
-}
-}
-}
-main(g,n{
-Q=stdin;
-a g-->1){
-e=e t;
-Q=fopen(k e,"r");
-}
-D=strcpy(R V," int if else while break return for define main ")+48;
-v V;
-q=ac V;
-P V;
-o f;
-c;
-ab(0);
-J(*(int(*)f)k(P+592))(g,n;
-}
-
diff --git a/tests/data/src/pointers.c b/tests/data/src/pointers.c
deleted file mode 100644
index 461ebeb..0000000
--- a/tests/data/src/pointers.c
+++ /dev/null
@@ -1,15 +0,0 @@
-int main() {
- int* pa = (int*) malloc(100);
- int* pb = pa + 1;
- int* pc = (int*) 0;
- *pa = 1;
- *pb = 2;
- printf("Pointer difference: %d %d\n", pb - pa, ((int) pb) - ((int) pa));
- int c = * (pa + 1);
- printf("Pointer addition: %d\n", c);
- printf("Pointer comparison to zero: %d %d %d\n", pa == 0, pb == 0, pc == 0);
- printf("Pointer comparison: %d %d %d %d %d\n", pa < pb, pa == pb, pa > pb, ! pb, ! pc);
- free(pa);
- return 0;
-}
-
diff --git a/tests/data/src/pointers2.c b/tests/data/src/pointers2.c
deleted file mode 100644
index 69e402f..0000000
--- a/tests/data/src/pointers2.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// Test multiple levels of indirection
-
-void testsingle() {
- int a = 0;
- int* pa = &a;
- printf("a = %d, *pa = %d\n", a, *pa);
- *pa = 2;
- printf("a = %d, *pa = %d\n", a, *pa);
-}
-
-void testdouble() {
- int a = 0;
- int* pa = &a;
- int** ppa = &pa;
- printf("a = %d, *pa = %d **ppa = %d\n", a, *pa, **ppa);
- **ppa = 2;
- printf("a = %d, *pa = %d **ppa = %d\n", a, *pa, **ppa);
-}
-
-void testtripple() {
- int a = 0;
- int* pa = &a;
- int** ppa = &pa;
- int*** pppa = &ppa;
- printf("a = %d, *pa = %d **ppa = %d\n ***pppa = %d", a, *pa, **ppa, ***pppa);
- ***pppa = 2;
- printf("a = %d, *pa = %d **ppa = %d\n ***pppa = %d", a, *pa, **ppa, ***pppa);
-}
-
-int main() {
- testsingle();
- testdouble();
- testdouble();
- return 0;
-}
diff --git a/tests/data/src/returnval-ansi.c b/tests/data/src/returnval-ansi.c
deleted file mode 100644
index 6b53fd5..0000000
--- a/tests/data/src/returnval-ansi.c
+++ /dev/null
@@ -1,8 +0,0 @@
-
-int main(int argc, char** argv) {
- return f();
-}
-
-int f() {
- return 42;
-}
diff --git a/tests/data/src/rollo3.c b/tests/data/src/rollo3.c
deleted file mode 100644
index b21c12f..0000000
--- a/tests/data/src/rollo3.c
+++ /dev/null
@@ -1,9 +0,0 @@
-
-float fabsf(float);
-
-int main(void* con, int ft, int launchID)
-{
- float f = fabsf(-10.0f);
- return f;
-}
-
diff --git a/tests/data/src/short.c b/tests/data/src/short.c
deleted file mode 100644
index 5e222f3..0000000
--- a/tests/data/src/short.c
+++ /dev/null
@@ -1,6 +0,0 @@
-short a = 3;
-int main() {
- short* b = &a;
- *b = *b - 5;
- return a;
-}
diff --git a/tests/data/src/simplest.c b/tests/data/src/simplest.c
deleted file mode 100644
index bae895a..0000000
--- a/tests/data/src/simplest.c
+++ /dev/null
@@ -1 +0,0 @@
-main() {}
diff --git a/tests/data/src/structs.c b/tests/data/src/structs.c
deleted file mode 100644
index e824a3e..0000000
--- a/tests/data/src/structs.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// struct definition and declaration
-struct a {
- int a;
- int b;
-} c;
-
-// Useful anonymous struct declaration
-struct {
- int y;
-} anon1, anon2;
-
-// forward declarations
-struct a;
-struct b;
-struct c;
-
-struct b {int a; int b; };
-
-// struct c {b g; }; // syntax error.
-
-// struct s {float c,a,b,c;} s; // duplicate struct member
-
-struct c {struct b g; };
-
-// struct a { int w; }; // error
-
-void testCopying() {
- struct a {int a[10]; char c;} a, b;
- a.c = 37;
- b.c = 38;
- b = a;
- printf("testCopying: %d == %d\n", a.c, b.c);
-}
-
-void testUnion() {
- union u;
- union u {float f;int i;} u;
- u.f = 1.0f;
- printf("testUnion: %g == 0x%08x\n", u.f, u.i);
-}
-
-struct v {float x, y, z, w; };
-
-void add(struct v* result, struct v* a, struct v* b) {
- result->x = a->x + b->x;
- result->y = a->y + b->y;
- result->z = a->z + b->z;
- result->w = a->w + b->w;
-}
-
-void set(struct v* v, float x, float y, float z, float w) {
- v->x = x;
- v->y = y;
- v->z = z;
- v->w = w;
-}
-
-void print(struct v* v) {
- printf("(%g, %g, %g, %g)\n", v->x, v->y, v->z, v->w);
-}
-
-void testArgs() {
- struct v a, b, c;
- set(&a, 1.0f, 2.0f, 3.0f, 4.0f);
- set(&b, 5.0f, 6.0f, 7.0f, 8.0f);
- add(&c, &a, &b);
- printf("testArgs: ");
- print(&c);
-}
-
-int main() {
- anon1.y = 3;
- anon2.y = anon1.y;
-
- testCopying();
- testUnion();
- testArgs();
-
- struct c cc;
- cc.g.a = 3;
- c.a = 1;
- c.b = 3;
- struct a {int x, y; } z;
- // struct a {int x, y; } z2;
- z.x = c.a;
- struct a *pA;
- pA = &z;
- pA->x += 5;
- return pA->x;
-}
diff --git a/tests/data/src/testStringConcat.c b/tests/data/src/testStringConcat.c
deleted file mode 100644
index bf06ae1..0000000
--- a/tests/data/src/testStringConcat.c
+++ /dev/null
@@ -1,4 +0,0 @@
-int main() {
- return printf("Hello" "," " world\n");
-}
-
diff --git a/tests/data/structs.bc b/tests/data/structs.bc
deleted file mode 100644
index 1534ee2..0000000
--- a/tests/data/structs.bc
+++ /dev/null
Binary files differ
diff --git a/tests/data/testStringConcat.bc b/tests/data/testStringConcat.bc
deleted file mode 100644
index 73385b1..0000000
--- a/tests/data/testStringConcat.bc
+++ /dev/null
Binary files differ
diff --git a/tests/test b/tests/test
deleted file mode 100755
index 8fd6916..0000000
--- a/tests/test
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-
-SCRIPT_DIR=`dirname $BASH_SOURCE`
-cd $SCRIPT_DIR
-python test.py "$@"
-
diff --git a/tests/test.py b/tests/test.py
deleted file mode 100644
index 92aeec1..0000000
--- a/tests/test.py
+++ /dev/null
@@ -1,510 +0,0 @@
-#
-# Copyright (C) 2010 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# Test the bcc compiler
-
-import unittest
-import subprocess
-import os
-import sys
-
-gArmInitialized = False
-gUseArm = True
-gUseX86 = True
-gRunOTCCOutput = True
-
-
-def parseArgv():
- global gUseArm
- global gUseX86
- global gRunOTCCOutput
- for arg in sys.argv[1:]:
- if arg == "--noarm":
- print "--noarm: not testing ARM"
- gUseArm = False
- elif arg == "--nox86":
- print "--nox86: not testing x86"
- gUseX86 = False
- elif arg == "--norunotcc":
- print "--norunotcc detected, not running OTCC output"
- gRunOTCCOutput = False
- else:
- print "Unknown parameter: ", arg
- raise "Unknown parameter"
- sys.argv = sys.argv[0:1]
-
-def compile(args):
- proc = subprocess.Popen(["../libbcc_driver"] + args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
- result = proc.communicate()
- return result
-
-def runCmd(args):
- proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- result = proc.communicate()
- return result[0].strip()
-
-def uname():
- return runCmd(["uname"])
-
-def unameM():
- return runCmd(["uname", "-m"])
-
-def which(item):
- return runCmd(["which", item])
-
-def fileType(item):
- return runCmd(["file", item])
-
-def outputCanRun():
- ft = fileType(which("bcc"))
- return ft.find("ELF 32-bit LSB executable, Intel 80386") >= 0
-
-def checkEnvironment():
- global gRunOTCCOutput
- gRunOTCCOutput = uname() == "Linux" and unameM() != "x86_64" and outputCanRun()
-
-def adb(args):
- return runCmd(["adb"] + args)
-
-def setupArm():
- global gArmInitialized
- if gArmInitialized:
- return
- print "Setting up arm"
- adb(["remount"])
- adb(["shell", "rm", "/system/bin/bcc"])
- adb(["shell", "mkdir", "/system/bin/bccdata"])
- adb(["shell", "mkdir", "/system/bin/bccdata/data"])
- # Clear out old data TODO: handle recursion
- adb(["shell", "rm", "/system/bin/bccdata/data/*"])
- # Copy over data
- for root, dirs, files in os.walk("data"):
- for d in dirs:
- adb(["shell", "mkdir", os.path.join(root, d)])
- for f in files:
- adb(["push", os.path.join(root, f), os.path.join("/system/bin/bccdata", root, f)])
- # Copy over compiler
- adb(["sync"])
- gArmInitialized = True
-
-def compileArm(args):
- setupArm()
- proc = subprocess.Popen(["adb", "shell", "/system/bin/bcc"] + args, stdout=subprocess.PIPE)
- result = proc.communicate()
- return result[0].replace("\r","")
-
-def compare(a, b):
- if a != b:
- firstDiff = firstDifference(a, b)
- print "Strings differ at character %d. Common: %s. Difference '%s' != '%s'" % (
- firstDiff, a[0:firstDiff], safeAccess(a, firstDiff), safeAccess(b, firstDiff))
-
-def safeAccess(s, i):
- if 0 <= i < len(s):
- return s[i]
- else:
- return '?'
-
-def firstDifference(a, b):
- commonLen = min(len(a), len(b))
- for i in xrange(0, commonLen):
- if a[i] != b[i]:
- return i
- return commonLen
-
-# a1 and a2 are the expected stdout and stderr.
-# b1 and b2 are the actual stdout and stderr.
-# Compare the two, sets. Allow any individual line
-# to appear in either stdout or stderr. This is because
-# the way we obtain output on the ARM combines both
-# streams into one sequence.
-
-def compareOuput(a1,a2,b1,b2):
- while True:
- totalLen = len(a1) + len(a2) + len(b1) + len(b2)
- a1, b1 = matchCommon(a1, b1)
- a1, b2 = matchCommon(a1, b2)
- a2, b1 = matchCommon(a2, b1)
- a2, b2 = matchCommon(a2, b2)
- newTotalLen = len(a1) + len(a2) + len(b1) + len(b2)
- if newTotalLen == 0:
- return True
- if newTotalLen == totalLen:
- print "Failed at %d %d %d %d" % (len(a1), len(a2), len(b1), len(b2))
- print "a1", a1
- print "a2", a2
- print "b1", b1
- print "b2", b2
- return False
-
-def matchCommon(a, b):
- """Remove common items from the beginning of a and b,
- return just the tails that are different."""
- while len(a) > 0 and len(b) > 0 and a[0] == b[0]:
- a = a[1:]
- b = b[1:]
- return a, b
-
-def rewritePaths(args):
- return [rewritePath(x) for x in args]
-
-def rewritePath(p):
- """Take a path that's correct on the x86 and convert to a path
- that's correct on ARM."""
- if p.startswith("data/"):
- p = "/system/bin/bccdata/" + p
- return p
-
-class TestACC(unittest.TestCase):
-
- def checkResult(self, out, err, stdErrResult, stdOutResult=""):
- a1 = out.splitlines()
- a2 = err.splitlines()
- b2 = stdErrResult.splitlines()
- b1 = stdOutResult.splitlines()
- self.assertEqual(True, compareOuput(a1,a2,b1,b2))
-
- def compileCheck(self, args, stdErrResult, stdOutResult="",
- targets=['arm', 'x86']):
- global gUseArm
- global gUseX86
- targetSet = frozenset(targets)
- if gUseX86 and 'x86' in targetSet:
- print args
- out, err = compile(args)
- self.checkResult(out, err, stdErrResult, stdOutResult)
- if gUseArm and 'arm' in targetSet:
- out = compileArm(rewritePaths(args))
- self.checkResult(out, "", stdErrResult, stdOutResult)
-
- def compileCheckArm(self, args, result):
- self.assertEqual(compileArm(args), result)
-
- def testCompileReturnVal(self):
- self.compileCheck(["data/returnval-ansi.bc"], "")
-
- def testCompileOTCCANSII(self):
- self.compileCheck(["data/otcc-ansi.bc"], "", "", ['x86'])
-
- def testRunReturnVal(self):
- self.compileCheck(["-c -R", "data/returnval-ansi.bc"],
- "Executing compiled code:\nresult: 42\n")
-
- def testStringLiteralConcatenation(self):
- self.compileCheck(["-c -R", "data/testStringConcat.bc"],
- "Executing compiled code:\nresult: 13\n", "Hello, world\n")
-
- def testRunOTCCANSI(self):
- global gRunOTCCOutput
- if gRunOTCCOutput:
- self.compileCheck(["-c -R", "data/otcc-ansi.bc", "data/returnval.c"],
- "Executing compiled code:\notcc-ansi.c: About to execute compiled code:\natcc-ansi.c: result: 42\nresult: 42\n", "",
- ['x86'])
-
- def testRunOTCCANSI2(self):
- global gRunOTCCOutput
- if gRunOTCCOutput:
- self.compileCheck(["-c -R", "data/otcc-ansi.bc", "data/otcc.c", "data/returnval.c"],
- "Executing compiled code:\notcc-ansi.c: About to execute compiled code:\notcc.c: about to execute compiled code.\natcc-ansi.c: result: 42\nresult: 42\n", "",['x86'])
-
- def testRunConstants(self):
- self.compileCheck(["-c -R", "data/constants.bc"],
- "Executing compiled code:\nresult: 0\n",
- "0 = 0\n010 = 8\n0x10 = 16\n'\\a' = 7\n'\\b' = 8\n'\\f' = 12\n'\\n' = 10\n'\\r' = 13\n'\\t' = 9\n'\\v' = 11\n'\\\\' = 92\n'\\'' = 39\n" +
- "'\\\"' = 34\n'\\?' = 63\n'\\0' = 0\n'\\1' = 1\n'\\12' = 10\n'\\123' = 83\n'\\x0' = 0\n'\\x1' = 1\n'\\x12' = 18\n'\\x123' = 35\n'\\x1f' = 31\n'\\x1F' = 31\n")
-
- def testRunFloat(self):
- self.compileCheck(["-c -R", "data/float.bc"],
- "Executing compiled code:\nresult: 0\n",
- """Constants: 0 0 0 0.01 0.01 0.1 10 10 0.1
-int: 1 float: 2.2 double: 3.3
- ftoi(1.4f)=1
- dtoi(2.4)=2
- itof(3)=3
- itod(4)=4
-globals: 1 2 3 4
-args: 1 2 3 4
-locals: 1 2 3 4
-cast rval: 2 4
-cast lval: 1.1 2 3.3 4
-""")
-
- def testRunFlops(self):
- self.compileCheck(["-c -R", "data/flops.bc"],
- """Executing compiled code:
-result: 0""",
-"""-1.1 = -1.1
-!1.2 = 0
-!0 = 1
-double op double:
-1 + 2 = 3
-1 - 2 = -1
-1 * 2 = 2
-1 / 2 = 0.5
-float op float:
-1 + 2 = 3
-1 - 2 = -1
-1 * 2 = 2
-1 / 2 = 0.5
-double op float:
-1 + 2 = 3
-1 - 2 = -1
-1 * 2 = 2
-1 / 2 = 0.5
-double op int:
-1 + 2 = 3
-1 - 2 = -1
-1 * 2 = 2
-1 / 2 = 0.5
-int op double:
-1 + 2 = 3
-1 - 2 = -1
-1 * 2 = 2
-1 / 2 = 0.5
-double op double:
-1 op 2: < 1 <= 1 == 0 >= 0 > 0 != 1
-1 op 1: < 0 <= 1 == 1 >= 1 > 0 != 0
-2 op 1: < 0 <= 0 == 0 >= 1 > 1 != 1
-double op float:
-1 op 2: < 1 <= 1 == 0 >= 0 > 0 != 1
-1 op 1: < 0 <= 1 == 1 >= 1 > 0 != 0
-2 op 1: < 0 <= 0 == 0 >= 1 > 1 != 1
-float op float:
-1 op 2: < 1 <= 1 == 0 >= 0 > 0 != 1
-1 op 1: < 0 <= 1 == 1 >= 1 > 0 != 0
-2 op 1: < 0 <= 0 == 0 >= 1 > 1 != 1
-int op double:
-1 op 2: < 1 <= 1 == 0 >= 0 > 0 != 1
-1 op 1: < 0 <= 1 == 1 >= 1 > 0 != 0
-2 op 1: < 0 <= 0 == 0 >= 1 > 1 != 1
-double op int:
-1 op 2: < 1 <= 1 == 0 >= 0 > 0 != 1
-1 op 1: < 0 <= 1 == 1 >= 1 > 0 != 0
-2 op 1: < 0 <= 0 == 0 >= 1 > 1 != 1
-branching: 1 0 1
-testpassi: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassf: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassd: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassi: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassf: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassd: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassi: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassf: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassd: 1 2 3 4 5 6 7 8 9 10 11 12
-testpassidf: 1 2 3
-""")
- def testCasts(self):
- self.compileCheck(["-c -R", "data/casts.bc"],
- """Executing compiled code:
-result: 0""", """Reading from a pointer: 3 3
-Writing to a pointer: 4
-Testing casts: 3 3 4.5 4
-Testing reading (int*): 4
-Testing writing (int*): 8 9
-Testing reading (char*): 0x78 0x56 0x34 0x12
-Testing writing (char*): 0x87654321
-f(10)
-Function pointer result: 70
-Testing read/write (float*): 8.8 9.9
-Testing read/write (double*): 8.8 9.9
-""")
-
- def testChar(self):
- self.compileCheck(["-c -R", "data/char.bc"], """Executing compiled code:
-result: 0""", """a = 99, b = 41
-ga = 100, gb = 44""")
-
- def testPointerArithmetic(self):
- self.compileCheck(["-c -R", "data/pointers.bc"], """Executing compiled code:
-result: 0""", """Pointer difference: 1 4
-Pointer addition: 2
-Pointer comparison to zero: 0 0 1
-Pointer comparison: 1 0 0 0 1
-""")
- def testRollo3(self):
- self.compileCheck(["-c -R", "data/rollo3.bc"], """Executing compiled code:
-result: 10""", """""")
-
- def testFloatDouble(self):
- self.compileCheck(["-c -R", "data/floatdouble.bc"], """Executing compiled code:
-result: 0""", """0.002 0.1 10""")
-
- def testIncDec(self):
- self.compileCheck(["-c -R", "data/inc.bc"], """Executing compiled code:
-0
-1
-2
-1
-1
-2
-1
-0
-result: 0
-""","""""")
-
- def testIops(self):
- self.compileCheck(["-c -R", "data/iops.bc"], """Executing compiled code:
-result: 0""", """Literals: 1 -1
-++
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
---
-10
-9
-8
-7
-6
-5
-4
-3
-2
-1
-0
-""")
-
- def testFilm(self):
- self.compileCheck(["-c -R", "data/film.bc"], """Executing compiled code:
-result: 0""", """testing...
-Total bad: 0
-""")
-
- def testpointers2(self):
- self.compileCheck(["-c -R", "data/pointers2.bc"], """Executing compiled code:
-result: 0""", """a = 0, *pa = 0
-a = 2, *pa = 2
-a = 0, *pa = 0 **ppa = 0
-a = 2, *pa = 2 **ppa = 2
-a = 0, *pa = 0 **ppa = 0
-a = 2, *pa = 2 **ppa = 2
-""")
-
- def testassignmentop(self):
- self.compileCheck(["-c -R", "data/assignmentop.bc"], """Executing compiled code:
-result: 0""", """2 *= 5 10
-20 /= 5 4
-17 %= 5 2
-17 += 5 22
-17 -= 5 12
-17<<= 1 34
-17>>= 1 8
-17&= 1 1
-17^= 1 16
-16|= 1 17
-*f() = *f() + 10;
-f()
-f()
-a = 10
-*f() += 10;
-f()
-a = 10
-""")
-
- def testcomma(self):
- self.compileCheck(["-c -R", "data/comma.bc"], """Executing compiled code:
-result: 0""", """statement: 10
-if: a = 0
-while: b = 11
-for: b = 22
-return: 30
-arg: 12
-""")
-
- def testBrackets(self):
- self.compileCheck(["-c -R", "data/brackets.bc"], """Executing compiled code:
-Errors: 0
-2D Errors: 0
-result: 0
-""","""""")
-
- def testShort(self):
- self.compileCheck(["-c -R", "data/short.bc"], """Executing compiled code:
-result: -2
-""","""""")
-
- def testAssignment(self):
- self.compileCheck(["-c -R", "data/assignment.bc"], """Executing compiled code:
-result: 7
-""","""""")
-
- def testArray(self):
- self.compileCheck(["-c -R", "data/array.bc"], """Executing compiled code:
-localInt: 3
-localDouble: 3 3
-globalChar: 3
-globalDouble: 3
-testArgs: 0 2 4
-testDecay: Hi!
-test2D:
-abcdefghijklmnopabcd
-defghijklmnopabcdefg
-ghijklmnopabcdefghij
-jklmnopabcdefghijklm
-mnopabcdefghijklmnop
-pabcdefghijklmnopabc
-cdefghijklmnopabcdef
-fghijklmnopabcdefghi
-ijklmnopabcdefghijkl
-lmnopabcdefghijklmno
-result: 0
-""","""""")
-
- def testDefines(self):
- self.compileCheck(["-c -R", "data/defines.bc"], """Executing compiled code:
-result: 3
-""","""""")
-
- def testFuncArgs(self):
- self.compileCheck(["-c -R", "data/funcargs.bc"], """Executing compiled code:
-result: 4
-""","""""")
-
- def testB2071670(self):
- self.compileCheck(["-c -R", "data/b2071670.bc"], """Executing compiled code:
-result: 1092616192
-""","""""")
-
- def testStructs(self):
- self.compileCheck(["-c -R", "data/structs.bc"], """Executing compiled code:
-testCopying: 37 == 37
-testUnion: 1 == 0x3f800000
-testArgs: (6, 8, 10, 12)
-result: 6
-""","""""")
-
- def testAddressOf(self):
- self.compileCheck(["-c -R", "data/addressOf.bc"], """Executing compiled code:
-testStruct: 10 10 10
-testArray: 1 1 1
-result: 0
-""","""""")
-
-def main():
- checkEnvironment()
- parseArgv()
- unittest.main()
-
-if __name__ == '__main__':
- main()
-
diff --git a/tools/bcc/Android.mk b/tools/bcc/Android.mk
index e005ffc..4d6d462 100644
--- a/tools/bcc/Android.mk
+++ b/tools/bcc/Android.mk
@@ -23,8 +23,7 @@
LOCAL_MODULE := bcc
LOCAL_MODULE_CLASS := EXECUTABLES
-LOCAL_SRC_FILES := \
- main.cpp
+LOCAL_SRC_FILES := Main.cpp
LOCAL_SHARED_LIBRARIES := \
libbcc \
@@ -35,7 +34,6 @@
$(LOCAL_PATH)/../../include
LOCAL_LDLIBS = -ldl
-LOCAL_SRC_FILES := Main.cpp
include $(LIBBCC_HOST_BUILD_MK)
include $(LIBBCC_GEN_CONFIG_MK)
diff --git a/tools/bcc/Main.cpp b/tools/bcc/Main.cpp
index 0d2cff4..0a79338 100644
--- a/tools/bcc/Main.cpp
+++ b/tools/bcc/Main.cpp
@@ -24,7 +24,7 @@
#include <llvm/Config/config.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/FileSystem.h>
-#include <llvm/Support/Path.h>
+#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/system_error.h>
@@ -36,6 +36,7 @@
#include <bcc/ExecutionEngine/ObjectLoader.h>
#include <bcc/ExecutionEngine/SymbolResolverProxy.h>
#include <bcc/ExecutionEngine/SymbolResolvers.h>
+#include <bcc/Renderscript/RSCompilerDriver.h>
#include <bcc/Script.h>
#include <bcc/Source.h>
#include <bcc/Support/CompilerConfig.h>
@@ -51,13 +52,27 @@
//===----------------------------------------------------------------------===//
namespace {
-llvm::cl::list<std::string>
-OptInputFilenames(llvm::cl::Positional, llvm::cl::OneOrMore,
- llvm::cl::desc("<input bitcode files>"));
+llvm::cl::opt<std::string>
+OptInputFilename(llvm::cl::Positional, llvm::cl::ValueRequired,
+ llvm::cl::desc("<input bitcode file>"));
llvm::cl::opt<std::string>
OptOutputFilename("o", llvm::cl::desc("Specify the output filename"),
- llvm::cl::value_desc("filename"));
+ llvm::cl::value_desc("filename"),
+ llvm::cl::init("bcc_output"));
+
+llvm::cl::opt<std::string>
+OptBCLibFilename("bclib", llvm::cl::desc("Specify the bclib filename"),
+ llvm::cl::value_desc("bclib"));
+
+llvm::cl::opt<std::string>
+OptOutputPath("output_path", llvm::cl::desc("Specify the output path"),
+ llvm::cl::value_desc("output path"),
+ llvm::cl::init("."));
+
+llvm::cl::opt<bool>
+OptEmitLLVM("emit-llvm",
+ llvm::cl::desc("Emit an LLVM-IR version of the generated program"));
#ifdef TARGET_BUILD
const std::string OptTargetTriple(DEFAULT_TARGET_TRIPLE_STRING);
@@ -77,56 +92,12 @@
//===----------------------------------------------------------------------===//
// Compiler Options
//===----------------------------------------------------------------------===//
-llvm::cl::opt<bool>
-OptPIC("fPIC", llvm::cl::desc("Generate fully relocatable, position independent"
- " code"));
+// RenderScript uses -O3 by default
llvm::cl::opt<char>
OptOptLevel("O", llvm::cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
- "(default: -O2)"),
- llvm::cl::Prefix, llvm::cl::ZeroOrMore, llvm::cl::init('2'));
-
-llvm::cl::opt<bool>
-OptC("c", llvm::cl::desc("Compile and assemble, but do not link."));
-
-//===----------------------------------------------------------------------===//
-// Linker Options
-//===----------------------------------------------------------------------===//
-// FIXME: this option will be removed in the future when MCLinker is capable
-// of generating shared library directly from given bitcode. It only
-// takes effect when -shared is supplied.
-llvm::cl::opt<std::string>
-OptImmObjectOutput("or", llvm::cl::desc("Specify the filename for output the "
- "intermediate relocatable when linking "
- "the input bitcode to the shared "
- "library"), llvm::cl::ValueRequired);
-
-llvm::cl::opt<bool>
-OptShared("shared", llvm::cl::desc("Create a shared library from input bitcode "
- "files"));
-
-
-//===----------------------------------------------------------------------===//
-// Loader Options
-//===----------------------------------------------------------------------===//
-llvm::cl::opt<bool>
-OptRunEntry("R", llvm::cl::desc("Run the entry method after successfully load "
- "and compile."));
-
-llvm::cl::opt<std::string>
-OptEntryFunction("entry-function", llvm::cl::desc("Specify the entry function "
- "for -R (default: main)"),
- llvm::cl::value_desc("function"), llvm::cl::init("main"));
-
-llvm::cl::opt<bool>
-OptEnableGDB("enable-gdb", llvm::cl::desc("Enable GDB JIT debugging when "
- "runs the entry method"));
-
-llvm::cl::list<std::string>
-OptRuntimeLibs("load", llvm::cl::desc("Specify the shared libraries for "
- "execution (e.g., -load=c will search "
- "and load libc.so for execution)"),
- llvm::cl::ZeroOrMore, llvm::cl::value_desc("namespec"));
+ "(default: -O3)"),
+ llvm::cl::Prefix, llvm::cl::ZeroOrMore, llvm::cl::init('3'));
// Override "bcc -version" since the LLVM version information is not correct on
// Android build.
@@ -148,42 +119,8 @@
} // end anonymous namespace
static inline
-Script *PrepareScript(BCCContext &pContext,
- const llvm::cl::list<std::string> &pBitcodeFiles) {
- Script *result = NULL;
-
- for (unsigned i = 0; i < pBitcodeFiles.size(); i++) {
- const std::string &input_bitcode = pBitcodeFiles[i];
- Source *source = Source::CreateFromFile(pContext, input_bitcode);
- if (source == NULL) {
- llvm::errs() << "Failed to load llvm module from file `" << input_bitcode
- << "'!\n";
- return NULL;
- }
-
- if (result != NULL) {
- if (!result->mergeSource(*source, /* pPreserveSource */false)) {
- llvm::errs() << "Failed to merge the llvm module `" << input_bitcode
- << "' to compile!\n";
- delete source;
- return NULL;
- }
- } else {
- result = new (std::nothrow) Script(*source);
- if (result == NULL) {
- llvm::errs() << "Out of memory when create script for file `"
- << input_bitcode << "'!\n";
- delete source;
- return NULL;
- }
- }
- }
-
- return result;
-}
-
-static inline
-bool ConfigCompiler(Compiler &pCompiler) {
+bool ConfigCompiler(RSCompilerDriver &pRSCD) {
+ RSCompiler *RSC = pRSCD.getCompiler();
CompilerConfig *config = NULL;
#ifdef TARGET_BUILD
@@ -196,24 +133,19 @@
return false;
}
- // Setup the config according to the valud of command line option.
- if (OptPIC) {
- config->setRelocationModel(llvm::Reloc::PIC_);
- }
switch (OptOptLevel) {
case '0': config->setOptimizationLevel(llvm::CodeGenOpt::None); break;
case '1': config->setOptimizationLevel(llvm::CodeGenOpt::Less); break;
- case '3': config->setOptimizationLevel(llvm::CodeGenOpt::Aggressive); break;
- case '2':
+ case '2': config->setOptimizationLevel(llvm::CodeGenOpt::Default); break;
+ case '3':
default: {
- config->setOptimizationLevel(llvm::CodeGenOpt::Default);
+ config->setOptimizationLevel(llvm::CodeGenOpt::Aggressive);
break;
}
}
- Compiler::ErrorCode result = pCompiler.config(*config);
-
- delete config;
+ pRSCD.setConfig(config);
+ Compiler::ErrorCode result = RSC->config(*config);
if (result != Compiler::kSuccess) {
llvm::errs() << "Failed to configure the compiler! (detail: "
@@ -224,43 +156,6 @@
return true;
}
-#define DEFAULT_OUTPUT_PATH "/sdcard/a.out"
-static inline
-std::string DetermineOutputFilename(const std::string &pOutputPath) {
- if (!pOutputPath.empty()) {
- return pOutputPath;
- }
-
- // User doesn't specify the value to -o.
- if (OptInputFilenames.size() > 1) {
- llvm::errs() << "Use " DEFAULT_OUTPUT_PATH " for output file!\n";
- return DEFAULT_OUTPUT_PATH;
- }
-
- // There's only one input bitcode file.
- const std::string &input_path = OptInputFilenames[0];
- llvm::SmallString<200> output_path(input_path);
-
- llvm::error_code err = llvm::sys::fs::make_absolute(output_path);
- if (err != llvm::errc::success) {
- llvm::errs() << "Failed to determine the absolute path of `" << input_path
- << "'! (detail: " << err.message() << ")\n";
- return "";
- }
-
- if (OptC) {
- // -c was specified. Replace the extension to .o.
- llvm::sys::path::replace_extension(output_path, "o");
- } else {
- // Use a.out under current working directory when compile executable or
- // shared library.
- llvm::sys::path::remove_filename(output_path);
- llvm::sys::path::append(output_path, "a.out");
- }
-
- return output_path.c_str();
-}
-
static inline
bool CompileScript(Compiler &pCompiler, Script &pScript,
const std::string &pOutputPath) {
@@ -284,150 +179,38 @@
return true;
}
-static inline
-bool PrepareRuntimes(std::vector<SymbolResolverInterface *> &pRuntimes) {
- llvm::SmallVector<const char *, 2> search_paths;
-
-#ifdef TARGET_BUILD
- search_paths.push_back("/system/lib/");
-#else
- search_paths.push_back("/lib/");
- search_paths.push_back("/usr/lib/");
-#endif
-
- // Most of the following lines comes from llvm/tools/llvm-ld.cpp.
- for (unsigned i = 0; i < OptRuntimeLibs.size(); i++) {
- const std::string &lib = OptRuntimeLibs[i];
- llvm::sys::Path lib_path;
- for (llvm::SmallVectorImpl<const char *>::const_iterator
- search_path_iter = search_paths.begin(),
- search_path_end = search_paths.end();
- search_path_iter != search_path_end; search_path_iter++) {
-
- lib_path = *search_path_iter;
- lib_path.appendComponent("lib" + lib);
- lib_path.appendSuffix(llvm::sys::Path::GetDLLSuffix());
-
- if (lib_path.isEmpty()) {
- if (!lib_path.isDynamicLibrary()) {
- lib_path = llvm::sys::Path();
- } else {
- break;
- }
- }
- } // for each search_paths
- if (lib_path.isEmpty()) {
- // FIXME: llvm::sys::Path::FindLibrary(...) is able to consume
- // 'const std::string &' instead of 'std::string &'.
- std::string lib_tmp = lib;
- lib_path = llvm::sys::Path::FindLibrary(lib_tmp);
- }
- if (lib_path.isEmpty()) {
- llvm::errs() << "Unable to find `lib" << lib << "' for execution!\n";
- llvm::DeleteContainerPointers(pRuntimes);
- return false;
- } else {
- DyldSymbolResolver *dyld_resolver =
- new (std::nothrow) DyldSymbolResolver(lib_path.str().c_str());
-
- if (dyld_resolver != NULL) {
- pRuntimes.push_back(dyld_resolver);
- } else {
- llvm::errs() << "Out of memory when load `" << lib_path.str() << "'!\n";
- llvm::DeleteContainerPointers(pRuntimes);
- return false;
- }
- }
- } // for each OptRuntimeLibs
-
- return true;
-}
-
-static inline
-bool LoadAndRun(const std::string &pOutputExecutable) {
- SymbolResolverProxy runtime_resolver;
-
- // Include compiler runtime.
- CompilerRTSymbolResolver compiler_runtimes;
- runtime_resolver.chainResolver(compiler_runtimes);
-
- // Open the output file for execution.
- InputFile input_exec(pOutputExecutable);
- if (input_exec.hasError()) {
- llvm::errs() << "Failed to open the executable `" << pOutputExecutable
- << "'! (detail: " << input_exec.getErrorMessage() << ")\n";
- return false;
- }
-
- // Load the runtime libraries given in command line.
- std::vector<SymbolResolverInterface *> lib_runtimes;
- if (!PrepareRuntimes(lib_runtimes)) {
- return false;
- }
-
- for (std::vector<SymbolResolverInterface *>::const_iterator
- librt_iter = lib_runtimes.begin(), librt_end = lib_runtimes.end();
- librt_iter != librt_end; librt_iter++) {
- runtime_resolver.chainResolver(*(*librt_iter));
- }
-
- // Load the output file.
- ObjectLoader *loader = ObjectLoader::Load(input_exec, runtime_resolver,
- OptEnableGDB);
- if (loader == NULL) {
- llvm::errs() << "Failed to load `" << pOutputExecutable << "'!\n";
- llvm::DeleteContainerPointers(lib_runtimes);
- return false;
- }
-
- // Retrieve the address of entry function.
- void *entry = loader->getSymbolAddress(OptEntryFunction.c_str());
- if (entry == NULL) {
- llvm::errs() << "Couldn't find entry method `" << OptEntryFunction
- << "' in " << pOutputExecutable << "' for execution!\n";
- delete loader;
- llvm::DeleteContainerPointers(lib_runtimes);
- return false;
- }
-
- // Execute the entry function.
- int run_result = reinterpret_cast<int (*)()>(entry)();
- llvm::errs() << "result: " << run_result << "\n";
-
- // Clean up.
- delete loader;
- llvm::DeleteContainerPointers(lib_runtimes);
-
- return true;
-}
-
int main(int argc, char **argv) {
llvm::cl::SetVersionPrinter(BCCVersionPrinter);
llvm::cl::ParseCommandLineOptions(argc, argv);
init::Initialize();
BCCContext context;
- Compiler compiler;
+ RSCompilerDriver RSCD;
- Script *script = PrepareScript(context, OptInputFilenames);
- if (script == NULL) {
+ llvm::OwningPtr<llvm::MemoryBuffer> input_data;
+
+ llvm::error_code ec =
+ llvm::MemoryBuffer::getFile(OptInputFilename.c_str(), input_data);
+ if (ec != llvm::error_code::success()) {
+ ALOGE("Failed to load bitcode from path %s! (%s)",
+ OptInputFilename.c_str(), ec.message().c_str());
return EXIT_FAILURE;
}
- if (!ConfigCompiler(compiler)) {
+ llvm::MemoryBuffer *input_memory = input_data.take();
+
+ const char *bitcode = input_memory->getBufferStart();
+ size_t bitcodeSize = input_memory->getBufferSize();
+
+ if (!ConfigCompiler(RSCD)) {
+ ALOGE("Failed to configure compiler");
return EXIT_FAILURE;
}
+ bool built = RSCD.build(context, OptOutputPath.c_str(),
+ OptOutputFilename.c_str(), bitcode, bitcodeSize,
+ OptBCLibFilename.c_str(), NULL, OptEmitLLVM);
- std::string OutputFilename = DetermineOutputFilename(OptOutputFilename);
- if (OutputFilename.empty()) {
- return EXIT_FAILURE;
- }
-
- if (!CompileScript(compiler, *script, OutputFilename)) {
- return EXIT_FAILURE;
- }
-
- if (OptRunEntry && !LoadAndRun(OutputFilename)) {
+ if (!built) {
return EXIT_FAILURE;
}
diff --git a/tools/bcc_compat/Main.cpp b/tools/bcc_compat/Main.cpp
index 4399893..1391de8 100644
--- a/tools/bcc_compat/Main.cpp
+++ b/tools/bcc_compat/Main.cpp
@@ -279,7 +279,10 @@
RSScript *s = NULL;
s = PrepareRSScript(context, OptInputFilenames);
- rscd.build(*s, OutputFilename.c_str(), OptRuntimePath.c_str());
+ if (!rscd.build(*s, OutputFilename.c_str(), OptRuntimePath.c_str())) {
+ fprintf(stderr, "Failed to compile script!");
+ return EXIT_FAILURE;
+ }
return EXIT_SUCCESS;
}
diff --git a/tools/bcc_strip_attr/bcc_strip_attr.cpp b/tools/bcc_strip_attr/bcc_strip_attr.cpp
index 0c1d9cb..19fa9d1 100644
--- a/tools/bcc_strip_attr/bcc_strip_attr.cpp
+++ b/tools/bcc_strip_attr/bcc_strip_attr.cpp
@@ -18,13 +18,14 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IRReader.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;
@@ -90,18 +91,11 @@
static inline std::auto_ptr<Module> LoadFile(const char *argv0,
const std::string &FN,
LLVMContext& Context) {
- sys::Path Filename;
- if (!Filename.set(FN)) {
- errs() << "Invalid file name: '" << FN << "'\n";
- return std::auto_ptr<Module>();
- }
-
SMDiagnostic Err;
- Module* Result = 0;
-
- const std::string &FNStr = Filename.str();
- Result = ParseIRFile(FNStr, Err, Context);
- if (Result) return std::auto_ptr<Module>(Result); // Load successful!
+ Module* Result = ParseIRFile(FN, Err, Context);
+ if (Result) {
+ return std::auto_ptr<Module>(Result); // Load successful!
+ }
Err.print(argv0, errs());
return std::auto_ptr<Module>();
@@ -133,7 +127,7 @@
std::string ErrorInfo;
tool_output_file Out(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
+ sys::fs::F_Binary);
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
return 1;
diff --git a/tools/build/gen-sha1-stamp.py b/tools/build/gen-sha1-stamp.py
index 239d040..012d522 100755
--- a/tools/build/gen-sha1-stamp.py
+++ b/tools/build/gen-sha1-stamp.py
@@ -76,7 +76,6 @@
def print_asm_data(data, size):
col = 0
- sys.stdout.write(".align 8\n")
for i in xrange(size):
c = data[i]
if col == 0:
@@ -95,6 +94,7 @@
def print_asm_symbol_data(sym, h):
sys.stdout.write("""
+.align 8
#ifdef __APPLE_CC__
_%s:\n\
#else\n\