Do not intercept JVM-internal C stdlib calls

The JVM frequently calls strcmp/memcmp/..., which fills up the table of
recent compares with entries that are either duplicates of values
already reported by the bytecode instrumentation or JDK-internal strings
that are not relevant for fuzzing.

This commit adds an ignorelist to the C stdlib interceptors that filters
out calls from known JVM libraries. If the fuzz target has not yet
loaded a native library, all such callbacks are ignored, which greatly
improves fuzzer performance for string-heavy targets. E.g.,
JsonSanitizerDenylistFuzzer takes < 1 million runs now when it used to
take over 3 million.
diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel
index f190a08..d54cdba 100644
--- a/WORKSPACE.bazel
+++ b/WORKSPACE.bazel
@@ -206,9 +206,10 @@
     name = "libFuzzer",
     build_file = "//third_party:libFuzzer.BUILD",
     patches = [
+        "//third_party:libFuzzer-make-interceptors-configurable.patch",
         "//third_party:libFuzzer-pass-death-callback-to-jazzer.patch",
     ],
-    sha256 = "8ad4ddbafac4f2c8f2ea523c2c4196f940e8e16f9e635210537582a48622a5d5",
-    strip_prefix = "llvm-project-llvmorg-11.0.0",
-    url = "https://github.com/llvm/llvm-project/archive/llvmorg-11.0.0.tar.gz",
+    sha256 = "a78949f86fc9852f51b11ceb3e6c2c61bb6e4ebb073198cebddc82451f708adf",
+    strip_prefix = "llvm-project-llvmorg-12.0.0-rc3",
+    url = "https://github.com/llvm/llvm-project/archive/llvmorg-12.0.0-rc3.tar.gz",
 )
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
index 43e8a48..47ebab3 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
@@ -69,6 +69,7 @@
             "div" -> setOf(InstrumentationType.DIV)
             "gep" -> setOf(InstrumentationType.GEP)
             "indir" -> setOf(InstrumentationType.INDIR)
+            "native" -> setOf(InstrumentationType.NATIVE)
             "all" -> InstrumentationType.values().toSet()
             else -> {
                 println("WARN: Skipping unknown instrumentation type $it")
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
index 0e304f4..35ee395 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
@@ -18,6 +18,7 @@
 import com.code_intelligence.jazzer.instrumentor.Hook
 import com.code_intelligence.jazzer.instrumentor.InstrumentationType
 import com.code_intelligence.jazzer.instrumentor.loadHooks
+import com.code_intelligence.jazzer.runtime.NativeLibHooks
 import com.code_intelligence.jazzer.runtime.TraceCmpHooks
 import com.code_intelligence.jazzer.runtime.TraceDivHooks
 import com.code_intelligence.jazzer.runtime.TraceIndirHooks
@@ -81,6 +82,7 @@
                 InstrumentationType.CMP -> TraceCmpHooks::class.java
                 InstrumentationType.DIV -> TraceDivHooks::class.java
                 InstrumentationType.INDIR -> TraceIndirHooks::class.java
+                InstrumentationType.NATIVE -> NativeLibHooks::class.java
                 else -> null
             }
         }
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
index 50904e6..7879384 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
@@ -23,6 +23,7 @@
     DIV,
     GEP,
     INDIR,
+    NATIVE,
 }
 
 internal interface Instrumentor {
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java b/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java
new file mode 100644
index 0000000..495cad7
--- /dev/null
+++ b/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java
@@ -0,0 +1,44 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.code_intelligence.jazzer.runtime;
+
+import com.code_intelligence.jazzer.api.HookType;
+import com.code_intelligence.jazzer.api.MethodHook;
+import java.lang.invoke.MethodHandle;
+
+/**
+ * Hooks that notify the native driver when the fuzz target is about to load a native library.
+ */
+@SuppressWarnings("unused")
+public final class NativeLibHooks {
+  /**
+   * Runs before the single-{@code String} overloads of {@code load} and {@code loadLibrary} on
+   * {@link Runtime} and {@link System} and calls
+   * {@link TraceDataFlowNativeCallbacks#handleLibraryLoad()}.
+   *
+   * <p>None of the hook arguments are inspected.
+   */
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.Runtime",
+      targetMethod = "loadLibrary", targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.System",
+      targetMethod = "loadLibrary", targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.Runtime", targetMethod = "load",
+      targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.System", targetMethod = "load",
+      targetMethodDescriptor = "(Ljava/lang/String;)V")
+  public static void
+  loadLibraryHook(MethodHandle method, Object thisObject, Object[] arguments, int hookId) {
+    TraceDataFlowNativeCallbacks.handleLibraryLoad();
+  }
+}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java b/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
index 147386a..f779cec 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
+++ b/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
@@ -73,4 +73,6 @@
     // as the stack layout required for the call can't be achieved without local variables.
     return Long.compare(arg1, arg2);
   }
+
+  public static native void handleLibraryLoad();
 }
diff --git a/bazel/fuzz_target.bzl b/bazel/fuzz_target.bzl
index 9a18bdf..9f2fe2d 100644
--- a/bazel/fuzz_target.bzl
+++ b/bazel/fuzz_target.bzl
@@ -32,6 +32,7 @@
     ]
     if hook_classes:
         deploy_manifest_lines.append("Jazzer-Hook-Classes: %s" % ":".join(hook_classes))
+
     # Deps can only be specified on java_binary targets with sources, which
     # excludes e.g. Kotlin libraries wrapped into java_binary via runtime_deps.
     target_deps = deps + ["//agent/src/main/java/com/code_intelligence/jazzer/api"] if srcs else []
diff --git a/driver/libfuzzer_callbacks.cpp b/driver/libfuzzer_callbacks.cpp
index d1c754a..398d69d 100644
--- a/driver/libfuzzer_callbacks.cpp
+++ b/driver/libfuzzer_callbacks.cpp
@@ -14,10 +14,14 @@
 
 #include "libfuzzer_callbacks.h"
 
-#include <algorithm>
+#include <fstream>
 #include <iostream>
+#include <utility>
+#include <vector>
 
+#include "absl/strings/match.h"
 #include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
 #include "glog/logging.h"
 #include "sanitizer_hooks_with_pc.h"
 #include "third_party/jni/jni.h"
@@ -172,6 +176,106 @@
                                          static_cast<uintptr_t>(callee_id));
 }
 
+bool is_using_native_libraries = false;
+std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges;
+
+// Decides whether a libc call made from caller_pc is reported to the fuzzer.
+// The patched libFuzzer interceptors (strcmp, memcmp, ...) consult this before
+// invoking the corresponding __sanitizer_weak_hook_* callback.
+extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc(
+    void *caller_pc) {
+  // If the fuzz target is not using native libraries, calls to strcmp, memcmp,
+  // etc. should never be intercepted. The values reported if they were at best
+  // duplicate the values received from our bytecode instrumentation and at
+  // worst pollute the table of recent compares with strings internal to the
+  // JDK.
+  if (!is_using_native_libraries) return false;
+  // If the fuzz target is using native libraries, intercept calls only if they
+  // don't originate from those address ranges that are known to belong to the
+  // JDK.
+  const auto address = reinterpret_cast<uintptr_t>(caller_pc);
+  for (const auto &range : ignore_for_interception_ranges) {
+    // Ranges come from /proc/self/maps, where the end address is exclusive
+    // (it is the first address past the mapping), so an address equal to the
+    // end is not part of the range.
+    if (range.first <= address && address < range.second) return false;
+  }
+  // Deliberately no logging here: this runs for every intercepted libc call.
+  return true;
+}
+
+/**
+ * Adds the address ranges of executable segments of the library lib_name to
+ * the ignorelist for C standard library function interception (strcmp, memcmp,
+ * ...).
+ *
+ * The ranges are read from /proc/self/maps; every line containing lib_name is
+ * considered. Exits the process if such a line has an unexpected format.
+ */
+void ignoreLibraryForInterception(const std::string &lib_name) {
+  const auto num_address_ranges = ignore_for_interception_ranges.size();
+  std::ifstream loaded_libs("/proc/self/maps");
+  std::string line;
+  while (std::getline(loaded_libs, line)) {
+    if (!absl::StrContains(line, lib_name)) continue;
+    // clang-format off
+    // A typical line looks as follows:
+    // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673         /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so
+    // clang-format on
+    std::vector<std::string_view> parts =
+        absl::StrSplit(line, ' ', absl::SkipEmpty());
+    // A path containing spaces splits into more than six parts.
+    if (parts.size() < 6) {
+      std::cout << "ERROR: Invalid format for /proc/self/maps\n"
+                << line << std::endl;
+      exit(1);
+    }
+    // Skip non-executable address ranges.
+    if (!absl::StrContains(parts[1], 'x')) continue;
+    std::string_view range_str = parts[0];
+    std::vector<std::string> range = absl::StrSplit(range_str, '-');
+    if (range.size() != 2) {
+      std::cout
+          << "ERROR: Unexpected address range format in /proc/self/maps line: "
+          << range_str << std::endl;
+      exit(1);
+    }
+    std::size_t start_len;
+    std::size_t end_len;
+    auto start = std::stoull(range[0], &start_len, 16);
+    auto end = std::stoull(range[1], &end_len, 16);
+    // Each address has to be consumed completely by the hex conversion.
+    if (start_len != range[0].size() || end_len != range[1].size()) {
+      std::cout
+          << "ERROR: Unexpected address range format in /proc/self/maps line: "
+          << range_str << std::endl;
+      exit(1);
+    }
+    ignore_for_interception_ranges.emplace_back(start, end);
+  }
+  const auto num_code_segments =
+      ignore_for_interception_ranges.size() - num_address_ranges;
+  LOG(INFO) << "added " << num_code_segments
+            << " code segment of native library " << lib_name
+            << " to interceptor ignorelist";
+}
+
+const std::vector<std::string> kLibrariesToIgnoreForInterception = {
+    // The driver executable itself can be treated just like a library.
+    "jazzer_driver", "libinstrument.so", "libjava.so", "libjimage.so",
+    "libjli.so", "libjvm.so", "libnet.so", "libverify.so", "libzip.so",
+};
+
+// JNI callback: enables libc interception on the first native library load.
+void JNICALL handleLibraryLoad(JNIEnv &env, jclass cls) {
+  if (is_using_native_libraries) return;
+  LOG(INFO) << "detected a native library load, enabling interception for libc "
+               "functions";
+  for (const auto &lib_name : kLibrariesToIgnoreForInterception)
+    ignoreLibraryForInterception(lib_name);
+  is_using_native_libraries = true;  // Last, so the ignorelist is complete.
+}
+
 void registerCallback(JNIEnv &env, const char *java_hooks_class_name,
                       const JNINativeMethod *methods, int num_methods) {
   auto java_hooks_class = env.FindClass(java_hooks_class_name);
@@ -263,6 +367,15 @@
                      sizeof(indir_methods) / sizeof(indir_methods[0]));
   }
 
+  {
+    JNINativeMethod native_methods[]{{(char *)"handleLibraryLoad",
+                                      (char *)"()V",
+                                      (void *)(&handleLibraryLoad)}};
+
+    registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, native_methods,
+                     sizeof(native_methods) / sizeof(native_methods[0]));
+  }
+
   return env.ExceptionCheck();
 }
 
diff --git a/third_party/BUILD.bazel b/third_party/BUILD.bazel
index 11a555e..b391ce5 100644
--- a/third_party/BUILD.bazel
+++ b/third_party/BUILD.bazel
@@ -2,6 +2,7 @@
     "gflags-use-double-dash-args.patch",
     "jacoco-make-probe-inserter-subclassable.patch",
     "jacoco_internal.BUILD",
+    "libFuzzer-make-interceptors-configurable.patch",
     "libFuzzer-pass-death-callback-to-jazzer.patch",
     "libFuzzer.BUILD",
     "libjpeg_turbo.BUILD",
diff --git a/third_party/libFuzzer-make-interceptors-configurable.patch b/third_party/libFuzzer-make-interceptors-configurable.patch
new file mode 100644
index 0000000..9420c4a
--- /dev/null
+++ b/third_party/libFuzzer-make-interceptors-configurable.patch
@@ -0,0 +1,109 @@
+diff --git compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
+index b87798603fda..10e34ee86cce 100644
+--- compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
++++ compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
+@@ -147,11 +147,18 @@ DEFINE_REAL(char *, strstr, const char *, const char *)
+ DEFINE_REAL(char *, strcasestr, const char *, const char *)
+ DEFINE_REAL(void *, memmem, const void *, size_t, const void *, size_t)
+
++extern "C" __attribute__((weak)) bool
++__sanitizer_weak_is_relevant_pc(void * caller_pc) {
++  return false;
++}
++
+ ATTRIBUTE_INTERFACE int bcmp(const char *s1, const char *s2, size_t n) {
+   if (!FuzzerInited)
+     return internal_memcmp(s1, s2, n);
+   int result = REAL(bcmp)(s1, s2, n);
+-  __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_memcmp(caller_pc, s1, s2, n, result);
+   return result;
+ }
+
+@@ -159,7 +166,9 @@ ATTRIBUTE_INTERFACE int memcmp(const void *s1, const void *s2, size_t n) {
+   if (!FuzzerInited)
+     return internal_memcmp(s1, s2, n);
+   int result = REAL(memcmp)(s1, s2, n);
+-  __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_memcmp(caller_pc, s1, s2, n, result);
+   return result;
+ }
+
+@@ -167,7 +176,9 @@ ATTRIBUTE_INTERFACE int strncmp(const char *s1, const char *s2, size_t n) {
+   if (!FuzzerInited)
+     return internal_strncmp(s1, s2, n);
+   int result = REAL(strncmp)(s1, s2, n);
+-  __sanitizer_weak_hook_strncmp(GET_CALLER_PC(), s1, s2, n, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strncmp(caller_pc, s1, s2, n, result);
+   return result;
+ }
+
+@@ -175,21 +186,27 @@ ATTRIBUTE_INTERFACE int strcmp(const char *s1, const char *s2) {
+   if (!FuzzerInited)
+     return internal_strcmp(s1, s2);
+   int result = REAL(strcmp)(s1, s2);
+-  __sanitizer_weak_hook_strcmp(GET_CALLER_PC(), s1, s2, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strcmp(caller_pc, s1, s2, result);
+   return result;
+ }
+
+ ATTRIBUTE_INTERFACE int strncasecmp(const char *s1, const char *s2, size_t n) {
+   ensureFuzzerInited();
+   int result = REAL(strncasecmp)(s1, s2, n);
+-  __sanitizer_weak_hook_strncasecmp(GET_CALLER_PC(), s1, s2, n, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strncasecmp(caller_pc, s1, s2, n, result);
+   return result;
+ }
+
+ ATTRIBUTE_INTERFACE int strcasecmp(const char *s1, const char *s2) {
+   ensureFuzzerInited();
+   int result = REAL(strcasecmp)(s1, s2);
+-  __sanitizer_weak_hook_strcasecmp(GET_CALLER_PC(), s1, s2, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strcasecmp(caller_pc, s1, s2, result);
+   return result;
+ }
+
+@@ -197,14 +214,18 @@ ATTRIBUTE_INTERFACE char *strstr(const char *s1, const char *s2) {
+   if (!FuzzerInited)
+     return internal_strstr(s1, s2);
+   char *result = REAL(strstr)(s1, s2);
+-  __sanitizer_weak_hook_strstr(GET_CALLER_PC(), s1, s2, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strstr(caller_pc, s1, s2, result);
+   return result;
+ }
+
+ ATTRIBUTE_INTERFACE char *strcasestr(const char *s1, const char *s2) {
+   ensureFuzzerInited();
+   char *result = REAL(strcasestr)(s1, s2);
+-  __sanitizer_weak_hook_strcasestr(GET_CALLER_PC(), s1, s2, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_strcasestr(caller_pc, s1, s2, result);
+   return result;
+ }
+
+@@ -212,7 +233,9 @@ ATTRIBUTE_INTERFACE
+ void *memmem(const void *s1, size_t len1, const void *s2, size_t len2) {
+   ensureFuzzerInited();
+   void *result = REAL(memmem)(s1, len1, s2, len2);
+-  __sanitizer_weak_hook_memmem(GET_CALLER_PC(), s1, len1, s2, len2, result);
++  void *caller_pc = GET_CALLER_PC();
++  if (__sanitizer_weak_is_relevant_pc(caller_pc))
++    __sanitizer_weak_hook_memmem(caller_pc, s1, len1, s2, len2, result);
+   return result;
+ }
+
diff --git a/third_party/libFuzzer-pass-death-callback-to-jazzer.patch b/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
index 31bd2af..3fb9fbb 100644
--- a/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
+++ b/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
@@ -1,5 +1,5 @@
 diff --git compiler-rt/lib/fuzzer/FuzzerExtFunctions.def compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
-index 51edf8444..e31f00402 100644
+index 51edf8444e94..e31f0040268b 100644
 --- compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
 +++ compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
 @@ -42,7 +42,7 @@ EXT_FUNC(__sanitizer_symbolize_pc, void,
@@ -12,7 +12,7 @@
  EXT_FUNC(__msan_scoped_disable_interceptor_checks, void, (), false);
  EXT_FUNC(__msan_scoped_enable_interceptor_checks, void, (), false);
 diff --git compiler-rt/lib/fuzzer/FuzzerLoop.cpp compiler-rt/lib/fuzzer/FuzzerLoop.cpp
-index 6e3bf44f8..9d17b911c 100644
+index 149742b4c2fe..7b361423cc32 100644
 --- compiler-rt/lib/fuzzer/FuzzerLoop.cpp
 +++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp
 @@ -138,8 +138,8 @@ void Fuzzer::HandleMalloc(size_t Size) {