Do not intercept JVM-internal C stdlib calls
The JVM frequently calls strcmp/memcmp/..., which fills up the table of
recent compares with entries that are either duplicates of values
already reported by the bytecode instrumentation or JDK-internal strings
that are not relevant for fuzzing.
This commit adds an ignorelist to the C stdlib interceptors that filters
out calls from known JVM libraries. If the fuzz target has not yet
loaded a native library, all such callbacks are ignored, which greatly
improves fuzzer performance for string-heavy targets. E.g.,
JsonSanitizerDenylistFuzzer takes < 1 million runs now when it used to
take over 3 million.
diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel
index f190a08..d54cdba 100644
--- a/WORKSPACE.bazel
+++ b/WORKSPACE.bazel
@@ -206,9 +206,10 @@
name = "libFuzzer",
build_file = "//third_party:libFuzzer.BUILD",
patches = [
+ "//third_party:libFuzzer-make-interceptors-configurable.patch",
"//third_party:libFuzzer-pass-death-callback-to-jazzer.patch",
],
- sha256 = "8ad4ddbafac4f2c8f2ea523c2c4196f940e8e16f9e635210537582a48622a5d5",
- strip_prefix = "llvm-project-llvmorg-11.0.0",
- url = "https://github.com/llvm/llvm-project/archive/llvmorg-11.0.0.tar.gz",
+ sha256 = "a78949f86fc9852f51b11ceb3e6c2c61bb6e4ebb073198cebddc82451f708adf",
+ strip_prefix = "llvm-project-llvmorg-12.0.0-rc3",
+ url = "https://github.com/llvm/llvm-project/archive/llvmorg-12.0.0-rc3.tar.gz",
)
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
index 43e8a48..47ebab3 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
@@ -69,6 +69,7 @@
"div" -> setOf(InstrumentationType.DIV)
"gep" -> setOf(InstrumentationType.GEP)
"indir" -> setOf(InstrumentationType.INDIR)
+ "native" -> setOf(InstrumentationType.NATIVE)
"all" -> InstrumentationType.values().toSet()
else -> {
println("WARN: Skipping unknown instrumentation type $it")
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
index 0e304f4..35ee395 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
@@ -18,6 +18,7 @@
import com.code_intelligence.jazzer.instrumentor.Hook
import com.code_intelligence.jazzer.instrumentor.InstrumentationType
import com.code_intelligence.jazzer.instrumentor.loadHooks
+import com.code_intelligence.jazzer.runtime.NativeLibHooks
import com.code_intelligence.jazzer.runtime.TraceCmpHooks
import com.code_intelligence.jazzer.runtime.TraceDivHooks
import com.code_intelligence.jazzer.runtime.TraceIndirHooks
@@ -81,6 +82,7 @@
InstrumentationType.CMP -> TraceCmpHooks::class.java
InstrumentationType.DIV -> TraceDivHooks::class.java
InstrumentationType.INDIR -> TraceIndirHooks::class.java
+ InstrumentationType.NATIVE -> NativeLibHooks::class.java
else -> null
}
}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
index 50904e6..7879384 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/instrumentor/Instrumentor.kt
@@ -23,6 +23,7 @@
DIV,
GEP,
INDIR,
+ NATIVE,
}
internal interface Instrumentor {
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java b/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java
new file mode 100644
index 0000000..495cad7
--- /dev/null
+++ b/agent/src/main/java/com/code_intelligence/jazzer/runtime/NativeLibHooks.java
@@ -0,0 +1,47 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.code_intelligence.jazzer.runtime;
+
+import com.code_intelligence.jazzer.api.HookType;
+import com.code_intelligence.jazzer.api.MethodHook;
+import java.lang.invoke.MethodHandle;
+
+/**
+ * Hooks that invoke a native callback before a native library is loaded.
+ *
+ * <p>The {@code load} and {@code loadLibrary} methods of both {@link Runtime} and {@link System}
+ * are hooked so that {@link TraceDataFlowNativeCallbacks#handleLibraryLoad()} runs first.
+ */
+@SuppressWarnings("unused")
+public final class NativeLibHooks {
+  /**
+   * Forwards every hooked library load to the native callback.
+   *
+   * <p>None of the parameters are read. NOTE(review): they presumably exist only to match the
+   * signature expected of {@link MethodHook} methods - confirm against the hook API.
+   */
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.Runtime",
+      targetMethod = "loadLibrary", targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.System",
+      targetMethod = "loadLibrary", targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.Runtime", targetMethod = "load",
+      targetMethodDescriptor = "(Ljava/lang/String;)V")
+  @MethodHook(type = HookType.BEFORE, targetClassName = "java.lang.System", targetMethod = "load",
+      targetMethodDescriptor = "(Ljava/lang/String;)V")
+  public static void
+  loadLibraryHook(MethodHandle method, Object thisObject, Object[] arguments, int hookId) {
+    TraceDataFlowNativeCallbacks.handleLibraryLoad();
+  }
+}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java b/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
index 147386a..f779cec 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
+++ b/agent/src/main/java/com/code_intelligence/jazzer/runtime/TraceDataFlowNativeCallbacks.java
@@ -73,4 +73,6 @@
// as the stack layout required for the call can't be achieved without local variables.
return Long.compare(arg1, arg2);
}
+
+ public static native void handleLibraryLoad();
}
diff --git a/bazel/fuzz_target.bzl b/bazel/fuzz_target.bzl
index 9a18bdf..9f2fe2d 100644
--- a/bazel/fuzz_target.bzl
+++ b/bazel/fuzz_target.bzl
@@ -32,6 +32,7 @@
]
if hook_classes:
deploy_manifest_lines.append("Jazzer-Hook-Classes: %s" % ":".join(hook_classes))
+
# Deps can only be specified on java_binary targets with sources, which
# excludes e.g. Kotlin libraries wrapped into java_binary via runtime_deps.
target_deps = deps + ["//agent/src/main/java/com/code_intelligence/jazzer/api"] if srcs else []
diff --git a/driver/libfuzzer_callbacks.cpp b/driver/libfuzzer_callbacks.cpp
index d1c754a..398d69d 100644
--- a/driver/libfuzzer_callbacks.cpp
+++ b/driver/libfuzzer_callbacks.cpp
@@ -14,10 +14,14 @@
#include "libfuzzer_callbacks.h"
-#include <algorithm>
+#include <fstream>
#include <iostream>
+#include <utility>
+#include <vector>
+#include "absl/strings/match.h"
#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
#include "glog/logging.h"
#include "sanitizer_hooks_with_pc.h"
#include "third_party/jni/jni.h"
@@ -172,6 +176,106 @@
static_cast<uintptr_t>(callee_id));
}
+bool is_using_native_libraries = false;
+std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges;
+
+extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc(
+    void *caller_pc) {
+  // Decides whether a libc call (strcmp, memcmp, ...) made from caller_pc
+  // should be forwarded to libFuzzer's comparison hooks. Returns true if the
+  // call should be reported and false if it should be ignored.
+  //
+  // If the fuzz target is not using native libraries, calls to strcmp, memcmp,
+  // etc. should never be intercepted. The values reported if they were at best
+  // duplicate the values received from our bytecode instrumentation and at
+  // worst pollute the table of recent compares with strings internal to the
+  // JDK.
+  if (!is_using_native_libraries) return false;
+  // If the fuzz target is using native libraries, intercept calls only if they
+  // don't originate from those address ranges that are known to belong to the
+  // JDK.
+  const auto address = reinterpret_cast<uintptr_t>(caller_pc);
+  for (const auto &range : ignore_for_interception_ranges) {
+    // Ranges are taken from /proc/self/maps, where the end address is the
+    // first address past the mapping, hence the half-open comparison.
+    if (range.first <= address && address < range.second) return false;
+  }
+  // This runs on every intercepted libc call, so it must not log or print.
+  return true;
+}
+
+/**
+ * Adds the address ranges of the executable segments of every mapping in
+ * /proc/self/maps whose line contains lib_name to the ignorelist for C standard
+ * library function interception (strcmp, memcmp, ...). Exits on parse errors.
+ */
+void ignoreLibraryForInterception(const std::string &lib_name) {
+  const auto num_address_ranges = ignore_for_interception_ranges.size();
+  std::ifstream loaded_libs("/proc/self/maps");
+  std::string line;
+  while (std::getline(loaded_libs, line)) {
+    if (!absl::StrContains(line, lib_name)) continue;
+    // clang-format off
+    // A typical line looks as follows (the trailing path may contain spaces):
+    // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so
+    // clang-format on
+    std::vector<std::string_view> parts =
+        absl::StrSplit(line, ' ', absl::SkipEmpty());
+    if (parts.size() < 6) {
+      std::cout << "ERROR: Invalid format for /proc/self/maps\n"
+                << line << std::endl;
+      exit(1);
+    }
+    // Skip non-executable address ranges.
+    if (!absl::StrContains(parts[1], 'x')) continue;
+    std::string_view range_str = parts[0];
+    std::vector<std::string> range = absl::StrSplit(range_str, '-');
+    if (range.size() != 2) {
+      std::cout
+          << "ERROR: Unexpected address range format in /proc/self/maps line: "
+          << range_str << std::endl;
+      exit(1);
+    }
+    std::size_t pos;
+    auto start = std::stoull(range[0], &pos, 16);
+    if (pos != range[0].size()) {
+      std::cout
+          << "ERROR: Unexpected address range format in /proc/self/maps line: "
+          << range_str << std::endl;
+      exit(1);
+    }
+    auto end = std::stoull(range[1], &pos, 16);
+    if (pos != range[1].size()) {
+      std::cout
+          << "ERROR: Unexpected address range format in /proc/self/maps line: "
+          << range_str << std::endl;
+      exit(1);
+    }
+    ignore_for_interception_ranges.emplace_back(start, end);
+  }
+  const auto num_code_segments =
+      ignore_for_interception_ranges.size() - num_address_ranges;
+  LOG(INFO) << "added " << num_code_segments
+            << " code segment of native library " << lib_name
+            << " to interceptor ignorelist";
+}
+
+const std::vector<std::string> kLibrariesToIgnoreForInterception = {
+ // The driver executable itself can be treated just like a library.
+ "jazzer_driver", "libinstrument.so", "libjava.so",
+ "libjimage.so", "libjli.so", "libjvm.so",
+ "libnet.so", "libverify.so", "libzip.so",
+};
+
+void JNICALL handleLibraryLoad(JNIEnv &env, jclass cls) {
+  if (is_using_native_libraries) return;  // only the first load does any work
+  LOG(INFO) << "detected a native library load, enabling interception for libc "
+               "functions";
+  for (const auto &lib_name : kLibrariesToIgnoreForInterception)
+    ignoreLibraryForInterception(lib_name);
+  is_using_native_libraries = true;  // enable only once the ignorelist is built
+}
+
void registerCallback(JNIEnv &env, const char *java_hooks_class_name,
const JNINativeMethod *methods, int num_methods) {
auto java_hooks_class = env.FindClass(java_hooks_class_name);
@@ -263,6 +367,15 @@
sizeof(indir_methods) / sizeof(indir_methods[0]));
}
+ {
+ JNINativeMethod native_methods[]{{(char *)"handleLibraryLoad",
+ (char *)"()V",
+ (void *)(&handleLibraryLoad)}};
+
+ registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, native_methods,
+ sizeof(native_methods) / sizeof(native_methods[0]));
+ }
+
return env.ExceptionCheck();
}
diff --git a/third_party/BUILD.bazel b/third_party/BUILD.bazel
index 11a555e..b391ce5 100644
--- a/third_party/BUILD.bazel
+++ b/third_party/BUILD.bazel
@@ -2,6 +2,7 @@
"gflags-use-double-dash-args.patch",
"jacoco-make-probe-inserter-subclassable.patch",
"jacoco_internal.BUILD",
+ "libFuzzer-make-interceptors-configurable.patch",
"libFuzzer-pass-death-callback-to-jazzer.patch",
"libFuzzer.BUILD",
"libjpeg_turbo.BUILD",
diff --git a/third_party/libFuzzer-make-interceptors-configurable.patch b/third_party/libFuzzer-make-interceptors-configurable.patch
new file mode 100644
index 0000000..9420c4a
--- /dev/null
+++ b/third_party/libFuzzer-make-interceptors-configurable.patch
@@ -0,0 +1,109 @@
+diff --git compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
+index b87798603fda..10e34ee86cce 100644
+--- compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
++++ compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp
+@@ -147,11 +147,18 @@ DEFINE_REAL(char *, strstr, const char *, const char *)
+ DEFINE_REAL(char *, strcasestr, const char *, const char *)
+ DEFINE_REAL(void *, memmem, const void *, size_t, const void *, size_t)
+
++extern "C" __attribute__((weak)) bool
++__sanitizer_weak_is_relevant_pc(void * caller_pc) {
++ return false;
++}
++
+ ATTRIBUTE_INTERFACE int bcmp(const char *s1, const char *s2, size_t n) {
+ if (!FuzzerInited)
+ return internal_memcmp(s1, s2, n);
+ int result = REAL(bcmp)(s1, s2, n);
+- __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_memcmp(caller_pc, s1, s2, n, result);
+ return result;
+ }
+
+@@ -159,7 +166,9 @@ ATTRIBUTE_INTERFACE int memcmp(const void *s1, const void *s2, size_t n) {
+ if (!FuzzerInited)
+ return internal_memcmp(s1, s2, n);
+ int result = REAL(memcmp)(s1, s2, n);
+- __sanitizer_weak_hook_memcmp(GET_CALLER_PC(), s1, s2, n, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_memcmp(caller_pc, s1, s2, n, result);
+ return result;
+ }
+
+@@ -167,7 +176,9 @@ ATTRIBUTE_INTERFACE int strncmp(const char *s1, const char *s2, size_t n) {
+ if (!FuzzerInited)
+ return internal_strncmp(s1, s2, n);
+ int result = REAL(strncmp)(s1, s2, n);
+- __sanitizer_weak_hook_strncmp(GET_CALLER_PC(), s1, s2, n, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strncmp(caller_pc, s1, s2, n, result);
+ return result;
+ }
+
+@@ -175,21 +186,27 @@ ATTRIBUTE_INTERFACE int strcmp(const char *s1, const char *s2) {
+ if (!FuzzerInited)
+ return internal_strcmp(s1, s2);
+ int result = REAL(strcmp)(s1, s2);
+- __sanitizer_weak_hook_strcmp(GET_CALLER_PC(), s1, s2, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strcmp(caller_pc, s1, s2, result);
+ return result;
+ }
+
+ ATTRIBUTE_INTERFACE int strncasecmp(const char *s1, const char *s2, size_t n) {
+ ensureFuzzerInited();
+ int result = REAL(strncasecmp)(s1, s2, n);
+- __sanitizer_weak_hook_strncasecmp(GET_CALLER_PC(), s1, s2, n, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strncasecmp(caller_pc, s1, s2, n, result);
+ return result;
+ }
+
+ ATTRIBUTE_INTERFACE int strcasecmp(const char *s1, const char *s2) {
+ ensureFuzzerInited();
+ int result = REAL(strcasecmp)(s1, s2);
+- __sanitizer_weak_hook_strcasecmp(GET_CALLER_PC(), s1, s2, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strcasecmp(caller_pc, s1, s2, result);
+ return result;
+ }
+
+@@ -197,14 +214,18 @@ ATTRIBUTE_INTERFACE char *strstr(const char *s1, const char *s2) {
+ if (!FuzzerInited)
+ return internal_strstr(s1, s2);
+ char *result = REAL(strstr)(s1, s2);
+- __sanitizer_weak_hook_strstr(GET_CALLER_PC(), s1, s2, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strstr(caller_pc, s1, s2, result);
+ return result;
+ }
+
+ ATTRIBUTE_INTERFACE char *strcasestr(const char *s1, const char *s2) {
+ ensureFuzzerInited();
+ char *result = REAL(strcasestr)(s1, s2);
+- __sanitizer_weak_hook_strcasestr(GET_CALLER_PC(), s1, s2, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_strcasestr(caller_pc, s1, s2, result);
+ return result;
+ }
+
+@@ -212,7 +233,9 @@ ATTRIBUTE_INTERFACE
+ void *memmem(const void *s1, size_t len1, const void *s2, size_t len2) {
+ ensureFuzzerInited();
+ void *result = REAL(memmem)(s1, len1, s2, len2);
+- __sanitizer_weak_hook_memmem(GET_CALLER_PC(), s1, len1, s2, len2, result);
++ void *caller_pc = GET_CALLER_PC();
++ if (__sanitizer_weak_is_relevant_pc(caller_pc))
++ __sanitizer_weak_hook_memmem(caller_pc, s1, len1, s2, len2, result);
+ return result;
+ }
+
diff --git a/third_party/libFuzzer-pass-death-callback-to-jazzer.patch b/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
index 31bd2af..3fb9fbb 100644
--- a/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
+++ b/third_party/libFuzzer-pass-death-callback-to-jazzer.patch
@@ -1,5 +1,5 @@
diff --git compiler-rt/lib/fuzzer/FuzzerExtFunctions.def compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
-index 51edf8444..e31f00402 100644
+index 51edf8444e94..e31f0040268b 100644
--- compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
+++ compiler-rt/lib/fuzzer/FuzzerExtFunctions.def
@@ -42,7 +42,7 @@ EXT_FUNC(__sanitizer_symbolize_pc, void,
@@ -12,7 +12,7 @@
EXT_FUNC(__msan_scoped_disable_interceptor_checks, void, (), false);
EXT_FUNC(__msan_scoped_enable_interceptor_checks, void, (), false);
diff --git compiler-rt/lib/fuzzer/FuzzerLoop.cpp compiler-rt/lib/fuzzer/FuzzerLoop.cpp
-index 6e3bf44f8..9d17b911c 100644
+index 149742b4c2fe..7b361423cc32 100644
--- compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -138,8 +138,8 @@ void Fuzzer::HandleMalloc(size_t Size) {