| # Copyright 2020 Google LLC |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| """A module to handle running a fuzz target for a specified amount of time.""" |
| import logging |
| import os |
| import posixpath |
| import re |
| import stat |
| import subprocess |
| import sys |
| import tempfile |
| import urllib.error |
| import urllib.request |
| import zipfile |
| |
| # pylint: disable=wrong-import-position |
| # pylint: disable=import-error |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| import utils |
| |
| # TODO: Turn default logging to WARNING when CIFuzz is stable. |
| logging.basicConfig( |
| format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', |
| level=logging.DEBUG) |
| |
| LIBFUZZER_OPTIONS = '-seed=1337 -len_control=0' |
| |
| # Location of google cloud storage for latest OSS-Fuzz builds. |
| GCS_BASE_URL = 'https://storage.googleapis.com/' |
| |
| # Location of cluster fuzz builds on GCS. |
| CLUSTERFUZZ_BUILDS = 'clusterfuzz-builds' |
| |
| # The get request for the latest version of a project's build. |
| VERSION_STRING = '{project_name}-{sanitizer}-latest.version' |
| |
| # The name to store the latest OSS-Fuzz build at. |
| BUILD_ARCHIVE_NAME = 'oss_fuzz_latest.zip' |
| |
| # Zip file name containing the corpus. |
| CORPUS_ZIP_NAME = 'public.zip' |
| |
| # The sanitizer build to download. |
| SANITIZER = 'address' |
| |
| # The number of reproduce attempts for a crash. |
| REPRODUCE_ATTEMPTS = 10 |
| |
| # Seconds on top of duration till a timeout error is raised. |
| BUFFER_TIME = 10 |
| |
| |
| class FuzzTarget: |
| """A class to manage a single fuzz target. |
| |
| Attributes: |
| target_name: The name of the fuzz target. |
| duration: The length of time in seconds that the target should run. |
| target_path: The location of the fuzz target binary. |
| out_dir: The location of where output artifacts are stored. |
| project_name: The name of the relevant OSS-Fuzz project. |
| """ |
| |
| def __init__(self, target_path, duration, out_dir, project_name=None): |
| """Represents a single fuzz target. |
| |
| Note: project_name should be none when the fuzzer being run is not |
| associated with a specific OSS-Fuzz project. |
| |
| Args: |
| target_path: The location of the fuzz target binary. |
| duration: The length of time in seconds the target should run. |
| out_dir: The location of where the output from crashes should be stored. |
| project_name: The name of the relevant OSS-Fuzz project. |
| """ |
| self.target_name = os.path.basename(target_path) |
| self.duration = int(duration) |
| self.target_path = target_path |
| self.out_dir = out_dir |
| self.project_name = project_name |
| |
| def fuzz(self): |
| """Starts the fuzz target run for the length of time specified by duration. |
| |
| Returns: |
| (test_case, stack trace, time in seconds) on crash or |
| (None, None, time in seconds) on timeout or error. |
| """ |
| logging.info('Fuzzer %s, started.', self.target_name) |
| docker_container = utils.get_container_name() |
| command = ['docker', 'run', '--rm', '--privileged'] |
| if docker_container: |
| command += [ |
| '--volumes-from', docker_container, '-e', 'OUT=' + self.out_dir |
| ] |
| else: |
| command += ['-v', '%s:%s' % (self.out_dir, '/out')] |
| |
| command += [ |
| '-e', 'FUZZING_ENGINE=libfuzzer', '-e', 'SANITIZER=address', '-e', |
| 'RUN_FUZZER_MODE=interactive', 'gcr.io/oss-fuzz-base/base-runner', |
| 'bash', '-c' |
| ] |
| |
| run_fuzzer_command = 'run_fuzzer {fuzz_target} {options}'.format( |
| fuzz_target=self.target_name, |
| options=LIBFUZZER_OPTIONS + ' -max_total_time=' + str(self.duration)) |
| |
| # If corpus can be downloaded use it for fuzzing. |
| latest_corpus_path = self.download_latest_corpus() |
| if latest_corpus_path: |
| run_fuzzer_command = run_fuzzer_command + ' ' + latest_corpus_path |
| command.append(run_fuzzer_command) |
| |
| logging.info('Running command: %s', ' '.join(command)) |
| process = subprocess.Popen(command, |
| stdout=subprocess.PIPE, |
| stderr=subprocess.PIPE) |
| |
| try: |
| _, err = process.communicate(timeout=self.duration + BUFFER_TIME) |
| except subprocess.TimeoutExpired: |
| logging.error('Fuzzer %s timed out, ending fuzzing.', self.target_name) |
| return None, None |
| |
| # Libfuzzer timeout has been reached. |
| if not process.returncode: |
| logging.info('Fuzzer %s finished with no crashes discovered.', |
| self.target_name) |
| return None, None |
| |
| # Crash has been discovered. |
| logging.info('Fuzzer %s, ended before timeout.', self.target_name) |
| err_str = err.decode('ascii') |
| test_case = self.get_test_case(err_str) |
| if not test_case: |
| logging.error('No test case found in stack trace: %s.', err_str) |
| return None, None |
| if self.check_reproducibility_and_regression(test_case): |
| return test_case, err_str |
| return None, None |
| |
| def is_reproducible(self, test_case, target_path): |
| """Checks if the test case reproduces. |
| |
| Args: |
| test_case: The path to the test case to be tested. |
| target_path: The path to the fuzz target to be tested |
| |
| Returns: |
| True if crash is reproducible. |
| """ |
| if not os.path.exists(test_case): |
| logging.error('Test case %s is not found.', test_case) |
| return False |
| if os.path.exists(target_path): |
| os.chmod(os.path.join(target_path, self.target_name), stat.S_IRWXO) |
| |
| command = ['docker', 'run', '--rm', '--privileged'] |
| container = utils.get_container_name() |
| if container: |
| command += [ |
| '--volumes-from', container, '-e', 'OUT=' + target_path, '-e', |
| 'TESTCASE=' + test_case |
| ] |
| else: |
| command += [ |
| '-v', '%s:/out' % target_path, '-v', |
| '%s:/testcase' % test_case |
| ] |
| |
| command += [ |
| '-t', 'gcr.io/oss-fuzz-base/base-runner', 'reproduce', self.target_name, |
| '-runs=100' |
| ] |
| |
| logging.info('Running reproduce command: %s.', ' '.join(command)) |
| for _ in range(REPRODUCE_ATTEMPTS): |
| _, _, err_code = utils.execute(command) |
| if err_code: |
| return True |
| return False |
| |
| def check_reproducibility_and_regression(self, test_case): |
| """Checks if a crash is reproducible, and if it is, whether it's a new |
| regression that cannot be reproduced with the latest OSS-Fuzz build. |
| |
| NOTE: If no project is specified the crash is assumed introduced |
| by the pull request if it is reproducible. |
| |
| Args: |
| test_case: The path to the test_case that triggered the crash. |
| |
| Returns: |
| True if the crash was introduced by the current pull request. |
| """ |
| reproducible_in_pr = self.is_reproducible(test_case, |
| os.path.dirname(self.target_path)) |
| if not self.project_name: |
| return reproducible_in_pr |
| |
| if not reproducible_in_pr: |
| logging.info( |
| 'Failed to reproduce the crash using the obtained test case.') |
| return False |
| |
| oss_fuzz_build_dir = self.download_oss_fuzz_build() |
| if not oss_fuzz_build_dir: |
| return False |
| |
| reproducible_in_oss_fuzz = self.is_reproducible(test_case, |
| oss_fuzz_build_dir) |
| |
| if reproducible_in_pr and not reproducible_in_oss_fuzz: |
| logging.info('The crash is reproducible. The crash doesn\'t reproduce ' \ |
| 'on old builds. This pull request probably introduced the crash.') |
| return True |
| logging.info('The crash is reproducible without the current pull request.') |
| return False |
| |
| def get_test_case(self, error_string): |
| """Gets the file from a fuzzer run stack trace. |
| |
| Args: |
| error_string: The stack trace string containing the error. |
| |
| Returns: |
| The error test case or None if not found. |
| """ |
| match = re.search(r'\bTest unit written to \.\/([^\s]+)', error_string) |
| if match: |
| return os.path.join(self.out_dir, match.group(1)) |
| return None |
| |
| def get_lastest_build_version(self): |
| """Gets the latest OSS-Fuzz build version for a projects' fuzzers. |
| |
| Returns: |
| A string with the latest build version or None. |
| """ |
| if not self.project_name: |
| return None |
| |
| version = VERSION_STRING.format(project_name=self.project_name, |
| sanitizer=SANITIZER) |
| version_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, self.project_name, |
| version) |
| try: |
| response = urllib.request.urlopen(version_url) |
| except urllib.error.HTTPError: |
| logging.error('Error getting latest build version for %s with url %s.', |
| self.project_name, version_url) |
| return None |
| return response.read().decode() |
| |
| def download_oss_fuzz_build(self): |
| """Downloads the latest OSS-Fuzz build from GCS. |
| |
| Returns: |
| A path to where the OSS-Fuzz build is located, or None. |
| """ |
| if not os.path.exists(self.out_dir): |
| logging.error('Out directory %s does not exist.', self.out_dir) |
| return None |
| if not self.project_name: |
| return None |
| |
| build_dir = os.path.join(self.out_dir, 'oss_fuzz_latest', self.project_name) |
| if os.path.exists(os.path.join(build_dir, self.target_name)): |
| return build_dir |
| os.makedirs(build_dir, exist_ok=True) |
| latest_build_str = self.get_lastest_build_version() |
| if not latest_build_str: |
| return None |
| |
| oss_fuzz_build_url = url_join(GCS_BASE_URL, CLUSTERFUZZ_BUILDS, |
| self.project_name, latest_build_str) |
| return download_and_unpack_zip(oss_fuzz_build_url, build_dir) |
| |
| def download_latest_corpus(self): |
| """Downloads the latest OSS-Fuzz corpus for the target from google cloud. |
| |
| Returns: |
| The local path to to corpus or None if download failed. |
| """ |
| if not self.project_name: |
| return None |
| if not os.path.exists(self.out_dir): |
| logging.error('Out directory %s does not exist.', self.out_dir) |
| return None |
| |
| corpus_dir = os.path.join(self.out_dir, 'backup_corpus', self.target_name) |
| os.makedirs(corpus_dir, exist_ok=True) |
| project_qualified_fuzz_target_name = self.target_name |
| qualified_name_prefix = '%s_' % self.project_name |
| if not self.target_name.startswith(qualified_name_prefix): |
| project_qualified_fuzz_target_name = qualified_name_prefix + \ |
| self.target_name |
| corpus_url = url_join( |
| GCS_BASE_URL, |
| '{0}-backup.clusterfuzz-external.appspot.com/corpus/libFuzzer/'.format( |
| self.project_name), project_qualified_fuzz_target_name, |
| CORPUS_ZIP_NAME) |
| return download_and_unpack_zip(corpus_url, corpus_dir) |
| |
| |
| def download_and_unpack_zip(http_url, out_dir): |
| """Downloads and unpacks a zip file from an http url. |
| |
| Args: |
| http_url: A url to the zip file to be downloaded and unpacked. |
| out_dir: The path where the zip file should be extracted to. |
| |
| Returns: |
| A path to the extracted file or None on failure. |
| """ |
| if not os.path.exists(out_dir): |
| logging.error('Out directory %s does not exist.', out_dir) |
| return None |
| |
| # Gives the temporary zip file a unique identifier in the case that |
| # that download_and_unpack_zip is done in parallel. |
| with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file: |
| try: |
| urllib.request.urlretrieve(http_url, tmp_file.name) |
| except urllib.error.HTTPError: |
| logging.error('Unable to download build from: %s.', http_url) |
| return None |
| |
| try: |
| with zipfile.ZipFile(tmp_file.name, 'r') as zip_file: |
| zip_file.extractall(out_dir) |
| except zipfile.BadZipFile: |
| logging.error('Error unpacking zip from %s. Bad Zipfile.', http_url) |
| return None |
| return out_dir |
| |
| |
| def url_join(*argv): |
| """Joins URLs together using the posix join method. |
| |
| Args: |
| argv: Sections of a URL to be joined. |
| |
| Returns: |
| Joined URL. |
| """ |
| return posixpath.join(*argv) |