[CIFuzz] Refactor HTTP code and use a newer python. (#5842)
* Move to http_utils
* Update dockerfiles
* Don't install python3 from apt if we copy it into the image
* fix
* fmt
* Add missing
diff --git a/infra/build_fuzzers.Dockerfile b/infra/build_fuzzers.Dockerfile
index df06ff7..6e8adf9 100644
--- a/infra/build_fuzzers.Dockerfile
+++ b/infra/build_fuzzers.Dockerfile
@@ -22,5 +22,9 @@
# just expand to '/opt/oss-fuzz'.
ENTRYPOINT ["python3", "/opt/oss-fuzz/infra/cifuzz/build_fuzzers_entrypoint.py"]
+WORKDIR ${OSS_FUZZ_ROOT}/infra
+
# Update infra source code.
ADD . ${OSS_FUZZ_ROOT}/infra
+
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
\ No newline at end of file
diff --git a/infra/cifuzz/cifuzz-base/Dockerfile b/infra/cifuzz/cifuzz-base/Dockerfile
index e0599db..e9c2974 100644
--- a/infra/cifuzz/cifuzz-base/Dockerfile
+++ b/infra/cifuzz/cifuzz-base/Dockerfile
@@ -21,12 +21,18 @@
FROM ubuntu:16.04
RUN apt-get update && \
- apt-get install ca-certificates wget python3 git-core --no-install-recommends -y && \
+ apt-get install ca-certificates wget git-core --no-install-recommends -y && \
wget https://download.docker.com/linux/ubuntu/dists/xenial/pool/stable/amd64/docker-ce-cli_20.10.5~3-0~ubuntu-xenial_amd64.deb -O /tmp/docker-ce.deb && \
dpkg -i /tmp/docker-ce.deb && rm /tmp/docker-ce.deb && \
apt-get remove wget -y --purge
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/bin/python3 /usr/local/bin/python3
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/lib/libpython3* /usr/local/lib/
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/lib/python3.8 /usr/local/lib/python3.8
+RUN ldconfig
+
ENV OSS_FUZZ_ROOT=/opt/oss-fuzz
ADD . ${OSS_FUZZ_ROOT}
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
RUN rm -rf ${OSS_FUZZ_ROOT}/infra
\ No newline at end of file
diff --git a/infra/cifuzz/clusterfuzz_deployment.py b/infra/cifuzz/clusterfuzz_deployment.py
index 403a38b..6c638c5 100644
--- a/infra/cifuzz/clusterfuzz_deployment.py
+++ b/infra/cifuzz/clusterfuzz_deployment.py
@@ -11,15 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Module for interacting with the "ClusterFuzz deployment."""
+"""Module for interacting with the ClusterFuzz deployment."""
import logging
import os
import sys
-import tempfile
-import time
import urllib.error
import urllib.request
-import zipfile
+
+import http_utils
# pylint: disable=wrong-import-position,import-error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -155,7 +154,7 @@
self.CLUSTERFUZZ_BUILDS,
self.config.project_name,
latest_build_name)
- if download_and_unpack_zip(oss_fuzz_build_url, build_dir):
+ if http_utils.download_and_unpack_zip(oss_fuzz_build_url, build_dir):
return build_dir
return None
@@ -179,6 +178,7 @@
The local path to to corpus or None if download failed.
"""
corpus_dir = self.get_target_corpus_dir(target_name, parent_dir)
+
os.makedirs(corpus_dir, exist_ok=True)
# TODO(metzman): Clean up this code.
project_qualified_fuzz_target_name = target_name
@@ -193,7 +193,7 @@
self.config.project_name), project_qualified_fuzz_target_name,
self.CORPUS_ZIP_NAME)
- if download_and_unpack_zip(corpus_url, corpus_dir):
+ if http_utils.download_and_unpack_zip(corpus_url, corpus_dir):
return corpus_dir
return None
@@ -225,70 +225,6 @@
logging.info('Not downloading build because no ClusterFuzz deployment.')
-def download_url(url, filename, num_attempts=3):
- """Downloads the file located at |url|, using HTTP to |filename|.
-
- Args:
- url: A url to a file to download.
- filename: The path the file should be downloaded to.
- num_retries: The number of times to retry the download on
- ConnectionResetError.
-
- Returns:
- True on success.
- """
- sleep_time = 1
-
- # Don't use retry wrapper since we don't want this to raise any exceptions.
- for _ in range(num_attempts):
- try:
- urllib.request.urlretrieve(url, filename)
- return True
- except urllib.error.HTTPError:
- # In these cases, retrying probably wont work since the error probably
- # means there is nothing at the URL to download.
- logging.error('Unable to download from: %s.', url)
- return False
- except ConnectionResetError:
- # These errors are more likely to be transient. Retry.
- pass
- time.sleep(sleep_time)
-
- logging.error('Failed to download %s, %d times.', url, num_attempts)
-
- return False
-
-
-def download_and_unpack_zip(url, extract_directory):
- """Downloads and unpacks a zip file from an HTTP URL.
-
- Args:
- url: A url to the zip file to be downloaded and unpacked.
- extract_directory: The path where the zip file should be extracted to.
-
- Returns:
- True on success.
- """
- if not os.path.exists(extract_directory):
- logging.error('Extract directory: %s does not exist.', extract_directory)
- return False
-
- # Gives the temporary zip file a unique identifier in the case that
- # that download_and_unpack_zip is done in parallel.
- with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
- if not download_url(url, tmp_file.name):
- return False
-
- try:
- with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
- zip_file.extractall(extract_directory)
- except zipfile.BadZipFile:
- logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
- return False
-
- return True
-
-
def get_clusterfuzz_deployment(config):
"""Returns object reprsenting deployment of ClusterFuzz used by |config|."""
if (config.platform == config.Platform.INTERNAL_GENERIC_CI or
diff --git a/infra/cifuzz/clusterfuzz_deployment_test.py b/infra/cifuzz/clusterfuzz_deployment_test.py
index d4a9911..5adb1f8 100644
--- a/infra/cifuzz/clusterfuzz_deployment_test.py
+++ b/infra/cifuzz/clusterfuzz_deployment_test.py
@@ -16,7 +16,6 @@
import os
import unittest
from unittest import mock
-import urllib.error
from pyfakefs import fake_filesystem_unittest
@@ -57,8 +56,7 @@
self.setUpPyfakefs()
self.deployment = _create_deployment()
- @mock.patch('clusterfuzz_deployment.download_and_unpack_zip',
- return_value=True)
+ @mock.patch('http_utils.download_and_unpack_zip', return_value=True)
def test_download_corpus(self, mocked_download_and_unpack_zip):
"""Tests that we can download a corpus for a valid project."""
result = self.deployment.download_corpus(EXAMPLE_FUZZER, self.OUT_DIR)
@@ -71,8 +69,7 @@
call_args, _ = mocked_download_and_unpack_zip.call_args
self.assertEqual(call_args, (expected_url, expected_corpus_dir))
- @mock.patch('clusterfuzz_deployment.download_and_unpack_zip',
- return_value=False)
+ @mock.patch('http_utils.download_and_unpack_zip', return_value=False)
def test_download_fail(self, _):
"""Tests that when downloading fails, None is returned."""
corpus_path = self.deployment.download_corpus(EXAMPLE_FUZZER, self.OUT_DIR)
@@ -85,58 +82,5 @@
self.assertTrue('address' in latest_build_name)
-class DownloadUrlTest(unittest.TestCase):
- """Tests that download_url works."""
- URL = 'example.com/file'
- FILE_PATH = '/tmp/file'
-
- @mock.patch('time.sleep')
- @mock.patch('urllib.request.urlretrieve', return_value=True)
- def test_download_url_no_error(self, mocked_urlretrieve, _):
- """Tests that download_url works when there is no error."""
- self.assertTrue(
- clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
- self.assertEqual(1, mocked_urlretrieve.call_count)
-
- @mock.patch('time.sleep')
- @mock.patch('logging.error')
- @mock.patch('urllib.request.urlretrieve',
- side_effect=urllib.error.HTTPError(None, None, None, None, None))
- def test_download_url_http_error(self, mocked_urlretrieve, mocked_error, _):
- """Tests that download_url doesn't retry when there is an HTTP error."""
- self.assertFalse(
- clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
- mocked_error.assert_called_with('Unable to download from: %s.', self.URL)
- self.assertEqual(1, mocked_urlretrieve.call_count)
-
- @mock.patch('time.sleep')
- @mock.patch('logging.error')
- @mock.patch('urllib.request.urlretrieve', side_effect=ConnectionResetError)
- def test_download_url_connection_error(self, mocked_urlretrieve, mocked_error,
- mocked_sleep):
- """Tests that download_url doesn't retry when there is an HTTP error."""
- self.assertFalse(
- clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
- self.assertEqual(3, mocked_urlretrieve.call_count)
- self.assertEqual(3, mocked_sleep.call_count)
- mocked_error.assert_called_with('Failed to download %s, %d times.',
- self.URL, 3)
-
-
-class DownloadAndUnpackZipTest(fake_filesystem_unittest.TestCase):
- """Tests download_and_unpack_zip."""
-
- def setUp(self):
- self.setUpPyfakefs()
-
- @mock.patch('urllib.request.urlretrieve', return_value=True)
- def test_bad_zip_download(self, _):
- """Tests download_and_unpack_zip returns none when a bad zip is passed."""
- self.fs.create_file('/url_tmp.zip', contents='Test file.')
- self.assertFalse(
- clusterfuzz_deployment.download_and_unpack_zip('/not/a/real/url',
- '/extract-directory'))
-
-
if __name__ == '__main__':
unittest.main()
diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py
index 22c5ac8..1ec3aed 100644
--- a/infra/cifuzz/fuzz_target_test.py
+++ b/infra/cifuzz/fuzz_target_test.py
@@ -18,6 +18,7 @@
import unittest
from unittest import mock
+import certifi
import parameterized
from pyfakefs import fake_filesystem_unittest
@@ -177,6 +178,9 @@
self.testcase_path = '/testcase'
self.fs.create_file(self.testcase_path, contents='')
+ # Do this to prevent pyfakefs from messing with requests.
+ self.fs.add_real_directory(os.path.dirname(certifi.__file__))
+
@mock.patch('fuzz_target.FuzzTarget.is_reproducible',
side_effect=[True, False])
@mock.patch('logging.info')
diff --git a/infra/cifuzz/http_utils.py b/infra/cifuzz/http_utils.py
new file mode 100644
index 0000000..5d7b163
--- /dev/null
+++ b/infra/cifuzz/http_utils.py
@@ -0,0 +1,99 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility module for HTTP."""
+import logging
+import os
+import sys
+import tempfile
+import zipfile
+
+import requests
+
+# pylint: disable=wrong-import-position,import-error
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import retry
+
+_DOWNLOAD_URL_RETRIES = 3
+_DOWNLOAD_URL_BACKOFF = 1
+
+
+def download_and_unpack_zip(url, extract_directory, headers=None):
+ """Downloads and unpacks a zip file from an HTTP URL.
+
+ Args:
+ url: A url to the zip file to be downloaded and unpacked.
+ extract_directory: The path where the zip file should be extracted to.
+ headers: (Optional) HTTP headers to send with the download request.
+
+ Returns:
+ True on success.
+ """
+ if headers is None:
+ headers = {}
+
+ if not os.path.exists(extract_directory):
+ logging.error('Extract directory: %s does not exist.', extract_directory)
+ return False
+
+ # Gives the temporary zip file a unique identifier in the case that
+ # download_and_unpack_zip is done in parallel.
+ with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
+ if not download_url(url, tmp_file.name, headers=headers):
+ return False
+
+ try:
+ with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
+ zip_file.extractall(extract_directory)
+ except zipfile.BadZipFile:
+ logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
+ return False
+
+ return True
+
+
+def download_url(*args, **kwargs):
+ """Wrapper around _download_url that returns False if _download_url
+ raises an exception."""
+ try:
+ return _download_url(*args, **kwargs)
+ except Exception: # pylint: disable=broad-except
+ return False
+
+
+@retry.wrap(_DOWNLOAD_URL_RETRIES, _DOWNLOAD_URL_BACKOFF)
+def _download_url(url, filename, headers=None):
+ """Downloads the file located at |url|, using HTTP to |filename|.
+
+ Args:
+ url: A url to a file to download.
+ filename: The path the file should be downloaded to.
+ headers: (Optional) HTTP headers to send with the download request.
+
+ Returns:
+ True on success.
+ """
+ if headers is None:
+ headers = {}
+
+ response = requests.get(url, headers=headers)
+
+ if response.status_code != 200:
+ logging.error('Unable to download from: %s. Code: %d. Content: %s.', url,
+ response.status_code, response.content)
+ return False
+
+ with open(filename, 'wb') as file_handle:
+ file_handle.write(response.content)
+
+ return True
diff --git a/infra/cifuzz/http_utils_test.py b/infra/cifuzz/http_utils_test.py
new file mode 100644
index 0000000..2f603d5
--- /dev/null
+++ b/infra/cifuzz/http_utils_test.py
@@ -0,0 +1,71 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for http_utils.py"""
+
+import unittest
+from unittest import mock
+
+from pyfakefs import fake_filesystem_unittest
+
+import http_utils
+
+mocked_get_response = mock.MagicMock(status_code=200, content=b'')
+
+
+class DownloadUrlTest(unittest.TestCase):
+ """Tests that download_url works."""
+ URL = 'https://example.com/file'
+ FILE_PATH = '/tmp/file'
+
+ @mock.patch('time.sleep')
+ @mock.patch('requests.get', return_value=mocked_get_response)
+ def test_download_url_no_error(self, mocked_urlretrieve, _):
+ """Tests that download_url works when there is no error."""
+ self.assertTrue(http_utils.download_url(self.URL, self.FILE_PATH))
+ self.assertEqual(1, mocked_urlretrieve.call_count)
+
+ @mock.patch('time.sleep')
+ @mock.patch('logging.error')
+ @mock.patch('requests.get',
+ return_value=mock.MagicMock(status_code=404, content=b''))
+ def test_download_url_http_error(self, mocked_get, mocked_error, _):
+ """Tests that download_url doesn't retry when there is an HTTP error."""
+ self.assertFalse(http_utils.download_url(self.URL, self.FILE_PATH))
+ mocked_error.assert_called_with(
+ 'Unable to download from: %s. Code: %d. Content: %s.', self.URL, 404,
+ b'')
+ self.assertEqual(1, mocked_get.call_count)
+
+ @mock.patch('time.sleep')
+ @mock.patch('requests.get', side_effect=ConnectionResetError)
+ def test_download_url_connection_error(self, mocked_get, mocked_sleep):
+ """Tests that download_url retries when there is a connection error."""
+ self.assertFalse(http_utils.download_url(self.URL, self.FILE_PATH))
+ self.assertEqual(4, mocked_get.call_count)
+ self.assertEqual(3, mocked_sleep.call_count)
+
+
+class DownloadAndUnpackZipTest(fake_filesystem_unittest.TestCase):
+ """Tests download_and_unpack_zip."""
+
+ def setUp(self):
+ self.setUpPyfakefs()
+
+ @mock.patch('requests.get', return_value=mocked_get_response)
+ def test_bad_zip_download(self, _):
+ """Tests download_and_unpack_zip returns False when a bad zip is passed."""
+ self.fs.create_file('/url_tmp.zip', contents='Test file.')
+ self.assertFalse(
+ http_utils.download_and_unpack_zip('/not/a/real/url',
+ '/extract-directory'))
diff --git a/infra/cifuzz/requirements.txt b/infra/cifuzz/requirements.txt
new file mode 100644
index 0000000..9d84d35
--- /dev/null
+++ b/infra/cifuzz/requirements.txt
@@ -0,0 +1 @@
+requests==2.25.1
diff --git a/infra/run_fuzzers.Dockerfile b/infra/run_fuzzers.Dockerfile
index b00bb12..2ae3b74 100644
--- a/infra/run_fuzzers.Dockerfile
+++ b/infra/run_fuzzers.Dockerfile
@@ -22,5 +22,9 @@
# just expand to '/opt/oss-fuzz'.
ENTRYPOINT ["python3", "/opt/oss-fuzz/infra/cifuzz/run_fuzzers_entrypoint.py"]
+WORKDIR ${OSS_FUZZ_ROOT}/infra
+
# Copy infra source code.
-ADD . ${OSS_FUZZ_ROOT}/infra
\ No newline at end of file
+ADD . ${OSS_FUZZ_ROOT}/infra
+
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
\ No newline at end of file