[CIFuzz] Refactor HTTP code and use a newer Python. (#5842)

* Move to http_utils

* Update dockerfiles

* Don't install python3 from apt if we copy it into the image

* fix

* fmt

* Add missing
diff --git a/infra/build_fuzzers.Dockerfile b/infra/build_fuzzers.Dockerfile
index df06ff7..6e8adf9 100644
--- a/infra/build_fuzzers.Dockerfile
+++ b/infra/build_fuzzers.Dockerfile
@@ -22,5 +22,9 @@
 # just expand to '/opt/oss-fuzz'.
 ENTRYPOINT ["python3", "/opt/oss-fuzz/infra/cifuzz/build_fuzzers_entrypoint.py"]
 
+WORKDIR ${OSS_FUZZ_ROOT}/infra
+
 # Update infra source code.
 ADD . ${OSS_FUZZ_ROOT}/infra
+
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
\ No newline at end of file
diff --git a/infra/cifuzz/cifuzz-base/Dockerfile b/infra/cifuzz/cifuzz-base/Dockerfile
index e0599db..e9c2974 100644
--- a/infra/cifuzz/cifuzz-base/Dockerfile
+++ b/infra/cifuzz/cifuzz-base/Dockerfile
@@ -21,12 +21,18 @@
 FROM ubuntu:16.04
 
 RUN apt-get update && \
-    apt-get install ca-certificates wget python3 git-core --no-install-recommends -y && \
+    apt-get install ca-certificates wget git-core --no-install-recommends -y && \
     wget https://download.docker.com/linux/ubuntu/dists/xenial/pool/stable/amd64/docker-ce-cli_20.10.5~3-0~ubuntu-xenial_amd64.deb -O /tmp/docker-ce.deb && \
     dpkg -i /tmp/docker-ce.deb && rm /tmp/docker-ce.deb && \
     apt-get remove wget -y --purge
 
 
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/bin/python3 /usr/local/bin/python3
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/lib/libpython3* /usr/local/lib/
+COPY --from=gcr.io/oss-fuzz-base/base-builder /usr/local/lib/python3.8 /usr/local/lib/python3.8
+RUN ldconfig
+
 ENV OSS_FUZZ_ROOT=/opt/oss-fuzz
 ADD . ${OSS_FUZZ_ROOT}
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
 RUN rm -rf ${OSS_FUZZ_ROOT}/infra
\ No newline at end of file
diff --git a/infra/cifuzz/clusterfuzz_deployment.py b/infra/cifuzz/clusterfuzz_deployment.py
index 403a38b..6c638c5 100644
--- a/infra/cifuzz/clusterfuzz_deployment.py
+++ b/infra/cifuzz/clusterfuzz_deployment.py
@@ -11,15 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Module for interacting with the "ClusterFuzz deployment."""
+"""Module for interacting with the ClusterFuzz deployment."""
 import logging
 import os
 import sys
-import tempfile
-import time
 import urllib.error
 import urllib.request
-import zipfile
+
+import http_utils
 
 # pylint: disable=wrong-import-position,import-error
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -155,7 +154,7 @@
                                         self.CLUSTERFUZZ_BUILDS,
                                         self.config.project_name,
                                         latest_build_name)
-    if download_and_unpack_zip(oss_fuzz_build_url, build_dir):
+    if http_utils.download_and_unpack_zip(oss_fuzz_build_url, build_dir):
       return build_dir
 
     return None
@@ -179,6 +178,7 @@
       The local path to to corpus or None if download failed.
     """
     corpus_dir = self.get_target_corpus_dir(target_name, parent_dir)
+
     os.makedirs(corpus_dir, exist_ok=True)
     # TODO(metzman): Clean up this code.
     project_qualified_fuzz_target_name = target_name
@@ -193,7 +193,7 @@
             self.config.project_name), project_qualified_fuzz_target_name,
         self.CORPUS_ZIP_NAME)
 
-    if download_and_unpack_zip(corpus_url, corpus_dir):
+    if http_utils.download_and_unpack_zip(corpus_url, corpus_dir):
       return corpus_dir
 
     return None
@@ -225,70 +225,6 @@
     logging.info('Not downloading build because no ClusterFuzz deployment.')
 
 
-def download_url(url, filename, num_attempts=3):
-  """Downloads the file located at |url|, using HTTP to |filename|.
-
-  Args:
-    url: A url to a file to download.
-    filename: The path the file should be downloaded to.
-    num_retries: The number of times to retry the download on
-       ConnectionResetError.
-
-  Returns:
-    True on success.
-  """
-  sleep_time = 1
-
-  # Don't use retry wrapper since we don't want this to raise any exceptions.
-  for _ in range(num_attempts):
-    try:
-      urllib.request.urlretrieve(url, filename)
-      return True
-    except urllib.error.HTTPError:
-      # In these cases, retrying probably wont work since the error probably
-      # means there is nothing at the URL to download.
-      logging.error('Unable to download from: %s.', url)
-      return False
-    except ConnectionResetError:
-      # These errors are more likely to be transient. Retry.
-      pass
-    time.sleep(sleep_time)
-
-  logging.error('Failed to download %s, %d times.', url, num_attempts)
-
-  return False
-
-
-def download_and_unpack_zip(url, extract_directory):
-  """Downloads and unpacks a zip file from an HTTP URL.
-
-  Args:
-    url: A url to the zip file to be downloaded and unpacked.
-    extract_directory: The path where the zip file should be extracted to.
-
-  Returns:
-    True on success.
-  """
-  if not os.path.exists(extract_directory):
-    logging.error('Extract directory: %s does not exist.', extract_directory)
-    return False
-
-  # Gives the temporary zip file a unique identifier in the case that
-  # that download_and_unpack_zip is done in parallel.
-  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
-    if not download_url(url, tmp_file.name):
-      return False
-
-    try:
-      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
-        zip_file.extractall(extract_directory)
-    except zipfile.BadZipFile:
-      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
-      return False
-
-  return True
-
-
 def get_clusterfuzz_deployment(config):
   """Returns object reprsenting deployment of ClusterFuzz used by |config|."""
   if (config.platform == config.Platform.INTERNAL_GENERIC_CI or
diff --git a/infra/cifuzz/clusterfuzz_deployment_test.py b/infra/cifuzz/clusterfuzz_deployment_test.py
index d4a9911..5adb1f8 100644
--- a/infra/cifuzz/clusterfuzz_deployment_test.py
+++ b/infra/cifuzz/clusterfuzz_deployment_test.py
@@ -16,7 +16,6 @@
 import os
 import unittest
 from unittest import mock
-import urllib.error
 
 from pyfakefs import fake_filesystem_unittest
 
@@ -57,8 +56,7 @@
     self.setUpPyfakefs()
     self.deployment = _create_deployment()
 
-  @mock.patch('clusterfuzz_deployment.download_and_unpack_zip',
-              return_value=True)
+  @mock.patch('http_utils.download_and_unpack_zip', return_value=True)
   def test_download_corpus(self, mocked_download_and_unpack_zip):
     """Tests that we can download a corpus for a valid project."""
     result = self.deployment.download_corpus(EXAMPLE_FUZZER, self.OUT_DIR)
@@ -71,8 +69,7 @@
     call_args, _ = mocked_download_and_unpack_zip.call_args
     self.assertEqual(call_args, (expected_url, expected_corpus_dir))
 
-  @mock.patch('clusterfuzz_deployment.download_and_unpack_zip',
-              return_value=False)
+  @mock.patch('http_utils.download_and_unpack_zip', return_value=False)
   def test_download_fail(self, _):
     """Tests that when downloading fails, None is returned."""
     corpus_path = self.deployment.download_corpus(EXAMPLE_FUZZER, self.OUT_DIR)
@@ -85,58 +82,5 @@
     self.assertTrue('address' in latest_build_name)
 
 
-class DownloadUrlTest(unittest.TestCase):
-  """Tests that download_url works."""
-  URL = 'example.com/file'
-  FILE_PATH = '/tmp/file'
-
-  @mock.patch('time.sleep')
-  @mock.patch('urllib.request.urlretrieve', return_value=True)
-  def test_download_url_no_error(self, mocked_urlretrieve, _):
-    """Tests that download_url works when there is no error."""
-    self.assertTrue(
-        clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
-    self.assertEqual(1, mocked_urlretrieve.call_count)
-
-  @mock.patch('time.sleep')
-  @mock.patch('logging.error')
-  @mock.patch('urllib.request.urlretrieve',
-              side_effect=urllib.error.HTTPError(None, None, None, None, None))
-  def test_download_url_http_error(self, mocked_urlretrieve, mocked_error, _):
-    """Tests that download_url doesn't retry when there is an HTTP error."""
-    self.assertFalse(
-        clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
-    mocked_error.assert_called_with('Unable to download from: %s.', self.URL)
-    self.assertEqual(1, mocked_urlretrieve.call_count)
-
-  @mock.patch('time.sleep')
-  @mock.patch('logging.error')
-  @mock.patch('urllib.request.urlretrieve', side_effect=ConnectionResetError)
-  def test_download_url_connection_error(self, mocked_urlretrieve, mocked_error,
-                                         mocked_sleep):
-    """Tests that download_url doesn't retry when there is an HTTP error."""
-    self.assertFalse(
-        clusterfuzz_deployment.download_url(self.URL, self.FILE_PATH))
-    self.assertEqual(3, mocked_urlretrieve.call_count)
-    self.assertEqual(3, mocked_sleep.call_count)
-    mocked_error.assert_called_with('Failed to download %s, %d times.',
-                                    self.URL, 3)
-
-
-class DownloadAndUnpackZipTest(fake_filesystem_unittest.TestCase):
-  """Tests download_and_unpack_zip."""
-
-  def setUp(self):
-    self.setUpPyfakefs()
-
-  @mock.patch('urllib.request.urlretrieve', return_value=True)
-  def test_bad_zip_download(self, _):
-    """Tests download_and_unpack_zip returns none when a bad zip is passed."""
-    self.fs.create_file('/url_tmp.zip', contents='Test file.')
-    self.assertFalse(
-        clusterfuzz_deployment.download_and_unpack_zip('/not/a/real/url',
-                                                       '/extract-directory'))
-
-
 if __name__ == '__main__':
   unittest.main()
diff --git a/infra/cifuzz/fuzz_target_test.py b/infra/cifuzz/fuzz_target_test.py
index 22c5ac8..1ec3aed 100644
--- a/infra/cifuzz/fuzz_target_test.py
+++ b/infra/cifuzz/fuzz_target_test.py
@@ -18,6 +18,7 @@
 import unittest
 from unittest import mock
 
+import certifi
 import parameterized
 from pyfakefs import fake_filesystem_unittest
 
@@ -177,6 +178,9 @@
     self.testcase_path = '/testcase'
     self.fs.create_file(self.testcase_path, contents='')
 
+    # Keep certifi's real directory visible so pyfakefs doesn't break requests.
+    self.fs.add_real_directory(os.path.dirname(certifi.__file__))
+
   @mock.patch('fuzz_target.FuzzTarget.is_reproducible',
               side_effect=[True, False])
   @mock.patch('logging.info')
diff --git a/infra/cifuzz/http_utils.py b/infra/cifuzz/http_utils.py
new file mode 100644
index 0000000..5d7b163
--- /dev/null
+++ b/infra/cifuzz/http_utils.py
@@ -0,0 +1,99 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utility module for HTTP."""
+import logging
+import os
+import sys
+import tempfile
+import zipfile
+
+import requests
+
+# pylint: disable=wrong-import-position,import-error
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import retry
+
+_DOWNLOAD_URL_RETRIES = 3
+_DOWNLOAD_URL_BACKOFF = 1
+
+
+def download_and_unpack_zip(url, extract_directory, headers=None):
+  """Downloads and unpacks a zip file from an HTTP URL.
+
+  Args:
+    url: A url to the zip file to be downloaded and unpacked.
+    extract_directory: The path where the zip file should be extracted to.
+    headers: (Optional) HTTP headers to send with the download request.
+
+  Returns:
+    True on success.
+  """
+  if headers is None:
+    headers = {}
+
+  if not os.path.exists(extract_directory):
+    logging.error('Extract directory: %s does not exist.', extract_directory)
+    return False
+
+  # Gives the temporary zip file a unique identifier in case
+  # download_and_unpack_zip is run in parallel.
+  with tempfile.NamedTemporaryFile(suffix='.zip') as tmp_file:
+    if not download_url(url, tmp_file.name, headers=headers):
+      return False
+
+    try:
+      with zipfile.ZipFile(tmp_file.name, 'r') as zip_file:
+        zip_file.extractall(extract_directory)
+    except zipfile.BadZipFile:
+      logging.error('Error unpacking zip from %s. Bad Zipfile.', url)
+      return False
+
+  return True
+
+
+def download_url(*args, **kwargs):
+  """Wrapper around _download_url that returns False if _download_url
+  raises an exception."""
+  try:
+    return _download_url(*args, **kwargs)
+  except Exception:  # pylint: disable=broad-except
+    return False
+
+
+@retry.wrap(_DOWNLOAD_URL_RETRIES, _DOWNLOAD_URL_BACKOFF)
+def _download_url(url, filename, headers=None):
+  """Downloads the file located at |url|, using HTTP to |filename|.
+
+  Args:
+    url: A url to a file to download.
+    filename: The path the file should be downloaded to.
+    headers: (Optional) HTTP headers to send with the download request.
+
+  Returns:
+    True on success.
+  """
+  if headers is None:
+    headers = {}
+
+  response = requests.get(url, headers=headers)
+
+  if response.status_code != 200:
+    logging.error('Unable to download from: %s. Code: %d. Content: %s.', url,
+                  response.status_code, response.content)
+    return False
+
+  with open(filename, 'wb') as file_handle:
+    file_handle.write(response.content)
+
+  return True
diff --git a/infra/cifuzz/http_utils_test.py b/infra/cifuzz/http_utils_test.py
new file mode 100644
index 0000000..2f603d5
--- /dev/null
+++ b/infra/cifuzz/http_utils_test.py
@@ -0,0 +1,71 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for http_utils.py."""
+
+import unittest
+from unittest import mock
+
+from pyfakefs import fake_filesystem_unittest
+
+import http_utils
+
+mocked_get_response = mock.MagicMock(status_code=200, content=b'')
+
+
+class DownloadUrlTest(unittest.TestCase):
+  """Tests that download_url works."""
+  URL = 'https://example.com/file'
+  FILE_PATH = '/tmp/file'
+
+  @mock.patch('time.sleep')
+  @mock.patch('requests.get', return_value=mocked_get_response)
+  def test_download_url_no_error(self, mocked_urlretrieve, _):
+    """Tests that download_url works when there is no error."""
+    self.assertTrue(http_utils.download_url(self.URL, self.FILE_PATH))
+    self.assertEqual(1, mocked_urlretrieve.call_count)
+
+  @mock.patch('time.sleep')
+  @mock.patch('logging.error')
+  @mock.patch('requests.get',
+              return_value=mock.MagicMock(status_code=404, content=b''))
+  def test_download_url_http_error(self, mocked_get, mocked_error, _):
+    """Tests that download_url doesn't retry when there is an HTTP error."""
+    self.assertFalse(http_utils.download_url(self.URL, self.FILE_PATH))
+    mocked_error.assert_called_with(
+        'Unable to download from: %s. Code: %d. Content: %s.', self.URL, 404,
+        b'')
+    self.assertEqual(1, mocked_get.call_count)
+
+  @mock.patch('time.sleep')
+  @mock.patch('requests.get', side_effect=ConnectionResetError)
+  def test_download_url_connection_error(self, mocked_get, mocked_sleep):
+    """Tests that download_url retries when there is a connection error."""
+    self.assertFalse(http_utils.download_url(self.URL, self.FILE_PATH))
+    self.assertEqual(4, mocked_get.call_count)
+    self.assertEqual(3, mocked_sleep.call_count)
+
+
+class DownloadAndUnpackZipTest(fake_filesystem_unittest.TestCase):
+  """Tests download_and_unpack_zip."""
+
+  def setUp(self):
+    self.setUpPyfakefs()
+
+  @mock.patch('requests.get', return_value=mocked_get_response)
+  def test_bad_zip_download(self, _):
+    """Tests download_and_unpack_zip returns False when a bad zip is passed."""
+    self.fs.create_file('/url_tmp.zip', contents='Test file.')
+    self.assertFalse(
+        http_utils.download_and_unpack_zip('/not/a/real/url',
+                                           '/extract-directory'))
diff --git a/infra/cifuzz/requirements.txt b/infra/cifuzz/requirements.txt
new file mode 100644
index 0000000..9d84d35
--- /dev/null
+++ b/infra/cifuzz/requirements.txt
@@ -0,0 +1 @@
+requests==2.25.1
diff --git a/infra/run_fuzzers.Dockerfile b/infra/run_fuzzers.Dockerfile
index b00bb12..2ae3b74 100644
--- a/infra/run_fuzzers.Dockerfile
+++ b/infra/run_fuzzers.Dockerfile
@@ -22,5 +22,9 @@
 # just expand to '/opt/oss-fuzz'.
 ENTRYPOINT ["python3", "/opt/oss-fuzz/infra/cifuzz/run_fuzzers_entrypoint.py"]
 
+WORKDIR ${OSS_FUZZ_ROOT}/infra
+
 # Copy infra source code.
-ADD . ${OSS_FUZZ_ROOT}/infra
\ No newline at end of file
+ADD . ${OSS_FUZZ_ROOT}/infra
+
+RUN python3 -m pip install -r ${OSS_FUZZ_ROOT}/infra/cifuzz/requirements.txt
\ No newline at end of file