[infra][NFC] Use one source of truth for engines, languages etc. (#6163)

Do this only where it makes sense. For example, since CIFuzz doesn't
support dataflow, it keeps its own separate list of sanitizers.
diff --git a/infra/ci/build.py b/infra/ci/build.py
index 8de7363..5919e44 100755
--- a/infra/ci/build.py
+++ b/infra/ci/build.py
@@ -25,15 +25,17 @@
 import subprocess
 import yaml
 
+# pylint: disable=wrong-import-position,import-error
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import constants
+
 CANARY_PROJECT = 'skcms'
 
 DEFAULT_ARCHITECTURES = ['x86_64']
 DEFAULT_ENGINES = ['afl', 'honggfuzz', 'libfuzzer']
 DEFAULT_SANITIZERS = ['address', 'undefined']
 
-# Languages from project.yaml that have code coverage support.
-LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'jvm', 'rust']
-
 
 def get_changed_files_output():
   """Returns the output of a git command that discovers changed files."""
@@ -112,7 +114,7 @@
     return False
 
   language = project_yaml.get('language')
-  if language not in LANGUAGES_WITH_COVERAGE_SUPPORT:
+  if language not in constants.LANGUAGES_WITH_COVERAGE_SUPPORT:
     print(('Project is written in "{language}", '
            'coverage is not supported yet.').format(language=language))
     return False
diff --git a/infra/cifuzz/config_utils.py b/infra/cifuzz/config_utils.py
index d9a8f0c..cac0762 100644
--- a/infra/cifuzz/config_utils.py
+++ b/infra/cifuzz/config_utils.py
@@ -24,26 +24,10 @@
 # pylint: disable=wrong-import-position,import-error
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import helper
+import constants
 
 RUN_FUZZERS_MODES = ['batch', 'ci', 'coverage']
-
-# TODO(metzman): Make one source of truth for these in helper.py
 SANITIZERS = ['address', 'memory', 'undefined', 'coverage']
-LANGUAGES = [
-    'c',
-    'c++',
-    'go',
-    'jvm',
-    'python',
-    'rust',
-    'swift',
-]
-
-DEFAULT_ENGINE = 'libfuzzer'
-DEFAULT_ARCHITECTURE = 'x86_64'
-DEFAULT_LANGUAGE = 'c++'
-DEFAULT_SANITIZER = 'address'
 
 # This module deals a lot with env variables. Many of these will be set by users
 # and others beyond CIFuzz's control. Thus, you should be careful about using
@@ -60,7 +44,7 @@
 
 
 def _get_sanitizer():
-  return os.getenv('SANITIZER', DEFAULT_SANITIZER).lower()
+  return os.getenv('SANITIZER', constants.DEFAULT_SANITIZER).lower()
 
 
 def _is_dry_run():
@@ -75,7 +59,7 @@
   # getting it from the project.yaml) is outweighed by the complexity in
   # implementing this. A lot of the complexity comes from our unittests not
   # setting a proper projet at this point.
-  return os.getenv('LANGUAGE', DEFAULT_LANGUAGE)
+  return os.getenv('LANGUAGE', constants.DEFAULT_LANGUAGE)
 
 
 # pylint: disable=too-few-public-methods,too-many-instance-attributes
@@ -211,7 +195,8 @@
 
     self.sanitizer = _get_sanitizer()
 
-    self.build_integration_path = helper.DEFAULT_RELATIVE_BUILD_INTEGRATION_PATH
+    self.build_integration_path = (
+        constants.DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH)
     self.language = _get_language()
     self.low_disk_space = environment.get_bool('LOW_DISK_SPACE', False)
 
@@ -239,9 +224,9 @@
                     self.sanitizer, SANITIZERS)
       return False
 
-    if self.language not in LANGUAGES:
+    if self.language not in constants.LANGUAGES:
       logging.error('Invalid LANGUAGE: %s. Must be one of: %s.', self.language,
-                    LANGUAGES)
+                    constants.LANGUAGES)
       return False
 
     return True
diff --git a/infra/cifuzz/config_utils_test.py b/infra/cifuzz/config_utils_test.py
index d904fbf..a8da59a 100644
--- a/infra/cifuzz/config_utils_test.py
+++ b/infra/cifuzz/config_utils_test.py
@@ -17,6 +17,7 @@
 from unittest import mock
 
 import config_utils
+import constants
 import test_helpers
 
 # pylint: disable=no-self-use,protected-access
@@ -72,8 +73,7 @@
     config = self._create_config()
     self.assertFalse(config.validate())
     mocked_error.assert_called_with('Invalid LANGUAGE: %s. Must be one of: %s.',
-                                    os.environ['LANGUAGE'],
-                                    config_utils.LANGUAGES)
+                                    os.environ['LANGUAGE'], constants.LANGUAGES)
 
   @mock.patch('logging.error')
   def test_validate_invalid_sanitizer(self, mocked_error):
diff --git a/infra/cifuzz/docker.py b/infra/cifuzz/docker.py
index 15858b4..e20e5ca 100644
--- a/infra/cifuzz/docker.py
+++ b/infra/cifuzz/docker.py
@@ -16,10 +16,10 @@
 import os
 import sys
 
-import config_utils
 # pylint: disable=wrong-import-position,import-error
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+import constants
 import utils
 
 BASE_BUILDER_TAG = 'gcr.io/oss-fuzz-base/base-builder'
@@ -29,8 +29,8 @@
 
 _DEFAULT_DOCKER_RUN_ARGS = [
     '--cap-add', 'SYS_PTRACE', '-e',
-    'FUZZING_ENGINE=' + config_utils.DEFAULT_ENGINE, '-e',
-    'ARCHITECTURE=' + config_utils.DEFAULT_ARCHITECTURE, '-e', 'CIFUZZ=True'
+    'FUZZING_ENGINE=' + constants.DEFAULT_ENGINE, '-e',
+    'ARCHITECTURE=' + constants.DEFAULT_ARCHITECTURE, '-e', 'CIFUZZ=True'
 ]
 
 EXTERNAL_PROJECT_IMAGE = 'external-project'
@@ -69,8 +69,8 @@
 
 
 def get_base_docker_run_args(workspace,
-                             sanitizer=config_utils.DEFAULT_SANITIZER,
-                             language=config_utils.DEFAULT_LANGUAGE):
+                             sanitizer=constants.DEFAULT_SANITIZER,
+                             language=constants.DEFAULT_LANGUAGE):
   """Returns arguments that should be passed to every invocation of 'docker
   run'."""
   docker_args = _DEFAULT_DOCKER_RUN_ARGS.copy()
@@ -92,8 +92,8 @@
 
 
 def get_base_docker_run_command(workspace,
-                                sanitizer=config_utils.DEFAULT_SANITIZER,
-                                language=config_utils.DEFAULT_LANGUAGE):
+                                sanitizer=constants.DEFAULT_SANITIZER,
+                                language=constants.DEFAULT_LANGUAGE):
   """Returns part of the command that should be used everytime 'docker run' is
   invoked."""
   docker_args, docker_container = get_base_docker_run_args(
diff --git a/infra/constants.py b/infra/constants.py
new file mode 100644
index 0000000..2fd1836
--- /dev/null
+++ b/infra/constants.py
@@ -0,0 +1,36 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Constants for OSS-Fuzz."""
+
+DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH = '.clusterfuzzlite'
+
+DEFAULT_LANGUAGE = 'c++'
+DEFAULT_SANITIZER = 'address'
+DEFAULT_ARCHITECTURE = 'x86_64'
+DEFAULT_ENGINE = 'libfuzzer'
+LANGUAGES = [
+    'c',
+    'c++',
+    'go',
+    'jvm',
+    'python',
+    'rust',
+    'swift',
+]
+LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'jvm', 'rust']
+SANITIZERS = ['address', 'none', 'memory', 'undefined', 'dataflow', 'thread']
+ARCHITECTURES = ['i386', 'x86_64']
+ENGINES = ['libfuzzer', 'afl', 'honggfuzz', 'dataflow', 'none']
diff --git a/infra/helper.py b/infra/helper.py
index 69e9e2d..db97698 100755
--- a/infra/helper.py
+++ b/infra/helper.py
@@ -30,6 +30,8 @@
 import sys
 import templates
 
+import constants
+
 OSS_FUZZ_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 BUILD_DIR = os.path.join(OSS_FUZZ_DIR, 'build')
 
@@ -46,9 +48,6 @@
 VALID_PROJECT_NAME_REGEX = re.compile(r'^[a-zA-Z0-9_-]+$')
 MAX_PROJECT_NAME_LENGTH = 26
 
-if sys.version_info[0] >= 3:
-  raw_input = input  # pylint: disable=invalid-name
-
 CORPUS_URL_FORMAT = (
     'gs://{project_name}-corpus.clusterfuzz-external.appspot.com/libFuzzer/'
     '{fuzz_target}/')
@@ -58,12 +57,10 @@
 
 PROJECT_LANGUAGE_REGEX = re.compile(r'\s*language\s*:\s*([^\s]+)')
 
-# Languages from project.yaml that have code coverage support.
-LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go', 'jvm', 'rust']
-
 WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')
 
-DEFAULT_RELATIVE_BUILD_INTEGRATION_PATH = '.clusterfuzzlite'
+if sys.version_info[0] >= 3:
+  raw_input = input  # pylint: disable=invalid-name
 
 # pylint: disable=too-many-lines
 
@@ -72,10 +69,11 @@
   """Class representing a project that is in OSS-Fuzz or an external project
   (ClusterFuzzLite user)."""
 
-  def __init__(self,
-               project_name_or_path,
-               is_external=False,
-               build_integration_path=DEFAULT_RELATIVE_BUILD_INTEGRATION_PATH):
+  def __init__(
+      self,
+      project_name_or_path,
+      is_external=False,
+      build_integration_path=constants.DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH):
     self.is_external = is_external
     if self.is_external:
       self.name = os.path.basename(os.path.abspath(project_name_or_path))
@@ -97,7 +95,7 @@
     """Returns project language."""
     if self.is_external:
       # TODO(metzman): Handle this properly.
-      return 'c++'
+      return constants.DEFAULT_LANGUAGE
 
     project_yaml_path = os.path.join(self.path, 'project.yaml')
     with open(project_yaml_path) as file_handle:
@@ -144,7 +142,7 @@
     if args.engine == 'dataflow':
       args.sanitizer = 'dataflow'
     else:
-      args.sanitizer = 'address'
+      args.sanitizer = constants.DEFAULT_SANITIZER
 
   if args.command == 'generate':
     result = generate(args)
@@ -260,12 +258,8 @@
   check_build_parser = subparsers.add_parser(
       'check_build', help='Checks that fuzzers execute without errors.')
   _add_architecture_args(check_build_parser)
-  _add_engine_args(
-      check_build_parser,
-      choices=['libfuzzer', 'afl', 'honggfuzz', 'dataflow', 'none'])
-  _add_sanitizer_args(
-      check_build_parser,
-      choices=['address', 'memory', 'undefined', 'dataflow', 'thread'])
+  _add_engine_args(check_build_parser, choices=constants.ENGINES)
+  _add_sanitizer_args(check_build_parser, choices=constants.SANITIZERS)
   _add_environment_args(check_build_parser)
   check_build_parser.add_argument('project',
                                   help='name of the project or path (external)')
@@ -412,22 +406,28 @@
   return _get_project_build_subdir(project, 'out')
 
 
-def _add_architecture_args(parser, choices=('x86_64', 'i386')):
+def _add_architecture_args(parser, choices=None):
   """Adds common architecture args."""
-  parser.add_argument('--architecture', default='x86_64', choices=choices)
+  if choices is None:
+    choices = constants.ARCHITECTURES
+  parser.add_argument('--architecture',
+                      default=constants.DEFAULT_ARCHITECTURE,
+                      choices=choices)
 
 
-def _add_engine_args(parser,
-                     choices=('libfuzzer', 'afl', 'honggfuzz', 'dataflow',
-                              'none')):
+def _add_engine_args(parser, choices=None):
   """Adds common engine args."""
-  parser.add_argument('--engine', default='libfuzzer', choices=choices)
+  if choices is None:
+    choices = constants.ENGINES
+  parser.add_argument('--engine',
+                      default=constants.DEFAULT_ENGINE,
+                      choices=choices)
 
 
-def _add_sanitizer_args(parser,
-                        choices=('address', 'memory', 'undefined', 'coverage',
-                                 'dataflow', 'thread')):
+def _add_sanitizer_args(parser, choices=None):
   """Adds common sanitizer args."""
+  if choices is None:
+    choices = constants.SANITIZERS
   parser.add_argument(
       '--sanitizer',
       default=None,
@@ -706,9 +706,9 @@
 
   fuzzing_language = args.project.language
   if not fuzzing_language:
-    logging.warning(
-        'Language not specified in project.yaml. Defaulting to C++.')
-    fuzzing_language = 'c++'
+    fuzzing_language = constants.DEFAULT_LANGUAGE
+    logging.warning('Language not specified in project.yaml. Defaulting to %s.',
+                    fuzzing_language)
 
   env = [
       'FUZZING_ENGINE=' + args.engine,
@@ -845,7 +845,7 @@
   if not check_project_exists(args.project):
     return False
 
-  if args.project.language not in LANGUAGES_WITH_COVERAGE_SUPPORT:
+  if args.project.language not in constants.LANGUAGES_WITH_COVERAGE_SUPPORT:
     logging.error(
         'Project is written in %s, coverage for it is not supported yet.',
         args.project.language)
diff --git a/infra/helper_test.py b/infra/helper_test.py
index 572a767..6f0bc88 100644
--- a/infra/helper_test.py
+++ b/infra/helper_test.py
@@ -21,6 +21,7 @@
 
 from pyfakefs import fake_filesystem_unittest
 
+import constants
 import helper
 import templates
 
@@ -159,7 +160,7 @@
     self.assertEqual(
         self.external_project.build_integration_path,
         os.path.join(self.external_project_path,
-                     helper.DEFAULT_RELATIVE_BUILD_INTEGRATION_PATH))
+                     constants.DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH))
 
   def test_init_internal_project(self):
     """Tests __init__ method for internal projects."""
@@ -180,7 +181,7 @@
     self.assertEqual(
         self.external_project.dockerfile_path,
         os.path.join(self.external_project_path,
-                     helper.DEFAULT_RELATIVE_BUILD_INTEGRATION_PATH,
+                     constants.DEFAULT_EXTERNAL_BUILD_INTEGRATION_PATH,
                      'Dockerfile'))
 
   def test_out(self):
diff --git a/infra/presubmit.py b/infra/presubmit.py
index bd3e252..8b72b3a 100755
--- a/infra/presubmit.py
+++ b/infra/presubmit.py
@@ -23,6 +23,8 @@
 import unittest
 import yaml
 
+import constants
+
 _SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 
@@ -73,9 +75,9 @@
   # Sections in a project.yaml and the constant values that they are allowed
   # to have.
   SECTIONS_AND_CONSTANTS = {
-      'sanitizers': {'address', 'none', 'memory', 'undefined', 'dataflow'},
-      'architectures': {'i386', 'x86_64'},
-      'fuzzing_engines': {'afl', 'libfuzzer', 'honggfuzz', 'dataflow', 'none'},
+      'sanitizers': constants.SANITIZERS,
+      'architectures': constants.ARCHITECTURES,
+      'fuzzing_engines': constants.ENGINES,
   }
 
   # Note: this list must be updated when we allow new sections.
@@ -100,16 +102,6 @@
       'view_restrictions',
   ]
 
-  SUPPORTED_LANGUAGES = [
-      'c',
-      'c++',
-      'go',
-      'jvm',
-      'python',
-      'rust',
-      'swift',
-  ]
-
   # Note that some projects like boost only have auto-ccs. However, forgetting
   # primary contact is probably a mistake.
   REQUIRED_SECTIONS = ['primary_contact', 'main_repo']
@@ -226,10 +218,10 @@
     language = self.data.get('language')
     if not language:
       self.error('Missing "language" attribute in project.yaml.')
-    elif language not in self.SUPPORTED_LANGUAGES:
+    elif language not in constants.LANGUAGES:
       self.error(
           '"language: {language}" is not supported ({supported}).'.format(
-              language=language, supported=self.SUPPORTED_LANGUAGES))
+              language=language, supported=constants.LANGUAGES))
 
 
 def _check_one_project_yaml(project_yaml_filename):