Generate SBOM of the target product in file sbom.spdx.json in product out directory.

Original aosp/2374663 was reverted, so this change contains the implementation but keeps it disabled by default; the SBOM will not be built by default with "m dist".

The feature will be enabled later in small CLs after running tests successfully in forrest.

Test: m sbom
Test: m dist
Test: on aosp, lunch aosp_bluejay-userdebug && m dist
Bug: 266726655
Change-Id: I926d0f97f3a0330ef61d059f12ea660005d370e6
diff --git a/core/Makefile b/core/Makefile
index b346500..dec2e0c 100644
--- a/core/Makefile
+++ b/core/Makefile
@@ -474,7 +474,10 @@
     $(eval BOARD_$(1)_KERNEL_MODULES_LOAD$(_sep)$(_kver) := $(BOARD_$(1)_KERNEL_MODULES$(_sep)$(_kver)))) \
   $(if $(filter false,$(BOARD_$(1)_KERNEL_MODULES_LOAD$(_sep)$(_kver))),\
     $(eval BOARD_$(1)_KERNEL_MODULES_LOAD$(_sep)$(_kver) :=),) \
-  $(call copy-many-files,$(call build-image-kernel-modules,$(BOARD_$(1)_KERNEL_MODULES$(_sep)$(_kver)),$(2),$(3),$(call intermediates-dir-for,PACKAGING,depmod_$(1)$(_sep)$(_kver)),$(BOARD_$(1)_KERNEL_MODULES_LOAD$(_sep)$(_kver)),$(4),$(BOARD_$(1)_KERNEL_MODULES_ARCHIVE$(_sep)$(_kver)),$(_stripped_staging_dir),$(_kver),$(7),$(8)))) \
+  $(eval _files := $(call build-image-kernel-modules,$(BOARD_$(1)_KERNEL_MODULES$(_sep)$(_kver)),$(2),$(3),$(call intermediates-dir-for,PACKAGING,depmod_$(1)$(_sep)$(_kver)),$(BOARD_$(1)_KERNEL_MODULES_LOAD$(_sep)$(_kver)),$(4),$(BOARD_$(1)_KERNEL_MODULES_ARCHIVE$(_sep)$(_kver)),$(_stripped_staging_dir),$(_kver),$(7),$(8))) \
+  $(call copy-many-files,$(_files)) \
+  $(eval _modules := $(BOARD_$(1)_KERNEL_MODULES$(_sep)$(_kver)) ANDROID-GEN ANDROID-GEN ANDROID-GEN ANDROID-GEN) \
+  $(eval KERNEL_MODULE_COPY_FILES += $(join $(addsuffix :,$(_modules)),$(_files)))) \
 $(if $(_kver), \
   $(eval _dir := $(_kver)/), \
   $(eval _dir :=)) \
@@ -487,6 +490,7 @@
   $(eval $(call build-image-kernel-modules-blocklist-file, \
     $(BOARD_$(1)_KERNEL_MODULES_BLOCKLIST_FILE$(_sep)$(_kver)), \
     $(2)/lib/modules/$(_dir)modules.blocklist)) \
+  $(eval ALL_KERNEL_MODULES_BLOCKLIST += $(2)/lib/modules/$(_dir)modules.blocklist) \
   $(2)/lib/modules/$(_dir)modules.blocklist)
 endef
 
@@ -1635,6 +1639,21 @@
 target_system_dlkm_notice_file_xml_gz := $(TARGET_OUT_INTERMEDIATES)/NOTICE_SYSTEM_DLKM.xml.gz
 installed_system_dlkm_notice_xml_gz := $(TARGET_OUT_SYSTEM_DLKM)/etc/NOTICE.xml.gz
 
+ALL_INSTALLED_NOTICE_FILES := \
+  $(installed_notice_html_or_xml_gz) \
+  $(installed_vendor_notice_xml_gz) \
+  $(installed_product_notice_xml_gz) \
+  $(installed_system_ext_notice_xml_gz) \
+  $(installed_odm_notice_xml_gz) \
+  $(installed_vendor_dlkm_notice_xml_gz) \
+  $(installed_odm_dlkm_notice_xml_gz) \
+  $(installed_system_dlkm_notice_xml_gz) \
+
+# $1 installed file path, e.g. out/target/product/vsoc_x86_64/system_ext/etc/NOTICE.xml.gz. Expands to Y on an exact match with an entry of ALL_INSTALLED_NOTICE_FILES, else empty.
+define is-notice-file
+$(if $(filter $1,$(ALL_INSTALLED_NOTICE_FILES)),Y)
+endef
+
 # Notice files are copied to TARGET_OUT_NOTICE_FILES as a side-effect of their module
 # being built. A notice xml file must depend on all modules that could potentially
 # install a license file relevant to it.
@@ -3061,15 +3080,19 @@
 	    --cert $$(PRIVATE_KEY).x509.pem \
 	    --key $$(PRIVATE_KEY).pk8
 
-ALL_DEFAULT_INSTALLED_MODULES += $(1)
+$(1).idsig: $(1)
+
+ALL_DEFAULT_INSTALLED_MODULES += $(1) $(1).idsig
 
 endef  # fsverity-generate-and-install-manifest-apk
 
 $(eval $(call fsverity-generate-and-install-manifest-apk, \
   $(TARGET_OUT)/etc/security/fsverity/BuildManifest.apk,system))
+ALL_FSVERITY_BUILD_MANIFEST_APK += $(TARGET_OUT)/etc/security/fsverity/BuildManifest.apk $(TARGET_OUT)/etc/security/fsverity/BuildManifest.apk.idsig
 ifdef BUILDING_SYSTEM_EXT_IMAGE
   $(eval $(call fsverity-generate-and-install-manifest-apk, \
     $(TARGET_OUT_SYSTEM_EXT)/etc/security/fsverity/BuildManifestSystemExt.apk,system_ext))
+  ALL_FSVERITY_BUILD_MANIFEST_APK += $(TARGET_OUT_SYSTEM_EXT)/etc/security/fsverity/BuildManifestSystemExt.apk $(TARGET_OUT_SYSTEM_EXT)/etc/security/fsverity/BuildManifestSystemExt.apk.idsig
 endif
 
 endif  # PRODUCT_FSVERITY_GENERATE_METADATA
@@ -3141,6 +3164,7 @@
 $(call declare-license-deps,$(SYSTEM_LINKER_CONFIG),$(INTERNAL_SYSTEMIMAGE_FILES) $(SYSTEM_LINKER_CONFIG_SOURCE))
 
 FULL_SYSTEMIMAGE_DEPS += $(SYSTEM_LINKER_CONFIG)
+ALL_DEFAULT_INSTALLED_MODULES += $(SYSTEM_LINKER_CONFIG)
 
 # installed file list
 # Depending on anything that $(BUILT_SYSTEMIMAGE) depends on.
@@ -3524,6 +3548,7 @@
 		--output $@ --value "$(STUB_LIBRARIES)" --system "$(TARGET_OUT_VENDOR)"
 $(call define declare-0p-target,$(vendor_linker_config_file),)
 INTERNAL_VENDORIMAGE_FILES += $(vendor_linker_config_file)
+ALL_DEFAULT_INSTALLED_MODULES += $(vendor_linker_config_file)
 
 INSTALLED_FILES_FILE_VENDOR := $(PRODUCT_OUT)/installed-files-vendor.txt
 INSTALLED_FILES_JSON_VENDOR := $(INSTALLED_FILES_FILE_VENDOR:.txt=.json)
diff --git a/core/app_prebuilt_internal.mk b/core/app_prebuilt_internal.mk
index eb429cd..9fab44d 100644
--- a/core/app_prebuilt_internal.mk
+++ b/core/app_prebuilt_internal.mk
@@ -302,3 +302,7 @@
 
 endif # LOCAL_PACKAGE_SPLITS
 
+###########################################################
+## SBOM generation
+###########################################################
+include $(BUILD_SBOM_GEN)
\ No newline at end of file
diff --git a/core/base_rules.mk b/core/base_rules.mk
index ec5a21e..c453469 100644
--- a/core/base_rules.mk
+++ b/core/base_rules.mk
@@ -949,6 +949,8 @@
     $(ALL_MODULES.$(my_register_name).CHECKED) $(my_checked_module)
 ALL_MODULES.$(my_register_name).BUILT := \
     $(ALL_MODULES.$(my_register_name).BUILT) $(LOCAL_BUILT_MODULE)
+ALL_MODULES.$(my_register_name).SOONG_MODULE_TYPE := \
+    $(ALL_MODULES.$(my_register_name).SOONG_MODULE_TYPE) $(LOCAL_SOONG_MODULE_TYPE)
 ifndef LOCAL_IS_HOST_MODULE
 ALL_MODULES.$(my_register_name).TARGET_BUILT := \
     $(ALL_MODULES.$(my_register_name).TARGET_BUILT) $(LOCAL_BUILT_MODULE)
@@ -1240,3 +1242,8 @@
 ###########################################################
 
 include $(BUILD_NOTICE_FILE)
+
+###########################################################
+## SBOM generation
+###########################################################
+include $(BUILD_SBOM_GEN)
\ No newline at end of file
diff --git a/core/clear_vars.mk b/core/clear_vars.mk
index e325760..8913ad0 100644
--- a/core/clear_vars.mk
+++ b/core/clear_vars.mk
@@ -293,6 +293,7 @@
 LOCAL_SOONG_LICENSE_METADATA :=
 LOCAL_SOONG_LINK_TYPE :=
 LOCAL_SOONG_LINT_REPORTS :=
+LOCAL_SOONG_MODULE_TYPE :=
 LOCAL_SOONG_PROGUARD_DICT :=
 LOCAL_SOONG_PROGUARD_USAGE_ZIP :=
 LOCAL_SOONG_RESOURCE_EXPORT_PACKAGE :=
diff --git a/core/config.mk b/core/config.mk
index 025a3a1..1bb9a96 100644
--- a/core/config.mk
+++ b/core/config.mk
@@ -232,6 +232,7 @@
 BUILD_FUZZ_TEST :=$= $(BUILD_SYSTEM)/fuzz_test.mk
 
 BUILD_NOTICE_FILE :=$= $(BUILD_SYSTEM)/notice_files.mk
+BUILD_SBOM_GEN :=$= $(BUILD_SYSTEM)/sbom.mk
 
 include $(BUILD_SYSTEM)/deprecation.mk
 
@@ -641,6 +642,8 @@
 DEXDUMP := $(HOST_OUT_EXECUTABLES)/dexdump$(BUILD_EXECUTABLE_SUFFIX)
 PROFMAN := $(HOST_OUT_EXECUTABLES)/profman
 
+GEN_SBOM := $(HOST_OUT_EXECUTABLES)/generate-sbom
+
 FINDBUGS_DIR := external/owasp/sanitizer/tools/findbugs/bin
 FINDBUGS := $(FINDBUGS_DIR)/findbugs
 
diff --git a/core/dex_preopt_odex_install.mk b/core/dex_preopt_odex_install.mk
index b303b52..05bb669 100644
--- a/core/dex_preopt_odex_install.mk
+++ b/core/dex_preopt_odex_install.mk
@@ -447,6 +447,7 @@
 
   my_dexpreopt_script := $(intermediates)/dexpreopt.sh
   my_dexpreopt_zip := $(intermediates)/dexpreopt.zip
+  DEXPREOPT.$(LOCAL_MODULE).POST_INSTALLED_DEXPREOPT_ZIP := $(my_dexpreopt_zip)
   .KATI_RESTAT: $(my_dexpreopt_script)
   $(my_dexpreopt_script): PRIVATE_MODULE := $(LOCAL_MODULE)
   $(my_dexpreopt_script): PRIVATE_GLOBAL_SOONG_CONFIG := $(DEX_PREOPT_SOONG_CONFIG_FOR_MAKE)
diff --git a/core/main.mk b/core/main.mk
index 3866037..25b66ea 100644
--- a/core/main.mk
+++ b/core/main.mk
@@ -2019,6 +2019,84 @@
 # missing dependency errors.
 $(call build-license-metadata)
 
+# Generate SBOM in SPDX format
+product_copy_files_without_owner := $(foreach pcf,$(PRODUCT_COPY_FILES),$(call word-colon,1,$(pcf)):$(call word-colon,2,$(pcf)))
+ifeq ($(TARGET_BUILD_APPS),)
+dest_files_without_source := $(sort $(foreach pcf,$(product_copy_files_without_owner),$(if $(wildcard $(call word-colon,1,$(pcf))),,$(call word-colon,2,$(pcf)))))
+dest_files_without_source := $(addprefix $(PRODUCT_OUT)/,$(dest_files_without_source))
+installed_files := $(sort $(filter-out $(PRODUCT_OUT)/apex/% $(PRODUCT_OUT)/fake_packages/% $(PRODUCT_OUT)/testcases/% $(dest_files_without_source),$(filter $(PRODUCT_OUT)/%,$(modules_to_install))))
+else
+installed_files := $(apps_only_installed_files)
+endif
+
+# sbom-metadata.csv contains all raw data collected in Make for generating SBOM in generate-sbom.py.
+# There are multiple columns and each identifies the source of an installed file for a specific case.
+# The columns and their uses are described below:
+#   installed_file: the file path on device, e.g. /product/app/Browser2/Browser2.apk
+#   module_path: the path of the module that generates the installed file, e.g. packages/apps/Browser2
+#   soong_module_type: Soong module type, e.g. android_app, cc_binary
+#   is_prebuilt_make_module: Y, if the installed file is from a prebuilt Make module, see prebuilt_internal.mk
+#   product_copy_files: the installed file is from variable PRODUCT_COPY_FILES, e.g. device/google/cuttlefish/shared/config/init.product.rc:product/etc/init/init.rc
+#   kernel_module_copy_files: the installed file is from variable KERNEL_MODULE_COPY_FILES, similar to product_copy_files
+#   is_platform_generated: this is an aggregated value including some small cases instead of adding more columns. It is set to Y if any case is Y
+#       is_build_prop: build.prop in each partition, see sysprop.mk.
+#       is_notice_file: NOTICE.xml.gz in each partition, see Makefile.
+#       is_dexpreopt_image_profile: see the usage of DEXPREOPT_IMAGE_PROFILE_BUILT_INSTALLED in Soong and Make
+#       is_product_system_other_avbkey: see INSTALLED_PRODUCT_SYSTEM_OTHER_AVBKEY_TARGET
+#       is_system_other_odex_marker: see INSTALLED_SYSTEM_OTHER_ODEX_MARKER
+#       is_event_log_tags_file: see variable event_log_tags_file in Makefile
+#       is_kernel_modules_blocklist: modules.blocklist created for _dlkm partitions, see macro build-image-kernel-modules-dir in Makefile.
+#       is_fsverity_build_manifest_apk: BuildManifest<part>.apk files for system and system_ext partition, see ALL_FSVERITY_BUILD_MANIFEST_APK in Makefile.
+#       is_linker_config: see SYSTEM_LINKER_CONFIG and vendor_linker_config_file in Makefile.
+
+# (TODO: b/272358583 find another way of always rebuilding this target)
+# Remove the sbom-metadata.csv whenever makefile is evaluated
+$(shell rm $(PRODUCT_OUT)/sbom-metadata.csv >/dev/null 2>&1)
+$(PRODUCT_OUT)/sbom-metadata.csv: $(installed_files)
+	rm -f $@
+	@echo installed_file$(comma)module_path$(comma)soong_module_type$(comma)is_prebuilt_make_module$(comma)product_copy_files$(comma)kernel_module_copy_files$(comma)is_platform_generated >> $@
+	$(foreach f,$(installed_files),\
+	  $(eval _module_name := $(ALL_INSTALLED_FILES.$f)) \
+	  $(eval _path_on_device := $(patsubst $(PRODUCT_OUT)/%,%,$f)) \
+	  $(eval _module_path := $(strip $(sort $(ALL_MODULES.$(_module_name).PATH)))) \
+	  $(eval _soong_module_type := $(strip $(sort $(ALL_MODULES.$(_module_name).SOONG_MODULE_TYPE)))) \
+	  $(eval _is_prebuilt_make_module := $(ALL_MODULES.$(_module_name).IS_PREBUILT_MAKE_MODULE)) \
+	  $(eval _post_installed_dexpreopt_zip := $(DEXPREOPT.$(_module_name).POST_INSTALLED_DEXPREOPT_ZIP)) \
+	  $(eval _product_copy_files := $(sort $(filter %:$(_path_on_device),$(product_copy_files_without_owner)))) \
+	  $(eval _kernel_module_copy_files := $(sort $(filter %$(_path_on_device),$(KERNEL_MODULE_COPY_FILES)))) \
+	  $(eval _is_build_prop := $(call is-build-prop,$f)) \
+	  $(eval _is_notice_file := $(call is-notice-file,$f)) \
+	  $(eval _is_dexpreopt_image_profile := $(if $(filter %:/$(_path_on_device),$(DEXPREOPT_IMAGE_PROFILE_BUILT_INSTALLED)),Y)) \
+	  $(eval _is_product_system_other_avbkey := $(if $(findstring $f,$(INSTALLED_PRODUCT_SYSTEM_OTHER_AVBKEY_TARGET)),Y)) \
+	  $(eval _is_event_log_tags_file := $(if $(findstring $f,$(event_log_tags_file)),Y)) \
+	  $(eval _is_system_other_odex_marker := $(if $(findstring $f,$(INSTALLED_SYSTEM_OTHER_ODEX_MARKER)),Y)) \
+	  $(eval _is_kernel_modules_blocklist := $(if $(findstring $f,$(ALL_KERNEL_MODULES_BLOCKLIST)),Y)) \
+	  $(eval _is_fsverity_build_manifest_apk := $(if $(findstring $f,$(ALL_FSVERITY_BUILD_MANIFEST_APK)),Y)) \
+	  $(eval _is_linker_config := $(if $(findstring $f,$(SYSTEM_LINKER_CONFIG) $(vendor_linker_config_file)),Y)) \
+	  $(eval _is_platform_generated := $(_is_build_prop)$(_is_notice_file)$(_is_dexpreopt_image_profile)$(_is_product_system_other_avbkey)$(_is_event_log_tags_file)$(_is_system_other_odex_marker)$(_is_kernel_modules_blocklist)$(_is_fsverity_build_manifest_apk)$(_is_linker_config)) \
+	  @echo /$(_path_on_device)$(comma)$(_module_path)$(comma)$(_soong_module_type)$(comma)$(_is_prebuilt_make_module)$(comma)$(_product_copy_files)$(comma)$(_kernel_module_copy_files)$(comma)$(_is_platform_generated) >> $@ $(newline) \
+	  $(if $(_post_installed_dexpreopt_zip), \
+	  for i in $$(zipinfo -1 $(_post_installed_dexpreopt_zip)); do echo /$$i$(comma)$(_module_path)$(comma)$(_soong_module_type)$(comma)$(_is_prebuilt_make_module)$(comma)$(_product_copy_files)$(comma)$(_kernel_module_copy_files)$(comma)$(_is_platform_generated) >> $@ ; done $(newline) \
+	  ) \
+	)
+
+.PHONY: sbom
+ifeq ($(TARGET_BUILD_APPS),)
+sbom: $(PRODUCT_OUT)/sbom.spdx.json
+$(PRODUCT_OUT)/sbom.spdx.json: $(PRODUCT_OUT)/sbom.spdx
+$(PRODUCT_OUT)/sbom.spdx: $(PRODUCT_OUT)/sbom-metadata.csv $(GEN_SBOM)
+	rm -rf $@
+	$(GEN_SBOM) --output_file $@ --metadata $(PRODUCT_OUT)/sbom-metadata.csv --product_out_dir=$(PRODUCT_OUT) --build_version $(BUILD_FINGERPRINT_FROM_FILE) --product_mfr=$(PRODUCT_MANUFACTURER) --json
+
+else
+apps_only_sbom_files := $(sort $(patsubst %,%.spdx,$(apps_only_installed_files)))
+$(apps_only_sbom_files): $(PRODUCT_OUT)/sbom-metadata.csv $(GEN_SBOM)
+	rm -rf $@
+	$(GEN_SBOM) --output_file $@ --metadata $(PRODUCT_OUT)/sbom-metadata.csv --product_out_dir=$(PRODUCT_OUT) --build_version $(BUILD_FINGERPRINT_FROM_FILE) --product_mfr=$(PRODUCT_MANUFACTURER) --unbundled
+
+sbom: $(apps_only_sbom_files)
+endif
+
 $(call dist-write-file,$(KATI_PACKAGE_MK_DIR)/dist.mk)
 
 $(info [$(call inc_and_print,subdir_makefiles_inc)/$(subdir_makefiles_total)] writing build rules ...)
diff --git a/core/prebuilt_internal.mk b/core/prebuilt_internal.mk
index ef1471d..5bea9b6 100644
--- a/core/prebuilt_internal.mk
+++ b/core/prebuilt_internal.mk
@@ -57,6 +57,9 @@
   $(error $(LOCAL_MODULE) : unexpected LOCAL_MODULE_CLASS for prebuilts: $(LOCAL_MODULE_CLASS))
 endif
 
+$(if $(filter-out $(SOONG_ANDROID_MK),$(LOCAL_MODULE_MAKEFILE)), \
+  $(eval ALL_MODULES.$(my_register_name).IS_PREBUILT_MAKE_MODULE := Y))
+
 $(built_module) : $(LOCAL_ADDITIONAL_DEPENDENCIES)
 
 my_prebuilt_src_file :=
diff --git a/core/sbom.mk b/core/sbom.mk
new file mode 100644
index 0000000..e23bbc1
--- /dev/null
+++ b/core/sbom.mk
@@ -0,0 +1,11 @@
+# For SBOM generation
+# This is included by base_rules.mk and does not need to be included in other .mk files
+# unless a .mk file changes its installed files after including base_rules.mk.
+
+ifdef my_register_name
+  ifneq (, $(strip $(ALL_MODULES.$(my_register_name).INSTALLED)))
+    $(foreach installed_file,$(ALL_MODULES.$(my_register_name).INSTALLED),\
+      $(eval ALL_INSTALLED_FILES.$(installed_file) := $(my_register_name))\
+    )
+  endif
+endif
\ No newline at end of file
diff --git a/core/soong_app_prebuilt.mk b/core/soong_app_prebuilt.mk
index 786a755..583788d 100644
--- a/core/soong_app_prebuilt.mk
+++ b/core/soong_app_prebuilt.mk
@@ -267,3 +267,8 @@
 endif
 
 SOONG_ALREADY_CONV += $(LOCAL_MODULE)
+
+###########################################################
+## SBOM generation
+###########################################################
+include $(BUILD_SBOM_GEN)
\ No newline at end of file
diff --git a/core/sysprop.mk b/core/sysprop.mk
index 6e2caed..bd6f3d9 100644
--- a/core/sysprop.mk
+++ b/core/sysprop.mk
@@ -543,3 +543,19 @@
     $(empty)))
 
 $(eval $(call declare-1p-target,$(INSTALLED_RAMDISK_BUILD_PROP_TARGET)))
+
+ALL_INSTALLED_BUILD_PROP_FILES := \
+  $(INSTALLED_BUILD_PROP_TARGET) \
+  $(INSTALLED_VENDOR_BUILD_PROP_TARGET) \
+  $(INSTALLED_PRODUCT_BUILD_PROP_TARGET) \
+  $(INSTALLED_ODM_BUILD_PROP_TARGET) \
+  $(INSTALLED_VENDOR_DLKM_BUILD_PROP_TARGET) \
+  $(INSTALLED_ODM_DLKM_BUILD_PROP_TARGET) \
+  $(INSTALLED_SYSTEM_DLKM_BUILD_PROP_TARGET) \
+  $(INSTALLED_SYSTEM_EXT_BUILD_PROP_TARGET) \
+  $(INSTALLED_RAMDISK_BUILD_PROP_TARGET)
+
+# $1 installed file path, e.g. out/target/product/vsoc_x86_64/system/build.prop. Expands to Y on an exact match with an entry of ALL_INSTALLED_BUILD_PROP_FILES, else empty.
+define is-build-prop
+$(if $(filter $1,$(ALL_INSTALLED_BUILD_PROP_FILES)),Y)
+endef
\ No newline at end of file
diff --git a/tools/Android.bp b/tools/Android.bp
index f446973..c5c02c6 100644
--- a/tools/Android.bp
+++ b/tools/Android.bp
@@ -69,3 +69,19 @@
   name: "generate_gts_shared_report",
   srcs: ["generate_gts_shared_report.py"],
 }
+
+python_binary_host {
+    name: "generate-sbom",
+    srcs: [
+        "generate-sbom.py",
+    ],
+    version: {
+        py3: {
+            embedded_launcher: true,
+        },
+    },
+    libs: [
+        "metadata_file_proto_py",
+        "libprotobuf-python",
+    ],
+}
diff --git a/tools/generate-sbom.py b/tools/generate-sbom.py
new file mode 100755
index 0000000..54057c9
--- /dev/null
+++ b/tools/generate-sbom.py
@@ -0,0 +1,684 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2023 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate the SBOM of the current target product in SPDX format.
+Usage example:
+  generate-sbom.py --output_file out/target/product/vsoc_x86_64/sbom.spdx \
+                   --metadata out/target/product/vsoc_x86_64/sbom-metadata.csv \
+                   --product_out_dir=out/target/product/vsoc_x86_64 \
+                   --build_version $(cat out/target/product/vsoc_x86_64/build_fingerprint.txt) \
+                   --product_mfr=Google
+"""
+
+import argparse
+import csv
+import datetime
+import google.protobuf.text_format as text_format
+import hashlib
+import json
+import os
+import metadata_file_pb2
+
+# Common
+SPDXID = 'SPDXID'
+SPDX_VERSION = 'SPDXVersion'
+DATA_LICENSE = 'DataLicense'
+DOCUMENT_NAME = 'DocumentName'
+DOCUMENT_NAMESPACE = 'DocumentNamespace'
+CREATED = 'Created'
+CREATOR = 'Creator'
+EXTERNAL_DOCUMENT_REF = 'ExternalDocumentRef'
+
+# Package
+PACKAGE_NAME = 'PackageName'
+PACKAGE_DOWNLOAD_LOCATION = 'PackageDownloadLocation'
+PACKAGE_VERSION = 'PackageVersion'
+PACKAGE_SUPPLIER = 'PackageSupplier'
+FILES_ANALYZED = 'FilesAnalyzed'
+PACKAGE_VERIFICATION_CODE = 'PackageVerificationCode'
+PACKAGE_EXTERNAL_REF = 'ExternalRef'
+# Package license
+PACKAGE_LICENSE_CONCLUDED = 'PackageLicenseConcluded'
+PACKAGE_LICENSE_INFO_FROM_FILES = 'PackageLicenseInfoFromFiles'
+PACKAGE_LICENSE_DECLARED = 'PackageLicenseDeclared'
+PACKAGE_LICENSE_COMMENTS = 'PackageLicenseComments'
+
+# File
+FILE_NAME = 'FileName'
+FILE_CHECKSUM = 'FileChecksum'
+# File license
+FILE_LICENSE_CONCLUDED = 'LicenseConcluded'
+FILE_LICENSE_INFO_IN_FILE = 'LicenseInfoInFile'
+FILE_LICENSE_COMMENTS = 'LicenseComments'
+FILE_COPYRIGHT_TEXT = 'FileCopyrightText'
+FILE_NOTICE = 'FileNotice'
+FILE_ATTRIBUTION_TEXT = 'FileAttributionText'
+
+# Relationship
+RELATIONSHIP = 'Relationship'
+REL_DESCRIBES = 'DESCRIBES'
+REL_VARIANT_OF = 'VARIANT_OF'
+REL_GENERATED_FROM = 'GENERATED_FROM'
+
+# Package type
+PKG_SOURCE = 'SOURCE'
+PKG_UPSTREAM = 'UPSTREAM'
+PKG_PREBUILT = 'PREBUILT'
+
+# Security tag
+NVD_CPE23 = 'NVD-CPE2.3:'
+
+# Report
+ISSUE_NO_METADATA = 'No metadata generated in Make for installed files:'
+ISSUE_NO_METADATA_FILE = 'No METADATA file found for installed file:'
+ISSUE_METADATA_FILE_INCOMPLETE = 'METADATA file incomplete:'
+ISSUE_UNKNOWN_SECURITY_TAG_TYPE = "Unknown security tag type:"
+INFO_METADATA_FOUND_FOR_PACKAGE = 'METADATA file found for packages:'
+
+
+def get_args():
+  parser = argparse.ArgumentParser()
+  parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print more information.')
+  parser.add_argument('--output_file', required=True, help='The generated SBOM file in SPDX format.')
+  parser.add_argument('--metadata', required=True, help='The SBOM metadata file path.')
+  parser.add_argument('--product_out_dir', required=True, help='The parent directory of all the installed files.')
+  parser.add_argument('--build_version', required=True, help='The build version.')
+  parser.add_argument('--product_mfr', required=True, help='The product manufacturer.')
+  parser.add_argument('--json', action='store_true', default=False, help='Generated SBOM file in SPDX JSON format')
+  parser.add_argument('--unbundled', action='store_true', default=False, help='Generate SBOM file for unbundled module')
+
+  return parser.parse_args()
+
+
+def log(*info):
+  if args.verbose:
+    for i in info:
+      print(i)
+
+
+def new_doc_header(doc_id):
+  return {
+      SPDX_VERSION: 'SPDX-2.3',
+      DATA_LICENSE: 'CC0-1.0',
+      SPDXID: doc_id,
+      DOCUMENT_NAME: args.build_version,
+      DOCUMENT_NAMESPACE: 'https://www.google.com/sbom/spdx/android/' + args.build_version,
+      CREATOR: 'Organization: Google, LLC',
+      CREATED: '<timestamp>',
+      EXTERNAL_DOCUMENT_REF: [],
+  }
+
+
+def new_package_record(id, name, version, supplier, download_location=None, files_analyzed='false', external_refs=[]):
+  package = {
+      PACKAGE_NAME: name,
+      SPDXID: id,
+      PACKAGE_DOWNLOAD_LOCATION: download_location if download_location else 'NONE',
+      FILES_ANALYZED: files_analyzed,
+  }
+  if version:
+    package[PACKAGE_VERSION] = version
+  if supplier:
+    package[PACKAGE_SUPPLIER] = 'Organization: ' + supplier
+  if external_refs:
+    package[PACKAGE_EXTERNAL_REF] = external_refs
+
+  return package
+
+
+def new_file_record(id, name, checksum):
+  return {
+      FILE_NAME: name,
+      SPDXID: id,
+      FILE_CHECKSUM: checksum
+  }
+
+
+def encode_for_spdxid(s):
+  """Encode a string for use in an SPDXID, whose allowed charset is A-Za-z0-9.-"""
+  def encode_char(ch):
+    # Alphanumerics, '.' and '-' pass through unchanged.
+    if ch.isalnum() or ch in '.-':
+      return ch
+    # '_', '@' and '/' become '-'; anything else becomes '0x' followed by the hex of its UTF-8 bytes.
+    if ch in '_@/':
+      return '-'
+    return '0x' + ch.encode('utf-8').hex()
+
+  return ''.join(encode_char(ch) for ch in s).lstrip('-')
+
+
+def new_package_id(package_name, type):
+  return 'SPDXRef-{}-{}'.format(type, encode_for_spdxid(package_name))
+
+
+def new_external_doc_ref(package_name, sbom_url, sbom_checksum):
+  doc_ref_id = 'DocumentRef-{}-{}'.format(PKG_UPSTREAM, encode_for_spdxid(package_name))
+  return '{}: {} {} {}'.format(EXTERNAL_DOCUMENT_REF, doc_ref_id, sbom_url, sbom_checksum), doc_ref_id
+
+
+def new_file_id(file_path):
+  return 'SPDXRef-' + encode_for_spdxid(file_path)
+
+
+def new_relationship_record(id1, relationship, id2):
+  return '{}: {} {} {}'.format(RELATIONSHIP, id1, relationship, id2)
+
+
+def checksum(file_path):
+  file_path = args.product_out_dir + '/' + file_path
+  h = hashlib.sha1()
+  if os.path.islink(file_path):
+    h.update(os.readlink(file_path).encode('utf-8'))
+  else:
+    with open(file_path, "rb") as f:
+      h.update(f.read())
+  return "SHA1: " + h.hexdigest()
+
+
+def is_soong_prebuilt_module(file_metadata):
+  """Return a truthy value if file_metadata['soong_module_type'] is one of the Soong prebuilt module types listed below; an empty type is returned as-is (falsy)."""
+  return file_metadata['soong_module_type'] and file_metadata['soong_module_type'] in [
+      'android_app_import', 'android_library_import', 'cc_prebuilt_binary', 'cc_prebuilt_library',
+      'cc_prebuilt_library_headers', 'cc_prebuilt_library_shared', 'cc_prebuilt_library_static', 'cc_prebuilt_object',
+      'dex_import', 'java_import', 'java_sdk_library_import', 'java_system_modules_import',
+      'libclang_rt_prebuilt_library_static', 'libclang_rt_prebuilt_library_shared', 'llvm_prebuilt_library_static',
+      'ndk_prebuilt_object', 'ndk_prebuilt_shared_stl', 'ndk_prebuilt_static_stl', 'prebuilt_apex',
+      'prebuilt_bootclasspath_fragment', 'prebuilt_dsp', 'prebuilt_firmware', 'prebuilt_kernel_modules',
+      'prebuilt_rfsa', 'prebuilt_root', 'rust_prebuilt_dylib', 'rust_prebuilt_library', 'rust_prebuilt_rlib', 'vndk_prebuilt_shared',
+
+      # 'android_test_import',
+      # 'cc_prebuilt_test_library_shared',
+      # 'java_import_host',
+      # 'java_test_import',
+      # 'llvm_host_prebuilt_library_shared',
+      # 'prebuilt_apis',
+      # 'prebuilt_build_tool',
+      # 'prebuilt_defaults',
+      # 'prebuilt_etc',
+      # 'prebuilt_etc_host',
+      # 'prebuilt_etc_xml',
+      # 'prebuilt_font',
+      # 'prebuilt_hidl_interfaces',
+      # 'prebuilt_platform_compat_config',
+      # 'prebuilt_stubs_sources',
+      # 'prebuilt_usr_share',
+      # 'prebuilt_usr_share_host',
+      # 'soong_config_module_type_import',
+  ]
+
+
+def is_source_package(file_metadata):
+  module_path = file_metadata['module_path']
+  return module_path.startswith('external/') and not is_prebuilt_package(file_metadata)
+
+
+def is_prebuilt_package(file_metadata):
+  module_path = file_metadata['module_path']
+  if module_path:
+    return (module_path.startswith('prebuilts/') or
+            is_soong_prebuilt_module(file_metadata) or
+            file_metadata['is_prebuilt_make_module'])
+
+  kernel_module_copy_files = file_metadata['kernel_module_copy_files']
+  if kernel_module_copy_files and not kernel_module_copy_files.startswith('ANDROID-GEN:'):
+    return True
+
+  return False
+
+
+def get_source_package_info(file_metadata, metadata_file_path):
+  """Return (name, external_refs): the package name and the ExternalRef lines built from the METADATA security tags."""
+  if not metadata_file_path:
+    return file_metadata['module_path'], []
+  metadata_proto = metadata_file_protos[metadata_file_path]
+  external_refs = []
+  for tag in metadata_proto.third_party.security.tag:
+    # The tag prefix is matched case-insensitively below, so strip it by length instead of a case-sensitive removeprefix().
+    cpe = tag[len(NVD_CPE23):]
+    if tag.lower().startswith((NVD_CPE23 + 'cpe:2.3:').lower()):
+      external_refs.append("{}: SECURITY cpe23Type {}".format(PACKAGE_EXTERNAL_REF, cpe))
+    elif tag.lower().startswith((NVD_CPE23 + 'cpe:/').lower()):
+      external_refs.append("{}: SECURITY cpe22Type {}".format(PACKAGE_EXTERNAL_REF, cpe))
+
+  # Use the directory name only as package name when the METADATA file does not set a name.
+  return metadata_proto.name or os.path.basename(metadata_file_path), external_refs
+
+
+def get_prebuilt_package_name(file_metadata, metadata_file_path):
+  name = None
+  if metadata_file_path:
+    metadata_proto = metadata_file_protos[metadata_file_path]
+    if metadata_proto.name:
+      name = metadata_proto.name
+    else:
+      name = metadata_file_path
+  elif file_metadata['module_path']:
+    name = file_metadata['module_path']
+  elif file_metadata['kernel_module_copy_files']:
+    src_path = file_metadata['kernel_module_copy_files'].split(':')[0]
+    name = os.path.dirname(src_path)
+
+  return name.removeprefix('prebuilts/').replace('/', '-')
+
+
+def get_metadata_file_path(file_metadata):
+  metadata_path = ''
+  if file_metadata['module_path']:
+    metadata_path = file_metadata['module_path']
+  elif file_metadata['kernel_module_copy_files']:
+    metadata_path = os.path.dirname(file_metadata['kernel_module_copy_files'].split(':')[0])
+
+  while metadata_path and not os.path.exists(metadata_path + '/METADATA'):
+    metadata_path = os.path.dirname(metadata_path)
+
+  return metadata_path
+
+
+def get_package_version(metadata_file_path):
+  if not metadata_file_path:
+    return None
+  metadata_proto = metadata_file_protos[metadata_file_path]
+  return metadata_proto.third_party.version
+
+
+def get_package_homepage(metadata_file_path):
+  if not metadata_file_path:
+    return None
+  metadata_proto = metadata_file_protos[metadata_file_path]
+  if metadata_proto.third_party.homepage:
+    return metadata_proto.third_party.homepage
+  for url in metadata_proto.third_party.url:
+    if url.type == metadata_file_pb2.URL.Type.HOMEPAGE:
+      return url.value
+
+  return None
+
+
+def get_package_download_location(metadata_file_path):
+  if not metadata_file_path:
+    return None
+  metadata_proto = metadata_file_protos[metadata_file_path]
+  if metadata_proto.third_party.url:
+    urls = sorted(metadata_proto.third_party.url, key=lambda url: url.type)
+    if urls[0].type != metadata_file_pb2.URL.Type.HOMEPAGE:
+      return urls[0].value
+    elif len(urls) > 1:
+      return urls[1].value
+
+  return None
+
+
+def get_sbom_fragments(installed_file_metadata, metadata_file_path):
+  external_doc_ref = None
+  packages = []
+  relationships = []
+
+  # Info from METADATA file
+  homepage = get_package_homepage(metadata_file_path)
+  version = get_package_version(metadata_file_path)
+  download_location = get_package_download_location(metadata_file_path)
+
+  if is_source_package(installed_file_metadata):
+    # Source fork packages
+    name, external_refs = get_source_package_info(installed_file_metadata, metadata_file_path)
+    source_package_id = new_package_id(name, PKG_SOURCE)
+    source_package = new_package_record(source_package_id, name, args.build_version, args.product_mfr,
+                                        external_refs=external_refs)
+
+    upstream_package_id = new_package_id(name, PKG_UPSTREAM)
+    upstream_package = new_package_record(upstream_package_id, name, version, homepage, download_location)
+    packages += [source_package, upstream_package]
+    relationships.append(new_relationship_record(source_package_id, REL_VARIANT_OF, upstream_package_id))
+  elif is_prebuilt_package(installed_file_metadata):
+    # Prebuilt fork packages
+    name = get_prebuilt_package_name(installed_file_metadata, metadata_file_path)
+    prebuilt_package_id = new_package_id(name, PKG_PREBUILT)
+    prebuilt_package = new_package_record(prebuilt_package_id, name, args.build_version, args.product_mfr)
+    packages.append(prebuilt_package)
+
+    if metadata_file_path:
+      metadata_proto = metadata_file_protos[metadata_file_path]
+      if metadata_proto.third_party.WhichOneof('sbom') == 'sbom_ref':
+        sbom_url = metadata_proto.third_party.sbom_ref.url
+        sbom_checksum = metadata_proto.third_party.sbom_ref.checksum
+        upstream_element_id = metadata_proto.third_party.sbom_ref.element_id
+        if sbom_url and sbom_checksum and upstream_element_id:
+          external_doc_ref, doc_ref_id = new_external_doc_ref(name, sbom_url, sbom_checksum)
+          relationships.append(
+              new_relationship_record(prebuilt_package_id, REL_VARIANT_OF, doc_ref_id + ':' + upstream_element_id))
+
+  return external_doc_ref, packages, relationships
+
+
+def generate_package_verification_code(files):
+  """Return the SPDX package verification code for files: the SHA1 of their sorted, concatenated SHA1 digests."""
+  # checksum() in this file labels digests as 'SHA1: <hex>', but the SPDX algorithm hashes the bare hex
+  # digests, so the label is dropped (when present) before sorting and concatenating.
+  digests = sorted(file[FILE_CHECKSUM].removeprefix('SHA1: ') for file in files)
+  return hashlib.sha1(''.join(digests).encode(encoding='utf-8')).hexdigest()
+
+
+def write_record(f, record):
+  """Writes one SBOM record to f in tag-value form, followed by a blank line.
+
+  A dict record is written as one 'key: value' line per entry, except for the
+  EXTERNAL_DOCUMENT_REF and PACKAGE_EXTERNAL_REF entries, whose values are lists
+  of ready-made lines that are written one per line. A str record is written
+  as is. Any other kind of record only produces the trailing blank line.
+  """
+  kind = record.__class__.__name__
+  if kind == 'str':
+    f.write(record + '\n')
+  elif kind == 'dict':
+    for tag, value in record.items():
+      if tag in (EXTERNAL_DOCUMENT_REF, PACKAGE_EXTERNAL_REF):
+        for line in value:
+          f.write(line + '\n')
+        continue
+      f.write('{}: {}\n'.format(tag, value))
+  f.write('\n')
+
+
+def write_tagvalue_sbom(all_records):
+  """Writes all_records, in order, to args.output_file in tag-value format."""
+  with open(args.output_file, 'w', encoding="utf-8") as sbom_file:
+    for record in all_records:
+      write_record(sbom_file, record)
+
+
+def write_json_sbom(all_records, product_package_id):
+  """Writes the SBOM records as a JSON document to args.output_file + '.json'.
+
+  Args:
+    all_records: the SBOM records in output order. dict records are the document
+      header (has DOCUMENT_NAME), packages (have PACKAGE_NAME) and files (have
+      FILE_NAME); str records starting with RELATIONSHIP are relationships. The
+      document header has to come first because it creates the lists that all
+      other records are appended to.
+    product_package_id: SPDXID of the product package. The SPDXIDs of all file
+      records are listed under 'hasFiles' of that package.
+  """
+  doc = {}
+  # SPDXIDs of all file records. The list is shared with the product package once
+  # that package is seen, so files are attached regardless of record order and a
+  # missing product package no longer crashes on the first file record.
+  product_file_ids = []
+  for r in all_records:
+    if r.__class__.__name__ == 'dict':
+      if DOCUMENT_NAME in r:  # Doc header
+        doc['spdxVersion'] = r[SPDX_VERSION]
+        doc['dataLicense'] = r[DATA_LICENSE]
+        doc[SPDXID] = r[SPDXID]
+        doc['name'] = r[DOCUMENT_NAME]
+        doc['documentNamespace'] = r[DOCUMENT_NAMESPACE]
+        doc['creationInfo'] = {
+            'creators': [r[CREATOR]],
+            'created': r[CREATED],
+        }
+        doc['externalDocumentRefs'] = []
+        for ref in r[EXTERNAL_DOCUMENT_REF]:
+          # ref is 'ExternalDocumentRef: <doc id> <doc url> <algorithm>: <checksum>'.
+          # The space after the colon is optional here because the sbom_ref.checksum
+          # field of METADATA files is documented as 'SHA1:<checksum>'.
+          fields = ref.split(' ')
+          algorithm, _, checksum_value = ' '.join(fields[3:]).partition(':')
+          doc_ref = {
+              'externalDocumentId': fields[1],
+              'spdxDocument': fields[2],
+              'checksum': {
+                  'algorithm': algorithm,
+                  'checksumValue': checksum_value.strip()
+              }
+          }
+          doc['externalDocumentRefs'].append(doc_ref)
+        doc['documentDescribes'] = []
+        doc['packages'] = []
+        doc['files'] = []
+        doc['relationships'] = []
+
+      elif PACKAGE_NAME in r:  # packages
+        package = {
+            'name': r[PACKAGE_NAME],
+            SPDXID: r[SPDXID],
+            'downloadLocation': r[PACKAGE_DOWNLOAD_LOCATION],
+            'filesAnalyzed': r[FILES_ANALYZED] == "true"
+        }
+        if PACKAGE_VERSION in r:
+          package['versionInfo'] = r[PACKAGE_VERSION]
+        if PACKAGE_SUPPLIER in r:
+          package['supplier'] = r[PACKAGE_SUPPLIER]
+        if PACKAGE_VERIFICATION_CODE in r:
+          package['packageVerificationCode'] = {
+              'packageVerificationCodeValue': r[PACKAGE_VERIFICATION_CODE]
+          }
+        if PACKAGE_EXTERNAL_REF in r:
+          package['externalRefs'] = []
+          for ref in r[PACKAGE_EXTERNAL_REF]:
+            # ref is 'ExternalRef: SECURITY cpe22Type cpe:/a:jsoncpp_project:jsoncpp:1.9.4'
+            fields = ref.split(' ')
+            ext_ref = {
+                'referenceCategory': fields[1],
+                'referenceType': fields[2],
+                'referenceLocator': fields[3],
+            }
+            package['externalRefs'].append(ext_ref)
+
+        doc['packages'].append(package)
+        if r[SPDXID] == product_package_id:
+          package['hasFiles'] = product_file_ids
+
+      elif FILE_NAME in r:  # files
+        file_entry = {
+            'fileName': r[FILE_NAME],
+            SPDXID: r[SPDXID]
+        }
+        # The checksum is stored as '<algorithm>: <value>'
+        checksum_fields = r[FILE_CHECKSUM].split(': ')
+        file_entry['checksums'] = [{
+            'algorithm': checksum_fields[0],
+            'checksumValue': checksum_fields[1],
+        }]
+        doc['files'].append(file_entry)
+        product_file_ids.append(r[SPDXID])
+
+    elif r.__class__.__name__ == 'str':
+      if r.startswith(RELATIONSHIP):
+        # r is 'Relationship: <spdxid> <relationship> <spdxid>'
+        fields = r.split(' ')
+        rel = {
+            'spdxElementId': fields[1],
+            'relatedSpdxElement': fields[3],
+            'relationshipType': fields[2],
+        }
+        # DESCRIBES relationships go to 'documentDescribes' instead of 'relationships'
+        if fields[2] == REL_DESCRIBES:
+          doc['documentDescribes'].append(fields[3])
+        else:
+          doc['relationships'].append(rel)
+
+  with open(args.output_file + '.json', 'w', encoding="utf-8") as output_file:
+    output_file.write(json.dumps(doc, indent=4))
+
+
+def save_report(report):
+  """Saves the report to '<args.output_file without its extension>-gen-report.txt'.
+
+  Every entry of report becomes one section: the key on a line of its own, then
+  one tab-indented line per issue, then a blank line.
+  """
+  report_path = os.path.splitext(args.output_file)[0] + '-gen-report.txt'
+  with open(report_path, 'w', encoding="utf-8") as report_file:
+    for issue_type, issues in report.items():
+      section = [issue_type + '\n']
+      section += ['\t' + issue + '\n' for issue in issues]
+      section.append('\n')
+      report_file.write(''.join(section))
+
+
+def sort_rels(rel):
+  """Returns the sort key of a relationship: the related id followed by the element id.
+
+  rel is a space separated line such as 'Relationship file_id GENERATED_FROM package_id';
+  its fourth and second fields are concatenated, in that order.
+  """
+  tokens = rel.split(' ')
+  related_id, element_id = tokens[3], tokens[1]
+  return related_id + element_id
+
+
+# Check that Make generated some metadata for an installed file; files without any are
+# added to the report under ISSUE_NO_METADATA.
+def installed_file_has_metadata(installed_file_metadata, report):
+  """Returns True if the installed file has metadata, else reports it and returns False.
+
+  A file has metadata when any of the module_path, product_copy_files,
+  kernel_module_copy_files or is_platform_generated columns is non-empty, or when
+  its name ends with '.fsv_meta'.
+  """
+  installed_file = installed_file_metadata['installed_file']
+  metadata_columns = (
+      installed_file_metadata['module_path'],
+      installed_file_metadata['product_copy_files'],
+      installed_file_metadata['kernel_module_copy_files'],
+      installed_file_metadata['is_platform_generated'],
+  )
+  if any(metadata_columns) or installed_file.endswith('.fsv_meta'):
+    return True
+
+  report[ISSUE_NO_METADATA].append(installed_file)
+  return False
+
+
+def report_metadata_file(metadata_file_path, installed_file_metadata, report):
+  """Reports whether a METADATA file exists for an installed file and whether it is complete.
+
+  Without a METADATA file the installed file is listed under ISSUE_NO_METADATA_FILE.
+  Otherwise it is listed under INFO_METADATA_FOUND_FOR_PACKAGE and, the first time a
+  given METADATA file is seen, the file is parsed, cached in metadata_file_protos and
+  checked for a name, a third_party.version and known security tag types.
+
+  Args:
+    metadata_file_path: directory that contains the METADATA file, or a false value
+      if there is none.
+    installed_file_metadata: the CSV row of the installed file.
+    report: dict of issue type to list of messages, appended to in place.
+  """
+  if not metadata_file_path:
+    report[ISSUE_NO_METADATA_FILE].append(
+        "installed_file: {}, module_path: {}".format(
+            installed_file_metadata['installed_file'], installed_file_metadata['module_path']))
+    return
+
+  metadata_file = metadata_file_path + '/METADATA'
+  report[INFO_METADATA_FOUND_FOR_PACKAGE].append(
+      "installed_file: {}, module_path: {}, METADATA file: {}".format(
+          installed_file_metadata['installed_file'],
+          installed_file_metadata['module_path'],
+          metadata_file))
+
+  # Many installed files share one METADATA file, so read and parse it only the first
+  # time instead of once per installed file.
+  if metadata_file_path in metadata_file_protos:
+    return
+
+  package_metadata = metadata_file_pb2.Metadata()
+  with open(metadata_file, "rt") as f:
+    text_format.Parse(f.read(), package_metadata)
+  metadata_file_protos[metadata_file_path] = package_metadata
+
+  if not package_metadata.name:
+    report[ISSUE_METADATA_FILE_INCOMPLETE].append('{} does not has "name"'.format(metadata_file))
+
+  if not package_metadata.third_party.version:
+    report[ISSUE_METADATA_FILE_INCOMPLETE].append(
+        '{} does not has "third_party.version"'.format(metadata_file))
+
+  for tag in package_metadata.third_party.security.tag:
+    if not tag.startswith(NVD_CPE23):
+      report[ISSUE_UNKNOWN_SECURITY_TAG_TYPE].append(
+          "Unknown security tag type: {} in {}".format(tag, metadata_file))
+
+
+def generate_fragment():
+  """Writes the SBOM fragment of the one installed file that args.output_file belongs to.
+
+  The metadata CSV is scanned for the first row whose installed file, prefixed with
+  args.product_out_dir and suffixed with '.spdx', equals args.output_file. For that row
+  a package record, a file record and a GENERATED_FROM relationship between them are
+  written in tag-value format. Nothing is written when no row matches.
+  """
+  with open(args.metadata, newline='') as sbom_metadata_file:
+    for row in csv.DictReader(sbom_metadata_file):
+      installed_file = row['installed_file']
+      if args.product_out_dir + installed_file + ".spdx" == args.output_file:
+        module_path = row['module_path']
+        package_id = new_package_id(encode_for_spdxid(module_path), PKG_PREBUILT)
+        package = new_package_record(package_id, module_path, args.build_version, args.product_mfr)
+        file_id = new_file_id(installed_file)
+        file_record = new_file_record(file_id, installed_file, checksum(installed_file))
+        relationship = new_relationship_record(file_id, REL_GENERATED_FROM, package_id)
+        write_tagvalue_sbom([package, file_record, relationship])
+        return
+
+
+def main():
+  """Generates the SBOM of the product from the metadata CSV file args.metadata.
+
+  When args.unbundled is set, only the fragment of a single installed file is written.
+  Otherwise every CSV row that has metadata becomes a file record of the PRODUCT package
+  and is related (GENERATED_FROM) to the package it comes from: a source or prebuilt fork
+  package, or the PLATFORM package. The records are written in tag-value format, also as
+  JSON when args.json is set, and the issues found on the way are saved in a report.
+  """
+  global args
+  args = get_args()
+  log("Args:", vars(args))
+
+  if args.unbundled:
+    generate_fragment()
+    return
+
+  global metadata_file_protos
+  metadata_file_protos = {}
+
+  doc_id = 'SPDXRef-DOCUMENT'
+  doc_header = new_doc_header(doc_id)
+
+  product_package_id = 'SPDXRef-PRODUCT'
+  product_package = new_package_record(product_package_id, 'PRODUCT', args.build_version, args.product_mfr,
+                                       files_analyzed='true')
+
+  platform_package_id = 'SPDXRef-PLATFORM'
+  platform_package = new_package_record(platform_package_id, 'PLATFORM', args.build_version, args.product_mfr)
+
+  # Report on some issues and information
+  report = {
+      ISSUE_NO_METADATA: [],
+      ISSUE_NO_METADATA_FILE: [],
+      ISSUE_METADATA_FILE_INCOMPLETE: [],
+      ISSUE_UNKNOWN_SECURITY_TAG_TYPE: [],
+      INFO_METADATA_FOUND_FOR_PACKAGE: []
+  }
+
+  # Scan the metadata in CSV file and create the corresponding package and file records in SPDX
+  product_files = []
+  package_records = []
+  # Package SPDXIDs and relationship lines already added to package_records. Sets keep
+  # the de-duplication cheap however many installed files share a package.
+  seen_package_ids = set()
+  seen_package_rels = set()
+  rels_file_gen_from = []
+  with open(args.metadata, newline='') as sbom_metadata_file:
+    reader = csv.DictReader(sbom_metadata_file)
+    for installed_file_metadata in reader:
+      installed_file = installed_file_metadata['installed_file']
+      module_path = installed_file_metadata['module_path']
+      product_copy_files = installed_file_metadata['product_copy_files']
+      kernel_module_copy_files = installed_file_metadata['kernel_module_copy_files']
+
+      if not installed_file_has_metadata(installed_file_metadata, report):
+        continue
+
+      file_id = new_file_id(installed_file)
+      product_files.append(new_file_record(file_id, installed_file, checksum(installed_file)))
+
+      if is_source_package(installed_file_metadata) or is_prebuilt_package(installed_file_metadata):
+        metadata_file_path = get_metadata_file_path(installed_file_metadata)
+        report_metadata_file(metadata_file_path, installed_file_metadata, report)
+
+        # File from source fork packages or prebuilt fork packages
+        external_doc_ref, pkgs, rels = get_sbom_fragments(installed_file_metadata, metadata_file_path)
+        if pkgs:
+          if external_doc_ref and external_doc_ref not in doc_header[EXTERNAL_DOCUMENT_REF]:
+            doc_header[EXTERNAL_DOCUMENT_REF].append(external_doc_ref)
+          for p in pkgs:
+            if p[SPDXID] not in seen_package_ids:
+              seen_package_ids.add(p[SPDXID])
+              package_records.append(p)
+          for rel in rels:
+            if rel not in seen_package_rels:
+              seen_package_rels.add(rel)
+              package_records.append(rel)
+          fork_package_id = pkgs[0][SPDXID]  # The first package should be the source/prebuilt fork package
+          rels_file_gen_from.append(new_relationship_record(file_id, REL_GENERATED_FROM, fork_package_id))
+      elif module_path or installed_file_metadata['is_platform_generated']:
+        # File from PLATFORM package
+        rels_file_gen_from.append(new_relationship_record(file_id, REL_GENERATED_FROM, platform_package_id))
+      elif product_copy_files:
+        # Format of product_copy_files: <source path>:<dest path>
+        # So far product_copy_files are copied from directory system, kernel, hardware, frameworks and device,
+        # so process them as files from PLATFORM package
+        rels_file_gen_from.append(new_relationship_record(file_id, REL_GENERATED_FROM, platform_package_id))
+      elif installed_file.endswith('.fsv_meta'):
+        # See build/make/core/Makefile:2988
+        rels_file_gen_from.append(new_relationship_record(file_id, REL_GENERATED_FROM, platform_package_id))
+      elif kernel_module_copy_files.startswith('ANDROID-GEN'):
+        # For the four files generated for _dlkm, _ramdisk partitions
+        # See build/make/core/Makefile:323
+        rels_file_gen_from.append(new_relationship_record(file_id, REL_GENERATED_FROM, platform_package_id))
+
+  product_package[PACKAGE_VERIFICATION_CODE] = generate_package_verification_code(product_files)
+
+  all_records = [
+      doc_header,
+      product_package,
+      new_relationship_record(doc_id, REL_DESCRIBES, product_package_id),
+  ]
+  all_records += product_files
+  all_records.append(platform_package)
+  all_records += package_records
+  rels_file_gen_from.sort(key=sort_rels)
+  all_records += rels_file_gen_from
+
+  # Save SBOM records to output file
+  doc_header[CREATED] = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+  write_tagvalue_sbom(all_records)
+  if args.json:
+    write_json_sbom(all_records, product_package_id)
+
+  save_report(report)
+
+# Generate the SBOM only when this file is run as a script, not when it is imported.
+if __name__ == '__main__':
+  main()
diff --git a/tools/protos/Android.bp b/tools/protos/Android.bp
new file mode 100644
index 0000000..c6ad19e
--- /dev/null
+++ b/tools/protos/Android.bp
@@ -0,0 +1,32 @@
+// Copyright 2023 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],  // default license of the modules in this package
+}
+
+python_library_host {
+    name: "metadata_file_proto_py",  // host Python library built from metadata_file.proto
+    version: {
+        py3: {
+            enabled: true,
+        },
+    },
+    srcs: [
+        "metadata_file.proto",
+    ],
+    proto: {
+        canonical_path_from_root: false,  // NOTE(review): presumably lets users import metadata_file_pb2 without a package path - confirm
+    },
+}
diff --git a/tools/protos/metadata_file.proto b/tools/protos/metadata_file.proto
new file mode 100644
index 0000000..ac1129a
--- /dev/null
+++ b/tools/protos/metadata_file.proto
@@ -0,0 +1,281 @@
+// Copyright (C) 2023 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package metadata_file;
+
+// Schema of the METADATA files that describe packages in the AOSP codebase.
+message Metadata {
+  // Name of the package.
+  optional string name = 1;
+
+  // A short description (a few lines) of the package.
+  // Example: "Handles location lookups, throttling, batching, etc."
+  optional string description = 2;
+
+  // Specifies additional data about third-party packages.
+  optional ThirdParty third_party = 3;
+}
+
+message ThirdParty {
+  // URL(s) associated with the package.
+  //
+  // At a minimum, all packages must specify a URL which identifies where it
+  // came from, containing a type of: ARCHIVE, GIT or OTHER. Typically,
+  // a package should contain only a single URL from these types.  Occasionally,
+  // a package may be broken across multiple archive files for whatever reason,
+  // in which case having multiple ARCHIVE URLs is okay.  However, this should
+  // not be used to combine different logical packages that are versioned and
+  // possibly licensed differently.
+  repeated URL url = 1;
+
+  // The package version.  In order of preference, this should contain:
+  //  - If the package comes from Git or another source control system,
+  //    a specific tag or revision in source control, such as "r123" or
+  //    "58e27d2".  This MUST NOT be a mutable ref such as a branch name.
+  //  - a released package version such as "1.0", "2.3-beta", etc.
+  //  - the date the package was retrieved, formatted as "As of YYYY-MM-DD".
+  optional string version = 2;
+
+  // The date of the change in which the package was last upgraded from
+  // upstream.
+  // This should only identify package upgrades from upstream, not local
+  // modifications. This may identify the date of either the original or
+  // merged change.
+  //
+  // Note: this is NOT the date that this version of the package was released
+  // externally.
+  optional Date last_upgrade_date = 3;
+
+  // License type that identifies how the package may be used.
+  optional LicenseType license_type = 4;
+
+  // An additional note explaining the licensing of this package.  This is most
+  // commonly used with commercial licenses.
+  optional string license_note = 5;
+
+  // Description of local changes that have been made to the package.  This does
+  // not need to (and in most cases should not) attempt to include an exhaustive
+  // list of all changes, but may instead direct readers to review the local
+  // commit history, a collection of patch files, a separate README.md (or
+  // similar) document, etc.
+  // Note: Use of this field to store IDs of advisories fixed with a backported
+  // patch is deprecated; use "security.mitigated_security_patch" instead.
+  optional string local_modifications = 6;
+
+  // Security related metadata including risk category and any special
+  // instructions for using the package, as determined by an ISE-TPS review.
+  optional Security security = 7;
+
+  // The type of directory this metadata represents.
+  optional DirectoryType type = 8 [default = PACKAGE];
+
+  // The homepage for the package. This will eventually replace
+  // `url { type: HOMEPAGE }`.
+  optional string homepage = 9;
+
+  // SBOM information of the package. It is mandatory for prebuilt packages.
+  oneof sbom {
+    // Reference to an external SBOM document, provided as a URL.
+    SBOMRef sbom_ref = 10;
+  }
+
+}
+
+// URL associated with a third-party package.
+message URL {
+  enum Type {
+    // The homepage for the package. For example, "https://bazel.io/". This URL
+    // is optional, but encouraged to help disambiguate similarly named packages
+    // or to get more information about the package. This is especially helpful
+    // when no other URLs provide human readable resources (such as git:// or
+    // sso:// URLs).
+    HOMEPAGE = 1;
+
+    // The URL of the archive containing the source code for the package, for
+    // example a zip or tgz file.
+    ARCHIVE = 2;
+
+    // The URL of the upstream git repository this package is retrieved from.
+    // For example:
+    //  - https://github.com/git/git.git
+    //  - git://git.kernel.org/pub/scm/git/git.git
+    //
+    // Use of a git URL requires that the package "version" value must specify a
+    // specific git tag or revision.
+    GIT = 3;
+
+    // The URL of the upstream SVN repository this package is retrieved from.
+    // For example:
+    //  - http://llvm.org/svn/llvm-project/llvm/
+    //
+    // Use of an SVN URL requires that the package "version" value must specify
+    // a specific SVN tag or revision.
+    SVN = 4;
+
+    // The URL of the upstream mercurial repository this package is retrieved
+    // from. For example:
+    //   - https://mercurial-scm.org/repo/evolve
+    //
+    // Use of a mercurial URL requires that the package "version" value must
+    // specify a specific tag or revision.
+    HG = 5;
+
+    // The URL of the upstream darcs repository this package is retrieved
+    // from. For example:
+    //   - https://hub.darcs.net/hu.dwim/hu.dwim.util
+    //
+    // Use of a DARCS URL requires that the package "version" value must
+    // specify a specific tag or revision.
+    DARCS = 6;
+
+    PIPER = 7;  // NOTE(review): undocumented here; presumably a Piper source control location - confirm
+
+    // A URL that does not fit any other type. This may also indicate that the
+    // source code was received via email or some other out-of-band way. This is
+    // most commonly used with commercial software received directly from the
+    // vendor. In the case of email, the URL value can be used to provide
+    // additional information about how it was received.
+    OTHER = 8;
+
+    // The URL identifying where the local copy of the package source code can
+    // be found.
+    //
+    // Typically, the metadata files describing a package reside in the same
+    // directory as the source code for the package. In a few rare cases where
+    // they are separate, the LOCAL_SOURCE URL identifies where to find the
+    // source code. This only describes where to find the local copy of the
+    // source; there should always be an additional URL describing where the
+    // package was retrieved from.
+    //
+    // Examples:
+    //  - https://android.googlesource.com/platform/external/apache-http/
+    LOCAL_SOURCE = 9;
+  }
+
+  // The type of resource this URL identifies.
+  optional Type type = 1;
+
+  // The actual URL value.  URLs should be absolute and start with 'http://' or
+  // 'https://' (or occasionally 'git://' or 'ftp://' where appropriate).
+  optional string value = 2;
+}
+
+// License type that identifies how a package may be used.
+enum LicenseType {
+  BY_EXCEPTION_ONLY = 1;  // first value listed, hence what proto2 returns for an unset field
+  NOTICE = 2;
+  PERMISSIVE = 3;
+  RECIPROCAL = 4;
+  RESTRICTED_IF_STATICALLY_LINKED = 5;
+  RESTRICTED = 6;
+  UNENCUMBERED = 7;
+}
+
+// Identifies security related metadata including risk category and any special
+// instructions for using the package.
+message Security {
+  // Security risk category for a package, as determined by an ISE-TPS review.
+  enum Category {
+    CATEGORY_UNSPECIFIED = 0;
+
+    // Package should only be used in a sandboxed environment.
+    // Package should have restricted visibility.
+    SANDBOXED_ONLY = 1;
+
+    // Package should not be used to process user content. It is considered
+    // safe to use to process trusted data only. Package should have restricted
+    // visibility.
+    TRUSTED_DATA_ONLY = 2;
+
+    // Package is considered safe to use.
+    REVIEWED_AND_SECURE = 3;
+  }
+
+  // Identifies the security risk category for the package.  This will be
+  // provided by the ISE-TPS team as the result of a security review of the
+  // package.
+  optional Category category = 1;
+
+  // An additional security note for the package.
+  optional string note = 2;
+
+  // Text tag to categorize the package. It is currently used by security:
+  // - to disable OSV (https://osv.dev) support, via the `OSV:disable`
+  //   tag;
+  // - to attach CPEs to their corresponding packages, for vulnerability
+  //   monitoring.
+  //
+  // Please document your use case here should you want to add one.
+  repeated string tag = 3;
+
+  // ID of advisories fixed with a mitigated patch, for example CVE-2018-1111.
+  repeated string mitigated_security_patch = 4;
+}
+
+enum DirectoryType {  // Kind of directory a METADATA file represents; see ThirdParty.type.
+  UNDEFINED = 0;
+
+  // This directory represents a package.
+  PACKAGE = 1;
+
+  // This directory is designed to organize multiple third-party PACKAGE
+  // directories.
+  GROUP = 2;
+
+  // This directory contains several PACKAGE directories representing
+  // different versions of the same third-party project.
+  VERSIONS = 3;
+}
+
+// Represents a whole or partial calendar date, such as a birthday. The time of
+// day and time zone are either specified elsewhere or are insignificant. The
+// date is relative to the Gregorian Calendar. This can represent one of the
+// following:
+//
+// * A full date, with non-zero year, month, and day values.
+// * A month and day, with a zero year (for example, an anniversary).
+// * A year on its own, with a zero month and a zero day.
+// * A year and month, with a zero day (for example, a credit card expiration
+//   date).
+message Date {  // Type of ThirdParty.last_upgrade_date.
+  // Year of the date. Must be from 1 to 9999, or 0 to specify a date without
+  // a year.
+  optional int32 year = 1;
+  // Month of a year. Must be from 1 to 12, or 0 to specify a year without a
+  // month and day.
+  optional int32 month = 2;
+  // Day of a month. Must be from 1 to 31 and valid for the year and month, or 0
+  // to specify a year by itself or a year and month where the day isn't
+  // significant.
+  optional int32 day = 3;
+}
+
+// Reference to an external SBOM document and the element in it that corresponds to the package.
+// See https://spdx.github.io/spdx-spec/v2.3/document-creation-information/#66-external-document-references-field
+message SBOMRef {
+  // The URL that points to the SBOM document of the upstream package of this
+  // third_party package.
+  optional string url = 1;
+  // Checksum of the SBOM document the url field points to.
+  // Format: <algorithm>:<checksum>, e.g. SHA1:<checksum>, for any algorithm defined in
+  // https://spdx.github.io/spdx-spec/v2.3/file-information/#8.4
+  optional string checksum = 2;
+  // SPDXID of the upstream package/file defined in the SBOM document the url field points to.
+  // Format: SPDXRef-[a-zA-Z0-9.-]+, see
+  // https://spdx.github.io/spdx-spec/v2.3/package-information/#72-package-spdx-identifier-field or
+  // https://spdx.github.io/spdx-spec/v2.3/file-information/#82-file-spdx-identifier-field
+  optional string element_id = 3;
+}
\ No newline at end of file