Deduplicate gathered file list first to avoid overprocessing

The target file list is full of duplicates, and iterating over it is
slow because of its size. Deduplicating first speeds up the process
drastically.

Test: presubmit, manual
Bug: 433529093

Change-Id: I001337ee8a53a8862ef2cc18f277c0fd9f3358d4
diff --git a/ci/optimized_targets.py b/ci/optimized_targets.py
index 0056047..95d3c2e 100644
--- a/ci/optimized_targets.py
+++ b/ci/optimized_targets.py
@@ -391,26 +391,34 @@
     print(f'modules: {self.modules_to_build}')
 
     logging.info('Getting host outputs')
-    host_outputs = [str(src_top) + '/' + file for file in self._general_tests_host_outputs if any('/'+module+'/' in file for module in self.modules_to_build)]
+    deduplicated_host_outputs = set(self._general_tests_host_outputs)
+    intermediate_host_outputs = [p for p in deduplicated_host_outputs if pathlib.Path(str(src_top) + '/' + p.strip()).exists()]
+    host_outputs = [str(src_top) + '/' + file for file in intermediate_host_outputs if any('/'+module+'/' in file for module in self.modules_to_build)]
+    logging.info('host_outputs size: %d', len(host_outputs))
     host_manifest_files, host_module_with_manifest_files = self._get_manifest_files(host_outputs)
     extra_host_files = self._get_base_module_names(host_manifest_files, host_module_with_manifest_files)
     host_outputs.extend(extra_host_files)
 
     logging.info('Getting target outputs')
-    target_outputs = [str(src_top) + '/' + file for file in self._general_tests_target_outputs if any('/'+module+'/' in file for module in self.modules_to_build)]
+    deduplicated_target_outputs = set(self._general_tests_target_outputs)
+    intermediate_target_outputs = [p for p in deduplicated_target_outputs if pathlib.Path(str(src_top) + '/' + p.strip()).exists()]
+    target_outputs = [str(src_top) + '/' + file for file in intermediate_target_outputs if any('/'+module+'/' in file for module in self.modules_to_build)]
+    logging.info('target_outputs size: %d', len(target_outputs))
     target_manifest_files, target_module_with_manifest_files = self._get_manifest_files(target_outputs)
     extra_target_files = self._get_base_module_names(target_manifest_files, target_module_with_manifest_files)
     target_outputs.extend(extra_target_files)
-    # Dedup entries in output and remove non-existent files.
+    # Dedup final entries in output and remove non-existent files.
     logging.info('Handling host and target outputs')
     host_outputs = set(host_outputs)
     host_outputs = [p for p in host_outputs if pathlib.Path(p.strip()).exists()]
     target_outputs = set(target_outputs)
     target_outputs = [p for p in target_outputs if pathlib.Path(p.strip()).exists()]
+    logging.info('host_outputs final size: %d', len(host_outputs))
+    logging.info('target_outputs final size: %d', len(target_outputs))
 
     host_config_files = [file for file in host_outputs if file.endswith('.config\n')]
     target_config_files = [file for file in target_outputs if file.endswith('.config\n')]
-    logging.info('final with outputs:')
+    logging.info('final outputs:')
     logging.info(host_outputs)
     logging.info(target_outputs)
     with open(f"{tmp_dir / 'host.list'}", 'w') as host_list_file: