Allow reading zipfile list from a file.

The Android build generates 180K+ zip files that have to be merged, with
the result passed to the indexing pipeline to generate the source code
cross-reference. That many file names cannot be passed on the command
line, hence the need for this change. It required extensive changes to
the code, because it can no longer be assumed that all the input files
can be kept open at the same time.

Bug: 121267023
Test: internal + treehugger + combine index files
Change-Id: I0a829f96ff7187ad967fb6b8cede387501ec57cc
diff --git a/cmd/merge_zips/Android.bp b/cmd/merge_zips/Android.bp
index ab658fd..f70c86e 100644
--- a/cmd/merge_zips/Android.bp
+++ b/cmd/merge_zips/Android.bp
@@ -18,6 +18,7 @@
       "android-archive-zip",
       "blueprint-pathtools",
       "soong-jar",
+      "soong-zip",
     ],
     srcs: [
         "merge_zips.go",
diff --git a/cmd/merge_zips/merge_zips.go b/cmd/merge_zips/merge_zips.go
index 68fe259..27179cb 100644
--- a/cmd/merge_zips/merge_zips.go
+++ b/cmd/merge_zips/merge_zips.go
@@ -30,8 +30,566 @@
 
 	"android/soong/jar"
 	"android/soong/third_party/zip"
+	soongZip "android/soong/zip"
 )
 
+// Input zip: we can open it, close it, and obtain an array of entries
+type InputZip interface {
+	Name() string
+	Open() error
+	Close() error
+	Entries() []*zip.File
+	IsOpen() bool
+}
+
+// An entry that can be written to the output zip
+type ZipEntryContents interface {
+	String() string
+	IsDir() bool
+	CRC32() uint32
+	Size() uint64
+	WriteToZip(dest string, zw *zip.Writer) error
+}
+
+// a ZipEntryFromZip is a ZipEntryContents that pulls its content from another zip
+// identified by the input zip and the index of the entry in its entries array
+type ZipEntryFromZip struct {
+	inputZip InputZip
+	index    int
+	name     string
+	isDir    bool
+	crc32    uint32
+	size     uint64
+}
+
+func NewZipEntryFromZip(inputZip InputZip, entryIndex int) *ZipEntryFromZip {
+	fi := inputZip.Entries()[entryIndex]
+	newEntry := ZipEntryFromZip{inputZip: inputZip,
+		index: entryIndex,
+		name:  fi.Name,
+		isDir: fi.FileInfo().IsDir(),
+		crc32: fi.CRC32,
+		size:  fi.UncompressedSize64,
+	}
+	return &newEntry
+}
+
+func (ze ZipEntryFromZip) String() string {
+	return fmt.Sprintf("%s!%s", ze.inputZip.Name(), ze.name)
+}
+
+func (ze ZipEntryFromZip) IsDir() bool {
+	return ze.isDir
+}
+
+func (ze ZipEntryFromZip) CRC32() uint32 {
+	return ze.crc32
+}
+
+func (ze ZipEntryFromZip) Size() uint64 {
+	return ze.size
+}
+
+func (ze ZipEntryFromZip) WriteToZip(dest string, zw *zip.Writer) error {
+	if err := ze.inputZip.Open(); err != nil {
+		return err
+	}
+	return zw.CopyFrom(ze.inputZip.Entries()[ze.index], dest)
+}
+
+// a ZipEntryFromBuffer is a ZipEntryContents that pulls its content from a []byte
+type ZipEntryFromBuffer struct {
+	fh      *zip.FileHeader
+	content []byte
+}
+
+func (be ZipEntryFromBuffer) String() string {
+	return "internal buffer"
+}
+
+func (be ZipEntryFromBuffer) IsDir() bool {
+	return be.fh.FileInfo().IsDir()
+}
+
+func (be ZipEntryFromBuffer) CRC32() uint32 {
+	return crc32.ChecksumIEEE(be.content)
+}
+
+func (be ZipEntryFromBuffer) Size() uint64 {
+	return uint64(len(be.content))
+}
+
+func (be ZipEntryFromBuffer) WriteToZip(dest string, zw *zip.Writer) error {
+	w, err := zw.CreateHeader(be.fh)
+	if err != nil {
+		return err
+	}
+
+	if !be.IsDir() {
+		_, err = w.Write(be.content)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Processing state.
+type OutputZip struct {
+	outputWriter     *zip.Writer
+	stripDirEntries  bool
+	emulateJar       bool
+	sortEntries      bool
+	ignoreDuplicates bool
+	excludeDirs      []string
+	excludeFiles     []string
+	sourceByDest     map[string]ZipEntryContents
+}
+
+func NewOutputZip(outputWriter *zip.Writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates bool) *OutputZip {
+	return &OutputZip{
+		outputWriter:     outputWriter,
+		stripDirEntries:  stripDirEntries,
+		emulateJar:       emulateJar,
+		sortEntries:      sortEntries,
+		sourceByDest:     make(map[string]ZipEntryContents, 0),
+		ignoreDuplicates: ignoreDuplicates,
+	}
+}
+
+func (oz *OutputZip) setExcludeDirs(excludeDirs []string) {
+	oz.excludeDirs = make([]string, len(excludeDirs))
+	for i, dir := range excludeDirs {
+		oz.excludeDirs[i] = filepath.Clean(dir)
+	}
+}
+
+func (oz *OutputZip) setExcludeFiles(excludeFiles []string) {
+	oz.excludeFiles = excludeFiles
+}
+
+// Adds an entry with given name whose source is given ZipEntryContents. Returns old ZipEntryContents
+// if entry with given name already exists.
+func (oz *OutputZip) addZipEntry(name string, source ZipEntryContents) (ZipEntryContents, error) {
+	if existingSource, exists := oz.sourceByDest[name]; exists {
+		return existingSource, nil
+	}
+	oz.sourceByDest[name] = source
+	// Delay writing an entry if entries need to be rearranged.
+	if oz.emulateJar || oz.sortEntries {
+		return nil, nil
+	}
+	return nil, source.WriteToZip(name, oz.outputWriter)
+}
+
+// Adds an entry for the manifest (META-INF/MANIFEST.MF), built from the contents of the given file.
+func (oz *OutputZip) addManifest(manifestPath string) error {
+	if !oz.stripDirEntries {
+		if _, err := oz.addZipEntry(jar.MetaDir, ZipEntryFromBuffer{jar.MetaDirFileHeader(), nil}); err != nil {
+			return err
+		}
+	}
+	var manifestEntry ZipEntryFromBuffer // assigned with "=" below so that err is never shadowed
+	contents, err := ioutil.ReadFile(manifestPath)
+	if err == nil {
+		if manifestEntry.fh, manifestEntry.content, err = jar.ManifestFileContents(contents); err == nil {
+			_, err = oz.addZipEntry(jar.ManifestFile, manifestEntry)
+		}
+	}
+	return err
+}
+
+// Adds an entry with given name and contents read from given file
+func (oz *OutputZip) addZipEntryFromFile(name string, path string) error {
+	buf, err := ioutil.ReadFile(path)
+	if err == nil {
+		fh := &zip.FileHeader{
+			Name:               name,
+			Method:             zip.Store,
+			UncompressedSize64: uint64(len(buf)),
+		}
+		fh.SetMode(0700)
+		fh.SetModTime(jar.DefaultTime)
+		_, err = oz.addZipEntry(name, ZipEntryFromBuffer{fh, buf})
+	}
+	return err
+}
+
+func (oz *OutputZip) addEmptyEntry(entry string) error {
+	var emptyBuf []byte
+	fh := &zip.FileHeader{
+		Name:               entry,
+		Method:             zip.Store,
+		UncompressedSize64: uint64(len(emptyBuf)),
+	}
+	fh.SetMode(0700)
+	fh.SetModTime(jar.DefaultTime)
+	_, err := oz.addZipEntry(entry, ZipEntryFromBuffer{fh, emptyBuf})
+	return err
+}
+
+// Returns true if given entry is to be excluded
+func (oz *OutputZip) isEntryExcluded(name string) bool {
+	for _, dir := range oz.excludeDirs {
+		dir = filepath.Clean(dir)
+		patterns := []string{
+			dir + "/",      // the directory itself
+			dir + "/**/*",  // files recursively in the directory
+			dir + "/**/*/", // directories recursively in the directory
+		}
+
+		for _, pattern := range patterns {
+			match, err := pathtools.Match(pattern, name)
+			if err != nil {
+				panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+			}
+			if match {
+				if oz.emulateJar {
+					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
+					// requested.
+					// TODO(ccross): which files does this affect?
+					if name != jar.MetaDir && name != jar.ManifestFile {
+						return true
+					}
+				}
+				return true
+			}
+		}
+	}
+
+	for _, pattern := range oz.excludeFiles {
+		match, err := pathtools.Match(pattern, name)
+		if err != nil {
+			panic(fmt.Errorf("%s: %s", err.Error(), pattern))
+		}
+		if match {
+			return true
+		}
+	}
+	return false
+}
+
+// Creates a zip entry whose contents is an entry from the given input zip.
+func (oz *OutputZip) copyEntry(inputZip InputZip, index int) error {
+	entry := NewZipEntryFromZip(inputZip, index)
+	if oz.stripDirEntries && entry.IsDir() {
+		return nil
+	}
+	existingEntry, err := oz.addZipEntry(entry.name, entry)
+	if err != nil {
+		return err
+	}
+	if existingEntry == nil {
+		return nil
+	}
+
+	// File types should match
+	if existingEntry.IsDir() != entry.IsDir() {
+		return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
+			entry.name, existingEntry, entry)
+	}
+
+	if oz.ignoreDuplicates ||
+		// Skip manifest and module info files that are not from the first input file
+		(oz.emulateJar && entry.name == jar.ManifestFile || entry.name == jar.ModuleInfoClass) ||
+		// Identical entries
+		(existingEntry.CRC32() == entry.CRC32() && existingEntry.Size() == entry.Size()) ||
+		// Directory entries
+		entry.IsDir() {
+		return nil
+	}
+
+	return fmt.Errorf("Duplicate path %v found in %v and %v\n", entry.name, existingEntry, inputZip.Name())
+}
+
+func (oz *OutputZip) entriesArray() []string {
+	entries := make([]string, len(oz.sourceByDest))
+	i := 0
+	for entry := range oz.sourceByDest {
+		entries[i] = entry
+		i++
+	}
+	return entries
+}
+
+func (oz *OutputZip) jarSorted() []string {
+	entries := oz.entriesArray()
+	sort.SliceStable(entries, func(i, j int) bool { return jar.EntryNamesLess(entries[i], entries[j]) })
+	return entries
+}
+
+func (oz *OutputZip) alphanumericSorted() []string {
+	entries := oz.entriesArray()
+	sort.Strings(entries)
+	return entries
+}
+
+func (oz *OutputZip) writeEntries(entries []string) error {
+	for _, entry := range entries {
+		source, _ := oz.sourceByDest[entry]
+		if err := source.WriteToZip(entry, oz.outputWriter); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
+	// The runfiles packages need to be populated with "__init__.py".
+	// the runfiles dirs have been treated as packages.
+	allPackages := make(map[string]bool)
+	initedPackages := make(map[string]bool)
+	getPackage := func(path string) string {
+		ret := filepath.Dir(path)
+		// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
+		if ret == "." || ret == "/" {
+			return ""
+		}
+		return ret
+	}
+
+	// put existing __init__.py files to a set first. This set is used for preventing
+	// generated __init__.py files from overwriting existing ones.
+	for _, inputZip := range inputZips {
+		if err := inputZip.Open(); err != nil {
+			return nil, err
+		}
+		for _, file := range inputZip.Entries() {
+			pyPkg := getPackage(file.Name)
+			if filepath.Base(file.Name) == "__init__.py" {
+				if _, found := initedPackages[pyPkg]; found {
+					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q", file.Name))
+				}
+				initedPackages[pyPkg] = true
+			}
+			for pyPkg != "" {
+				if _, found := allPackages[pyPkg]; found {
+					break
+				}
+				allPackages[pyPkg] = true
+				pyPkg = getPackage(pyPkg)
+			}
+		}
+	}
+	noInitPackages := make([]string, 0)
+	for pyPkg := range allPackages {
+		if _, found := initedPackages[pyPkg]; !found {
+			noInitPackages = append(noInitPackages, pyPkg)
+		}
+	}
+	return noInitPackages, nil
+}
+
+// An InputZip owned by the InputZipsManager. Open ManagedInputZips are chained in the order they were opened.
+type ManagedInputZip struct {
+	owner        *InputZipsManager
+	realInputZip InputZip
+	older        *ManagedInputZip
+	newer        *ManagedInputZip
+}
+
+// Maintains the array of ManagedInputZips, keeping track of the ones that are open. Opening an InputZip
+// may cause the manager to close some other InputZip to limit the number of open ones.
+type InputZipsManager struct {
+	inputZips     []*ManagedInputZip
+	nOpenZips     int
+	maxOpenZips   int
+	openInputZips *ManagedInputZip
+}
+
+func (miz *ManagedInputZip) unlink() {
+	olderMiz := miz.older
+	newerMiz := miz.newer
+	if newerMiz.older != miz || olderMiz.newer != miz {
+		panic(fmt.Errorf("removing %p:%#v: broken list between %p:%#v and %p:%#v",
+			miz, miz, newerMiz, newerMiz, olderMiz, olderMiz))
+	}
+	olderMiz.newer = newerMiz
+	newerMiz.older = olderMiz
+	miz.newer = nil
+	miz.older = nil
+}
+
+// Inserts olderMiz, which must not be linked yet, between miz and the element currently at miz.older.
+// Panics if olderMiz already has neighbors or if the list around miz is inconsistent.
+func (miz *ManagedInputZip) link(olderMiz *ManagedInputZip) {
+	if olderMiz.newer != nil || olderMiz.older != nil {
+		panic(fmt.Errorf("inputZip is already open"))
+	}
+	oldOlderMiz := miz.older
+	if oldOlderMiz.newer != miz {
+		panic(fmt.Errorf("broken list between %p:%#v and %p:%#v", miz, miz, oldOlderMiz, oldOlderMiz))
+	}
+	miz.older, olderMiz.older = olderMiz, oldOlderMiz
+	oldOlderMiz.newer, olderMiz.newer = olderMiz, miz
+}
+
+// Creates a manager that keeps at most maxOpenZips (at least 3) zips open; nInputZips is a capacity hint.
+func NewInputZipsManager(nInputZips, maxOpenZips int) *InputZipsManager {
+	if maxOpenZips < 3 {
+		panic(fmt.Errorf("open zips limit should be at least 3"))
+	}
+	// In the dummy element .older points to the most recently opened InputZip, and .newer points to the oldest.
+	head := new(ManagedInputZip)
+	head.older, head.newer = head, head
+	return &InputZipsManager{
+		inputZips:     make([]*ManagedInputZip, 0, nInputZips),
+		maxOpenZips:   maxOpenZips,
+		openInputZips: head,
+	}
+}
+
+// InputZip factory
+func (izm *InputZipsManager) Manage(inz InputZip) InputZip {
+	iz := &ManagedInputZip{owner: izm, realInputZip: inz}
+	izm.inputZips = append(izm.inputZips, iz)
+	return iz
+}
+
+// Opens or reopens ManagedInputZip.
+func (izm *InputZipsManager) reopen(miz *ManagedInputZip) error {
+	if miz.realInputZip.IsOpen() {
+		if miz != izm.openInputZips {
+			miz.unlink()
+			izm.openInputZips.link(miz)
+		}
+		return nil
+	}
+	if izm.nOpenZips >= izm.maxOpenZips {
+		if err := izm.close(izm.openInputZips.older); err != nil {
+			return err
+		}
+	}
+	if err := miz.realInputZip.Open(); err != nil {
+		return err
+	}
+	izm.openInputZips.link(miz)
+	izm.nOpenZips++
+	return nil
+}
+
+func (izm *InputZipsManager) close(miz *ManagedInputZip) error {
+	if miz.IsOpen() {
+		err := miz.realInputZip.Close()
+		izm.nOpenZips--
+		miz.unlink()
+		return err
+	}
+	return nil
+}
+
+// Panics unless the openInputZips list is consistently linked, holds only open zips and has nOpenZips elements.
+func (izm *InputZipsManager) checkOpenZipsDeque() {
+	nReallyOpen := 0
+	el := izm.openInputZips
+	for {
+		elNext := el.older
+		if elNext.newer != el {
+			panic(fmt.Errorf("Element:\n  %p: %v\nNext:\n  %p %v", el, el, elNext, elNext))
+		}
+		if elNext == izm.openInputZips {
+			break
+		}
+		el = elNext
+		if !el.IsOpen() {
+			panic(fmt.Errorf("Found unopened element"))
+		}
+		nReallyOpen++
+		if nReallyOpen > izm.nOpenZips {
+			panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+		}
+	}
+	if nReallyOpen != izm.nOpenZips { // the loop already rejects an overcount; this also rejects an undercount
+		panic(fmt.Errorf("found %d open zips, should be %d", nReallyOpen, izm.nOpenZips))
+	}
+}
+
+func (miz *ManagedInputZip) Name() string {
+	return miz.realInputZip.Name()
+}
+
+func (miz *ManagedInputZip) Open() error {
+	return miz.owner.reopen(miz)
+}
+
+func (miz *ManagedInputZip) Close() error {
+	return miz.owner.close(miz)
+}
+
+func (miz *ManagedInputZip) IsOpen() bool {
+	return miz.realInputZip.IsOpen()
+}
+
+func (miz *ManagedInputZip) Entries() []*zip.File {
+	if !miz.IsOpen() {
+		panic(fmt.Errorf("%s: is not open", miz.Name()))
+	}
+	return miz.realInputZip.Entries()
+}
+
+// Actual processing.
+func mergeZips(inputZips []InputZip, writer *zip.Writer, manifest, pyMain string,
+	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
+	excludeFiles, excludeDirs []string, zipsToNotStrip map[string]bool) error {
+
+	out := NewOutputZip(writer, sortEntries, emulateJar, stripDirEntries, ignoreDuplicates)
+	out.setExcludeFiles(excludeFiles)
+	out.setExcludeDirs(excludeDirs)
+	if manifest != "" {
+		if err := out.addManifest(manifest); err != nil {
+			return err
+		}
+	}
+	if pyMain != "" {
+		if err := out.addZipEntryFromFile("__main__.py", pyMain); err != nil {
+			return err
+		}
+	}
+
+	if emulatePar {
+		noInitPackages, err := out.getUninitializedPythonPackages(inputZips)
+		if err != nil {
+			return err
+		}
+		for _, uninitializedPyPackage := range noInitPackages {
+			if err = out.addEmptyEntry(filepath.Join(uninitializedPyPackage, "__init__.py")); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Finally, add entries from all the input zips.
+	for _, inputZip := range inputZips {
+		_, copyFully := zipsToNotStrip[inputZip.Name()]
+		if err := inputZip.Open(); err != nil {
+			return err
+		}
+
+		for i, entry := range inputZip.Entries() {
+			if copyFully || !out.isEntryExcluded(entry.Name) {
+				if err := out.copyEntry(inputZip, i); err != nil {
+					return err
+				}
+			}
+		}
+		// Unless we need to rearrange the entries, the input zip can now be closed.
+		if !(emulateJar || sortEntries) {
+			if err := inputZip.Close(); err != nil {
+				return err
+			}
+		}
+	}
+
+	if emulateJar {
+		return out.writeEntries(out.jarSorted())
+	} else if sortEntries {
+		return out.writeEntries(out.alphanumericSorted())
+	}
+	return nil
+}
+
+// Process command line
 type fileList []string
 
 func (f *fileList) String() string {
@@ -50,9 +608,8 @@
 	return `""`
 }
 
-func (s zipsToNotStripSet) Set(zip_path string) error {
-	s[zip_path] = true
-
+func (s zipsToNotStripSet) Set(path string) error {
+	s[path] = true
 	return nil
 }
 
@@ -60,8 +617,8 @@
 	sortEntries      = flag.Bool("s", false, "sort entries (defaults to the order from the input zip files)")
 	emulateJar       = flag.Bool("j", false, "sort zip entries using jar ordering (META-INF first)")
 	emulatePar       = flag.Bool("p", false, "merge zip entries based on par format")
-	stripDirs        fileList
-	stripFiles       fileList
+	excludeDirs      fileList
+	excludeFiles     fileList
 	zipsToNotStrip   = make(zipsToNotStripSet)
 	stripDirEntries  = flag.Bool("D", false, "strip directory entries from the output zip file")
 	manifest         = flag.String("m", "", "manifest file to insert in jar")
@@ -71,14 +628,52 @@
 )
 
 func init() {
-	flag.Var(&stripDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
-	flag.Var(&stripFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
+	flag.Var(&excludeDirs, "stripDir", "directories to be excluded from the output zip, accepts wildcards")
+	flag.Var(&excludeFiles, "stripFile", "files to be excluded from the output zip, accepts wildcards")
 	flag.Var(&zipsToNotStrip, "zipToNotStrip", "the input zip file which is not applicable for stripping")
 }
 
+type FileInputZip struct {
+	name   string
+	reader *zip.ReadCloser
+}
+
+func (fiz *FileInputZip) Name() string {
+	return fiz.name
+}
+
+func (fiz *FileInputZip) Close() error {
+	if fiz.IsOpen() {
+		reader := fiz.reader
+		fiz.reader = nil
+		return reader.Close()
+	}
+	return nil
+}
+
+func (fiz *FileInputZip) Entries() []*zip.File {
+	if !fiz.IsOpen() {
+		panic(fmt.Errorf("%s: is not open", fiz.Name()))
+	}
+	return fiz.reader.File
+}
+
+func (fiz *FileInputZip) IsOpen() bool {
+	return fiz.reader != nil
+}
+
+func (fiz *FileInputZip) Open() error {
+	if fiz.IsOpen() {
+		return nil
+	}
+	var err error
+	fiz.reader, err = zip.OpenReader(fiz.Name())
+	return err
+}
+
 func main() {
 	flag.Usage = func() {
-		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] output [inputs...]")
+		fmt.Fprintln(os.Stderr, "usage: merge_zips [-jpsD] [-m manifest] [--prefix script] [-pm __main__.py] OutputZip [inputs...]")
 		flag.PrintDefaults()
 	}
 
@@ -90,16 +685,28 @@
 		os.Exit(1)
 	}
 	outputPath := args[0]
-	inputs := args[1:]
+	inputs := make([]string, 0)
+	for _, input := range args[1:] {
+		// An "@file" argument names a response file listing inputs; any other argument is an input itself.
+		if len(input) == 0 || input[0] != '@' {
+			inputs = append(inputs, input)
+			continue
+		}
+		rspContents, err := ioutil.ReadFile(input[1:])
+		if err != nil {
+			log.Fatal(err)
+		}
+		inputs = append(inputs, soongZip.ReadRespFile(rspContents)...)
+	}
 
 	log.SetFlags(log.Lshortfile)
 
 	// make writer
-	output, err := os.Create(outputPath)
+	outputZip, err := os.Create(outputPath)
 	if err != nil {
 		log.Fatal(err)
 	}
-	defer output.Close()
+	defer outputZip.Close()
 
 	var offset int64
 	if *prefix != "" {
@@ -107,13 +714,13 @@
 		if err != nil {
 			log.Fatal(err)
 		}
-		offset, err = io.Copy(output, prefixFile)
+		offset, err = io.Copy(outputZip, prefixFile)
 		if err != nil {
 			log.Fatal(err)
 		}
 	}
 
-	writer := zip.NewWriter(output)
+	writer := zip.NewWriter(outputZip)
 	defer func() {
 		err := writer.Close()
 		if err != nil {
@@ -122,18 +729,6 @@
 	}()
 	writer.SetOffset(offset)
 
-	// make readers
-	readers := []namedZipReader{}
-	for _, input := range inputs {
-		reader, err := zip.OpenReader(input)
-		if err != nil {
-			log.Fatal(err)
-		}
-		defer reader.Close()
-		namedReader := namedZipReader{path: input, reader: &reader.Reader}
-		readers = append(readers, namedReader)
-	}
-
 	if *manifest != "" && !*emulateJar {
 		log.Fatal(errors.New("must specify -j when specifying a manifest via -m"))
 	}
@@ -143,344 +738,15 @@
 	}
 
 	// do merge
-	err = mergeZips(readers, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
-		*stripDirEntries, *ignoreDuplicates, []string(stripFiles), []string(stripDirs), map[string]bool(zipsToNotStrip))
+	inputZipsManager := NewInputZipsManager(len(inputs), 1000)
+	inputZips := make([]InputZip, len(inputs))
+	for i, input := range inputs {
+		inputZips[i] = inputZipsManager.Manage(&FileInputZip{name: input})
+	}
+	err = mergeZips(inputZips, writer, *manifest, *pyMain, *sortEntries, *emulateJar, *emulatePar,
+		*stripDirEntries, *ignoreDuplicates, []string(excludeFiles), []string(excludeDirs),
+		map[string]bool(zipsToNotStrip))
 	if err != nil {
 		log.Fatal(err)
 	}
 }
-
-// a namedZipReader reads a .zip file and can say which file it's reading
-type namedZipReader struct {
-	path   string
-	reader *zip.Reader
-}
-
-// a zipEntryPath refers to a file contained in a zip
-type zipEntryPath struct {
-	zipName   string
-	entryName string
-}
-
-func (p zipEntryPath) String() string {
-	return p.zipName + "/" + p.entryName
-}
-
-// a zipEntry is a zipSource that pulls its content from another zip
-type zipEntry struct {
-	path    zipEntryPath
-	content *zip.File
-}
-
-func (ze zipEntry) String() string {
-	return ze.path.String()
-}
-
-func (ze zipEntry) IsDir() bool {
-	return ze.content.FileInfo().IsDir()
-}
-
-func (ze zipEntry) CRC32() uint32 {
-	return ze.content.FileHeader.CRC32
-}
-
-func (ze zipEntry) Size() uint64 {
-	return ze.content.FileHeader.UncompressedSize64
-}
-
-func (ze zipEntry) WriteToZip(dest string, zw *zip.Writer) error {
-	return zw.CopyFrom(ze.content, dest)
-}
-
-// a bufferEntry is a zipSource that pulls its content from a []byte
-type bufferEntry struct {
-	fh      *zip.FileHeader
-	content []byte
-}
-
-func (be bufferEntry) String() string {
-	return "internal buffer"
-}
-
-func (be bufferEntry) IsDir() bool {
-	return be.fh.FileInfo().IsDir()
-}
-
-func (be bufferEntry) CRC32() uint32 {
-	return crc32.ChecksumIEEE(be.content)
-}
-
-func (be bufferEntry) Size() uint64 {
-	return uint64(len(be.content))
-}
-
-func (be bufferEntry) WriteToZip(dest string, zw *zip.Writer) error {
-	w, err := zw.CreateHeader(be.fh)
-	if err != nil {
-		return err
-	}
-
-	if !be.IsDir() {
-		_, err = w.Write(be.content)
-		if err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-type zipSource interface {
-	String() string
-	IsDir() bool
-	CRC32() uint32
-	Size() uint64
-	WriteToZip(dest string, zw *zip.Writer) error
-}
-
-// a fileMapping specifies to copy a zip entry from one place to another
-type fileMapping struct {
-	dest   string
-	source zipSource
-}
-
-func mergeZips(readers []namedZipReader, writer *zip.Writer, manifest, pyMain string,
-	sortEntries, emulateJar, emulatePar, stripDirEntries, ignoreDuplicates bool,
-	stripFiles, stripDirs []string, zipsToNotStrip map[string]bool) error {
-
-	sourceByDest := make(map[string]zipSource, 0)
-	orderedMappings := []fileMapping{}
-
-	// if dest already exists returns a non-null zipSource for the existing source
-	addMapping := func(dest string, source zipSource) zipSource {
-		mapKey := filepath.Clean(dest)
-		if existingSource, exists := sourceByDest[mapKey]; exists {
-			return existingSource
-		}
-
-		sourceByDest[mapKey] = source
-		orderedMappings = append(orderedMappings, fileMapping{source: source, dest: dest})
-		return nil
-	}
-
-	if manifest != "" {
-		if !stripDirEntries {
-			dirHeader := jar.MetaDirFileHeader()
-			dirSource := bufferEntry{dirHeader, nil}
-			addMapping(jar.MetaDir, dirSource)
-		}
-
-		contents, err := ioutil.ReadFile(manifest)
-		if err != nil {
-			return err
-		}
-
-		fh, buf, err := jar.ManifestFileContents(contents)
-		if err != nil {
-			return err
-		}
-
-		fileSource := bufferEntry{fh, buf}
-		addMapping(jar.ManifestFile, fileSource)
-	}
-
-	if pyMain != "" {
-		buf, err := ioutil.ReadFile(pyMain)
-		if err != nil {
-			return err
-		}
-		fh := &zip.FileHeader{
-			Name:               "__main__.py",
-			Method:             zip.Store,
-			UncompressedSize64: uint64(len(buf)),
-		}
-		fh.SetMode(0700)
-		fh.SetModTime(jar.DefaultTime)
-		fileSource := bufferEntry{fh, buf}
-		addMapping("__main__.py", fileSource)
-	}
-
-	if emulatePar {
-		// the runfiles packages needs to be populated with "__init__.py".
-		newPyPkgs := []string{}
-		// the runfiles dirs have been treated as packages.
-		existingPyPkgSet := make(map[string]bool)
-		// put existing __init__.py files to a set first. This set is used for preventing
-		// generated __init__.py files from overwriting existing ones.
-		for _, namedReader := range readers {
-			for _, file := range namedReader.reader.File {
-				if filepath.Base(file.Name) != "__init__.py" {
-					continue
-				}
-				pyPkg := pathBeforeLastSlash(file.Name)
-				if _, found := existingPyPkgSet[pyPkg]; found {
-					panic(fmt.Errorf("found __init__.py path duplicates during pars merging: %q.", file.Name))
-				} else {
-					existingPyPkgSet[pyPkg] = true
-				}
-			}
-		}
-		for _, namedReader := range readers {
-			for _, file := range namedReader.reader.File {
-				var parentPath string /* the path after trimming last "/" */
-				if filepath.Base(file.Name) == "__init__.py" {
-					// for existing __init__.py files, we should trim last "/" for twice.
-					// eg. a/b/c/__init__.py ---> a/b
-					parentPath = pathBeforeLastSlash(pathBeforeLastSlash(file.Name))
-				} else {
-					parentPath = pathBeforeLastSlash(file.Name)
-				}
-				populateNewPyPkgs(parentPath, existingPyPkgSet, &newPyPkgs)
-			}
-		}
-		for _, pkg := range newPyPkgs {
-			var emptyBuf []byte
-			fh := &zip.FileHeader{
-				Name:               filepath.Join(pkg, "__init__.py"),
-				Method:             zip.Store,
-				UncompressedSize64: uint64(len(emptyBuf)),
-			}
-			fh.SetMode(0700)
-			fh.SetModTime(jar.DefaultTime)
-			fileSource := bufferEntry{fh, emptyBuf}
-			addMapping(filepath.Join(pkg, "__init__.py"), fileSource)
-		}
-	}
-	for _, namedReader := range readers {
-		_, skipStripThisZip := zipsToNotStrip[namedReader.path]
-		for _, file := range namedReader.reader.File {
-			if !skipStripThisZip {
-				if skip, err := shouldStripEntry(emulateJar, stripFiles, stripDirs, file.Name); err != nil {
-					return err
-				} else if skip {
-					continue
-				}
-			}
-
-			if stripDirEntries && file.FileInfo().IsDir() {
-				continue
-			}
-
-			// check for other files or directories destined for the same path
-			dest := file.Name
-
-			// make a new entry to add
-			source := zipEntry{path: zipEntryPath{zipName: namedReader.path, entryName: file.Name}, content: file}
-
-			if existingSource := addMapping(dest, source); existingSource != nil {
-				// handle duplicates
-				if existingSource.IsDir() != source.IsDir() {
-					return fmt.Errorf("Directory/file mismatch at %v from %v and %v\n",
-						dest, existingSource, source)
-				}
-
-				if ignoreDuplicates {
-					continue
-				}
-
-				if emulateJar &&
-					file.Name == jar.ManifestFile || file.Name == jar.ModuleInfoClass {
-					// Skip manifest and module info files that are not from the first input file
-					continue
-				}
-
-				if source.IsDir() {
-					continue
-				}
-
-				if existingSource.CRC32() == source.CRC32() && existingSource.Size() == source.Size() {
-					continue
-				}
-
-				return fmt.Errorf("Duplicate path %v found in %v and %v\n",
-					dest, existingSource, source)
-			}
-		}
-	}
-
-	if emulateJar {
-		jarSort(orderedMappings)
-	} else if sortEntries {
-		alphanumericSort(orderedMappings)
-	}
-
-	for _, entry := range orderedMappings {
-		if err := entry.source.WriteToZip(entry.dest, writer); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-// Sets the given directory and all its ancestor directories as Python packages.
-func populateNewPyPkgs(pkgPath string, existingPyPkgSet map[string]bool, newPyPkgs *[]string) {
-	for pkgPath != "" {
-		if _, found := existingPyPkgSet[pkgPath]; !found {
-			existingPyPkgSet[pkgPath] = true
-			*newPyPkgs = append(*newPyPkgs, pkgPath)
-			// Gets its ancestor directory by trimming last slash.
-			pkgPath = pathBeforeLastSlash(pkgPath)
-		} else {
-			break
-		}
-	}
-}
-
-func pathBeforeLastSlash(path string) string {
-	ret := filepath.Dir(path)
-	// filepath.Dir("abc") -> "." and filepath.Dir("/abc") -> "/".
-	if ret == "." || ret == "/" {
-		return ""
-	}
-	return ret
-}
-
-func shouldStripEntry(emulateJar bool, stripFiles, stripDirs []string, name string) (bool, error) {
-	for _, dir := range stripDirs {
-		dir = filepath.Clean(dir)
-		patterns := []string{
-			dir + "/",      // the directory itself
-			dir + "/**/*",  // files recursively in the directory
-			dir + "/**/*/", // directories recursively in the directory
-		}
-
-		for _, pattern := range patterns {
-			match, err := pathtools.Match(pattern, name)
-			if err != nil {
-				return false, fmt.Errorf("%s: %s", err.Error(), pattern)
-			} else if match {
-				if emulateJar {
-					// When merging jar files, don't strip META-INF/MANIFEST.MF even if stripping META-INF is
-					// requested.
-					// TODO(ccross): which files does this affect?
-					if name != jar.MetaDir && name != jar.ManifestFile {
-						return true, nil
-					}
-				}
-				return true, nil
-			}
-		}
-	}
-
-	for _, pattern := range stripFiles {
-		if match, err := pathtools.Match(pattern, name); err != nil {
-			return false, fmt.Errorf("%s: %s", err.Error(), pattern)
-		} else if match {
-			return true, nil
-		}
-	}
-	return false, nil
-}
-
-func jarSort(files []fileMapping) {
-	sort.SliceStable(files, func(i, j int) bool {
-		return jar.EntryNamesLess(files[i].dest, files[j].dest)
-	})
-}
-
-func alphanumericSort(files []fileMapping) {
-	sort.SliceStable(files, func(i, j int) bool {
-		return files[i].dest < files[j].dest
-	})
-}
diff --git a/cmd/merge_zips/merge_zips_test.go b/cmd/merge_zips/merge_zips_test.go
index dbde270..cb58436 100644
--- a/cmd/merge_zips/merge_zips_test.go
+++ b/cmd/merge_zips/merge_zips_test.go
@@ -51,6 +51,39 @@
 	moduleInfoFile = testZipEntry{jar.ModuleInfoClass, 0755, []byte("module-info")}
 )
 
+type testInputZip struct {
+	name    string
+	entries []testZipEntry
+	reader  *zip.Reader
+}
+
+func (tiz *testInputZip) Name() string {
+	return tiz.name
+}
+
+func (tiz *testInputZip) Open() error {
+	if tiz.reader == nil {
+		tiz.reader = testZipEntriesToZipReader(tiz.entries)
+	}
+	return nil
+}
+
+func (tiz *testInputZip) Close() error {
+	tiz.reader = nil
+	return nil
+}
+
+func (tiz *testInputZip) Entries() []*zip.File {
+	if tiz.reader == nil {
+		panic(fmt.Errorf("%s: should be open to get entries", tiz.Name()))
+	}
+	return tiz.reader.File
+}
+
+func (tiz *testInputZip) IsOpen() bool {
+	return tiz.reader != nil
+}
+
 func TestMergeZips(t *testing.T) {
 	testCases := []struct {
 		name             string
@@ -207,13 +240,9 @@
 
 	for _, test := range testCases {
 		t.Run(test.name, func(t *testing.T) {
-			var readers []namedZipReader
+			inputZips := make([]InputZip, len(test.in))
 			for i, in := range test.in {
-				r := testZipEntriesToZipReader(in)
-				readers = append(readers, namedZipReader{
-					path:   "in" + strconv.Itoa(i),
-					reader: r,
-				})
+				inputZips[i] = &testInputZip{name: "in" + strconv.Itoa(i), entries: in}
 			}
 
 			want := testZipEntriesToBuf(test.out)
@@ -221,7 +250,7 @@
 			out := &bytes.Buffer{}
 			writer := zip.NewWriter(out)
 
-			err := mergeZips(readers, writer, "", "",
+			err := mergeZips(inputZips, writer, "", "",
 				test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates,
 				test.stripFiles, test.stripDirs, test.zipsToNotStrip)
 
@@ -304,3 +333,60 @@
 
 	return ret
 }
+
+type DummyInpuZip struct {
+	isOpen bool
+}
+
+func (diz *DummyInpuZip) Name() string {
+	return "dummy"
+}
+
+func (diz *DummyInpuZip) Open() error {
+	diz.isOpen = true
+	return nil
+}
+
+func (diz *DummyInpuZip) Close() error {
+	diz.isOpen = false
+	return nil
+}
+
+func (DummyInpuZip) Entries() []*zip.File {
+	panic("implement me")
+}
+
+func (diz *DummyInpuZip) IsOpen() bool {
+	return diz.isOpen
+}
+
+// Opens more managed zips than the limit allows and checks that the number of open ones stays capped.
+func TestInputZipsManager(t *testing.T) {
+	const nInputZips = 20
+	const nMaxOpenZips = 10
+	izm := NewInputZipsManager(nInputZips, nMaxOpenZips)
+	managedZips := make([]InputZip, nInputZips)
+	for i := 0; i < nInputZips; i++ {
+		managedZips[i] = izm.Manage(&DummyInpuZip{})
+	}
+
+	t.Run("InputZipsManager", func(t *testing.T) {
+		for i, iz := range managedZips {
+			if err := iz.Open(); err != nil {
+				t.Fatalf("Step %d: open failed: %s", i, err)
+			}
+			if izm.nOpenZips > nMaxOpenZips {
+				t.Errorf("Step %d: should be <=%d open zips", i, nMaxOpenZips)
+			}
+		}
+		if !managedZips[nInputZips-1].IsOpen() {
+			t.Error("The last input should stay open")
+		}
+		for _, iz := range managedZips {
+			iz.Close()
+		}
+		if izm.nOpenZips > 0 {
+			t.Error("Some input zips are still open")
+		}
+	})
+}