3rd-party-merge: add basic version matching

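The original design only allowed people to install the latest version of
a package.  Requests are now coming in to support older SLOTs (like
getting python-2.7).  That means we have to make the merger more
intelligent and support extended atoms like "dev-lang/python:2.7".

Only exact matching is implemented so far: an atom may carry a "=" or
"~" prefix (or none at all), while range operators such as ">=" are
rejected with a ValueError.  Existing output files are now also made
writable (chmod 0666) before they get overwritten.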

BUG=b:26861037
TEST=merging with python:2.7 works now

Change-Id: I6085adf87a37a601e2ebace9894ac5e558f448a4
diff --git a/usr/bin/3rd-party-merge b/usr/bin/3rd-party-merge
index 9cbde58..c28a2eb 100755
--- a/usr/bin/3rd-party-merge
+++ b/usr/bin/3rd-party-merge
@@ -20,8 +20,8 @@
 
 import argparse
 import errno
-import glob
 import os
+import re
 import shutil
 import sys
 
@@ -44,39 +44,149 @@
             raise
 
 
-def load_contents(pkg, root, filter=None):
-    """Read the CONTENTS file for |pkg| from |root|."""
-    if not filter:
-        filter = lambda x: True
+class Atom(object):
+    """Hold details about a package atom (optionally in a vdb)."""
 
-    ret = []
+    _PREFIX_RE = re.compile(r'^([>=<~!]*)(.*)$')
+    _PN_RE = r'(?P<package>[A-Za-z0-9_]+[A-Za-z0-9+_-]*?)'
+    _PV_RE = (r'(?P<version>[0-9](\.[0-9]+)*(_(pre|p|beta|alpha|rc)[0-9]*)*)'
+              r'(-r(?P<revision>[0-9]+))?')
+    _PN_PV_RE = re.compile(r'^%s(-%s)?$' % (_PN_RE, _PV_RE))
 
-    contents = os.path.join(root, 'var', 'db', 'pkg', pkg + '-*', 'CONTENTS')
-    files = glob.glob(contents)
-    if not files:
-        raise Exception('No packages found matching %s' % pkg)
+    def __init__(self, atom, path=None):
+        self.atom = atom
+        self.path = path
 
-    with open(files[0]) as f:
-        for line in f.read().splitlines():
+        # Extract any version matching prefixes.
+        m = self._PREFIX_RE.match(atom)
+        self.prefix, atom = m.groups()
+
+        # Split off trailing slot -- must be before category for subslots.
+        if ':' in atom:
+            atom, self._slot = atom.split(':', 1)
+        else:
+            self._slot = None
+
+        # Split off leading category.
+        if '/' in atom:
+            self.category, atom = atom.split('/', 1)
+        else:
+            self.category = None
+
+        # Split apart the name & version.
+        m = self._PN_PV_RE.match(atom)
+        self.pn = m.group('package')
+        self.pv = m.group('version')
+        self.rev = m.group('revision')
+
+    def _load_vdb_entry(self, entry):
+        if not self.path:
+            return None
+        path = os.path.join(self.path, entry)
+        with open(path) as f:
+            return f.read().strip()
+
+    @property
+    def slot(self):
+        if self._slot is None and self.path:
+            self._slot = self._load_vdb_entry('SLOT')
+        return self._slot
+
+    def load_contents(self, path_filter=None):
+        """Read the CONTENTS file for |pkg| from |root|."""
+        if not path_filter:
+            path_filter = lambda x: True
+
+        ret = []
+
+        for line in self._load_vdb_entry('CONTENTS').splitlines():
             line = line.strip()
             if not line:
                 continue
             typ, data = line.split(' ', 1)
             if typ == 'obj':
                 path, _hash, _mtime = data.rsplit(' ', 2)
-                if filter(path):
+                if path_filter(path):
                     ret.append(('obj', path))
             elif typ == 'sym':
                 source, target = data.split(' -> ', 1)
                 target, _mtime = target.rsplit(' ', 1)
-                if filter(source):
+                if path_filter(source):
                     ret.append(('sym', source, target))
             elif typ == 'dir':
                 pass
             else:
                 raise Exception('Unhandled entry: %s' % line)
 
-    return ret
+        return ret
+
+    def match(self, atom):
+        """See if |atom| matches us."""
+        # TODO: Support the range matching like >=.
+        if self.prefix not in (None, '', '=', '~'):
+            raise ValueError('Only exact matches supported (not %s)'
+                             % (self.prefix))
+
+        # Package names must always exist & match.
+        if atom.pn != self.pn:
+            return False
+
+        # Only require the category to match if both atoms specify it.
+        if len(set((atom.category, self.category, None))) == 3:
+            return False
+
+        # Only require the slot to match if both atoms specify it.
+        if self.slot and len(set((atom.slot, self.slot, None))) == 3:
+            return False
+
+        # Only require the version to match if both atoms specify it.
+        if len(set((atom.pv, self.pv, None))) == 3:
+            return False
+
+        # Only require the revision to match if both atoms specify it.
+        if (self.prefix is not '~' and
+            len(set((atom.rev, self.rev, None)))) == 3:
+            return False
+
+        # If we're still here, there's nothing left to say :).
+        return True
+
+    def __str__(self):
+        return ('Atom(%s/%s-%s-r%s:%s)' %
+                (self.category, self.pn, self.pv, self.rev, self.slot))
+
+
+class Vdb(object):
+    """Hold details about a vdb (a database of installed packages)."""
+
+    # https://projects.gentoo.org/pms/6/pms.html#x1-180003.1.1
+    CATEGORY_RE = re.compile(r'^[A-Za-z0-9_]+[A-Za-z0-9+_.-]*$')
+
+    # https://projects.gentoo.org/pms/6/pms.html#x1-200003.1.2
+    PACKAGE_RE = re.compile(r'^[A-Za-z0-9_]+[A-Za-z0-9+_-]*-[0-9]+')
+
+    def __init__(self, root):
+        self.pkgs = []
+
+        path = os.path.join(root, 'var', 'db', 'pkg')
+        for cat in os.listdir(path):
+            catdir = os.path.join(path, cat)
+            if not os.path.isdir(catdir) or not self.CATEGORY_RE.match(cat):
+                continue
+
+            for pkg in os.listdir(catdir):
+                pkgdir = os.path.join(catdir, pkg)
+                if (os.path.isdir(pkgdir) and
+                    os.path.isfile(os.path.join(pkgdir, 'CONTENTS')) and
+                    self.PACKAGE_RE.match(pkg)):
+                    self.pkgs.append(Atom('%s/%s' % (cat, pkg), pkgdir))
+
+    def find_pkg(self, match_pkg):
+        """Try to find the best match for |match_pkg|."""
+        match_atom = Atom(match_pkg)
+        for vdb_pkg in self.pkgs:
+            if match_atom.match(vdb_pkg):
+                return vdb_pkg
 
 
 def ignore_files(path):
@@ -102,16 +212,20 @@
     return True
 
 
-def merge(pkg, input_root, output_root, make_root, verbose=0):
+def merge(vdb, pkg, input_root, output_root, make_root, verbose=0):
     """Merge |pkg| from |input_root| to |output_root|."""
     print('Merging %s ' % pkg, end='')
     if verbose:
-        print('from %s to %s' % (input_root, output_root))
+        print('from %s to %s ' % (input_root, output_root))
 
     # TODO: Grab a lock for |pkg|.
 
     # First get the file listing to merge.
-    contents = load_contents(pkg, input_root, filter=ignore_files)
+    vdb_pkg = vdb.find_pkg(pkg)
+    if not vdb_pkg:
+        raise Exception('No packages found matching %s ' % pkg)
+    print('matched %s ' % vdb_pkg, end='')
+    contents = vdb_pkg.load_contents(path_filter=ignore_files)
 
     # Now actually merge them.
     for entry in contents:
@@ -123,6 +237,8 @@
             else:
                 print('.', end='')
             makedirs(os.path.dirname(output))
+            if os.path.exists(output):
+                os.chmod(output, 0o666)
             shutil.copy2(os.path.join(input_root, path), output)
         elif entry[0] == 'sym':
             path = entry[1].lstrip('/')
@@ -167,8 +283,9 @@
     parser = get_parser()
     opts = parser.parse_args(argv)
 
+    vdb = Vdb(opts.input_root)
     for pkg in opts.packages:
-        merge(pkg, opts.input_root, opts.output_root, opts.make_root,
+        merge(vdb, pkg, opts.input_root, opts.output_root, opts.make_root,
               verbose=opts.verbose)