Move VPN routing decisions from iptables to ip

Routes are now encoded by ip rules that send connections to the Vpn
table if the connection is marked and the destination falls into a
route. This differs from the previous design where a mark meant that
the connection must go over the VPN, now a mark simply means that it
may.
Bug: 12549060
Change-Id: I9be7e27a0f46858f109d8bc5c5bced309b05201a
diff --git a/CommandListener.cpp b/CommandListener.cpp
index 0ca5d8f..b9df842 100644
--- a/CommandListener.cpp
+++ b/CommandListener.cpp
@@ -77,7 +77,6 @@
         OEM_IPTABLES_FILTER_OUTPUT,
         FirewallController::LOCAL_OUTPUT,
         BandwidthController::LOCAL_OUTPUT,
-        SecondaryTableController::LOCAL_FILTER_OUTPUT,
         NULL,
 };
 
@@ -95,7 +94,6 @@
 };
 
 static const char* MANGLE_OUTPUT[] = {
-        SecondaryTableController::LOCAL_MANGLE_EXEMPT,
         SecondaryTableController::LOCAL_MANGLE_OUTPUT,
         NULL,
 };
diff --git a/SecondaryTableController.cpp b/SecondaryTableController.cpp
index 736b5fe..5750b41 100644
--- a/SecondaryTableController.cpp
+++ b/SecondaryTableController.cpp
@@ -38,10 +38,7 @@
 
 const char* SecondaryTableController::LOCAL_MANGLE_OUTPUT = "st_mangle_OUTPUT";
 const char* SecondaryTableController::LOCAL_MANGLE_POSTROUTING = "st_mangle_POSTROUTING";
-const char* SecondaryTableController::LOCAL_MANGLE_EXEMPT = "st_mangle_EXEMPT";
-const char* SecondaryTableController::LOCAL_MANGLE_IFACE_FORMAT = "st_mangle_%s_OUTPUT";
 const char* SecondaryTableController::LOCAL_NAT_POSTROUTING = "st_nat_POSTROUTING";
-const char* SecondaryTableController::LOCAL_FILTER_OUTPUT = "st_filter_OUTPUT";
 
 SecondaryTableController::SecondaryTableController(UidMarkMap *map) : mUidMarkMap(map) {
     int i;
@@ -62,15 +59,7 @@
             "-F",
             LOCAL_MANGLE_OUTPUT,
             NULL);
-    res |= execIptables(V4V6,
-            "-t",
-            "mangle",
-            "-F",
-            LOCAL_MANGLE_EXEMPT,
-            NULL);
-    // rule for skipping anything marked with the PROTECT_MARK
-    char protect_mark_str[11];
-    snprintf(protect_mark_str, sizeof(protect_mark_str), "%d", PROTECT_MARK);
+    // Do not mark sockets that have already been marked elsewhere(for example in DNS or protect).
     res |= execIptables(V4V6,
             "-t",
             "mangle",
@@ -78,8 +67,9 @@
             LOCAL_MANGLE_OUTPUT,
             "-m",
             "mark",
+            "!",
             "--mark",
-            protect_mark_str,
+            "0",
             "-j",
             "RETURN",
             NULL);
@@ -325,7 +315,38 @@
     }
 
     snprintf(mark_str, sizeof(mark_str), "%d", mark);
-    //add the catch all route to the tun. Route rules will make sure the right packets hit the table
+    // Flush any marked routes we added
+    if (!add) {
+        // iproute2 rule del will delete anything that matches, but only one rule at a time.
+        // So clearing the rules requires a bunch of calls.
+        // ip rule del will fail once there are no remaining rules that match.
+        const char *v4_cmd[] = {
+            IP_PATH,
+            "-4",
+            "rule",
+            "del",
+            "fwmark",
+            mark_str,
+            "table",
+            mark_str
+        };
+        while(!runCmd(ARRAY_SIZE(v4_cmd), v4_cmd)) {}
+
+        const char *v6_cmd[] = {
+            IP_PATH,
+            "-6",
+            "rule",
+            "del",
+            "fwmark",
+            mark_str,
+            "table",
+            mark_str
+        };
+        while(!runCmd(ARRAY_SIZE(v6_cmd), v6_cmd)) {}
+    }
+    // Add a route to the table to send all traffic to iface.
+    // We only need a default route because this table is only selected if a packet matches an
+    // IP rule that checks both the route and the mark.
     const char *route_cmd[] = {
         IP_PATH,
         "route",
@@ -337,22 +358,10 @@
         mark_str
     };
     ret = runCmd(ARRAY_SIZE(route_cmd), route_cmd);
+    // The command might fail during delete if the iface is gone
+    if (add && ret) return ret;
 
-    const char *fwmark_cmd[] = {
-        IP_PATH,
-        "rule",
-        add ? "add" : "del",
-        "prio",
-        RULE_PRIO,
-        "fwmark",
-        mark_str,
-        "table",
-        mark_str
-    };
-    ret = runCmd(ARRAY_SIZE(fwmark_cmd), fwmark_cmd);
-    if (ret) return ret;
-
-    //add rules for v6
+    // As above for IPv6
     const char *route6_cmd[] = {
         IP_PATH,
         "-6",
@@ -365,120 +374,25 @@
         mark_str
     };
     ret = runCmd(ARRAY_SIZE(route6_cmd), route6_cmd);
+    // The command might fail during delete if the iface is gone
+    if (add && ret) return ret;
 
-    const char *fwmark6_cmd[] = {
-        IP_PATH,
-        "-6",
-        "rule",
-        add ? "add" : "del",
-        "prio",
-        RULE_PRIO,
-        "fwmark",
-        mark_str,
-        "table",
-        mark_str
-    };
-    ret = runCmd(ARRAY_SIZE(fwmark6_cmd), fwmark6_cmd);
+    /* Best effort, because some kernels might not have the needed TCPMSS */
+    execIptables(V4V6,
+            "-t",
+            "mangle",
+            add ? "-A" : "-D",
+            LOCAL_MANGLE_POSTROUTING,
+            "-p", "tcp", "-o", iface, "--tcp-flags", "SYN,RST", "SYN",
+            "-j",
+            "TCPMSS",
+            "--clamp-mss-to-pmtu",
+            NULL);
 
-
-    if (ret) return ret;
-
-    //create the route rule chain
-    char chain_str[IFNAMSIZ + 18];
-    snprintf(chain_str, sizeof(chain_str), LOCAL_MANGLE_IFACE_FORMAT, iface);
-    //code split due to ordering requirements
-    if (add) {
-        ret = execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-N",
-                chain_str,
-                NULL);
-        //set up the rule for sending premarked packets to the VPN chain
-        //Insert these at the top of the chain so they trigger before any UID rules
-        ret |= execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-I",
-                LOCAL_MANGLE_OUTPUT,
-                "3",
-                "-m",
-                "mark",
-                "--mark",
-                mark_str,
-                "-g",
-                chain_str,
-                NULL);
-        //add a rule to clear the mark in the VPN chain
-        //packets marked with SO_MARK already have the iface's mark set but unless they match a
-        //route they should hit the network instead of the VPN
-        ret |= execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-A",
-                chain_str,
-                "-j",
-                "MARK",
-                "--set-mark",
-                "0",
-                NULL);
-
-        /* Best effort, because some kernels might not have the needed TCPMSS */
-        execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-A",
-                LOCAL_MANGLE_POSTROUTING,
-                "-p", "tcp", "-o", iface, "--tcp-flags", "SYN,RST", "SYN",
-                "-j",
-                "TCPMSS",
-                "--clamp-mss-to-pmtu",
-                NULL);
-
-    } else {
-        ret = execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-D",
-                LOCAL_MANGLE_OUTPUT,
-                "-m",
-                "mark",
-                "--mark",
-                mark_str,
-                "-g",
-                chain_str,
-                NULL);
-
-        //clear and delete the chain
-        ret |= execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-F",
-                chain_str,
-                NULL);
-
-        ret |= execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-X",
-                chain_str,
-                NULL);
-
-        /* Best effort, because some kernels might not have the needed TCPMSS */
-        execIptables(V4V6,
-                "-t",
-                "mangle",
-                "-D",
-                LOCAL_MANGLE_POSTROUTING,
-                "-p", "tcp", "-o", iface, "--tcp-flags", "SYN,RST", "SYN",
-                "-j",
-                "TCPMSS",
-                "--clamp-mss-to-pmtu",
-                NULL);
-    }
-
-    //set up the needed source IP rewriting
-    //NOTE: Without ipv6 NAT in the kernel <3.7 only support V4 NAT
+    // Because the mark gets set after the intial routing decision the source IP address is that
+    // of the original out interface. The only way to change the source IP address to that of the
+    // VPN iface is using source NAT.
+    // TODO: Remove this when we get the mark set correctly before the first routing pass.
     ret = execIptables(V4,
             "-t",
             "nat",
@@ -496,7 +410,7 @@
 
     if (ret) return ret;
 
-    //try and set up for ipv6. ipv6 nat came in the kernel only in 3.7, so this can fail
+    // Try and set up NAT for IPv6 as well. This was only added in Linux 3.7 so this may fail.
     ret = execIptables(V6,
             "-t",
             "nat",
@@ -512,21 +426,28 @@
             "MASQUERADE",
             NULL);
     if (ret) {
-        //Without V6 NAT we can't do V6 over VPNs.
-        ret = execIptables(V6,
-                "-t",
-                "filter",
-                add ? "-A" : "-D",
-                LOCAL_FILTER_OUTPUT,
-                "-m",
-                "mark",
-                "--mark",
-                mark_str,
-                "-j",
-                "REJECT",
-                NULL);
+        // Without V6 NAT we can't do V6 over VPNs. If an IPv6 packet matches a VPN rule, then it
+        // will go out on the VPN interface, but without NAT, it will have the wrong source
+        // address. So reject all these packets.
+        // Due to rule application by the time the connection hits the output filter chain the
+        // routing pass based on the new mark has not yet happened. Reject in ip instead.
+        // TODO: Make the VPN code refuse to install IPv6 routes until we don't need IPv6 NAT.
+        const char *reject_cmd[] = {
+            IP_PATH,
+            "-6",
+            "route",
+            add ? "replace" : "del",
+            "unreachable",
+            "default",
+            "table",
+            mark_str
+        };
+        ret = runCmd(ARRAY_SIZE(reject_cmd), reject_cmd);
+        // The command might fail during delete if the iface is gone
+        if (add && ret) return ret;
+
     }
-    return ret;
+    return 0;
 
 }
 
@@ -547,24 +468,25 @@
     }
     int mark = tableIndex + BASE_TABLE_NUMBER;
     char mark_str[11] = {0};
-    char chain_str[IFNAMSIZ + 18];
     char dest_str[44]; // enough to store an IPv6 address + 3 character bitmask
 
     snprintf(mark_str, sizeof(mark_str), "%d", mark);
-    snprintf(chain_str, sizeof(chain_str), LOCAL_MANGLE_IFACE_FORMAT, iface);
     snprintf(dest_str, sizeof(dest_str), "%s/%d", dest, prefix);
-    return execIptables(getIptablesTarget(dest),
-            "-t",
-            "mangle",
-            add ? "-A" : "-D",
-            chain_str,
-            "-d",
-            dest_str,
-            "-j",
-            "MARK",
-            "--set-mark",
-            mark_str,
-            NULL);
+    const char *rule_cmd[] = {
+        IP_PATH,
+        getVersion(dest_str),
+        "rule",
+        add ? "add" : "del",
+        "prio",
+        RULE_PRIO,
+        "to",
+        dest_str,
+        "fwmark",
+        mark_str,
+        "table",
+        mark_str
+    };
+    return runCmd(ARRAY_SIZE(rule_cmd), rule_cmd);
 }
 
 int SecondaryTableController::addUidRule(const char *iface, int uid_start, int uid_end) {
@@ -594,9 +516,9 @@
         }
     }
     char uid_str[24] = {0};
-    char chain_str[IFNAMSIZ + 18];
     snprintf(uid_str, sizeof(uid_str), "%d-%d", uid_start, uid_end);
-    snprintf(chain_str, sizeof(chain_str), LOCAL_MANGLE_IFACE_FORMAT, iface);
+    char mark_str[11] = {0};
+    snprintf(mark_str, sizeof(mark_str), "%d", mark);
     return execIptables(V4V6,
             "-t",
             "mangle",
@@ -606,8 +528,10 @@
             "owner",
             "--uid-owner",
             uid_str,
-            "-g",
-            chain_str,
+            "-j",
+            "MARK",
+            "--set-mark",
+            mark_str,
             NULL);
 }
 
@@ -620,21 +544,6 @@
 }
 
 int SecondaryTableController::setHostExemption(const char *host, bool add) {
-    IptablesTarget target = !strcmp(getVersion(host), "-4") ? V4 : V6;
-    char protect_mark_str[11];
-    snprintf(protect_mark_str, sizeof(protect_mark_str), "%d", PROTECT_MARK);
-    int ret = execIptables(target,
-            "-t",
-            "mangle",
-            add ? "-A" : "-D",
-            LOCAL_MANGLE_EXEMPT,
-            "-d",
-            host,
-            "-j",
-            "MARK",
-            "--set-mark",
-            protect_mark_str,
-            NULL);
     const char *cmd[] = {
         IP_PATH,
         getVersion(host),
@@ -647,8 +556,7 @@
         "table",
         "main"
     };
-    ret |= runCmd(ARRAY_SIZE(cmd), cmd);
-    return ret;
+    return runCmd(ARRAY_SIZE(cmd), cmd);
 }
 
 void SecondaryTableController::getUidMark(SocketClient *cli, int uid) {
diff --git a/SecondaryTableController.h b/SecondaryTableController.h
index a153685..ded60a8 100644
--- a/SecondaryTableController.h
+++ b/SecondaryTableController.h
@@ -60,10 +60,7 @@
 
     static const char* LOCAL_MANGLE_OUTPUT;
     static const char* LOCAL_MANGLE_POSTROUTING;
-    static const char* LOCAL_MANGLE_EXEMPT;
-    static const char* LOCAL_MANGLE_IFACE_FORMAT;
     static const char* LOCAL_NAT_POSTROUTING;
-    static const char* LOCAL_FILTER_OUTPUT;
 
 
 private: