Derive mtu from ipv6 route to ipv4 /96 prefix instead of device

Test: builds, atest, still needs testing on real network
  x86 clatd_test
  --------------
  clatd_test (19 Tests)
  ...
  [6/19] ClatdTest#DetectMtu: PASSED (0ms)
  ...
Bug: 147935930
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I3d11ba082dabf70089867146acd84f6436953663
diff --git a/Android.bp b/Android.bp
index 42fbdaa..ae4bf1d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -30,7 +30,6 @@
         "ipv4.c",
         "ipv6.c",
         "logging.c",
-        "mtu.c",
         "netlink_callbacks.c",
         "netlink_msg.c",
         "ring.c",
diff --git a/clatd.c b/clatd.c
index 8a0d55f..82df63d 100644
--- a/clatd.c
+++ b/clatd.c
@@ -46,7 +46,6 @@
 #include "dump.h"
 #include "getaddr.h"
 #include "logging.h"
-#include "mtu.h"
 #include "resolv_netid.h"
 #include "ring.h"
 #include "setif.h"
@@ -149,8 +148,9 @@
 /* function: configure_tun_ip
  * configures the ipv4 and ipv6 addresses on the tunnel interface
  *   tunnel - tun device data
+ *   mtu    - mtu of tun device
  */
-void configure_tun_ip(const struct tun_data *tunnel, const char *v4_addr) {
+void configure_tun_ip(const struct tun_data *tunnel, const char *v4_addr, int mtu) {
   if (v4_addr) {
     Global_Clatd_Config.ipv4_local_subnet.s_addr = ipv4_address_from_cmdline(v4_addr);
   } else {
@@ -170,7 +170,7 @@
     exit(1);
   }
 
-  status = if_up(tunnel->device4, Global_Clatd_Config.ipv4mtu);
+  status = if_up(tunnel->device4, mtu);
   if (status < 0) {
     logmsg(ANDROID_LOG_FATAL, "configure_tun_ip/if_up(4) failed: %s", strerror(-status));
     exit(1);
@@ -346,41 +346,74 @@
   return 1;
 }
 
+/* function: detect_mtu
+ * returns the ipv6 mtu of the route to plat_subnet:plat_suffix, exits on any failure
+ *   plat_subnet - ipv6 address supplying the top 96 bits of the destination
+ *   plat_suffix - bottom 32 bits of the destination, in network byte order
+ *   mark        - socket mark to set before connecting, MARK_UNSET to leave unmarked
+ */
+int detect_mtu(const struct in6_addr *plat_subnet, uint32_t plat_suffix, uint32_t mark) {
+  int s = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+  if (s < 0) {
+    logmsg(ANDROID_LOG_FATAL, "socket(AF_INET6, SOCK_DGRAM, 0) failed: %s", strerror(errno));
+    exit(1);
+  }
+
+  // Socket's mark affects routing decisions (network selection)
+  if ((mark != MARK_UNSET) && setsockopt(s, SOL_SOCKET, SO_MARK, &mark, sizeof(mark))) {
+    logmsg(ANDROID_LOG_FATAL, "setsockopt(SOL_SOCKET, SO_MARK) failed: %s", strerror(errno));
+    exit(1);
+  }
+
+  struct sockaddr_in6 dst = { .sin6_family = AF_INET6, .sin6_addr = *plat_subnet };
+  dst.sin6_addr.s6_addr32[3] = plat_suffix;  // plat_subnet(96 bits):plat_suffix(32 bits)
+  if (connect(s, (struct sockaddr *)&dst, sizeof(dst))) {
+    logmsg(ANDROID_LOG_FATAL, "connect() failed: %s", strerror(errno));
+    exit(1);
+  }
+
+  // Fetch the socket's IPv6 mtu - this is effectively fetching mtu from routing table
+  int mtu;
+  socklen_t sz_mtu = sizeof(mtu);
+  if (getsockopt(s, SOL_IPV6, IPV6_MTU, &mtu, &sz_mtu)) {
+    logmsg(ANDROID_LOG_FATAL, "getsockopt(SOL_IPV6, IPV6_MTU) failed: %s", strerror(errno));
+    exit(1);
+  }
+  if (sz_mtu != sizeof(mtu)) {
+    logmsg(ANDROID_LOG_FATAL, "getsockopt(SOL_IPV6, IPV6_MTU) returned unexpected size: %d",
+           (int)sz_mtu);
+    exit(1);
+  }
+  close(s);
+  return mtu;
+}
+
 /* function: configure_interface
  * reads the configuration and applies it to the interface
  *   uplink_interface - network interface to use to reach the ipv6 internet
  *   plat_prefix      - PLAT prefix to use
  *   tunnel           - tun device data
  *   net_id           - NetID to use, NETID_UNSET indicates use of default network
+ *   mark             - the socket mark to use for mtu detection (affects route selection)
  */
 void configure_interface(const char *uplink_interface, const char *plat_prefix, const char *v4_addr,
-                         const char *v6_addr, struct tun_data *tunnel, unsigned net_id) {
-
+                         const char *v6_addr, struct tun_data *tunnel, unsigned net_id,
+                         uint32_t mark) {
   if (!read_config("/system/etc/clatd.conf", uplink_interface, plat_prefix, net_id)) {
     logmsg(ANDROID_LOG_FATAL, "read_config failed");
     exit(1);
   }
 
-  if (Global_Clatd_Config.mtu > MAXMTU) {
-    logmsg(ANDROID_LOG_WARN, "Max MTU is %d, requested %d", MAXMTU, Global_Clatd_Config.mtu);
-    Global_Clatd_Config.mtu = MAXMTU;
-  }
-  if (Global_Clatd_Config.mtu <= 0) {
-    Global_Clatd_Config.mtu = getifmtu(Global_Clatd_Config.default_pdp_interface);
-    logmsg(ANDROID_LOG_WARN, "ifmtu=%d", Global_Clatd_Config.mtu);
-  }
-  if (Global_Clatd_Config.mtu < 1280) {
-    logmsg(ANDROID_LOG_WARN, "mtu too small = %d", Global_Clatd_Config.mtu);
-    Global_Clatd_Config.mtu = 1280;
-  }
+  int mtu = detect_mtu(&Global_Clatd_Config.plat_subnet, htonl(0x08080808), mark);
+  // clamp to minimum ipv6 mtu - this probably cannot ever trigger
+  if (mtu < 1280) mtu = 1280;
+  // clamp to buffer size
+  if (mtu > MAXMTU) mtu = MAXMTU;
+  // decrease by ipv6(40) + ipv6 fragmentation header(8) vs ipv4(20) overhead of 28 bytes
+  mtu -= MTU_DELTA;
+  logmsg(ANDROID_LOG_WARN, "ipv4 mtu is %d", mtu);
 
-  if (Global_Clatd_Config.ipv4mtu <= 0 ||
-      Global_Clatd_Config.ipv4mtu > Global_Clatd_Config.mtu - MTU_DELTA) {
-    Global_Clatd_Config.ipv4mtu = Global_Clatd_Config.mtu - MTU_DELTA;
-    logmsg(ANDROID_LOG_WARN, "ipv4mtu now set to = %d", Global_Clatd_Config.ipv4mtu);
-  }
-
-  configure_tun_ip(tunnel, v4_addr);
+  configure_tun_ip(tunnel, v4_addr, mtu);
 
   if (!configure_clat_ipv6_address(tunnel, uplink_interface, v6_addr)) {
     exit(1);
diff --git a/clatd.h b/clatd.h
index d3869bf..e1d28a7 100644
--- a/clatd.h
+++ b/clatd.h
@@ -36,15 +36,16 @@
 #define NO_TRAFFIC_INTERFACE_POLL_FREQUENCY 90
 
 void stop_loop();
-void configure_tun_ip(const struct tun_data *tunnel, const char *v4_addr);
+void configure_tun_ip(const struct tun_data *tunnel, const char *v4_addr, int mtu);
 void set_capability(uint64_t target_cap);
 void drop_root_but_keep_caps();
 void open_sockets(struct tun_data *tunnel, uint32_t mark);
 int ipv6_address_changed(const char *interface);
 int configure_clat_ipv6_address(const struct tun_data *tunnel, const char *interface,
                                 const char *src_addr);
+int detect_mtu(const struct in6_addr *plat_subnet, uint32_t plat_suffix, uint32_t mark);
 void configure_interface(const char *uplink_interface, const char *plat_prefix, const char *v4_addr,
-                         const char *v6, struct tun_data *tunnel, unsigned net_id);
+                         const char *v6, struct tun_data *tunnel, unsigned net_id, uint32_t mark);
 void event_loop(struct tun_data *tunnel);
 
 /* function: parse_int
diff --git a/clatd_test.cpp b/clatd_test.cpp
index f01f49d..08b444c 100644
--- a/clatd_test.cpp
+++ b/clatd_test.cpp
@@ -772,12 +772,16 @@
   EXPECT_EQ(inet_addr("127.0.0.2"), config_select_ipv4_address(&addr, 29));
 }
 
+TEST_F(ClatdTest, DetectMtu) {
+  // ::1 with bottom 32 bits set to 1 is still ::1 which routes via lo with mtu of 64KiB
+  ASSERT_EQ(65536, detect_mtu(&in6addr_loopback, htonl(1), 0 /*MARK_UNSET*/));
+}
+
 TEST_F(ClatdTest, ConfigureTunIp) {
   addr_free_func orig_config_is_ipv4_address_free = config_is_ipv4_address_free;
   config_is_ipv4_address_free                     = over6_free;
 
   Global_Clatd_Config.ipv4_local_prefixlen = 29;
-  Global_Clatd_Config.ipv4mtu              = 1472;
 
   // Create an interface for configure_tun_ip to configure and bring up.
   TunInterface v4Iface;
@@ -785,7 +789,7 @@
   struct tun_data tunnel = makeTunData();
   strlcpy(tunnel.device4, v4Iface.name().c_str(), sizeof(tunnel.device4));
 
-  configure_tun_ip(&tunnel, nullptr /* v4_addr */);
+  configure_tun_ip(&tunnel, nullptr /* v4_addr */, 1472);
   EXPECT_EQ(inet_addr("192.0.0.6"), Global_Clatd_Config.ipv4_local_subnet.s_addr);
 
   union anyip *ip = getinterface_ip(v4Iface.name().c_str(), AF_INET);
@@ -801,7 +805,6 @@
   config_is_ipv4_address_free                     = over6_free;
 
   Global_Clatd_Config.ipv4_local_prefixlen = 29;
-  Global_Clatd_Config.ipv4mtu              = 1472;
 
   // Create an interface for configure_tun_ip to configure and bring up.
   TunInterface v4Iface;
@@ -809,7 +812,7 @@
   struct tun_data tunnel = makeTunData();
   strlcpy(tunnel.device4, v4Iface.name().c_str(), sizeof(tunnel.device4));
 
-  configure_tun_ip(&tunnel, "192.0.2.1" /* v4_addr */);
+  configure_tun_ip(&tunnel, "192.0.2.1" /* v4_addr */, 1472);
   EXPECT_EQ(inet_addr("192.0.2.1"), Global_Clatd_Config.ipv4_local_subnet.s_addr);
 
   union anyip *ip = getinterface_ip(v4Iface.name().c_str(), AF_INET);
diff --git a/config.c b/config.c
index 3ea510d..1c1a1f7 100644
--- a/config.c
+++ b/config.c
@@ -318,10 +318,6 @@
   Global_Clatd_Config.default_pdp_interface = strdup(uplink_interface);
   if (!Global_Clatd_Config.default_pdp_interface) goto failed;
 
-  Global_Clatd_Config.mtu = -1;
-
-  Global_Clatd_Config.ipv4mtu = -1;
-
   if (!config_item_ip(root, "ipv4_local_subnet", DEFAULT_IPV4_LOCAL_SUBNET,
                       &Global_Clatd_Config.ipv4_local_subnet))
     goto failed;
@@ -377,8 +373,6 @@
 void dump_config() {
   char charbuffer[INET6_ADDRSTRLEN];
 
-  logmsg(ANDROID_LOG_DEBUG, "mtu = %d", Global_Clatd_Config.mtu);
-  logmsg(ANDROID_LOG_DEBUG, "ipv4mtu = %d", Global_Clatd_Config.ipv4mtu);
   logmsg(
     ANDROID_LOG_DEBUG, "ipv6_local_subnet = %s",
     inet_ntop(AF_INET6, &Global_Clatd_Config.ipv6_local_subnet, charbuffer, sizeof(charbuffer)));
diff --git a/config.h b/config.h
index 88c632a..d5c4399 100644
--- a/config.h
+++ b/config.h
@@ -26,7 +26,6 @@
 #define DNS64_DETECTION_HOSTNAME "ipv4only.arpa"
 
 struct clat_config {
-  int16_t mtu, ipv4mtu;
   struct in6_addr ipv6_local_subnet;
   struct in6_addr ipv6_host_id;
   struct in_addr ipv4_local_subnet;
diff --git a/main.c b/main.c
index 9e796fd..e973bf3 100644
--- a/main.c
+++ b/main.c
@@ -142,7 +142,7 @@
   // following line causes XLAT failure in permissive mode.
   unsetenv("ANDROID_DNS_MODE");
 
-  configure_interface(uplink_interface, plat_prefix, v4_addr, v6_addr, &tunnel, net_id);
+  configure_interface(uplink_interface, plat_prefix, v4_addr, v6_addr, &tunnel, net_id, mark);
 
   // Drop all remaining capabilities.
   set_capability(0);
diff --git a/mtu.c b/mtu.c
deleted file mode 100644
index 472bd4e..0000000
--- a/mtu.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright 2011 Daniel Drown
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * mtu.c - get interface mtu
- */
-
-#include <net/if.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "mtu.h"
-
-/* function: getifmtu
- * returns the interface mtu or -1 on failure
- * ifname - interface name
- */
-int getifmtu(const char *ifname) {
-  int fd;
-  struct ifreq if_mtu;
-
-  fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
-  if (fd < 0) {
-    return -1;
-  }
-  strncpy(if_mtu.ifr_name, ifname, IFNAMSIZ);
-  if_mtu.ifr_name[IFNAMSIZ - 1] = '\0';
-  if (ioctl(fd, SIOCGIFMTU, &if_mtu) < 0) {
-    close(fd);
-    return -1;
-  }
-  close(fd);
-  return if_mtu.ifr_mtu;
-}
diff --git a/mtu.h b/mtu.h
deleted file mode 100644
index c330c24..0000000
--- a/mtu.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2011 Daniel Drown
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * mtu.c - get interface mtu
- */
-
-#ifndef __MTU_H__
-#define __MTU_H__
-
-int getifmtu(const char *ifname);
-
-#endif