Manually merge AOSP into nyc-dev

Bug: 26839129
Change-Id: Ifbeb031a3c30acdb0dc0f5e218f9f8c53b91553e
diff --git a/Android.bp b/Android.bp
index b7e94ba..91c2972 100644
--- a/Android.bp
+++ b/Android.bp
@@ -15,6 +15,7 @@
 cc_library_host_static {
     name: "libckati",
     srcs: [
+        "affinity.cc",
         "command.cc",
         "dep.cc",
         "eval.cc",
@@ -30,6 +31,7 @@
         "log.cc",
         "ninja.cc",
         "parser.cc",
+        "regen.cc",
         "rule.cc",
         "stats.cc",
         "stmt.cc",
@@ -37,6 +39,7 @@
         "stringprintf.cc",
         "strutil.cc",
         "symtab.cc",
+        "thread_pool.cc",
         "timeutil.cc",
         "var.cc",
         "version_unknown.cc",
@@ -53,7 +56,7 @@
     cflags: ["-W", "-Wall", "-DNOLOG"],
     target: {
         linux: {
-            host_ldlibs: ["-lrt"],
+            host_ldlibs: ["-lrt", "-lpthread"],
         },
     },
 }
@@ -65,13 +68,14 @@
         "find_test.cc",
         "ninja_test.cc",
         "string_piece_test.cc",
+        "strutil_bench.cc",
         "strutil_test.cc",
     ],
     gtest: false,
     whole_static_libs: ["libckati"],
     target: {
         linux: {
-            host_ldlibs: ["-lrt"],
+            host_ldlibs: ["-lrt", "-lpthread"],
         },
     },
 }
diff --git a/Makefile b/Makefile
index 81dd938..5d0d45c 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-all: kati ckati ckati_tests
+all: ckati ckati_tests
 
 include Makefile.kati
 include Makefile.ckati
diff --git a/Makefile.ckati b/Makefile.ckati
index e3361d8..377e36d 100644
--- a/Makefile.ckati
+++ b/Makefile.ckati
@@ -22,8 +22,8 @@
 KATI_BIN_PATH ?= .
 
 KATI_CXX_SRCS := \
+	affinity.cc \
 	command.cc \
-	condvar.cc \
 	dep.cc \
 	eval.cc \
 	exec.cc \
@@ -37,7 +37,6 @@
 	io.cc \
 	log.cc \
 	main.cc \
-	mutex.cc \
 	ninja.cc \
 	parser.cc \
 	regen.cc \
@@ -48,7 +47,6 @@
 	stringprintf.cc \
 	strutil.cc \
 	symtab.cc \
-	thread.cc \
 	thread_pool.cc \
 	timeutil.cc \
 	var.cc
@@ -57,7 +55,9 @@
 	version.cc
 
 KATI_CXX_SRCS := $(addprefix $(KATI_SRC_PATH)/,$(KATI_CXX_SRCS))
-KATI_CXX_TEST_SRCS:= $(wildcard $(KATI_SRC_PATH)/*_test.cc)
+KATI_CXX_TEST_SRCS := \
+  $(wildcard $(KATI_SRC_PATH)/*_test.cc) \
+  $(wildcard $(KATI_SRC_PATH)/*_bench.cc)
 
 KATI_CXX_OBJS := $(patsubst $(KATI_SRC_PATH)/%.cc,$(KATI_INTERMEDIATES_PATH)/%.o,\
 	$(KATI_CXX_SRCS))
@@ -71,6 +71,7 @@
 
 KATI_CXXFLAGS := -g -W -Wall -MMD -MP
 KATI_CXXFLAGS += -O -DNOLOG
+KATI_CXXFLAGS += -march=native
 #KATI_CXXFLAGS += -pg
 
 ifeq ($(shell uname),Linux)
diff --git a/affinity.cc b/affinity.cc
new file mode 100644
index 0000000..0743f94
--- /dev/null
+++ b/affinity.cc
@@ -0,0 +1,50 @@
+// Copyright 2016 Google Inc. All rights reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "affinity.h"
+
+#include "flags.h"
+#include "log.h"
+
+#ifdef __linux__
+
+#include <sched.h>
+
+void SetAffinityForSingleThread() {
+  cpu_set_t cs;
+  CPU_ZERO(&cs);
+  int n = g_flags.num_cpus / 2;
+  CPU_SET(n, &cs);
+  if (n > 1)
+    CPU_SET(n + 1, &cs);
+  if (sched_setaffinity(0, sizeof(cs), &cs) < 0)
+    WARN("sched_setaffinity: %s", strerror(errno));
+}
+
+void SetAffinityForMultiThread() {
+  cpu_set_t cs;
+  CPU_ZERO(&cs);
+  for (int i = 0; i < g_flags.num_cpus; i++) {
+    CPU_SET(i, &cs);
+  }
+  if (sched_setaffinity(0, sizeof(cs), &cs) < 0)
+    WARN("sched_setaffinity: %s", strerror(errno));
+}
+
+#else
+
+void SetAffinityForSingleThread() {}
+void SetAffinityForMultiThread() {}
+
+#endif
diff --git a/thread.h b/affinity.h
similarity index 67%
rename from thread.h
rename to affinity.h
index 440fc5b..e6f6adc 100644
--- a/thread.h
+++ b/affinity.h
@@ -12,26 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef THREAD_H_
-#define THREAD_H_
+#ifndef AFFINITY_H_
+#define AFFINITY_H_
 
-#include <pthread.h>
+void SetAffinityForSingleThread();
+void SetAffinityForMultiThread();
 
-#include <functional>
-
-using namespace std;
-
-class thread {
-  typedef function<void(void)> fn_t;
-
- public:
-  explicit thread(const fn_t& fn);
-  void join();
-
- private:
-  static void* Run(void* p);
-
-  pthread_t th_;
-};
-
-#endif  // THREAD_H_
+#endif
diff --git a/command.cc b/command.cc
index ac7fadd..f75a8a0 100644
--- a/command.cc
+++ b/command.cc
@@ -171,12 +171,11 @@
 
 CommandEvaluator::CommandEvaluator(Evaluator* ev)
     : ev_(ev) {
-  Vars* vars = ev_->mutable_vars();
 #define INSERT_AUTO_VAR(name, sym) do {                                 \
     Var* v = new name(this, sym);                                       \
-    (*vars)[Intern(sym)] = v;                                           \
-    (*vars)[Intern(sym"D")] = new AutoSuffixDVar(this, sym"D", v);      \
-    (*vars)[Intern(sym"F")] = new AutoSuffixFVar(this, sym"F", v);      \
+    Intern(sym).SetGlobalVar(v);                                        \
+    Intern(sym"D").SetGlobalVar(new AutoSuffixDVar(this, sym"D", v));   \
+    Intern(sym"F").SetGlobalVar(new AutoSuffixFVar(this, sym"F", v));   \
   } while (0)
   INSERT_AUTO_VAR(AutoAtVar, "@");
   INSERT_AUTO_VAR(AutoLessVar, "<");
diff --git a/condvar.cc b/condvar.cc
deleted file mode 100644
index d7d4e39..0000000
--- a/condvar.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "condvar.h"
-
-#include "log.h"
-
-condition_variable::condition_variable() {
-  if (pthread_cond_init(&cond_, NULL) != 0)
-    PERROR("pthread_cond_init");
-}
-
-condition_variable::~condition_variable() {
-  if (pthread_cond_destroy(&cond_) != 0)
-    PERROR("pthread_cond_destroy");
-}
-
-void condition_variable::wait(const unique_lock<mutex>& mu) {
-  if (pthread_cond_wait(&cond_, &mu.mutex()->mu_) != 0)
-    PERROR("pthread_cond_wait");
-}
-
-void condition_variable::notify_one() {
-  if (pthread_cond_signal(&cond_) != 0)
-    PERROR("pthread_cond_signal");
-}
-
-void condition_variable::notify_all() {
-  if (pthread_cond_broadcast(&cond_) != 0)
-    PERROR("pthread_cond_broadcast");
-}
diff --git a/condvar.h b/condvar.h
deleted file mode 100644
index 2f9ab3a..0000000
--- a/condvar.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef CONDVAR_H_
-#define CONDVAR_H_
-
-#include <pthread.h>
-
-#include "mutex.h"
-
-class condition_variable {
- public:
-  condition_variable();
-  ~condition_variable();
-
-  void wait(const unique_lock<mutex>& mu);
-  void notify_one();
-  void notify_all();
-
- private:
-  pthread_cond_t cond_;
-};
-
-#endif  // CONDVAR_H_
diff --git a/dep.cc b/dep.cc
index 45f9584..da209c7 100644
--- a/dep.cc
+++ b/dep.cc
@@ -27,8 +27,10 @@
 #include "fileutil.h"
 #include "log.h"
 #include "rule.h"
+#include "stats.h"
 #include "strutil.h"
 #include "symtab.h"
+#include "timeutil.h"
 #include "var.h"
 
 namespace {
@@ -43,6 +45,31 @@
   return Intern(r);
 }
 
+void ApplyOutputPattern(const Rule& r,
+                        Symbol output,
+                        const vector<Symbol>& inputs,
+                        vector<Symbol>* out_inputs) {
+  if (inputs.empty())
+    return;
+  if (r.is_suffix_rule) {
+    for (Symbol input : inputs) {
+      out_inputs->push_back(ReplaceSuffix(output, input));
+    }
+    return;
+  }
+  if (r.output_patterns.empty()) {
+    copy(inputs.begin(), inputs.end(), back_inserter(*out_inputs));
+    return;
+  }
+  CHECK(r.output_patterns.size() == 1);
+  Pattern pat(r.output_patterns[0].str());
+  for (Symbol input : inputs) {
+    string buf;
+    pat.AppendSubst(output.str(), input.str(), &buf);
+    out_inputs->push_back(Intern(buf));
+  }
+}
+
 class RuleTrie {
   struct Entry {
     Entry(const Rule* r, StringPiece s)
@@ -99,6 +126,98 @@
   unordered_map<char, RuleTrie*> children_;
 };
 
+
+bool IsSuffixRule(Symbol output) {
+  if (output.empty() || output.str()[0] != '.')
+    return false;
+  const StringPiece rest = StringPiece(output.str()).substr(1);
+  size_t dot_index = rest.find('.');
+  // If there is only a single dot or the third dot, this is not a
+  // suffix rule.
+  if (dot_index == string::npos ||
+      rest.substr(dot_index+1).find('.') != string::npos) {
+    return false;
+  }
+  return true;
+}
+
+struct RuleMerger {
+  vector<const Rule*> rules;
+  const Rule* primary_rule;
+  bool is_double_colon;
+
+  RuleMerger()
+      : primary_rule(nullptr),
+        is_double_colon(false) {
+  }
+
+  void AddRule(Symbol output, const Rule* r) {
+    if (rules.empty()) {
+      is_double_colon = r->is_double_colon;
+    } else if (is_double_colon != r->is_double_colon) {
+      ERROR("%s:%d: *** target file `%s' has both : and :: entries.",
+            LOCF(r->loc), output.c_str());
+    }
+
+    if (primary_rule && !r->cmds.empty() &&
+        !IsSuffixRule(output) && !r->is_double_colon) {
+      WARN("%s:%d: warning: overriding commands for target `%s'",
+           LOCF(r->cmd_loc()), output.c_str());
+      WARN("%s:%d: warning: ignoring old commands for target `%s'",
+           LOCF(primary_rule->cmd_loc()), output.c_str());
+      primary_rule = r;
+    }
+    if (!primary_rule && !r->cmds.empty()) {
+      primary_rule = r;
+    }
+
+    rules.push_back(r);
+  }
+
+  void FillDepNodeFromRule(Symbol output,
+                           const Rule* r,
+                           DepNode* n) const {
+    if (is_double_colon)
+      copy(r->cmds.begin(), r->cmds.end(), back_inserter(n->cmds));
+
+    ApplyOutputPattern(*r, output, r->inputs, &n->actual_inputs);
+    ApplyOutputPattern(*r, output, r->order_only_inputs,
+                       &n->actual_order_only_inputs);
+
+    if (r->output_patterns.size() >= 1) {
+      CHECK(r->output_patterns.size() == 1);
+      n->output_pattern = r->output_patterns[0];
+    }
+  }
+
+  void FillDepNodeLoc(const Rule* r, DepNode* n) const {
+    n->loc = r->loc;
+    if (!r->cmds.empty() && r->cmd_lineno)
+      n->loc.lineno = r->cmd_lineno;
+  }
+
+  void FillDepNode(Symbol output,
+                   const Rule* pattern_rule,
+                   DepNode* n) const {
+    if (primary_rule) {
+      CHECK(!pattern_rule);
+      FillDepNodeFromRule(output, primary_rule, n);
+      FillDepNodeLoc(primary_rule, n);
+      n->cmds = primary_rule->cmds;
+    } else if (pattern_rule) {
+      FillDepNodeFromRule(output, pattern_rule, n);
+      FillDepNodeLoc(pattern_rule, n);
+      n->cmds = pattern_rule->cmds;
+    }
+
+    for (const Rule* r : rules) {
+      if (r == primary_rule)
+        continue;
+      FillDepNodeFromRule(output, r, n);
+    }
+  }
+};
+
 }  // namespace
 
 DepNode::DepNode(Symbol o, bool p, bool r)
@@ -116,38 +235,42 @@
 class DepBuilder {
  public:
   DepBuilder(Evaluator* ev,
-             const vector<shared_ptr<Rule>>& rules,
+             const vector<const Rule*>& rules,
              const unordered_map<Symbol, Vars*>& rule_vars)
       : ev_(ev),
         rule_vars_(rule_vars),
         implicit_rules_(new RuleTrie()),
-        first_rule_(NULL),
+        first_rule_(Symbol::IsUninitialized{}),
         depfile_var_name_(Intern(".KATI_DEPFILE")) {
+    ScopedTimeReporter tr("make dep (populate)");
     PopulateRules(rules);
-    LOG_STAT("%zu variables", ev->mutable_vars()->size());
+    // TODO?
+    //LOG_STAT("%zu variables", ev->mutable_vars()->size());
     LOG_STAT("%zu explicit rules", rules_.size());
     LOG_STAT("%zu implicit rules", implicit_rules_->size());
     LOG_STAT("%zu suffix rules", suffix_rules_.size());
 
-    auto found = rules_.find(Intern(".PHONY"));
-    if (found != rules_.end()) {
-      for (Symbol input : found->second->inputs) {
-        phony_.insert(input);
-      }
+    HandleSpecialTargets();
+  }
+
+  void HandleSpecialTargets() {
+    Loc loc;
+    vector<Symbol> targets;
+
+    if (GetRuleInputs(Intern(".PHONY"), &targets, &loc)) {
+      for (Symbol t : targets)
+        phony_.insert(t);
     }
-    found = rules_.find(Intern(".KATI_RESTAT"));
-    if (found != rules_.end()) {
-      for (Symbol input : found->second->inputs) {
-        restat_.insert(input);
-      }
+    if (GetRuleInputs(Intern(".KATI_RESTAT"), &targets, &loc)) {
+      for (Symbol t : targets)
+        restat_.insert(t);
     }
-    found = rules_.find(Intern(".SUFFIXES"));
-    if (found != rules_.end()) {
-      if (found->second->inputs.empty()) {
+    if (GetRuleInputs(Intern(".SUFFIXES"), &targets, &loc)) {
+      if (targets.empty()) {
         suffix_rules_.clear();
       } else {
         WARN("%s:%d: kati doesn't support .SUFFIXES with prerequisites",
-             LOCF(found->second->loc));
+             LOCF(loc));
       }
     }
 
@@ -168,9 +291,8 @@
       NULL
     };
     for (const char** p = kUnsupportedBuiltinTargets; *p; p++) {
-      auto found = rules_.find(Intern(*p));
-      if (found != rules_.end()) {
-        WARN("%s:%d: kati doesn't support %s", LOCF(found->second->loc), *p);
+      if (GetRuleInputs(Intern(*p), &targets, &loc)) {
+        WARN("%s:%d: kati doesn't support %s", LOCF(loc), *p);
       }
     }
   }
@@ -179,21 +301,22 @@
   }
 
   void Build(vector<Symbol> targets, vector<DepNode*>* nodes) {
-    if (!first_rule_) {
+    if (!first_rule_.IsValid()) {
       ERROR("*** No targets.");
     }
-    CHECK(!first_rule_->outputs.empty());
 
     if (!g_flags.gen_all_targets && targets.empty()) {
-      targets.push_back(first_rule_->outputs[0]);
+      targets.push_back(first_rule_);
     }
     if (g_flags.gen_all_targets) {
       unordered_set<Symbol> non_root_targets;
       for (const auto& p : rules_) {
-        for (Symbol t : p.second->inputs)
-          non_root_targets.insert(t);
-        for (Symbol t : p.second->order_only_inputs)
-          non_root_targets.insert(t);
+        for (const Rule* r : p.second.rules) {
+          for (Symbol t : r->inputs)
+            non_root_targets.insert(t);
+          for (Symbol t : r->order_only_inputs)
+            non_root_targets.insert(t);
+        }
       }
 
       for (const auto& p : rules_) {
@@ -226,8 +349,23 @@
     return ::Exists(target.str());
   }
 
-  void PopulateRules(const vector<shared_ptr<Rule>>& rules) {
-    for (shared_ptr<Rule> rule : rules) {
+  bool GetRuleInputs(Symbol s, vector<Symbol>* o, Loc* l) {
+    auto found = rules_.find(s);
+    if (found == rules_.end())
+      return false;
+
+    o->clear();
+    CHECK(!found->second.rules.empty());
+    *l = found->second.rules.front()->loc;
+    for (const Rule* r : found->second.rules) {
+      for (Symbol i : r->inputs)
+        o->push_back(i);
+    }
+    return true;
+  }
+
+  void PopulateRules(const vector<const Rule*>& rules) {
+    for (const Rule* rule : rules) {
       if (rule->outputs.empty()) {
         PopulateImplicitRule(rule);
       } else {
@@ -239,18 +377,12 @@
     }
   }
 
-  bool PopulateSuffixRule(shared_ptr<Rule> rule, Symbol output) {
-    if (output.empty() || output.str()[0] != '.')
+  bool PopulateSuffixRule(const Rule* rule, Symbol output) {
+    if (!IsSuffixRule(output))
       return false;
 
     const StringPiece rest = StringPiece(output.str()).substr(1);
     size_t dot_index = rest.find('.');
-    // If there is only a single dot or the third dot, this is not a
-    // suffix rule.
-    if (dot_index == string::npos ||
-        rest.substr(dot_index+1).find('.') != string::npos) {
-      return false;
-    }
 
     StringPiece input_suffix = rest.substr(0, dot_index);
     StringPiece output_suffix = rest.substr(dot_index+1);
@@ -262,120 +394,50 @@
     return true;
   }
 
-  void ApplyOutputPattern(const Rule& r,
-                          Symbol output,
-                          const vector<Symbol>& inputs,
-                          vector<Symbol>* out_inputs) {
-    if (inputs.empty())
-      return;
-    if (r.is_suffix_rule) {
-      for (Symbol input : inputs) {
-        out_inputs->push_back(ReplaceSuffix(output, input));
+  void PopulateExplicitRule(const Rule* rule) {
+    for (Symbol output : rule->outputs) {
+      if (!first_rule_.IsValid() && output.get(0) != '.') {
+        first_rule_ = output;
       }
-      return;
-    }
-    if (r.output_patterns.empty()) {
-      copy(inputs.begin(), inputs.end(), back_inserter(*out_inputs));
-      return;
-    }
-    CHECK(r.output_patterns.size() == 1);
-    Pattern pat(r.output_patterns[0].str());
-    for (Symbol input : inputs) {
-      string buf;
-      pat.AppendSubst(output.str(), input.str(), &buf);
-      out_inputs->push_back(Intern(buf));
+      rules_[output].AddRule(output, rule);
+      PopulateSuffixRule(rule, output);
     }
   }
 
-  shared_ptr<Rule> MergeRules(const Rule& old_rule,
-                              const Rule& rule,
-                              Symbol output,
-                              bool is_suffix_rule) {
-    if (old_rule.is_double_colon != rule.is_double_colon) {
-      ERROR("%s:%d: *** target file `%s' has both : and :: entries.",
-            LOCF(rule.loc), output.str().c_str());
-    }
-    if (!old_rule.cmds.empty() && !rule.cmds.empty() &&
-        !is_suffix_rule && !rule.is_double_colon) {
-      WARN("%s:%d: warning: overriding commands for target `%s'",
-           LOCF(rule.cmd_loc()), output.str().c_str());
-      WARN("%s:%d: warning: ignoring old commands for target `%s'",
-           LOCF(old_rule.cmd_loc()), output.str().c_str());
-    }
-
-    shared_ptr<Rule> r = make_shared<Rule>(rule);
-    if (rule.is_double_colon) {
-      r->cmds.clear();
-      for (Value* c : old_rule.cmds)
-        r->cmds.push_back(c);
-      for (Value* c : rule.cmds)
-        r->cmds.push_back(c);
-      if (!rule.output_patterns.empty() && !old_rule.output_patterns.empty() &&
-          rule.output_patterns != old_rule.output_patterns) {
-        ERROR("%s:%d: TODO: merging two double colon rules with output "
-              "patterns is not supported", LOCF(rule.loc));
-      }
-    } else if (!old_rule.cmds.empty() && rule.cmds.empty()) {
-      r->cmds = old_rule.cmds;
-    }
-
-    // If the latter rule has a command (regardless of the commands in
-    // |old_rule|), inputs in the latter rule has a priority.
-    if (rule.cmds.empty()) {
-      r->inputs = old_rule.inputs;
-      ApplyOutputPattern(rule, output, rule.inputs, &r->inputs);
-      r->order_only_inputs = old_rule.order_only_inputs;
-      ApplyOutputPattern(rule, output, rule.order_only_inputs,
-                         &r->order_only_inputs);
-      r->output_patterns = old_rule.output_patterns;
-    } else {
-      ApplyOutputPattern(old_rule, output, old_rule.inputs, &r->inputs);
-      ApplyOutputPattern(old_rule, output, old_rule.order_only_inputs,
-                         &r->order_only_inputs);
-    }
-    r->is_default_target |= old_rule.is_default_target;
-    return r;
+  static bool IsIgnorableImplicitRule(const Rule* rule) {
+    // As kati doesn't have RCS/SCCS related default rules, we can
+    // safely ignore suppression for them.
+    if (rule->inputs.size() != 1)
+      return false;
+    if (!rule->order_only_inputs.empty())
+      return false;
+    if (!rule->cmds.empty())
+      return false;
+    const string& i = rule->inputs[0].str();
+    return (i == "RCS/%,v" || i == "RCS/%" || i == "%,v" ||
+            i == "s.%" || i == "SCCS/s.%");
   }
 
-  void PopulateExplicitRule(shared_ptr<Rule> orig_rule) {
-    for (Symbol output : orig_rule->outputs) {
-      const bool is_suffix_rule = PopulateSuffixRule(orig_rule, output);
-
-      shared_ptr<Rule> rule = make_shared<Rule>(*orig_rule);
-      rule->outputs.clear();
-      rule->outputs.push_back(output);
-
-      auto p = rules_.insert(make_pair(output, rule));
-      if (p.second) {
-        if (!first_rule_ && output.get(0) != '.') {
-          rule->is_default_target = true;
-          first_rule_ = rule;
-        }
-      } else {
-        p.first->second =
-            MergeRules(*p.first->second, *rule, output, is_suffix_rule);
-      }
-    }
-  }
-
-  void PopulateImplicitRule(shared_ptr<Rule> rule) {
+  void PopulateImplicitRule(const Rule* rule) {
     for (Symbol output_pattern : rule->output_patterns) {
-      implicit_rules_->Add(output_pattern.str(), rule.get());
+      if (output_pattern.str() != "%" || !IsIgnorableImplicitRule(rule))
+        implicit_rules_->Add(output_pattern.str(), rule);
     }
   }
 
-  shared_ptr<Rule> LookupRule(Symbol o) {
+  const RuleMerger* LookupRuleMerger(Symbol o) {
     auto found = rules_.find(o);
-    if (found != rules_.end())
-      return found->second;
-    return NULL;
+    if (found != rules_.end()) {
+      return &found->second;
+    }
+    return nullptr;
   }
 
   Vars* LookupRuleVars(Symbol o) {
     auto found = rule_vars_.find(o);
     if (found != rule_vars_.end())
       return found->second;
-    return NULL;
+    return nullptr;
   }
 
   bool CanPickImplicitRule(const Rule* rule, Symbol output, DepNode* n,
@@ -435,53 +497,40 @@
     return r;
   }
 
-  bool PickRule(Symbol output, DepNode* n,
-                shared_ptr<Rule>* out_rule, Vars** out_var) {
-    shared_ptr<Rule> rule = LookupRule(output);
+  bool PickRule(Symbol output,
+                DepNode* n,
+                const RuleMerger** out_rule_merger,
+                shared_ptr<Rule>* pattern_rule,
+                Vars** out_var) {
+    const RuleMerger* rule_merger = LookupRuleMerger(output);
     Vars* vars = LookupRuleVars(output);
-    *out_rule = rule;
+    *out_rule_merger = rule_merger;
     *out_var = vars;
-    if (rule) {
-      if (!rule->cmds.empty()) {
-        return true;
-      }
-    }
+    if (rule_merger && rule_merger->primary_rule)
+      return true;
 
     vector<const Rule*> irules;
     implicit_rules_->Get(output.str(), &irules);
     for (auto iter = irules.rbegin(); iter != irules.rend(); ++iter) {
-      shared_ptr<Rule> irule;
-      if (!CanPickImplicitRule(*iter, output, n, &irule))
+      if (!CanPickImplicitRule(*iter, output, n, pattern_rule))
         continue;
-      if (rule) {
-        shared_ptr<Rule> r = make_shared<Rule>(*rule);
-        r->output_patterns = irule->output_patterns;
-        r->inputs.clear();
-        r->inputs = irule->inputs;
-        copy(rule->inputs.begin(), rule->inputs.end(),
-             back_inserter(r->inputs));
-        r->cmds = irule->cmds;
-        r->is_default_target |= irule->is_default_target;
-        r->loc = irule->loc;
-        r->cmd_lineno = irule->cmd_lineno;
-        *out_rule = r;
+      if (rule_merger) {
         return true;
       }
-      CHECK(irule->output_patterns.size() == 1);
-      vars = MergeImplicitRuleVars(irule->output_patterns[0], vars);
+      CHECK((*pattern_rule)->output_patterns.size() == 1);
+      vars = MergeImplicitRuleVars((*pattern_rule)->output_patterns[0], vars);
       *out_var = vars;
-      *out_rule = make_shared<Rule>(*irule);
       return true;
     }
 
     StringPiece output_suffix = GetExt(output.str());
     if (output_suffix.get(0) != '.')
-      return rule.get();
+      return rule_merger;
     output_suffix = output_suffix.substr(1);
 
     SuffixRuleMap::const_iterator found = suffix_rules_.find(output_suffix);
     if (found == suffix_rules_.end())
-      return rule.get();
+      return rule_merger;
 
     for (shared_ptr<Rule> irule : found->second) {
       CHECK(irule->inputs.size() == 1);
@@ -489,26 +538,18 @@
       if (!Exists(input))
         continue;
 
-      if (rule) {
-        shared_ptr<Rule> r = make_shared<Rule>(*rule);
-        r->inputs.insert(r->inputs.begin(), input);
-        r->cmds = irule->cmds;
-        r->is_default_target |= irule->is_default_target;
-        r->loc = irule->loc;
-        r->cmd_lineno = irule->cmd_lineno;
-        *out_rule = r;
+      *pattern_rule = irule;
+      if (rule_merger)
         return true;
-      }
       if (vars) {
         CHECK(irule->outputs.size() == 1);
         vars = MergeImplicitRuleVars(irule->outputs[0], vars);
         *out_var = vars;
       }
-      *out_rule = irule;
       return true;
     }
 
-    return rule.get();
+    return rule_merger;
   }
 
   DepNode* BuildPlan(Symbol output, Symbol needed_by UNUSED) {
@@ -526,16 +567,16 @@
                              restat_.count(output));
     done_[output] = n;
 
-    shared_ptr<Rule> rule;
+    const RuleMerger* rule_merger = nullptr;
+    shared_ptr<Rule> pattern_rule;
     Vars* vars;
-    if (!PickRule(output, n, &rule, &vars)) {
+    if (!PickRule(output, n, &rule_merger, &pattern_rule, &vars)) {
       return n;
     }
-
-    if (rule->output_patterns.size() >= 1) {
-      CHECK(rule->output_patterns.size() == 1);
-      n->output_pattern = rule->output_patterns[0];
-    }
+    if (rule_merger)
+      rule_merger->FillDepNode(output, pattern_rule.get(), n);
+    else
+      RuleMerger().FillDepNode(output, pattern_rule.get(), n);
 
     vector<unique_ptr<ScopedVar>> sv;
     if (vars) {
@@ -570,23 +611,18 @@
       }
     }
 
-    ApplyOutputPattern(*rule, output, rule->inputs, &n->actual_inputs);
     for (Symbol input : n->actual_inputs) {
       DepNode* c = BuildPlan(input, output);
       n->deps.push_back(c);
     }
 
-    vector<Symbol> order_only_inputs;
-    ApplyOutputPattern(*rule, output, rule->order_only_inputs,
-                       &order_only_inputs);
-    for (Symbol input : order_only_inputs) {
+    for (Symbol input : n->actual_order_only_inputs) {
       DepNode* c = BuildPlan(input, output);
       n->order_onlys.push_back(c);
     }
 
     n->has_rule = true;
-    n->cmds = rule->cmds;
-    n->is_default_target = rule->is_default_target;
+    n->is_default_target = first_rule_ == output;
     if (cur_rule_vars_->empty()) {
       n->rule_vars = NULL;
     } else {
@@ -595,15 +631,12 @@
         n->rule_vars->insert(p);
       }
     }
-    n->loc = rule->loc;
-    if (!rule->cmds.empty() && rule->cmd_lineno)
-      n->loc.lineno = rule->cmd_lineno;
 
     return n;
   }
 
   Evaluator* ev_;
-  map<Symbol, shared_ptr<Rule>> rules_;
+  map<Symbol, RuleMerger> rules_;
   const unordered_map<Symbol, Vars*>& rule_vars_;
   unique_ptr<Vars> cur_rule_vars_;
 
@@ -611,7 +644,7 @@
   typedef unordered_map<StringPiece, vector<shared_ptr<Rule>>> SuffixRuleMap;
   SuffixRuleMap suffix_rules_;
 
-  shared_ptr<Rule> first_rule_;
+  Symbol first_rule_;
   unordered_map<Symbol, DepNode*> done_;
   unordered_set<Symbol> phony_;
   unordered_set<Symbol> restat_;
@@ -619,11 +652,12 @@
 };
 
 void MakeDep(Evaluator* ev,
-             const vector<shared_ptr<Rule>>& rules,
+             const vector<const Rule*>& rules,
              const unordered_map<Symbol, Vars*>& rule_vars,
              const vector<Symbol>& targets,
              vector<DepNode*>* nodes) {
   DepBuilder db(ev, rules, rule_vars);
+  ScopedTimeReporter tr("make dep (build)");
   db.Build(targets, nodes);
 }
 
diff --git a/dep.h b/dep.h
index bb70eb3..4302997 100644
--- a/dep.h
+++ b/dep.h
@@ -15,7 +15,6 @@
 #ifndef DEP_H_
 #define DEP_H_
 
-#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -43,6 +42,7 @@
   bool is_phony;
   bool is_restat;
   vector<Symbol> actual_inputs;
+  vector<Symbol> actual_order_only_inputs;
   Vars* rule_vars;
   Var* depfile_var;
   Symbol output_pattern;
@@ -53,7 +53,7 @@
 void QuitDepNodePool();
 
 void MakeDep(Evaluator* ev,
-             const vector<shared_ptr<Rule>>& rules,
+             const vector<const Rule*>& rules,
              const unordered_map<Symbol, Vars*>& rule_vars,
              const vector<Symbol>& targets,
              vector<DepNode*>* nodes);
diff --git a/eval.cc b/eval.cc
index ae41567..6322fc1 100644
--- a/eval.cc
+++ b/eval.cc
@@ -30,16 +30,8 @@
 #include "symtab.h"
 #include "var.h"
 
-EvalResult::~EvalResult() {
-  for (auto p : rule_vars)
-    delete p.second;
-  delete vars;
-}
-
-Evaluator::Evaluator(const Vars* vars)
-    : in_vars_(vars),
-      vars_(new Vars()),
-      last_rule_(NULL),
+Evaluator::Evaluator()
+    : last_rule_(NULL),
       current_scope_(NULL),
       avoid_io_(false),
       eval_depth_(0) {
@@ -61,9 +53,12 @@
   Var* rhs = NULL;
   bool needs_assign = true;
   switch (op) {
-    case AssignOp::COLON_EQ:
-      rhs = new SimpleVar(rhs_v->Eval(this), origin);
+    case AssignOp::COLON_EQ: {
+      SimpleVar* sv = new SimpleVar(origin);
+      rhs_v->Eval(this, sv->mutable_value());
+      rhs = sv;
       break;
+    }
     case AssignOp::EQ:
       rhs = new RecursiveVar(rhs_v, origin, orig_rhs);
       break;
@@ -100,13 +95,13 @@
 void Evaluator::EvalAssign(const AssignStmt* stmt) {
   loc_ = stmt->loc();
   last_rule_ = NULL;
-  Symbol lhs = Intern(stmt->lhs->Eval(this));
+  Symbol lhs = stmt->GetLhsSymbol(this);
   if (lhs.empty())
     Error("*** empty variable name.");
   Var* rhs = EvalRHS(lhs, stmt->rhs, stmt->orig_rhs, stmt->op,
                      stmt->directive == AssignDirective::OVERRIDE);
   if (rhs)
-    vars_->Assign(lhs, rhs);
+    lhs.SetGlobalVar(rhs);
 }
 
 void Evaluator::EvalRule(const RuleStmt* stmt) {
@@ -131,11 +126,12 @@
     }
 
     LOG("Rule: %s", rule->DebugString().c_str());
-    rules_.push_back(shared_ptr<Rule>(rule));
+    rules_.push_back(rule);
     last_rule_ = rule;
     return;
   }
 
+  Symbol lhs = Intern(rule_var.lhs);
   for (Symbol output : rule_var.outputs) {
     auto p = rule_vars_.emplace(output, nullptr);
     if (p.second) {
@@ -159,7 +155,6 @@
     }
 
     current_scope_ = p.first->second;
-    Symbol lhs = Intern(rule_var.lhs);
     Var* rhs_var = EvalRHS(lhs, rhs, StringPiece("*TODO*"), rule_var.op);
     if (rhs_var)
       current_scope_->Assign(lhs, new RuleVar(rhs_var, rule_var.op));
@@ -288,10 +283,7 @@
 }
 
 Var* Evaluator::LookupVarGlobal(Symbol name) {
-  Var* v = vars_->Lookup(name);
-  if (v->IsDefined())
-    return v;
-  v = in_vars_->Lookup(name);
+  Var* v = name.GetGlobalVar();
   if (v->IsDefined())
     return v;
   used_undefined_vars_.insert(name);
diff --git a/eval.h b/eval.h
index 6bc21b9..3eede2b 100644
--- a/eval.h
+++ b/eval.h
@@ -15,7 +15,6 @@
 #ifndef EVAL_H_
 #define EVAL_H_
 
-#include <memory>
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
@@ -32,18 +31,9 @@
 class Var;
 class Vars;
 
-struct EvalResult {
-  ~EvalResult();
-  vector<shared_ptr<Rule>> rules;
-  Vars* vars;
-  unordered_map<StringPiece, Vars*> rule_vars;
-  // TODO: read_mks
-  unordered_map<StringPiece, bool> exports;
-};
-
 class Evaluator {
  public:
-  Evaluator(const Vars* vars);
+  Evaluator();
   ~Evaluator();
 
   void EvalAssign(const AssignStmt* stmt);
@@ -62,11 +52,10 @@
   const Loc& loc() const { return loc_; }
   void set_loc(const Loc& loc) { loc_ = loc; }
 
-  const vector<shared_ptr<Rule>>& rules() const { return rules_; }
+  const vector<const Rule*>& rules() const { return rules_; }
   const unordered_map<Symbol, Vars*>& rule_vars() const {
     return rule_vars_;
   }
-  Vars* mutable_vars() { return vars_; }
   const unordered_map<Symbol, bool>& exports() const { return exports_; }
 
   void Error(const string& msg);
@@ -107,10 +96,8 @@
 
   Var* LookupVarGlobal(Symbol name);
 
-  const Vars* in_vars_;
-  Vars* vars_;
   unordered_map<Symbol, Vars*> rule_vars_;
-  vector<shared_ptr<Rule>> rules_;
+  vector<const Rule*> rules_;
   unordered_map<Symbol, bool> exports_;
 
   Rule* last_rule_;
diff --git a/expr.cc b/expr.cc
index a8f306c..ad7d19e 100644
--- a/expr.cc
+++ b/expr.cc
@@ -62,7 +62,8 @@
     s->append(s_.begin(), s_.end());
   }
 
-  virtual bool IsLiteral() const { return true; }
+  virtual bool IsLiteral() const override { return true; }
+  virtual StringPiece GetLiteralValueUnsafe() const override { return s_; }
 
   virtual string DebugString_() const override {
     return s_.as_string();
@@ -109,7 +110,7 @@
     return r;
   }
 
-  virtual Value* Compact() {
+  virtual Value* Compact() override {
     if (vals_.size() != 1) {
       return this;
     }
diff --git a/expr.h b/expr.h
index e479ed8..fdd6d36 100644
--- a/expr.h
+++ b/expr.h
@@ -42,6 +42,8 @@
   virtual Value* Compact() { return this; }
 
   virtual bool IsLiteral() const { return false; }
+  // Only safe after IsLiteral() returns true.
+  virtual StringPiece GetLiteralValueUnsafe() const { return ""; }
 
   string DebugString() const;
 
diff --git a/file.cc b/file.cc
index 4c423e5..eca09dd 100644
--- a/file.cc
+++ b/file.cc
@@ -26,7 +26,7 @@
 #include "stmt.h"
 
 Makefile::Makefile(const string& filename)
-    : buf_(NULL), len_(0), mtime_(0), filename_(filename) {
+    : mtime_(0), filename_(filename), exists_(false) {
   int fd = open(filename.c_str(), O_RDONLY);
   if (fd < 0) {
     return;
@@ -37,14 +37,15 @@
     PERROR("fstat failed for %s", filename.c_str());
   }
 
-  len_ = st.st_size;
+  size_t len = st.st_size;
   mtime_ = st.st_mtime;
-  buf_ = new char[len_];
-  ssize_t r = read(fd, buf_, len_);
-  if (r != static_cast<ssize_t>(len_)) {
+  buf_.resize(len);
+  exists_ = true;
+  ssize_t r = read(fd, &buf_[0], len);
+  if (r != static_cast<ssize_t>(len)) {
     if (r < 0)
       PERROR("read failed for %s", filename.c_str());
-    ERROR("Unexpected read length=%zd expected=%zu", r, len_);
+    ERROR("Unexpected read length=%zd expected=%zu", r, len);
   }
 
   if (close(fd) < 0) {
@@ -55,7 +56,6 @@
 }
 
 Makefile::~Makefile() {
-  delete[] buf_;
   for (Stmt* stmt : stmts_)
     delete stmt;
 }
diff --git a/file.h b/file.h
index 1746343..6af6696 100644
--- a/file.h
+++ b/file.h
@@ -29,21 +29,20 @@
   explicit Makefile(const string& filename);
   ~Makefile();
 
-  const char* buf() const { return buf_; }
-  size_t len() const { return len_; }
+  const string& buf() const { return buf_; }
   const string& filename() const { return filename_; }
 
   const vector<Stmt*>& stmts() const { return stmts_; }
   vector<Stmt*>* mutable_stmts() { return &stmts_; }
 
-  bool Exists() const { return buf_; }
+  bool Exists() const { return exists_; }
 
  private:
-  char* buf_;
-  size_t len_;
+  string buf_;
   uint64_t mtime_;
   string filename_;
   vector<Stmt*> stmts_;
+  bool exists_;
 };
 
 #endif  // FILE_H_
diff --git a/file_cache.cc b/file_cache.cc
index 6a05550..afdb3bd 100644
--- a/file_cache.cc
+++ b/file_cache.cc
@@ -44,7 +44,7 @@
 
   virtual Makefile* ReadMakefile(const string& filename) override {
     Makefile* result = NULL;
-    auto p = cache_.insert(make_pair(filename, result));
+    auto p = cache_.emplace(filename, result);
     if (p.second) {
       p.first->second = result = new Makefile(filename);
     } else {
diff --git a/fileutil.cc b/fileutil.cc
index 42e81a2..a2a0d09 100644
--- a/fileutil.cc
+++ b/fileutil.cc
@@ -32,6 +32,7 @@
 #include <unordered_map>
 
 #include "log.h"
+#include "strutil.h"
 
 bool Exists(StringPiece filename) {
   CHECK(filename.size() < PATH_MAX);
@@ -62,6 +63,13 @@
 int RunCommand(const string& shell, const string& cmd,
                RedirectStderr redirect_stderr,
                string* s) {
+  string cmd_escaped = cmd;
+  EscapeShell(&cmd_escaped);
+  string cmd_with_shell = shell + " -c \"" + cmd_escaped + "\"";
+  const char* argv[] = {
+    "/bin/sh", "-c", cmd_with_shell.c_str(), NULL
+  };
+
   int pipefd[2];
   if (pipe(pipefd) != 0)
     PERROR("pipe failed");
@@ -106,9 +114,6 @@
       PERROR("dup2 failed");
     close(pipefd[1]);
 
-    const char* argv[] = {
-      shell.c_str(), "-c", cmd.c_str(), NULL
-    };
     execvp(argv[0], const_cast<char**>(argv));
     PLOG("execvp for %s failed", argv[0]);
     kill(getppid(), SIGTERM);
diff --git a/find.cc b/find.cc
index 4c7cd8f..b783acb 100644
--- a/find.cc
+++ b/find.cc
@@ -154,7 +154,7 @@
   virtual bool RunFind(const FindCommand& fc, int d,
                        string* path,
                        unordered_map<const DirentNode*, string>*,
-                       string* out) const {
+                       string* out) const override {
     PrintIfNecessary(fc, *path, type_, d, out);
     return true;
   }
@@ -205,7 +205,7 @@
     }
   }
 
-  virtual const DirentNode* FindDir(StringPiece d) const {
+  virtual const DirentNode* FindDir(StringPiece d) const override {
     if (d.empty() || d == ".")
       return this;
     size_t index = d.find('/');
@@ -224,7 +224,7 @@
   virtual bool RunFind(const FindCommand& fc, int d,
                        string* path,
                        unordered_map<const DirentNode*, string>* cur_read_dirs,
-                       string* out) const {
+                       string* out) const override {
     ScopedReadDirTracker srdt(this, *path, cur_read_dirs);
     if (!srdt.ok()) {
       fprintf(stderr, "FindEmulator: find: File system loop detected; `%s' is "
@@ -308,7 +308,7 @@
       : DirentNode(name), to_(NULL), errno_(0) {
   }
 
-  virtual const DirentNode* FindDir(StringPiece d) const {
+  virtual const DirentNode* FindDir(StringPiece d) const override {
     if (errno_ == 0 && to_)
       return to_->FindDir(d);
     return NULL;
@@ -317,7 +317,7 @@
   virtual bool RunFind(const FindCommand& fc, int d,
                        string* path,
                        unordered_map<const DirentNode*, string>* cur_read_dirs,
-                       string* out) const {
+                       string* out) const override {
     unsigned char type = DT_LNK;
     if (fc.follows_symlinks && errno_ != ENOENT) {
       if (errno_) {
diff --git a/flags.cc b/flags.cc
index 06772f0..7a995a5 100644
--- a/flags.cc
+++ b/flags.cc
@@ -49,7 +49,7 @@
 
 void Flags::Parse(int argc, char** argv) {
   subkati_args.push_back(argv[0]);
-  num_jobs = sysconf(_SC_NPROCESSORS_ONLN);
+  num_jobs = num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
   const char* num_jobs_str;
 
   for (int i = 1; i < argc; i++) {
@@ -82,6 +82,8 @@
       dump_kati_stamp = true;
     } else if (!strcmp(arg, "--detect_android_echo")) {
       detect_android_echo = true;
+    } else if (!strcmp(arg, "--detect_depfiles")) {
+      detect_depfiles = true;
     } else if (ParseCommandLineOptionWithArg(
         "-j", argv, &i, &num_jobs_str)) {
       num_jobs = strtol(num_jobs_str, NULL, 10);
@@ -100,10 +102,6 @@
         "--ninja_dir", argv, &i, &ninja_dir)) {
     } else if (!strcmp(arg, "--use_find_emulator")) {
       use_find_emulator = true;
-    } else if (!strcmp(arg, "--gen_regen_rule")) {
-      // TODO: Make this default once we have removed unnecessary
-      // command line change from Android build.
-      gen_regen_rule = true;
     } else if (ParseCommandLineOptionWithArg(
         "--goma_dir", argv, &i, &goma_dir)) {
     } else if (ParseCommandLineOptionWithArg(
diff --git a/flags.h b/flags.h
index d2fe923..910acbf 100644
--- a/flags.h
+++ b/flags.h
@@ -25,11 +25,11 @@
 
 struct Flags {
   bool detect_android_echo;
+  bool detect_depfiles;
   bool dump_kati_stamp;
   bool enable_kati_warnings;
   bool enable_stat_logs;
   bool gen_all_targets;
-  bool gen_regen_rule;
   bool generate_ninja;
   bool is_dry_run;
   bool is_silent_mode;
@@ -44,6 +44,7 @@
   const char* makefile;
   const char* ninja_dir;
   const char* ninja_suffix;
+  int num_cpus;
   int num_jobs;
   int remote_num_jobs;
   vector<const char*> subkati_args;
diff --git a/func.cc b/func.cc
index 39af0e9..876b274 100644
--- a/func.cc
+++ b/func.cc
@@ -182,10 +182,14 @@
 }
 
 void SortFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
-  const string&& list = args[0]->Eval(ev);
+  string list;
+  args[0]->Eval(ev, &list);
+  COLLECT_STATS("func sort time");
+  // TODO(hamaji): Probably we could use a faster string-specific sort
+  // algorithm.
   vector<StringPiece> toks;
   WordScanner(list).Split(&toks);
-  sort(toks.begin(), toks.end());
+  stable_sort(toks.begin(), toks.end());
   WordWriter ww(s);
   StringPiece prev;
   for (StringPiece tok : toks) {
@@ -574,8 +578,9 @@
 }
 
 void CallFunc(const vector<Value*>& args, Evaluator* ev, string* s) {
-  static const string tmpvar_names[] = {
-    "0", "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9"
+  static const Symbol tmpvar_names[] = {
+    Intern("0"), Intern("1"),  Intern("2"), Intern("3"), Intern("4"),
+    Intern("5"), Intern("6"),  Intern("7"), Intern("8"), Intern("9")
   };
 
   const string&& func_name = args[0]->Eval(ev);
@@ -590,28 +595,26 @@
         new SimpleVar(args[i]->Eval(ev), VarOrigin::AUTOMATIC));
     av.push_back(move(s));
   }
-  vector<unique_ptr<ScopedVar>> sv;
+  vector<unique_ptr<ScopedGlobalVar>> sv;
   for (size_t i = 1; ; i++) {
     string s;
-    StringPiece tmpvar_name;
+    Symbol tmpvar_name_sym(Symbol::IsUninitialized{});
     if (i < sizeof(tmpvar_names)/sizeof(tmpvar_names[0])) {
-      tmpvar_name = tmpvar_names[i];
+      tmpvar_name_sym = tmpvar_names[i];
     } else {
       s = StringPrintf("%d", i);
-      tmpvar_name = s;
+      tmpvar_name_sym = Intern(s);
     }
     if (i < args.size()) {
-      sv.emplace_back(new ScopedVar(ev->mutable_vars(),
-                                    Intern(tmpvar_name), av[i-1].get()));
+      sv.emplace_back(new ScopedGlobalVar(tmpvar_name_sym, av[i-1].get()));
     } else {
       // We need to blank further automatic vars
-      Var *v = ev->LookupVar(Intern(tmpvar_name));
+      Var *v = ev->LookupVar(tmpvar_name_sym);
       if (!v->IsDefined()) break;
       if (v->Origin() != VarOrigin::AUTOMATIC) break;
 
       av.emplace_back(new SimpleVar("", VarOrigin::AUTOMATIC));
-      sv.emplace_back(new ScopedVar(ev->mutable_vars(),
-                                    Intern(tmpvar_name), av[i-1].get()));
+      sv.emplace_back(new ScopedGlobalVar(tmpvar_name_sym, av[i-1].get()));
     }
   }
 
@@ -628,7 +631,7 @@
   for (StringPiece tok : WordScanner(list)) {
     unique_ptr<SimpleVar> v(new SimpleVar(
         tok.as_string(), VarOrigin::AUTOMATIC));
-    ScopedVar sv(ev->mutable_vars(), Intern(varname), v.get());
+    ScopedGlobalVar sv(Intern(varname), v.get());
     ww.MaybeAddWhitespace();
     args[2]->Eval(ev, s);
   }
diff --git a/m2n b/m2n
index b6798b8..3fd6661 100755
--- a/m2n
+++ b/m2n
@@ -114,7 +114,7 @@
   ninja_suffix_flag=--ninja_suffix=${ninja_suffix}
 fi
 
-${kati} --ninja ${ninja_suffix_flag} --ignore_optional_include=out/%.P --ignore_dirty=out/% --use_find_emulator --detect_android_echo --gen_all_targets ${goma_flag} ${extra_flags} ${targets}
+${kati} --ninja ${ninja_suffix_flag} --ignore_optional_include=out/%.P --ignore_dirty=out/% --use_find_emulator --detect_android_echo --detect_depfiles --gen_all_targets ${goma_flag} ${extra_flags} ${targets}
 
 echo
 echo ninja${ninja_suffix}.sh and build${ninja_suffix}.ninja were generated, please run ./ninja${ninja_suffix}.sh
diff --git a/main.cc b/main.cc
index a64a37e..2627bfd 100644
--- a/main.cc
+++ b/main.cc
@@ -21,6 +21,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "affinity.h"
 #include "dep.h"
 #include "eval.h"
 #include "exec.h"
@@ -104,13 +105,13 @@
   Parse(Intern(bootstrap).str(), Loc("*bootstrap*", 0), stmts);
 }
 
-static void SetVar(StringPiece l, VarOrigin origin, Vars* vars) {
+static void SetVar(StringPiece l, VarOrigin origin) {
   size_t found = l.find('=');
   CHECK(found != string::npos);
   Symbol lhs = Intern(l.substr(0, found));
   StringPiece rhs = l.substr(found + 1);
-  vars->Assign(lhs,
-               new RecursiveVar(NewLiteral(rhs.data()), origin, rhs.data()));
+  lhs.SetGlobalVar(
+      new RecursiveVar(NewLiteral(rhs.data()), origin, rhs.data()));
 }
 
 extern "C" char** environ;
@@ -123,7 +124,7 @@
   if (g_flags.generate_ninja && (g_flags.regen || g_flags.dump_kati_stamp)) {
     ScopedTimeReporter tr("regen check time");
     if (!NeedsRegen(start_time, orig_args)) {
-      printf("No need to regenerate ninja file\n");
+      fprintf(stderr, "No need to regenerate ninja file\n");
       return 0;
     }
     if (g_flags.dump_kati_stamp) {
@@ -133,13 +134,16 @@
     ClearGlobCache();
   }
 
+  SetAffinityForSingleThread();
+
   MakefileCacheManager* cache_mgr = NewMakefileCacheManager();
 
-  Vars* vars = new Vars();
+  Intern("MAKEFILE_LIST").SetGlobalVar(
+      new SimpleVar(StringPrintf(" %s", g_flags.makefile), VarOrigin::FILE));
   for (char** p = environ; *p; p++) {
-    SetVar(*p, VarOrigin::ENVIRONMENT, vars);
+    SetVar(*p, VarOrigin::ENVIRONMENT);
   }
-  Evaluator* ev = new Evaluator(vars);
+  Evaluator* ev = new Evaluator();
 
   vector<Stmt*> bootstrap_asts;
   ReadBootstrapMakefile(targets, &bootstrap_asts);
@@ -151,13 +155,9 @@
   ev->set_is_bootstrap(false);
 
   for (StringPiece l : cl_vars) {
-    SetVar(l, VarOrigin::COMMAND_LINE, ev->mutable_vars());
+    SetVar(l, VarOrigin::COMMAND_LINE);
   }
 
-  vars->Assign(Intern("MAKEFILE_LIST"),
-               new SimpleVar(StringPrintf(" %s", g_flags.makefile),
-                             VarOrigin::FILE));
-
   {
     ScopedTimeReporter tr("eval time");
     Makefile* mk = cache_mgr->ReadMakefile(g_flags.makefile);
@@ -208,7 +208,6 @@
   for (Stmt* stmt : bootstrap_asts)
     delete stmt;
   delete ev;
-  delete vars;
   delete cache_mgr;
 
   return 0;
diff --git a/mutex.cc b/mutex.cc
deleted file mode 100644
index ae4bd3b..0000000
--- a/mutex.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "mutex.h"
-
-#include "log.h"
-
-mutex::mutex() {
-  if (pthread_mutex_init(&mu_, NULL) != 0)
-    PERROR("pthread_mutex_init");
-}
-
-mutex::~mutex() {
-  if (pthread_mutex_destroy(&mu_) != 0)
-    PERROR("pthread_mutex_destroy");
-}
-
-void mutex::lock() {
-  if (pthread_mutex_lock(&mu_) != 0)
-    PERROR("pthread_mutex_lock");
-}
-
-void mutex::unlock() {
-  if (pthread_mutex_unlock(&mu_) != 0)
-    PERROR("pthread_mutex_unlock");
-}
diff --git a/mutex.h b/mutex.h
deleted file mode 100644
index d6f1f5a..0000000
--- a/mutex.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef MUTEX_H_
-#define MUTEX_H_
-
-#include <pthread.h>
-
-class mutex {
- public:
-  explicit mutex();
-  ~mutex();
-
-  void lock();
-  void unlock();
-
- private:
-  pthread_mutex_t mu_;
-
-  friend class condition_variable;
-};
-
-template<class T> class unique_lock {
- public:
-  explicit unique_lock(T& mu)
-      : mu_(mu) {
-    mu_.lock();
-  }
-  ~unique_lock() {
-    mu_.unlock();
-  }
-
-  T* mutex() const { return &mu_; }
-
- private:
-  T& mu_;
-};
-
-#endif  // MUTEX_H_
diff --git a/ninja.cc b/ninja.cc
index f14610f..6ba2ec8 100644
--- a/ninja.cc
+++ b/ninja.cc
@@ -22,6 +22,7 @@
 #include <unistd.h>
 
 #include <map>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -40,6 +41,8 @@
 #include "string_piece.h"
 #include "stringprintf.h"
 #include "strutil.h"
+#include "thread_pool.h"
+#include "timeutil.h"
 #include "var.h"
 #include "version.h"
 
@@ -165,6 +168,12 @@
   return true;
 }
 
+struct NinjaNode {
+  const DepNode* node;
+  vector<Command*> commands;
+  int rule_id;
+};
+
 class NinjaGenerator {
  public:
   NinjaGenerator(Evaluator* ev, double start_time)
@@ -175,7 +184,7 @@
         start_time_(start_time),
         default_target_(NULL) {
     ev_->set_avoid_io(true);
-    shell_ = ev->EvalVar(kShellSym);
+    shell_ = EscapeNinja(ev->EvalVar(kShellSym));
     if (g_flags.goma_dir)
       gomacc_ = StringPrintf("%s/gomacc ", g_flags.goma_dir);
 
@@ -184,12 +193,15 @@
 
   ~NinjaGenerator() {
     ev_->set_avoid_io(false);
+    for (NinjaNode* nn : nodes_)
+      delete nn;
   }
 
   void Generate(const vector<DepNode*>& nodes,
                 const string& orig_args) {
     unlink(GetNinjaStampFilename().c_str());
-    GenerateNinja(nodes, orig_args);
+    PopulateNinjaNodes(nodes);
+    GenerateNinja();
     GenerateShell();
     GenerateStamp(orig_args);
   }
@@ -206,8 +218,40 @@
   }
 
  private:
-  string GenRuleName() {
-    return StringPrintf("rule%d", rule_id_++);
+  void PopulateNinjaNodes(const vector<DepNode*>& nodes) {
+    ScopedTimeReporter tr("ninja gen (eval)");
+    for (DepNode* node : nodes) {
+      PopulateNinjaNode(node);
+    }
+  }
+
+  void PopulateNinjaNode(DepNode* node) {
+    auto p = done_.insert(node->output);
+    if (!p.second)
+      return;
+
+    // A hack to exclude out phony target in Android. If this exists,
+    // "ninja -t clean" tries to remove this directory and fails.
+    if (g_flags.detect_android_echo && node->output.str() == "out")
+      return;
+
+    // This node is a leaf node
+    if (!node->has_rule && !node->is_phony) {
+      return;
+    }
+
+    NinjaNode* nn = new NinjaNode;
+    nn->node = node;
+    ce_.Eval(node, &nn->commands);
+    nn->rule_id = nn->commands.empty() ? -1 : rule_id_++;
+    nodes_.push_back(nn);
+
+    for (DepNode* d : node->deps) {
+      PopulateNinjaNode(d);
+    }
+    for (DepNode* d : node->order_onlys) {
+      PopulateNinjaNode(d);
+    }
   }
 
   StringPiece TranslateCommand(const char* in, string* cmd_buf) {
@@ -284,6 +328,22 @@
                        cmd_buf->size() - orig_size);
   }
 
+  bool IsOutputMkdir(const char *name, StringPiece cmd) {
+    if (!HasPrefix(cmd, "mkdir -p ")) {
+      return false;
+    }
+    cmd = cmd.substr(9, cmd.size());
+    if (cmd.get(cmd.size() - 1) == '/') {
+      cmd = cmd.substr(0, cmd.size() - 1);
+    }
+
+    StringPiece dir = Dirname(name);
+    if (cmd == dir) {
+      return true;
+    }
+    return false;
+  }
+
   bool GetDescriptionFromCommand(StringPiece cmd, string *out) {
     if (!HasPrefix(cmd, "echo ")) {
       return false;
@@ -333,7 +393,8 @@
     return true;
   }
 
-  bool GenShellScript(const vector<Command*>& commands,
+  bool GenShellScript(const char *name,
+                      const vector<Command*>& commands,
                       string* cmd_buf,
                       string* description) {
     // TODO: This is a dirty hack to set local_pool even without
@@ -346,25 +407,19 @@
     static bool was_gomacc_found = false;
     bool got_descritpion = false;
     bool use_gomacc = false;
-    bool should_ignore_error = false;
+    auto command_count = commands.size();
     for (const Command* c : commands) {
+      size_t cmd_begin = cmd_buf->size();
+
       if (!cmd_buf->empty()) {
-        if (should_ignore_error) {
-          *cmd_buf += " ; ";
-        } else {
-          *cmd_buf += " && ";
-        }
+        *cmd_buf += " && ";
       }
-      should_ignore_error = c->ignore_error;
 
       const char* in = c->cmd.c_str();
       while (isspace(*in))
         in++;
 
-      bool needs_subshell = commands.size() > 1;
-      if (*in == '(') {
-        needs_subshell = false;
-      }
+      bool needs_subshell = (command_count > 1 || c->ignore_error);
 
       if (needs_subshell)
         *cmd_buf += '(';
@@ -374,11 +429,15 @@
       if (g_flags.detect_android_echo && !got_descritpion && !c->echo &&
           GetDescriptionFromCommand(translated, description)) {
         got_descritpion = true;
-        cmd_buf->resize(cmd_start);
+        translated.clear();
+      } else if (IsOutputMkdir(name, translated) && !c->echo &&
+                 cmd_begin == 0) {
         translated.clear();
       }
       if (translated.empty()) {
-        *cmd_buf += "true";
+        cmd_buf->resize(cmd_begin);
+        command_count -= 1;
+        continue;
       } else if (g_flags.goma_dir) {
         size_t pos = GetGomaccPosForAndroidCompileCommand(translated);
         if (pos != string::npos) {
@@ -390,22 +449,24 @@
         was_gomacc_found = true;
       }
 
-      if (c == commands.back() && c->ignore_error) {
+      if (c->ignore_error) {
         *cmd_buf += " ; true";
       }
 
       if (needs_subshell)
-        *cmd_buf += ')';
+        *cmd_buf += " )";
     }
     return (was_gomacc_found || g_flags.remote_num_jobs ||
             g_flags.goma_dir) && !use_gomacc;
   }
 
-  bool GetDepfile(DepNode* node, string* cmd_buf, string* depfile) {
+  bool GetDepfile(const DepNode* node, string* cmd_buf, string* depfile) {
     if (node->depfile_var) {
       node->depfile_var->Eval(ev_, depfile);
       return true;
     }
+    if (!g_flags.detect_depfiles)
+      return false;
 
     *cmd_buf += ' ';
     bool result = GetDepfileFromCommand(cmd_buf, depfile);
@@ -413,75 +474,55 @@
     return result;
   }
 
-  void EmitDepfile(DepNode* node, string* cmd_buf) {
+  void EmitDepfile(NinjaNode* nn, string* cmd_buf, ostringstream* o) {
+    const DepNode* node = nn->node;
     string depfile;
     if (!GetDepfile(node, cmd_buf, &depfile))
       return;
-    fprintf(fp_, " depfile = %s\n", depfile.c_str());
-    fprintf(fp_, " deps = gcc\n");
+    *o << " depfile = " << depfile << "\n";
+    *o << " deps = gcc\n";
   }
 
-  void EmitNode(DepNode* node) {
-    auto p = done_.insert(node->output);
-    if (!p.second)
-      return;
-
-    // A hack to exclude out phony target in Android. If this exists,
-    // "ninja -t clean" tries to remove this directory and fails.
-    if (g_flags.detect_android_echo && node->output.str() == "out")
-      return;
-
-    // This node is a leaf node
-    if (!node->has_rule && !node->is_phony) {
-      return;
-    }
-
-    vector<Command*> commands;
-    ce_.Eval(node, &commands);
+  void EmitNode(NinjaNode* nn, ostringstream* o) {
+    const DepNode* node = nn->node;
+    const vector<Command*>& commands = nn->commands;
 
     string rule_name = "phony";
     bool use_local_pool = false;
     if (!commands.empty()) {
-      rule_name = GenRuleName();
-      fprintf(fp_, "rule %s\n", rule_name.c_str());
+      rule_name = StringPrintf("rule%d", nn->rule_id);
+      *o << "rule " << rule_name << "\n";
 
       string description = "build $out";
       string cmd_buf;
-      use_local_pool |= GenShellScript(commands, &cmd_buf, &description);
-      fprintf(fp_, " description = %s\n", description.c_str());
-      EmitDepfile(node, &cmd_buf);
+      use_local_pool |= GenShellScript(node->output.c_str(), commands,
+                                       &cmd_buf, &description);
+      *o << " description = " << description << "\n";
+      EmitDepfile(nn, &cmd_buf, o);
 
       // It seems Linux is OK with ~130kB and Mac's limit is ~250kB.
       // TODO: Find this number automatically.
       if (cmd_buf.size() > 100 * 1000) {
-        fprintf(fp_, " rspfile = $out.rsp\n");
-        fprintf(fp_, " rspfile_content = %s\n", cmd_buf.c_str());
-        fprintf(fp_, " command = %s $out.rsp\n", shell_.c_str());
+        *o << " rspfile = $out.rsp\n";
+        *o << " rspfile_content = " << cmd_buf << "\n";
+        *o << " command = " << shell_ << " $out.rsp\n";
       } else {
         EscapeShell(&cmd_buf);
-        fprintf(fp_, " command = %s -c \"%s\"\n",
-                shell_.c_str(), cmd_buf.c_str());
+        *o << " command = " << shell_ << " -c \"" << cmd_buf << "\"\n";
       }
       if (node->is_restat) {
-        fprintf(fp_, " restat = 1\n");
+        *o << " restat = 1\n";
       }
     }
 
-    EmitBuild(node, rule_name, use_local_pool);
-
-    for (DepNode* d : node->deps) {
-      EmitNode(d);
-    }
-    for (DepNode* d : node->order_onlys) {
-      EmitNode(d);
-    }
+    EmitBuild(nn, rule_name, use_local_pool, o);
   }
 
-  string EscapeBuildTarget(Symbol s) const {
-    if (s.str().find_first_of("$: ") == string::npos)
-      return s.str();
+  string EscapeNinja(const string& s) const {
+    if (s.find_first_of("$: ") == string::npos)
+      return s;
     string r;
-    for (char c : s.str()) {
+    for (char c : s) {
       switch (c) {
         case '$':
         case ':':
@@ -495,86 +536,43 @@
     return r;
   }
 
-  void EscapeShell(string* s) const {
-    if (s->find_first_of("$`\\\"") == string::npos)
-      return;
-    string r;
-    bool last_dollar = false;
-    for (char c : *s) {
-      switch (c) {
-        case '$':
-          if (last_dollar) {
-            r += c;
-            last_dollar = false;
-          } else {
-            r += '\\';
-            r += c;
-            last_dollar = true;
-          }
-          break;
-        case '`':
-        case '"':
-        case '\\':
-          r += '\\';
-          // fall through.
-        default:
-          r += c;
-          last_dollar = false;
-      }
-    }
-    s->swap(r);
+  string EscapeBuildTarget(Symbol s) const {
+    return EscapeNinja(s.str());
   }
 
-  void EmitBuild(DepNode* node, const string& rule_name, bool use_local_pool) {
+  void EmitBuild(NinjaNode* nn, const string& rule_name,
+                 bool use_local_pool, ostringstream* o) {
+    const DepNode* node = nn->node;
     string target = EscapeBuildTarget(node->output);
-    fprintf(fp_, "build %s: %s",
-            target.c_str(),
-            rule_name.c_str());
+    *o << "build " << target << ": " << rule_name;
     vector<Symbol> order_onlys;
     if (node->is_phony) {
-      fprintf(fp_, " _kati_always_build_");
+      *o << " _kati_always_build_";
     }
     for (DepNode* d : node->deps) {
-      fprintf(fp_, " %s", EscapeBuildTarget(d->output).c_str());
+      *o << " " << EscapeBuildTarget(d->output).c_str();
     }
     if (!node->order_onlys.empty()) {
-      fprintf(fp_, " ||");
+      *o << " ||";
       for (DepNode* d : node->order_onlys) {
-        fprintf(fp_, " %s", EscapeBuildTarget(d->output).c_str());
+        *o << " " << EscapeBuildTarget(d->output).c_str();
       }
     }
-    fprintf(fp_, "\n");
+    *o << "\n";
     if (use_local_pool)
-      fprintf(fp_, " pool = local_pool\n");
+      *o << " pool = local_pool\n";
     if (node->is_default_target) {
+      unique_lock<mutex> lock(mu_);
       default_target_ = node;
     }
   }
 
-  void EmitRegenRules(const string& orig_args) {
-    if (!g_flags.gen_regen_rule)
-      return;
-
-    fprintf(fp_, "rule regen_ninja\n");
-    fprintf(fp_, " command = %s\n", orig_args.c_str());
-    fprintf(fp_, " generator = 1\n");
-    fprintf(fp_, " description = Regenerate ninja files due to dependency\n");
-    fprintf(fp_, "build %s: regen_ninja", GetNinjaFilename().c_str());
-    unordered_set<string> makefiles;
-    MakefileCacheManager::Get()->GetAllFilenames(&makefiles);
-    for (const string& makefile : makefiles) {
-      fprintf(fp_, " %.*s", SPF(makefile));
-    }
-    fprintf(fp_, " %s", kati_binary_.c_str());
-    fprintf(fp_, "\n\n");
-  }
-
   static string GetEnvScriptFilename() {
     return GetFilename("env%s.sh");
   }
 
-  void GenerateNinja(const vector<DepNode*>& nodes,
-                     const string& orig_args) {
+  void GenerateNinja() {
+    ScopedTimeReporter tr("ninja gen (emit)");
     fp_ = fopen(GetNinjaFilename().c_str(), "wb");
     if (fp_ == NULL)
       PERROR("fopen(build.ninja) failed");
@@ -599,10 +597,24 @@
 
     fprintf(fp_, "build _kati_always_build_: phony\n\n");
 
-    EmitRegenRules(orig_args);
+    unique_ptr<ThreadPool> tp(NewThreadPool(g_flags.num_jobs));
+    CHECK(g_flags.num_jobs);
+    int num_nodes_per_task = nodes_.size() / (g_flags.num_jobs * 10) + 1;
+    int num_tasks = nodes_.size() / num_nodes_per_task + 1;
+    vector<ostringstream> bufs(num_tasks);
+    for (int i = 0; i < num_tasks; i++) {
+      tp->Submit([this, i, num_nodes_per_task, &bufs]() {
+          int l = min(num_nodes_per_task * (i + 1),
+                      static_cast<int>(nodes_.size()));
+          for (int j = num_nodes_per_task * i; j < l; j++) {
+            EmitNode(nodes_[j], &bufs[i]);
+          }
+        });
+    }
+    tp->Wait();
 
-    for (DepNode* node : nodes) {
-      EmitNode(node);
+    for (const ostringstream& buf : bufs) {
+      fprintf(fp_, "%s", buf.str().c_str());
     }
 
     unordered_set<Symbol> used_env_vars(Vars::used_env_vars());
@@ -765,8 +777,11 @@
   string shell_;
   map<string, string> used_envs_;
   string kati_binary_;
-  double start_time_;
-  DepNode* default_target_;
+  const double start_time_;
+  vector<NinjaNode*> nodes_;
+
+  mutex mu_;
+  const DepNode* default_target_;
 };
 
 string GetNinjaFilename() {
diff --git a/parser.cc b/parser.cc
index fff3a7d..4b4012e 100644
--- a/parser.cc
+++ b/parser.cc
@@ -529,7 +529,7 @@
 
 void Parse(Makefile* mk) {
   COLLECT_STATS("parse file time");
-  Parser parser(StringPiece(mk->buf(), mk->len()),
+  Parser parser(StringPiece(mk->buf()),
                 mk->filename().c_str(),
                 mk->mutable_stmts());
   parser.Parse();
diff --git a/regen.cc b/regen.cc
index a40ebf3..23151b4 100644
--- a/regen.cc
+++ b/regen.cc
@@ -18,13 +18,13 @@
 
 #include <algorithm>
 #include <memory>
+#include <mutex>
 #include <vector>
 
 #include "fileutil.h"
 #include "find.h"
 #include "io.h"
 #include "log.h"
-#include "mutex.h"
 #include "ninja.h"
 #include "stats.h"
 #include "strutil.h"
diff --git a/rule.cc b/rule.cc
index fb69c5a..f3f5203 100644
--- a/rule.cc
+++ b/rule.cc
@@ -50,8 +50,7 @@
 Rule::Rule()
     : is_double_colon(false),
       is_suffix_rule(false),
-      cmd_lineno(0),
-      is_default_target(false) {
+      cmd_lineno(0) {
 }
 
 void ParseRule(Loc& loc, StringPiece line, char term,
@@ -167,8 +166,6 @@
     v.push_back("is_double_colon");
   if (is_suffix_rule)
     v.push_back("is_suffix_rule");
-  if (is_default_target)
-    v.push_back("is_default_target");
   if (!cmds.empty()) {
     v.push_back(StringPrintf("cmds=[%s]", JoinValues(cmds, ",").c_str()));
   }
diff --git a/rule.h b/rule.h
index d94abce..2a67368 100644
--- a/rule.h
+++ b/rule.h
@@ -44,7 +44,6 @@
   vector<Value*> cmds;
   Loc loc;
   int cmd_lineno;
-  bool is_default_target;
 
  private:
   void Error(const string& msg) {
diff --git a/run_integration_test.rb b/run_integration_test.rb
deleted file mode 100755
index 3a5b7d0..0000000
--- a/run_integration_test.rb
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/usr/bin/env ruby
-#
-# Copyright 2015 Google Inc. All rights reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-require 'fileutils'
-
-FileUtils.mkdir_p('repo')
-Dir.chdir('repo')
-
-def check_command(cmd)
-  puts cmd
-  if !system(cmd)
-    puts "#{cmd} failed"
-    exit 1
-  end
-end
-
-class TestCase
-  attr_reader :name
-
-  def initialize(name, checkout, prepare, clean, target)
-    @name = name
-    @checkout = checkout
-    @prepare = prepare
-    @clean = clean
-    @target = target
-  end
-
-  def normalize_log(log, out)
-    # TODO: Fix.
-    if @name == 'android'
-      log = log.gsub(/[ \t]+/, ' ')
-      log = log.split("\n").sort.join("\n").sub(/ Stop\.$/, '')
-      # This is a completely sane warning from kati for Android.
-      log.sub!(%r(build/core/product_config.mk:152: warning: Unmatched parens: .*\n), '')
-      # Not sure why the order can be inconsistent, but this would be OK.
-      # TODO: Inevestigate.
-      log.gsub!(/(\.mk\.PRODUCT_COPY_FILES := )(.*)/){$1 + $2.split.sort * ' '}
-    end
-    File.open(out, 'w') do |of|
-      of.print log
-    end
-    log
-  end
-
-  def run
-    @checkout.call(self)
-
-    Dir.chdir(@name) do
-      @prepare.call(self)
-
-      [['make', 'make'], ['kati', '../../kati']].each do |n, cmd|
-        @clean.call(self)
-        print "Running #{n} for #{@name}..."
-        STDOUT.flush
-        started = Time.now
-        system("#{cmd} #{@target} > #{n}.log 2>&1")
-        elapsed = Time.now - started
-        puts " %.2f secs" % elapsed
-      end
-
-      make_log = File.read('make.log')
-      kati_log = File.read('kati.log')
-      kati_log.gsub!(/^\*kati\*.*\n/, '')
-
-      make_log = normalize_log(make_log, 'make-normalized.log')
-      kati_log = normalize_log(kati_log, 'kati-normalized.log')
-      if make_log == kati_log
-        puts "#{@name}: OK"
-        return true
-      else
-        puts "#{@name}: FAIL"
-        return false
-      end
-    end
-  end
-end
-
-class GitTestCase < TestCase
-  def initialize(name, repo, rev, prepare, clean, target)
-    checkout = Proc.new{|tc|
-      if !File.exist?(@name)
-        check_command("git clone #{repo}")
-      end
-      Dir.chdir(@name) {
-        check_command("git checkout #{rev}")
-      }
-    }
-
-    super(name, checkout, prepare, clean, target)
-  end
-end
-
-class AndroidTestCase < TestCase
-  def initialize
-    name = 'android'
-    checkout = Proc.new{|tc|
-      FileUtils.mkdir_p(@name)
-      md5 = `md5sum android.tgz`
-      need_update = true
-      if File.exist?("#{@name}/STAMP")
-        stamp = File.read("#{@name}/STAMP")
-        if md5 == stamp
-          need_update = false
-        end
-      end
-
-      if need_update
-        check_command("tar -xzf android.tgz")
-        File.open("#{@name}/STAMP.tmp", 'w') do |ofile|
-          ofile.print(md5)
-        end
-        File.rename("#{@name}/STAMP.tmp", "#{@name}/STAMP")
-      end
-    }
-
-    super(name, checkout, DO_NOTHING, DO_NOTHING, 'dump-products')
-  end
-end
-
-DO_NOTHING = Proc.new{|tc|}
-MAKE_CLEAN = Proc.new{|tc|
-  check_command("make clean > /dev/null")
-}
-CONFIGURE = Proc.new{|tc|
-  check_command("./configure > /dev/null")
-}
-
-TESTS = [
-    GitTestCase.new('maloader',
-                    'https://github.com/shinh/maloader.git',
-                    '5d125933bc6c141bed05c309c2dc0e14ada6f5c7',
-                    DO_NOTHING,
-                    MAKE_CLEAN,
-                    ''),
-    GitTestCase.new('glog',
-                    'https://github.com/google/glog',
-                    '1b0b08c8dda1659027677966b03a3ff3c488e549',
-                    CONFIGURE,
-                    MAKE_CLEAN,
-                    ''),
-   AndroidTestCase.new(),
-]
-
-fails = []
-TESTS.each do |tc|
-  if !ARGV.empty?
-    if !ARGV.include?(tc.name)
-      next
-    end
-  end
-
-  if !tc.run
-    fails << tc.name
-  end
-end
-
-puts
-
-if fails.empty?
-  puts "PASS!"
-else
-  puts "=== Failures ==="
-  fails.each do |n|
-    puts n
-  end
-  puts
-
-  puts "FAIL!"
-end
diff --git a/runtest.rb b/runtest.rb
index 7123113..ab1e26b 100755
--- a/runtest.rb
+++ b/runtest.rb
@@ -108,6 +108,10 @@
     # This test expects ninja fails. Strip ninja specific error logs.
     log.gsub!(/^FAILED: .*\n/, '')
     log.gsub!(/^ninja: .*\n/, '')
+  elsif mk =~ /\/fail_/
+    # Recipes in these tests fail.
+    log.gsub!(/^FAILED: .*/, '*** [test] Error 1')
+    log.gsub!(/^ninja: .*\n/, '')
   end
   log
 end
@@ -341,7 +345,6 @@
     output = normalize_kati_log(output)
     if is_ninja_test
       output = normalize_ninja_log(output, sh)
-      output.gsub!(/No need to regenerate ninja file\n/, '')
     end
     File.open('out.make', 'w'){|ofile|ofile.print(expected)}
     File.open('out.kati', 'w'){|ofile|ofile.print(output)}
diff --git a/stats.cc b/stats.cc
index 9d4a160..fc170f0 100644
--- a/stats.cc
+++ b/stats.cc
@@ -16,11 +16,11 @@
 
 #include "stats.h"
 
+#include <mutex>
 #include <vector>
 
 #include "flags.h"
 #include "log.h"
-#include "mutex.h"
 #include "stringprintf.h"
 #include "thread_local.h"
 #include "timeutil.h"
@@ -29,13 +29,7 @@
 
 mutex g_mu;
 vector<Stats*>* g_stats;
-#ifdef __linux__
-thread_local double g_start_time;
-#define REF(x) x
-#else
 DEFINE_THREAD_LOCAL(double, g_start_time);
-#define REF(x) x.Ref()
-#endif
 
 }  // namespace
 
@@ -53,16 +47,16 @@
 }
 
 void Stats::Start() {
-  CHECK(!REF(g_start_time));
-  REF(g_start_time) = GetTime();
+  CHECK(!TLS_REF(g_start_time));
+  TLS_REF(g_start_time) = GetTime();
   unique_lock<mutex> lock(mu_);
   cnt_++;
 }
 
 double Stats::End() {
-  CHECK(REF(g_start_time));
-  double e = GetTime() - REF(g_start_time);
-  REF(g_start_time) = 0;
+  CHECK(TLS_REF(g_start_time));
+  double e = GetTime() - TLS_REF(g_start_time);
+  TLS_REF(g_start_time) = 0;
   unique_lock<mutex> lock(mu_);
   elapsed_ += e;
   return e;
diff --git a/stats.h b/stats.h
index 6244b54..63acc55 100644
--- a/stats.h
+++ b/stats.h
@@ -15,10 +15,9 @@
 #ifndef STATS_H_
 #define STATS_H_
 
+#include <mutex>
 #include <string>
 
-#include "mutex.h"
-
 using namespace std;
 
 class Stats {
diff --git a/stmt.cc b/stmt.cc
index 1abab32..155e0cd 100644
--- a/stmt.cc
+++ b/stmt.cc
@@ -55,6 +55,19 @@
                       opstr, dirstr, LOCF(loc()));
 }
 
+Symbol AssignStmt::GetLhsSymbol(Evaluator* ev) const {
+  if (!lhs->IsLiteral()) {
+    string buf;
+    lhs->Eval(ev, &buf);
+    return Intern(buf);
+  }
+
+  if (!lhs_sym_cache_.IsValid()) {
+    lhs_sym_cache_ = Intern(lhs->GetLiteralValueUnsafe());
+  }
+  return lhs_sym_cache_;
+}
+
 string CommandStmt::DebugString() const {
   return StringPrintf("CommandStmt(%s, loc=%s:%d)",
                       expr->DebugString().c_str(), LOCF(loc()));
diff --git a/stmt.h b/stmt.h
index 4d4c5eb..3b6feeb 100644
--- a/stmt.h
+++ b/stmt.h
@@ -20,6 +20,7 @@
 
 #include "loc.h"
 #include "string_piece.h"
+#include "symtab.h"
 
 using namespace std;
 
@@ -85,11 +86,19 @@
   AssignOp op;
   AssignDirective directive;
 
+  AssignStmt()
+      : lhs_sym_cache_(Symbol::IsUninitialized{}) {
+  }
   virtual ~AssignStmt();
 
   virtual void Eval(Evaluator* ev) const;
 
   virtual string DebugString() const;
+
+  Symbol GetLhsSymbol(Evaluator* ev) const;
+
+ private:
+  mutable Symbol lhs_sym_cache_;
 };
 
 struct CommandStmt : public Stmt {
diff --git a/string_piece.cc b/string_piece.cc
index 78de4ed..d287616 100644
--- a/string_piece.cc
+++ b/string_piece.cc
@@ -21,6 +21,7 @@
 
 #include <ctype.h>
 #include <limits.h>
+#include <stdint.h>
 
 #include <algorithm>
 #include <ostream>
@@ -32,8 +33,15 @@
 bool operator==(const StringPiece& x, const StringPiece& y) {
   if (x.size() != y.size())
     return false;
-
-  return StringPiece::wordmemcmp(x.data(), y.data(), x.size()) == 0;
+  size_t len = x.size();
+  if (len >= sizeof(uint64_t)) {
+    len -= sizeof(uint64_t);
+    uint64_t xt = *reinterpret_cast<const uint64_t*>(x.data() + len);
+    uint64_t yt = *reinterpret_cast<const uint64_t*>(y.data() + len);
+    if (xt != yt)
+      return false;
+  }
+  return StringPiece::wordmemcmp(x.data(), y.data(), len) == 0;
 }
 
 void StringPiece::CopyToString(std::string* target) const {
diff --git a/string_piece_test.cc b/string_piece_test.cc
index 7b253a0..0434cbe 100644
--- a/string_piece_test.cc
+++ b/string_piece_test.cc
@@ -30,4 +30,8 @@
   assert(sps.size() == 2);
   assert(sps.count(StringPiece("foo")) == 1);
   assert(sps.count(StringPiece("bar")) == 1);
+
+  assert(StringPiece("hogefugahige") == StringPiece("hogefugahige"));
+  assert(StringPiece("hogefugahoge") != StringPiece("hogefugahige"));
+  assert(StringPiece("hogefugahige") != StringPiece("higefugahige"));
 }
diff --git a/strutil.cc b/strutil.cc
index 7707482..d3c14d8 100644
--- a/strutil.cc
+++ b/strutil.cc
@@ -24,24 +24,59 @@
 #include <stack>
 #include <utility>
 
+#ifdef __SSE4_2__
+#include <smmintrin.h>
+#endif
+
 #include "log.h"
 
+static bool isSpace(char c) {
+  return (9 <= c && c <= 13) || c == 32;
+}
+
+#ifdef __SSE4_2__
+static int SkipUntilSSE42(const char* s, int len,
+                          const char* ranges, int ranges_size) {
+  __m128i ranges16 = _mm_loadu_si128((const __m128i*)ranges);
+  int i = 0;
+  do {
+    __m128i b16 = _mm_loadu_si128((const __m128i*)(s + i));
+    int r = _mm_cmpestri(
+        ranges16, ranges_size, b16, len - i,
+        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+    if (r != 16) {
+      return i + r;
+    }
+    i += 16;
+  } while (i < len);
+  return len;
+}
+#endif
+
 WordScanner::Iterator& WordScanner::Iterator::operator++() {
   int len = static_cast<int>(in->size());
-  for (s = i; s < len; s++) {
-    if (!isspace((*in)[s]))
+  for (s = i + 1; s < len; s++) {
+    if (!isSpace((*in)[s]))
       break;
   }
-  if (s == len) {
+  if (s >= len) {
     in = NULL;
     s = 0;
     i = 0;
     return *this;
   }
+
+#ifdef __SSE4_2__
+  static const char ranges[] = "\x09\x0d  ";
+  i = s;
+  i += SkipUntilSSE42(in->data() + s, len - s, ranges, 4);
+#else
   for (i = s; i < len; i++) {
-    if (isspace((*in)[i]))
+    if (isSpace((*in)[i]))
       break;
   }
+#endif
+
   return *this;
 }
 
@@ -57,7 +92,7 @@
   Iterator iter;
   iter.in = &in_;
   iter.s = 0;
-  iter.i = 0;
+  iter.i = -1;
   ++iter;
   return iter;
 }
@@ -120,10 +155,10 @@
   size_t found = str.find(w);
   if (found == string::npos)
     return false;
-  if (found != 0 && !isspace(str[found-1]))
+  if (found != 0 && !isSpace(str[found-1]))
     return false;
   size_t end = found + w.size();
-  if (end != str.size() && !isspace(str[end]))
+  if (end != str.size() && !isSpace(str[end]))
     return false;
   return true;
 }
@@ -211,7 +246,7 @@
 StringPiece TrimLeftSpace(StringPiece s) {
   size_t i = 0;
   for (; i < s.size(); i++) {
-    if (isspace(s[i]))
+    if (isSpace(s[i]))
       continue;
     char n = s.get(i+1);
     if (s[i] == '\\' && (n == '\r' || n == '\n')) {
@@ -227,7 +262,7 @@
   size_t i = 0;
   for (; i < s.size(); i++) {
     char c = s[s.size() - 1 - i];
-    if (isspace(c)) {
+    if (isSpace(c)) {
       if ((c == '\r' || c == '\n') && s.get(s.size() - 2 - i) == '\\')
         i++;
       continue;
@@ -388,6 +423,36 @@
 }
 
 size_t FindEndOfLine(StringPiece s, size_t e, size_t* lf_cnt) {
+#ifdef __SSE4_2__
+  static const char ranges[] = "\0\0\n\n\\\\";
+  while (e < s.size()) {
+    e += SkipUntilSSE42(s.data() + e, s.size() - e, ranges, 6);
+    if (e >= s.size()) {
+      CHECK(s.size() == e);
+      break;
+    }
+    char c = s[e];
+    if (c == '\0')
+      break;
+    if (c == '\\') {
+      if (s[e+1] == '\n') {
+        e += 2;
+        ++*lf_cnt;
+      } else if (s[e+1] == '\r' && s[e+2] == '\n') {
+        e += 3;
+        ++*lf_cnt;
+      } else if (s[e+1] == '\\') {
+        e += 2;
+      } else {
+        e++;
+      }
+    } else if (c == '\n') {
+      ++*lf_cnt;
+      return e;
+    }
+  }
+  return e;
+#else
   bool prev_backslash = false;
   for (; e < s.size(); e++) {
     char c = s[e];
@@ -404,6 +469,7 @@
     }
   }
   return e;
+#endif
 }
 
 StringPiece TrimLeadingCurdir(StringPiece s) {
@@ -461,3 +527,60 @@
   }
   return buf;
 }
+
+void EscapeShell(string* s) {
+#ifdef __SSE4_2__
+  static const char ranges[] = "\0\0\"\"$$\\\\``";
+  size_t prev = 0;
+  size_t i = SkipUntilSSE42(s->c_str(), s->size(), ranges, 10);
+  if (i == s->size())
+    return;
+
+  string r;
+  for (; i < s->size();) {
+    StringPiece(*s).substr(prev, i - prev).AppendToString(&r);
+    char c = (*s)[i];
+    r += '\\';
+    if (c == '$') {
+      if ((*s)[i+1] == '$') {
+        r += '$';
+        i++;
+      }
+    }
+    r += c;
+    i++;
+    prev = i;
+    i += SkipUntilSSE42(s->c_str() + i, s->size() - i, ranges, 10);
+  }
+  StringPiece(*s).substr(prev).AppendToString(&r);
+  s->swap(r);
+#else
+  if (s->find_first_of("$`\\\"") == string::npos)
+    return;
+  string r;
+  bool last_dollar = false;
+  for (char c : *s) {
+    switch (c) {
+      case '$':
+        if (last_dollar) {
+          r += c;
+          last_dollar = false;
+        } else {
+          r += '\\';
+          r += c;
+          last_dollar = true;
+        }
+        break;
+      case '`':
+      case '"':
+      case '\\':
+        r += '\\';
+        // fall through.
+      default:
+        r += c;
+        last_dollar = false;
+    }
+  }
+  s->swap(r);
+#endif
+}
diff --git a/strutil.h b/strutil.h
index 5938efb..12e46a5 100644
--- a/strutil.h
+++ b/strutil.h
@@ -142,4 +142,6 @@
 
 string EchoEscape(const string str);
 
+void EscapeShell(string* s);
+
 #endif  // STRUTIL_H_
diff --git a/strutil_bench.cc b/strutil_bench.cc
new file mode 100644
index 0000000..c52e43c
--- /dev/null
+++ b/strutil_bench.cc
@@ -0,0 +1,40 @@
+// Copyright 2016 Google Inc. All rights reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include <vector>
+
+#include "flags.h"
+#include "string_piece.h"
+#include "strutil.h"
+#include "timeutil.h"
+
+using namespace std;
+
+int main() {
+  g_flags.enable_stat_logs = true;
+  string s;
+  while (s.size() < 400000) {
+    if (!s.empty())
+      s += ' ';
+    s += "frameworks/base/docs/html/tv/adt-1/index.jd";
+  }
+
+  ScopedTimeReporter tr("WordScanner");
+  static const int N = 1000;
+  for (int i = 0; i < N; i++) {
+    vector<StringPiece> toks;
+    WordScanner(s).Split(&toks);
+  }
+}
diff --git a/strutil_test.cc b/strutil_test.cc
index eb1e195..a89786f 100644
--- a/strutil_test.cc
+++ b/strutil_test.cc
@@ -30,13 +30,14 @@
 
 void TestWordScanner() {
   vector<StringPiece> ss;
-  for (StringPiece tok : WordScanner("foo bar baz")) {
+  for (StringPiece tok : WordScanner("foo bar baz hogeeeeeeeeeeeeeeee")) {
     ss.push_back(tok);
   }
-  assert(ss.size() == 3LU);
+  assert(ss.size() == 4LU);
   ASSERT_EQ(ss[0], "foo");
   ASSERT_EQ(ss[1], "bar");
   ASSERT_EQ(ss[2], "baz");
+  ASSERT_EQ(ss[3], "hogeeeeeeeeeeeeeeee");
 }
 
 void TestHasPrefix() {
@@ -115,6 +116,28 @@
   ASSERT_EQ(NormalizePath("./../../a/b"), "../../a/b");
 }
 
+string EscapeShell(string s) {
+  ::EscapeShell(&s);
+  return s;
+}
+
+void TestEscapeShell() {
+  ASSERT_EQ(EscapeShell(""), "");
+  ASSERT_EQ(EscapeShell("foo"), "foo");
+  ASSERT_EQ(EscapeShell("foo$`\\baz\"bar"), "foo\\$\\`\\\\baz\\\"bar");
+  ASSERT_EQ(EscapeShell("$$"), "\\$$");
+  ASSERT_EQ(EscapeShell("$$$"), "\\$$\\$");
+  ASSERT_EQ(EscapeShell("\\\n"), "\\\\\n");
+}
+
+void TestFindEndOfLine() {
+  size_t lf_cnt = 0;
+  ASSERT_EQ(FindEndOfLine("foo", 0, &lf_cnt), 3);
+  char buf[10] = {'f', 'o', '\\', '\0', 'x', 'y'};
+  ASSERT_EQ(FindEndOfLine(StringPiece(buf, 6), 0, &lf_cnt), 3);
+  ASSERT_EQ(FindEndOfLine(StringPiece(buf, 2), 0, &lf_cnt), 2);
+}
+
 }  // namespace
 
 int main() {
@@ -125,5 +148,7 @@
   TestNoLineBreak();
   TestHasWord();
   TestNormalizePath();
+  TestEscapeShell();
+  TestFindEndOfLine();
   assert(!g_failed);
 }
diff --git a/symtab.cc b/symtab.cc
index d8d7e93..fb81bfe 100644
--- a/symtab.cc
+++ b/symtab.cc
@@ -14,16 +14,31 @@
 
 // +build ignore
 
+//#define ENABLE_TID_CHECK
+
 #include "symtab.h"
 
+#ifdef ENABLE_TID_CHECK
+#include <pthread.h>
+#endif
 #include <string.h>
 
 #include <unordered_map>
 
 #include "log.h"
 #include "strutil.h"
+#include "var.h"
+
+struct SymbolData {
+  SymbolData()
+      : gv(kUndefined) {
+  }
+
+  Var* gv;
+};
 
 vector<string*>* g_symbols;
+static vector<SymbolData> g_symbol_data;
 
 Symbol kEmptySym = Symbol(Symbol::IsUninitialized());
 Symbol kShellSym = Symbol(Symbol::IsUninitialized());
@@ -32,9 +47,52 @@
     : v_(v) {
 }
 
+Var* Symbol::GetGlobalVar() const {
+  if (static_cast<size_t>(v_) >= g_symbol_data.size()) {
+    g_symbol_data.resize(v_ + 1);
+  }
+  Var* v = g_symbol_data[v_].gv;
+  if (v->Origin() == VarOrigin::ENVIRONMENT ||
+      v->Origin() == VarOrigin::ENVIRONMENT_OVERRIDE) {
+    Vars::add_used_env_vars(*this);
+  }
+  return v;
+}
+
+void Symbol::SetGlobalVar(Var* v) const {
+  if (static_cast<size_t>(v_) >= g_symbol_data.size()) {
+    g_symbol_data.resize(v_ + 1);
+  }
+  Var* orig = g_symbol_data[v_].gv;
+  if (orig->Origin() == VarOrigin::OVERRIDE ||
+      orig->Origin() == VarOrigin::ENVIRONMENT_OVERRIDE) {
+    return;
+  }
+  if (orig->Origin() == VarOrigin::AUTOMATIC) {
+    ERROR("overriding automatic variable is not implemented yet");
+  }
+  if (orig->IsDefined())
+    delete orig;
+  g_symbol_data[v_].gv = v;
+}
+
+ScopedGlobalVar::ScopedGlobalVar(Symbol name, Var* var)
+    : name_(name), orig_(NULL) {
+  orig_ = name.GetGlobalVar();
+  g_symbol_data[name_.val()].gv = var;
+}
+
+ScopedGlobalVar::~ScopedGlobalVar() {
+  g_symbol_data[name_.val()].gv = orig_;
+}
+
 class Symtab {
  public:
   Symtab() {
+#ifdef ENABLE_TID_CHECK
+    tid_ = pthread_self();
+#endif
+
     CHECK(g_symbols == NULL);
     g_symbols = &symbols_;
 
@@ -72,6 +130,11 @@
   }
 
   Symbol Intern(StringPiece s) {
+#ifdef ENABLE_TID_CHECK
+    if (tid_ != pthread_self())
+      abort();
+#endif
+
     if (s.size() <= 1) {
       return Symbol(s.empty() ? 0 : (unsigned char)s[0]);
     }
@@ -81,6 +144,9 @@
  private:
   unordered_map<StringPiece, Symbol> symtab_;
   vector<string*> symbols_;
+#ifdef ENABLE_TID_CHECK
+  pthread_t tid_;
+#endif
 };
 
 static Symtab* g_symtab;
diff --git a/symtab.h b/symtab.h
index 9d8a120..d1de4e1 100644
--- a/symtab.h
+++ b/symtab.h
@@ -25,6 +25,7 @@
 extern vector<string*>* g_symbols;
 
 class Symtab;
+class Var;
 
 class Symbol {
  public:
@@ -54,6 +55,9 @@
 
   bool IsValid() const { return v_ >= 0; }
 
+  Var* GetGlobalVar() const;
+  void SetGlobalVar(Var* v) const;
+
  private:
   explicit Symbol(int v);
 
@@ -62,6 +66,16 @@
   friend class Symtab;
 };
 
+class ScopedGlobalVar {
+ public:
+  ScopedGlobalVar(Symbol name, Var* var);
+  ~ScopedGlobalVar();
+
+ private:
+  Symbol name_;
+  Var* orig_;
+};
+
 inline bool operator==(const Symbol& x, const Symbol& y) {
   return x.val() == y.val();
 }
diff --git a/testcase/append_self_reference.mk b/testcase/append_self_reference.mk
new file mode 100644
index 0000000..8f3912f
--- /dev/null
+++ b/testcase/append_self_reference.mk
@@ -0,0 +1,8 @@
+x := one
+x += two $(x)
+$(info $(x))
+
+# TODO: shouldn't crash.
+#y = one
+#y += two $(y)
+#$(info $(y))
\ No newline at end of file
diff --git a/testcase/fail_ignore_error.mk b/testcase/fail_ignore_error.mk
new file mode 100644
index 0000000..c4e71a6
--- /dev/null
+++ b/testcase/fail_ignore_error.mk
@@ -0,0 +1,6 @@
+# TODO(go-ninja): Fix
+
+test:
+	false
+	-false
+	echo FAIL
diff --git a/testcase/fail_subshell_in_recipe.mk b/testcase/fail_subshell_in_recipe.mk
new file mode 100644
index 0000000..c2d2b2d
--- /dev/null
+++ b/testcase/fail_subshell_in_recipe.mk
@@ -0,0 +1,5 @@
+# TODO(go-ninja): Fix
+
+test:
+	false
+	(true) ; echo FAIL
diff --git a/testcase/ninja_mkdir.sh b/testcase/ninja_mkdir.sh
new file mode 100644
index 0000000..e54298d
--- /dev/null
+++ b/testcase/ninja_mkdir.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+#
+# Copyright 2016 Google Inc. All rights reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http:#www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+log=/tmp/log
+mk="$@"
+
+cat <<EOF > Makefile
+test: a/b
+
+a/b:
+	@mkdir -p \$(dir \$@)
+	touch \$@
+EOF
+
+${mk} 2> ${log}
+if [ -e ninja.sh ]; then
+  ./ninja.sh
+fi
+if [[ ! -d a ]]; then
+  echo "Created 'a'"
+fi
+if [ -e ninja.sh ]; then
+  if grep -q "mkdir -p" build.ninja; then
+    echo "Should not include 'mkdir -p' in build.ninja"
+    echo "Ninja will automatically create this directory"
+  fi
+fi
diff --git a/testcase/shell_var_with_args.mk b/testcase/shell_var_with_args.mk
new file mode 100644
index 0000000..ca7b54a
--- /dev/null
+++ b/testcase/shell_var_with_args.mk
@@ -0,0 +1,9 @@
+# TODO(go): Fix
+
+export FOO=-x
+
+SHELL := PS4="cmd: " /bin/bash $${FOO}
+$(info $(shell echo foo))
+
+test:
+	@echo baz
diff --git a/testcase/sort.mk b/testcase/sort.mk
index c521289..bf67a14 100644
--- a/testcase/sort.mk
+++ b/testcase/sort.mk
@@ -1,5 +1,12 @@
+sp := $(subst S, ,S)
+
 test:
 	echo $(sort foo bar lose)
 	echo $(sort foo bar aaaa)
 	echo $(sort foo bar lose lose foo bar bar)
+	echo $(sort baz bar)
+	echo $(sort single)
+	echo $(sort $(sp)foo$(sp))
 	echo $(sort )
+	echo $(sort device/sample/products/AndroidProducts.mk device/moto/shamu/AndroidProducts.mk device/asus/fugu/AndroidProducts.mk device/asus/deb/AndroidProducts.mk device/asus/flo/AndroidProducts.mk device/generic/arm64/AndroidProducts.mk device/generic/qemu/AndroidProducts.mk device/generic/mini-emulator-x86_64/AndroidProducts.mk device/generic/x86/AndroidProducts.mk device/generic/mips/AndroidProducts.mk device/generic/mini-emulator-x86/AndroidProducts.mk device/generic/mini-emulator-mips/AndroidProducts.mk device/generic/mini-emulator-arm64/AndroidProducts.mk device/generic/mini-emulator-armv7-a-neon/AndroidProducts.mk device/generic/x86_64/AndroidProducts.mk device/generic/armv7-a-neon/AndroidProducts.mk device/htc/flounder/AndroidProducts.mk device/lge/bullhead/AndroidProducts.mk device/lge/hammerhead/AndroidProducts.mk device/huawei/angler/AndroidProducts.mk)
+	echo $(sort cpplint-art-phony libart libartd libgabi++ libopenjdkjvm libopenjdkjvmd libart)
diff --git a/testcase/subshell_in_recipe.mk b/testcase/subshell_in_recipe.mk
new file mode 100644
index 0000000..7578fe7
--- /dev/null
+++ b/testcase/subshell_in_recipe.mk
@@ -0,0 +1,3 @@
+test:
+	true
+	(echo PASS)
diff --git a/thread.cc b/thread.cc
deleted file mode 100644
index 9733208..0000000
--- a/thread.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2016 Google Inc. All rights reserved
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "thread.h"
-
-#include "log.h"
-
-thread::thread(const fn_t& fn) {
-  if (pthread_create(&th_, NULL, &thread::Run, new fn_t(fn)) != 0)
-    PERROR("pthread_create");
-}
-
-void thread::join() {
-  if (pthread_join(th_, NULL) != 0)
-    PERROR("pthread_join");
-}
-
-void* thread::Run(void* p) {
-  fn_t* fn = static_cast<fn_t*>(p);
-  (*fn)();
-  delete fn;
-  return NULL;
-}
diff --git a/thread_local.h b/thread_local.h
index 3bbf663..28b146a 100644
--- a/thread_local.h
+++ b/thread_local.h
@@ -39,6 +39,13 @@
 
 #include "log.h"
 
+#ifdef __linux__
+
+#define DEFINE_THREAD_LOCAL(Type, name) thread_local Type name
+#define TLS_REF(x) x
+
+#else
+
 // Thread local storage implementation which uses pthread.
 // Note that DEFINE_THREAD_LOCAL creates a global variable just like
 // thread local storage based on __thread keyword. So we should not use
@@ -88,4 +95,8 @@
   }                                                     \
   ThreadLocal<Type, &name##_key, &name##_once> name;
 
+#define TLS_REF(x) x.Ref()
+
+#endif
+
 #endif  // THREAD_LOCAL_H_
diff --git a/thread_pool.cc b/thread_pool.cc
index ba7ee00..0a12bfa 100644
--- a/thread_pool.cc
+++ b/thread_pool.cc
@@ -14,17 +14,19 @@
 
 #include "thread_pool.h"
 
+#include <condition_variable>
+#include <mutex>
 #include <stack>
+#include <thread>
 #include <vector>
 
-#include "condvar.h"
-#include "mutex.h"
-#include "thread.h"
+#include "affinity.h"
 
 class ThreadPoolImpl : public ThreadPool {
  public:
   explicit ThreadPoolImpl(int num_threads)
       : is_waiting_(false) {
+    SetAffinityForMultiThread();
     threads_.reserve(num_threads);
     for (int i = 0; i < num_threads; i++) {
       threads_.push_back(thread([this]() { Loop(); }));
@@ -50,6 +52,8 @@
     for (thread& th : threads_) {
       th.join();
     }
+
+    SetAffinityForSingleThread();
   }
 
  private:
diff --git a/var.cc b/var.cc
index d21dc4f..4b67c4f 100644
--- a/var.cc
+++ b/var.cc
@@ -47,6 +47,10 @@
   CHECK(false);
 }
 
+SimpleVar::SimpleVar(VarOrigin origin)
+    : origin_(origin) {
+}
+
 SimpleVar::SimpleVar(const string& v, VarOrigin origin)
     : v_(v), origin_(origin) {
 }
@@ -56,8 +60,10 @@
 }
 
 void SimpleVar::AppendVar(Evaluator* ev, Value* v) {
+  string buf;
+  v->Eval(ev, &buf);
   v_.push_back(' ');
-  v->Eval(ev, &v_);
+  v_ += buf;
 }
 
 StringPiece SimpleVar::String() const {
@@ -108,6 +114,10 @@
   }
 }
 
+void Vars::add_used_env_vars(Symbol v) {
+  used_env_vars_.insert(v);
+}
+
 Var* Vars::Lookup(Symbol name) const {
   auto found = find(name);
   if (found == end())
@@ -121,7 +131,7 @@
 }
 
 void Vars::Assign(Symbol name, Var* v) {
-  auto p = insert(make_pair(name, v));
+  auto p = emplace(name, v);
   if (!p.second) {
     Var* orig = p.first->second;
     if (orig->Origin() == VarOrigin::OVERRIDE ||
@@ -141,7 +151,7 @@
 
 ScopedVar::ScopedVar(Vars* vars, Symbol name, Var* var)
     : vars_(vars), orig_(NULL) {
-  auto p = vars->insert(make_pair(name, var));
+  auto p = vars->emplace(name, var);
   iter_ = p.first;
   if (!p.second) {
     orig_ = iter_->second;
diff --git a/var.h b/var.h
index 3bae262..5fc09fa 100644
--- a/var.h
+++ b/var.h
@@ -62,23 +62,26 @@
 
 class SimpleVar : public Var {
  public:
+  explicit SimpleVar(VarOrigin origin);
   SimpleVar(const string& v, VarOrigin origin);
 
-  virtual const char* Flavor() const {
+  virtual const char* Flavor() const override {
     return "simple";
   }
-  virtual VarOrigin Origin() const {
+  virtual VarOrigin Origin() const override {
     return origin_;
   }
 
   virtual void Eval(Evaluator* ev, string* s) const override;
 
-  virtual void AppendVar(Evaluator* ev, Value* v);
+  virtual void AppendVar(Evaluator* ev, Value* v) override;
 
   virtual StringPiece String() const override;
 
   virtual string DebugString() const override;
 
+  string* mutable_value() { return &v_; }
+
  private:
   string v_;
   VarOrigin origin_;
@@ -88,16 +91,16 @@
  public:
   RecursiveVar(Value* v, VarOrigin origin, StringPiece orig);
 
-  virtual const char* Flavor() const {
+  virtual const char* Flavor() const override {
     return "recursive";
   }
-  virtual VarOrigin Origin() const {
+  virtual VarOrigin Origin() const override {
     return origin_;
   }
 
   virtual void Eval(Evaluator* ev, string* s) const override;
 
-  virtual void AppendVar(Evaluator* ev, Value* v);
+  virtual void AppendVar(Evaluator* ev, Value* v) override;
 
   virtual StringPiece String() const override;
 
@@ -113,13 +116,13 @@
  public:
   UndefinedVar();
 
-  virtual const char* Flavor() const {
+  virtual const char* Flavor() const override {
     return "undefined";
   }
-  virtual VarOrigin Origin() const {
+  virtual VarOrigin Origin() const override {
     return VarOrigin::UNDEFINED;
   }
-  virtual bool IsDefined() const { return false; }
+  virtual bool IsDefined() const override { return false; }
 
   virtual void Eval(Evaluator* ev, string* s) const override;
 
@@ -138,19 +141,19 @@
     delete v_;
   }
 
-  virtual const char* Flavor() const {
+  virtual const char* Flavor() const override {
     return v_->Flavor();
   }
-  virtual VarOrigin Origin() const {
+  virtual VarOrigin Origin() const override {
     return v_->Origin();
   }
-  virtual bool IsDefined() const {
+  virtual bool IsDefined() const override {
     return v_->IsDefined();
   }
-  virtual void Eval(Evaluator* ev, string* s) const {
+  virtual void Eval(Evaluator* ev, string* s) const override {
     v_->Eval(ev, s);
   }
-  virtual void AppendVar(Evaluator* ev, Value* v) {
+  virtual void AppendVar(Evaluator* ev, Value* v) override {
     v_->AppendVar(ev, v);
   }
   virtual StringPiece String() const override {
@@ -176,6 +179,8 @@
 
   void Assign(Symbol name, Var* v);
 
+  static void add_used_env_vars(Symbol v);
+
   static const unordered_set<Symbol>& used_env_vars() {
     return used_env_vars_;
   }