Merge pull request #214 from onoratoj/kati_symbols

.VARIABLES and .KATI_SYMBOLS extension
diff --git a/src/command.cc b/src/command.cc
index b2eb159..98772fa 100644
--- a/src/command.cc
+++ b/src/command.cc
@@ -44,6 +44,8 @@
     return string("AutoVar(") + sym_ + ")";
   }
 
+  virtual bool IsFunc(Evaluator*) const override { return true; }
+
  protected:
   AutoVar(CommandEvaluator* ce, const char* sym) : ce_(ce), sym_(sym) {}
   virtual ~AutoVar() = default;
diff --git a/src/expr.cc b/src/expr.cc
index 97eb8c6..8132a48 100644
--- a/src/expr.cc
+++ b/src/expr.cc
@@ -49,6 +49,8 @@
 
   StringPiece val() const { return s_; }
 
+  virtual bool IsFunc(Evaluator*) const override { return false; }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ev->CheckStack();
     s->append(s_.begin(), s_.end());
@@ -91,6 +93,15 @@
     }
   }
 
+  virtual bool IsFunc(Evaluator* ev) const override {
+    for (Value* v : vals_) {
+      if (v->IsFunc(ev)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ev->CheckStack();
     for (Value* v : vals_) {
@@ -122,6 +133,14 @@
   explicit SymRef(const Loc& loc, Symbol n) : Value(loc), name_(n) {}
   virtual ~SymRef() {}
 
+  virtual bool IsFunc(Evaluator*) const override {
+    // This is a heuristic, where say that if a variable has positional
+    // parameters, we think it is likely to be a function. Callers can use
+    // .KATI_SYMBOLS to extract variables and their values, without evaluating
+    // macros that are likely to have side effects.
+    return IsInteger(name_.str());
+  }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ev->CheckStack();
     Var* v = ev->LookupVarForEval(name_);
@@ -143,6 +162,11 @@
   explicit VarRef(const Loc& loc, Value* n) : Value(loc), name_(n) {}
   virtual ~VarRef() { delete name_; }
 
+  virtual bool IsFunc(Evaluator*) const override {
+    // This is the unhandled edge case as described in expr.h.
+    return true;
+  }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ev->CheckStack();
     ev->IncrementEvalDepth();
@@ -173,6 +197,10 @@
     delete subst_;
   }
 
+  virtual bool IsFunc(Evaluator* ev) const override {
+    return name_->IsFunc(ev) || pat_->IsFunc(ev) || subst_->IsFunc(ev);
+  }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ev->CheckStack();
     ev->IncrementEvalDepth();
@@ -213,6 +241,8 @@
       delete a;
   }
 
+  virtual bool IsFunc(Evaluator*) const override { return true; }
+
   virtual void Eval(Evaluator* ev, string* s) const override {
     ScopedFrame frame(ev->Enter(FrameType::FUNCALL, fi_->name, Location()));
     ev->CheckStack();
diff --git a/src/expr.h b/src/expr.h
index 447aea5..a731077 100644
--- a/src/expr.h
+++ b/src/expr.h
@@ -30,6 +30,17 @@
   virtual void Eval(Evaluator* ev, string* s) const = 0;
   string Eval(Evaluator*) const;
   const Loc& Location() const { return loc_; }
+  // Whether this Evaluable is either knowably a function (e.g. one of the
+  // built-ins) or likely to be a function-type macro (i.e. one that has
+  // positional $(1) arguments to be expanded inside it. However, this is
+  // only a heuristic guess. In order to not actually evaluate the expression,
+  // because doing so could have side effects like calling $(error ...) or
+  // doing a nested eval that assigns variables, we don't handle the case where
+  // the variable name is itself a variable expansion inside a deferred
+  // expansion variable, and return true in that case. Implementations of this
+  // function must also not mark variables as used, as that can trigger unwanted
+  // warnings. They should use ev->PeekVar().
+  virtual bool IsFunc(Evaluator* ev) const = 0;
 
  protected:
   Evaluable(const Loc& loc);
diff --git a/src/strutil.cc b/src/strutil.cc
index 797775f..814324b 100644
--- a/src/strutil.cc
+++ b/src/strutil.cc
@@ -554,3 +554,15 @@
   StringPiece(*s).substr(prev).AppendToString(&r);
   s->swap(r);
 }
+
+bool IsInteger(StringPiece s) {
+  if (s.size() == 0) {
+    return false;
+  }
+  for (auto c : s) {
+    if (c < '0' || c > '9') {
+      return false;
+    }
+  }
+  return true;
+}
diff --git a/src/strutil.h b/src/strutil.h
index cc4d519..d573b79 100644
--- a/src/strutil.h
+++ b/src/strutil.h
@@ -146,4 +146,6 @@
 
 void EscapeShell(string* s);
 
+bool IsInteger(StringPiece s);
+
 #endif  // STRUTIL_H_
diff --git a/src/strutil_test.cc b/src/strutil_test.cc
index d0db1f3..d95d351 100644
--- a/src/strutil_test.cc
+++ b/src/strutil_test.cc
@@ -210,6 +210,16 @@
   ASSERT_EQ(ConcatDir("a", "../../b"), "../b");
 }
 
+void TestIsInteger() {
+  ASSERT_BOOL(IsInteger("0"), true);
+  ASSERT_BOOL(IsInteger("9"), true);
+  ASSERT_BOOL(IsInteger("1234"), true);
+  ASSERT_BOOL(IsInteger(""), false);
+  ASSERT_BOOL(IsInteger("a234"), false);
+  ASSERT_BOOL(IsInteger("123a"), false);
+  ASSERT_BOOL(IsInteger("12a4"), false);
+}
+
 }  // namespace
 
 int main() {
@@ -227,5 +237,6 @@
   TestWordScannerInvalidAccess();
   TestFindEndOfLineInvalidAccess();
   TestConcatDir();
+  TestIsInteger();
   assert(!g_failed);
 }
diff --git a/src/symtab.cc b/src/symtab.cc
index 3d49f2e..2795bfb 100644
--- a/src/symtab.cc
+++ b/src/symtab.cc
@@ -41,6 +41,8 @@
 Symbol kEmptySym;
 Symbol kShellSym;
 Symbol kKatiReadonlySym;
+Symbol kVariablesSym;
+Symbol kKatiSymbolsSym;
 
 Symbol::Symbol(int v) : v_(v) {}
 
@@ -127,6 +129,12 @@
     kEmptySym = Intern("");
     kShellSym = Intern("SHELL");
     kKatiReadonlySym = Intern(".KATI_READONLY");
+    kVariablesSym = Intern(".VARIABLES");
+    kVariablesSym.SetGlobalVar(new VariableNamesVar(".VARIABLES", true), false,
+                               nullptr);
+    kKatiSymbolsSym = Intern(".KATI_SYMBOLS");
+    kKatiSymbolsSym.SetGlobalVar(new VariableNamesVar(".KATI_SYMBOLS", false),
+                                 false, nullptr);
   }
 
   ~Symtab() {
@@ -159,6 +167,22 @@
     return InternImpl(s);
   }
 
+  vector<StringPiece> GetSymbolNames(std::function<bool(Var*)> const& filter) {
+    vector<StringPiece> result;
+    for (auto entry : symtab_) {
+      Var* var = entry.second.PeekGlobalVar();
+      // The symbol table contains all interned strings, not just variables
+      // which have been defined.
+      if (!var->IsDefined()) {
+        continue;
+      }
+      if (filter(var)) {
+        result.push_back(entry.first);
+      }
+    }
+    return result;
+  }
+
  private:
   unordered_map<StringPiece, Symbol> symtab_;
   vector<string*> symbols_;
@@ -188,3 +212,7 @@
   }
   return JoinStrings(strs, sep);
 }
+
+vector<StringPiece> GetSymbolNames(std::function<bool(Var*)> const& filter) {
+  return g_symtab->GetSymbolNames(filter);
+}
diff --git a/src/symtab.h b/src/symtab.h
index 455d967..dd27fda 100644
--- a/src/symtab.h
+++ b/src/symtab.h
@@ -16,6 +16,7 @@
 #define SYMTAB_H_
 
 #include <bitset>
+#include <functional>
 #include <string>
 #include <vector>
 
@@ -225,4 +226,7 @@
 
 string JoinSymbols(const vector<Symbol>& syms, const char* sep);
 
+// Get all symbol names for which filter returns true.
+vector<StringPiece> GetSymbolNames(std::function<bool(Var*)> const& filter);
+
 #endif  // SYMTAB_H_
diff --git a/src/testutil.h b/src/testutil.h
index be6125c..8de3666 100644
--- a/src/testutil.h
+++ b/src/testutil.h
@@ -18,6 +18,16 @@
 
 bool g_failed;
 
+#define ASSERT_BOOL(a, b)                                                     \
+  do {                                                                        \
+    bool A = (a);                                                             \
+    if ((A) != (b)) {                                                         \
+      fprintf(stderr, "Assertion failure at %s:%d: %s (which is %s) vs %s\n", \
+              __FILE__, __LINE__, #a, (A ? "true" : "false"), #b);            \
+      g_failed = true;                                                        \
+    }                                                                         \
+  } while (0)
+
 #define ASSERT_EQ(a, b)                                                     \
   do {                                                                      \
     if ((a) != (b)) {                                                       \
diff --git a/src/var.cc b/src/var.cc
index da0fd29..4bc9bbb 100644
--- a/src/var.cc
+++ b/src/var.cc
@@ -19,6 +19,7 @@
 #include "eval.h"
 #include "expr.h"
 #include "log.h"
+#include "strutil.h"
 
 unordered_map<const Var*, string> Var::diagnostic_messages_;
 
@@ -121,6 +122,10 @@
   v->Eval(ev, &v_);
 }
 
+bool SimpleVar::IsFunc(Evaluator*) const {
+  return false;
+}
+
 void SimpleVar::Eval(Evaluator* ev, string* s) const {
   ev->CheckStack();
   *s += v_;
@@ -149,6 +154,10 @@
                            StringPiece orig)
     : Var(origin, definition, loc), v_(v), orig_(orig) {}
 
+bool RecursiveVar::IsFunc(Evaluator* ev) const {
+  return v_->IsFunc(ev);
+}
+
 void RecursiveVar::Eval(Evaluator* ev, string* s) const {
   ev->CheckStack();
   v_->Eval(ev, s);
@@ -183,6 +192,10 @@
 
 UndefinedVar::UndefinedVar() {}
 
+bool UndefinedVar::IsFunc(Evaluator*) const {
+  return false;
+}
+
 void UndefinedVar::Eval(Evaluator*, string*) const {
   // Nothing to do.
 }
@@ -195,6 +208,46 @@
   return "*undefined*";
 }
 
+VariableNamesVar::VariableNamesVar(StringPiece name, bool all)
+    : name_(name), all_(all) {
+  SetReadOnly();
+  SetAssignOp(AssignOp::COLON_EQ);
+}
+
+bool VariableNamesVar::IsFunc(Evaluator*) const {
+  return false;
+}
+
+void VariableNamesVar::Eval(Evaluator* ev, string* s) const {
+  ConcatVariableNames(ev, s);
+}
+
+StringPiece VariableNamesVar::String() const {
+  return name_;
+}
+
+void VariableNamesVar::ConcatVariableNames(Evaluator* ev, string* s) const {
+  WordWriter ww(s);
+  vector<StringPiece>&& symbols = GetSymbolNames([=](Var* var) -> bool {
+    if (var->Obsolete()) {
+      return false;
+    }
+    if (!all_) {
+      if (var->IsFunc(ev)) {
+        return false;
+      }
+    }
+    return true;
+  });
+  for (auto entry : symbols) {
+    ww.Write(entry);
+  }
+}
+
+string VariableNamesVar::DebugString() const {
+  return "*VariableNamesVar*";
+}
+
 Vars::~Vars() {
   for (auto p : *this) {
     delete p.second;
diff --git a/src/var.h b/src/var.h
index f34bfdc..b091ffb 100644
--- a/src/var.h
+++ b/src/var.h
@@ -117,6 +117,8 @@
 
   virtual const char* Flavor() const override { return "simple"; }
 
+  virtual bool IsFunc(Evaluator* ev) const override;
+
   virtual void Eval(Evaluator* ev, string* s) const override;
 
   virtual void AppendVar(Evaluator* ev, Value* v) override;
@@ -138,6 +140,8 @@
 
   virtual const char* Flavor() const override { return "recursive"; }
 
+  virtual bool IsFunc(Evaluator* ev) const override;
+
   virtual void Eval(Evaluator* ev, string* s) const override;
 
   virtual void AppendVar(Evaluator* ev, Value* v) override;
@@ -159,6 +163,8 @@
   virtual const char* Flavor() const override { return "undefined"; }
   virtual bool IsDefined() const override { return false; }
 
+  virtual bool IsFunc(Evaluator* ev) const override;
+
   virtual void Eval(Evaluator* ev, string* s) const override;
 
   virtual StringPiece String() const override;
@@ -166,6 +172,29 @@
   virtual string DebugString() const override;
 };
 
+// The built-in VARIABLES and KATI_SYMBOLS variables
+class VariableNamesVar : public Var {
+ public:
+  VariableNamesVar(StringPiece name, bool all);
+
+  virtual const char* Flavor() const override { return "kati_variable_names"; }
+  virtual bool IsDefined() const override { return true; }
+
+  virtual bool IsFunc(Evaluator* ev) const override;
+
+  virtual void Eval(Evaluator* ev, string* s) const override;
+
+  virtual StringPiece String() const override;
+
+  virtual string DebugString() const override;
+
+ private:
+  StringPiece name_;
+  bool all_;
+
+  void ConcatVariableNames(Evaluator* ev, string* s) const;
+};
+
 class Vars : public unordered_map<Symbol, Var*> {
  public:
   ~Vars();
diff --git a/testcase/variables.mk b/testcase/variables.mk
new file mode 100644
index 0000000..ff0de6c
--- /dev/null
+++ b/testcase/variables.mk
@@ -0,0 +1,44 @@
+
+SET_BEFORE := should_not_appear_in_output_before
+
+# Save .VARIABLES so we can filter out all the built-in stuff later
+BEFORE:=$(.VARIABLES)
+
+# Simple variable
+ONE := 1
+
+# This is := so $(1) is evaluable right now
+EVALUABLE := $(ONE)$(1)
+
+# Deferred execution with $(1), so call it a function
+LOOKS_LIKE_A_FUNCTION_1 = $(1)
+LOOKS_LIKE_A_FUNCTION_2 = $(ONE)$(1)
+
+# Deferred execution without $(1), so should not be a function
+NOT_A_FUNCTION_1 = SIMPLE_TEXT
+NOT_A_FUNCTION_2 = $(ONE)
+
+# We can't evaluate it without eval, so we assume that it *is* a function.
+THE_EDGE_CASE_1 = $($(ONE))
+THE_EDGE_CASE_2 = $($(SET_BEFORE))
+THE_EDGE_CASE_3 = asdf$($(SET_BEFORE))
+THE_EDGE_CASE_4 = $($(SET_BEFORE))fsda
+THE_EDGE_CASE_5 = fdsa$($(SET_BEFORE))fdsa
+
+# This was already set before we saved the snapshot, so it shouldn't
+# reappear
+SET_BEFORE += should_not_appear_in_output_before
+
+$(info .VARIABLES (from make): $(sort $(filter-out $(BEFORE), $(.VARIABLES))))
+$(info .VARIABLES (hard coded): BEFORE EVALUABLE LOOKS_LIKE_A_FUNCTION_1 LOOKS_LIKE_A_FUNCTION_2 NOT_A_FUNCTION_1 NOT_A_FUNCTION_2 ONE THE_EDGE_CASE_1 THE_EDGE_CASE_2 THE_EDGE_CASE_3 THE_EDGE_CASE_4 THE_EDGE_CASE_5)
+
+ifdef KATI
+$(info .KATI_SYMBOLS: $(sort $(filter-out $(BEFORE), $(.KATI_SYMBOLS))))
+else
+# Make doesn't support .VARIABLES so output the expected values manually
+# for comparison
+$(info .KATI_SYMBOLS: BEFORE EVALUABLE NOT_A_FUNCTION_1 NOT_A_FUNCTION_2 ONE)
+endif
+
+# Updating this variable should not cause it to appear
+SET_BEFORE += a_new_value