[analyzer] Taint: add system and popen as undesirable sinks for taint
data.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148176 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 1d112c5..62b5493 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -29,9 +29,16 @@
 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
                                             check::PreStmt<CallExpr> > {
 public:
-  static const unsigned ReturnValueIndex = UINT_MAX;
+  static void *getTag() { static int Tag; return &Tag; }
+
+  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
+  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
+
+  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
 
 private:
+  static const unsigned ReturnValueIndex = UINT_MAX;
+
   mutable llvm::OwningPtr<BugType> BT;
   void initBugType() const;
 
@@ -71,18 +78,30 @@
   bool isStdin(const Expr *E, CheckerContext &C) const;
 
   /// Check for CWE-134: Uncontrolled Format String.
+  static const char MsgUncontrolledFormatString[];
   bool checkUncontrolledFormatString(const CallExpr *CE,
                                      CheckerContext &C) const;
 
-public:
-  static void *getTag() { static int Tag; return &Tag; }
+  /// Check for:
+  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
+  /// CWE-78, "Failure to Sanitize Data into an OS Command"
+  static const char MsgSanitizeSystemArgs[];
+  bool checkSystemCall(const CallExpr *CE, StringRef Name,
+                       CheckerContext &C) const;
 
-  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
-  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
-
-  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
+  /// Generate a report if the expression is tainted or points to tainted data.
+  bool generateReportIfTainted(const Expr *E, const char Msg[],
+                               CheckerContext &C) const;
 
 };
+// TODO: We probably could use TableGen here.
+const char GenericTaintChecker::MsgUncontrolledFormatString[] =
+  "Tainted format string (CWE-134: Uncontrolled Format String)";
+
+const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
+  "Tainted data passed to a system call "
+  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
+
 }
 
 /// A set which is used to pass information from call pre-visit instruction
@@ -215,6 +234,13 @@
   if (checkUncontrolledFormatString(CE, C))
     return true;
 
+  StringRef Name = C.getCalleeName(CE);
+  if (Name.empty())
+    return false;
+
+  if (checkSystemCall(CE, Name, C))
+    return true;
+
   return false;
 }
 
@@ -269,7 +295,7 @@
   return 0;
 }
 
-// If any other arguments are tainted, mark state as tainted on pre-visit.
+// If any arguments are tainted, mark the return value as tainted on post-visit.
 const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
                                                      CheckerContext &C) const {
   for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
@@ -377,6 +403,28 @@
   return false;
 }
 
+bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
+                                                  const char Msg[],
+                                                  CheckerContext &C) const {
+  assert(E);
+
+  // Check for taint.
+  const ProgramState *State = C.getState();
+  if (!State->isTainted(getPointedToSymbol(C, E)) &&
+      !State->isTainted(E, C.getLocationContext()))
+    return false;
+
+  // Generate diagnostic.
+  if (ExplodedNode *N = C.addTransition()) {
+    initBugType();
+    BugReport *report = new BugReport(*BT, Msg, N);
+    report->addRange(E->getSourceRange());
+    C.EmitReport(report);
+    return true;
+  }
+  return false;
+}
+
 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
                                                         CheckerContext &C) const{
   // Check if the function contains a format string argument.
@@ -385,18 +433,27 @@
     return false;
 
   // If either the format string content or the pointer itself are tainted, warn.
-  const ProgramState *State = C.getState();
-  const Expr *Arg = CE->getArg(ArgNum);
-  if (State->isTainted(getPointedToSymbol(C, Arg)) ||
-      State->isTainted(Arg, C.getLocationContext()))
-    if (ExplodedNode *N = C.addTransition()) {
-      initBugType();
-      BugReport *report = new BugReport(*BT,
-        "Tainted format string (CWE-134: Uncontrolled Format String)", N);
-      report->addRange(Arg->getSourceRange());
-      C.EmitReport(report);
-      return true;
-    }
+  if (generateReportIfTainted(CE->getArg(ArgNum),
+                              MsgUncontrolledFormatString, C))
+    return true;
+  return false;
+}
+
+bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
+                                          StringRef Name,
+                                          CheckerContext &C) const {
+  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
+    .Case("system", 0)
+    .Case("popen", 0)
+    .Default(UINT_MAX);
+
+  if (ArgNum == UINT_MAX)
+    return false;
+
+  if (generateReportIfTainted(CE->getArg(ArgNum),
+                              MsgSanitizeSystemArgs, C))
+    return true;
+
   return false;
 }
 
diff --git a/test/Analysis/taint-generic.c b/test/Analysis/taint-generic.c
index 5423d05..0f89966 100644
--- a/test/Analysis/taint-generic.c
+++ b/test/Analysis/taint-generic.c
@@ -90,3 +90,11 @@
   strncpy(sncpy, s, 20);
   setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}}
 }
+
+int system(const char *command);
+void testTaintSystemCall() {
+  char buffer[156];
+  char addr[128];
+  scanf("%s", addr);
+  system(addr); // expected-warning {{Tainted data passed to a system call}}
+}