crash-reporter: Validate proc files and log the state of crashed process.

This CL makes the following changes to UserCollector:
1. Check if the maps file is empty after copying proc files to the
   temporary container directory, which prevents the core-to-minidump
   conversion failure due to unusable maps file.
2. Obtain and log the state of the crashed process, which helps
   determine if the crashed process is in the zombie state.
3. Refactor the status file processing code.

BUG=chromium-os:24820
TEST=Tested the following:
1. Build crash-reporter and run unit tests.
2. Run the following autotest tests on a Cr48:
   - logging_CrashSender
   - logging_UserCrash
3. Check that the process state of a crashed process is logged in
   /var/log/messages.

Change-Id: Iebaf9c36c18185c703c18e7028ee4673dd9ebc93
Reviewed-on: https://gerrit.chromium.org/gerrit/13826
Reviewed-by: Ken Mixter <kmixter@chromium.org>
Tested-by: Ben Chan <benchan@chromium.org>
Commit-Ready: Ben Chan <benchan@chromium.org>
diff --git a/crash_reporter/user_collector.cc b/crash_reporter/user_collector.cc
index 01af4eb..dffca9d 100644
--- a/crash_reporter/user_collector.cc
+++ b/crash_reporter/user_collector.cc
@@ -6,7 +6,6 @@
 
 #include <grp.h>  // For struct group.
 #include <pcrecpp.h>
-#include <pcrecpp.h>
 #include <pwd.h>  // For struct passwd.
 #include <sys/types.h>  // For getpwuid_r, getgrnam_r, WEXITSTATUS.
 
@@ -41,6 +40,7 @@
 static const char kCoreToMinidumpConverterPath[] = "/usr/bin/core2md";
 
 static const char kDefaultLogConfig[] = "/etc/crash_reporter_logs.conf";
+static const char kStatePrefix[] = "State:\t";
 
 const char *UserCollector::kUserId = "Uid:\t";
 const char *UserCollector::kGroupId = "Gid:\t";
@@ -161,26 +161,30 @@
   return true;
 }
 
-bool UserCollector::GetIdFromStatus(const char *prefix,
-                                    IdKind kind,
-                                    const std::string &status_contents,
-                                    int *id) {
+bool UserCollector::GetFirstLineWithPrefix(
+    const std::vector<std::string> &lines,
+    const char *prefix, std::string *line) {
+  std::vector<std::string>::const_iterator line_iterator;
+  for (line_iterator = lines.begin(); line_iterator != lines.end();
+       ++line_iterator) {
+    if (line_iterator->find(prefix) == 0) {
+      *line = *line_iterator;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool UserCollector::GetIdFromStatus(
+    const char *prefix, IdKind kind,
+    const std::vector<std::string> &status_lines, int *id) {
   // From fs/proc/array.c:task_state(), this file contains:
   // \nUid:\t<uid>\t<euid>\t<suid>\t<fsuid>\n
-  std::vector<std::string> status_lines;
-  base::SplitString(status_contents, '\n', &status_lines);
-  std::vector<std::string>::iterator line_iterator;
-  for (line_iterator = status_lines.begin();
-       line_iterator != status_lines.end();
-       ++line_iterator) {
-    if (line_iterator->find(prefix) == 0)
-      break;
-  }
-  if (line_iterator == status_lines.end()) {
+  std::string id_line;
+  if (!GetFirstLineWithPrefix(status_lines, prefix, &id_line)) {
     return false;
   }
-  std::string id_substring = line_iterator->substr(strlen(prefix),
-                                                   std::string::npos);
+  std::string id_substring = id_line.substr(strlen(prefix), std::string::npos);
   std::vector<std::string> ids;
   base::SplitString(id_substring, '\t', &ids);
   if (ids.size() != kIdMax || kind < 0 || kind >= kIdMax) {
@@ -189,8 +193,19 @@
   const char *number = ids[kind].c_str();
   char *end_number = NULL;
   *id = strtol(number, &end_number, 10);
-  if (*end_number != '\0')
+  if (*end_number != '\0') {
     return false;
+  }
+  return true;
+}
+
+bool UserCollector::GetStateFromStatus(
+    const std::vector<std::string> &status_lines, std::string *state) {
+  std::string state_line;
+  if (!GetFirstLineWithPrefix(status_lines, kStatePrefix, &state_line)) {
+    return false;
+  }
+  *state = state_line.substr(strlen(kStatePrefix), std::string::npos);
   return true;
 }
 
@@ -250,6 +265,21 @@
       return false;
     }
   }
+  return ValidateProcFiles(container_dir);
+}
+
+bool UserCollector::ValidateProcFiles(const FilePath &container_dir) {
+  // Check if the maps file is empty, which could be due to the crashed
+  // process being reaped by the kernel before finishing a core dump.
+  int64 file_size = 0;
+  if (!file_util::GetFileSize(container_dir.Append("maps"), &file_size)) {
+    LOG(ERROR) << "Could not get the size of maps file";
+    return false;
+  }
+  if (file_size == 0) {
+    LOG(ERROR) << "maps file is empty";
+    return false;
+  }
   return true;
 }
 
@@ -269,8 +299,19 @@
               << file_util::DirectoryExists(process_path);
     return false;
   }
+
+  std::vector<std::string> status_lines;
+  base::SplitString(status, '\n', &status_lines);
+
+  std::string process_state;
+  if (!GetStateFromStatus(status_lines, &process_state)) {
+    LOG(ERROR) << "Could not find process state in status file";
+    return false;
+  }
+  LOG(INFO) << "State of crashed process [" << pid << "]: " << process_state;
+
   int process_euid;
-  if (!GetIdFromStatus(kUserId, kIdEffective, status, &process_euid)) {
+  if (!GetIdFromStatus(kUserId, kIdEffective, status_lines, &process_euid)) {
     LOG(ERROR) << "Could not find euid in status file";
     return false;
   }
diff --git a/crash_reporter/user_collector.h b/crash_reporter/user_collector.h
index d041628..043d38c 100644
--- a/crash_reporter/user_collector.h
+++ b/crash_reporter/user_collector.h
@@ -58,7 +58,9 @@
   FRIEND_TEST(UserCollectorTest, CopyOffProcFilesBadPid);
   FRIEND_TEST(UserCollectorTest, CopyOffProcFilesOK);
   FRIEND_TEST(UserCollectorTest, GetExecutableBaseNameFromPid);
+  FRIEND_TEST(UserCollectorTest, GetFirstLineWithPrefix);
   FRIEND_TEST(UserCollectorTest, GetIdFromStatus);
+  FRIEND_TEST(UserCollectorTest, GetStateFromStatus);
   FRIEND_TEST(UserCollectorTest, GetProcessPath);
   FRIEND_TEST(UserCollectorTest, GetSymlinkTarget);
   FRIEND_TEST(UserCollectorTest, GetUserInfoFromName);
@@ -66,6 +68,7 @@
   FRIEND_TEST(UserCollectorTest, ShouldDumpChromeOverridesDeveloperImage);
   FRIEND_TEST(UserCollectorTest, ShouldDumpDeveloperImageOverridesConsent);
   FRIEND_TEST(UserCollectorTest, ShouldDumpUseConsentProductionImage);
+  FRIEND_TEST(UserCollectorTest, ValidateProcFiles);
 
   // Enumeration to pass to GetIdFromStatus.  Must match the order
   // that the kernel lists IDs in the status file.
@@ -87,15 +90,37 @@
                         FilePath *target);
   bool GetExecutableBaseNameFromPid(uid_t pid,
                                     std::string *base_name);
+  // Returns, via |line|, the first line in |lines| that starts with |prefix|.
+  // Returns true if a line is found, or false otherwise.
+  bool GetFirstLineWithPrefix(const std::vector<std::string> &lines,
+                              const char *prefix, std::string *line);
+
+  // Returns the identifier of |kind|, via |id|, found in |status_lines| on
+  // the line starting with |prefix|. |status_lines| contains the lines in
+  // the status file. Returns true if the identifier can be determined.
   bool GetIdFromStatus(const char *prefix,
                        IdKind kind,
-                       const std::string &status_contents,
+                       const std::vector<std::string> &status_lines,
                        int *id);
 
+  // Returns the process state, via |state|, found in |status_lines|, which
+  // contains the lines in the status file. Returns true if the process state
+  // can be determined.
+  bool GetStateFromStatus(const std::vector<std::string> &status_lines,
+                          std::string *state);
+
   void LogCollectionError(const std::string &error_message);
   void EnqueueCollectionErrorLog(pid_t pid, const std::string &exec_name);
 
-  bool CopyOffProcFiles(pid_t pid, const FilePath &process_map);
+  bool CopyOffProcFiles(pid_t pid, const FilePath &container_dir);
+
+  // Validates the proc files at |container_dir| and returns true if they
+  // are usable for the core-to-minidump conversion later. For instance, if
+  // a process is reaped by the kernel before the copying of its proc files
+  // takes place, some proc files like /proc/<pid>/maps may contain nothing
+  // and thus become unusable.
+  bool ValidateProcFiles(const FilePath &container_dir);
+
   // Determines the crash directory for given pid based on pid's owner,
   // and creates the directory if necessary with appropriate permissions.
   // Returns true whether or not directory needed to be created, false on
diff --git a/crash_reporter/user_collector_test.cc b/crash_reporter/user_collector_test.cc
index 7afb712..4259f4e 100644
--- a/crash_reporter/user_collector_test.cc
+++ b/crash_reporter/user_collector_test.cc
@@ -5,6 +5,8 @@
 #include <unistd.h>
 
 #include "base/file_util.h"
+#include "base/string_split.h"
+#include "base/memory/scoped_temp_dir.h"
 #include "chromeos/syslog_logging.h"
 #include "chromeos/test_helpers.h"
 #include "crash-reporter/user_collector.h"
@@ -40,6 +42,7 @@
     pid_ = getpid();
     chromeos::ClearLog();
   }
+
  protected:
   void ExpectFileEquals(const char *golden,
                         const char *file_path) {
@@ -49,6 +52,12 @@
     EXPECT_EQ(golden, contents);
   }
 
+  std::vector<std::string> SplitLines(const std::string &lines) const {
+    std::vector<std::string> result;
+    base::SplitString(lines, '\n', &result);
+    return result;
+  }
+
   UserCollector collector_;
   pid_t pid_;
 };
@@ -238,20 +247,54 @@
   EXPECT_EQ("user_collector_test", base_name);
 }
 
+TEST_F(UserCollectorTest, GetFirstLineWithPrefix) {
+  std::vector<std::string> lines;
+  std::string line;
+
+  EXPECT_FALSE(collector_.GetFirstLineWithPrefix(lines, "Name:", &line));
+  EXPECT_EQ("", line);
+
+  lines.push_back("Name:\tls");
+  lines.push_back("State:\tR (running)");
+  lines.push_back(" Foo:\t1000");
+
+  line.clear();
+  EXPECT_TRUE(collector_.GetFirstLineWithPrefix(lines, "Name:", &line));
+  EXPECT_EQ(lines[0], line);
+
+  line.clear();
+  EXPECT_TRUE(collector_.GetFirstLineWithPrefix(lines, "State:", &line));
+  EXPECT_EQ(lines[1], line);
+
+  line.clear();
+  EXPECT_FALSE(collector_.GetFirstLineWithPrefix(lines, "Foo:", &line));
+  EXPECT_EQ("", line);
+
+  line.clear();
+  EXPECT_TRUE(collector_.GetFirstLineWithPrefix(lines, " Foo:", &line));
+  EXPECT_EQ(lines[2], line);
+
+  line.clear();
+  EXPECT_FALSE(collector_.GetFirstLineWithPrefix(lines, "Bar:", &line));
+  EXPECT_EQ("", line);
+}
+
 TEST_F(UserCollectorTest, GetIdFromStatus) {
   int id = 1;
   EXPECT_FALSE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                           UserCollector::kIdEffective,
-                                          "nothing here",
+                                          SplitLines("nothing here"),
                                           &id));
   EXPECT_EQ(id, 1);
 
   // Not enough parameters.
   EXPECT_FALSE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                           UserCollector::kIdReal,
-                                          "line 1\nUid:\t1\n", &id));
+                                          SplitLines("line 1\nUid:\t1\n"),
+                                          &id));
 
-  const char valid_contents[] = "\nUid:\t1\t2\t3\t4\nGid:\t5\t6\t7\t8\n";
+  const std::vector<std::string> valid_contents =
+      SplitLines("\nUid:\t1\t2\t3\t4\nGid:\t5\t6\t7\t8\n");
   EXPECT_TRUE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                          UserCollector::kIdReal,
                                          valid_contents,
@@ -294,21 +337,36 @@
   // Fail if junk after number
   EXPECT_FALSE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                           UserCollector::kIdReal,
-                                          "Uid:\t1f\t2\t3\t4\n",
+                                          SplitLines("Uid:\t1f\t2\t3\t4\n"),
                                           &id));
   EXPECT_TRUE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                          UserCollector::kIdReal,
-                                         "Uid:\t1\t2\t3\t4\n",
+                                         SplitLines("Uid:\t1\t2\t3\t4\n"),
                                          &id));
   EXPECT_EQ(1, id);
 
   // Fail if more than 4 numbers.
   EXPECT_FALSE(collector_.GetIdFromStatus(UserCollector::kUserId,
                                           UserCollector::kIdReal,
-                                          "Uid:\t1\t2\t3\t4\t5\n",
+                                          SplitLines("Uid:\t1\t2\t3\t4\t5\n"),
                                           &id));
 }
 
+TEST_F(UserCollectorTest, GetStateFromStatus) {
+  std::string state;
+  EXPECT_FALSE(collector_.GetStateFromStatus(SplitLines("nothing here"),
+                                             &state));
+  EXPECT_EQ("", state);
+
+  EXPECT_TRUE(collector_.GetStateFromStatus(SplitLines("State:\tR (running)"),
+                                            &state));
+  EXPECT_EQ("R (running)", state);
+
+  EXPECT_TRUE(collector_.GetStateFromStatus(
+      SplitLines("Name:\tls\nState:\tZ (zombie)\n"), &state));
+  EXPECT_EQ("Z (zombie)", state);
+}
+
 TEST_F(UserCollectorTest, GetUserInfoFromName) {
   gid_t gid = 100;
   uid_t uid = 100;
@@ -353,6 +411,27 @@
   }
 }
 
+TEST_F(UserCollectorTest, ValidateProcFiles) {
+  ScopedTempDir temp_dir;
+  ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+  FilePath container_dir = temp_dir.path();
+
+  // maps file not exists (i.e. GetFileSize fails)
+  EXPECT_FALSE(collector_.ValidateProcFiles(container_dir));
+
+  // maps file is empty
+  FilePath maps_file = container_dir.Append("maps");
+  ASSERT_EQ(0, file_util::WriteFile(maps_file, NULL, 0));
+  ASSERT_TRUE(file_util::PathExists(maps_file));
+  EXPECT_FALSE(collector_.ValidateProcFiles(container_dir));
+
+  // maps file is not empty
+  const char data[] = "test data";
+  ASSERT_EQ(sizeof(data), file_util::WriteFile(maps_file, data, sizeof(data)));
+  ASSERT_TRUE(file_util::PathExists(maps_file));
+  EXPECT_TRUE(collector_.ValidateProcFiles(container_dir));
+}
+
 int main(int argc, char **argv) {
   SetUpTests(&argc, argv, false);
   return RUN_ALL_TESTS();