blob: ab1898f1f5b264bd88d4d9df4735a7add5a4014f [file] [log] [blame]
#!/bin/sh
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

set -e

# Product ID in crash report.
CHROMEOS_PRODUCT=ChromeOS

# Whether the run file should be removed when this process finishes. It must
# not always be removed, since it may belong to a pre-existing crash_sender
# process.
CLEAN_UP_RUN_FILE=0

# File whose existence implies crash reports may be sent, and whose
# contents includes our machine's anonymized guid.
CONSENT_ID="/home/chronos/Consent To Send Stats"

# Path to file that indicates a crash test is currently running.
CRASH_TEST_IN_PROGRESS_FILE="/tmp/crash-test-in-progress"

# Path to find, which is required for computing the crash rate.
FIND="/usr/bin/find"

# Set this to 1 in the environment to allow uploading crash reports
# for unofficial versions.
FORCE_OFFICIAL=${FORCE_OFFICIAL:-0}

# Path to hardware class description.
HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"

# Path to file that indicates this is a developer image.
LEAVE_CORE_FILE="/root/.leave_core"

# Path to list_proxies.
LIST_PROXIES="/usr/bin/list_proxies"

# Maximum crashes to send per day.
MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}

# Path to metrics_client.
METRICS_CLIENT="/usr/bin/metrics_client"

# File whose existence mocks crash sending. If empty we pretend the
# crash sending was successful, otherwise unsuccessful.
MOCK_CRASH_SENDING="/tmp/mock-crash-sending"

# Set this to 1 in the environment to pretend to have booted in developer
# mode. This is used by autotests.
MOCK_DEVELOPER_MODE=${MOCK_DEVELOPER_MODE:-0}

# Ignore PAUSE_CRASH_SENDING file if set.
OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0}

# File whose existence causes crash sending to be delayed (for testing).
# Must be stateful to enable testing kernel crashes.
PAUSE_CRASH_SENDING="/var/lib/crash_sender_paused"

# URL to send official build crash reports to.
REPORT_UPLOAD_PROD_URL="https://clients2.google.com/cr/report"

# Path to a directory of restricted certificates which includes
# a certificate for ${REPORT_UPLOAD_PROD_URL}.
RESTRICTED_CERTIFICATES_PATH="/usr/share/chromeos-ca-certificates"

# File whose existence implies we're running and not to start again.
RUN_FILE="/var/run/crash_sender.pid"

# Maximum time to sleep between sends.
SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}

# The syslog tag for all logging we emit. $0 is quoted so that a script path
# containing whitespace is not split into several basename arguments.
TAG="$(basename "$0")[$$]"

# Directory to store timestamp files indicating the uploads in the past 24
# hours.
TIMESTAMPS_DIR="/var/lib/crash_sender"

# Temp directory for this process.
TMP_DIR=""
# Send every argument to syslog through logger, tagged with ${TAG}.
lecho() {
  local syslog_tag="${TAG}"
  logger -t "${syslog_tag}" "$@"
}
# Returns 0 when mock mode is enabled, i.e. ${MOCK_CRASH_SENDING} exists as a
# regular file, and 1 otherwise.
is_mock() {
  if [ -f "${MOCK_CRASH_SENDING}" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if the mocked send should succeed, 1 if it should fail.
#
# The contents of ${MOCK_CRASH_SENDING} decide the outcome: an empty file
# (or one holding only newlines) means success, anything else means failure.
# If the file cannot be read, cat's failure is masked by "local" and the
# empty result is treated as success.
is_mock_successful() {
  # The double quotes keep the file contents a single word; this file's own
  # note about dash's "local" builtin explains why that matters.
  local mock_in="$(cat "${MOCK_CRASH_SENDING}")"
  if [ -z "${mock_in}" ]; then
    return 0  # empty file means success
  fi
  return 1
}
# Exit handler: removes this process's temp directory (when one was created),
# removes the run file when this process owns it, and in mock mode logs a
# completion marker.
cleanup() {
  [ -n "${TMP_DIR}" ] && rm -rf "${TMP_DIR}"

  # Only the process that claimed the run file may delete it.
  [ ${CLEAN_UP_RUN_FILE} -eq 1 ] && rm -f "${RUN_FILE}"

  if is_mock; then
    # For testing purposes, emit a message to log so that we
    # know when the test has received all the messages from this run.
    lecho "crash_sender done."
  fi
}
# Claims ${RUN_FILE} for this process, or exits with status 1 if another
# process appears to hold it.
#
# On success CLEAN_UP_RUN_FILE is set to 1 so that cleanup removes the file.
check_not_already_running() {
  local claimed=0

  # With noclobber on, the redirection fails if the run file already exists.
  set -o noclobber
  if echo $$ 2>/dev/null > "${RUN_FILE}"; then
    claimed=1
  fi
  set +o noclobber

  if [ "${claimed}" -eq 1 ]; then
    # Able to write RUN_FILE without contention.
    CLEAN_UP_RUN_FILE=1
    return
  fi

  local last_pid="$(cat "${RUN_FILE}")"
  if [ -f "/proc/${last_pid}/cmdline" ]; then
    # This could just be an unrelated process, but it's ok to be conservative.
    lecho "Already running. Exiting now."
    exit 1
  fi

  # The recorded pid is gone, so take over the dangling run file.
  CLEAN_UP_RUN_FILE=1
  # Note that this write may be executed by two crash_senders who
  # simultaneously reap the existing dangling run file.
  echo $$ > "${RUN_FILE}"
  return
}
# Returns 0 if this image should be treated as official: either
# FORCE_OFFICIAL is non-zero, or the CHROMEOS_RELEASE_DESCRIPTION line of
# /etc/lsb-release contains "Official".
is_official_image() {
  if [ ${FORCE_OFFICIAL} -ne 0 ]; then
    return 0
  fi
  grep '^CHROMEOS_RELEASE_DESCRIPTION' /etc/lsb-release | grep -q 'Official'
}
# Returns 0 if a crash test is currently running, i.e. the file
# ${CRASH_TEST_IN_PROGRESS_FILE} exists; returns 1 otherwise. NOTE: Mirrors
# crash_collector.cc:CrashCollector::IsCrashTestInProgress().
is_crash_test_in_progress() {
  if test -f "${CRASH_TEST_IN_PROGRESS_FILE}"; then
    return 0
  fi
  return 1
}
# Returns 0 if we should consider ourselves to be running on a developer
# image, which is signalled by the file ${LEAVE_CORE_FILE}; returns 1
# otherwise. NOTE: Mirrors crash_collector.cc:CrashCollector::IsDeveloperImage().
is_developer_image() {
  # If we're testing crash reporter itself, we don't want to special-case
  # for developer images.
  if is_crash_test_in_progress; then
    return 1
  fi
  if [ -f "${LEAVE_CORE_FILE}" ]; then
    return 0
  fi
  return 1
}
# Returns 0 if the machine booted up in developer mode.
#
# MOCK_DEVELOPER_MODE, when non-zero, forces a 0 result. Otherwise a running
# crash test forces 1, and in every other case the answer is the exit status
# of crossystem "devsw_boot?1".
is_developer_mode() {
  if [ ${MOCK_DEVELOPER_MODE} -ne 0 ]; then
    return 0
  fi
  # If we're testing crash reporter itself, we don't want to special-case
  # for developer mode.
  if is_crash_test_in_progress; then
    return 1
  fi
  crossystem "devsw_boot?1"  # exit status will be accurate
}
# Generate a uniform random number in 0..max-1 and print it on stdout.
#
# Arguments:
#   $1 - exclusive upper bound (max).
# An empty or non-positive max prints 0 instead of failing with a division
# by zero, so a zero SECONDS_SEND_SPREAD yields a zero-second delay.
generate_uniform_random() {
  local max="$1"
  if [ -z "${max}" ] || [ "${max}" -le 0 ]; then
    echo 0
    return
  fi
  # Read 4 bytes from /dev/urandom as one unsigned decimal integer.
  local random="$(od -An -N4 -tu /dev/urandom)"
  echo $((random % max))
}
# Reports whether the machine is on a 3G connection. Detection is not
# implemented, so this always reports "no" (non-zero status).
is_on_3g() {
  # See crosbug.com/3304.
  false
}
# Check if sending a crash now does not exceed the maximum 24hr rate and
# commit to doing so, if not.
#
# Every committed send is recorded as one file in ${TIMESTAMPS_DIR}; files
# older than 24 hours are purged first, so the number of remaining files is
# the number of sends in the past 24 hours.
#
# Returns 0 (after recording a new timestamp file) when another send is
# allowed, 1 when ${MAX_CRASH_RATE} has been reached.
check_rate() {
  mkdir -p "${TIMESTAMPS_DIR}"
  # Only consider minidumps written in the past 24 hours by removing all older.
  "${FIND}" "${TIMESTAMPS_DIR}" -mindepth 1 -mmin +$((24 * 60)) \
    -exec rm -- '{}' ';'

  # Count the timestamp files. An unmatched glob stays as the literal
  # pattern, so skip entries that do not exist; otherwise an empty directory
  # would be counted as one send.
  local sends_in_24hrs=0
  local stamp
  for stamp in "${TIMESTAMPS_DIR}"/*; do
    [ -e "${stamp}" ] || continue
    sends_in_24hrs=$((sends_in_24hrs + 1))
  done

  lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
  if [ ${sends_in_24hrs} -ge ${MAX_CRASH_RATE} ]; then
    lecho "Cannot send more crashes:"
    lecho " current ${sends_in_24hrs}send/24hrs >= " \
      "max ${MAX_CRASH_RATE}send/24hrs"
    return 1
  fi
  # Commit to this send by recording a new timestamp file.
  mktemp "${TIMESTAMPS_DIR}"/XXXX > /dev/null
  return 0
}
# Prints the base part of a crash report file name, i.e. $1 with its last
# ".suffix" removed: name.01234.5678.9012.meta -> name.01234.5678.9012
get_base() {
  local path="$1"
  echo "${path%.*}"
}
# Prints the text after the last "." in $1 (the whole argument when it
# contains no dot).
get_extension() {
  local path="$1"
  echo "${path##*.}"
}
# Return which kind of report the given metadata file relates to.
#
# The kind is the extension of the payload named in the metadata, with "dmp"
# reported as "minidump". When the payload is not readable the problem is
# logged and "undefined" is printed.
get_kind() {
  local payload="$(get_key_value "$1" "payload")"
  if [ ! -r "${payload}" ]; then
    lecho "Missing payload: ${payload}"
    echo "undefined"
    return
  fi

  local extension="$(get_extension "${payload}")"
  case "${extension}" in
    dmp)
      echo "minidump"
      ;;
    *)
      echo "${extension}"
      ;;
  esac
}
# Prints the value stored for a key in a "key=value" formatted file.
#
# Arguments:
#   $1 - file to read.
#   $2 - key to look up.
# Prints everything after the first "=" of the first line that starts with
# "key=", or "undefined" when no such line exists (or the file cannot be
# read).
get_key_value() {
  local file="$1"
  local key="$2"
  local line

  # Anchor the key at the start of the line so that looking up "ver" cannot
  # match "server=..." or a "ver=" that merely appears inside another value,
  # and keep only the first entry so the result is always a single line.
  line="$(grep -- "^${key}=" "${file}" | head -n 1)"
  if [ -z "${line}" ]; then
    echo "undefined"
    return
  fi
  printf '%s\n' "${line#*=}"
}
# Return the board name: the CHROMEOS_RELEASE_BOARD value from
# /etc/lsb-release, or "undefined" when get_key_value finds no such entry.
get_board() {
  # Call the helper directly instead of echoing an unquoted command
  # substitution, which would word-split and glob-expand the value.
  get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_BOARD"
}
# Return the hardware class or "undefined".
#
# The contents of ${HWCLASS_PATH} are used when that file is readable;
# otherwise the output of "crossystem hwid" is used if that command succeeds.
get_hardware_class() {
  if [ -r "${HWCLASS_PATH}" ]; then
    cat "${HWCLASS_PATH}"
    return
  fi
  if crossystem hwid > /dev/null 2>&1; then
    echo "$(crossystem hwid)"
    return
  fi
  echo "undefined"
}
# Upload the crash report described by the metadata file $1.
#
# Arguments:
#   $1 - path to the report's metadata (.meta) file.
# Globals read: SECONDS_SEND_SPREAD, REPORT_UPLOAD_PROD_URL, FORCE_OFFICIAL,
#   CHROMEOS_PRODUCT, LIST_PROXIES, RESTRICTED_CERTIFICATES_PATH, CONSENT_ID,
#   TMP_DIR.
# Globals written: curl_result (it is not declared local).
# Returns:
#   In mock mode, 0 if is_mock_successful and 1 otherwise, without sleeping
#   or running curl. Otherwise 1 if the sleep fails, else curl's exit status.
send_crash() {
local meta_path="$1"
local report_payload="$(get_key_value "${meta_path}" "payload")"
local kind="$(get_kind "${meta_path}")"
local exec_name="$(get_key_value "${meta_path}" "exec_name")"
# Random delay, in seconds, below SECONDS_SEND_SPREAD.
local sleep_time=$(generate_uniform_random $SECONDS_SEND_SPREAD)
local url="${REPORT_UPLOAD_PROD_URL}"
local chromeos_version="$(get_key_value "${meta_path}" "ver")"
local board="$(get_board)"
local hwclass="$(get_hardware_class)"
# Payload size as recorded in the metadata when the report was written.
local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
local log="$(get_key_value "${meta_path}" "log")"
local sig="$(get_key_value "${meta_path}" "sig")"
# Payload size as measured now; empty if stat fails.
local send_payload_size="$(stat --printf=%s "${report_payload}" 2>/dev/null)"
# image_type stays empty unless one of the cases below applies; an empty
# value is left out of the log output and of the upload.
local image_type
if is_developer_image; then
image_type="dev"
elif [ ${FORCE_OFFICIAL} -ne 0 ]; then
image_type="force-official"
elif is_mock && ! is_mock_successful; then
image_type="mock-fail"
fi
# boot_mode likewise stays empty unless crossystem is unusable or the
# machine is in developer mode.
local boot_mode
if ! crossystem "cros_debug" > /dev/null 2>&1; then
# Sanity-check failed that makes sure crossystem exists.
lecho "Cannot determine boot mode due to error running crossystem command"
boot_mode="missing-crossystem"
elif is_developer_mode; then
boot_mode="dev"
fi
# Two extra form fields are always uploaded. By default they carry the two
# payload sizes; a "sig" entry replaces both, otherwise a "log" entry
# replaces the first one.
local extra_key1="write_payload_size"
local extra_value1="${write_payload_size}"
local extra_key2="send_payload_size"
local extra_value2="${send_payload_size}"
if [ "${sig}" != "undefined" ]; then
extra_key1="sig"
extra_value1="${sig}"
extra_key2="sig2"
extra_value2="${sig}"
elif [ "${log}" != "undefined" ]; then
# Upload a log file if it was specified.
# The leading "@" makes curl's -F attach the named file.
extra_key1="log"
extra_value1="@${log}"
fi
local error_type="$(get_key_value "${meta_path}" "error_type")"
# Treat a missing error_type as empty so that it is omitted below.
[ "${error_type}" = "undefined" ] && error_type=
lecho "Sending crash:"
lecho " Scheduled to send in ${sleep_time}s"
lecho " Metadata: ${meta_path} (${kind})"
lecho " Payload: ${report_payload}"
lecho " Version: ${chromeos_version}"
[ -n "${image_type}" ] && lecho " Image type: ${image_type}"
[ -n "${boot_mode}" ] && lecho " Boot mode: ${boot_mode}"
# The remaining upload fields are only logged in mock mode.
if is_mock; then
lecho " Product: ${CHROMEOS_PRODUCT}"
lecho " URL: ${url}"
lecho " Board: ${board}"
lecho " HWClass: ${hwclass}"
lecho " ${extra_key1}: ${extra_value1}"
lecho " ${extra_key2}: ${extra_value2}"
fi
lecho " Exec name: ${exec_name}"
[ -n "${error_type}" ] && lecho " Error type: ${error_type}"
# In mock mode report the mocked outcome and skip the delay and the upload.
if is_mock; then
if ! is_mock_successful; then
lecho "Mocking unsuccessful send"
return 1
fi
lecho "Mocking successful send"
return 0
fi
# Wait for the random delay chosen above before uploading.
if ! sleep ${sleep_time}; then
lecho "Sleep failed"
return 1
fi
# Read in the first proxy, if any, for a given URL. NOTE: The
# double-quotes are necessary due to a bug in dash with the "local"
# builtin command and values that have spaces in them (see
# "https://bugs.launchpad.net/ubuntu/+source/dash/+bug/139097").
local proxy="`${LIST_PROXIES} -quiet "${url}" | head -1`"
# if a direct connection should be used, unset the proxy variable.
[ "${proxy}" = "direct://" ] && proxy=
# curl's response body and stderr are captured in per-process temp files.
local report_id="${TMP_DIR}/report_id"
local curl_stderr="${TMP_DIR}/curl_stderr"
# Turn off "set -e" around curl so that a failed upload can be logged and
# returned instead of aborting the script. Each ${var:+...} expansion adds
# its option only when var is non-empty, and "guid=<file" sends the contents
# of ${CONSENT_ID} as the field value.
# NOTE(review): curl runs without --fail, so presumably an HTTP error
# response still yields exit status 0 here - confirm.
set +e
curl "${url}" ${proxy:+--proxy "$proxy"} \
--capath "${RESTRICTED_CERTIFICATES_PATH}" --ciphers HIGH \
-F "prod=${CHROMEOS_PRODUCT}" \
-F "ver=${chromeos_version}" \
-F "upload_file_${kind}=@${report_payload}" \
-F "board=${board}" \
-F "hwclass=${hwclass}" \
-F "exec_name=${exec_name}" \
${image_type:+-F "image_type=${image_type}"} \
${boot_mode:+-F "boot_mode=${boot_mode}"} \
${error_type:+-F "error_type=${error_type}"} \
-F "${extra_key1}=${extra_value1}" \
-F "${extra_key2}=${extra_value2}" \
-F "guid=<${CONSENT_ID}" -o "${report_id}" 2>"${curl_stderr}"
curl_result=$?
set -e
if [ ${curl_result} -eq 0 ]; then
lecho "Crash report receipt ID $(cat ${report_id})"
else
lecho "Crash sending failed with: $(cat ${curl_stderr})"
fi
# Only report_id is removed here; curl_stderr stays behind in ${TMP_DIR}.
rm -f "${report_id}"
return ${curl_result}
}
# *.meta files always end with done=1 so we can tell if they are complete.
# Returns grep's status: 0 when the file $1 contains "done=1".
is_complete_metadata() {
  local meta_file="$1"
  grep -q "done=1" "${meta_file}"
}
# Remove the given report path.
#
# Arguments:
#   $1 - path of any file belonging to the report (typically the .meta file).
# Every file sharing that path's base name (the path minus its last
# ".suffix") is deleted. Always returns rm's status, or 0 when nothing is
# attempted.
remove_report() {
  local base="${1%.*}"
  # With an empty base the glob below would be ".*" and delete dotfiles in
  # the current directory, so refuse to do anything in that case.
  if [ -z "${base}" ]; then
    lecho "Not removing report: '$1' has an empty base name."
    return 0
  fi
  rm -f -- "${base}".*
}
# Send all crashes from the given directory.
#
# Arguments:
#   $1 - crash spool directory; nothing happens if it does not exist.
#
# First removes files older than 24 hours that have no matching .meta file.
# Then walks the .meta files, most recently modified first, and for each one
# either removes the report (unknown kind, uploading disabled, unofficial
# image, stale incomplete metadata), leaves it for later, or sends it and
# removes it on success. Processing of the directory stops early, with
# status 0, in guest mode, on 3G, or once the daily rate would be exceeded.
#
# File lists are read one line at a time on file descriptor 3, so names
# containing spaces or glob characters are handled as single paths and the
# commands run inside the loops keep their normal stdin.
send_crashes() {
  local dir="$1"
  local listing
  local old_file
  local meta_path

  if [ ! -d "${dir}" ]; then
    return
  fi

  # Consider any old files which still have no corresponding meta file
  # as orphaned, and remove them. A failing find must not abort the script.
  listing="$("${FIND}" "${dir}" -mindepth 1 -mmin +$((24 * 60)) -type f)" \
    || true
  while IFS= read -r old_file <&3; do
    # An empty listing still produces one blank line.
    [ -n "${old_file}" ] || continue
    if [ ! -e "$(get_base "${old_file}").meta" ]; then
      lecho "Removing old orphaned file: ${old_file}."
      rm -f -- "${old_file}"
    fi
  done 3<<EOF
${listing}
EOF

  # Look through all metadata (*.meta) files, if any exist. Cycle through
  # them most recent first. That way if we're about to exceed the daily
  # rate, we send the most recent minidumps. ls fails when there are no
  # .meta files; that simply yields an empty listing.
  listing="$(ls -1t "${dir}"/*.meta 2>/dev/null)" || true
  while IFS= read -r meta_path <&3; do
    [ -n "${meta_path}" ] || continue
    lecho "Considering metadata ${meta_path}."

    local kind="$(get_kind "${meta_path}")"
    if [ "${kind}" != "minidump" ] && \
       [ "${kind}" != "kcrash" ] && \
       [ "${kind}" != "log" ]; then
      lecho "Unknown report kind ${kind}. Removing report."
      remove_report "${meta_path}"
      continue
    fi

    if "${METRICS_CLIENT}" -g; then
      lecho "Guest mode has been entered. Delaying crash sending until exited."
      return 0
    fi

    if ! "${METRICS_CLIENT}" -c; then
      lecho "Uploading is disabled. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    if ! is_mock && ! is_official_image; then
      lecho "Not an official OS version. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    if is_on_3g; then
      lecho "Not sending crash reports while on 3G, saving for later."
      return 0
    fi

    if ! is_complete_metadata "${meta_path}"; then
      # This report is incomplete, so if it's old, just remove it.
      local old_meta="$("${FIND}" "${dir}" -mindepth 1 \
        -name "$(basename "${meta_path}")" -mmin +$((24 * 60)) -type f)"
      if [ -n "${old_meta}" ]; then
        lecho "Removing old incomplete metadata."
        remove_report "${meta_path}"
      else
        lecho "Ignoring recent incomplete metadata."
      fi
      continue
    fi

    if ! check_rate; then
      lecho "Sending ${meta_path} would exceed rate. Leaving for later."
      return 0
    fi

    if ! send_crash "${meta_path}"; then
      lecho "Problem sending ${meta_path}, not removing."
      continue
    fi

    # Send was successful, now remove.
    lecho "Successfully sent crash ${meta_path} and removing."
    remove_report "${meta_path}"
  done 3<<EOF
${listing}
EOF
}
# Entry point: sends the pending crash reports from the system-wide and the
# user-specific crash directories.
#
# Exits with status 1 when sending is paused (${PAUSE_CRASH_SENDING} exists
# and OVERRIDE_PAUSE_SENDING is 0), when a required dependency is not
# executable/searchable, or when interrupted by INT or TERM.
# check_not_already_running exits the script if another instance is active.
main() {
  # cleanup runs exactly once, on exit. INT and TERM are turned into an exit
  # so that the script stops instead of resuming after cleanup has already
  # removed the run file and the temp directory.
  trap cleanup EXIT
  trap 'exit 1' INT TERM

  if [ -e "${PAUSE_CRASH_SENDING}" ] && \
     [ ${OVERRIDE_PAUSE_SENDING} -eq 0 ]; then
    lecho "Exiting early due to ${PAUSE_CRASH_SENDING}."
    exit 1
  fi

  check_not_already_running

  local dependency
  for dependency in "${FIND}" "${METRICS_CLIENT}" \
                    "${RESTRICTED_CERTIFICATES_PATH}"; do
    if [ ! -x "${dependency}" ]; then
      lecho "Fatal: Crash sending disabled: ${dependency} not found."
      exit 1
    fi
  done

  # Scratch space for this run; cleanup removes it.
  TMP_DIR="$(mktemp -d /tmp/crash_sender.XXXX)"

  # Send system-wide crashes
  send_crashes "/var/spool/crash"

  # Send user-specific crashes
  send_crashes "/home/chronos/user/crash"
}
# Run the script; no command-line arguments are passed on to main.
main