#!/bin/sh
# Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Uploads queued crash reports (described by *.meta files) with curl, subject
# to consent, a daily rate limit and a random delay before each upload.

set -e

# Product ID in crash report.
CHROMEOS_PRODUCT=ChromeOS

# Set to 1 once this process owns RUN_FILE and must remove it on exit.  We
# don't want to always remove it since it may belong to a pre-existing
# crash_sender process.
CLEAN_UP_RUN_FILE=0

# File whose existence implies crash reports may be sent, and whose
# contents include our machine's anonymized guid.
CONSENT_ID="/home/chronos/Consent To Send Stats"

# Path to find, which is required for computing the crash rate.
FIND="/usr/bin/find"

# Set this to 1 in the environment to allow uploading crash reports
# for unofficial versions.
FORCE_OFFICIAL=${FORCE_OFFICIAL:-0}

# Path to hardware class description.
HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"

# Ignore PAUSE_CRASH_SENDING file if set.
OVERRIDE_PAUSE_SENDING=${OVERRIDE_PAUSE_SENDING:-0}

# Maximum crashes to send per day.
MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}

# Path to metrics_client.
METRICS_CLIENT="/usr/bin/metrics_client"

# File whose existence mocks crash sending.  If empty we pretend the
# crash sending was successful, otherwise unsuccessful.
MOCK_CRASH_SENDING="/tmp/mock-crash-sending"

# File whose existence causes crash sending to be delayed (for testing).
# Must be stateful to enable testing kernel crashes.
PAUSE_CRASH_SENDING="/var/lib/crash_sender_paused"

# URL to send official build crash reports to.
# NOTE(review): this is plain HTTP; consider HTTPS if the endpoint and the
# image's CA bundle support it -- confirm before changing.
REPORT_UPLOAD_PROD_URL="http://clients2.google.com/cr/report"

# File whose existence implies we're running and not to start again.
RUN_FILE="/var/run/crash_sender.pid"

# Maximum time to sleep between sends.
SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}

# The syslog tag for all logging we emit: "<script name>[<pid>]".
TAG="$(basename "$0")[$$]"

# Directory to store timestamp files indicating the uploads in the past 24
# hours.
TIMESTAMPS_DIR="/var/lib/crash_sender"

# Temp directory for this process; empty until main creates it.
TMP_DIR=""
# Log a message to syslog under this script's tag.  Every argument is handed
# to logger(1) unchanged.
lecho() {
  local tag="${TAG}"
  logger -t "${tag}" "$@"
}
# Succeeds (status 0) when the mock marker file MOCK_CRASH_SENDING exists as
# a regular file, i.e. uploads are to be simulated; fails (status 1) otherwise.
is_mock() {
  if [ -f "${MOCK_CRASH_SENDING}" ]; then
    return 0
  fi
  return 1
}
# Exit handler: removes this process's temp directory (if one was created),
# removes RUN_FILE when this process owns it, and in mock mode logs a
# completion marker.
cleanup() {
  # TMP_DIR stays empty until main has created the directory.
  [ -z "${TMP_DIR}" ] || rm -rf "${TMP_DIR}"

  # Only the process that claimed the run file may delete it.
  if [ ${CLEAN_UP_RUN_FILE} -eq 1 ]; then rm -f "${RUN_FILE}"; fi

  if is_mock; then
    # For testing purposes, emit a message to the log so that the test
    # knows when it has received all the messages from this run.
    lecho "crash_sender done."
  fi
}
# Claim RUN_FILE for this process, or exit with status 1 if the process that
# is recorded in it still appears to be alive.  Sets CLEAN_UP_RUN_FILE=1 when
# this process becomes the owner of the file.
check_not_already_running() {
  # With noclobber the redirection fails if RUN_FILE already exists, so a
  # successful write means nobody else had claimed it.
  set -o noclobber
  if echo $$ 2>/dev/null > "${RUN_FILE}"; then
    set +o noclobber
    CLEAN_UP_RUN_FILE=1
    return
  fi
  set +o noclobber

  # The file already exists: see whether its recorded pid is still around.
  local owner_pid=$(cat "${RUN_FILE}")
  if [ -f "/proc/${owner_pid}/cmdline" ]; then
    # This could just be an unrelated process that reused the pid, but it's
    # ok to be conservative.
    lecho "Already running. Exiting now."
    exit 1
  fi

  # Dangling run file: take it over.  Note that this write may be executed
  # by two crash_senders that simultaneously reap the same dangling file.
  CLEAN_UP_RUN_FILE=1
  echo $$ > "${RUN_FILE}"
}
# Succeeds when uploads should be treated as coming from an official build:
# either FORCE_OFFICIAL is non-zero, or the CHROMEOS_RELEASE_DESCRIPTION line
# of /etc/lsb-release contains the word "Official".
is_official() {
  if [ ${FORCE_OFFICIAL} -ne 0 ]; then
    return 0
  fi
  grep '^CHROMEOS_RELEASE_DESCRIPTION' /etc/lsb-release | grep -q 'Official'
}
# Generate a uniform random number in 0..max-1 and print it on stdout.
# Arguments: $1 - exclusive upper bound (max).
# If max is missing, non-numeric or not positive there is no valid range, so
# 0 is printed instead of failing on a modulo-by-zero.
generate_uniform_random() {
  local max="$1"

  # The test errors out (non-zero) for empty/non-numeric input as well.
  if ! [ "${max:-0}" -gt 0 ] 2>/dev/null; then
    echo 0
    return 0
  fi

  # Read 4 bytes of entropy as one unsigned decimal integer.
  local random="$(od -An -N4 -tu /dev/urandom)"
  echo $((random % max))
}
# Placeholder for 3G detection: currently always reports "not on 3G" by
# returning a non-zero status.  See crosbug.com/3304.
is_on_3g() {
  false
}
# Check if sending a crash now does not exceed the maximum 24hr rate and
# commit to doing so, if not.
# Globals: TIMESTAMPS_DIR, FIND, MAX_CRASH_RATE (all read).
# Returns: 0 after recording a new timestamp file (the send may proceed),
#          1 if MAX_CRASH_RATE sends were already recorded in the last 24h.
check_rate() {
  local sends_in_24hrs=0
  local stamp

  mkdir -p "${TIMESTAMPS_DIR}"

  # Only consider sends recorded in the past 24 hours by removing all older
  # timestamp files.
  "${FIND}" "${TIMESTAMPS_DIR}" -mindepth 1 -mmin +$((24 * 60)) \
    -exec rm -- '{}' ';'

  # Count the remaining timestamp files.  An unmatched glob is left as the
  # literal pattern, so skip anything that does not actually exist; this
  # makes an empty directory count as 0 rather than 1.
  for stamp in "${TIMESTAMPS_DIR}"/*; do
    [ -e "${stamp}" ] || continue
    sends_in_24hrs=$((sends_in_24hrs + 1))
  done

  lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
  if [ "${sends_in_24hrs}" -ge "${MAX_CRASH_RATE}" ]; then
    lecho "Cannot send more crashes:"
    lecho " current ${sends_in_24hrs}send/24hrs >= " \
      "max ${MAX_CRASH_RATE}send/24hrs"
    return 1
  fi

  # Commit to this send by dropping a fresh timestamp file.
  mktemp "${TIMESTAMPS_DIR}"/XXXX > /dev/null
  return 0
}
# Prints the given path with its final ".suffix" removed, e.g.
# name.01234.5678.9012 for name.01234.5678.9012.meta.  A path without any
# dot is printed unchanged.
get_base() {
  local path="$1"
  echo "${path%.*}"
}
# Prints whatever follows the last "." of the given path (its extension).
# A path without any dot is printed unchanged.
get_extension() {
  local path="$1"
  echo "${path##*.}"
}
# Print which kind of report the given metadata file relates to.
# Arguments: $1 - path to the .meta file.
# Output: "unknown" if the payload named in the metadata is not readable,
#         "minidump" if the payload's extension is "dmp", otherwise the
#         payload's extension itself.
get_kind() {
  local payload_path="$(get_key_value "$1" "payload")"

  if [ -r "${payload_path}" ]; then
    local ext="$(get_extension "${payload_path}")"
    case "${ext}" in
      dmp) echo "minidump" ;;
      *) echo "${ext}" ;;
    esac
    return
  fi

  lecho "Missing payload: ${payload_path}"
  echo "unknown"
}
# Print the value stored for a key in a "key=value" formatted file.
# Arguments: $1 - file to read, $2 - key to look up.
# Output: everything after the first "=" on the first line that starts with
#         "<key>=", or the literal string "undefined" when there is no such
#         line or the file cannot be read.
# The key is matched literally and only at the start of a line, so looking up
# "ver" is not confused by a line such as "exec_ver=...".
get_key_value() {
  local file="$1" key="$2" line

  if [ -r "${file}" ]; then
    # "|| [ -n ... ]" also handles a final line without trailing newline.
    while IFS= read -r line || [ -n "${line}" ]; do
      case "${line}" in
        "${key}="*)
          printf '%s\n' "${line#*=}"
          return 0
          ;;
      esac
    done < "${file}"
  fi

  echo "undefined"
}
# Print the board name: the CHROMEOS_RELEASE_BOARD value from
# /etc/lsb-release, as reported by get_key_value.
# The value is passed through verbatim rather than via an unquoted
# "echo $(...)", which would word-split and glob-expand it.
get_board() {
  get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_BOARD"
}
# Print the hardware class: the contents of HWCLASS_PATH when that file is
# readable, otherwise the string "unknown".
get_hardware_class() {
  if [ ! -r "${HWCLASS_PATH}" ]; then
    echo "unknown"
    return
  fi
  cat "${HWCLASS_PATH}"
}
# Upload the crash report described by one metadata file.
# Arguments: $1 - path to the .meta file.
# Globals:   REPORT_UPLOAD_PROD_URL, SECONDS_SEND_SPREAD, CHROMEOS_PRODUCT,
#            CONSENT_ID, MOCK_CRASH_SENDING, TMP_DIR (all read).
# Returns:   0 on a successful (or successfully mocked) upload; 1 for a
#            mocked failure or a failed sleep; otherwise curl's exit status.
# In mock mode nothing is uploaded: an empty MOCK_CRASH_SENDING file means
# "pretend success", a non-empty one means "pretend failure".
send_crash() {
  local meta_path="$1"
  local report_payload="$(get_key_value "${meta_path}" "payload")"
  local kind="$(get_kind "${meta_path}")"
  local exec_name="$(get_key_value "${meta_path}" "exec_name")"
  local sleep_time="$(generate_uniform_random "${SECONDS_SEND_SPREAD}")"
  local url="${REPORT_UPLOAD_PROD_URL}"
  local chromeos_version="$(get_key_value "${meta_path}" "ver")"
  local board="$(get_board)"
  local hwclass="$(get_hardware_class)"
  local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
  local sig="$(get_key_value "${meta_path}" "sig")"
  # Size of the payload as it is on disk now; empty if stat fails.
  local send_payload_size="$(stat --printf=%s "${report_payload}" 2>/dev/null)"
  local curl_result=0

  lecho "Sending crash:"
  lecho " Scheduled to send in ${sleep_time}s"
  lecho " Metadata: ${meta_path} (${kind})"
  lecho " Payload: ${report_payload}"
  lecho " Version: ${chromeos_version}"
  if is_mock; then
    lecho " Product: ${CHROMEOS_PRODUCT}"
    lecho " URL: ${url}"
    lecho " Board: ${board}"
    lecho " HWClass: ${hwclass}"
    if [ "${sig}" != "undefined" ]; then
      lecho " Sig: ${sig}"
    fi
  fi
  lecho " Exec name: ${exec_name}"

  if is_mock; then
    local mock_in="$(cat "${MOCK_CRASH_SENDING}")"
    if [ "${mock_in}" = "" ]; then
      lecho "Mocking successful send"
      return 0
    else
      lecho "Mocking unsuccessful send"
      return 1
    fi
  fi

  # Spread uploads out over time with a random delay.
  if ! sleep "${sleep_time}"; then
    lecho "Sleep failed"
    return 1
  fi

  local report_id="${TMP_DIR}/report_id"
  local curl_stderr="${TMP_DIR}/curl_stderr"

  # Two extra form fields are sent.  By default they carry the payload size
  # recorded in the metadata and the size measured now; when the metadata
  # has a signature they carry that signature instead.
  local extra_key1="write_payload_size"
  local extra_value1="${write_payload_size}"
  local extra_key2="send_payload_size"
  local extra_value2="${send_payload_size}"
  # get_key_value reports a missing key as "undefined".
  if [ "${sig}" != "undefined" ]; then
    extra_key1="sig"
    extra_value1="${sig}"
    extra_key2="sig2"
    extra_value2="${sig}"
  fi

  # Capture curl's status without tripping "set -e".  "guid=<file" makes
  # curl send the contents of the consent file as the guid field.
  curl "${url}" \
    -F "prod=${CHROMEOS_PRODUCT}" \
    -F "ver=${chromeos_version}" \
    -F "upload_file_${kind}=@${report_payload}" \
    -F "board=${board}" \
    -F "hwclass=${hwclass}" \
    -F "exec_name=${exec_name}" \
    -F "${extra_key1}=${extra_value1}" \
    -F "${extra_key2}=${extra_value2}" \
    -F "guid=<${CONSENT_ID}" -o "${report_id}" 2>"${curl_stderr}" \
    || curl_result=$?

  if [ "${curl_result}" -eq 0 ]; then
    lecho "Crash report receipt ID $(cat "${report_id}")"
  else
    lecho "Crash sending failed with: $(cat "${curl_stderr}")"
  fi

  rm -f "${report_id}"
  return "${curl_result}"
}
# *.meta files always end with done=1 so we can tell if they are complete.
# Arguments: $1 - path to the .meta file.
# Returns: 0 if the file has a line starting with "done=1", non-zero if not.
# The match is anchored to the start of a line so that "done=1" appearing
# inside some other key's value does not mark the file as complete.
is_complete_metadata() {
  grep -q -- "^done=1" "$1"
}
# Delete a whole report: strips the final ".suffix" from the given path and
# removes every file named "<stem>.<anything>" (metadata, payload, ...).
remove_report() {
  local stem="${1%.*}"
  rm -f -- "${stem}".*
}
# Send all crashes from the given directory.
# Arguments: $1 - directory holding *.meta files and their payloads.
# Globals:   FIND, METRICS_CLIENT (read).
# Returns early (status 0) when sending has to be postponed: guest mode,
# on 3G, or the daily rate limit has been reached.
# File names are read one per line, so names containing spaces or glob
# characters are handled intact (names containing newlines are not).
send_crashes() {
  local dir="$1"
  local old_file meta_path meta_list

  if [ ! -d "${dir}" ]; then
    return
  fi

  # Consider any old files which still have no corresponding meta file
  # as orphaned, and remove them.
  "${FIND}" "${dir}" -mindepth 1 -mmin +$((24 * 60)) -type f |
    while IFS= read -r old_file; do
      if [ ! -e "$(get_base "${old_file}").meta" ]; then
        lecho "Removing old orphaned file: ${old_file}."
        rm -f -- "${old_file}"
      fi
    done

  # Cycle through metadata (*.meta) files, most recent first.  That way if
  # we're about to exceed the daily rate, we send the most recent reports.
  # ls fails when nothing matches; that just means there is nothing to do.
  meta_list="$(ls -1t "${dir}"/*.meta 2>/dev/null || true)"

  # The list is fed in on fd 3 so that the loop runs in this shell (the
  # "return"s below leave the function) and stdin is left alone for the
  # commands run inside the loop.
  while IFS= read -r meta_path <&3; do
    [ -n "${meta_path}" ] || continue
    lecho "Considering metadata ${meta_path}."

    local kind="$(get_kind "${meta_path}")"
    if [ "${kind}" != "minidump" ] && \
       [ "${kind}" != "kcrash" ] && \
       [ "${kind}" != "log" ]; then
      lecho "Unknown report kind ${kind}. Removing report."
      remove_report "${meta_path}"
      continue
    fi

    if "${METRICS_CLIENT}" -g; then
      lecho "Guest mode has been entered. Delaying crash sending until exited."
      return 0
    fi

    if ! "${METRICS_CLIENT}" -c; then
      lecho "Uploading is disabled. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    if ! is_mock && ! is_official; then
      lecho "Not an official OS version. Removing crash."
      remove_report "${meta_path}"
      continue
    fi

    if is_on_3g; then
      lecho "Not sending crash reports while on 3G, saving for later."
      return 0
    fi

    if ! is_complete_metadata "${meta_path}"; then
      # This report is incomplete, so if it's old, just remove it.  find
      # prints the path only if the file was modified more than 24h ago.
      local old_meta="$("${FIND}" "${meta_path}" -maxdepth 0 \
        -mmin +$((24 * 60)) -type f)"
      if [ -n "${old_meta}" ]; then
        lecho "Removing old incomplete metadata."
        remove_report "${meta_path}"
      else
        lecho "Ignoring recent incomplete metadata."
      fi
      continue
    fi

    if ! check_rate; then
      lecho "Sending ${meta_path} would exceed rate. Leaving for later."
      return 0
    fi

    if ! send_crash "${meta_path}"; then
      lecho "Problem sending ${meta_path}, not removing."
      continue
    fi

    # Send was successful, now remove.
    lecho "Successfully sent crash ${meta_path} and removing."
    remove_report "${meta_path}"
  done 3<<EOF
${meta_list}
EOF
}
# Entry point: honours the pause file, makes sure only one instance runs,
# checks the required helper binaries, creates the temp directory and then
# processes the system-wide and the user crash directories.
# Exits with status 1 if sending is paused or a dependency is missing.
main() {
  # cleanup must run exactly once, on the way out.  INT/TERM are turned
  # into a real exit (which then fires the EXIT trap); trapping them with
  # cleanup directly would run cleanup and then carry on executing with the
  # temp directory and run file already removed.
  trap cleanup EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM

  if [ -e "${PAUSE_CRASH_SENDING}" ] && \
     [ "${OVERRIDE_PAUSE_SENDING}" -eq 0 ]; then
    lecho "Exiting early due to ${PAUSE_CRASH_SENDING}."
    exit 1
  fi

  check_not_already_running

  local dependency
  for dependency in "${FIND}" "${METRICS_CLIENT}"; do
    if [ ! -x "${dependency}" ]; then
      lecho "Fatal: Crash sending disabled: ${dependency} not found."
      exit 1
    fi
  done

  TMP_DIR="$(mktemp -d /tmp/crash_sender.XXXX)"

  # Send system-wide crashes
  send_crashes "/var/spool/crash"

  # Send user-specific crashes
  send_crashes "/home/chronos/user/crash"
}
# Run the sender.  main is invoked without arguments.
main