blob: 7f9343f01d41d35eaa092a1ec7439fc0e9e820f7 [file] [log] [blame]
# Copyright 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Start and stop Web Page Replay."""
from telemetry.internal.util import atexit_with_log
import logging
import os
import re
import signal
import subprocess
import sys
import tempfile
import urllib
from telemetry.core import exceptions
from telemetry.core import util
from telemetry.internal import forwarders
# Directory expected to contain replay.py: the 'web-page-replay' folder inside
# Telemetry's third-party directory.
_REPLAY_DIR = os.path.join(util.GetTelemetryThirdPartyDir(), 'web-page-replay')
class ReplayError(Exception):
  """Base class for every error raised by this module."""
class ReplayNotFoundError(ReplayError):
  """Raised when a path needed to run replay does not exist.

  The exception's `args` tuple is (label, path), where label is a short
  human-readable description of what the path was expected to be.
  """

  def __init__(self, label, path):
    # Handing both values to the base initializer stores them in self.args.
    super(ReplayNotFoundError, self).__init__(label, path)

  def __str__(self):
    what, where = self.args
    return 'Path does not exist for %s: %s' % (what, where)
class ReplayNotStartedError(ReplayError):
  """Raised by ReplayServer.StartServer when replay fails to start."""
class ReplayServer(object):
  """Start and Stop Web Page Replay.

  Web Page Replay is a proxy that can record and "replay" web pages with
  simulated network characteristics -- without having to edit the pages
  by hand. With WPR, tests can use "real" web content, and catch
  performance issues that may result from introducing network delays and
  bandwidth throttling.

  Example:
    with ReplayServer(archive_path, replay_host, http_port, https_port,
                      dns_port, replay_options):
      self.NavigateToURL(start_url)
      self.WaitUntil(...)
  """

  def __init__(self, archive_path, replay_host, http_port, https_port, dns_port,
               replay_options):
    """Initialize ReplayServer.

    Args:
      archive_path: a path to a specific WPR archive (required).
      replay_host: the hostname to serve traffic.
      http_port: an integer port on which to serve HTTP traffic. May be zero
          to let the OS choose an available port.
      https_port: an integer port on which to serve HTTPS traffic. May be zero
          to let the OS choose an available port.
      dns_port: an integer port on which to serve DNS traffic. May be zero
          to let the OS choose an available port. If None DNS forwarding is
          disabled.
      replay_options: an iterable of options strings to forward to replay.py.

    Raises:
      ReplayNotFoundError: if replay.py is missing, if the archive file is
          missing (replay mode), or if the archive's directory is missing
          ('--record' mode).
    """
    self.archive_path = archive_path
    self._replay_host = replay_host
    self._use_dns_server = dns_port is not None
    self._started_ports = {}  # a dict such as {'http': 80, 'https': 443}

    # A temporary path for storing stdout & stderr of the webpagereplay
    # subprocess.
    self._temp_log_file_path = None

    replay_py = os.path.join(_REPLAY_DIR, 'replay.py')
    self._cmd_line = self._GetCommandLine(
        replay_py, self._replay_host, http_port, https_port, dns_port,
        replay_options, archive_path)

    if '--record' in replay_options:
      # When recording, the archive itself need not exist yet, only the
      # directory it will be written to. A bare file name has an empty
      # dirname, which means the current directory, not a missing one.
      archive_dir = os.path.dirname(self.archive_path) or os.curdir
      self._CheckPath('archive directory', archive_dir)
    else:
      self._CheckPath('archive file', self.archive_path)
    self._CheckPath('replay script', replay_py)

    self.replay_process = None

  @staticmethod
  def _GetLoggingLevel(log_level=None):
    """Returns the replay.py --log_level name for a numeric logging level.

    Args:
      log_level: a numeric logging level. If None (or 0), the root logger's
          current level is used instead.

    Returns:
      One of 'debug', 'info', 'warning', 'error' or 'critical'. A level that
      is not one of the five standard thresholds (e.g. NOTSET or a custom
      level) maps to the closest standard level at or below it, falling back
      to 'debug', so that no requested output is suppressed.
    """
    level = log_level or logging.getLogger().level
    thresholds = (
        (logging.CRITICAL, 'critical'),
        (logging.ERROR, 'error'),
        (logging.WARNING, 'warning'),
        (logging.INFO, 'info'),
    )
    for threshold, name in thresholds:
      if level >= threshold:
        return name
    return 'debug'

  @staticmethod
  def _GetCommandLine(replay_py, host_ip, http_port, https_port, dns_port,
                      replay_options, archive_path, log_level=None):
    """Set WPR command-line options. Can be overridden if needed.

    Args:
      replay_py: path of the replay.py script to run.
      host_ip: value for --host.
      http_port: value for --port.
      https_port: value for --ssl_port.
      dns_port: value for --dns_port, or None to pass --no-dns_forwarding.
      replay_options: extra option strings appended before the archive path.
      archive_path: path of the WPR archive; always the last argument.
      log_level: optional numeric logging level (see _GetLoggingLevel).

    Returns:
      The argument list to run, starting with the current Python interpreter.
    """
    cmd_line = [
        sys.executable,
        replay_py,
        '--host=%s' % host_ip,
        '--port=%s' % http_port,
        '--ssl_port=%s' % https_port,
    ]
    if dns_port is not None:
      # NOTE(review): _StopReplayProcess assumes replay repoints the local
      # DNS nameserver at its own DNS server only when --host is
      # '127.0.0.1'; confirm against replay.py.
      cmd_line.append('--dns_port=%s' % dns_port)
    else:
      cmd_line.append('--no-dns_forwarding')
    cmd_line.append('--use_closest_match')
    cmd_line.append(
        '--log_level=%s' % ReplayServer._GetLoggingLevel(log_level))
    cmd_line.extend(replay_options)
    cmd_line.append(archive_path)
    return cmd_line

  def _CheckPath(self, label, path):
    """Raises ReplayNotFoundError(label, path) if path does not exist."""
    if not os.path.exists(path):
      raise ReplayNotFoundError(label, path)

  def _OpenLogFile(self):
    """Opens the log file for writing, creating its directory if needed."""
    log_dir = os.path.dirname(self._temp_log_file_path)
    if not os.path.exists(log_dir):
      os.makedirs(log_dir)
    return open(self._temp_log_file_path, 'w')

  def _LogLines(self):
    """Yields the log lines; yields nothing if there is no log file."""
    log_path = self._temp_log_file_path
    # The path is None before the first start and after cleanup.
    if not log_path or not os.path.isfile(log_path):
      return
    with open(log_path) as f:
      for line in f:
        yield line

  def _IsStarted(self):
    """Returns true if the server is up and running.

    The server counts as started once the subprocess is alive, all expected
    ports have been reported in its log, and an HTTP request to
    'web-page-replay-generate-200' returns status 200.
    """
    if self.replay_process is None:
      # StartServer has not been called yet.
      return False
    if self.replay_process.poll() is not None:
      # The process terminated.
      return False

    def HasIncompleteStartedPorts():
      return ('http' not in self._started_ports or
              'https' not in self._started_ports or
              (self._use_dns_server and 'dns' not in self._started_ports))

    if HasIncompleteStartedPorts():
      self._started_ports = self._ParseLogFilePorts(self._LogLines())
      logging.info('WPR ports: %s', self._started_ports)
    if HasIncompleteStartedPorts():
      return False
    try:
      # HTTPS may require SNI (which urllib does not speak), so only check
      # that HTTP responds.
      return 200 == self._UrlOpen('web-page-replay-generate-200').getcode()
    except IOError:
      return False

  @staticmethod
  def _ParseLogFilePorts(log_lines):
    """Returns the ports on which replay listens as reported in its log file.

    Only matches HTTP, HTTPS, and DNS. One call may return only some
    of the ports depending on what has been written to the log file.

    Example log lines:
      2014-09-03 17:04:27,978 WARNING HTTP server started on 127.0.0.1:51673
      2014-09-03 17:04:27,978 WARNING HTTPS server started on 127.0.0.1:35270

    Args:
      log_lines: an iterable of log line strings.

    Returns:
      a dict with ports available in log_lines. For example,
         {}  # no ports found
         {'http': 1234, 'https': 2345, 'dns': 3456}
      If a protocol is reported more than once, the last report wins.
    """
    ports = {}
    port_re = re.compile(
        r'.*?(?P<protocol>HTTP|HTTPS|DNS)'
        r' server started on '
        r'(?P<host>[^:]*):'
        r'(?P<port>\d+)')
    for line in log_lines:
      m = port_re.match(line.strip())
      if m:
        protocol = m.group('protocol').lower()
        ports[protocol] = int(m.group('port'))
    return ports

  def StartServer(self):
    """Start Web Page Replay and verify that it started.

    Returns:
      A forwarders.PortSet(http, https, dns) tuple; with dns None if unused.

    Raises:
      ReplayNotStartedError: if Replay start-up fails.
    """
    is_posix = sys.platform.startswith('linux') or sys.platform == 'darwin'
    logging.info('Starting Web-Page-Replay: %s', self._cmd_line)
    # Forget ports from any earlier run so that _IsStarted re-reads them from
    # the new log instead of polling stale port numbers.
    self._started_ports = {}
    self._CreateTempLogFilePath()
    with open(self._temp_log_file_path, 'w') as log_fh:
      self.replay_process = subprocess.Popen(
          self._cmd_line, stdout=log_fh, stderr=subprocess.STDOUT,
          preexec_fn=(_ResetInterruptHandler if is_posix else None))
    try:
      util.WaitFor(self._IsStarted, 30)
      atexit_with_log.Register(self.StopServer)
      return forwarders.PortSet(
          self._started_ports['http'],
          self._started_ports['https'],
          self._started_ports.get('dns'),  # None if unused
      )
    except exceptions.TimeoutException:
      raise ReplayNotStartedError(
          'Web Page Replay failed to start. Log output:\n%s' %
          ''.join(self._LogLines()))

  def StopServer(self):
    """Stop Web Page Replay.

    Logs a warning and does nothing else if the server is not running,
    including when it was never started.
    """
    if self._IsStarted():
      try:
        self._StopReplayProcess()
      finally:
        # TODO(rnephew): Upload logs to google storage. crbug.com/525787
        self._CleanUpTempLogFilePath()
    else:
      logging.warning('Attempting to stop WPR server that is not running.')

  def _StopReplayProcess(self):
    """Stops the replay subprocess, escalating if it does not exit.

    First asks replay to exit through its 'web-page-replay-command-exit' URL
    and waits up to 10 seconds. If the process is still running it is sent
    SIGINT; if that cannot be done, terminate() is used. The process is then
    waited on.
    """
    if not self.replay_process:
      return

    logging.debug('Trying to stop Web-Page-Replay gracefully')
    try:
      if self._started_ports:
        self._UrlOpen('web-page-replay-command-exit').close()
    except IOError:
      # IOError is possible because the server might exit without response.
      pass

    try:
      util.WaitFor(lambda: self.replay_process.poll() is not None, 10)
    except exceptions.TimeoutException:
      try:
        # Use a SIGINT so that it can do graceful cleanup.
        self.replay_process.send_signal(signal.SIGINT)
      except Exception:  # pylint: disable=broad-except
        # On Windows, we are left with no other option than terminate().
        is_primary_nameserver_changed_by_replay = (
            self._use_dns_server and self._replay_host == '127.0.0.1')
        if is_primary_nameserver_changed_by_replay:
          # Replay changes the DNS nameserver configuration so that DNS
          # requests are resolved by replay's own DNS server. It resolves
          # all DNS requests to its own IP address so it can serve the
          # HTTP and HTTPS requests.
          # If the replay host is not '127.0.0.1', then replay skips the
          # nameserver change because it assumes a different mechanism
          # will be used to route DNS requests to replay's DNS server.
          logging.warning(
              'Unable to stop Web-Page-Replay gracefully.\n'
              'Replay changed the DNS nameserver configuration to make replay '
              'the primary nameserver. That might not be restored!')
        try:
          self.replay_process.terminate()
        except Exception:  # pylint: disable=broad-except
          # Best effort only; the process may already be gone.
          pass
      self.replay_process.wait()

  def _CreateTempLogFilePath(self):
    """Creates an empty temporary file and records its path for the log."""
    assert self._temp_log_file_path is None
    handle, self._temp_log_file_path = tempfile.mkstemp()
    # Only the path is needed; the file is reopened by name later.
    os.close(handle)

  def _CleanUpTempLogFilePath(self):
    """Deletes the temporary log file, echoing it first at DEBUG level."""
    assert self._temp_log_file_path
    if logging.getLogger('').isEnabledFor(logging.DEBUG):
      with open(self._temp_log_file_path, 'r') as f:
        wpr_log_content = '\n'.join([
            '************************** WPR LOG *****************************',
            f.read(),
            '************************** END OF WPR LOG **********************'])
      logging.debug(wpr_log_content)
    os.remove(self._temp_log_file_path)
    self._temp_log_file_path = None

  def __enter__(self):
    """Add support for with-statement."""
    self.StartServer()
    return self

  def __exit__(self, unused_exc_type, unused_exc_val, unused_exc_tb):
    """Add support for with-statement."""
    self.StopServer()

  def _UrlOpen(self, url_path, protocol='http'):
    """Open a Replay URL.

    For matching requests in the archive, Replay relies on the "Host:" header.
    For Replay command URLs, the "Host:" header is not needed.

    Args:
      url_path: WPR server request path.
      protocol: 'http' or 'https'

    Returns:
      a file-like object from urllib.urlopen
    """
    url = '%s://%s:%s/%s' % (
        protocol, self._replay_host, self._started_ports[protocol], url_path)
    # An empty proxies dict makes urllib connect directly.
    return urllib.urlopen(url, proxies={})
def _ResetInterruptHandler():
  """Restore the default SIGINT behaviour in the current process.

  Intended to run in the replay child process before it starts. Replay is
  normally stopped with an HTTP request ('web-page-replay-command-exit'),
  which matters because a graceful exit restores the DNS configuration.
  When that request fails, the fallback is to send the process SIGINT, so
  SIGINT must actually be able to stop it.

  On posix systems this works around https://crbug.com/254572, seen when
  Telemetry is run as a background command from a script: SIGINT handling
  is inherited from the parent process, and if whatever invoked us left
  SIGINT blocked or ignored (e.g. a shell script putting a process in the
  background), a SIGINT sent to the child would fail to terminate it.
  """
  default_action = signal.SIG_DFL
  signal.signal(signal.SIGINT, default_action)