# (source-viewer residue) blob: dda6b9288ad91ac08df51099bb6d4c6f00bca5d0
# Copyright (c) 2010 Jeremy Thurgood <firxen+boto@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
# NOTE: These tests only cover the very simple cases I needed to test
# for the InstanceGroup fix.
import xml.sax
from boto import handler
from boto.emr import emrobject
from boto.resultset import ResultSet
from tests.compat import unittest
# Canned DescribeJobFlows response (2009-01-15 namespace) used by
# test_JobFlows_example.  It describes exactly one job flow, in the
# STARTING state, with one bootstrap action, one supported product and
# one PENDING step.  Note that <member> elements also appear nested
# inside the job flow (bootstrap actions, supported products, steps,
# step args), not only as the direct children of <JobFlows>.
JOB_FLOW_EXAMPLE = b"""
<DescribeJobFlowsResponse
xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-01-15">
<DescribeJobFlowsResult>
<JobFlows>
<member>
<ExecutionStatusDetail>
<CreationDateTime>2009-01-28T21:49:16Z</CreationDateTime>
<StartDateTime>2009-01-28T21:49:16Z</StartDateTime>
<State>STARTING</State>
</ExecutionStatusDetail>
<BootstrapActions>
<member>
<BootstrapActionConfig>
<ScriptBootstrapAction>
<Args/>
<Path>s3://elasticmapreduce/libs/hue/install-hue</Path>
</ScriptBootstrapAction>
<Name>Install Hue</Name>
</BootstrapActionConfig>
</member>
</BootstrapActions>
<VisibleToAllUsers>true</VisibleToAllUsers>
<SupportedProducts>
<member>Hue</member>
</SupportedProducts>
<Name>MyJobFlowName</Name>
<LogUri>mybucket/subdir/</LogUri>
<Steps>
<member>
<ExecutionStatusDetail>
<CreationDateTime>2009-01-28T21:49:16Z</CreationDateTime>
<State>PENDING</State>
</ExecutionStatusDetail>
<StepConfig>
<HadoopJarStep>
<Jar>MyJarFile</Jar>
<MainClass>MyMailClass</MainClass>
<Args>
<member>arg1</member>
<member>arg2</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>MyStepName</Name>
<ActionOnFailure>CONTINUE</ActionOnFailure>
</StepConfig>
</member>
</Steps>
<JobFlowId>j-3UN6WX5RRO2AG</JobFlowId>
<Instances>
<Placement>
<AvailabilityZone>us-east-1a</AvailabilityZone>
</Placement>
<SlaveInstanceType>m1.small</SlaveInstanceType>
<MasterInstanceType>m1.small</MasterInstanceType>
<Ec2KeyName>myec2keyname</Ec2KeyName>
<InstanceCount>4</InstanceCount>
<KeepJobFlowAliveWhenNoSteps>true</KeepJobFlowAliveWhenNoSteps>
</Instances>
</member>
</JobFlows>
</DescribeJobFlowsResult>
<ResponseMetadata>
<RequestId>9cea3229-ed85-11dd-9877-6fad448a8419</RequestId>
</ResponseMetadata>
</DescribeJobFlowsResponse>
"""
# Canned DescribeJobFlows response (2009-03-31 namespace) used by
# test_JobFlows_completed.  It describes exactly one job flow, in the
# COMPLETED state, with an empty <BootstrapActions/>, six COMPLETED
# steps and two instance groups (roles MASTER and CORE).  The test
# asserts on those two counts, so keep them in sync when editing.
JOB_FLOW_COMPLETED = b"""
<DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
<DescribeJobFlowsResult>
<JobFlows>
<member>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<LastStateChangeReason>Steps completed</LastStateChangeReason>
<StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
<ReadyDateTime>2010-10-21T01:03:59Z</ReadyDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
</ExecutionStatusDetail>
<BootstrapActions/>
<Name>RealJobFlowName</Name>
<LogUri>s3n://example.emrtest.scripts/jobflow_logs/</LogUri>
<Steps>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>s3n://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar</Jar>
<Args>
<member>s3n://us-east-1.elasticmapreduce/libs/state-pusher/0.1/fetch</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>Setup Hadoop Debugging</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:04:22Z</EndDateTime>
</ExecutionStatusDetail>
</member>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
<Args>
<member>-mapper</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-InitialMapper.py</member>
<member>-reducer</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-InitialReducer.py</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/20/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/19/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/18/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/17/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/16/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/15/*</member>
<member>-input</member>
<member>s3://example.emrtest.data/raw/2010/10/14/*</member>
<member>-output</member>
<member>s3://example.emrtest.crunched/</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>testjob_Initial</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:04:22Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:36:18Z</EndDateTime>
</ExecutionStatusDetail>
</member>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
<Args>
<member>-mapper</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step1Mapper.py</member>
<member>-reducer</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step1Reducer.py</member>
<member>-input</member>
<member>s3://example.emrtest.crunched/*</member>
<member>-output</member>
<member>s3://example.emrtest.step1/</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>testjob_step1</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:36:18Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:37:51Z</EndDateTime>
</ExecutionStatusDetail>
</member>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
<Args>
<member>-mapper</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step2Mapper.py</member>
<member>-reducer</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step2Reducer.py</member>
<member>-input</member>
<member>s3://example.emrtest.crunched/*</member>
<member>-output</member>
<member>s3://example.emrtest.step2/</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>testjob_step2</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:37:51Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:39:32Z</EndDateTime>
</ExecutionStatusDetail>
</member>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
<Args>
<member>-mapper</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step3Mapper.py</member>
<member>-reducer</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step3Reducer.py</member>
<member>-input</member>
<member>s3://example.emrtest.step1/*</member>
<member>-output</member>
<member>s3://example.emrtest.step3/</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>testjob_step3</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:39:32Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:41:22Z</EndDateTime>
</ExecutionStatusDetail>
</member>
<member>
<StepConfig>
<HadoopJarStep>
<Jar>/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar</Jar>
<Args>
<member>-mapper</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step4Mapper.py</member>
<member>-reducer</member>
<member>s3://example.emrtest.scripts/81d8-5a9d3df4a86c-step4Reducer.py</member>
<member>-input</member>
<member>s3://example.emrtest.step1/*</member>
<member>-output</member>
<member>s3://example.emrtest.step4/</member>
</Args>
<Properties/>
</HadoopJarStep>
<Name>testjob_step4</Name>
<ActionOnFailure>TERMINATE_JOB_FLOW</ActionOnFailure>
</StepConfig>
<ExecutionStatusDetail>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<StartDateTime>2010-10-21T01:41:22Z</StartDateTime>
<State>COMPLETED</State>
<EndDateTime>2010-10-21T01:43:03Z</EndDateTime>
</ExecutionStatusDetail>
</member>
</Steps>
<JobFlowId>j-3H3Q13JPFLU22</JobFlowId>
<Instances>
<SlaveInstanceType>m1.large</SlaveInstanceType>
<MasterInstanceId>i-64c21609</MasterInstanceId>
<Placement>
<AvailabilityZone>us-east-1b</AvailabilityZone>
</Placement>
<InstanceGroups>
<member>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<InstanceRunningCount>0</InstanceRunningCount>
<StartDateTime>2010-10-21T01:02:09Z</StartDateTime>
<ReadyDateTime>2010-10-21T01:03:03Z</ReadyDateTime>
<State>ENDED</State>
<EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
<InstanceRequestCount>1</InstanceRequestCount>
<InstanceType>m1.large</InstanceType>
<Market>ON_DEMAND</Market>
<LastStateChangeReason>Job flow terminated</LastStateChangeReason>
<InstanceRole>MASTER</InstanceRole>
<InstanceGroupId>ig-EVMHOZJ2SCO8</InstanceGroupId>
<Name>master</Name>
</member>
<member>
<CreationDateTime>2010-10-21T01:00:25Z</CreationDateTime>
<InstanceRunningCount>0</InstanceRunningCount>
<StartDateTime>2010-10-21T01:03:59Z</StartDateTime>
<ReadyDateTime>2010-10-21T01:03:59Z</ReadyDateTime>
<State>ENDED</State>
<EndDateTime>2010-10-21T01:44:18Z</EndDateTime>
<InstanceRequestCount>9</InstanceRequestCount>
<InstanceType>m1.large</InstanceType>
<Market>ON_DEMAND</Market>
<LastStateChangeReason>Job flow terminated</LastStateChangeReason>
<InstanceRole>CORE</InstanceRole>
<InstanceGroupId>ig-YZHDYVITVHKB</InstanceGroupId>
<Name>slave</Name>
</member>
</InstanceGroups>
<NormalizedInstanceHours>40</NormalizedInstanceHours>
<HadoopVersion>0.20</HadoopVersion>
<MasterInstanceType>m1.large</MasterInstanceType>
<MasterPublicDnsName>ec2-184-72-153-139.compute-1.amazonaws.com</MasterPublicDnsName>
<Ec2KeyName>myubersecurekey</Ec2KeyName>
<InstanceCount>10</InstanceCount>
<KeepJobFlowAliveWhenNoSteps>false</KeepJobFlowAliveWhenNoSteps>
</Instances>
</member>
</JobFlows>
</DescribeJobFlowsResult>
<ResponseMetadata>
<RequestId>c31e701d-dcb4-11df-b5d9-337fc7fe4773</RequestId>
</ResponseMetadata>
</DescribeJobFlowsResponse>
"""
class TestEMRResponses(unittest.TestCase):
    """Parse canned DescribeJobFlows XML and check the resulting objects."""

    def _parse_xml(self, body, markers):
        """Run ``body`` through boto's SAX handler and return the ResultSet.

        :param body: Raw XML response document to parse.
        :param markers: ``(element_name, class)`` pairs, passed unchanged
            to ``ResultSet``.
        :return: The ``ResultSet`` instance that was given to the handler.
        """
        rs = ResultSet(markers)
        h = handler.XmlHandler(rs, None)
        xml.sax.parseString(body, h)
        return rs

    def _assert_fields(self, response, **fields):
        """Assert that ``response`` has each given attribute value.

        :param response: Object whose attributes are inspected.
        :param fields: Attribute name -> expected value.  A missing
            attribute raises ``AttributeError`` from ``getattr``.
        """
        for field, expected in fields.items():
            actual = getattr(response, field)
            # assertEqual, not assertEquals: the latter is a deprecated
            # alias that no longer exists on TestCase in Python 3.12+.
            self.assertEqual(expected, actual,
                             "Field %s: %r != %r" % (field, expected, actual))

    def test_JobFlows_example(self):
        """The STARTING example parses to one job flow with the expected
        fields."""
        # Unpacking into a one-element list also asserts that exactly one
        # item came back from the parse.
        [jobflow] = self._parse_xml(JOB_FLOW_EXAMPLE,
                                    [('member', emrobject.JobFlow)])
        self._assert_fields(jobflow,
                            creationdatetime='2009-01-28T21:49:16Z',
                            startdatetime='2009-01-28T21:49:16Z',
                            state='STARTING',
                            instancecount='4',
                            jobflowid='j-3UN6WX5RRO2AG',
                            loguri='mybucket/subdir/',
                            name='MyJobFlowName',
                            availabilityzone='us-east-1a',
                            slaveinstancetype='m1.small',
                            masterinstancetype='m1.small',
                            ec2keyname='myec2keyname',
                            keepjobflowalivewhennosteps='true')

    def test_JobFlows_completed(self):
        """The COMPLETED example parses to one job flow with six steps and
        two instance groups."""
        # Unpacking into a one-element list also asserts that exactly one
        # item came back from the parse.
        [jobflow] = self._parse_xml(JOB_FLOW_COMPLETED,
                                    [('member', emrobject.JobFlow)])
        self._assert_fields(jobflow,
                            creationdatetime='2010-10-21T01:00:25Z',
                            startdatetime='2010-10-21T01:03:59Z',
                            enddatetime='2010-10-21T01:44:18Z',
                            state='COMPLETED',
                            instancecount='10',
                            jobflowid='j-3H3Q13JPFLU22',
                            loguri='s3n://example.emrtest.scripts/jobflow_logs/',
                            name='RealJobFlowName',
                            availabilityzone='us-east-1b',
                            slaveinstancetype='m1.large',
                            masterinstancetype='m1.large',
                            ec2keyname='myubersecurekey',
                            keepjobflowalivewhennosteps='false')
        self.assertEqual(6, len(jobflow.steps))
        self.assertEqual(2, len(jobflow.instancegroups))