Skip to content
Snippets Groups Projects
Commit abb95a68 authored by Tomasz Maczukin's avatar Tomasz Maczukin
Browse files

Merge branch 'kayma/use-imdsv2-spot' into 'main'

chore: use IMDSv2 for spot instances

Closes #62

See merge request https://gitlab.com/gitlab-org/ci-cd/docker-machine/-/merge_requests/100



Merged-by: default avatarTomasz Maczukin <tomasz@maczukin.pl>
Approved-by: default avatarAlex Hanselka <alex@gitlab.com>
Approved-by: default avatarTomasz Maczukin <tomasz@maczukin.pl>
Reviewed-by: default avatarMarco Zille <marco.zille@gmail.com>
Co-authored-by: default avatarMichael Hofmann <mhofmann@redhat.com>
parents 106a0b8b 8d7ea867
Branches
Tags
No related merge requests found
......@@ -12,7 +12,6 @@ import (
"net/url"
"strconv"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
......@@ -50,7 +49,6 @@ const (
const (
keypairNotFoundCode = "InvalidKeyPair.NotFound"
spotInstanceRequestNotFoundCode = "InvalidSpotInstanceRequestID.NotFound"
)
var (
......@@ -119,8 +117,6 @@ type Driver struct {
UserDataFile string
MetadataTokenSetting string
MetadataTokenResponseHopLimit int64
spotInstanceRequestId string
}
type clientFactory interface {
......@@ -675,98 +671,7 @@ func (d *Driver) innerCreate() error {
var instance *ec2.Instance
if d.RequestSpotInstance {
req := ec2.RequestSpotInstancesInput{
TagSpecifications: tagSpecifications,
LaunchSpecification: &ec2.RequestSpotLaunchSpecification{
ImageId: &d.AMI,
Placement: &ec2.SpotPlacement{
AvailabilityZone: &regionZone,
},
KeyName: &d.KeyName,
InstanceType: &d.InstanceType,
NetworkInterfaces: netSpecs,
Monitoring: &ec2.RunInstancesMonitoringEnabled{Enabled: aws.Bool(d.Monitoring)},
IamInstanceProfile: &ec2.IamInstanceProfileSpecification{
Name: &d.IamInstanceProfile,
},
EbsOptimized: &d.UseEbsOptimizedInstance,
BlockDeviceMappings: []*ec2.BlockDeviceMapping{bdm},
UserData: &userdata,
},
InstanceCount: aws.Int64(1),
SpotPrice: &d.SpotPrice,
}
if d.BlockDurationMinutes != 0 {
req.BlockDurationMinutes = &d.BlockDurationMinutes
}
spotInstanceRequest, err := d.getClient().RequestSpotInstances(&req)
if err != nil {
return fmt.Errorf("Error request spot instance: %v", err)
}
if spotInstanceRequest == nil || len((*spotInstanceRequest).SpotInstanceRequests) < 1 {
return fmt.Errorf("error requesting spot instance: %v", errorUnprocessableResponse)
}
d.spotInstanceRequestId = *spotInstanceRequest.SpotInstanceRequests[0].SpotInstanceRequestId
log.Info("Waiting for spot instance...")
for i := 0; i < 3; i++ {
// AWS eventual consistency means we could not have SpotInstanceRequest ready yet
err = d.getClient().WaitUntilSpotInstanceRequestFulfilled(&ec2.DescribeSpotInstanceRequestsInput{
SpotInstanceRequestIds: []*string{&d.spotInstanceRequestId},
})
if err != nil {
if awsErr, ok := err.(awserr.Error); ok {
if awsErr.Code() == spotInstanceRequestNotFoundCode {
time.Sleep(5 * time.Second)
continue
}
}
return fmt.Errorf("Error fulfilling spot request: %v", err)
}
break
}
log.Infof("Created spot instance request %v", d.spotInstanceRequestId)
// resolve instance id
for i := 0; i < 3; i++ {
// Even though the waiter succeeded, eventual consistency means we could
// get a describe output that does not include this information. Try a
// few times just in case
var resolvedSpotInstance *ec2.DescribeSpotInstanceRequestsOutput
resolvedSpotInstance, err = d.getClient().DescribeSpotInstanceRequests(&ec2.DescribeSpotInstanceRequestsInput{
SpotInstanceRequestIds: []*string{&d.spotInstanceRequestId},
})
if err != nil {
// Unexpected; no need to retry
return fmt.Errorf("Error describing previously made spot instance request: %v", err)
}
if resolvedSpotInstance == nil || len((*resolvedSpotInstance).SpotInstanceRequests) < 1 {
return fmt.Errorf("Error describing spot instance: %v", errorUnprocessableResponse)
}
maybeInstanceId := resolvedSpotInstance.SpotInstanceRequests[0].InstanceId
if maybeInstanceId != nil {
var instances *ec2.DescribeInstancesOutput
instances, err = d.getClient().DescribeInstances(&ec2.DescribeInstancesInput{
InstanceIds: []*string{maybeInstanceId},
})
if err != nil {
// Retry if we get an id from spot instance but EC2 doesn't recognize it yet; see above, eventual consistency possible
continue
}
instance = instances.Reservations[0].Instances[0]
err = nil
break
}
time.Sleep(5 * time.Second)
}
if err != nil {
return fmt.Errorf("Error resolving spot instance to real instance: %v", err)
}
} else {
inst, err := d.getClient().RunInstances(&ec2.RunInstancesInput{
req := ec2.RunInstancesInput{
TagSpecifications: tagSpecifications,
ImageId: &d.AMI,
MinCount: aws.Int64(1),
......@@ -788,7 +693,19 @@ func (d *Driver) innerCreate() error {
EbsOptimized: &d.UseEbsOptimizedInstance,
BlockDeviceMappings: []*ec2.BlockDeviceMapping{bdm},
UserData: &userdata,
})
}
if d.RequestSpotInstance {
req.InstanceMarketOptions = &ec2.InstanceMarketOptionsRequest{
MarketType: aws.String("spot"),
SpotOptions: &ec2.SpotMarketOptions{
MaxPrice: &d.SpotPrice,
},
}
if d.BlockDurationMinutes != 0 {
req.InstanceMarketOptions.SpotOptions.BlockDurationMinutes = &d.BlockDurationMinutes
}
}
inst, err := d.getClient().RunInstances(&req)
if err != nil {
return fmt.Errorf("Error launching instance: %v", err)
......@@ -797,7 +714,6 @@ func (d *Driver) innerCreate() error {
return fmt.Errorf("error launching instance: %v", errorUnprocessableResponse)
}
instance = inst.Instances[0]
}
d.InstanceId = *instance.InstanceId
......@@ -968,14 +884,6 @@ func (d *Driver) Remove() error {
multierr.Errs = append(multierr.Errs, err)
}
// In case of failure waiting for a SpotInstance, we must cancel the unfulfilled request, otherwise an instance may be created later.
// If the instance was created, terminating it will be enough for canceling the SpotInstanceRequest
if d.RequestSpotInstance && d.spotInstanceRequestId != "" {
if err := d.cancelSpotInstanceRequest(); err != nil {
multierr.Errs = append(multierr.Errs, err)
}
}
if !d.ExistingKey {
if err := d.deleteKeyPair(); err != nil {
multierr.Errs = append(multierr.Errs, err)
......@@ -989,15 +897,6 @@ func (d *Driver) Remove() error {
return multierr
}
func (d *Driver) cancelSpotInstanceRequest() error {
// NB: Canceling a Spot instance request does not terminate running Spot instances associated with the request
_, err := d.getClient().CancelSpotInstanceRequests(&ec2.CancelSpotInstanceRequestsInput{
SpotInstanceRequestIds: []*string{&d.spotInstanceRequestId},
})
return err
}
func (d *Driver) getInstance() (*ec2.Instance, error) {
instances, err := d.getClient().DescribeInstances(&ec2.DescribeInstancesInput{
InstanceIds: []*string{&d.InstanceId},
......
......@@ -39,60 +39,6 @@ func TestConfigureSecurityGroupPermissionsEmpty(t *testing.T) {
assert.Len(t, perms, 2)
}
func setupTestFailedSpotInstanceRequest(t *testing.T, r *ec2.RequestSpotInstancesOutput) (*Driver, func()) {
dir, _ := ioutil.TempDir("", "awsuserdata")
privKeyPath := filepath.Join(dir, "key")
pubKeyPath := filepath.Join(dir, "key.pub")
content := []byte("test\n")
err := ioutil.WriteFile(privKeyPath, content, 0666)
assert.NoError(t, err)
err = ioutil.WriteFile(pubKeyPath, content, 0666)
assert.NoError(t, err)
driver := NewDriver("machineFoo", "path")
driver.clientFactory = func() Ec2Client {
return &fakeEC2SpotInstance{
output: r,
err: nil,
}
}
driver.SecurityGroupNames = []string{}
driver.SSHPrivateKeyPath = privKeyPath
driver.SSHKeyPath = pubKeyPath
driver.KeyName = "foo"
driver.RequestSpotInstance = true
return driver, func() {
os.RemoveAll(dir)
}
}
func TestFailedSpotInstanceRequest(t *testing.T) {
var failureScenarios = []struct {
requestResult *ec2.RequestSpotInstancesOutput
}{
{requestResult: nil},
{
requestResult: &ec2.RequestSpotInstancesOutput{
SpotInstanceRequests: []*ec2.SpotInstanceRequest{},
},
},
}
for _, tt := range failureScenarios {
driver, cleanup := setupTestFailedSpotInstanceRequest(t, tt.requestResult)
defer cleanup()
err := driver.innerCreate()
assert.Error(t, err)
assert.Contains(t, err.Error(), errorUnprocessableResponse.Error())
}
}
func TestConfigureSecurityGroupPermissionsSshOnly(t *testing.T) {
driver := NewTestDriver()
group := securityGroup
......
......@@ -40,13 +40,4 @@ type Ec2Client interface {
RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error)
TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error)
//SpotInstances
RequestSpotInstances(input *ec2.RequestSpotInstancesInput) (*ec2.RequestSpotInstancesOutput, error)
DescribeSpotInstanceRequests(input *ec2.DescribeSpotInstanceRequestsInput) (*ec2.DescribeSpotInstanceRequestsOutput, error)
WaitUntilSpotInstanceRequestFulfilled(input *ec2.DescribeSpotInstanceRequestsInput) error
CancelSpotInstanceRequests(input *ec2.CancelSpotInstanceRequestsInput) (*ec2.CancelSpotInstanceRequestsOutput, error)
}
......@@ -97,16 +97,6 @@ func (f *fakeEC2WithLogin) DescribeAccountAttributes(input *ec2.DescribeAccountA
}, nil
}
type fakeEC2SpotInstance struct {
output *ec2.RequestSpotInstancesOutput
err error
*fakeEC2
}
func (f *fakeEC2SpotInstance) RequestSpotInstances(_ *ec2.RequestSpotInstancesInput) (*ec2.RequestSpotInstancesOutput, error) {
return f.output, f.err
}
type fakeEC2SecurityGroupTestRecorder struct {
*fakeEC2
mock.Mock
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment