I'm trying to use boto3 in an AWS Glue job to call a Lambda function, but without results.
I uploaded a zip with the libraries, like the examples by AWS, and also tried without a zip.
With the zip, the error is "Unable to load data for: endpoints".
When I try to invoke without the zip, it ends in a timeout exception.
import boto3
client = boto3.client('lambda' , region_name='us-east-1')
r_lambda = client.invoke(FunctionName='S3GlueJson')
Can someone help me?
In Python, use the Boto3 Lambda client's invoke(). For example, you can create a Lambda container and then call it from a Glue job:
import json
import logging
import boto3
import pandas as pd

logger = logging.getLogger(__name__)
lambda_client = boto3.client('lambda', region_name='us-east-1')

def get_predictions(df):
    # Call the getPredictions Lambda container and wait for its response
    response = lambda_client.invoke(
        FunctionName='getPredictions',
        InvocationType='RequestResponse',
        LogType='Tail',
        Payload=df
    )
    logger.info('Received response from Lambda container.')
    # Read and decode the JSON payload returned by the Lambda
    data = response["Payload"].read().decode('utf-8')
    x = json.loads(data)
    df_pred = pd.DataFrame.from_dict(x)
    return df_pred

# df is an existing pandas DataFrame with the rows to send
dfjson = df.to_json()
df_pred = get_predictions(dfjson)
df_pred.head()
If you want to call a Glue job from a Lambda function, you can do it like this:
import logging
import boto3

logger = logging.getLogger(__name__)

# Placeholder: set this to the name of your Glue job
JOB_NAME = 'my-glue-job'

glue = boto3.client(service_name='glue', region_name='us-east-1',
                    endpoint_url='https://glue.us-east-1.amazonaws.com')

# Start the job
myNewJobRun = glue.start_job_run(JobName=JOB_NAME)

# Get the current state of the job, to be sure it's running
status = glue.get_job_run(JobName=JOB_NAME, RunId=myNewJobRun['JobRunId'])
logger.info('JOB State {}: {}'.format(
    JOB_NAME, status['JobRun']['JobRunState']))
As job execution can take some time to finish, it's better not to wait for it inside the Lambda function.
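If something does need to block until the run finishes (for example a script running outside Lambda), a minimal polling sketch along these lines should work; it reuses the names from the snippet above, and the 30-second interval is arbitrary:

import time

# Poll until Glue reports a terminal state for this run
while True:
    status = glue.get_job_run(JobName=JOB_NAME, RunId=myNewJobRun['JobRunId'])
    state = status['JobRun']['JobRunState']
    if state in ('SUCCEEDED', 'FAILED', 'STOPPED', 'TIMEOUT'):
        break
    time.sleep(30)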
I'm trying to add some basic error handling and logging to a working Lambda function. After adding these elements, I now receive this error:
{
    "errorMessage": "Unable to marshal response: sqs.Queue(url='https://us-west-2.queue.amazonaws.com/225565555556/Messages') is not JSON serializable",
    "errorType": "Runtime.MarshalError",
    "stackTrace": []
}
My searches on Stack Overflow have led me to believe something needs to be converted to a string object, but I don't know where to fix that. Here is the entire function:
# Backport print_function for backwards compatibility
from __future__ import print_function
import logging
# Use built-in package for encoding/decoding JSON data
import json
# Module required to work with Boto3 environment variables
import os
# Module provides classes for manipulating date/time
from datetime import datetime
# AWS Python SDK module
import boto3
from botocore.exceptions import ClientError

# Reference function environment variables
QUEUE_NAME = os.environ['QUEUE_NAME']
MAX_QUEUE_MESSAGES = os.environ['MAX_QUEUE_MESSAGES']
DYNAMODB_TABLE = os.environ['DYNAMODB_TABLE']

# Create AWS service resource objects
sqs = boto3.resource('sqs')
dynamodb = boto3.resource('dynamodb')

logger = logging.getLogger(__name__)

# Define function entry point
def lambda_handler(event, context):
    # Use service resource to call API to retrieve SQS queue name
    try:
        queue = sqs.get_queue_by_name(QueueName=QUEUE_NAME)
        logger.info("Got queue '%s' with URL=%s", QUEUE_NAME, queue.url)
    except ClientError as error:
        logger.exception("Couldn't get queue named %s.", QUEUE_NAME)
        raise error
    else:
        return queue

    # Print the number of messages waiting in queue for consumer
    print("ApproximateNumberOfMessages:",
          queue.attributes.get('ApproximateNumberOfMessages'))

    # Iterate through message event records
    for message in event['Records']:
        print("Starting your Lambda Function...")
        body = message["body"]
        id = message['messageId']
        print(str(body))

        # Write message to DynamoDB
        table = dynamodb.Table(DYNAMODB_TABLE)
        # Call DDB API to add message item to table variable
        response = table.put_item(
            Item={
                'MessageId': message['messageId'],
                'Body': message['body'],
                'Timestamp': datetime.now().isoformat()
            },
        )
        print("Wrote message to DynamoDB:", json.dumps(response))
The resolution to this error was to change the else statement from:
else:
    return queue
To:
else:
    return str(queue)
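The root cause is that the handler was returning the boto3 sqs.Queue resource object itself, which the Lambda runtime cannot marshal to JSON. Converting it to a string works; returning an already-serializable value such as the queue URL should work just as well, for example:

else:
    # queue.url is a plain string, so the runtime can serialize it
    return queue.url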
Pretty new to async, so here is my question, and thank you in advance.
Hi all, a very simple question that I might be thinking too much into.
I am trying to access this Cassandra client outside of the listeners defined below, which get registered to a Sanic main app.
I need the session in order to run an update query that will execute asynchronously. I can definitely connect and even query from the 'setup_cassandra_session_listener' method below, but I'm having a tough time figuring out how to expose this Cassandra session so I can access it elsewhere.
import logging

from aiocassandra import aiosession
from cassandra.cluster import Cluster
from sanic import Sanic

from config import CLUSTER_HOST, TABLE_NAME, CASSANDRA_KEY_SPACE, CASSANDRA_PORT, DATA_CENTER, DEBUG_LEVEL, LOGGER_FORMAT

log = logging.getLogger('sanic')
log.setLevel('INFO')

cassandra_cluster = None

def setup_cassandra_session_listener(app, loop):
    global cassandra_cluster
    cassandra_cluster = Cluster([CLUSTER_HOST], CASSANDRA_PORT, DATA_CENTER)
    session = cassandra_cluster.connect(CASSANDRA_KEY_SPACE)
    metadata = cassandra_cluster.metadata
    app.session = cassandra_cluster.connect(CASSANDRA_KEY_SPACE)
    log.info('Connected to cluster: ' + metadata.cluster_name)
    aiosession(session)
    app.cassandra = session

def teardown_cassandra_session_listener(app, loop):
    global cassandra_cluster
    cassandra_cluster.shutdown()

def register_cassandra(app: Sanic):
    app.listener('before_server_start')(setup_cassandra_session_listener)
    app.listener('after_server_stop')(teardown_cassandra_session_listener)
Here is a working example that should do what you need. It does not actually run Cassandra (since I have no experience doing that). But, in principle this should work with any database connection you need to manage across the lifespan of your running server.
from sanic import Sanic
from sanic.response import text

app = Sanic()

class DummyCluser:
    def connect(self):
        print("Connecting")
        return "session"

    def shutdown(self):
        print("Shutting down")

def setup_cassandra_session_listener(app, loop):
    # No global variables needed
    app.cluster = DummyCluser()
    app.session = app.cluster.connect()

def teardown_cassandra_session_listener(app, loop):
    app.cluster.shutdown()

def register_cassandra(app: Sanic):
    # Changed these listeners to be more friendly if running with an ASGI server
    app.listener('after_server_start')(setup_cassandra_session_listener)
    app.listener('before_server_stop')(teardown_cassandra_session_listener)

@app.get("/")
async def get(request):
    return text(app.session)

if __name__ == "__main__":
    register_cassandra(app)
    app.run(debug=True)
The idea is that you attach the session to your app instance (as you did) and are then able to simply access it inside your routes with request.app.
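In your real setup (not the dummy cluster above), a route could then reach the Cassandra session through the request object. Here is a minimal sketch, assuming aiosession() has already patched the session so it exposes the awaitable execute_future() coroutine; the table and column names are made up for illustration:

from sanic.response import json as json_response

@app.put("/users/<user_id>")
async def update_user(request, user_id):
    # request.app is the same Sanic app the listener attached the session to
    session = request.app.session
    # execute_future is the awaitable wrapper aiocassandra adds via aiosession()
    await session.execute_future(
        "UPDATE users SET name = %s WHERE id = %s",
        (request.json["name"], user_id),
    )
    return json_response({"updated": user_id})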
I am trying to create a tag using the ec2.resource method, NOT ec2.client. I can create it with the client, but I'm trying to figure it out with the resource. I am getting the error "'dict' object has no attribute 'create_tags'". I have provided my code. I am comparing a certain tag in an if statement, but when I use the create_tags method I get the error.
import boto3
import collections
import sys

ec2 = boto3.resource('ec2', region_name='us-east-2')

def lambda_handler(event, context):
    vol = ec2.Volume(id='1256')
    for delV in vol.tags:
        delV.create_tags(Tags=[{'Key':'Name', 'Value':'Test1'}])
Try this; you don't need to iterate over the existing tags the way you are doing:
import boto3
import collections
import sys

ec2 = boto3.resource('ec2', region_name='us-east-2')

def lambda_handler(event, context):
    my_list_of_ids = [1256, 1234, 2300]
    for volid in my_list_of_ids:
        vol = ec2.Volume(id=volid)
        # Each tag is a plain dict with 'Key' and 'Value' entries
        for tag in vol.tags:
            if tag['Key'] == 'Name' and tag['Value'] == 'Test1':
                print("Exists")
        # create_tags is called on the Volume resource, not on a tag dict
        vol.create_tags(Tags=[{'Key':'Name', 'Value':'Test1'}])
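If the intent of the check is to avoid re-tagging volumes that already carry the tag, a small variation of the same calls could skip create_tags when the tag is present; this is just a sketch using the key/value from the example:

def tag_if_missing(vol, key='Name', value='Test1'):
    # vol.tags is None when the volume has no tags yet
    existing = vol.tags or []
    if any(t['Key'] == key and t['Value'] == value for t in existing):
        print("Exists")
        return
    vol.create_tags(Tags=[{'Key': key, 'Value': value}])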
What I am looking to do is set an instance to standby mode when it hits an alarm state. I already have an alarm set up to detect when my instance hits 90% CPU for a while. The alarm currently sends a Slack and text message via SNS, which calls a Lambda function. What I would like to add is having the instance go into standby mode. The instances are in an Auto Scaling group.
I found that you can perform this through the CLI using the command:
aws autoscaling enter-standby --instance-ids i-66b4f7d5be234234234 --auto-scaling-group-name my-asg --should-decrement-desired-capacity
You can also do this with boto3:
import boto3

# 'client' is an Auto Scaling client
client = boto3.client('autoscaling')

response = client.enter_standby(
    InstanceIds=[
        'string',
    ],
    AutoScalingGroupName='string',
    ShouldDecrementDesiredCapacity=True|False
)
I assume I need to write another Lambda function that will be triggered by SNS that will use the boto3 code to do this?
Is there a better/easier way before I start?
I already have the InstanceId passed to the Lambda in the event, so I will have to add the ASG name to the event as well.
Is there a way to get the ASG name in the Lambda function when I already have the Instance ID? Then I do not have to pass it in with the event.
Thanks!
Your question has a couple sub-parts, so I'll try to answer them in order:
I assume I need to write another Lambda function that will be triggered by SNS that will use the boto3 code to do this?
You don't need to, you could overload your existing function. I could see a valid argument for either separate functions (separation of concerns) or one function (since "reacting to CPU hitting 90%" is basically "one thing").
Is there a better/easier way before I start?
I don't know of any other way you could do it, other than CloudWatch -> SNS -> Lambda.
Is there a way to get the ASG name in the Lambda function when I already have the Instance ID?
Yes, see this question for an example. It's up to you whether doing it in the Lambda or passing an additional parameter looks like the cleaner option.
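As a sketch of one way to do the lookup (not necessarily the approach in the linked question), the Auto Scaling API can be asked directly which group an instance belongs to:

import boto3

asg_client = boto3.client('autoscaling')

def get_asg_name(instance_id):
    # describe_auto_scaling_instances reports the ASG an in-service instance belongs to
    resp = asg_client.describe_auto_scaling_instances(InstanceIds=[instance_id])
    instances = resp['AutoScalingInstances']
    return instances[0]['AutoScalingGroupName'] if instances else None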
For anyone interested, here is what I came up with for the Lambda function (in Python):
# Puts the instance in standby mode, which takes it off the load balancer,
# and a replacement unit is spun up to take its place
#
import json
import boto3

ec2_client = boto3.client('ec2')
asg_client = boto3.client('autoscaling')

def lambda_handler(event, context):
    # Get the id from the event JSON
    msg = event['Records'][0]['Sns']['Message']
    msg_json = json.loads(msg)
    id = msg_json['Trigger']['Dimensions'][0]['value']
    print("Instance id is " + str(id))

    # Capture all the info about the instance so we can extract the ASG name later
    response = ec2_client.describe_instances(
        Filters=[
            {
                'Name': 'instance-id',
                'Values': [str(id)]
            },
        ],
    )

    # Get the ASG name from the response JSON
    #autoscaling_name = response['Reservations'][0]['Instances'][0]['Tags'][1]['Value']
    tags = response['Reservations'][0]['Instances'][0]['Tags']
    autoscaling_name = next(t["Value"] for t in tags if t["Key"] == "aws:autoscaling:groupName")
    print("Autoscaling name is - " + str(autoscaling_name))

    # Put the instance in standby
    response = asg_client.enter_standby(
        InstanceIds=[
            str(id),
        ],
        AutoScalingGroupName=str(autoscaling_name),
        ShouldDecrementDesiredCapacity=False
    )
I wanted to ask if anyone has ever saved JMeter test results (sampler names, duration, pass/fail) to Datadog? Kind of like the backend listener for InfluxDB/Graphite, but for Datadog. jmeter-plugins has no such plugin. Datadog seems to offer something called a "JMX integration", but I'm not sure whether that is what I need.
I figured out how to do this using the Datadog API (https://docs.datadoghq.com/api/?lang=python#post-timeseries-points). The following Python script takes in the JTL file (the JMeter results) and posts the transaction name, response time, and status (pass/fail) to Datadog.
#!/usr/bin/env python3
import sys
import pandas as pd
from datadog import initialize, api

options = {
    'api_key': '<API_KEY>',
    'app_key': '<APPLICATION_KEY>'
}

metrics = []

def get_current_metric(timestamp, label, elapsed, success):
    metric = {}
    metric.update({'metric': 'jmeter'})
    metric.update({'points': [(timestamp, elapsed)]})
    curtags = {}
    curtags.update({'testcase': label})
    curtags.update({'success': success})
    metric.update({'tags': curtags})
    return metric

initialize(**options)

jtl_file = sys.argv[1]
df = pd.read_csv(jtl_file)

for index, row in df.iterrows():
    timestamp = row['timeStamp']/1000
    label = row['label']
    elapsed = row['elapsed']
    success = str(row['success'])
    metric = get_current_metric(timestamp, label, elapsed, success)
    metrics.append(metric)

api.Metric.send(metrics)
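Assuming the script is saved as, say, jtl_to_datadog.py (the name is arbitrary) and JMeter wrote the .jtl results in CSV format (which pandas.read_csv expects), it would be run like:

python3 jtl_to_datadog.py results.jtl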