Trying to get EC2 image list including BlockDeviceMappings SnapshotId - amazon-ec2

I need to get a list of EC2 images including the block device snapshot ID; the list should include only the images that belong to me.
So far I have only managed to get the list of image IDs without the extra information.
In addition, is there a way to get the list of images I own without hard-coding my account ID?
import boto3

ec2 = boto3.resource('ec2', aws_access_key_id="ID", aws_secret_access_key="ID", region_name="eu-west-1")
filter = [{'Name': 'owner-id', 'Values': ['MY-ID']}]
count_aim = 1
for each_aim in ec2.images.filter(Filters=filter):
    print(each_aim)
    count_aim = count_aim + 1
print(count_aim)

I think the following should do:
from collections import defaultdict
from pprint import pprint
import boto3

ec2 = boto3.resource('ec2', aws_access_key_id='dddddd', aws_secret_access_key='ggggfffff', region_name='eu-west-1')
filter = [{'Name': 'owner-id', 'Values': ['123455']}]
block_ids = defaultdict(list)
for each_aim in ec2.images.filter(Filters=filter):
    for block_map in each_aim.block_device_mappings:
        block_ids[each_aim.id].append(block_map['Ebs']['SnapshotId'])
pprint(dict(block_ids))
Example output:
{'ami-02b8a850c975bb610': ['snap-02f277ce5b3b670fc'],
'ami-06422cd44a94bab38': ['snap-0b0c9048f46992ee1']}
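As for the second part of the question (listing only the images you own without hard-coding the account ID): the images collection also accepts an Owners argument, and 'self' restricts the results to the calling account. A minimal sketch, assuming credentials come from the environment:
import boto3

ec2 = boto3.resource('ec2', region_name='eu-west-1')

# Owners=['self'] returns only AMIs owned by the calling account,
# so no explicit owner-id filter is needed
for image in ec2.images.filter(Owners=['self']):
    snapshot_ids = [bdm['Ebs']['SnapshotId']
                    for bdm in image.block_device_mappings
                    if 'Ebs' in bdm]
    print(image.id, snapshot_ids)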

Related

How To Use Django-Crontab in EC2 Ubuntu

My cron job:
*/30 * * * * /home/ubuntu/web-coin-crawler/venv/bin/python3 /home/ubuntu/web-coin-crawler/webcoincrawler/cron.crontab
cron.py defines a crontab() function that crawls two sites and saves the result to the Django ORM with save():
import json
import collections
import crawl_coinmarketcal as coinmarketcal
import crawl_coinscalendar as coinscalendar
import os

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
import django
import datetime

django.setup()
from crawled_data.models import BlogData


def preprocessingDict(dic: dict):
    coin_dict = collections.defaultdict(dict)
    for key, value in dic.items():
        if value['symbol'] in coin_dict[value['date']]:
            coin_dict[value['date']][value['symbol']].append([key, value['title'], value['name']])
        else:
            coin_dict[value['date']][value['symbol']] = [[key, value['title'], value['name']]]
    return coin_dict


def crontab():
    result = dict()
    urls = coinmarketcal.get_urls()
    for url in urls:
        coinmarketcal.do_crawl(url, result)
    urls = coinscalendar.get_urls()
    for url in urls:
        coinscalendar.do_crawl(url, result)
    BlogData(title="COIN_DATA", content=json.dumps(preprocessingDict(result.copy()))).save()
I checked that the cron job runs every 30 minutes.
However, no data was stored in the Django ORM.
Changing the crontab entry so that it runs the Python file itself solved it:
cron.crontab -> cron
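For reference, the corrected crontab entry would presumably look like the line below, assuming the module file is cron.py as described above (the path is the same one used in the question):
*/30 * * * * /home/ubuntu/web-coin-crawler/venv/bin/python3 /home/ubuntu/web-coin-crawler/webcoincrawler/cron.py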

How do I create a tag using resource method in boto3 aws lambda

I am trying to create a tag using the ec2 resource method, NOT the ec2 client. I can create it with the client, but I am trying to figure out how to do it with the resource. I am getting the error "errorMessage": "'dict' object has no attribute 'create_tags'". I have provided my code below. I am comparing a certain tag using an if statement, but when I call the create_tags method I get the error.
import boto3
import collections
import sys

ec2 = boto3.resource('ec2', region_name='us-east-2')

def lambda_handler(event, context):
    vol = ec2.Volume(id='1256')
    for delV in vol.tags:
        delV.create_tags(Tags=[{'Key': 'Name', 'Value': 'Test1'}])
Try this. You don't need to call create_tags on the tag dictionaries you are iterating over; create_tags is a method of the Volume resource itself:
import boto3

ec2 = boto3.resource('ec2', region_name='us-east-2')

def lambda_handler(event, context):
    # Placeholder volume IDs
    my_list_of_ids = ['vol-1256', 'vol-1234', 'vol-2300']
    for volid in my_list_of_ids:
        vol = ec2.Volume(id=volid)
        # Tag dicts use the keys 'Key' and 'Value'
        already_tagged = any(
            tag['Key'] == 'Name' and tag['Value'] == 'Test1'
            for tag in (vol.tags or [])
        )
        if already_tagged:
            print("Exists")
        else:
            # create_tags is called on the Volume resource, not on a tag dict
            vol.create_tags(Tags=[{'Key': 'Name', 'Value': 'Test1'}])
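As a side note, the EC2 service resource itself also exposes a create_tags action that can tag several resources in one call, which avoids constructing Volume objects at all. A minimal sketch (the volume IDs are placeholders):
import boto3

ec2 = boto3.resource('ec2', region_name='us-east-2')

# Tag several volumes in a single call via the service resource
ec2.create_tags(
    Resources=['vol-1256', 'vol-1234', 'vol-2300'],
    Tags=[{'Key': 'Name', 'Value': 'Test1'}],
)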

Scrapy works in shell but spider returns empty csv

I am learning Scrapy. Right now I am just trying to scrape items, and when I run the spider:
planefinder]# scrapy crawl planefinder -o /User/spider/planefinder/pf.csv -t csv
it only shows technical information and no scraped content (Crawled 0 pages ... etc.), and it returns an empty CSV file.
The problem is that when I test the XPath in the Scrapy shell it works:
>>> from scrapy.selector import Selector
>>> sel = Selector(response)
>>> flights = sel.xpath("//div[@class='col-md-12'][1]/div/div/table//tr")
>>> items = []
>>> for flt in flights:
...     item = flt.xpath("td[1]/a/@href").extract_first()
...     items.append(item)
...
>>> items
The following is my planeFinder.py code:
# -*- coding: utf-8 -*-
from scrapy.spiders import CrawlSpider
from scrapy.selector import Selector, HtmlXPathSelector
from planefinder.items import arr_flt_Item, dep_flt_Item

class planefinder(CrawlSpider):
    name = 'planefinder'
    host = 'https://planefinder.net'
    start_url = ['https://planefinder.net/data/airport/PEK/']

    def parse(self, response):
        arr_flights = response.xpath("//div[@class='col-md-12'][1]/div/div/table//tr")
        dep_flights = response.xpath("//div[@class='col-md-12'][2]/div/div/table//tr")
        for flight in arr_flights:
            arr_item = arr_flt_Item()
            arr_flt_url = flight.xpath('td[1]/a/@href').extract_first()
            arr_item['arr_flt_No'] = flight.xpath('td[1]/a/text()').extract_first()
            arr_item['STA'] = flight.xpath('td[2]/text()').extract_first()
            arr_item['From'] = flight.xpath('td[3]/a/text()').extract_first()
            arr_item['ETA'] = flight.xpath('td[4]/text()').extract_first()
            yield arr_item
Before reaching for CrawlSpider, please check the docs for Spiders. Some of the issues I've found were:
Instead of host use allowed_domains
Instead of start_url use start_urls
It seems that the page needs to have some cookies set, or maybe it's using some kind of basic anti-bot protection, and you need to land somewhere else first.
Try this (I've also changed things a bit):
# -*- coding: utf-8 -*-
from scrapy import Field, Item, Request
from scrapy.spiders import CrawlSpider, Spider

class ArrivalFlightItem(Item):
    arr_flt_no = Field()
    arr_sta = Field()
    arr_from = Field()
    arr_eta = Field()

class PlaneFinder(Spider):
    name = 'planefinder'
    allowed_domains = ['planefinder.net']
    start_urls = ['https://planefinder.net/data/airports']

    def parse(self, response):
        yield Request('https://planefinder.net/data/airport/PEK', callback=self.parse_flight)

    def parse_flight(self, response):
        flights_xpath = ('//*[contains(@class, "departure-board") and '
                         './preceding-sibling::h2[contains(., "Arrivals")]]'
                         '//tr[not(./th) and not(./td[@class="spacer"])]')
        for flight in response.xpath(flights_xpath):
            arrival = ArrivalFlightItem()
            arr_flt_url = flight.xpath('td[1]/a/@href').extract_first()
            arrival['arr_flt_no'] = flight.xpath('td[1]/a/text()').extract_first()
            arrival['arr_sta'] = flight.xpath('td[2]/text()').extract_first()
            arrival['arr_from'] = flight.xpath('td[3]/a/text()').extract_first()
            arrival['arr_eta'] = flight.xpath('td[4]/text()').extract_first()
            yield arrival
The problem here is choosing the wrong "Spider" class, as Scrapy offers several specialised ones.
The main one, and the one you should be using here, is the plain Spider rather than CrawlSpider, because CrawlSpider is meant for deeper, rule-based crawling of sites such as forums and blogs.
Just change the type of spider to:
from scrapy import Spider

class PlaneFinder(Spider):
    ...
Check the value of ROBOTSTXT_OBEY in your settings.py file. By default it's set to True (but not when you run the shell). Set it to False if you want to ignore the robots.txt file.
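In other words, the change in settings.py is a single line:
# settings.py
# Allow the spider to fetch pages that robots.txt would otherwise exclude
ROBOTSTXT_OBEY = False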

gspread data does not appear in Google Sheet

I'm trying to write sensor data to a Google Sheet. I was able to write to this same sheet a year or so ago, but now that I'm active on this project again I can't get it to work. I believe the OAuth flow has changed and I've updated my code for that change.
With the code below I get no errors, yet no data is entered in the Google Sheet. Also, if I look at Google Sheets, the "last opened" date does not reflect the time my program would/should be writing to that sheet.
I've tried numerous variations and I'm just stuck. Any suggestions would be appreciated.
#!/usr/bin/python3
# -- developed with Python 3.4.2
# External Resources
import time
import sys
import json
import httplib2  # needed for credentials.refresh() below
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import traceback

# Initialize gspread
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('MyGoogleCode.json', scope)
client = gspread.authorize(credentials)

# Start loop ________________________________________________________________
samplecount = 1
while True:
    data_time = time.strftime("%Y-%m-%d %H:%M:%S")
    row = [samplecount, data_time]
    # Append to Google sheet
    try:
        if credentials is None or credentials.invalid:
            credentials.refresh(httplib2.Http())
        GoogleDataFile = client.open('DataLogger')
        wks = GoogleDataFile.get_worksheet(1)
        wks.append_row([samplecount, data_time])
        print("worksheets", GoogleDataFile.worksheets())  # prints ID for both sheets
    except Exception as e:
        traceback.print_exc()
    print("samplecount ", samplecount, row)
    samplecount += 1
    time.sleep(5)
I found my issue. I changed three things to get gspread working:
Downloaded a newly created JSON key file (probably did not need this step).
With the target worksheet open in Chrome, I "shared" it with the service-account email address found in the JSON file (see the snippet after this list for a quick way to find that address).
In the Google developers console, I enabled the "Drive API".
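If you are not sure which address to share the sheet with, the service-account email is stored in the key file under the standard client_email field; a small sketch, using the same placeholder filename as the code below:
import json

# Print the service-account address that the spreadsheet must be shared with
with open('MyjsonFile.json') as f:
    print(json.load(f)['client_email'])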
However, the code in the original post will not refresh the token, so it stops working after 60 minutes.
The code that works (as of July 2017) is below.
It writes to a Google Sheet named "DataLogger", to the worksheet shown as Sheet2 in the Google view.
The only information unique to my setup is the name of the JSON file.
Hope this helps others.
Jon
#!/usr/bin/python3
# -- developed with Python 3.4.2
#
# External Resources __________________________________________________________
import time
import json
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import traceback

# Initialize gspread credentials
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name('MyjsonFile.json', scope)
headers = gspread.httpsession.HTTPSession(headers={'Connection': 'Keep-Alive'})
client = gspread.Client(auth=credentials, http_session=headers)
client.login()
workbook = client.open("DataLogger")
wksheet = workbook.get_worksheet(1)

# Start loop ________________________________________________________________
samplecount = 1
while True:
    data_time = time.strftime("%Y-%m-%d %H:%M:%S")
    row_data = [samplecount, data_time]
    # Re-authorize if the access token has expired
    if credentials.access_token_expired:
        client.login()
    wksheet.append_row(row_data)
    print("Number of rows in our worksheet ", wksheet.row_count)
    print("samplecount ", samplecount, row_data)
    print()
    samplecount += 1
    time.sleep(16*60)

How to create EC2 instance through boto python code

I used the following code, but it is not working:
requests = [conn.request_spot_instances(price=0.0034, image_id='ami-6989a659', count=1, type='one-time', instance_type='m1.micro')]
Use the following code to create an instance from the Python command line.
import boto.ec2

# Connect to the region with explicit credentials
conn = boto.ec2.connect_to_region(
    "us-west-2",
    aws_access_key_id="<aws access key>",
    aws_secret_access_key="<aws secret key>",
)
conn.run_instances(
    "<ami-image-id>",
    key_name="myKey",
    instance_type="t2.micro",
    security_groups=["your-security-group-here"],
)
To create an EC2 instance using Python on AWS, you need to have "aws_access_key_id_value" and "aws_secret_access_key_value".
You can store these variables in config.properties and write your code in the create-ec2-instance.py file.
Create a config.properties and save the following code in it.
aws_access_key_id_value='YOUR-ACCESS-KEY-OF-THE-AWS-ACCOUNT'
aws_secret_access_key_value='YOUR-SECRET-KEY-OF-THE-AWS-ACCOUNT'
region_name_value='region'
ImageId_value = 'ami-id'
MinCount_value = 1
MaxCount_value = 1
InstanceType_value = 't2.micro'
KeyName_value = 'name-of-ssh-key'
Create create-ec2-instance.py and save the following code in it.
import boto3

def getVarFromFile(filename):
    import imp
    f = open(filename)
    global data
    data = imp.load_source('data', '', f)
    f.close()

getVarFromFile('config.properties')

ec2 = boto3.resource(
    'ec2',
    aws_access_key_id=data.aws_access_key_id_value,
    aws_secret_access_key=data.aws_secret_access_key_value,
    region_name=data.region_name_value
)

instance = ec2.create_instances(
    ImageId=data.ImageId_value,
    MinCount=data.MinCount_value,
    MaxCount=data.MaxCount_value,
    InstanceType=data.InstanceType_value,
    KeyName=data.KeyName_value)

print(instance[0].id)
Use the following command to execute the python code.
python create-ec2-instance.py
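If you want the script to block until the new instance is actually up, the Instance object returned by create_instances exposes a waiter. An optional follow-up sketch, appended to the script above:
# Optional: wait for the instance to reach the 'running' state,
# then refresh its cached attributes and print the public IP (if any)
instance[0].wait_until_running()
instance[0].reload()
print(instance[0].public_ip_address)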
