I've a spider which fetches both the data and images. I want to rename the images with the respective 'title' which i'm fetching.
Following is my code:
spider1.py
from imageToFileSystemCheck.items import ImagetofilesystemcheckItem
import scrapy
class TestSpider(scrapy.Spider):
name = 'imagecheck'
def start_requests(self):
searchterms=['keyword1','keyword2',]
for item in searchterms:
yield scrapy.Request('http://www.example.com/s?=%s' % item,callback=self.parse, meta={'item': item})
def parse(self,response):
start_urls=[]
item = response.meta.get('item')
for i in range(0,2):
link=str(response.css("div.tt a.chek::attr(href)")[i].extract())
start_urls.append(link)
for url in start_urls:
print(url)
yield scrapy.Request(url=url, callback=self.parse_info ,meta={'item': item})
def parse_info(self, response):
url=response.url
title=str(response.xpath('//*[#id="Title"]/text()').extract_first())
img_url_1=response.xpath("//img[#id='images']/#src").extract_first()
scraped_info = {
'url' : url,
'title' : title,
'image_urls': [img_url_1]
}
yield scraped_info
items.py
import scrapy
class ImagetofilesystemcheckItem(scrapy.Item):
# define the fields for your item here like:
# name = scrapy.Field()
image_urls = scrapy.Field()
images = scrapy.Field()
pass
pipelines.py
class ImagetofilesystemcheckPipeline(object):
def process_item(self, item, spider):
return item
settings.py
BOT_NAME = 'imageToFileSystemCheck'
SPIDER_MODULES = ['imageToFileSystemCheck.spiders']
NEWSPIDER_MODULE = 'imageToFileSystemCheck.spiders'
ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}
IMAGES_STORE = '/home/imageToFileSystemCheck/images/'
ROBOTSTXT_OBEY = True
Can you please help me with the required changes so that scrapy could save the scraped images in the 'title'.jpg format where title is scraped by the spider?
Create a Spider like this
class ShopeeSpider(scrapy.Spider):
_TEMP_IMAGES_STORE = "/home/crawler/scrapers/images"
custom_settings = {
'ITEM_PIPELINES': {
'coszi.pipelines.CustomImagePipeline': 400,
}
"IMAGES_STORE": _TEMP_IMAGES_STORE
}
def parse(self, response):
data = {}
data['images'] = {"image_link_here": "image_name_here"}
Then your pipelines.py should be like this
class CustomImagePipeline(ImagesPipeline):
def get_media_requests(self, item, info):
if 'images' in item:
for image_url, img_name in item['images'].iteritems():
if os.path.exists(os.path.join(item['images_path'], img_name)) == False:
request = scrapy.Request(url=image_url)
request.meta['img_name'] = img_name
request.meta['this_prod_img_folder'] = item['img_name_here']
request.dont_filter = True
yield request
def file_path(self, request, response=None, info=None):
return os.path.join(info.spider.CRAWLER_IMAGES_STORE, request.meta['this_prod_img_folder'], request.meta['img_name'])
Related
I'm new with ruby and I want to use GCP AIPlatform but I'm struggeling with the payload.
So far, I have :
client = ::Google::Cloud::AIPlatform::V1::PredictionService::Client.new do |config|
config.endpoint = "#{location}-aiplatform.googleapis.com"
end
img = File.open(imgPath, 'rb') do |img|
'data:image/png;base64,' + Base64.strict_encode64(img.read)
end
instance = Instance.new(:content => img)
request = Google::Cloud::AIPlatform::V1::PredictRequest.new(
endpoint: "projects/#{project}/locations/#{location}/endpoints/#{endpoint}",
instances: [instance]
)
result = client.predict request
p result
Here is my proto
message Instance {
required bytes content = 1;
};
But I have the following error : Invalid type Instance to assign to submessage field 'instances'
I read the documentation but for ruby SDK it's a bit light.
The parameters are OK, the JS example here : https://github.com/googleapis/nodejs-ai-platform/blob/main/samples/predict-image-object-detection.js is working with those parameters
What am I doing wrong ?
I managed it
client = Google::Cloud::AIPlatform::V1::PredictionService::Client.new do |config|
config.endpoint = "#{location}-aiplatform.googleapis.com"
end
img = File.open(imgPath, 'rb') do |img|
Base64.strict_encode64(img.read)
end
instance = Google::Protobuf::Value.new(:struct_value => {:fields => {
:content => {:string_value => img}
}})
endpoint = "projects/#{project}/locations/#{location}/endpoints/#{endpoint}"
request = Google::Cloud::AIPlatform::V1::PredictRequest.new(
endpoint: endpoint,
instances: [instance]
)
result = client.predict request
p result
The use of the Google::Protobuf::Value looks ugly to me but it works
Graphene sends the email, but the url doesn't exist. How should I set up the token url for this?
I can't find docs on how to configure urls.py so that the link that it sends through the email works.
http://127.0.0.1:8090/activate/eyJ1c2VybmFtZSI6ImtkZWVuZXl6eiIsImFjdGlvbiI6ImFjdGl2YXRpb24ifQ:1m2a0v:04V3Ho0msVn7nHuFW469DC9GBYuUz2czfsFai09EOyM
settings.py
GRAPHQL_JWT = {
'JWT_VERIFY_EXPIRATION': True,
'JWT_LONG_RUNNING_REFRESH_TOKEN': True,
'ALLOW_LOGIN_NOT_VERIFIED': True,
'JWT_ALLOW_ARGUMENT': True,
"JWT_ALLOW_ANY_CLASSES": [
"graphql_auth.mutations.Register",
"graphql_auth.mutations.VerifyAccount",
"graphql_auth.mutations.ResendActivationEmail",
"graphql_auth.mutations.SendPasswordResetEmail",
"graphql_auth.mutations.PasswordReset",
"graphql_auth.mutations.ObtainJSONWebToken",
"graphql_auth.mutations.VerifyToken",
"graphql_auth.mutations.RefreshToken",
"graphql_auth.mutations.RevokeToken",
"graphql_auth.mutations.VerifySecondaryEmail",
],
}
schema.py
class AuthMutation(graphene.ObjectType):
register = mutations.Register.Field()
verify_account = mutations.VerifyAccount.Field()
token_auth = mutations.ObtainJSONWebToken.Field()
update_account = mutations.UpdateAccount.Field()
resend_activation_email = mutations.ResendActivationEmail.Field()
send_password_reset_email = mutations.SendPasswordResetEmail.Field()
password_reset = mutations.PasswordReset.Field()
password_change = mutations.PasswordChange.Field()
You need to create a view to handle this url.
from django.http import HttpResponseRedirect
from django.views import View
from graphql_auth.models import UserStatus
class ActivateView(View):
def get(self, request, **kwargs):
try:
token = kwargs.get("token")
UserStatus.verify(token)
except Exception:
return HttpResponseRedirect(f"/some/error/url")
return HttpResponseRedirect(f"/activate/thankyou")
And in your urls.py
urlpatterns = [
....
path("activate/<str:token>", ActivateView.as_view()),
...
]
I have the method below that creates an Excel file, but how can I return it like an Excel report?
def report_excel(self,results,report_name,header,indice,title_report):
fileExcel = xlsxwriter.Workbook('C:\\Users\\Pc-Pc\\Desktop\\411\\'+report_name+'.xlsx')
listUsersSheet = fileExcel.add_worksheet(report_name)
column = 0
row = 15
for res in results:
listUsersSheet.merge_range(index[ind][0] + str(row+1) + ':'+ index[ind][0] + str(row + a), res[index[ind][1]], cell_format)
fileExcel.close()
How can I download it from the client as a report?
You can use a Web Controller for that:
from openerp.http import request
from openerp import http
from openerp.addons.web.controllers.main import serialize_exception,content_disposition
class Binary(http.Controller):
#http.route('/web/binary/download_report', type='http', auth="public")
#serialize_exception
def download_xls_document(self, path, filename="My report.xlsx", **kw):
with open(path, "rb") as pdf_file:
return request.make_response(pdf_file.read(),
[('Content-Type', 'application/octet-stream'),
('Content-Disposition', content_disposition(filename))])
and report_excel method should return:
return {
'type': 'ir.actions.act_url',
'url': '/web/binary/download_report?path=%s&filename=%s' % (path, "The report name.xlsx"),
'target': 'blank',
}
serializer.data show {} in the python shell. But there are four instances with some value. And when I run django project it shows [{}, {}, {}, {}]. why?
I will follow this tutorial and I am running my project in ubuntu 16.04
Here is my code.
tutorial/settings.py
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'rest_framework',
'snippets.apps.SnippetsConfig',
]
snippets/models.py
from django.db import models
from pygments.lexers import get_all_lexers
from pygments.styles import get_all_styles
LEXERS = [item for item in get_all_lexers() if item[1]]
LANGUAGE_CHOICES = sorted([(item[1][0], item[0]) for item in LEXERS])
STYLE_CHOICES = sorted((item, item) for item in get_all_styles())
class Snippet(models.Model):
created = models.DateTimeField(auto_now_add=True)
title = models.CharField(max_length=100, blank=True, default='')
code = models.TextField()
linenos = models.BooleanField(default=False)
language = models.CharField(choices=LANGUAGE_CHOICES, default='python', max_length=100)
style = models.CharField(choices=STYLE_CHOICES, default='friendly', max_length=100)
class Meta:
ordering = ('created',)
serializer.py
from rest_framework import serializers
from snippets.models import Snippet, LANGUAGE_CHOICES, STYLE_CHOICES
class SnippetSerializer(serializers.Serializer):
class Meta:
model = Snippet
fields = ('id', 'title', 'code', 'linenos', 'language', 'style')
def create(self, validated_data):
"""
Create and return a new `Snippet` instance, given the validated data.
"""
return Snippet.objects.create(**validated_data)
def update(self, instance, validated_data):
"""
Update and return an existing `Snippet` instance, given the validated data.
"""
instance.title = validated_data.get('title', instance.title)
instance.code = validated_data.get('code', instance.code)
instance.linenos = validated_data.get('linenos', instance.linenos)
instance.language = validated_data.get('language', instance.language)
instance.style = validated_data.get('style', instance.style)
instance.save()
return instance
snippets/urls.py
from django.conf.urls import url
from snippets import views
urlpatterns = [
url(r'^snippets/$', views.snippet_list),
url(r'^snippets/(?P<pk>[0-9]+)/$', views.snippet_detail),
]
I want to start working with Magento's REST API, but I can't seem to get it working.
To start with I need to get access tokens, here's what I'm trying:
require 'oauth'
#consumer = OAuth::Consumer.new("4200100753b2c8b03bde1f5b062c5a80", "c06abdcb734c85dfd7bb115c6a67ae4d", {:site=>"http://178.62.173.99/"})
#request_token = #consumer.get_request_token
# => oauth-0.4.7/lib/oauth/consumer.rb:216:in `token_request': 404 Not Found (OAuth::Unauthorized)
But I keep getting a 404 error.
What should I try next?
Here's a Ruby module I've written to create an access token for the Magento REST API:
module Token
def create_consumer
OAuth::Consumer.new(
CONSUMER_KEY,
CONSUMER_SECRET,
:request_token_path => '/oauth/initiate',
:authorize_path=>'/admin/oauth_authorize',
:access_token_path=>'/oauth/token',
:site => URL
)
end
def request_token(args = {})
args[:consumer].get_request_token(:oauth_callback => URL)
end
def get_authorize_url(args = {})
args[:request_token].authorize_url(:oauth_callback => URL)
end
def authorize_application(args = {})
m = Mechanize.new
m.get(args[:authorize_url]) do |login_page|
auth_page = login_page.form_with(:action => "#{URL}/index.php/admin/oauth_authorize/index/") do |form|
form.elements[1].value = ADMIN_USERNAME
form.elements[2].value = ADMIN_PASSWORD
end.submit
authorize_form = auth_page.forms[0]
#callback_page = authorize_form.submit
end
#callback_page.uri.to_s
end
def extract_oauth_verifier(args = {})
callback_page = "#{args[:callback_page]}".gsub!("#{URL}/?", '')
callback_page_query_string = CGI::parse(callback_page)
callback_page_query_string['oauth_verifier'][0]
end
def get_access_token(args = {})
args[:request_token].get_access_token(:oauth_verifier => args[:oauth_verifier])
end
def save_tokens_to_json(args = {})
auth = {}
auth[:time] = Time.now
auth[:token] = args[:access_token].token
auth[:secret] = args[:access_token].secret
File.open("#{args[:path]}#{args[:filename]}.json", 'w') {|f| f.write(auth.to_json)}
auth
end
def get_new_access_tokens
new_consumer = self.create_consumer
new_request_token = self.request_token(consumer: new_consumer)
new_authorize_url = self.get_authorize_url(request_token: new_request_token)
authorize_new_application = self.authorize_application(authorize_url: new_authorize_url)
extract_new_oauth_verifier = self.extract_oauth_verifier(callback_page: authorize_new_application)
new_access_token = self.get_access_token(request_token: new_request_token, oauth_verifier: extract_new_oauth_verifier)
save_tokens_to_json(filename: 'magento_oauth_access_tokens', path: '/', access_token: new_access_token)
return 'Successfully obtained new access tokens.'
end
end
Run #get_new_access_tokens to get an access token.
Don't forget to define the following variable:
CONSUMER_KEY
CONSUMER_SECRET
URL
ADMIN_USERNAME
ADMIN_PASSWORD
Check out mage on rails. It should work right out of the box. Check out this page for annotated ruby code showcasing the oauth flow