how to get entire document in scrapy using hxs.select

how to get entire document in scrapy using hxs.select - xpath

I've been at this for 12hrs and I'm hoping someone can give me a leg up.
Here is my code all I want is to get the anchor and url of every link on a page as it crawls along.
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.utils.url import urljoin_rfc
from scrapy.utils.response import get_base_url
from urlparse import urljoin
#from scrapy.item import Item
from tutorial.items import DmozItem
class HopitaloneSpider(CrawlSpider):
name = 'dmoz'
allowed_domains = ['domain.co.uk']
start_urls = [
'http://www.domain.co.uk'
]
rules = (
#Rule(SgmlLinkExtractor(allow='>example\.org', )),
Rule(SgmlLinkExtractor(allow=('\w+$', )), callback='parse_item', follow=True),
)
user_agent = 'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))'
def parse_item(self, response):
#self.log('Hi, this is an item page! %s' % response.url)
hxs = HtmlXPathSelector(response)
#print response.url
sites = hxs.select('//html')
#item = DmozItem()
items = []
for site in sites:
item = DmozItem()
item['title'] = site.select('a/text()').extract()
item['link'] = site.select('a/#href').extract()
items.append(item)
return items
What I'm doing wrong... my eyes hurt now.

response.body should be what you want
def parse_item(self, response):
#self.log('Hi, this is an item page! %s' % response.url)
body = response.body
item = ....

To get all links on a single page:
def parse_item(self, response):
hxs = HtmlXPathSelector(response)
items = []
links = hxs.select("//a")
for link in links:
item = DmozItem()
item['title'] = site.select('text()').extract()
item['link'] = site.select('#href').extract()
items.append(item)
return items

Related

Discord has no attribute named 'StoreChannel'

I am trying to make a discord bot that returns the most popular videos in the KR region regarding a topic which actually is the message of the user.
So far i coded import discord
import googleapiclient.discovery
from discord.ext import commands
import typing
from discord_ui import UI
cmd = commands.Bot(command_prefix='.')
client = discord.Client()
token =
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = 'googleapikeyhere'
youtube = googleapiclient.discovery.build(
api_service_name, api_version, developerKey=DEVELOPER_KEY)
#cmd.event
async def on_command_error(ctx, error):
if isinstance(error, commands.CommandOnCooldown):
await ctx.reply(f"You are on cooldown for {round(error.retry_after, 2)} seconds!")
# cmd.command()
# commands.cooldown(1, 4, commands.BucketType.user)
async def info(ctx, *args):
output2 = ''
for word in args:
output2 += str(word)
output2 += ' '
print(output2)
request3 = youtube.videos().list(
part="snippet, {}, statistics".format(output2),
chart="mostPopular",
regionCode="KR"
)
response3 = request3.execute()
print(response3)
channame = response3['items'][0]['snippet']['title']
chanid = response3['etag']
numberViews = response3['Items'][0]['statistics']['viewCount']
numberLikes = response3['Items'][0]['statistics']['likeCount']
embed2 = discord.Embed(title=f"{channame}", url=f"https://www.youtube.com/watch?v={chanid}",
numberviews=f"{numberViews}", numberlikes= f"{numberLikes}", color=0xFF5733)
response3 = request3.execute()
await ctx.send(embed=embed2)
cmd.run('findvideos')
but it results in the error discord has no attribute 'StoreData' why is that ?

discord py 'keyerror' (discord components, slash commands)

I'm trying to use the bot by adding a slash command and a button. For once, the slash command is working as intended. But every time I run the command, I get a warning like that. I'm not sure what the problem is. It's a part of the code.
Ignoring exception in on_socket_response
Traceback (most recent call last):
File "/home/yuna21724/.local/lib/python3.8/site-packages/discord/client.py", line 343, in _run_event
await coro(*args, **kwargs)
File "/home/yuna21724/.local/lib/python3.8/site-packages/discord_buttons_plugin/__main__.py", line 17, in soclistener
await self.emit(data["data"]["custom_id"], data)
KeyError: 'custom_id'
import discord
import asyncio
import SeagamiCommandsListening
import SeagamiNotice
import SeagamiCharacter
from discord_buttons_plugin import *
from discord.ext import commands
from discord_buttons_plugin import *
from discord_slash import SlashCommand
from discord import Intents
from discord_components.client import DiscordComponents
token = ""
seagami = commands.Bot(command_prefix='.', intents = Intents.all())
slash = SlashCommand(seagami, sync_commands = True)
buttons = ButtonsClient(seagami)
#seagami.event
async def on_ready():
DiscordComponents(seagami)
await seagami.change_presence(status=discord.Status.online)
print('[알림]디스코드 봇 서비스가 시작되었습니다...',
seagami.user.name)
while True:
if SeagamiNotice.a['title'] == SeagamiNotice.b['title']:
print('')
else:
embed = discord.Embed(color = 0x2F3136)
embed.add_field(name = "```🌷로스트아크 공지사항```",
value = "``🌷로스트아크 점검 시, 봇의 기능이 제한됩니다.``", inline =
False)
embed.add_field(name = f"```👑
{SeagamiNotice.a['title']}```", value = '``📕 추가 정보를 확인하려면
아래의 바로가기 버튼을 눌러주세요.``', inline = False)
components_notice = [Button(label = "바로가기", style =
5, url = f"{SeagamiNotice.a['Link']}")]
await
seagami.get_channel(int(878004091505221643)).send(embed = embed,
components = components_notice)
await asyncio.sleep(335)
#seagami.event
async def on_member_join(member):
message_ = "<#{}>".format(str(member.id))
embed = discord.Embed(color = 0x2F3136)
embed.add_field(name = "```🌷환영해요.```", value = f"``🌷
``{message_}``님, 새가미와 아이들에 오신 걸 환영합니다.``", inline =
False)
buttons = [Button(label = "역할 받기", style = 2, custom_id =
"role")]
await seagami.get_channel(int(879993490996600852)).send(embed =
embed, components = buttons)
await
member.add_roles(seagami.get_guild(int(870543165004849152)).get_role(int(
870552731419566111)))
#slash.slash(name="입장", description="새가미 봇이 음성채널로 이동합니
다.", guild_ids=[870543165004849152])
async def in_voice(ctx: SlashCommand):
try:
if (ctx.channel.id == int(878003511961464883)):
await
SeagamiCommandsListening.commands_Listening.in_(ctx)
await ctx.reply('``🌷``새가미 봇이 음성채널로 이동하였습니
다.')
except:
await ctx.reply('``🌷``해당 명령어는 여기서 사용할 수 없습니
다.')
#slash.slash(name="퇴장", description="새가미 봇이 음성채널에서 퇴장
합니다.", guild_ids=[870543165004849152])
async def out_voice(ctx: SlashCommand):
try:
if (ctx.channel.id == int(878003511961464883)):
await
`enter code here`SeagamiCommandsListening.commands_Listening.out_(ctx)
await ctx.reply('``🌷``새가미 봇이 음성채널에서 퇴장하였습
니다.')
except:
await ctx.reply('``🌷``해당 명령어는 여기서 사용할 수 없습니
다.')
#slash.slash(name="재생", description="새가미 봇을 통해 노래를 재생할
수 있습니다.", guild_ids=[870543165004849152])
async def play_voice(ctx: SlashCommand, 링크):
try:
if (ctx.channel.id == int(878003511961464883)):
await
SeagamiCommandsListening.commands_Listening.play_(ctx, 링크)
await ctx.reply('``🌷``신청하신 노래를 ``1회``재생 합니
다.')
await buttons.send(
content = "아래의 버튼을 눌러 ``👑명령어``를 사용하실 수 있습니다.",
channel = ctx.channel.id,
components = [
ActionRow([
Button(label = "일시 정지", style = 1, custom_id = "button_pause"),
Button(label = "다시 듣기", style = 3, custom_id = "button_resume"),
Button(label = "그만 듣기", style = 4, custom_id = "button_stop")
]
)])
except:
await ctx.reply('해당 명령어는 여기서 사용할 수 없습니다.')
#slash.slash(name="반복재생", description="새가미 봇을 통해 노래를 반복 재생할 수 있습니다.", guild_ids=[870543165004849152])
async def replay_voice(ctx: SlashCommand, 링크):
try:
if (ctx.channel.id == int(878003511961464883)):
await SeagamiCommandsListening.commands_Listening.replay_(ctx, 링크)
await ctx.reply('``🌷``신청하신 노래를 반복 재생 합니다.')
await buttons.send(
content = "아래의 버튼을 눌러 ``👑명령어``를 사용하실 수 있습니다.",
channel = ctx.channel.id,
components = [
ActionRow([
Button(label = "일시 정지", style = 1, custom_id = "button_pause"),
Button(label = "다시 듣기", style = 3, custom_id = "button_resume"),
Button(label = "그만 듣기", style = 4, custom_id = "button_stop")
]
)])
except:
await ctx.reply('해당 명령어는 여기서 사용할 수 없습니다.')
#slash.slash(name="검색", description="캐릭터 정보를 검색 할 수 있습니다.", guild_ids=[870543165004849152])
async def search_(ctx: SlashCommand, 캐릭터명):
if (ctx.channel.id == int(882916863779168267)):
await SeagamiCharacter.search_info.info_(ctx, 캐릭터명)
if SeagamiCharacter.code['code'] != '100':
embed = discord.Embed(color = 0x2F3136)
embed.add_field(name = f"> ```🌷{SeagamiCharacter.name_['name']}```", value = f"> ``🌷{SeagamiCharacter.class_info['class']}``", inline = False)
embed.add_field(name = "```📕캐릭터 레벨```", value = f"``Lv. {SeagamiCharacter.Level_['Level'][8:]}``", inline = True)
embed.add_field(name = "```📕원정대 레벨```", value = f"``Lv. {SeagamiCharacter.expedition_Level['expedition'][9:]}``", inline = True)
embed.add_field(name = "```📕아이템 레벨```", value = f"``Lv. {SeagamiCharacter.item_Level['item_Level'][12:]}``", inline = True)
embed.add_field(name = "> ```🔯특성```", value = "> ``🔯펫 효과가 적용 된 특성입니다.``", inline = False)
embed.add_field(name = "```🔯치명```", value = f"``{SeagamiCharacter.ability_battle['ability_battle1']}``", inline = True)
embed.add_field(name = "```🔯특화```", value = f"``{SeagamiCharacter.ability_battle['ability_battle3']}``", inline = True)
embed.add_field(name = "```🔯제압```", value = f"``{SeagamiCharacter.ability_battle['ability_battle5']}``", inline = True)
embed.add_field(name = "```🔯신속```", value = f"``{SeagamiCharacter.ability_battle['ability_battle7']}``", inline = True)
embed.add_field(name = "```🔯인내```", value = f"``{SeagamiCharacter.ability_battle['ability_battle9']}``", inline = True)
embed.add_field(name = "```🔯숙련```", value = f"``{SeagamiCharacter.ability_battle['ability_battle11']}``", inline = True)
embed.add_field(name = "> ```👑각인```", value = "> ``👑현재 사용중인 각인입니다.``", inline = False)
try:
for count in range(8):
embed.add_field(name = "```👑각인```", value = f"``{SeagamiCharacter.ability_engrave[f'ability_engrave{count}']}``", inline = True)
except:
components_ = [[
Button(label = "바로가기", style = 5, url = f'https://lostark.game.onstove.com/Profile/Character/{캐릭터명}')
]]
await ctx.reply(f'``🌷{캐릭터명}``검색이 완료되었습니다.')
await seagami.get_channel(int(882916863779168267)).send(embed = embed, components = components_)
else:
await ctx.reply('``🌷``로스트아크 점검 중 입니다.')
SeagamiCharacter.code['code'] = '200'
else:
await ctx.reply('해당 명령어는 여기서 사용할 수 없습니다.')
#buttons.click
async def button_pause(ctx):
await SeagamiCommandsListening.commands_Listening.pause_()
await ctx.reply('``🌷``노래가 일시 정지 되었습니다.')
#buttons.click
async def button_resume(ctx):
await SeagamiCommandsListening.commands_Listening.resume_()
await ctx.reply('``🌷``노래가 다시 재생 되고 있습니다.')
#buttons.click
async def button_stop(ctx):
await SeagamiCommandsListening.commands_Listening.stop_()
await ctx.reply('``🌷``재생중인 노래를 종료 하였습니다.')
seagami.run(token)
enter image description here
enter image description here
enter image description here
enter image description here

A KeyError is raised when a mapping (dictionary) key is not found in the set of existing keys.
If you're unsure why you're getting a KeyError, here are some things you can do
Print out the dictionary to see what's in it
Check that the key is what you intend it to be
If you want to provide a default for a value when the key isn't in the dictionary, you can use dict.get which defaults to None
>>> person["pets"]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'pets'
>>> person.get("pets")
>>> person.get("pets", [])
[]
Docs:
https://docs.python.org/3/library/exceptions.html#KeyError%3E
https://docs.python.org/3/library/stdtypes.html#dict.get%3E
Helpful link: https://realpython.com/python-keyerror/%3E
I believe the cause of your error was the 3rd party package you installed called discord_buttons_plugin, which as far as I can tell requires you to add a custom_id to all buttons except the ButtonType().Link. In your _search command, you've forgot to add it, which causes an error in the package's code. Try adding a custom_id to the button there.
Long Story Short
Try adding a custom_id in your button component in the _search command.

django rest framework:In Serializer how to show the field properties also

I have model:
class Ingredient(models.Model):
KILOGRAM = 'kg'
LITER = 'ltr'
PIECES = 'pcs'
MUNITS_CHOICES = (
(KILOGRAM, 'Kilogram'),
(LITER, 'Liter'),
(PIECES, 'Pieces'),
)
name = models.CharField(max_length=200,unique=True,null=False)
slug = models.SlugField(unique=True)
munit = models.CharField(max_length=10,choices=MUNITS_CHOICES,default=KILOGRAM)
rate = models.DecimalField(max_digits=19, decimal_places=2,validators=[MinValueValidator(0)],default=0)
typeofingredient = models.ForeignKey(TypeOfIngredient, related_name='typeof_ingredient',null=True, blank=True,on_delete=models.PROTECT)
density_kg_per_lt = models.DecimalField(max_digits=19, decimal_places=2,verbose_name='Density (kg/lt)',null=True,blank=True,validators=[MinValueValidator(0)])
density_pcs_per_kg = models.DecimalField(max_digits=19, decimal_places=2,verbose_name='Density (pcs/kg)',null=True,blank=True,validators=[MinValueValidator(0)])
density_pcs_per_lt = models.DecimalField(max_digits=19, decimal_places=2,verbose_name='Density (pcs/lt)',null=True,blank=True,validators=[MinValueValidator(0)])
updated = models.DateTimeField(auto_now=True, auto_now_add=False)
timestamp = models.DateTimeField(auto_now=False, auto_now_add=True)
When i get the api i also want to get field types like char, decimal, datetime etc
Something like the below api result, is it possible. Because i am using reactJs as frontend, i have tell the input what kind of field it can accept and also helps in sorting by text or number
{
"id": {value: 1,type: number},
"name": {value: "adark",type: charfield},
"rate": {value: "12.00",type: decimal},
"updated": {value: "2017-07-14T10:51:47.847171Z",type: datetime},
.......so on
}

The Corresponding Serializer would be as follows:
class IngredientSerializer(serializers.ModelSerializer):
name = serializers.SerializerMethodField()
rate = serializers.SerializerMethodField()
updated = serializers.SerializerMethodField()
class Meta:
model = Ingredient
fields = ('name', 'rate', 'updated')
def get_name(self, obj):
response = dict()
response['value'] = obj.name
response['type'] = obj.name.get_internal_type()
return Response(response)
def get_rate(self, obj):
response = dict()
response['value'] = obj.rate
response['type'] = obj.rate.get_internal_type()
return Response(response)
def get_updated(self, obj):
response = dict()
response['value'] = obj.updated
response['type'] = obj.updated.get_internal_type()
return Response(response)

How to redirect with ajax django

The problem i am facing over here is that i am not able to see the cartempty page when items count is 0.
here is the view
if request.is_ajax():
if request.POST.get('action') == 'remove':
cart_item.delete()
if cart_item.cart.items.count() == 0:
return render(request , "carts/cartempty.html", {})
else:
try:
total = cart_item.line_item_total
except:
total = None
try:
original_total = cart_item.cart.original_total
except:
original_total = None
try:
final_total = cart_item.cart.final_total
except:
final_total = None
try:
total_items = cart_item.cart.items.count()
except:
total_items = 0
data = {
"line_total": total,
"original_total": original_total,
"final_total" :final_total,
"total_items": total_items
}
return JsonResponse(data)
How can i redirect to cartempty page using ajax?

Just guessing. Did you try?
return render_to_response('carts/cartempty.html', context_instance=RequestContext(request))
OR
return HttpResponseRedirect("/carts/cartempty/")
instead of:
return render(request , "carts/cartempty.html", {})

Downloading images in scrapy

I am trying to download image in via scrapy. Here are my different files :
items.py
class DmozItem(Item):
title = Field()
image_urls = Field()
images = Field()
settings.py
BOT_NAME = 'tutorial'
SPIDER_MODULES = ['tutorial.spiders']
NEWSPIDER_MODULE = 'tutorial.spiders'
ITEM_PIPELINES = ['scrapy.contrib.pipeline.images.ImagesPipeline']
IMAGES= '/home/mayank/Desktop/sc/tutorial/tutorial'
spider
class DmozSpider(BaseSpider):
name = "wikipedia"
allowed_domains = ["wikipedia.org"]
start_urls = [
"http://en.wikipedia.org/wiki/Pune"
]
def parse(self, response):
hxs = HtmlXPathSelector(response)
items = []
images=hxs.select('//a[#class="image"]')
for image in images:
item = DmozItem()
link=image.select('#href').extract()[0]
link = 'http://en.wikipedia.com'+link
item['image_urls']=link
items.append(item)
In spite of all these setting I my pipeline is not getting activated.Please help. I am new to this framework.

First, settings.py: IMAGES -> IMAGES_STORE
Second, spider: You should return an item so that ImagesPipeline could download those images.
item = DmozItem()
image_urls = hxs.select('//img/#src').extract()
item['image_urls'] = ["http:" + x for x in image_urls]
return item

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

how to get entire document in scrapy using hxs.select - xpath

response.body should be what you want def parse_item(self, response): #self.log('Hi, this is an item page! %s' % response.url) body = response.body item = ....

To get all links on a single page: def parse_item(self, response): hxs = HtmlXPathSelector(response) items = [] links = hxs.select("//a") for link in links: item = DmozItem() item['title'] = site.select('text()').extract() item['link'] = site.select('#href').extract() items.append(item) return items

Related

Discord has no attribute named 'StoreChannel'

discord py 'keyerror' (discord components, slash commands)

django rest framework:In Serializer how to show the field properties also

How to redirect with ajax django

Downloading images in scrapy

Categories

Resources