Aiohttp: Server & Client in one time - python-asyncio

I try to use aiohttp 3.6.2 both server and client:
For webhook perform work:
1) Get JSON-request from service
2) Fast send HTTP 200 OK back to service
3) Made additional work after: make http-request to slow web-service(answer 2-5 sec)
I dont understand how to perform work after view(or handler) returned web.Response(text="OK")?
Current view:
(it's slow cause slow http_request perform before response)
view.py:
async def make_http_request(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
print(await resp.text())
async def work_on_request(request):
url = (await request.json())['url']
await make_http_request(url)
return aiohttp.web.Response(text='all ok')
routes.py:
from views import work_on_request
def setup_routes(app):
app.router.add_get('/', work_on_request)
server.py:
from aiohttp import web
from routes import setup_routes
import asyncio
app = web.Application()
setup_routes(app)
web.run_app(app)
So, workaround for me is to start one more thread with different event_loop, or may be you know how to add some work to current event loop?

Already not actual, cause i found desicion to add one more task to main event_loop:
//additionaly i created one global queue to interoperate coroutine between each other.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
queue = asyncio.Queue(maxsize=100000)
loop.create_task(worker('Worker1', queue))
app = web.Application()
app['global_queue'] = queue

Related

Iterate through asyncio loop

I am very new with aiohttp and asyncio so apologies for my ignorance up front. I am having difficulties with the event loop portion of the documentation and don't think my below code is executing asynchronously. I am trying to take the output of all combinations of two lists via itertools, and POST to XML. A more full blown version is listed here while using the requests module, however that is not ideal as I am needing to POST 1000+ requests potentially at a time. Here is a sample of how it looks now:
import aiohttp
import asyncio
import itertools
skillid = ['7715','7735','7736','7737','7738','7739','7740','7741','7742','7743','7744','7745','7746','7747','7748' ,'7749','7750','7751','7752','7753','7754','7755','7756','7757','7758','7759','7760','7761','7762','7763','7764','7765','7766','7767','7768','7769','7770','7771','7772','7773','7774','7775','7776','7777','7778','7779','7780','7781','7782','7783','7784']
agent= ['5124','5315','5331','5764','6049','6076','6192','6323','6669','7690','7716']
url = 'https://url'
user = 'user'
password = 'pass'
headers = {
'Content-Type': 'application/xml'
}
async def main():
async with aiohttp.ClientSession() as session:
for x in itertools.product(agent,skillid):
payload = "<operation><operationType>update</operationType><refURLs><refURL>/unifiedconfig/config/agent/" + x[0] + "</refURL></refURLs><changeSet><agent><skillGroupsRemoved><skillGroup><refURL>/unifiedconfig/config/skillgroup/" + x[1] + "</refURL></skillGroup></skillGroupsRemoved></agent></changeSet></operation>"
async with session.post(url,auth=aiohttp.BasicAuth(user, password), data=payload,headers=headers) as resp:
print(resp.status)
print(await resp.text())
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
I see that coroutines can be used but not sure that applies as there is only a single task to execute. Any clarification is appreciated.
Because you're making a request and then immediately await-ing on it, you are only making one request at a time. If you want to parallelize everything, you need to separate making the request from waiting for the response, and you need to use something like asyncio.gather to wait for the requests in bulk.
In the following example, I've modified your code to connect to a local httpbin instance for testing; I'm making requests to the /delay/<value> endpoint so that each requests takes a random amount of time to complete.
The theory of operation here is:
Move the request code into the asynchronous one_request function,
which we use to build an array of tasks.
Use asyncio.gather to run all the tasks at once.
The one_request functions returns a (agent, skillid, response)
tuple, so that when we iterate over the responses we can tell which
combination of parameters resulted in the given response.
import aiohttp
import asyncio
import itertools
import random
skillid = [
"7715", "7735", "7736", "7737", "7738", "7739", "7740", "7741", "7742",
"7743", "7744", "7745", "7746", "7747", "7748", "7749", "7750", "7751",
"7752", "7753", "7754", "7755", "7756", "7757", "7758", "7759", "7760",
"7761", "7762", "7763", "7764", "7765", "7766", "7767", "7768", "7769",
"7770", "7771", "7772", "7773", "7774", "7775", "7776", "7777", "7778",
"7779", "7780", "7781", "7782", "7783", "7784",
]
agent = [
"5124", "5315", "5331", "5764", "6049", "6076", "6192", "6323", "6669",
"7690", "7716",
]
user = 'user'
password = 'pass'
headers = {
'Content-Type': 'application/xml'
}
async def one_request(session, agent, skillid):
# I'm setting `url` here because I want a random parameter for
# reach request. You would probably just set this once globally.
delay = random.randint(0, 10)
url = f'http://localhost:8787/delay/{delay}'
payload = (
"<operation>"
"<operationType>update</operationType>"
"<refURLs>"
f"<refURL>/unifiedconfig/config/agent/{agent}</refURL>"
"</refURLs>"
"<changeSet>"
"<agent>"
"<skillGroupsRemoved><skillGroup>"
f"<refURL>/unifiedconfig/config/skillgroup/{skillid}</refURL>"
"</skillGroup></skillGroupsRemoved>"
"</agent>"
"</changeSet>"
"</operation>"
)
# This shows when the task actually executes.
print('req', agent, skillid)
async with session.post(
url, auth=aiohttp.BasicAuth(user, password),
data=payload, headers=headers) as resp:
return (agent, skillid, await resp.text())
async def main():
tasks = []
async with aiohttp.ClientSession() as session:
# Add tasks to the `tasks` array
for x in itertools.product(agent, skillid):
task = asyncio.ensure_future(one_request(session, x[0], x[1]))
tasks.append(task)
print(f'making {len(tasks)} requests')
# Run all the tasks and wait for them to complete. Return
# values will end up in the `responses` list.
responses = await asyncio.gather(*tasks)
# Just print everything out.
print(responses)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
The above code results in about 561 requests, and runs in about 30
seconds with the random delay I've introduced.
This code runs all the requests at once. If you wanted to limit the
maximum number of concurrent requests, you could introduce a
Semaphore to make one_request block if there were too many active requests.
If you wanted to process responses as they arrived, rather than
waiting for everything to complete, you could investigate the
asyncio.wait method instead.

Asyncio: Fastapi with aio-pika, consumer ignores Await

I am trying to hook my websocket endpoint with rabbitmq (aio-pika). Goal is to have listener in that endpoint and on any new message from queue pass the message to browser client over websockets.
I tested the consumer with asyncio in a script with asyncio loop. Works as I followed and used aio-pika documentation. (source: https://aio-pika.readthedocs.io/en/latest/rabbitmq-tutorial/2-work-queues.html, worker.py)
However, when I use it in fastapi in websockets endpoint, I cant make it work. Somehow the listener:
await queue.consume(on_message)
is completely ignored.
This is my attempt (I put it all in one function, so its more readable):
#app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
print("Entering websockets")
await manager.connect(websocket)
print("got connection")
# params
queue_name = "task_events"
routing_key = "user_id.task"
con = "amqp://rabbitmq:rabbitmq#rabbit:5672/"
connection = await connect(con)
channel = await connection.channel()
await channel.set_qos(prefetch_count=1)
exchange = await channel.declare_exchange(
"topic_logs",
ExchangeType.TOPIC,
)
# Declaring queue
queue = await channel.declare_queue(queue_name)
# Binding the queue to the exchange
await queue.bind(exchange, routing_key)
async def on_message(message: IncomingMessage):
async with message.process():
# here will be the message passed over websockets to browser client
print("sent", message.body)
try:
######### Not working as expected ###########
# await does not await and websockets finishes, as there is no loop
await queue.consume(on_message)
#############################################
################ This Alternative code atleast receives some messages #############
# If I use this part, I atleast get some messages, when I trigger a backend task that publishes new messages to the queue.
# It seems like the messages are somehow stuck and new task releases all stucked messages, but does not release new one.
while True:
await queue.consume(on_message)
await asyncio.sleep(1)
################## one part #############
except WebSocketDisconnect:
manager.disconnect(websocket)
I am quite new to async in python. I am not sure where is the problem and I cannot somehow implement async consuming loop while getting inspired with worker.py from aio-pika.
You could use an async iterator, which is the second canonical way to consume messages from a queue.
In your case, this means:
async with queue.iterator() as iter:
async for message in iter:
async with message.process():
# do something with message
It will block as long as no message is received and will be suspended again after processing a message.
The solution was simply.
aio-pika queue.consume even though we use await is nonblocking, so
this way we consume
consumer_tag = await queue.consume(on_message, no_ack=True)
and at the end of connection we cancel
await queue.cancel(consumer_tag)
The core of the solution for me, was to make something asyncio blocking, so I used
this part of the code after consume
while True:
data = await websocket.receive_text()
x = await manager.send_message(data, websocket)
I dont use this code, but its useful as this part of the code waits for frontend websocket response. If this part of the code is missing, then what happens is that client connects just to get disconnected (the websocket endpoit is succefully executed), as there is nothing blocking

Autobahn websocket client in Quart (async Flask) application

Good evening everyone. I'm not quite new to this place but finally decided to register and ask for a help. I develop a web application using Quart framework (asynchronous Flask). And now as application became bigger and more complex I decided to separate different procedures to different server instances, this is mostly because I want to keep web server clean, more abstract and free of computational load.
So I plan to use one web server with a few (if needed) identical procedure servers. All servers are based on quart framework, for now just for simplicity of development. I decided to use Crossbar.io router and autobahn to connect all servers together.
And here the problem occurred.
I followed this posts:
Running several ApplicationSessions non-blockingly using autbahn.asyncio.wamp
How can I implement an interactive websocket client with autobahn asyncio?
How I can integrate crossbar client (python3,asyncio) with tkinter
How to send Autobahn/Twisted WAMP message from outside of protocol?
Seems like I tried all possible approaches to implement autobahn websocket client in my quart application. I don't know how to make it possible so both things are working, whether Quart app works but autobahn WS client does not, or vice versa.
Simplified my quart app looks like this:
from quart import Quart, request, current_app
from config import Config
# Autobahn
import asyncio
from autobahn import wamp
from autobahn.asyncio.wamp import ApplicationSession, ApplicationRunner
import concurrent.futures
class Component(ApplicationSession):
"""
An application component registering RPC endpoints using decorators.
"""
async def onJoin(self, details):
# register all methods on this object decorated with "#wamp.register"
# as a RPC endpoint
##
results = await self.register(self)
for res in results:
if isinstance(res, wamp.protocol.Registration):
# res is an Registration instance
print("Ok, registered procedure with registration ID {}".format(res.id))
else:
# res is an Failure instance
print("Failed to register procedure: {}".format(res))
#wamp.register(u'com.mathservice.add2')
def add2(self, x, y):
return x + y
def create_app(config_class=Config):
app = Quart(__name__)
app.config.from_object(config_class)
# Blueprint registration
from app.main import bp as main_bp
app.register_blueprint(main_bp)
print ("before autobahn start")
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
runner = ApplicationRunner('ws://127.0.0.1:8080 /ws', 'realm1')
future = executor.submit(runner.run(Component))
print ("after autobahn started")
return app
from app import models
In this case application stuck in runner loop and whole application does not work (can not serve requests), it becomes possible only if I interrupt the runners(autobahn) loop by Ctrl-C.
CMD after start:
(quart-app) user#car:~/quart-app$ hypercorn --debug --error-log - --access-log - -b 0.0.0.0:8001 tengine:app
Running on 0.0.0.0:8001 over http (CTRL + C to quit)
before autobahn start
Ok, registered procedure with registration ID 4605315769796303
after pressing ctrl-C:
...
^Cafter autobahn started
2019-03-29T01:06:52 <Server sockets=[<socket.socket fd=11, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0, laddr=('0.0.0.0', 8001)>]> is serving
How to make it possible to work quart application with autobahn client together in non-blocking fashion? So autobahn opens and keeps websocket connection to Crossbar router and silently listen on background.
Well, after many sleepless nights I finally found a good approach to solve this conundrum.
Thanks to this post C-Python asyncio: running discord.py in a thread
So, I rewrote my code like this and was able to run my Quart app with autobahn client inside, and both are actively working in nonblocking fashion.
The whole __init__.py looks like:
from quart import Quart, request, current_app
from config import Config
def create_app(config_class=Config):
app = Quart(__name__)
app.config.from_object(config_class)
# Blueprint registration
from app.main import bp as main_bp
app.register_blueprint(main_bp)
return app
# Autobahn
import asyncio
from autobahn import wamp
from autobahn.asyncio.wamp import ApplicationSession, ApplicationRunner
import threading
class Component(ApplicationSession):
"""
An application component registering RPC endpoints using decorators.
"""
async def onJoin(self, details):
# register all methods on this object decorated with "#wamp.register"
# as a RPC endpoint
##
results = await self.register(self)
for res in results:
if isinstance(res, wamp.protocol.Registration):
# res is an Registration instance
print("Ok, registered procedure with registration ID {}".format(res.id))
else:
# res is an Failure instance
print("Failed to register procedure: {}".format(res))
def onDisconnect(self):
print('Autobahn disconnected')
#wamp.register(u'com.mathservice.add2')
def add2(self, x, y):
return x + y
async def start():
runner = ApplicationRunner('ws://127.0.0.1:8080/ws', 'realm1')
await runner.run(Component) # use client.start instead of client.run
def run_it_forever(loop):
loop.run_forever()
asyncio.get_child_watcher() # I still don't know if I need this method. It works without it.
loop = asyncio.get_event_loop()
loop.create_task(start())
print('Starting thread for Autobahn...')
thread = threading.Thread(target=run_it_forever, args=(loop,))
thread.start()
print ("Thread for Autobahn has been started...")
from app import models
With this scenario we create task with autobahn's runner.run and attach it to the current loop and then run this loop forever in new thread.
I was quite satisfied with current solution.... but then then was found out that this solution has some drawbacks, that was crucial for me, for example: reconnect if connection dropped (i.e crossbar router becomes unavailable). With this approach if connection was failed to initialize or dropped after a while it will not try to reconnect. Additionally for me it wasn't obvious how to ApplicationSession API, i.e. to register/call RPC from the code in my quart app.
Luckily I spotted another new component API that autobahn used in their documentation:
https://autobahn.readthedocs.io/en/latest/wamp/programming.html#registering-procedures
https://github.com/crossbario/autobahn-python/blob/master/examples/asyncio/wamp/component/backend.py
It has auto reconnect feature and it's easy to register functions for RPC using decorators #component.register('com.something.do'), you just need to import component before.
So here is the final view of __init__.py solution:
from quart import Quart, request, current_app
from config import Config
def create_app(config_class=Config):
...
return app
from autobahn.asyncio.component import Component, run
from autobahn.wamp.types import RegisterOptions
import asyncio
import ssl
import threading
component = Component(
transports=[
{
"type": "websocket",
"url": u"ws://localhost:8080/ws",
"endpoint": {
"type": "tcp",
"host": "localhost",
"port": 8080,
},
"options": {
"open_handshake_timeout": 100,
}
},
],
realm=u"realm1",
)
#component.on_join
def join(session, details):
print("joined {}".format(details))
async def start():
await component.start() #used component.start() instead of run([component]) as it's async function
def run_it_forever(loop):
loop.run_forever()
loop = asyncio.get_event_loop()
#asyncio.get_child_watcher() # I still don't know if I need this method. It works without it.
asyncio.get_child_watcher().attach_loop(loop)
loop.create_task(start())
print('Starting thread for Autobahn...')
thread = threading.Thread(target=run_it_forever, args=(loop,))
thread.start()
print ("Thread for Autobahn has been started...")
from app import models
I hope it will help somebody. Cheers!

Using the Decorator approach with AutobahnWS, how to publish messages independent from subscription callbacks and it's Session-Reference?

When working with Autobahn and WAMP before I have been using the Subclassing-Approach but stumbled over decorator / functions approach which I really prefer over subclassing.
However. I have a function that is being called from an external hardware (via callback) and this function needs to publish to Crossbar.io Router whenever it is being called.
This is how I've done this, keeping a reference of the Session right after the on_join -> async def joined(session, details) was called.
from autobahn.asyncio.component import Component
from autobahn.asyncio.component import run
global_session = None
comp = Component(
transports=u"ws://localhost:8080/ws",
realm=u"realm1",
)
def callback_from_hardware(msg):
if global_session is None:
return
global_session.publish(u'com.someapp.somechannel', msg)
#comp.on_join
async def joined(session, details):
global global_session
global_session = session
print("session ready")
if __name__ == "__main__":
run([comp])
This approach of keeping a reference after component has joined connection feels however a bit "odd". Is there a different approach to this? Can this done on some other way.
If not than it feels a bit more "right" with subclassing and having all the application depended code within that subclass (but however keeping everything of my app within one subclass also feels odd).
I would recommend to use asynchronous queue instead of shared session:
import asyncio
from autobahn.asyncio.component import Component
from autobahn.asyncio.component import run
queue = asyncio.queues.Queue()
comp = Component(
transports=u"ws://localhost:8080/ws",
realm=u"realm1",
)
def callback_from_hardware(msg):
queue.put_nowait((u'com.someapp.somechannel', msg,))
#comp.on_join
async def joined(session, details):
print("session ready")
while True:
topic, message, = await queue.get()
print("Publishing: topic: `%s`, message: `%s`" % (topic, message))
session.publish(topic, message)
if __name__ == "__main__":
callback_from_hardware("dassdasdasd")
run([comp])
There are multiple approaches you could take here, though the simplest IMO would be to use Crossbar's http bridge. So whenever an event callback is received from your hardware, you can just make a http POST request to Crossbar and your message will get delivered
More details about http bridge https://crossbar.io/docs/HTTP-Bridge-Publisher/

How to reuse aiohttp ClientSession pool?

The docs say to reuse the ClientSession:
Don’t create a session per request. Most likely you need a session per
application which performs all requests altogether.
A session contains a connection pool inside, connection reusage and
keep-alives (both are on by default) may speed up total performance.1
But there doesn't seem to be any explanation in the docs about how to do this? There is one example that's maybe relevant, but it does not show how to reuse the pool elsewhere: http://aiohttp.readthedocs.io/en/stable/client.html#keep-alive-connection-pooling-and-cookie-sharing
Would something like this be the correct way to do it?
#app.listener('before_server_start')
async def before_server_start(app, loop):
app.pg_pool = await asyncpg.create_pool(**DB_CONFIG, loop=loop, max_size=100)
app.http_session_pool = aiohttp.ClientSession()
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
app.http_session_pool.close()
app.pg_pool.close()
#app.post("/api/register")
async def register(request):
# json validation
async with app.pg_pool.acquire() as pg:
await pg.execute() # create unactivated user in db
async with app.http_session_pool as session:
# TODO send activation email using SES API
async with session.post('http://httpbin.org/post', data=b'data') as resp:
print(resp.status)
print(await resp.text())
return HTTPResponse(status=204)
There're few things I think can be improved:
1)
Instance of ClientSession is one session object. This on session contains pool of connections, but it's not "session_pool" itself. I would suggest rename http_session_pool to http_session or may be client_session.
2)
Session's close() method is a corountine. Your should await it:
await app.client_session.close()
Or even better (IMHO), instead of thinking about how to properly open/close session use standard async context manager with awaiting of __aenter__ / __aexit__:
#app.listener('before_server_start')
async def before_server_start(app, loop):
# ...
app.client_session = await aiohttp.ClientSession().__aenter__()
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
await app.client_session.__aexit__(None, None, None)
# ...
3)
Pay attention to this info:
However, if the event loop is stopped before the underlying connection
is closed, an ResourceWarning: unclosed transport warning is emitted
(when warnings are enabled).
To avoid this situation, a small delay must be added before closing
the event loop to allow any open underlying connections to close.
I'm not sure it's mandatory in your case but there's nothing bad in adding await asyncio.sleep(0) inside after_server_stop as documentation advices:
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
# ...
await asyncio.sleep(0) # http://aiohttp.readthedocs.io/en/stable/client.html#graceful-shutdown
Upd:
Class that implements __aenter__ / __aexit__ can be used as async context manager (can be used in async with statement). It allows to do some actions before executing internal block and after it. This is very similar to regular context managers, but asyncio related. Same as regular context manager async one can be used directly (without async with) manually awaiting __aenter__ / __aexit__.
Why do I think it's better to create/free session using __aenter__ / __aexit__ manually instead of using close(), for example? Because we shouldn't worry what actually happens inside __aenter__ / __aexit__. Imagine in future versions of aiohttp creating of session will be changed with the need to await open() for example. If you'll use __aenter__ / __aexit__ you wouldn't need to somehow change your code.
seems no session pool in aiohttp.
// just post some official docs.
persistent session
here is persistent-session usage demo in official site
https://docs.aiohttp.org/en/latest/client_advanced.html#persistent-session
app.cleanup_ctx.append(persistent_session)
async def persistent_session(app):
app['PERSISTENT_SESSION'] = session = aiohttp.ClientSession()
yield
await session.close()
async def my_request_handler(request):
session = request.app['PERSISTENT_SESSION']
async with session.get("http://python.org") as resp:
print(resp.status)
//TODO: a full runnable demo code
connection pool
and it has a connection pool:
https://docs.aiohttp.org/en/latest/client_advanced.html#connectors
conn = aiohttp.TCPConnector()
#conn = aiohttp.TCPConnector(limit=30)
#conn = aiohttp.TCPConnector(limit=0) # nolimit, default is 100.
#conn = aiohttp.TCPConnector(limit_per_host=30) # default is 0
session = aiohttp.ClientSession(connector=conn)
I found this question after searching on Google on how to reuse an aiohttp ClientSession instance after my code was triggering this warning message: UserWarning: Creating a client session outside of coroutine is a very dangerous idea
This code may not solve the above problem though it is related. I am new to asyncio and aiohttp, so this may not be best practice. It's the best I could come up with after reading a lot of seemingly conflicting information.
I created a class ResourceManager taken from the Python docs that opens a context.
The ResourceManager instance handles the opening and closing of the aiohttp ClientSession instance via the magic methods __aenter__ and __aexit__ with BaseScraper.set_session and BaseScraper.close_session wrapper methods.
I was able to reuse a ClientSession instance with the following code.
The BaseScraper class also has methods for authentication. It depends on the lxml third-party package.
import asyncio
from time import time
from contextlib import contextmanager, AbstractContextManager, ExitStack
import aiohttp
import lxml.html
class ResourceManager(AbstractContextManager):
# Code taken from Python docs: 29.6.2.4. of https://docs.python.org/3.6/library/contextlib.html
def __init__(self, scraper, check_resource_ok=None):
self.acquire_resource = scraper.acquire_resource
self.release_resource = scraper.release_resource
if check_resource_ok is None:
def check_resource_ok(resource):
return True
self.check_resource_ok = check_resource_ok
#contextmanager
def _cleanup_on_error(self):
with ExitStack() as stack:
stack.push(self)
yield
# The validation check passed and didn't raise an exception
# Accordingly, we want to keep the resource, and pass it
# back to our caller
stack.pop_all()
def __enter__(self):
resource = self.acquire_resource()
with self._cleanup_on_error():
if not self.check_resource_ok(resource):
msg = "Failed validation for {!r}"
raise RuntimeError(msg.format(resource))
return resource
def __exit__(self, *exc_details):
# We don't need to duplicate any of our resource release logic
self.release_resource()
class BaseScraper:
login_url = ""
login_data = dict() # dict of key, value pairs to fill the login form
loop = asyncio.get_event_loop()
def __init__(self, urls):
self.urls = urls
self.acquire_resource = self.set_session
self.release_resource = self.close_session
async def _set_session(self):
self.session = await aiohttp.ClientSession().__aenter__()
def set_session(self):
set_session_attr = self.loop.create_task(self._set_session())
self.loop.run_until_complete(set_session_attr)
return self # variable after "as" becomes instance of BaseScraper
async def _close_session(self):
await self.session.__aexit__(None, None, None)
def close_session(self):
close_session = self.loop.create_task(self._close_session())
self.loop.run_until_complete(close_session)
def __call__(self):
fetch_urls = self.loop.create_task(self._fetch())
return self.loop.run_until_complete(fetch_urls)
async def _get(self, url):
async with self.session.get(url) as response:
result = await response.read()
return url, result
async def _fetch(self):
tasks = (self.loop.create_task(self._get(url)) for url in self.urls)
start = time()
results = await asyncio.gather(*tasks)
print(
"time elapsed: {} seconds \nurls count: {}".format(
time() - start, len(urls)
)
)
return results
#property
def form(self):
"""Create and return form for authentication."""
form = aiohttp.FormData(self.login_data)
get_login_page = self.loop.create_task(self._get(self.login_url))
url, login_page = self.loop.run_until_complete(get_login_page)
login_html = lxml.html.fromstring(login_page)
hidden_inputs = login_html.xpath(r'//form//input[#type="hidden"]')
login_form = {x.attrib["name"]: x.attrib["value"] for x in hidden_inputs}
for key, value in login_form.items():
form.add_field(key, value)
return form
async def _login(self, form):
async with self.session.post(self.login_url, data=form) as response:
if response.status != 200:
response.raise_for_status()
print("logged into {}".format(url))
await response.release()
def login(self):
post_login_form = self.loop.create_task(self._login(self.form))
self.loop.run_until_complete(post_login_form)
if __name__ == "__main__":
urls = ("http://example.com",) * 10
base_scraper = BaseScraper(urls)
with ResourceManager(base_scraper) as scraper:
for url, html in scraper():
print(url, len(html))

Resources