How to break out of an (asyncio) websocket fetch loop that doesn't have any incoming messages? - websocket

This code prints all messages from a websocket connection:
class OrderStreamer:
def __init__(ᬑ):
ᬑ.terminate_flag = False
# worker thread to receive data stream
ᬑ.worker_thread = threading.Thread(
target=ᬑ.worker_thread_func,
daemon=True
)
def start_streaming(ᬑ, from_scheduler = False):
ᬑ.worker_thread.start()
def terminate(ᬑ):
ᬑ.terminate_flag = True
def worker_thread_func(ᬑ):
asyncio.run(ᬑ.aio_func()) # blocks
async def aio_func(ᬑ):
async with \
aiohttp.ClientSession() as session, \
session.ws_connect(streams_url) as wsock, \
anyio.create_task_group() as tg:
async for msg in wsock:
print(msg.data)
if ᬑ.terminate_flag:
await wsock.close()
The problem is that if no messages arrive, the loop never gets the chance to check terminate_flag and never exits.
I tried creating an external reference to the runloop and websocket:
async with \
aiohttp.ClientSession() as session, \
session.ws_connect(streams_url) as wsock, \
anyio.create_task_group() as tg:
ᬑ.wsock = wsock
ᬑ.loop = asyncio.get_event_loop()
... and modifying my terminate function:
def terminate(ᬑ):
# ᬑ.loop.stop()
asyncio.set_event_loop(ᬑ.loop)
async def kill():
await ᬑ.wsock.close()
asyncio.run(kill())
... but it does not work.
I can't afford to rearchitect my entire application to use asyncio at this point in time.
How to break out of the loop?

You should use asyncio.wait_for or asyncio.wait and call wsock.__anext__() directly instead of using async for loop.
The loop with asyncio.wait should look something like this:
next_message = asyncio.create_task(wsock.__anext__())
while not self.terminate_flag:
await asyncio.wait([next_message], timeout=SOME_TIMEOUT,)
if next_message.done():
try:
msg = next_message.result()
except StopAsyncIteration:
break
else:
print(msg.data)
next_message = asyncio.create_task(wsock.__anext__())
SOME_TIMEOUT should be replaced with the amount of seconds you want to wait continuously for the next incoming message
Here is the documentation for asyncio.wait
P.S. I replaced ᬑ with self, but I hope you get the idea

Note that to read data you should not create a new task as mentioned here:
Reading from the WebSocket (await ws.receive()) must only be done inside the request handler task;
You can simply use timeout.
async def handler(request):
ws = web.WebSocketResponse() # or web.WebSocketResponse(receive_timeout=5)
await ws.prepare(request)
while True:
try:
msg = await ws.receive(timeout=5)
except asyncio.TimeoutError:
print('TimeoutError')
if your_terminate_flag is True:
break
aiohttp/web_protocol.py/_handle_request() will dump errors if you don't write try/except or don't catch the right exception. Try testing except Exception as err: or check its source code.

Related

Python asyncio: awaiting a future you don't have yet

Imagine that I have a main program which starts many async activities which all wait on queues to do jobs, and then on ctrl-C properly closes them all down: it might look something like this:
async def run_act1_forever():
# this is the async queue loop
while True:
job = await inputQueue1.get()
# do something with this incoming job
def run_activity_1(loop):
# run the async queue loop as a task
coro = loop.create_task(run_act1_forever())
return coro
def mainprogram():
loop = asyncio.get_event_loop()
act1 = run_activity_1(loop)
# also start act2, act3, etc here
try:
loop.run_forever()
except KeyboardInterrupt:
pass
finally:
act1.cancel()
# also act2.cancel(), act3.cancel(), etc
loop.close()
This all works fine. However, starting up activity 1 is actually more complex than this; it happens in three parts. Part 1 is to wait on the queue until a particular job comes in, one time; part 2 is a synchronous part which has to run in a thread with run_in_executor, one time, and then part 3 is the endless waiting on the queue for jobs as above. How do I structure this? My initial thought was:
async def run_act1_forever():
# this is the async queue loop
while True:
job = await inputQueue1.get()
# do something with this incoming job
async def run_act1_step1():
while True:
job = await inputQueue1.get()
# good, we have handled that first task; we're done
break
def run_act1_step2():
# note: this is sync, not async, so it's in a thread
# do whatever, here, and then exit when done
time.sleep(5)
def run_activity_1(loop):
# run step 1 as a task
step1 = loop.create_task(run_act1_step1())
# ERROR! See below
# now run the sync step 2 in a thread
self.loop.run_in_executor(None, run_act1_step2())
# finally, run the async queue loop as a task
coro = loop.create_task(run_act1_forever())
return coro
def mainprogram():
loop = asyncio.get_event_loop()
act1 = run_activity_1(loop)
# also start act2, act3, etc here
try:
loop.run_forever()
except KeyboardInterrupt:
pass
finally:
act1.cancel()
# also act2.cancel(), act3.cancel(), etc
loop.close()
but this does not work, because at the point where we say "ERROR!", we need to await the step1 task and we never do. We can't await it, because run_activity_1 is not an async function. So... what should I do here?
I thought about getting the Future back from calling run_act1_step1() and then using future.add_done_callback to handle running steps 2 and 3. However, if I do that, then run_activity_1() can't return the future generated by run_act1_forever(), which means that mainprogram() can't cancel that run_act1_forever() task.
I thought of generating an "empty" Future in run_activity_1() and returning that, and then making that empty Future "chain" to the Future returned by run_act1_forever(). But Python asyncio doesn't support chaining Futures.
You say that things are difficult because run_activity_1 is not an async function, but don't really detail why it can't be async.
async def run_activity_1(loop):
await run_act1_step1()
await loop.run_in_executor(None, run_act1_step2)
await run_act1_forever()
The returned coroutine won't be the same as the one returned by run_act1_forever(), but cancellation should propagate if you've got as far as executing that step.
With this change, run_activity_1 is no longer returning a task, so the invocation inside mainprogram would need to change to:
act1 = loop.create_task(run_activity_1(loop))
I think you were on the right track when you said, "I thought about getting the Future back from calling run_act1_step1() and then using future.add_done_callback to handle running steps 2 and 3." That's the logical way to structure this application. You have to manage the various returned objects correctly, but a small class solves this problem.
Here is a program similar to your second code snippet. It runs (tested with Python3.10) and handles Ctrl-C gracefully.
Python3.10 issues a deprecation warning when the function asyncio.get_event_loop() is called without a running loop, so I avoided doing that.
Activities.run() creates task1, then attaches a done_callback that starts task2 and the rest of the activities. The Activities object keeps track of task1 and task2 so they can be cancelled. The main program keeps a reference to Activities, and calls cancel_gracefully() to do the right thing, depending on how far the script progressed through the sequence of start-up activities.
Some care needs to be taken to catch the CancelledExceptions; otherwise stuff gets printed on the console when the program terminates.
The important difference between this program and your second code snippet is that this program immediately stores task1 and task2 in variables so they can be accessed later. Therefore they can be cancelled any time after their creation. The done_callback trick is used to launch all the steps in the proper order.
#! python3.10
import asyncio
import time
async def run_act1_forever():
# this is the async queue loop
while True:
await asyncio.sleep(1.0)
# job = await inputQueue1.get()
# do something with this incoming job
print("Act1 forever")
async def run_act1_step1():
while True:
await asyncio.sleep(1.0)
# job = await inputQueue1.get()
# good, we have handled that first task; we're done
break
print("act1 step1 finished")
def run_act1_step2():
# note: this is sync, not async, so it's in a thread
# do whatever, here, and then exit when done
time.sleep(5)
print("Step2 finished")
class Activities:
def __init__(self, loop):
self.loop = loop
self.task1: asyncio.Task = None
self.task2: asyncio.Task = None
def run(self):
# run step 1 as a task
self.task1 = self.loop.create_task(run_act1_step1())
self.task1.add_done_callback(self.run2)
# also start act2, act3, etc here
def run2(self, fut):
try:
if fut.exception() is not None: # do nothing if task1 failed
return
except asyncio.CancelledError: # or if it was cancelled
return
# now run the sync step 2 in a thread
self.loop.run_in_executor(None, run_act1_step2)
# finally, run the async queue loop as a task
self.task2 = self.loop.create_task(run_act1_forever())
async def cancel_gracefully(self):
if self.task2 is not None:
# in this case, task1 has already finished without error
self.task2.cancel()
try:
await self.task2
except asyncio.CancelledError:
pass
elif self.task1 is not None:
self.task1.cancel()
try:
await self.task1
except asyncio.CancelledError:
pass
# also act2.cancel(), act3.cancel(), etc
def mainprogram():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
acts = Activities(loop)
loop.call_soon(acts.run)
try:
loop.run_forever()
except KeyboardInterrupt:
pass
loop.run_until_complete(acts.cancel_gracefully())
if __name__ == "__main__":
mainprogram()
You can do this with a combination of threading events and asyncio events. You'll need two events, one to signal the first item has arrived. The thread will wait on this event, so it needs to be a threading Event. You'll also need one to signal the thread is finished. Your run_act1_forever coroutine will await this, so it will need to be an asyncio Event. You can then return the task for run_act1_forever normally and cancel it as you need.
Note that when setting the asyncio event from the separate thread you'll need to use loop.call_soon_threadsafe as asyncio Events are not thread safe.
import asyncio
import time
import threading
import functools
from asyncio import Queue, AbstractEventLoop
async def run_act1_forever(inputQueue1: Queue,
thread_done_event: asyncio.Event):
await thread_done_event.wait()
print('running forever')
while True:
job = await inputQueue1.get()
async def run_act1_step1(inputQueue1: Queue,
first_item_event: threading.Event):
print('Waiting for queue item')
job = await inputQueue1.get()
print('Setting event')
first_item_event.set()
def run_act1_step2(loop: AbstractEventLoop,
first_item_event: threading.Event,
thread_done_event: asyncio.Event):
print('Waiting for event...')
first_item_event.wait()
print('Got event, processing...')
time.sleep(5)
loop.call_soon_threadsafe(thread_done_event.set)
def run_activity_1(loop):
inputQueue1 = asyncio.Queue(loop=loop)
first_item_event = threading.Event()
thread_done_event = asyncio.Event(loop=loop)
loop.create_task(run_act1_step1(inputQueue1, first_item_event))
inputQueue1.put_nowait('First item to test the code')
loop.run_in_executor(None, functools.partial(run_act1_step2,
loop,
first_item_event,
thread_done_event))
return loop.create_task(run_act1_forever(inputQueue1, thread_done_event))
def mainprogram():
loop = asyncio.new_event_loop()
act1 = run_activity_1(loop)
# also start act2, act3, etc here
try:
loop.run_forever()
except KeyboardInterrupt:
pass
finally:
act1.cancel()
# also act2.cancel(), act3.cancel(), etc
loop.close()
mainprogram()

Can't figure out how to correctly schedule my asyncio functions

I'm rather new to asyncio and I read through some documentation, examples, and other questions, but I can't find any answers as to what I'm doing wrong here. I don't quite understand enough about asyncio to diagnose why this is happening. I have a class with asyncio functions, one of which opens a websocket and then creates three asyncio tasks which it executes. The first executes completely fine. I'm confused with the execution of the second and third tasks. Even after the second one has executed asyncio.sleep, the third task seems to not execute websock.recv(), and so the second task executes websocket.recv() and gets sent the data that I meant for the third task to receive. What am I doing wrong here? Thanks in advance for any help.
Edit: Sorry the code isn't runnable unless you create or have a discord bot and use it's token in the code.
import websockets
import requests
import asyncio
import json
class GatewayConnection:
def __init__(self):
self.heartbeat = None
self.s = None
self.info = None
self.connected = False
self.uri = f"{self.get_gateway()}/?v=8&encoding=json"
#staticmethod
def get_gateway():
endpoint = requests.get(OAUTH+'gateway')
return endpoint.json()['url']
async def get_gateway_info(self, websock):
recv = json.loads(await websock.recv())
self.heartbeat = recv['d']['heartbeat_interval']
self.s = recv['s']
async def finish_gateway_connect(self, websock):
await websock.send(json.dumps({
'op': 2,
'd': {
'token': TOKEN,
'intents': 513,
'properties': {
'$os': 'windows',
'$browser': 'Sheepp',
'$device': 'Sheepp'
}
}
}))
print("waiting to received ready info")
self.info = json.loads(await websock.recv())
print("Ready info received")
print(self.info)
async def communicate(self):
async with websockets.connect(self.uri) as websock:
self.connected = True
get = asyncio.create_task(self.get_gateway_info(websock))
heartbeat = asyncio.create_task(self.send_heartbeat(websock))
finish = asyncio.create_task(self.finish_gateway_connect(websock))
await get # First task
await heartbeat # Second task
await finish # Third task
async def send_heartbeat(self, websock):
while self.connected:
await websock.send(json.dumps({'op': 1, 'd': self.s}))
print("Sent heartbeat")
response = json.loads(await websock.recv())
print("Received 'heartbeat'")
if response['op'] != 11:
self.connected = False # Discord api says to terminate connection upon not receiving a valid heartbeat response
print(f"The server did not send back a valid heartbeat response, but instead: {response}")
await asyncio.sleep(self.heartbeat/1000)
gateway = GatewayConnection()
try:
loop.run_until_complete(gateway.communicate())
except KeyboardInterrupt:
print("Keyboard Interruption")
finally:
loop.close()
To fix the error I was getting, I made a separate class for handling the websocket which makes sure that the recv and send functions are executed in time correctly. I'm not sure about how well I made the class but here it is:
class WebsocketHandler:
def __init__(self, websocket):
self.ws = websocket
self.sending = False
self.sends = []
self.recvs = []
async def send(self, data):
if self.sending:
self.sends.append(json.dumps(data))
else:
self.sending = True
await self.ws.send(json.dumps(data))
while self.sends:
data = self.sends[0]
self.sends.pop(0)
await self.ws.send(json.dumps(data))
self.sending = False
async def recv(self):
event = asyncio.Event()
self.recvs.append(event)
while self.recvs[0] != event:
await event.wait()
try:
data = await asyncio.wait_for(self.ws.recv(), timeout=1)
except asyncio.exceptions.TimeoutError:
self.recvs.pop(0)
raise asyncio.exceptions.TimeoutError
self.recvs.pop(0)
if len(self.recvs) > 0:
self.recvs[0].set()
return json.loads(data)
The error I was getting was RuntimeError: cannot call recv while another coroutine is already waiting for the next message
Edit: Feel free to critique my code.
Edit 2: I made a couple changes as needed to fix some errors I was getting as well as for convenience sake.

When using asyncio.Queue() how do I cancel the gets?

I'm writing a client in asyncio and using q.get() to wait for responses from the server. When I receive a response from the server I put it on the queue. If the server connection is lost I will no longer being doing those puts and could have any number of await q.get()'s hanging around.
How should I cancel them? I noticed that when I delete the queue the await gets are still waiting.
Does this look like what you are trying to do? You have two options I think:
If you keep a count of outstanding gets then when you are done with the queue you can just put(None) that many times?
Or if None is a valid response then keep a list of the outstanding futures and call cancel on them yourself.
import asyncio
async def qget(q):
try:
x = await q.get()
q.task_done()
print("qget done ",x)
except asyncio.CancelledError as e:
print("qget cancel exception ",e)
except Exception as e:
print("qget exception ",e)
async def run():
q = asyncio.Queue()
futs = []
futs.append( asyncio.ensure_future( qget(q) ) )
futs.append( asyncio.ensure_future( qget(q) ) )
num = 2
await asyncio.sleep(0.1)
# Keep the number of outstanding gets and put None for each one
if 1:
for x in range(num):
q.put_nowait(None)
# Or keep the futures in a list and cancel them
if 0:
for f in futs:
f.cancel()
await asyncio.sleep(1)
print("run loop done")
asyncio.run(run())
If you look at the python code for the queue it does keep a list called _getters, but there is no public api for accessing it.

How to convert event-based communication into async/await using asyncio.Future

I'm trying to expose an event-based communication as a coroutine. Here is an example:
class Terminal:
async def start(self):
loop = asyncio.get_running_loop()
future = loop.create_future()
t = threading.Thread(target=self.run_cmd, args=future)
t.start()
return await future
def run_cmd(self, future):
time.sleep(3) # imitating doing something
future.set_result(1)
But when I run it like this:
async def main():
t = Terminal()
result = await t.start()
print(result)
asyncio.run(main())
I get the following error: RuntimeError: await wasn't used with future
Is it possible to achieve the desired behavior?
There are two issues with your code. One is that the args argument to the Thread constructor requires a sequence or iterable, so you need to write wrap the argument in a container, e.g. args=(future,). Since future is iterable (for technical reasons unrelated to this use case), args=future is not immediately rejected, but leads to the misleading error later down the line.
The other issue is that asyncio objects aren't thread-safe, so you cannot just call future.set_result from another thread. This causes the test program to hang even after fixing the first issue. The correct way to resolve the future from another thread is through the call_soon_threadsafe method on the event loop:
class Terminal:
async def start(self):
loop = asyncio.get_running_loop()
future = loop.create_future()
t = threading.Thread(target=self.run_cmd, args=(loop, future,))
t.start()
return await future
def run_cmd(self, loop, future):
time.sleep(3)
loop.call_soon_threadsafe(future.set_result, 1)
If your thread is really just calling a blocking function whose result you're interested in, consider using run_in_executor instead of manually spawning threads:
class Terminal:
async def start(self):
loop = asyncio.get_running_loop()
return await loop.run_in_executor(None, self.run_cmd)
# Executed in a different thread; `run_in_executor` submits the
# callable to a thread pool, suspends the awaiting coroutine until
# it's done, and transfers the result/exception back to asyncio.
def run_cmd(self):
time.sleep(3)
return 1

How to reuse aiohttp ClientSession pool?

The docs say to reuse the ClientSession:
Don’t create a session per request. Most likely you need a session per
application which performs all requests altogether.
A session contains a connection pool inside, connection reusage and
keep-alives (both are on by default) may speed up total performance.1
But there doesn't seem to be any explanation in the docs about how to do this? There is one example that's maybe relevant, but it does not show how to reuse the pool elsewhere: http://aiohttp.readthedocs.io/en/stable/client.html#keep-alive-connection-pooling-and-cookie-sharing
Would something like this be the correct way to do it?
#app.listener('before_server_start')
async def before_server_start(app, loop):
app.pg_pool = await asyncpg.create_pool(**DB_CONFIG, loop=loop, max_size=100)
app.http_session_pool = aiohttp.ClientSession()
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
app.http_session_pool.close()
app.pg_pool.close()
#app.post("/api/register")
async def register(request):
# json validation
async with app.pg_pool.acquire() as pg:
await pg.execute() # create unactivated user in db
async with app.http_session_pool as session:
# TODO send activation email using SES API
async with session.post('http://httpbin.org/post', data=b'data') as resp:
print(resp.status)
print(await resp.text())
return HTTPResponse(status=204)
There're few things I think can be improved:
1)
Instance of ClientSession is one session object. This on session contains pool of connections, but it's not "session_pool" itself. I would suggest rename http_session_pool to http_session or may be client_session.
2)
Session's close() method is a corountine. Your should await it:
await app.client_session.close()
Or even better (IMHO), instead of thinking about how to properly open/close session use standard async context manager with awaiting of __aenter__ / __aexit__:
#app.listener('before_server_start')
async def before_server_start(app, loop):
# ...
app.client_session = await aiohttp.ClientSession().__aenter__()
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
await app.client_session.__aexit__(None, None, None)
# ...
3)
Pay attention to this info:
However, if the event loop is stopped before the underlying connection
is closed, an ResourceWarning: unclosed transport warning is emitted
(when warnings are enabled).
To avoid this situation, a small delay must be added before closing
the event loop to allow any open underlying connections to close.
I'm not sure it's mandatory in your case but there's nothing bad in adding await asyncio.sleep(0) inside after_server_stop as documentation advices:
#app.listener('after_server_stop')
async def after_server_stop(app, loop):
# ...
await asyncio.sleep(0) # http://aiohttp.readthedocs.io/en/stable/client.html#graceful-shutdown
Upd:
Class that implements __aenter__ / __aexit__ can be used as async context manager (can be used in async with statement). It allows to do some actions before executing internal block and after it. This is very similar to regular context managers, but asyncio related. Same as regular context manager async one can be used directly (without async with) manually awaiting __aenter__ / __aexit__.
Why do I think it's better to create/free session using __aenter__ / __aexit__ manually instead of using close(), for example? Because we shouldn't worry what actually happens inside __aenter__ / __aexit__. Imagine in future versions of aiohttp creating of session will be changed with the need to await open() for example. If you'll use __aenter__ / __aexit__ you wouldn't need to somehow change your code.
seems no session pool in aiohttp.
// just post some official docs.
persistent session
here is persistent-session usage demo in official site
https://docs.aiohttp.org/en/latest/client_advanced.html#persistent-session
app.cleanup_ctx.append(persistent_session)
async def persistent_session(app):
app['PERSISTENT_SESSION'] = session = aiohttp.ClientSession()
yield
await session.close()
async def my_request_handler(request):
session = request.app['PERSISTENT_SESSION']
async with session.get("http://python.org") as resp:
print(resp.status)
//TODO: a full runnable demo code
connection pool
and it has a connection pool:
https://docs.aiohttp.org/en/latest/client_advanced.html#connectors
conn = aiohttp.TCPConnector()
#conn = aiohttp.TCPConnector(limit=30)
#conn = aiohttp.TCPConnector(limit=0) # nolimit, default is 100.
#conn = aiohttp.TCPConnector(limit_per_host=30) # default is 0
session = aiohttp.ClientSession(connector=conn)
I found this question after searching on Google on how to reuse an aiohttp ClientSession instance after my code was triggering this warning message: UserWarning: Creating a client session outside of coroutine is a very dangerous idea
This code may not solve the above problem though it is related. I am new to asyncio and aiohttp, so this may not be best practice. It's the best I could come up with after reading a lot of seemingly conflicting information.
I created a class ResourceManager taken from the Python docs that opens a context.
The ResourceManager instance handles the opening and closing of the aiohttp ClientSession instance via the magic methods __aenter__ and __aexit__ with BaseScraper.set_session and BaseScraper.close_session wrapper methods.
I was able to reuse a ClientSession instance with the following code.
The BaseScraper class also has methods for authentication. It depends on the lxml third-party package.
import asyncio
from time import time
from contextlib import contextmanager, AbstractContextManager, ExitStack
import aiohttp
import lxml.html
class ResourceManager(AbstractContextManager):
# Code taken from Python docs: 29.6.2.4. of https://docs.python.org/3.6/library/contextlib.html
def __init__(self, scraper, check_resource_ok=None):
self.acquire_resource = scraper.acquire_resource
self.release_resource = scraper.release_resource
if check_resource_ok is None:
def check_resource_ok(resource):
return True
self.check_resource_ok = check_resource_ok
#contextmanager
def _cleanup_on_error(self):
with ExitStack() as stack:
stack.push(self)
yield
# The validation check passed and didn't raise an exception
# Accordingly, we want to keep the resource, and pass it
# back to our caller
stack.pop_all()
def __enter__(self):
resource = self.acquire_resource()
with self._cleanup_on_error():
if not self.check_resource_ok(resource):
msg = "Failed validation for {!r}"
raise RuntimeError(msg.format(resource))
return resource
def __exit__(self, *exc_details):
# We don't need to duplicate any of our resource release logic
self.release_resource()
class BaseScraper:
login_url = ""
login_data = dict() # dict of key, value pairs to fill the login form
loop = asyncio.get_event_loop()
def __init__(self, urls):
self.urls = urls
self.acquire_resource = self.set_session
self.release_resource = self.close_session
async def _set_session(self):
self.session = await aiohttp.ClientSession().__aenter__()
def set_session(self):
set_session_attr = self.loop.create_task(self._set_session())
self.loop.run_until_complete(set_session_attr)
return self # variable after "as" becomes instance of BaseScraper
async def _close_session(self):
await self.session.__aexit__(None, None, None)
def close_session(self):
close_session = self.loop.create_task(self._close_session())
self.loop.run_until_complete(close_session)
def __call__(self):
fetch_urls = self.loop.create_task(self._fetch())
return self.loop.run_until_complete(fetch_urls)
async def _get(self, url):
async with self.session.get(url) as response:
result = await response.read()
return url, result
async def _fetch(self):
tasks = (self.loop.create_task(self._get(url)) for url in self.urls)
start = time()
results = await asyncio.gather(*tasks)
print(
"time elapsed: {} seconds \nurls count: {}".format(
time() - start, len(urls)
)
)
return results
#property
def form(self):
"""Create and return form for authentication."""
form = aiohttp.FormData(self.login_data)
get_login_page = self.loop.create_task(self._get(self.login_url))
url, login_page = self.loop.run_until_complete(get_login_page)
login_html = lxml.html.fromstring(login_page)
hidden_inputs = login_html.xpath(r'//form//input[#type="hidden"]')
login_form = {x.attrib["name"]: x.attrib["value"] for x in hidden_inputs}
for key, value in login_form.items():
form.add_field(key, value)
return form
async def _login(self, form):
async with self.session.post(self.login_url, data=form) as response:
if response.status != 200:
response.raise_for_status()
print("logged into {}".format(url))
await response.release()
def login(self):
post_login_form = self.loop.create_task(self._login(self.form))
self.loop.run_until_complete(post_login_form)
if __name__ == "__main__":
urls = ("http://example.com",) * 10
base_scraper = BaseScraper(urls)
with ResourceManager(base_scraper) as scraper:
for url, html in scraper():
print(url, len(html))

Resources