Shiny for Python: Implementing an asynchronous iterator (almost there)

The endgame is making an app reactive to a non-blocking stream of information (in my particular case a MongoDB ChangeStream; it could also be a Kafka consumer).
For the sake of reproducibility, in the example below I implement a generic asynchronous iterator AsyncIteratorDummy that mimics the behaviour of a data stream:
import asyncio

from shiny import reactive, ui, Inputs, Outputs, Session, App, render


class AsyncIteratorDummy:
    '''Iterate over an asynchronous source for n iterations.'''

    def __init__(self, n):
        self.current = 0
        self.n = n

    def __aiter__(self):
        return self

    async def __anext__(self):
        await asyncio.sleep(1)
        print(f"get next element {self.current}")
        self.current += 1
        if self.current > self.n:
            raise StopAsyncIteration
        return self.current - 1


async def watch_changes(rval: reactive.Value):
    async for i in AsyncIteratorDummy(5):
        print(f"next element {i}")
        rval.set(i)


app_ui = ui.page_fluid(
    "This should update automatically",
    ui.output_text_verbatim("async_text"),
)


def server(input: Inputs, output: Outputs, session: Session):
    triggered_val = reactive.Value(-1)
    asyncio.create_task(watch_changes(triggered_val))

    @output(id="async_text")
    @render.text()
    async def _():
        return triggered_val.get()

    # un/commenting this makes the invalidation
    # of `triggered_val` effective or not:
    @reactive.Effect
    def _():
        reactive.invalidate_later(0.1)


app = App(app_ui, server)
The app works because of the presence of

@reactive.Effect
def _():
    reactive.invalidate_later(0.1)
Otherwise, async_text greys out (indicating it has been invalidated) but does not update.
Is it possible to implement the asynchronous iteration without the "hack" of a reactive.Effect invalidating on a loop?
My supposition is that I have to "flush" or "execute" invalidated variables in the context of watch_changes() (after rval.set(i)), using a low-level py-shiny function that I cannot figure out.

I think you are looking for reactive.flush().
async def watch_changes(rval: reactive.Value):
    async for i in AsyncIteratorDummy(5):
        print(f"next element {i}")
        rval.set(i)
        reactive.flush()


Make multiprocessing.Queue accessible from asyncio [duplicate]

Given a multiprocessing.Queue that is filled from different Python threads created via ThreadPoolExecutor.submit(...):
How can that queue be accessed with asyncio / Trio / AnyIO in a safe and reliable manner (the context here is FastAPI)?
I am aware of the Janus library, but would prefer a custom solution here.
Asked (hopefully) more concisely: how do I implement

await <something_is_in_my_multiprocessing_queue>

so that the queue is accessible with async/await without blocking the event loop? What synchronization mechanism would you suggest in general?
(Attention: this is a multiprocessing.Queue, not an asyncio.Queue.)
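For concreteness, the setup being described might look something like this (a sketch; the queue and worker function names are placeholders, not from the original post):

import multiprocessing
from concurrent.futures import ThreadPoolExecutor

queue: multiprocessing.Queue = multiprocessing.Queue()

def produce(worker_id: int) -> None:
    # blocking work running in a thread; results land in the shared queue
    queue.put(f"result from worker {worker_id}")

executor = ThreadPoolExecutor(max_workers=4)
for i in range(4):
    executor.submit(produce, i)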
Actually, I figured it out.
Given a method that reads the mp.Queue:

def read_queue_blocking():
    return queue.get()

And this is the main issue: the call to get() is blocking.
We can now either use asyncio's loop.run_in_executor in the asyncio event loop (see https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.run_in_executor), or use anyio with await anyio.to_thread.run_sync(...), to execute the blocking retrieval of data from the queue in a separate thread.
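For the first option, a minimal sketch (assuming the module-level queue and the read_queue_blocking helper above):

import asyncio

async def read_queue_async():
    loop = asyncio.get_running_loop()
    # passing None uses the default ThreadPoolExecutor, so the blocking
    # get() runs in a worker thread and does not stall the event loop
    return await loop.run_in_executor(None, read_queue_blocking)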
For FastAPI, using the anyio variant:

import anyio
from fastapi import FastAPI, WebSocket

app = FastAPI()


@app.websocket("/ws/{client_id}")
async def websocket_endpoint(websocket: WebSocket, client_id: str):
    await websocket.accept()
    while True:
        queue_result = await anyio.to_thread.run_sync(read_queue_blocking)
        await websocket.send_text(f"Message text was: {queue_result}")
I reworked the answer to show the case where the main thread (running the asyncio loop) is fed with data from child processes (ProcessPoolExecutor):
from concurrent.futures import ProcessPoolExecutor
import asyncio
from random import randint
from functools import partial


def some_heavy_task() -> int:
    sum(i * i for i in range(10 ** 8))
    return randint(1, 9)


def callback(fut: asyncio.Future, q: asyncio.Queue) -> None:
    """Callback used instead of an mp.Queue to get results from child processes."""
    loop = asyncio.get_event_loop()
    if not fut.cancelled() and not fut.exception():
        loop.call_soon(q.put_nowait, f"name-{fut.name}: {fut.result()}")


async def result_picker(q: asyncio.Queue) -> None:
    """Returns results to some outer world."""
    while True:
        res = await q.get()
        # imagine this is a websocket
        print(f"Result from heavy_work_producer: {res}")
        q.task_done()  # mark task as done here


async def heavy_work_producer(q: asyncio.Queue) -> None:
    """Wrapper around all multiprocessing work."""
    loop = asyncio.get_event_loop()
    with ProcessPoolExecutor(max_workers=4) as pool:
        heavy_tasks = [loop.run_in_executor(pool, some_heavy_task) for _ in range(12)]
        [t.add_done_callback(partial(callback, q=q)) for t in heavy_tasks]
        [setattr(t, "name", i) for i, t in enumerate(heavy_tasks)]  # just name them
        await asyncio.gather(*heavy_tasks)


async def amain():
    """Main entrypoint of the async app."""
    q = asyncio.Queue()
    asyncio.create_task(result_picker(q))
    await heavy_work_producer(q)
    # do not let result_picker finish when heavy_work_producer is done;
    # wait for all results to show
    await q.join()
    print("All done.")


if __name__ == '__main__':
    asyncio.run(amain())

Python asyncio - Increase the value of Semaphore

I am making use of aiohttp in one of my projects and would like to limit the number of requests made per second. I am using asyncio.Semaphore to do that. My challenge is that I may want to increase or decrease the number of requests allowed per second.
For example:
limit = asyncio.Semaphore(10)

async with limit:
    async with aiohttp.request(...):
        ...
    await asyncio.sleep(1)
This works great. That is, it limits the aiohttp.request calls to 10 concurrent requests per second. However, I may want to increase or decrease Semaphore._value. I can do limit._value = 20, but I am not sure if this is the right approach or whether there is another way to do it.
Accessing the private _value attribute is not the right approach for at least two reasons: one that the attribute is private and can be removed, renamed, or change meaning in a future version without notice, and the other that increasing the limit won't be noticed by a semaphore that already has waiters.
Since asyncio.Semaphore doesn't support modifying the limit dynamically, you have two options: implementing your own Semaphore class that does support it, or not using a Semaphore at all. The latter is probably easier as you can always replace a semaphore-enforced limit with a fixed number of worker tasks that receive jobs through a queue. Assuming you currently have code that looks like this:
async def fetch(limit, arg):
    async with limit:
        # your actual code here
        return result


async def tweak_limit(limit):
    # here you'd like to be able to increase the limit
    ...


async def main():
    limit = asyncio.Semaphore(10)
    asyncio.create_task(tweak_limit(limit))
    results = await asyncio.gather(*[fetch(limit, x) for x in range(1000)])
You could express it without a semaphore by creating workers in advance and giving them work to do:
async def fetch_task(queue, results):
    while True:
        arg = await queue.get()
        # your actual code here
        results.append(result)
        queue.task_done()


async def main():
    # fill the queue with jobs for the workers
    queue = asyncio.Queue()
    for x in range(1000):
        await queue.put(x)

    # create the initial pool of workers
    results = []
    workers = [asyncio.create_task(fetch_task(queue, results))
               for _ in range(10)]

    asyncio.create_task(tweak_limit(workers, queue, results))

    # wait for workers to process the entire queue
    await queue.join()

    # finally, cancel the now-idle worker tasks
    for w in workers:
        w.cancel()

    # results are now available
The tweak_limit() function can now increase the limit simply by spawning new workers:
async def tweak_limit(workers, queue, results):
    while True:
        await asyncio.sleep(1)
        if need_more_workers:
            workers.append(asyncio.create_task(fetch_task(queue, results)))
Using workers and queues is a more complex solution; you have to think about issues like setup, teardown, exception handling, backpressure, and so on.
A semaphore can be implemented with a Lock. If you don't mind a bit of inefficiency (you will see why), here's a simple implementation of a dynamic-value semaphore:
class DynamicSemaphore:
    def __init__(self, value=1):
        self._lock = asyncio.Lock()
        if value < 0:
            raise ValueError("Semaphore initial value must be >= 0")
        self.value = value

    async def __aenter__(self):
        await self.acquire()
        return None

    async def __aexit__(self, exc_type, exc, tb):
        self.release()

    def locked(self):
        return self.value == 0

    async def acquire(self):
        async with self._lock:
            while self.value <= 0:
                await asyncio.sleep(0.1)
            self.value -= 1
            return True

    def release(self):
        self.value += 1
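A minimal usage sketch (the fetch coroutine and the timing/limit adjustment here are illustrative, not part of the answer above):

import asyncio


async def fetch(sem: DynamicSemaphore, arg):
    async with sem:
        await asyncio.sleep(0.5)  # stand-in for the real request
        return arg


async def main():
    sem = DynamicSemaphore(10)
    tasks = [asyncio.create_task(fetch(sem, x)) for x in range(100)]
    await asyncio.sleep(2)
    # raise the limit at runtime; pending acquire() calls notice it on their next poll
    sem.value += 10
    print(await asyncio.gather(*tasks))


asyncio.run(main())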

How to combine callback-based library with asyncio library in Python?

I have the following issue. I want to read out keystrokes with the pynput library and send them over websockets. pynput proposes the following usage:
from pynput import keyboard


def on_press(key):
    try:
        print('alphanumeric key {0} pressed'.format(
            key.char))
    except AttributeError:
        print('special key {0} pressed'.format(
            key))


def on_release(key):
    print('{0} released'.format(
        key))
    if key == keyboard.Key.esc:
        # Stop listener
        return False


# Collect events until released
with keyboard.Listener(
        on_press=on_press,
        on_release=on_release) as listener:
    listener.join()

# ...or, in a non-blocking fashion:
listener = keyboard.Listener(
    on_press=on_press,
    on_release=on_release)
listener.start()
(taken from https://pynput.readthedocs.io/en/latest/keyboard.html)
In contrast to that, the websockets library is called as follows:
import asyncio
import websockets


async def hello():
    uri = "ws://localhost:8765"
    async with websockets.connect(uri) as websocket:
        name = input("What's your name? ")
        await websocket.send(name)
        print(f"> {name}")

        greeting = await websocket.recv()
        print(f"< {greeting}")


asyncio.get_event_loop().run_until_complete(hello())
(taken from https://websockets.readthedocs.io/en/stable/intro.html).
I am now struggling with how to combine the two, since the websockets library is asynchronous and pynput is synchronous. I somehow have to trigger a websocket.send() from within on_press/on_release, but I can't see how to do that.
Note that your pynput example contains two different variants of using pynput, from which you need to choose the latter because it is easier to connect to asyncio. The keyboard listener will allow the program to proceed with the asyncio event loop, while invoking the callbacks from a separate thread. Inside the callback functions you can use call_soon_threadsafe to communicate the key-presses to asyncio, e.g. using a queue. For example (untested):
import asyncio

import pynput.keyboard
import websockets


def transmit_keys():
    # Start a keyboard listener that transmits keypresses into an
    # asyncio queue, and immediately return the queue to the caller.
    queue = asyncio.Queue()
    loop = asyncio.get_event_loop()

    def on_press(key):
        # this callback is invoked from another thread, so we can't
        # just queue.put_nowait(key.char), we have to go through
        # call_soon_threadsafe
        loop.call_soon_threadsafe(queue.put_nowait, key.char)

    pynput.keyboard.Listener(on_press=on_press).start()
    return queue


async def main():
    key_queue = transmit_keys()
    async with websockets.connect("ws://localhost:8765") as websocket:
        while True:
            key = await key_queue.get()
            await websocket.send(f"key pressed: {key}")


asyncio.run(main())

How to convert event-based communication into async/await using asyncio.Future

I'm trying to expose an event-based communication as a coroutine. Here is an example:
import asyncio
import threading
import time


class Terminal:
    async def start(self):
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        t = threading.Thread(target=self.run_cmd, args=future)
        t.start()
        return await future

    def run_cmd(self, future):
        time.sleep(3)  # imitating doing something
        future.set_result(1)
But when I run it like this:

async def main():
    t = Terminal()
    result = await t.start()
    print(result)


asyncio.run(main())
I get the following error: RuntimeError: await wasn't used with future
Is it possible to achieve the desired behavior?
There are two issues with your code. One is that the args argument to the Thread constructor requires a sequence or iterable, so you need to wrap the argument in a container, e.g. args=(future,). Since a future is iterable (for technical reasons unrelated to this use case), args=future is not immediately rejected, but leads to the misleading error further down the line.
The other issue is that asyncio objects aren't thread-safe, so you cannot just call future.set_result from another thread. This causes the test program to hang even after fixing the first issue. The correct way to resolve the future from another thread is through the call_soon_threadsafe method on the event loop:
class Terminal:
    async def start(self):
        loop = asyncio.get_running_loop()
        future = loop.create_future()
        t = threading.Thread(target=self.run_cmd, args=(loop, future))
        t.start()
        return await future

    def run_cmd(self, loop, future):
        time.sleep(3)
        loop.call_soon_threadsafe(future.set_result, 1)
If your thread is really just calling a blocking function whose result you're interested in, consider using run_in_executor instead of manually spawning threads:
class Terminal:
    async def start(self):
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.run_cmd)

    # Executed in a different thread; `run_in_executor` submits the
    # callable to a thread pool, suspends the awaiting coroutine until
    # it's done, and transfers the result/exception back to asyncio.
    def run_cmd(self):
        time.sleep(3)
        return 1

How to reuse aiohttp ClientSession pool?

The docs say to reuse the ClientSession:
Don’t create a session per request. Most likely you need a session per
application which performs all requests altogether.
A session contains a connection pool inside, connection reusage and
keep-alives (both are on by default) may speed up total performance.
But there doesn't seem to be any explanation in the docs about how to do this? There is one example that's maybe relevant, but it does not show how to reuse the pool elsewhere: http://aiohttp.readthedocs.io/en/stable/client.html#keep-alive-connection-pooling-and-cookie-sharing
Would something like this be the correct way to do it?
@app.listener('before_server_start')
async def before_server_start(app, loop):
    app.pg_pool = await asyncpg.create_pool(**DB_CONFIG, loop=loop, max_size=100)
    app.http_session_pool = aiohttp.ClientSession()


@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    app.http_session_pool.close()
    app.pg_pool.close()


@app.post("/api/register")
async def register(request):
    # json validation
    async with app.pg_pool.acquire() as pg:
        await pg.execute()  # create unactivated user in db
    async with app.http_session_pool as session:
        # TODO send activation email using SES API
        async with session.post('http://httpbin.org/post', data=b'data') as resp:
            print(resp.status)
            print(await resp.text())
    return HTTPResponse(status=204)
There are a few things I think can be improved:
1)
An instance of ClientSession is one session object. This one session contains a pool of connections, but it is not a "session pool" itself. I would suggest renaming http_session_pool to http_session or maybe client_session.
2)
A session's close() method is a coroutine. You should await it:

await app.client_session.close()

Or even better (IMHO), instead of thinking about how to properly open/close the session, use the standard async context manager protocol by awaiting __aenter__ / __aexit__:
@app.listener('before_server_start')
async def before_server_start(app, loop):
    # ...
    app.client_session = await aiohttp.ClientSession().__aenter__()


@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    await app.client_session.__aexit__(None, None, None)
    # ...
3)
Pay attention to this info:
However, if the event loop is stopped before the underlying connection is closed, a ResourceWarning: unclosed transport warning is emitted (when warnings are enabled).
To avoid this situation, a small delay must be added before closing the event loop to allow any open underlying connections to close.
I'm not sure it's mandatory in your case, but there's nothing wrong with adding await asyncio.sleep(0) inside after_server_stop as the documentation advises:
@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    # ...
    await asyncio.sleep(0)  # http://aiohttp.readthedocs.io/en/stable/client.html#graceful-shutdown
Update:
A class that implements __aenter__ / __aexit__ can be used as an async context manager (i.e. in an async with statement). It allows you to perform some actions before executing the inner block and after it. This is very similar to regular context managers, but asyncio-related. And just like a regular context manager, an async one can be used directly (without async with) by manually awaiting __aenter__ / __aexit__.
Why do I think it's better to create/free the session using __aenter__ / __aexit__ manually instead of using close(), for example? Because we shouldn't have to worry about what actually happens inside __aenter__ / __aexit__. Imagine that in some future version of aiohttp the creation of a session changes so that you need to await open(), for example. If you use __aenter__ / __aexit__ you won't need to change your code.
It seems there is no session pool in aiohttp, so I'll just post some pointers to the official docs.
Persistent session
Here is the persistent-session usage demo from the official docs:
https://docs.aiohttp.org/en/latest/client_advanced.html#persistent-session
async def persistent_session(app):
    app['PERSISTENT_SESSION'] = session = aiohttp.ClientSession()
    yield
    await session.close()


app.cleanup_ctx.append(persistent_session)


async def my_request_handler(request):
    session = request.app['PERSISTENT_SESSION']
    async with session.get("http://python.org") as resp:
        print(resp.status)

# TODO: a full runnable demo
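Filling in that TODO, a minimal runnable sketch using aiohttp.web (the route, upstream URL, and port are only illustrative):

from aiohttp import web
import aiohttp


async def persistent_session(app):
    app['PERSISTENT_SESSION'] = session = aiohttp.ClientSession()
    yield
    await session.close()


async def my_request_handler(request):
    session = request.app['PERSISTENT_SESSION']
    async with session.get("http://python.org") as resp:
        return web.Response(text=f"upstream status: {resp.status}")


app = web.Application()
app.cleanup_ctx.append(persistent_session)
app.add_routes([web.get("/", my_request_handler)])

if __name__ == "__main__":
    web.run_app(app, port=8080)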
Connection pool
aiohttp also has a connection pool:
https://docs.aiohttp.org/en/latest/client_advanced.html#connectors
conn = aiohttp.TCPConnector()
# conn = aiohttp.TCPConnector(limit=30)
# conn = aiohttp.TCPConnector(limit=0)            # no limit; the default is 100
# conn = aiohttp.TCPConnector(limit_per_host=30)  # default is 0
session = aiohttp.ClientSession(connector=conn)
I found this question after searching Google for how to reuse an aiohttp ClientSession instance, after my code triggered this warning message: UserWarning: Creating a client session outside of coroutine is a very dangerous idea
This code may not solve the above problem, though it is related. I am new to asyncio and aiohttp, so this may not be best practice. It's the best I could come up with after reading a lot of seemingly conflicting information.
I created a class ResourceManager taken from the Python docs that opens a context.
The ResourceManager instance handles the opening and closing of the aiohttp ClientSession instance via the magic methods __aenter__ and __aexit__ with BaseScraper.set_session and BaseScraper.close_session wrapper methods.
I was able to reuse a ClientSession instance with the following code.
The BaseScraper class also has methods for authentication. It depends on the lxml third-party package.
import asyncio
from time import time
from contextlib import contextmanager, AbstractContextManager, ExitStack

import aiohttp
import lxml.html


class ResourceManager(AbstractContextManager):
    # Code taken from Python docs: 29.6.2.4. of https://docs.python.org/3.6/library/contextlib.html

    def __init__(self, scraper, check_resource_ok=None):
        self.acquire_resource = scraper.acquire_resource
        self.release_resource = scraper.release_resource
        if check_resource_ok is None:
            def check_resource_ok(resource):
                return True
        self.check_resource_ok = check_resource_ok

    @contextmanager
    def _cleanup_on_error(self):
        with ExitStack() as stack:
            stack.push(self)
            yield
            # The validation check passed and didn't raise an exception
            # Accordingly, we want to keep the resource, and pass it
            # back to our caller
            stack.pop_all()

    def __enter__(self):
        resource = self.acquire_resource()
        with self._cleanup_on_error():
            if not self.check_resource_ok(resource):
                msg = "Failed validation for {!r}"
                raise RuntimeError(msg.format(resource))
        return resource

    def __exit__(self, *exc_details):
        # We don't need to duplicate any of our resource release logic
        self.release_resource()


class BaseScraper:
    login_url = ""
    login_data = dict()  # dict of key, value pairs to fill the login form
    loop = asyncio.get_event_loop()

    def __init__(self, urls):
        self.urls = urls
        self.acquire_resource = self.set_session
        self.release_resource = self.close_session

    async def _set_session(self):
        self.session = await aiohttp.ClientSession().__aenter__()

    def set_session(self):
        set_session_attr = self.loop.create_task(self._set_session())
        self.loop.run_until_complete(set_session_attr)
        return self  # variable after "as" becomes instance of BaseScraper

    async def _close_session(self):
        await self.session.__aexit__(None, None, None)

    def close_session(self):
        close_session = self.loop.create_task(self._close_session())
        self.loop.run_until_complete(close_session)

    def __call__(self):
        fetch_urls = self.loop.create_task(self._fetch())
        return self.loop.run_until_complete(fetch_urls)

    async def _get(self, url):
        async with self.session.get(url) as response:
            result = await response.read()
        return url, result

    async def _fetch(self):
        tasks = (self.loop.create_task(self._get(url)) for url in self.urls)
        start = time()
        results = await asyncio.gather(*tasks)
        print(
            "time elapsed: {} seconds \nurls count: {}".format(
                time() - start, len(self.urls)
            )
        )
        return results

    @property
    def form(self):
        """Create and return form for authentication."""
        form = aiohttp.FormData(self.login_data)
        get_login_page = self.loop.create_task(self._get(self.login_url))
        url, login_page = self.loop.run_until_complete(get_login_page)
        login_html = lxml.html.fromstring(login_page)
        hidden_inputs = login_html.xpath(r'//form//input[@type="hidden"]')
        login_form = {x.attrib["name"]: x.attrib["value"] for x in hidden_inputs}
        for key, value in login_form.items():
            form.add_field(key, value)
        return form

    async def _login(self, form):
        async with self.session.post(self.login_url, data=form) as response:
            if response.status != 200:
                response.raise_for_status()
            print("logged into {}".format(self.login_url))
            await response.release()

    def login(self):
        post_login_form = self.loop.create_task(self._login(self.form))
        self.loop.run_until_complete(post_login_form)


if __name__ == "__main__":
    urls = ("http://example.com",) * 10
    base_scraper = BaseScraper(urls)
    with ResourceManager(base_scraper) as scraper:
        for url, html in scraper():
            print(url, len(html))
