When using asyncio to receive multiple files over a TCP socket, I struggle with the call order of data_received. When sending 3 data streams at once, my output is the following:
DEBUG connection_made ('127.0.0.1', 33972) new connection was made
DEBUG connection_made ('127.0.0.1', 33974) new connection was made
DEBUG connection_made ('127.0.0.1', 33976) new connection was made
DEBUG data_received ('127.0.0.1', 33976) data received from new connection
DEBUG data_received ('127.0.0.1', 33974) data received from new connection
DEBUG data_received ('127.0.0.1', 33972) data received from new connection
I assume its behaviour is analogous to a stack, where data is received from the newest connection to the oldest, but this is only a guess.
Is it possible to change that behaviour so that the data is received in the order in which the connections were made? This is important because I need the data received from the first connection to process the subsequent connections.
My code is the following:
import asyncio
import logging
import time

logger = logging.getLogger(__name__)

class AIO(asyncio.Protocol):
    def __init__(self):
        self.extra = bytearray()

    def connection_made(self, transport):
        global request_time
        peer = transport.get_extra_info('peername')
        logger.debug('{} new connection was made'.format(peer))
        self.transport = transport
        request_time = str(time.time())

    def data_received(self, request, msgid=1):
        peer = self.transport.get_extra_info('peername')
        logger.debug('{} data received from new connection'.format(peer))
        self.extra.extend(request)
First, I would recommend using the higher-level streams API instead of transports and protocols. Then, if you need to maintain the order in which the connections were made, you can enforce it yourself with a chain of asyncio.Event objects. For example:
import asyncio

class Comm:
    def __init__(self):
        self.extra = bytearray()
        self.preceding_evt = None

    async def serve(self, r, w):
        preceding_evt = self.preceding_evt
        self.preceding_evt = this_evt = asyncio.Event()
        while True:
            request = await r.read(4096)
            if not request:
                # EOF: unblock the subsequent connection and stop
                this_evt.set()
                break
            # wait for the preceding connection to receive and store data
            # before we store ours
            if preceding_evt is not None:
                await preceding_evt.wait()
                preceding_evt.clear()
            # self.extra now contains data from previous connections
            self.extra.extend(request)
            # inform the subsequent connection that we got data
            this_evt.set()
        w.close()

async def main():
    comm = Comm()
    server = await asyncio.start_server(comm.serve, '127.0.0.1', 8888)
    async with server:
        await server.serve_forever()

asyncio.run(main())
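To see the ordering in action, here is a minimal driver of my own (not part of the original answer; it assumes the server above is listening on 127.0.0.1:8888) that opens three connections in sequence and then sends from all of them at once:

import asyncio

async def drive():
    writers = []
    # connect one at a time so the server registers the connections in order
    for i in range(3):
        _, w = await asyncio.open_connection('127.0.0.1', 8888)
        writers.append(w)
    # now send from all three "at once"; the Event chain still serializes
    # the order in which Comm.serve stores the data
    for i, w in enumerate(writers):
        w.write('data from connection {}'.format(i).encode())
    for w in writers:
        await w.drain()
        w.close()

asyncio.run(drive())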
Related
I'm working on a "server" thread, which takes care of some IO calls for a bunch of "clients".
The communication is done using pynng v0.5.0, the server has its own asyncio loop.
Each client "registers" by sending a first request, and then loops receiving the results and sending back READY messages.
On the server, the goal is to treat the first message of each client as a registration request, and to create a dedicated worker task which will loop doing IO stuff, sending the result and waiting for the READY message of that particular client.
To implement this, I'm trying to leverage the Context feature of REP0 sockets.
Side notes
I would have liked to tag this question with nng and pynng, but I don't have enough reputation.
Although I'm an avid consumer of this site, it's my first question :)
I do know about the PUB/SUB pattern, let's just say that for self-instructional purposes, I chose not to use it for this service.
Problem:
After a few iterations, some READY messages are intercepted by the server's registration coroutine instead of being routed to the proper worker task.
Worse, as you can see in the output, some result messages are sent to the wrong client (ERROR:root:<Worker 1>: worker/client mismatch, exiting.).
Since I can't share the actual code, I wrote a reproducer for the issue and included it below.
It looks like a bug, but I'm not entirely sure I understand how to use contexts correctly, so any help would be appreciated.
Environment:
winpython-3.8.2
pynng v0.5.0+dev (46fbbcb2), with nng v1.3.0 (ff99ee51)
Code:
import asyncio
import logging
import pynng
import threading

NNG_DURATION_INFINITE = -1
ENDPOINT = 'inproc://example_endpoint'


class Server(threading.Thread):
    def __init__(self):
        super(Server, self).__init__()
        self._client_tasks = dict()

    @staticmethod
    async def _worker(ctx, client_id):
        while True:
            # Remember, the first 'receive' has already been done by self._new_client_handler()
            logging.debug(f"<Worker {client_id}>: doing some IO")
            await asyncio.sleep(1)

            logging.debug(f"<Worker {client_id}>: sending the result")
            # I already tried sending synchronously here instead, just in case the issue was related to that
            # (but it's not)
            await ctx.asend(f"result data for client {client_id}".encode())

            logging.debug(f"<Worker {client_id}>: waiting for client READY msg")
            data = await ctx.arecv()
            logging.debug(f"<Worker {client_id}>: received '{data}'")
            if data != bytes([client_id]):
                logging.error(f"<Worker {client_id}>: worker/client mismatch, exiting.")
                return

    async def _new_client_handler(self):
        with pynng.Rep0(listen=ENDPOINT) as socket:
            max_workers = 3 + 1  # Try setting it to 3 instead, to stop creating new contexts => now it works fine
            while await asyncio.sleep(0, result=True) and len(self._client_tasks) < max_workers:
                # The issue is here: at some point, the existing client READY messages get
                # intercepted here, instead of being routed to the proper worker context.
                # The intent here was to open a new context only for each *new* client, I was
                # assuming that a 'recv' on older worker contexts would take precedence.
                ctx = socket.new_context()
                data = await ctx.arecv()
                client_id = data[0]
                if client_id in self._client_tasks:
                    logging.error(f"<Server>: We already have a task for client {client_id}")
                    continue  # just let the client block on its 'recv' for now
                logging.debug(f"<Server>: New client : {client_id}")
                self._client_tasks[client_id] = asyncio.create_task(self._worker(ctx, client_id))

            await asyncio.gather(*list(self._client_tasks.values()))

    def run(self) -> None:
        # The "server" thread has its own asyncio loop
        asyncio.run(self._new_client_handler(), debug=True)


class Client(threading.Thread):
    def __init__(self, client_id: int):
        super(Client, self).__init__()
        self._id = client_id

    def __repr__(self):
        return f'<Client {self._id}>'

    def run(self):
        with pynng.Req0(dial=ENDPOINT, resend_time=NNG_DURATION_INFINITE) as socket:
            while True:
                logging.debug(f"{self}: READY")
                socket.send(bytes([self._id]))
                data_str = socket.recv().decode()
                logging.debug(f"{self}: received '{data_str}'")
                if data_str != f"result data for client {self._id}":
                    logging.error(f"{self}: client/worker mismatch, exiting.")
                    return


def main():
    logging.basicConfig(level=logging.DEBUG)
    threads = [Server(),
               *[Client(i) for i in range(3)]]
    for t in threads:
        t.start()
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()
Output:
DEBUG:asyncio:Using proactor: IocpProactor
DEBUG:root:<Client 1>: READY
DEBUG:root:<Client 0>: READY
DEBUG:root:<Client 2>: READY
DEBUG:root:<Server>: New client : 1
DEBUG:root:<Worker 1>: doing some IO
DEBUG:root:<Server>: New client : 0
DEBUG:root:<Worker 0>: doing some IO
DEBUG:root:<Server>: New client : 2
DEBUG:root:<Worker 2>: doing some IO
DEBUG:root:<Worker 1>: sending the result
DEBUG:root:<Client 1>: received 'result data for client 1'
DEBUG:root:<Client 1>: READY
ERROR:root:<Server>: We already have a task for client 1
DEBUG:root:<Worker 1>: waiting for client READY msg
DEBUG:root:<Worker 0>: sending the result
DEBUG:root:<Client 0>: received 'result data for client 0'
DEBUG:root:<Client 0>: READY
DEBUG:root:<Worker 0>: waiting for client READY msg
DEBUG:root:<Worker 1>: received 'b'\x00''
ERROR:root:<Worker 1>: worker/client mismatch, exiting.
DEBUG:root:<Worker 2>: sending the result
DEBUG:root:<Client 2>: received 'result data for client 2'
DEBUG:root:<Client 2>: READY
DEBUG:root:<Worker 2>: waiting for client READY msg
ERROR:root:<Server>: We already have a task for client 2
Edit (2020-04-10): updated both pynng and the underlying nng.lib to their latest version (master branches), still the same issue.
After digging into the sources of both nng and pynng, and confirming my understanding with the maintainers, I can now answer my own question.
When using a context on a REP0 socket, there are a few things to be aware of.
As advertised, send/asend() is guaranteed to be routed to the same peer you last received from.
The data from the next recv/arecv() on this same context, however, is NOT guaranteed to be coming from the same peer.
Actually, the underlying nng call to rep0_ctx_recv() merely reads the next socket pipe with available data, so there's no guarantee that said data comes from the same peer as the last recv/send pair.
In the reproducer above, I was concurrently calling arecv() both on a new context (in the Server._new_client_handler() coroutine), and on each worker context (in the Server._worker() coroutine).
So what I had previously described as the next request being "intercepted" by the main coroutine was merely a race condition.
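To make the race concrete, here is a condensed sketch of my own (not from the original post) of the two competing receives:

import asyncio

async def demo_race(socket):
    # 'socket' is an open pynng.Rep0 socket. Both arecv() calls below are
    # pending at the same time; nng hands the next incoming request to
    # whichever receive it services first, so a READY message intended for
    # the worker's context can land on the freshly created context instead.
    ctx_worker = socket.new_context()
    ctx_new = socket.new_context()
    recv_worker = asyncio.create_task(ctx_worker.arecv())
    recv_new = asyncio.create_task(ctx_new.arecv())
    done, pending = await asyncio.wait(
        {recv_worker, recv_new}, return_when=asyncio.FIRST_COMPLETED)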
One solution would be to receive only from the Server._new_client_handler() coroutine and have each worker handle exactly one request. Note that in this case the workers are no longer dedicated to a particular peer; if that behavior is needed, the routing of incoming requests must be handled at application level (see the sketch after the code below).
import random  # needed for the variable sleep below; other imports as before


class Server(threading.Thread):
    @staticmethod
    async def _worker(ctx, data: bytes):
        client_id = int.from_bytes(data, byteorder='big', signed=False)
        logging.debug(f"<Worker {client_id}>: doing some IO")
        await asyncio.sleep(1 + 10 * random.random())
        logging.debug(f"<Worker {client_id}>: sending the result")
        await ctx.asend(f"result data for client {client_id}".encode())

    async def _new_client_handler(self):
        with pynng.Rep0(listen=ENDPOINT) as socket:
            while await asyncio.sleep(0, result=True):
                ctx = socket.new_context()
                data = await ctx.arecv()
                asyncio.create_task(self._worker(ctx, data))

    def run(self) -> None:
        # The "server" thread has its own asyncio loop
        asyncio.run(self._new_client_handler(), debug=False)
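If dedicated per-client workers are really needed, the application-level routing mentioned above could look roughly like this (my sketch, using one asyncio.Queue per client; names are illustrative):

import asyncio

queues = {}  # client_id -> asyncio.Queue of (context, payload) pairs

async def dispatcher(socket):
    # Single receive loop: every request is read here, then routed.
    while True:
        ctx = socket.new_context()
        data = await ctx.arecv()
        client_id = data[0]
        if client_id not in queues:
            queues[client_id] = asyncio.Queue()
            asyncio.create_task(worker(client_id, queues[client_id]))
        # Hand the (context, payload) pair to the dedicated worker,
        # which replies on that same context.
        await queues[client_id].put((ctx, data))

async def worker(client_id, queue):
    while True:
        ctx, data = await queue.get()
        await asyncio.sleep(1)  # do some IO
        await ctx.asend(f"result data for client {client_id}".encode())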
So, I can pretty easily create an SSLSocket in asyncio:
# Create a Socket
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
sock.setblocking(False)

# Connect
await loop.sock_connect(sock, host_address)  # type: ignore

# Create an SSL Context
ssl_context = ssl.create_default_context(cafile=PROXY_CA_BUNDLE)
...

# Wrap the SSL Context Around the Socket
def do_handshake(loop, sock, waiter):
    sock_fd = sock.fileno()
    try:
        sock.do_handshake()
    except ssl.SSLWantReadError:
        loop.remove_reader(sock_fd)
        loop.add_reader(sock_fd, do_handshake,
                        loop, sock, waiter)
        return
    except ssl.SSLWantWriteError:
        loop.remove_writer(sock_fd)
        loop.add_writer(sock_fd, do_handshake,
                        loop, sock, waiter)
        return
    loop.remove_reader(sock_fd)
    loop.remove_writer(sock_fd)
    waiter.set_result(None)

waiter = loop.create_future()
do_handshake(loop, sslconn, waiter)
await waiter
The problem is that an SSLSocket breaks the non-blocking interface of the original socket library, so it is no longer compatible with other asyncio methods like asyncio.sock_sendall. Is there a way to instead wrap a socket and have it still respect the original socket interface?
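One possibility (my sketch, not from the question) is to keep the plain non-blocking socket for all I/O and do the TLS framing in memory with ssl.MemoryBIO and SSLObject, shuttling ciphertext through the loop's sock_* methods. A rough handshake sketch, assuming a connected non-blocking sock and an ssl_context as above:

import ssl

async def tls_handshake(loop, sock, ssl_context, server_hostname):
    incoming = ssl.MemoryBIO()   # ciphertext received from the peer
    outgoing = ssl.MemoryBIO()   # ciphertext to be sent to the peer
    sslobj = ssl_context.wrap_bio(incoming, outgoing,
                                  server_hostname=server_hostname)
    while True:
        try:
            sslobj.do_handshake()
            break
        except ssl.SSLWantReadError:
            # flush our handshake bytes, then feed the peer's reply back in
            await loop.sock_sendall(sock, outgoing.read())
            incoming.write(await loop.sock_recv(sock, 65536))
    await loop.sock_sendall(sock, outgoing.read())  # flush any tail bytes
    # From here, encrypt with sslobj.write()/decrypt with sslobj.read(),
    # while the untouched 'sock' stays usable with loop.sock_* methods.
    return sslobj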
I'm using an almost standard example of NATS on Python asyncio. I want to receive a message, process it, and send the result
back to the queue, but when NATS disconnects (e.g. when gnatsd is restarted), no exception is raised. I even added await asyncio.sleep(1, loop=loop) to switch context
so that the disconnect -> reconnect exception would be thrown, but it never happens. What am I doing wrong? Could it be a bug?
import asyncio
from nats.aio.client import Client as NATS
import time


async def run(loop):
    nc = NATS()
    await nc.connect(io_loop=loop)

    async def message_handler(msg):
        subject = msg.subject
        reply = msg.reply
        data = msg.data.decode()
        print("Received a message on '{subject} {reply}': {data}".format(
            subject=subject, reply=reply, data=data))
        # Working
        time.sleep(10)
        # If nats disconnects at this point, the exception will not be raised
        # and an attempt will be made to send a message via nc.publish
        await asyncio.sleep(2, loop=loop)
        print("UNSLEEP")
        await nc.publish("test", "test payload".encode())
        print("PUBLISHED")

    # Simple publisher and async subscriber via coroutine.
    await nc.subscribe("foo", cb=message_handler)

    while True:
        await asyncio.sleep(1, loop=loop)

    await nc.close()


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run(loop))
    loop.close()
NATS is built on top of TCP.
TCP has no reliable disconnection signal by definition.
To work around this, any messaging system should use some kind of ping message and drop the connection if a timeout occurs.
Strictly speaking, you will eventually get a disconnection event, but it may take up to two hours (depending on your OS settings).
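With the asyncio NATS client you can shorten that window by tuning the client-side ping and registering connection callbacks. A sketch (option names as in nats.aio.client; treat them as assumptions for your version):

from nats.aio.client import Client as NATS

async def connect(loop):
    nc = NATS()

    async def disconnected_cb():
        print("NATS disconnected")

    async def reconnected_cb():
        print("NATS reconnected")

    # A dead TCP peer is declared gone after roughly
    # ping_interval * max_outstanding_pings seconds.
    await nc.connect(io_loop=loop,
                     ping_interval=5,
                     max_outstanding_pings=2,
                     disconnected_cb=disconnected_cb,
                     reconnected_cb=reconnected_cb)
    return nc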
I have gone through many forums and sites but did not find a solution to my problem.
I have this server.py file :
from autobahn.twisted.websocket import WebSocketServerProtocol, \
    WebSocketServerFactory


class MyServerProtocol(WebSocketServerProtocol):

    def onConnect(self, request):
        print("Client connecting: {0}".format(request.peer))

    def onOpen(self):
        print("WebSocket connection open.")

    def onMessage(self, payload, isBinary):
        if isBinary:
            print("Binary message received: {0} bytes".format(len(payload)))
        else:
            print("Text message received: {0}".format(payload.decode('utf8')))
            print("Text message received: {0}".format(self.peer))
        # echo back message verbatim
        self.sendMessage(payload, isBinary)

    def onClose(self, wasClean, code, reason):
        print("WebSocket connection closed: {0}".format(reason))


if __name__ == '__main__':
    import sys
    from twisted.python import log
    from twisted.internet import reactor

    log.startLogging(sys.stdout)
    factory = WebSocketServerFactory(u"ws://127.0.0.1:9000")
    factory.protocol = MyServerProtocol
    # factory.setProtocolOptions(maxConnections=2)
    # note to self: if using putChild, the child must be bytes...
    reactor.listenTCP(9000, factory)
    reactor.run()
What I want to do is, inside onMessage, receive the payload from one client and then send that payload to a different client, instead of echoing it back to the same client.
Currently I can receive the payload successfully. But what is the way to send that payload to a different client?
I have seen similar questions on many sites but none of them helped.
This is a variation on the FAQ "How do I make input on one connection result in output on another?"
Essentially, you just need a reference to the protocol for the other connection so you can call sendMessage on it. That reference may take the form of an attribute on your MyServerProtocol or on the factory or another object. Perhaps it will be a direct reference to another protocol instance or perhaps it will be a collection (a list, a dict, a set) for more complicated interactions.
Once you have the reference, you just call sendMessage with it and the message goes to that connection instead of the connection that self represents.
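For example, a minimal sketch (one way among several; the factory subclass is mine) that keeps the collection on the factory and forwards each message to every other connected client:

from autobahn.twisted.websocket import WebSocketServerProtocol, WebSocketServerFactory

class MyServerProtocol(WebSocketServerProtocol):
    def onOpen(self):
        # register this connection with the shared factory
        self.factory.clients.append(self)

    def onMessage(self, payload, isBinary):
        # forward to every *other* connected client
        for client in self.factory.clients:
            if client is not self:
                client.sendMessage(payload, isBinary)

    def onClose(self, wasClean, code, reason):
        if self in self.factory.clients:
            self.factory.clients.remove(self)

class MyServerFactory(WebSocketServerFactory):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.clients = []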
Inspired by ipython-notebook-proxy and based on ipydra, I am extending the latter to support more complex user authentication as well as a proxy, because in my use case only port 80 can be exposed.
I am using flask-sockets for the gunicorn worker, but I am having trouble proxying WebSockets. IPython uses three different WebSocket connections, /shell, /stdin, and /iopub, but I am only able to get the 101 Switching Protocols response for the first two, and /stdin receives a Connection Close frame as soon as it is created.
This is the code excerpt in question:
# Flask imports...
from werkzeug import LocalProxy
from ws4py.client.geventclient import WebSocketClient

# I use my own LocalProxy because flask-sockets does not support Werkzeug Rules
websocket = LocalProxy(lambda: request.environ.get('wsgi.websocket', None))
websockets = {}

PROXY_DOMAIN = "127.0.0.1:8888"  # IPython host and port

methods = ["GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH",
           "CONNECT"]


@app.route('/', defaults={'url': ''}, methods=methods)
@app.route('/<path:url>', methods=methods)
def proxy(url):
    with app.test_request_context():
        if websocket:
            while True:
                data = websocket.receive()
                websocket_url = 'ws://{}/{}'.format(PROXY_DOMAIN, url)
                if websocket_url not in websockets:
                    client = WebSocketClient(websocket_url,
                                             protocols=['http-only', 'chat'])
                    websockets[websocket_url] = client
                else:
                    client = websockets[websocket_url]
                client.connect()
                if data:
                    client.send(data)
                client_data = client.receive()
                if client_data:
                    websocket.send(client_data)
    return Response()
I also tried to create my own WebSocket proxy class, but it doesn't work either.
class WebSocketProxy(WebSocketClient):

    def __init__(self, to, *args, **kwargs):
        self.to = to
        print(("Proxy to", self.to))
        super(WebSocketProxy, self).__init__(*args, **kwargs)

    def opened(self):
        m = self.to.receive()
        print("<= %d %s" % (len(m), str(m)))
        self.send(m)

    def closed(self, code, reason):
        print(("Closed down", code, reason))

    def received_message(self, m):
        print("=> %d %s" % (len(m), str(m)))
        self.to.send(m)
The regular request-response cycle works like a charm, so I removed that code. If interested, the complete code is hosted in hidra.
I run the server with
$ gunicorn -k flask_sockets.worker hidra:app
Here is my solution(ish). It is crude, but should serve as a starting point for building a websocket proxy. The full code is available in an unreleased project, pyramid_notebook.
This uses ws4py and uWSGI instead of gunicorn.
We use uWSGI's internal mechanism to run the downstream websocket message loop. There is nothing like WSGI for websockets in the Python world (yet?), so it looks like every web server implements its own mechanism.
A custom ws4py ProxyClient is created which combines the ws4py event loop with the uWSGI event loop.
The thing is started and messages start flying around.
This uses a Pyramid request (based on WebOb), but that really shouldn't matter; the code should work for any Python WSGI app with little modification.
As you can see, this does not really take advantage of asynchronicity; it just sleep()s if there is nothing coming in from the socket.
Code goes here:
"""UWSGI websocket proxy."""
from urllib.parse import urlparse, urlunparse
import logging
import time
import uwsgi
from ws4py import WS_VERSION
from ws4py.client import WebSocketBaseClient
#: HTTP headers we need to proxy to upstream websocket server when the Connect: upgrade is performed
CAPTURE_CONNECT_HEADERS = ["sec-websocket-extensions", "sec-websocket-key", "origin"]
logger = logging.getLogger(__name__)
class ProxyClient(WebSocketBaseClient):
"""Proxy between upstream WebSocket server and downstream UWSGI."""
#property
def handshake_headers(self):
"""
List of headers appropriate for the upgrade
handshake.
"""
headers = [
('Host', self.host),
('Connection', 'Upgrade'),
('Upgrade', 'websocket'),
('Sec-WebSocket-Key', self.key.decode('utf-8')),
# Origin is proxyed from the downstream server, don't set it twice
# ('Origin', self.url),
('Sec-WebSocket-Version', str(max(WS_VERSION)))
]
if self.protocols:
headers.append(('Sec-WebSocket-Protocol', ','.join(self.protocols)))
if self.extra_headers:
headers.extend(self.extra_headers)
logger.info("Handshake headers: %s", headers)
return headers
def received_message(self, m):
"""Push upstream messages to downstream."""
# TODO: No support for binary messages
m = str(m)
logger.debug("Incoming upstream WS: %s", m)
uwsgi.websocket_send(m)
logger.debug("Send ok")
def handshake_ok(self):
"""
Called when the upgrade handshake has completed
successfully.
Starts the client's thread.
"""
self.run()
def terminate(self):
raise RuntimeError("NO!")
super(ProxyClient, self).terminate()
def run(self):
"""Combine async uwsgi message loop with ws4py message loop.
TODO: This could do some serious optimizations and behave asynchronously correct instead of just sleep().
"""
self.sock.setblocking(False)
try:
while not self.terminated:
logger.debug("Doing nothing")
time.sleep(0.050)
logger.debug("Asking for downstream msg")
msg = uwsgi.websocket_recv_nb()
if msg:
logger.debug("Incoming downstream WS: %s", msg)
self.send(msg)
s = self.stream
self.opened()
logger.debug("Asking for upstream msg")
try:
bytes = self.sock.recv(self.reading_buffer_size)
if bytes:
self.process(bytes)
except BlockingIOError:
pass
except Exception as e:
logger.exception(e)
finally:
logger.info("Terminating WS proxy loop")
self.terminate()
def serve_websocket(request, port):
"""Start UWSGI websocket loop and proxy."""
env = request.environ
# Send HTTP response 101 Switch Protocol downstream
uwsgi.websocket_handshake(env['HTTP_SEC_WEBSOCKET_KEY'], env.get('HTTP_ORIGIN', ''))
# Map the websocket URL to the upstream localhost:4000x Notebook instance
parts = urlparse(request.url)
parts = parts._replace(scheme="ws", netloc="localhost:{}".format(port))
url = urlunparse(parts)
# Proxy initial connection headers
headers = [(header, value) for header, value in request.headers.items() if header.lower() in CAPTURE_CONNECT_HEADERS]
logger.info("Connecting to upstream websockets: %s, headers: %s", url, headers)
ws = ProxyClient(url, headers=headers)
ws.connect()
# Happens only if exceptions fly around
return ""