Pyarrow basic auth: How to prevent `Stream is closed`? (Windows)

I am new to Arrow Flight and pyarrow (v=6.0.1), and am trying to implement basic auth but I am always getting an error:
OSError: Stream is closed
I have created a minimal reproducible example by running the following two files sequentially (the server and the client, respectively):
from typing import Dict, Union

from pyarrow.lib import tobytes
from pyarrow.flight import BasicAuth, FlightUnauthenticatedError, ServerAuthHandler, FlightServerBase
from pyarrow._flight import ServerAuthSender, ServerAuthReader

class ServerBasicAuthHandler(ServerAuthHandler):
    def __init__(self, creds: Dict[str, str]):
        self.creds = {user.encode(): pw.encode() for user, pw in creds.items()}

    def authenticate(self, outgoing: ServerAuthSender, incoming: ServerAuthReader):
        buf = incoming.read()  # this line raises "OSError: Stream is closed"
        auth = BasicAuth.deserialize(buf)
        if auth.username not in self.creds:
            raise FlightUnauthenticatedError("unknown user")
        if self.creds[auth.username] != auth.password:
            raise FlightUnauthenticatedError("wrong password")
        outgoing.write(tobytes(auth.username))

    def is_valid(self, token: bytes) -> Union[bytes, str]:
        if not token:
            raise FlightUnauthenticatedError("no basic auth provided")
        if token not in self.creds:
            raise FlightUnauthenticatedError("unknown user")
        return token

service = FlightServerBase(
    location="grpc://[::]:50051",
    auth_handler=ServerBasicAuthHandler({"user": "pw"}),
)
service.serve()
from pyarrow.flight import FlightClient

client = FlightClient(location="grpc://localhost:50051")
client.authenticate_basic_token("user", "pw")
I basically copied the ServerAuthHandler implementation from their tests, so it should be proven to work. However, I can't get it to work.
The error message Stream is closed is hard to debug. I don't know where it comes from, and I can't trace it to anywhere within the pyarrow implementation (neither the Python side nor the C++ side).
Any help or hints on how to prevent this error would be appreciated.

The example in the OP mixes up two authentication implementations (which is indeed confusing). The "BasicAuth" object isn't the actual HTTP basic authentication that the authenticate_basic_token method implements; this is because contributors have implemented a variety of authentication methods over the years. The actual test is as follows:
header_auth_server_middleware_factory = HeaderAuthServerMiddlewareFactory()
no_op_auth_handler = NoopAuthHandler()

def test_authenticate_basic_token():
    """Test authenticate_basic_token with bearer token and auth headers."""
    with HeaderAuthFlightServer(auth_handler=no_op_auth_handler, middleware={
        "auth": HeaderAuthServerMiddlewareFactory()
    }) as server:
        client = FlightClient(('localhost', server.port))
        token_pair = client.authenticate_basic_token(b'test', b'password')
        assert token_pair[0] == b'authorization'
        assert token_pair[1] == b'Bearer token1234'
i.e. we're not using authenticate but rather a "middleware" to do the implementation. A full example looks as follows:
import base64

import pyarrow.flight as flight

class BasicAuthServerMiddlewareFactory(flight.ServerMiddlewareFactory):
    def __init__(self, creds):
        self.creds = creds

    def start_call(self, info, headers):
        token = None
        for header in headers:
            if header.lower() == "authorization":
                token = headers[header]
                break
        if not token:
            raise flight.FlightUnauthenticatedError("No credentials supplied")
        values = token[0].split(' ', 1)
        if values[0] == 'Basic':
            decoded = base64.b64decode(values[1])
            pair = decoded.decode("utf-8").split(':')
            if pair[0] not in self.creds:
                raise flight.FlightUnauthenticatedError("No credentials supplied")
            if pair[1] != self.creds[pair[0]]:
                raise flight.FlightUnauthenticatedError("No credentials supplied")
            return BasicAuthServerMiddleware("BearerTokenValue")
        raise flight.FlightUnauthenticatedError("No credentials supplied")

class BasicAuthServerMiddleware(flight.ServerMiddleware):
    def __init__(self, token):
        self.token = token

    def sending_headers(self):
        return {'authorization': f'Bearer {self.token}'}

class NoOpAuthHandler(flight.ServerAuthHandler):
    def authenticate(self, outgoing, incoming):
        pass

    def is_valid(self, token):
        return ""

with flight.FlightServerBase(auth_handler=NoOpAuthHandler(), middleware={
    "basic": BasicAuthServerMiddlewareFactory({"test": "password"})
}) as server:
    client = flight.connect(('localhost', server.port))
    token_pair = client.authenticate_basic_token(b'test', b'password')
    print(token_pair)
    assert token_pair[0] == b'authorization'
    assert token_pair[1] == b'Bearer BearerTokenValue'
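On later calls, the returned pair can be attached as a per-call header. A rough sketch (assuming the factory above is extended to also accept 'Bearer' values, which the version shown rejects):
# reuse the bearer token on later calls via per-call headers
options = flight.FlightCallOptions(headers=[token_pair])
for f in client.list_flights(options=options):
    print(f)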

I think it's simply due to this not being supported on Windows.
On closer inspection, the test that "proves it works" is skipped on Windows. The comment refers to this issue. That issue has ostensibly been fixed, though, and says nothing about why it would fail with Stream is closed.

Related

JMeter throws error on Encryption - using Public Key

I am currently working on an application where RSA encryption is used to encrypt sensitive data. I have tried incorporating the standard encryption method, but it is throwing errors. I have selected the language Groovy. Can someone shed light on whether I am doing it right?
import javax.crypto.Cipher
import java.security.KeyFactory
import java.security.spec.X509EncodedKeySpec
def publicKey = '5dy47yt7ty5ad283c0c4955f53csa24wse244wfrfafa34239rsgd89gfsg8342r93r98efae89fdf9983r9gjsdgnsgjkwt23r923r2r0943tf9sdg9d8gfsgf90sgsf89grw098tg09s90ig90g90s903r5244r517823rea8f8werf9842tf24tf42e0132saf9fg6f65afa43f12r103tf4040ryrw0e9rtqtwe0r9t04ty8842t03e9asfads0fgadg675'
def x509PublicKey = new X509EncodedKeySpec(publicKey.decodeBase64())
def keyFactory = KeyFactory.getInstance('RSA')
def key = keyFactory.generatePublic(x509Publickey)
def string2Encrypt = '("testinga@gmail.com|testingb@gmail.com").'
def encryptCipher = Cipher.getInstance('RSA')
encryptCipher.init(Cipher.ENCRYPT_MODE,key)
def secretMessage = string2Encrypt.getBytes('UTF-8')
def encryptedMessage = encryptCipher.doFinal(secretMessage)
def encodedMessage = encryptedMessage.encodedBase64().toString()
vars.put('encodedMessage',encodedMessage)
The output error I am getting:
Response Code: 500
Response Message:javax.script.ScriptException: groovy.lang.MissingPropertyException: No such property: x509Publickey for class: Script4
SampleResult fields:
ContentType:
DataEncoding: null
You have:
def x509PublicKey
^ mind the capital K
and
def key = keyFactory.generatePublic(x509Publickey)
^ mind the lower-case k
In Groovy these are absolutely different beasts; case sensitivity matters a lot. Choose one option and stick to it, and "your" script will start working as expected (or at least this error will go away).
More information:
Apache Groovy - Syntax
Apache Groovy - Why and How You Should Use It

How to save user data to a database instead of a pickle or a JSON file when trying to post videos on YouTube using Django and the Data v3 API

I'm trying to upload videos to YouTube using Django and MSSQL, and I want to store the user data in the DB so that I can log in from multiple accounts and post videos.
The official documentation provided by YouTube implements a file system: after login, all the user data gets saved to a file. I don't want to store any data in a file, as that is a huge risk and not good practice. So how can I bypass this step, save the data directly to the DB, and retrieve it when I want to post videos to a specific account?
In short, I want to replace the pickle file implementation with storing it in the database.
Here's my code:
def youtubeAuthenticate():
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "client_secrets.json"
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)
    return build(api_service_name, api_version, credentials=creds)

@api_view(['GET', 'POST'])
def postVideoYT(request):
    youtube = youtubeAuthenticate()
    print('yt', youtube)
    try:
        initialize_upload(youtube, request.data)
    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
    return Response("Hello")

def initialize_upload(youtube, options):
    print('options', options)
    print("title", options['title'])
    # tags = None
    # if options.keywords:
    #     tags = options.keywords.split(",")
    body = dict(
        snippet=dict(
            title=options['title'],
            description=options['description'],
            tags=options['keywords'],
            categoryId=options['categoryId']
        ),
        status=dict(
            privacyStatus=options['privacyStatus']
        )
    )
    # Call the API's videos.insert method to create and upload the video.
    insert_request = youtube.videos().insert(
        part=",".join(body.keys()),
        body=body,
        media_body=MediaFileUpload(options['file'], chunksize=-1, resumable=True)
    )
    path = pathlib.Path(options['file'])
    ext = path.suffix
    getSize = os.path.getsize(options['file'])
    resumable_upload(insert_request, ext, getSize)

# This method implements an exponential backoff strategy to resume a
# failed upload.
def resumable_upload(insert_request, ext, getSize):
    response = None
    error = None
    retry = 0
    while response is None:
        try:
            print("Uploading file...")
            status, response = insert_request.next_chunk()
            if response is not None:
                respData = response
                if 'id' in response:
                    print("Video id '%s' was successfully uploaded." % response['id'])
                else:
                    exit("The upload failed with an unexpected response: %s" % response)
        except HttpError as e:
            if e.resp.status in RETRIABLE_STATUS_CODES:
                error = "A retriable HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
            else:
                raise
        except RETRIABLE_EXCEPTIONS as e:
            error = "A retriable error occurred: %s" % e
        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                exit("No longer attempting to retry.")
            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print("Sleeping %f seconds and then retrying..." % sleep_seconds)
            time.sleep(sleep_seconds)
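Not a full answer, but a minimal sketch of what swapping the token.pickle read/write for a database table could look like. The YoutubeCredentials model and its field names here are hypothetical, and the sketch assumes google-auth's Credentials.to_json() and Credentials.from_authorized_user_info() for serialization:
# models.py -- hypothetical model; the name and fields are assumptions
from django.db import models

class YoutubeCredentials(models.Model):
    account_email = models.EmailField(unique=True)
    token_json = models.TextField()  # serialized google.oauth2 credentials

# helpers that would replace the token.pickle load/dump in youtubeAuthenticate()
import json
from google.oauth2.credentials import Credentials

def load_creds(account_email, scopes):
    # return stored Credentials, or None if this account never logged in
    row = YoutubeCredentials.objects.filter(account_email=account_email).first()
    if row is None:
        return None
    return Credentials.from_authorized_user_info(json.loads(row.token_json), scopes)

def save_creds(account_email, creds):
    # insert or update the serialized credentials for one account
    YoutubeCredentials.objects.update_or_create(
        account_email=account_email,
        defaults={"token_json": creds.to_json()},
    )
In youtubeAuthenticate(), the pickle.load(...) call would become creds = load_creds(email, SCOPES) and the pickle.dump(...) call would become save_creds(email, creds), keyed by whichever account you want to post from.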

DJANGO-STORAGES, PARAMIKO: connection failure for global connection

I have a strange problem using the SFTP API from django-storages (https://github.com/jschneier/django-storages). I am trying to use it to fetch media files that are stored on a different server, and thus needed to create a proxy for SFTP downloads, since plain Django just sends GET requests to the MEDIA_ROOT. I figured that middleware provides a good hook:
import mimetypes
from django.http import HttpResponse
from storages.backends.sftpstorage import SFTPStorage

class SFTPMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        # Code to be executed for each request before
        # the view (and later middleware) are called.
        response = self.get_response(request)
        try:
            path = request.get_full_path()
            SFTP = SFTPStorage()  # <- this is where the magic happens
            if SFTP.exists(path):
                file = SFTP._read(path)
                type, encoding = mimetypes.guess_type(path)
                response = HttpResponse(file, content_type=type)
                response['Content-Disposition'] = u'attachment; filename="{filename}"'.format(filename=path)
        except PermissionError:
            pass
        return response
which works fine, but obviously it opens a new connection every time a page is requested, which I don't want (it also crashes after about 3 reloads; I think it has too many parallel connections by then). So I tried opening just one connection to the server via SFTP by moving the SFTP = SFTPStorage() initialization into the __init__() method, which is called only once:
import mimetypes
from django.http import HttpResponse
from storages.backends.sftpstorage import SFTPStorage

class SFTPMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response
        self.SFTP = SFTPStorage()  # <- this is where the magic happens

    def __call__(self, request):
        # Code to be executed for each request before
        # the view (and later middleware) are called.
        response = self.get_response(request)
        try:
            path = request.get_full_path()
            if self.SFTP.exists(path):
                file = self.SFTP._read(path)
                type, encoding = mimetypes.guess_type(path)
                response = HttpResponse(file, content_type=type)
                response['Content-Disposition'] = u'attachment; filename="{filename}"'.format(filename=path)
        except PermissionError:
            pass
        return response
But this implementation doesn't seem to work; the program gets stuck either before the SFTP.exists() call or after the SFTP._read() call.
Can anybody tell me how to fix this, or does anybody have a better idea of how to tackle the problem?
Thanks in advance,
Kingrimursel
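One possible direction (a sketch, not verified against django-storages internals): paramiko channels are not thread-safe, so two requests sharing the single SFTPStorage connection concurrently can hang exactly as described. Serializing access through a lock keeps the one connection while preventing concurrent use:
import mimetypes
import threading
from django.http import HttpResponse
from storages.backends.sftpstorage import SFTPStorage

class SFTPMiddleware:
    def __init__(self, get_response):
        self.get_response = get_response
        self.SFTP = SFTPStorage()
        self.lock = threading.Lock()  # one request at a time on the shared channel

    def __call__(self, request):
        response = self.get_response(request)
        try:
            path = request.get_full_path()
            with self.lock:
                exists = self.SFTP.exists(path)
                file = self.SFTP._read(path) if exists else None
            if exists:
                type, encoding = mimetypes.guess_type(path)
                response = HttpResponse(file, content_type=type)
                response['Content-Disposition'] = 'attachment; filename="{}"'.format(path)
        except PermissionError:
            pass
        return response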

How to reuse aiohttp ClientSession pool?

The docs say to reuse the ClientSession:
Don’t create a session per request. Most likely you need a session per
application which performs all requests altogether.
A session contains a connection pool inside, connection reusage and
keep-alives (both are on by default) may speed up total performance.
But there doesn't seem to be any explanation in the docs about how to do this. There is one example that's maybe relevant, but it does not show how to reuse the pool elsewhere: http://aiohttp.readthedocs.io/en/stable/client.html#keep-alive-connection-pooling-and-cookie-sharing
Would something like this be the correct way to do it?
@app.listener('before_server_start')
async def before_server_start(app, loop):
    app.pg_pool = await asyncpg.create_pool(**DB_CONFIG, loop=loop, max_size=100)
    app.http_session_pool = aiohttp.ClientSession()

@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    app.http_session_pool.close()
    app.pg_pool.close()

@app.post("/api/register")
async def register(request):
    # json validation
    async with app.pg_pool.acquire() as pg:
        await pg.execute()  # create unactivated user in db
    async with app.http_session_pool as session:
        # TODO send activation email using SES API
        async with session.post('http://httpbin.org/post', data=b'data') as resp:
            print(resp.status)
            print(await resp.text())
    return HTTPResponse(status=204)
There are a few things I think can be improved:
1)
An instance of ClientSession is one session object. This one session contains a pool of connections, but it's not a "session_pool" itself. I would suggest renaming http_session_pool to http_session or maybe client_session.
2)
The session's close() method is a coroutine. You should await it:
await app.client_session.close()
Or even better (IMHO), instead of thinking about how to properly open/close the session, use a standard async context manager, awaiting __aenter__ / __aexit__:
@app.listener('before_server_start')
async def before_server_start(app, loop):
    # ...
    app.client_session = await aiohttp.ClientSession().__aenter__()

@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    await app.client_session.__aexit__(None, None, None)
    # ...
3)
Pay attention to this info:
However, if the event loop is stopped before the underlying connection
is closed, an ResourceWarning: unclosed transport warning is emitted
(when warnings are enabled).
To avoid this situation, a small delay must be added before closing
the event loop to allow any open underlying connections to close.
I'm not sure it's mandatory in your case, but there's nothing bad about adding await asyncio.sleep(0) inside after_server_stop, as the documentation advises:
@app.listener('after_server_stop')
async def after_server_stop(app, loop):
    # ...
    await asyncio.sleep(0)  # http://aiohttp.readthedocs.io/en/stable/client.html#graceful-shutdown
Upd:
A class that implements __aenter__ / __aexit__ can be used as an async context manager (i.e., in an async with statement). It allows some actions to be done before executing the inner block and after it. This is very similar to regular context managers, but asyncio-related. And just like a regular context manager, an async one can be used directly (without async with) by manually awaiting __aenter__ / __aexit__.
Why do I think it's better to create/free the session using __aenter__ / __aexit__ manually instead of using close()? Because we shouldn't have to worry about what actually happens inside __aenter__ / __aexit__. Imagine that in a future version of aiohttp the creation of a session changes so that you need to await open(), for example. If you use __aenter__ / __aexit__, you won't need to change your code.
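A minimal sketch of that idea: any class implementing __aenter__ / __aexit__ works both ways, in an async with block and by awaiting the methods manually (asyncio.run is used here for brevity):
import asyncio

class Resource:
    async def __aenter__(self):
        print("opened")
        return self

    async def __aexit__(self, exc_type, exc, tb):
        print("closed")

async def main():
    # the usual way
    async with Resource():
        pass
    # the manual way described above
    r = await Resource().__aenter__()
    await r.__aexit__(None, None, None)

asyncio.run(main())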
It seems there is no session pool in aiohttp.
// just posting some official docs.
persistent session
Here is the persistent-session usage demo from the official site:
https://docs.aiohttp.org/en/latest/client_advanced.html#persistent-session
async def persistent_session(app):
    app['PERSISTENT_SESSION'] = session = aiohttp.ClientSession()
    yield
    await session.close()

app.cleanup_ctx.append(persistent_session)

async def my_request_handler(request):
    session = request.app['PERSISTENT_SESSION']
    async with session.get("http://python.org") as resp:
        print(resp.status)
//TODO: a full runnable demo code
connection pool
aiohttp also has a connection pool:
https://docs.aiohttp.org/en/latest/client_advanced.html#connectors
conn = aiohttp.TCPConnector()
#conn = aiohttp.TCPConnector(limit=30)
#conn = aiohttp.TCPConnector(limit=0) # nolimit, default is 100.
#conn = aiohttp.TCPConnector(limit_per_host=30) # default is 0
session = aiohttp.ClientSession(connector=conn)
I found this question after searching Google for how to reuse an aiohttp ClientSession instance, after my code triggered this warning message: UserWarning: Creating a client session outside of coroutine is a very dangerous idea
This code may not solve the above problem though it is related. I am new to asyncio and aiohttp, so this may not be best practice. It's the best I could come up with after reading a lot of seemingly conflicting information.
I created a class ResourceManager taken from the Python docs that opens a context.
The ResourceManager instance handles the opening and closing of the aiohttp ClientSession instance via the magic methods __aenter__ and __aexit__ with BaseScraper.set_session and BaseScraper.close_session wrapper methods.
I was able to reuse a ClientSession instance with the following code.
The BaseScraper class also has methods for authentication. It depends on the lxml third-party package.
import asyncio
from time import time
from contextlib import contextmanager, AbstractContextManager, ExitStack

import aiohttp
import lxml.html

class ResourceManager(AbstractContextManager):
    # Code taken from Python docs: 29.6.2.4. of https://docs.python.org/3.6/library/contextlib.html
    def __init__(self, scraper, check_resource_ok=None):
        self.acquire_resource = scraper.acquire_resource
        self.release_resource = scraper.release_resource
        if check_resource_ok is None:
            def check_resource_ok(resource):
                return True
        self.check_resource_ok = check_resource_ok

    @contextmanager
    def _cleanup_on_error(self):
        with ExitStack() as stack:
            stack.push(self)
            yield
            # The validation check passed and didn't raise an exception
            # Accordingly, we want to keep the resource, and pass it
            # back to our caller
            stack.pop_all()

    def __enter__(self):
        resource = self.acquire_resource()
        with self._cleanup_on_error():
            if not self.check_resource_ok(resource):
                msg = "Failed validation for {!r}"
                raise RuntimeError(msg.format(resource))
        return resource

    def __exit__(self, *exc_details):
        # We don't need to duplicate any of our resource release logic
        self.release_resource()

class BaseScraper:
    login_url = ""
    login_data = dict()  # dict of key, value pairs to fill the login form
    loop = asyncio.get_event_loop()

    def __init__(self, urls):
        self.urls = urls
        self.acquire_resource = self.set_session
        self.release_resource = self.close_session

    async def _set_session(self):
        self.session = await aiohttp.ClientSession().__aenter__()

    def set_session(self):
        set_session_attr = self.loop.create_task(self._set_session())
        self.loop.run_until_complete(set_session_attr)
        return self  # variable after "as" becomes instance of BaseScraper

    async def _close_session(self):
        await self.session.__aexit__(None, None, None)

    def close_session(self):
        close_session = self.loop.create_task(self._close_session())
        self.loop.run_until_complete(close_session)

    def __call__(self):
        fetch_urls = self.loop.create_task(self._fetch())
        return self.loop.run_until_complete(fetch_urls)

    async def _get(self, url):
        async with self.session.get(url) as response:
            result = await response.read()
            return url, result

    async def _fetch(self):
        tasks = (self.loop.create_task(self._get(url)) for url in self.urls)
        start = time()
        results = await asyncio.gather(*tasks)
        print(
            "time elapsed: {} seconds \nurls count: {}".format(
                time() - start, len(self.urls)
            )
        )
        return results

    @property
    def form(self):
        """Create and return form for authentication."""
        form = aiohttp.FormData(self.login_data)
        get_login_page = self.loop.create_task(self._get(self.login_url))
        url, login_page = self.loop.run_until_complete(get_login_page)
        login_html = lxml.html.fromstring(login_page)
        hidden_inputs = login_html.xpath(r'//form//input[@type="hidden"]')
        login_form = {x.attrib["name"]: x.attrib["value"] for x in hidden_inputs}
        for key, value in login_form.items():
            form.add_field(key, value)
        return form

    async def _login(self, form):
        async with self.session.post(self.login_url, data=form) as response:
            if response.status != 200:
                response.raise_for_status()
            print("logged into {}".format(self.login_url))
            await response.release()

    def login(self):
        post_login_form = self.loop.create_task(self._login(self.form))
        self.loop.run_until_complete(post_login_form)

if __name__ == "__main__":
    urls = ("http://example.com",) * 10
    base_scraper = BaseScraper(urls)
    with ResourceManager(base_scraper) as scraper:
        for url, html in scraper():
            print(url, len(html))

Requests/urllib3 error: unorderable types: Retry() < int()

I understand the error, yet do not understand it in the context of my code. This is in Python 3.4. The relevant bits of code (simplified somewhat for clarity):
import ssl

import certifi
import requests
from requests.adapters import HTTPAdapter
from urllib3.poolmanager import PoolManager

class SessionAdapter(HTTPAdapter):
    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_TLSv1,
                                       cert_reqs='CERT_REQUIRED',
                                       ca_certs=certifi.where(),
                                       )

try:
    app_session = requests.Session()
    app_session.mount('https://', SessionAdapter())
    app_response = app_session.post(
        url='https://<FQD URL>',
        auth=(user, password),
        verify=True,
    )
    # Code errors on the previous line and never executes the logger line
    logger.debug('Status code: {}'.format(app_response.status_code))
    if app_response.status_code == 401:
        return 401
    else:
        return app_session
except:
    logger.debug('Exception')
From sys.exc_info() I see:
", verify = True"): unorderable types: Retry() < int()
If the error were something like SessionAdapter() < int() it might make more sense. But I don't know where the Retry() check is being made.
Does the import of PoolManager need to be done differently for Python 3? I'm using version 1.7.1 of python-urllib3 on Ubuntu.
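A hedged guess at the cause: requests of that era vendored its own copy of urllib3, so mixing the standalone urllib3's PoolManager into requests' HTTPAdapter can end up comparing a Retry object from one copy against an int default from the other, which Python 3 reports as unorderable. Importing PoolManager from the same copy that requests uses keeps the types consistent:
try:
    # requests' vendored copy (present in older requests releases)
    from requests.packages.urllib3.poolmanager import PoolManager
except ImportError:
    # newer requests versions delegate to the standalone urllib3
    from urllib3.poolmanager import PoolManager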
