We have an Elixir (version 1.8.1) web application running inside Docker containers (server version 1.12.6, client version 18.09.2) in a Rancher cluster (running on EC2, using the RancherOS 1.4.0 AMI). We are using the Phoenix framework (version 1.3).
I implemented a simple HTML cache using the file system. It's a small plug which checks whether the HTML file for the requested URL exists and returns that file if it does. If not, it registers a function, run before the response is sent, that saves the HTML response to the cache.
It works very well, with response times of 4 ms and below on a cache hit. BUT it seems the plug introduced a memory leak. The memory the Docker container uses grows over time, depending on the amount of traffic the web app receives. If I have a simple crawler going through the site, the memory grows by about 1 MB/minute.
Interestingly, this does not happen locally on my dev machine, only in our staging and production environments.
Here is the full plug:
defmodule PagesWeb.Plugs.Cache do
  @moduledoc false

  import Plug.Conn

  def init(default), do: default

  def call(
        %Plug.Conn{method: "GET", query_string: query_string, request_path: request_path} = conn,
        _default
      ) do
    case page_cached?(request_path, query_string) do
      true ->
        conn
        |> put_resp_header("x-phoenix-cache", "true")
        |> put_resp_header("content-type", "text/html; charset=utf-8")
        |> send_file(200, "priv/static/html#{uri_to_filepath(request_path, query_string)}")
        |> halt()

      false ->
        conn
        |> Plug.Conn.register_before_send(&PagesWeb.Plugs.Cache.save_html_to_cache/1)
        |> put_resp_header("x-phoenix-cache", "false")
    end
  end

  def call(conn, _default) do
    conn
  end

  def save_html_to_cache(
        %Plug.Conn{request_path: request_path, query_string: query_string, resp_body: resp_body} =
          conn
      ) do
    case conn.status do
      200 ->
        html_file = uri_to_filepath(request_path, query_string)
        File.mkdir_p(Path.dirname("priv/static/html#{html_file}"))
        File.write("priv/static/html#{html_file}", resp_body)
        conn

      _ ->
        conn
    end
  end

  def read_cached_page(url, query_string) do
    case File.open("priv/static/html#{uri_to_filepath(url, query_string)}", [:read, :utf8]) do
      {:ok, file} ->
        content = IO.read(file, :all)
        File.close(file)
        content

      {:error, _} ->
        :err
    end
  end

  def page_cached?(url, query_string) do
    File.exists?("priv/static/html#{uri_to_filepath(url, query_string)}", [:raw])
  end

  defp uri_to_filepath(url, query_string) do
    query_string =
      case query_string do
        "" -> ""
        qs -> "-#{qs}"
      end

    case url do
      "/" -> "/index.html"
      path -> "#{path}#{query_string}.html"
    end
  end
end
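One way to narrow down where the memory goes (a hedged diagnostic sketch, not part of the plug itself): compare the BEAM's own accounting with what Docker reports. If the VM's numbers stay flat while the container's memory keeps growing, the growth is happening outside the VM, for example in the kernel's page cache for the files the plug writes.

# Untested diagnostic sketch: run in a remote IEx session on the affected node.
# If these numbers stay flat while `docker stats` keeps climbing, the leak is
# not inside the BEAM.
:erlang.memory()
# => [total: ..., processes: ..., binary: ..., ets: ...]

# If :binary grows, force a collection of every process to see whether
# refc binaries were merely awaiting garbage collection:
Enum.each(Process.list(), &:erlang.garbage_collect/1)
:erlang.memory(:binary)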
I have a project to complete in Ruby involving TLS 1.3. I want to optimize requests and thus use early data. I'm using a package called tttls1.3, and the client works until I send early data to the server. What's even weirder is that a request with early data goes through and I get a response from the server, but immediately after the reply (response message) an alert 20 (Bad Record MAC) is received. I went so far as to recalculate the "client-finished" message, which seemed suspicious, but it looks correct.
What could be the problem? Is there a TCP or other issue I could check?
Here's an example:
require 'socket'
require 'tttls1.3'

settings2 = {
  alpn: ['http/1.1'],
  supported_groups: [TTTLS13::NamedGroup::SECP256R1],
  cipher_suites: [TTTLS13::CipherSuite::TLS_AES_256_GCM_SHA384],
  check_certificate_status: false,
}

settings1 = {
  alpn: ['http/1.1'],
  supported_groups: [TTTLS13::NamedGroup::SECP256R1],
  cipher_suites: [TTTLS13::CipherSuite::TLS_AES_256_GCM_SHA384],
  check_certificate_status: false,
  process_new_session_ticket: lambda do |nst, rms, cs|
    return if Time.now.to_i - nst.timestamp > nst.ticket_lifetime

    settings2[:ticket] = nst.ticket
    settings2[:resumption_master_secret] = rms
    settings2[:psk_cipher_suite] = cs
    settings2[:ticket_nonce] = nst.ticket_nonce
    settings2[:ticket_age_add] = nst.ticket_age_add
    settings2[:ticket_timestamp] = nst.timestamp
  end
}

# REQUEST
socket = TCPSocket.new("ssltest.louis.info", 443)
client = TTTLS13::Client.new(socket, "ssltest.louis.info", settings1)
client.connect
client.write("GET / HTTP/1.1\r\n")
client.write("Host: ssltest.louis.info\r\n")
client.write("\r\n\r\n")
client.read
client.close
socket.close

sleep(1)

# RESUMPTION
socket = TCPSocket.new("ssltest.louis.info", 443)
client = TTTLS13::Client.new(socket, "ssltest.louis.info", settings2)
client.early_data("HEAD / HTTP/1.1\r\nHost: ssltest.louis.info\r\n\r\n\r\n")
client.connect
p client.read
p client.read
p client.read
p client.read
Original issue: https://github.com/thekuwayama/tttls1.3/issues/48
It turned out that the Connection: close header must be present in the request. This must be specific to the remote server's implementation.
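For illustration, here is the resumption request from the example above with only that header added (an untested sketch; everything else is unchanged):

socket = TCPSocket.new("ssltest.louis.info", 443)
client = TTTLS13::Client.new(socket, "ssltest.louis.info", settings2)
# Adding Connection: close to the early data resolved the Bad Record MAC alert.
client.early_data(
  "HEAD / HTTP/1.1\r\n" \
  "Host: ssltest.louis.info\r\n" \
  "Connection: close\r\n" \
  "\r\n"
)
client.connect
p client.read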
The following code is part of some automated tests that I have written in Python 3.6:
connected = False

def aiohttp_server(loop):
    async def handler(msg, session):
        global connected
        if msg.type == sockjs.MSG_OPEN:
            connected = True
        if msg.type == sockjs.MSG_CLOSE:
            connected = False

    app = web.Application(loop=loop)
    sockjs.add_endpoint(app, handler)
    runner = web.AppRunner(app)
    return runner

def run_server(runner, loop):
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    asyncio.set_event_loop(loop)
    loop.run_until_complete(runner.setup())
    site = web.TCPSite(runner, 'localhost', 8080)
    loop.run_until_complete(site.start())
    loop.run_forever()

def start_server():
    loop = asyncio.new_event_loop()
    t = threading.Thread(target=run_server, args=(aiohttp_server(loop), loop), daemon=True)
    t.start()
    time.sleep(0.01)
Basically, calling start_server should start a simple web server with a sockjs endpoint at /sockjs.
I am not yet a master of Python's async keyword. There are two issues that I suspect are related:
Firstly, I am getting a deprecation warning on the app = web.Application(loop=loop) statement:
/home/peter/incubator/sockjs_client/tests/test_sockjs_client.py:25: DeprecationWarning: loop argument is deprecated
app = web.Application(loop=loop)
/home/peter/.local/lib/python3.6/site-packages/sockjs/route.py:54: DeprecationWarning: loop property is deprecated
manager = SessionManager(name, app, handler, app.loop)
And secondly, the tests fail occasionally. I believe that, depending on machine load, sometimes the server hasn't had enough time to start before the test code actually starts executing.
Basically, what I need is for the start_server function to initialise a web application with a websocket endpoint, and not return until the application is prepared to accept websocket connections.
Firstly, I am getting a deprecation warning on the app = web.Application(loop=loop) statement:
The recommended way to avoid passing around the loop everywhere is to switch to asyncio.run. Instead of managing the loop manually, let asyncio.run create (and close) the loop for you. If all your work is done in coroutines, you can access the loop with get_event_loop() or get_running_loop().
Basically, what I need is for the start_server function to initialise a web application with a websocket endpoint, and not return until the application is prepared to accept websocket connections.
You can pass a threading.Event to the thread that gets set when the site is set up, and wait for it in the main thread.
Here is an (untested) example that implements both suggestions:
connected = False

def aiohttp_server():
    async def handler(msg, session):
        global connected
        if msg.type == sockjs.MSG_OPEN:
            connected = True
        if msg.type == sockjs.MSG_CLOSE:
            connected = False

    app = web.Application()
    sockjs.add_endpoint(app, handler)
    return web.AppRunner(app)

async def run_server(ready):
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    runner = aiohttp_server()
    await runner.setup()
    site = web.TCPSite(runner, 'localhost', 8080)
    await site.start()
    ready.set()
    # emulates loop.run_forever()
    await asyncio.get_running_loop().create_future()

def start_server():
    ready = threading.Event()
    threading.Thread(target=asyncio.run, args=(run_server(ready),),
                     daemon=True).start()
    ready.wait()
Please upgrade sockjs to the newest version.
It doesn't require passing the loop anymore.
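For example, with a current sockjs the endpoint registration needs no loop anywhere (a minimal sketch; the name and prefix keyword arguments shown are illustrative):

# Minimal sketch with a recent sockjs: no loop argument is passed at all.
app = web.Application()
sockjs.add_endpoint(app, handler, name="sockjs", prefix="/sockjs")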
I have to run bash heavy-job.sh <data-num> (which takes 0.5~2 days) frequently on my computer to process data located at ~/a/data/num. The script calls a few sub-processes sequentially and writes a log to ~/a/result/num.log. I have done this manually until now.
I wanted to visualize the processed tasks and their status (success or fail) as an HTML table, so I wrote a simple Sinatra app to render a table that shows:
- the list of ~/a/data/num to be processed
- whether ~/a/result/num.log exists (process not launched / processing / done)
- its status (whether the log file contains the word "error")
I found it would be convenient if I could launch bash heavy-job.sh <data-num> from the Sinatra app, log the tasks (with info like time, date, etc.) and their args (heavy-job.sh takes some optional args), and show them in the HTML table.
So I need something that manages jobs and logs to files (or a db).
First I wrote code like the class below as a test (not integrated with my system yet), but later I found that Resque is what I wanted. I am a beginner and not sure whether my decision is reasonable.
My questions are:
- Is it reasonable to use Resque to manage external long-running commands (and log tasks)?
- Or should I use another tool (not necessarily a Ruby tool)?
- (Extra:) Should the task manager and the Sinatra app run separately (and communicate with each other over REST or something), or not?
The jobs are not critical, since I can retry failed tasks manually later.
I am not good at English and my question may be misleading. I appreciate any help :).
class TaskSpawn
  def initialize()
    @pids = []
  end

  def spawn(command, options = {})
    # opt = {:pgroup => true}
    @pids << Kernel.spawn(command, options)
  end

  def pids()
    return @pids.clone
  end

  def waitany_nohang()
    delete_idx = nil
    ret = nil
    @pids.each_with_index do |p, idx|
      pid, status = Process.waitpid2(p, Process::WNOHANG)
      unless pid.nil?
        delete_idx = idx
        ret = [pid, status]
        break
      end
    end
    if delete_idx
      @pids.delete_at(delete_idx)
      return ret
    else
      # no task finished
      return nil
    end
  end

  def waitall()
    ret = Process.waitall
    raise "internal error" if ret.size != pids.size
    return ret
  end
end
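Since the question is whether Resque fits this use case, here is a rough, untested sketch of what the same work could look like as a Resque job (the class name, queue name, and log format are illustrative, not taken from the code above):

require 'resque'

# Illustrative Resque job: perform shells out to the existing script and
# appends the outcome to the per-task log file.
class HeavyJob
  @queue = :heavy_jobs

  def self.perform(data_num, *args)
    ok = system('bash', 'heavy-job.sh', data_num.to_s, *args)
    File.open(File.expand_path("~/a/result/#{data_num}.log"), 'a') do |f|
      f.puts("#{Time.now} heavy-job.sh #{data_num} #{args.join(' ')} => #{ok ? 'success' : 'fail'}")
    end
  end
end

# Enqueued from the Sinatra app, e.g.:
# Resque.enqueue(HeavyJob, 42, '--some-optional-arg')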
I have tried for a few days, and I am a little confused here.
I am using Clojure http-kit to make a keepalive GET request.
(ns weibo-collector.weibo
  (:require [org.httpkit.client :as http]
            [clojure.java.io :as io]))

(def sub-url "http://c.api.weibo.com/datapush/status?subid=10542")

(defn spit-to-file [content]
  (spit "sample.data" content :append true))

@(http/get sub-url {:as :stream :keepalive 3000000}
           (fn [{:keys [status headers body error opts]}]
             (spit-to-file body)))
I am pretty sure that I made a persistent connection to the target server, but nothing is written to the sample.data file.
I tried :as :stream and :as :text.
I also tried a Ruby version; that program creates a persistent connection as well, and still nothing is written.
Typically the target would use a webhook to notify my server that new data is coming, but how do I get data from the persistent connection?
---EDIT---
require 'awesome_print'
require 'httpclient'

url = "http://c.api.weibo.com/datapush/status?subid=10542"

c = HTTPClient.new
conn = c.get_async(url)

Thread.new do
  res = conn.pop
  while true
    text = ""
    while ch = res.content.read(1)
      text = text + ch
      break if text.end_with? "\r\n"
    end
    ap text
  end
end

while true
end
The above is a working example in Ruby; it uses a thread to read data from the connection. So I must be missing something to get the data in Clojure.
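For comparison, a rough, untested sketch of the same read loop in Clojure. One assumption here: http-kit's callback may not fire until the whole response has arrived, so this sketch uses clj-http, whose {:as :stream} option returns the body as a live java.io.InputStream that can be read incrementally, like the Ruby thread above:

;; Untested sketch: stream the body line by line and append each line to the
;; file, mirroring the Ruby thread. Assumes clj-http on the classpath.
(require '[clj-http.client :as client]
         '[clojure.java.io :as io])

(future
  (let [resp (client/get sub-url {:as :stream})]
    (with-open [rdr (io/reader (:body resp))]
      (doseq [line (line-seq rdr)]
        (spit-to-file (str line "\n"))))))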
This is inspired by ipython-notebook-proxy and based on ipydra, extending the latter to support more complex user authentication as well as a proxy, because in my use case only port 80 can be exposed.
I am using flask-sockets for the gunicorn worker, but I am having trouble proxying WebSockets. IPython uses three different WebSocket connections, /shell, /stdin, and /iopub, but I am only able to get 101 Switching Protocols for the first two. And /stdin receives a Connection Close frame as soon as it is created.
This is the code excerpt in question:
# Flask imports...
from werkzeug import LocalProxy
from ws4py.client.geventclient import WebSocketClient

# I use my own LocalProxy because flask-sockets does not support Werkzeug Rules
websocket = LocalProxy(lambda: request.environ.get('wsgi.websocket', None))
websockets = {}

PROXY_DOMAIN = "127.0.0.1:8888"  # IPython host and port

methods = ["GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH",
           "CONNECT"]

@app.route('/', defaults={'url': ''}, methods=methods)
@app.route('/<path:url>', methods=methods)
def proxy(url):
    with app.test_request_context():
        if websocket:
            while True:
                data = websocket.receive()
                websocket_url = 'ws://{}/{}'.format(PROXY_DOMAIN, url)
                if websocket_url not in websockets:
                    client = WebSocketClient(websocket_url,
                                             protocols=['http-only', 'chat'])
                    websockets[websocket_url] = client
                else:
                    client = websockets[websocket_url]
                client.connect()
                if data:
                    client.send(data)
                client_data = client.receive()
                if client_data:
                    websocket.send(client_data)
    return Response()
I also tried to create my own WebSocket proxy class, but it doesn't work either.
class WebSocketProxy(WebSocketClient):
    def __init__(self, to, *args, **kwargs):
        self.to = to
        print(("Proxy to", self.to))
        super(WebSocketProxy, self).__init__(*args, **kwargs)

    def opened(self):
        m = self.to.receive()
        print("<= %d %s" % (len(m), str(m)))
        self.send(m)

    def closed(self, code, reason):
        print(("Closed down", code, reason))

    def received_message(self, m):
        print("=> %d %s" % (len(m), str(m)))
        self.to.send(m)
The regular request-response cycle works like a charm, so I removed that code. If you're interested, the complete code is hosted in hidra.
I run the server with
$ gunicorn -k flask_sockets.worker hidra:app
Here is my solution(ish). It is crude, but it should serve as a starting point for building a websocket proxy. The full code is available in the unreleased project pyramid_notebook.
- This uses ws4py and uWSGI instead of gunicorn
- We use uWSGI's internal mechanism to run the downstream websocket message loop. There is nothing like WSGI for websockets in the Python world (yet?), but it looks like every web server implements its own mechanism.
- A custom ws4py ProxyClient is created which combines the ws4py event loop with the uWSGI event loop
- The thing is started and messages start flying around
- This uses a Pyramid request (based on WebOb), but that really shouldn't matter, and the code should be fine for any Python WSGI app with small modifications
- As you can see, this does not really take advantage of asynchronicity; it just sleep()s if there is nothing coming in from the socket
Code goes here:
"""UWSGI websocket proxy."""
from urllib.parse import urlparse, urlunparse
import logging
import time
import uwsgi
from ws4py import WS_VERSION
from ws4py.client import WebSocketBaseClient
#: HTTP headers we need to proxy to upstream websocket server when the Connect: upgrade is performed
CAPTURE_CONNECT_HEADERS = ["sec-websocket-extensions", "sec-websocket-key", "origin"]
logger = logging.getLogger(__name__)
class ProxyClient(WebSocketBaseClient):
"""Proxy between upstream WebSocket server and downstream UWSGI."""
#property
def handshake_headers(self):
"""
List of headers appropriate for the upgrade
handshake.
"""
headers = [
('Host', self.host),
('Connection', 'Upgrade'),
('Upgrade', 'websocket'),
('Sec-WebSocket-Key', self.key.decode('utf-8')),
# Origin is proxyed from the downstream server, don't set it twice
# ('Origin', self.url),
('Sec-WebSocket-Version', str(max(WS_VERSION)))
]
if self.protocols:
headers.append(('Sec-WebSocket-Protocol', ','.join(self.protocols)))
if self.extra_headers:
headers.extend(self.extra_headers)
logger.info("Handshake headers: %s", headers)
return headers
def received_message(self, m):
"""Push upstream messages to downstream."""
# TODO: No support for binary messages
m = str(m)
logger.debug("Incoming upstream WS: %s", m)
uwsgi.websocket_send(m)
logger.debug("Send ok")
def handshake_ok(self):
"""
Called when the upgrade handshake has completed
successfully.
Starts the client's thread.
"""
self.run()
def terminate(self):
raise RuntimeError("NO!")
super(ProxyClient, self).terminate()
def run(self):
"""Combine async uwsgi message loop with ws4py message loop.
TODO: This could do some serious optimizations and behave asynchronously correct instead of just sleep().
"""
self.sock.setblocking(False)
try:
while not self.terminated:
logger.debug("Doing nothing")
time.sleep(0.050)
logger.debug("Asking for downstream msg")
msg = uwsgi.websocket_recv_nb()
if msg:
logger.debug("Incoming downstream WS: %s", msg)
self.send(msg)
s = self.stream
self.opened()
logger.debug("Asking for upstream msg")
try:
bytes = self.sock.recv(self.reading_buffer_size)
if bytes:
self.process(bytes)
except BlockingIOError:
pass
except Exception as e:
logger.exception(e)
finally:
logger.info("Terminating WS proxy loop")
self.terminate()
def serve_websocket(request, port):
"""Start UWSGI websocket loop and proxy."""
env = request.environ
# Send HTTP response 101 Switch Protocol downstream
uwsgi.websocket_handshake(env['HTTP_SEC_WEBSOCKET_KEY'], env.get('HTTP_ORIGIN', ''))
# Map the websocket URL to the upstream localhost:4000x Notebook instance
parts = urlparse(request.url)
parts = parts._replace(scheme="ws", netloc="localhost:{}".format(port))
url = urlunparse(parts)
# Proxy initial connection headers
headers = [(header, value) for header, value in request.headers.items() if header.lower() in CAPTURE_CONNECT_HEADERS]
logger.info("Connecting to upstream websockets: %s, headers: %s", url, headers)
ws = ProxyClient(url, headers=headers)
ws.connect()
# Happens only if exceptions fly around
return ""