Node.js - pipe() to a http response results in slow response time on ubuntu - performance

I noticed serious performance issue when piping a response from a service B to a client through server A. I found out this issue while investigation performance one one of our node server that mainly does proxying. We are using http-proxy for proxying, and at first I thought it was http-proxy that was slow, but I narrowed the issue to that simple snippet of code, which simply use http.get to make a request to another server and return the response to the client.
Server A (proxy):
/*jslint node:true nomen:true vars:true*/
var http = require('http');
function outputMs() {
'use strict';
var d = new Date();
var n = d.getUTCMilliseconds();
return n;
}
http.createServer(function (req, res) {
'use strict';
var startTime = outputMs();
var options = {
hostname: '10.0.1.114',
port: 3001,
path: '/test'
};
if (req.url.indexOf('/pipe') > -1) {
http.get(options, function (proxyRes) {
// This is slow!!!
proxyRes.pipe(res);
proxyRes.on('data', function (chunk) {
console.log('data:' + (outputMs() - startTime));
console.log('received:' + chunk);
});
proxyRes.on('end', function () {
console.log('end:' + (outputMs() - startTime));
});
});
} else {
var data;
http.get(options, function (proxyRes) {
// This is fast!!!
proxyRes.on('data', function (chunk) {
console.log('data:' + (outputMs() - startTime));
data += chunk;
});
proxyRes.on('end', function () {
console.log('end:' + (outputMs() - startTime));
res.end(data);
});
});
}
}).listen(3000);
Server B:
/*jslint node:true nomen:true vars:true*/
var http = require('http');
http.createServer(function (req, res) {
'use strict';
res.end('Hello World');
}).listen(3001);
The request event seems to take the same time in both case, but the client receive the response a lot slower. Testing using curl:
Using pipe:
C:\github\mecs> curl -s -w "%{time_total}\n" -o /dev/null http://54.209.35.253:3000/pipe
0.265
C:\github\mecs> curl -s -w "%{time_total}\n" -o /dev/null http://54.209.35.253:3000/pipe
0.265
Not using pipe:
C:\github\mecs> curl -s -w "%{time_total}\n" -o /dev/null http://54.209.35.253:3000
0.047
C:\github\mecs> curl -s -w "%{time_total}\n" -o /dev/null http://54.209.35.253:3000
0.063
Both server are running on AWS on micro-instance, running Ubuntu Server 12.04.1 LTS and node.js 0.10.21. I reproduced the issue with node 0.10.20 and 0.8.24, and on Ubuntu Server 12.04.2 and 13.10. The issue is not observed on Windows.
Does anyone experience the same problem? Any work around?
Thanks a lot for your help...

You probably need to do something like socket.setNoDelay([noDelay]). Not sure how to do that with the http module though. My guess is one end is waiting for socket close since its a small message, and when it times out, you see the response.
The setNoDelay() must be called on the client request, and first time I tried it was on my proxied request.
server.on('connection', function (socket) {
'use strict';
console.log('server.on.connection - setNoDelay');
socket.setNoDelay(true);
});

I was having similar error. I was able to get response much faster after I set agent in request options with keepAlive=true of agent.

Related

Possible to call http gets with Alexa hosted skill?

I have been trying without success to use http module in my Node.js endpoint to do a simple http get.
I have followed the various tutorials to execute the get within my intent, but it keeps failing with getaddrinfo ENOTFOUND in the cloudwatch log.
It seems like I am preparing the url correctly, if I just cut and past the url output into the browswer I get the expected response, and its just a plain http get over port 80.
I suspect that maybe the Alexa hosted lambda doesn't have permission necessary to make remote calls to non-amazon web services, but I don't know this for sure.
Can anybody shed any light? FYI this is the code in my lambda:
var http = require('http');
function httpGet(address, zip, zillowid) {
const pathval = 'www.zillow.com/webservice/GetSearchResults.htm' + `?zws-id=${zillowid}` + `&address=${encodeURIComponent(address)}&citystatezip=${zip}`;
console.log ("pathval =" + pathval);
return new Promise(((resolve, reject) => {
var options = {
host: pathval,
port: 80,
method: 'GET',
};
const request = http.request(options, (response) => {
response.setEncoding('utf8');
console.log("options are" + options);
let returnData = '';
response.on('data', (chunk) => {
returnData += chunk;
});
response.on('end', () => {
resolve(JSON.parse(returnData));
});
response.on('error', (error) => {
console.log("I see there was an error, which is " + error);
reject(error);
});
});
request.end();
}));
}
host: pathval is incorrect usage of the Node.js http module. You need to provide the hostname and the path + query string as two different options.
An example of correct usage:
host: 'example.com',
path: '/webservice/GetSearchResults.htm?zws-id=...',
(Of course, these can be variables, they don't need to be literals as shown here for clarity.)
The error occurs because you're treating the whole URL as a hostname, and as such it doesn't exist.
I suspect that maybe the Alexa hosted lambda doesn't have permission necessary to make remote calls to non-amazon web services
There is no restriction on what services you can contact from a within a Lambda function (other than filters that protect against sending spam email directly to random mail servers).

How to control a CasperJS automation script from a remote jquery client using socket.io

I have an automation script in CasperJS controlling a PhantomJS headless browser that logs into a site, enters data over multiple pages / form.
From the same physical server, I have PHP/MySQL serving up a CRM client website. On the CRM site, I want to have the ability to:
Trigger the remote CasperJS script to go browse a remote site and log in and fill out forms
Read the output stream (i.e. "Page 1 complete, page 2 complete" ,etc)
Display the status updates to the client user as the CasperJS script is executing
I am thinking that socket.io is the ticket here. But, I am I going about this all wrong? I am trying to avoid having a selenium server running. I checked this answer on SO but I am not looking for screenshots, I'm looking for the console output from CasperJS to be displayed in the client website.
I had a similar task once and concocted a solution using local Express.js server with Socket.io.
You would launch this server with node.js and then pass tasks to it from PHP by making POST requests to http://127.0.0.1:9000 (I used the excellent Requests library).
Here's a simplified version of my script:
var fs = require("fs");
var express = require("express");
var app = express();
var server = require("http").Server(app);
var io = require("socket.io")(server);
var iosocket;
// Express middleware to get variables from POST request
var bodyParser = require('body-parser');
app.use(bodyParser.urlencoded({ extended: true }));
// Create websocket connection
io.on("connection", function(socket){
console.log('io.js connection');
iosocket = socket;
});
// Receieve task from external POST request
app.post("/scrape", function(req, res){
res.send("Request accepted");
// Url to parse
var url = req.body.url;
// Variable to collect data from scraper
var data = [];
// Launch scraping script
var spawn = require('child_process').spawn,
child = spawn('/path/to/casperjs', ['/path/to/scrape/script.js', url]);
console.log("Spawned parser");
// Receieve data from script
child.stdout.on('data', function (data) {
var message = data.toString();
data.push(message);
// Send data to the web client
iosocket.emit("message", message);
});
// On error
child.stderr.on('data', function (data) {
console.log('stderr: ' + data.toString());
});
// On scraper exit
child.on('close', function (code) {
console.log("Scraper exited with code: " + code);
//
// Put data into a file or a database, for example
//
fs.writeFileSync("path/to/file/results_" + (new Date()).getTime() + ".json", JSON.stringify(data));
});
});
// Bind app to port # localhost
server.listen(9000, "127.0.0.1");
Solution with CasperJS/Phantomjs server is interesting, however people pointed out that it leaks memory, which probably won't be happening if you run short-lived CasperJS scripts.

a file upload progress bar with node (socket.io and formidable) and ajax

I was in the middle of teaching myself some Ajax, and this lesson required building a simple file upload form locally. I'm running XAMPP on windows 7, with a virtual host set up for http://test. The solution in the book was to use node and an almost unknown package called "multipart" which was supposed to parse the form data but was crapping out on me.
I looked for the best package for the job, and that seems to be formidable. It does the trick and my file will upload locally and I get all the details back through Ajax. BUT, it won't play nice with the simple JS code from the book which was to display the upload progress in a progress element. SO, I looked around and people suggested using socket.io to emit the progress info back to the client page.
I've managed to get formidable working locally, and I've managed to get socket.io working with some basic tutorials. Now, I can't for the life of me get them to work together. I can't even get a simple console log message to be sent back to my page from socket.io while formidable does its thing.
First, here is the file upload form by itself. The script inside the upload.html page:
document.getElementById("submit").onclick = handleButtonPress;
var httpRequest;
function handleResponse() {
if (httpRequest.readyState == 4 && httpRequest.status == 200) {
document.getElementById("results").innerHTML = httpRequest.responseText;
}
}
function handleButtonPress(e) {
e.preventDefault();
var form = document.getElementById("myform");
var formData = new FormData(form);
httpRequest = new XMLHttpRequest();
httpRequest.onreadystatechange = handleResponse;
httpRequest.open("POST", form.action);
httpRequest.send(formData);
}
And here's the corresponding node script (the important part being form.on('progress')
var http = require('http'),
util = require('util'),
formidable = require('formidable');
http.createServer(function(req, res) {
if (req.url == '/upload' && req.method.toLowerCase() == 'post') {
var form = new formidable.IncomingForm(),
files = [],
fields = [];
form.uploadDir = './files/';
form.keepExtensions = true;
form
.on('progress', function(bytesReceived, bytesExpected) {
console.log('Progress so far: '+(bytesReceived / bytesExpected * 100).toFixed(0)+"%");
})
.on('file', function(name, file) {
files.push([name, file]);
})
.on('error', function(err) {
console.log('ERROR!');
res.end();
})
.on('end', function() {
console.log('-> upload done');
res.writeHead(200, "OK", {
"Content-Type": "text/html", "Access-Control-Allow-Origin": "http://test"
});
res.end('received files: '+util.inspect(files));
});
form.parse(req);
} else {
res.writeHead(404, {'content-type': 'text/plain'});
res.end('404');
}
return;
}).listen(8080);
console.log('listening');
Ok, so that all works as expected. Now here's the simplest socket.io script which I'm hoping to infuse into the previous two to emit the progress info back to my page. Here's the client-side code:
var socket = io.connect('http://test:8080');
socket.on('news', function(data){
console.log('server sent news:', data);
});
And here's the server-side node script:
var http = require('http'),
fs = require('fs');
var server = http.createServer(function(req, res) {
fs.createReadStream('./socket.html').pipe(res);
});
var io = require('socket.io').listen(server);
io.sockets.on('connection', function(socket) {
socket.emit('news', {hello: "world"});
});
server.listen(8080);
So this works fine by itself, but my problem comes when I try to place the socket.io code inside my form.... I've tried placing it anywhere it might remotely make sense, i've tried the asynchronous mode of fs.readFile too, but it just wont send anything back to the client - meanwhile the file upload portion still works fine. Do I need to establish some sort of handshake between the two packages? Help me out here. I'm a front-end guy so I'm not too familiar with this back-end stuff. I'll put this aside for now and move onto other lessons.
Maybe you can create a room for one single client and then broadcast the percentage to this room.
I explained it here: How to connect formidable file upload to socket.io in Node.js

heroku http streaming (sse) disconnect not detected with express.js

Heroku is not detecting when a client disconnects.
I cloned their starter app and added a couple lines for sse:
var sse = require('connect-sse')();
app.get('/testing', sse, function(req, res, next) {
var interval;
interval = setInterval(function() {
var date;
date = new Date();
res.json({
ping: date
});
return console.log(date);
}, 2000);
return req.on('close', function() {
clearInterval(interval);
return console.log("cleared");
});
});
When I go to my app I see the correct reponse:
id: 0
data: {"ping":"2014-01-05T23:53:49.835Z"}
id: 1
data: {"ping":"2014-01-05T23:53:51.839Z"}
But after I close the browser tab heroku does not register that I did.
With heroku logs -t I keep getting the date printed out and never cleared
This works locally with foreman start but not on heroku.
Anyone know what I am doing wrong?
Based on the documentation for the connect-sse node module, it looks like you should be referring to the sse middleware without parentheses, like so:
app.get('/testing', sse, function(req, res, next) {
// ...
});

node.js server running but not loading

I've just installed node.js on my computer running Win7(64bit).
The problem is that when I run a simple hello-world application it is running (as confirmed by console.log() and me pushing the code to OpenShift where it works just fine) but when I try to load the page in localhost:1337 it just keeps on loading (eventually times out).
I've no idea what to check, since firewall is not blocking node and I'm not running anything that would block the port.
Here's the server code.
#!/bin/env node
// Include http module.
var http = require("http");
//Get the environment variables we need if on OpenShift
var ipaddr = process.env.OPENSHIFT_NODEJS_IP || "127.0.0.1";
var port = process.env.OPENSHIFT_NODEJS_PORT || 1337;
http.createServer(function (request, response) {
request.on("end", function () {
response.writeHead(200, {
'Content-Type': 'text/plain'
});
response.end('Hello HTTP!');
});
}).listen(port, ipaddr);
console.log('It works');
console.log('IP : ' + ipaddr + '\nPort : ' + port);
Any help is appreciated, thank you.
edit
Here's a screenshot of commandline output.
http://i.stack.imgur.com/GGaLD.png
The node server is hanging as you need to always call response.end.
I believe that listening to the end event on the request is causing the timeout. If you remove it will work.

Resources