Mongo Map-Reduce: group documents in a certain distance by value - ruby

I'm trying to convert my aggregation query to Map-Reduce one in MongoDB (using Ruby driver). In my original query, I search for elements in a certain distance from a point and then group them by NEIGHBORHOOD value.
Sample document
{
"_id":"5b01a2c77b61e58732920f86",
"YEAR":2004,
"NEIGHBOURHOOD":"Grandview-Woodland",
"LOC":{"type":"Point","coordinates":[-123.067654,49.26773386]}
}
Aggregation query
crime.aggregate([
{ "$geoNear": {
"near": {
"type": "Point",
"coordinates": [ -123.0837633, 49.26980201 ]
},
"query": { "YEAR": 2004 },
"distanceField": "distance",
"minDistance": 10,
"maxDistance": 10000,
"num": 100000,
"spherical": true
}},
{ "$group": {
"_id": "$NEIGHBOURHOOD",
"count": { "$sum": 1 }
}}
])
So the fragment of the output looks like this:
Output
{"_id"=>"Musqueam", "count"=>80}
{"_id"=>"West Point Grey", "count"=>651}
{"_id"=>"Marpole", "count"=>1367}
Now I'm trying to make something like this in MapReduce. In my map function I try to check if documents are in the correct distance (based on the answer to THIS QUESTION) and if so pass them along to reduce function in which they will be counted. But something is not right and I don't get the desired result - count values are too big. What am I doing wrong?
Map function
map = "function() {" +
"var rad_per_deg = Math.PI/180;" +
"var rm = 6371 * 1000;" +
"var dlat_rad = (this.LOC.coordinates[0] - (-123.0837633)) * rad_per_deg;" +
"var dlon_rad = (this.LOC.coordinates[1] - (49.26980201)) * rad_per_deg;" +
"var lat1_rad = -123.0837633 * rad_per_deg;" +
"var lon1_rad = 49.26980201 * rad_per_deg;" +
"var lat2_rad = this.LOC.coordinates[0] * rad_per_deg;" +
"var lon2_rad = this.LOC.coordinates[1] * rad_per_deg;" +
"var a = Math.pow(Math.sin(dlat_rad/2), 2) + Math.cos(lat1_rad) * Math.cos(lat2_rad) * Math.pow(Math.sin(dlon_rad/2), 2);" +
"var c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a));" +
"if( rm * c < 10000) { " +
" emit(this.NEIGHBOURHOOD, {count: 1});" +
"}" +
"};"
Reduce function
reduce = "function(key, values) { " +
"var sum = 0; " +
"values.forEach(function(f) { " +
" sum += f.count; " +
"}); " +
"return {count: sum};" +
"};"
Query
opts = {
query:{ "YEAR": 2004 },
:out => "results",
:raw => true
}
Output
crime.find().map_reduce(map, reduce, opts)
{"_id"=>"", "value"=>{"count"=>2257.0}}
{"_id"=>"Arbutus Ridge", "value"=>{"count"=>6066.0}}
{"_id"=>"Central Business District", "value"=>{"count"=>110947.0}}

Related

Turing machine |x-y|

The problem is not about programming, but about algorithm. We need to solve |x-y| in unary code on the Turing machine. For example if we have 111 * 11 on the tape. * as a number separator, then we need to leave 1. I understand that when we have a space left or right near the *, the program should remove the * and terminate, but I always get it to work if only the numbers on the right are larger or vice versa. To make the same algorithm work and if the ribbon is 11 * 1 and 1 * 11 does not work.
If it is possible to write a system command:
q1 1 -> q2 _ R
Or with a function table as in the screenshot
I would apply this algorithm:
Move to the right and ensure that there is a "1" at both sides of the "*"
Wipe out the rightmost "1" and then walk back to wipe out the leftmost "1"
Repeat
When the condition in the first scan is not true, then wipe out the "*" and stop.
This leads to the following transition table:
State
In
Out
Move
New State
start
1
1
→
toright
start
*
_
→
stop
toright
1
1
→
toright
toright
*
*
→
toright
toright
_
_
←
delright
delright
1
_
←
toleft
delright
*
_
←
stop
toleft
1
1
←
toleft
toleft
*
*
←
toleft
toleft
_
_
→
delleft
delleft
1
_
→
start
Here is a little implementation in JavaScript:
class Turing {
constructor(str, idx=0) {
this.tape = Array.from(str);
this.idx = idx;
}
read() {
return this.tape[this.idx] || " ";
}
writeAndMove(chr, move) {
this.tape[this.idx] = chr;
this.idx += (move > 0) - (move < 0);
}
toString() {
return this.tape.join("").trim();
}
static run(input, transitions, state, endState) {
const turing = new Turing(input);
let move, chr;
while (state !== endState) {
chr = turing.read();
[,,chr, move, state] = transitions.find(t => t[0] === state && t[1] === chr);
turing.writeAndMove(chr, move);
}
return turing.toString();
}
}
const transitions = [
// STATE IN OUT MOVE NEWSTATE
// --------- ---- --- -- ----------
["start" , "1", "1", +1, "toright" ],
["start" , "*", " ", +1, "stop" ],
["toright" , "1", "1", +1, "toright" ],
["toright" , "*", "*", +1, "toright" ],
["toright" , " ", " ", -1, "delright"],
["delright", "1", " ", -1, "toleft" ],
["delright", "*", " ", -1, "stop" ],
["toleft" , "1", "1", -1, "toleft" ],
["toleft" , "*", "*", -1, "toleft" ],
["toleft" , " ", " ", +1, "delleft" ],
["delleft" , "1", " ", +1, "start" ],
];
// Demo run
const output = Turing.run("1*111", transitions, "start", "stop");
console.log(output);

When creating an angle, how do I control the attributes of the automatically created points?

I'm working with a polygon and attempting to create angles with labels but when angles are created, so are the points used to define them. This would be fine but I can't control the labels on the automatically created points (and I don't know what they are called or how to find out).
var points = [
[0, 0],
[0, 5],
[3, 0]
];
for (k = 0; k < showAngle.length; k++) {
if (showAngle[k] == 1) {
var angle = board.create('angle', [points[k], points[((k + 1) % points.length)], points[((k + 2) % points.length)]],{fixed:true});
} else if (showAngle[k] == 2) {
var angle = board.create('angle', [points[k], points[((k + 1) % points.length)], points[((k + 2) % points.length)]], {
fixed: false,
name: function() {
return ((180/Math.PI)*JXG.Math.Geometry.rad(points[k], points[((k + 1) % points.length)], points[((k + 2) % points.length)])).toFixed(1) + '°';
}
});
}
}
https://jsfiddle.net/jscottuq/acyrLxfh/12/ contains what I've got so far.
The arrays showLen and showAngle are setting what labels are shown for each side/angle (0 - no label, 1 - name , 2 - measurement).
These will be set when the jsxgraph is created.
At the time being, the possibility to control the style of the newly created points of an angle is missing. We will add this soon.
However, a solution would be to use the already existing points which are hidden in this example. For this it would be helpful to kee a list of these points, e.g. jxg_points:
var jxg_points = [];
for (i = 0; i < points.length; i++) {
var rise = points[(i + 1) % points.length][1] - points[i][1];
var run = points[(i + 1) % points.length][0] - points[i][0];
var point = board.create('point', [points[i][0], points[i][1]], {
fixed: true,
visible:false
});
jxg_points.push(point); // Store the point
points[i].pop();
len[i] = Math.round((Math.sqrt(rise * rise + run * run) + Number.EPSILON) * 100) / 100;
}
Then the points can be reused for the angles without creating new points:
for (k = 0; k < showAngle.length; k++) {
if (showAngle[k] == 1) {
angle = board.create('angle', [
jxg_points[k],
jxg_points[((k + 1) % jxg_points.length)],
jxg_points[((k + 2) % jxg_points.length)]
],{fixed:true});
} else if (showAngle[k] == 2) {
var angle = board.create('angle', [
jxg_points[k],
jxg_points[((k + 1) % jxg_points.length)],
jxg_points[((k + 2) % jxg_points.length)]], {
fixed: false,
name: function() {
return ((180/Math.PI)*JXG.Math.Geometry.rad(points[k], points[((k + 1) % points.length)], points[((k + 2) % points.length)])).toFixed(1) + '°';
}
});
}
}
See it live at https://jsfiddle.net/d8an0epy/.

Find the combination of a number set so that the total difference between two number sets is minimal

Find the combinations of lots assigned from newLots into oldLots, so that the sum of differences between newLotCombinations and oldLots is optimal.
If a difference is less than 0, it counts as 0.
All lots from newLot must be assigned into newLotCombinations.
/**
*
* e.g.
*
* newLots = [20.16, 9.95, 12.62, 7.44, 11.18, 9.02, 8.21, 8.22, 6.57, 6.63]
* oldLots = [12.03, 14.03, 16.04, 17.8, 18.04, 22.05]
*
* newLotCombinations | oldLot | Difference (newLotCombinations - oldLot)
* 20.16, 9.95 | 12.03 | 18.03 (20.16 + 9.95 - 12.03)
* 12.62, 7.44 | 14.03 | 6.03 (12.62 + 7.44 - 14.03)
* 11.18 | 16.04 | 0
* ...
* ...
* Sum of differences = 18.03 + 6.03 + 0 + ...
*/
I think this should involve memoizing the paths that I have gone through, like a map, and walking backward when a path is not correct (total sum larger than the assumption)
This is what I have so far:
const newLots = [20.16, 9.95, 12.62, 7.44, 11.18, 9.02, 8.21, 8.22, 6.57, 6.63]; // stack
const oldLots = [12.03, 14.03, 16.04, 17.8, 18.04, 22.05];
// newLotCombinations is an array of array [[], [], [], [], [], []] // i and j
const newLotCombinations = oldLots.map(() => []);
// Assuming the max. sum of differences is 5.
const MAX_SUM_OF_DIFFERENCES = 7;
const sum = 0;
// store information about a path?
const paths = {};
const loopLots = (i = 0, j = 0) => {
if (i === -1) {
console.log('it should not come to here');
console.log(
"No possible combination for max sum of differences:",
MAX_SUM_OF_DIFFERENCES
);
} else if (!newLots.length) {
console.log(
"Combination that works with max sum of difference ",
MAX_SUM_OF_DIFFERENCES,
newLotCombinations
);
}
newLotCombinations[i][j] = newLots.pop();
if (getSumOfDifferences() > MAX_SUM_OF_DIFFERENCES) {
// put it back to stack
newLots.push(newLotCombinations[i].pop());
if (i + 1 < newLotCombinations.length) {
loopLots(i + 1, newLotCombinations[i+ 1].length);
} else {
// It keeps popping until an array has more than 1 element.
// But this is incorrect, as it will loop with 3 combinations of numbers
// It should walk backward until last divergence
while (newLotCombinations[i] && (i === newLotCombinations.length - 1 || newLotCombinations[i].length < 2)) {
newLots.push(newLotCombinations[i].pop());
i--;
}
if (newLotCombinations[i]) {
newLots.push(newLotCombinations[i].pop());
newLotCombinations[i + 1][newLotCombinations[i + 1].length] = newLots.pop();
// loopLots(i + 1, newLotCombinations[i + 1].length);
loopLots(0, newLotCombinations[0].length);
} else {
console.log(
"No possible combination for max sum of differences:",
MAX_SUM_OF_DIFFERENCES
);
}
}
} else {
loopLots(0, newLotCombinations[0].length);
}
};
const getSumOfDifferences = () => {
let sumOfDifferences = 0;
newLotCombinations.forEach((lots, i) => {
const lotSum = lots.reduce((sum, lot) => {
sum += lot;
return sum;
}, 0);
const difference = lotSum - oldLots[i];
if (difference > 0) {
sumOfDifferences += difference;
}
});
return sumOfDifferences;
};
loopLots();
The logic of using newLotCombinations[i].length < 2 is incorrect, because it keeps pushing in the same alternating numbers. If I memoize the paths to check whether I should go further, how can I know when I am walking backward or forward if simply saving the paths that I have walked through?
I am thinking I also should not save a subset path. I should save a path that reaches the end (i.e. 6), because a subset of path contains unknown paths ahead.

Generate all valid combinations of N pairs of parentheses

UPDATE (task detailed Explanation):
We have a string consist of numbers 0 and 1, divided by operators |, ^ or &. The task is to create all fully parenthesized expressions. So the final expressions should be divided into "2 parts"
For example
0^1 -> (0)^(1) but not extraneously: 0^1 -> (((0))^(1))
Example for expression 1|0&1:
(1)|((0)&(1))
((1)|(0))&(1)
As you can see both expressions above have left and write part:
left: (1); right: ((0)&(1))
left: ((1)|(0)); right: (1)
I tried the following code, but it does not work correctly (see output):
// expression has type string
// result has type Array (ArrayList in Java)
function setParens(expression, result) {
if (expression.length === 1) return "(" + expression + ")";
for (var i = 0; i < expression.length; i++) {
var c = expression[i];
if (c === "|" || c === "^" || c === "&") {
var left = expression.substring(0, i);
var right = expression.substring(i + 1);
leftParen = setParens(left, result);
rightParen = setParens(right, result);
var newExp = leftParen + c + rightParen;
result.push(newExp);
}
}
return expression;
}
function test() {
var r = [];
setParens('1|0&1', r);
console.log(r);
}
test();
code output: ["(0)&(1)", "(0)|0&1", "(1)|(0)", "1|0&(1)"]
Assuming the input expression is not already partially parenthesized and you want only fully parenthesized results:
FullyParenthesize(expression[1...n])
result = {}
// looking for operators
for p = 1 to n do
// binary operator; parenthesize LHS and RHS
// parenthesize the binary operation
if expression[p] is a binary operator then
lps = FullyParenthesize(expression[1 ... p - 1])
rps = FullyParenthesize(expression[p + 1 ... n])
for each lp in lps do
for each rp in rps do
result = result U {"(" + lp + expression[p] + rp + ")"}
// no binary operations <=> single variable
if result == {} then
result = {"(" + expression + ")")}
return result
Example: 1|2&3
FullyParenthesize("1|2&3")
result = {}
binary operator | at p = 2;
lps = FullyParenthesize("1")
no operators
result = {"(" + "1" + ")"}
return result = {"(1)"}
rps = Parenthesize("2&3")
result = {"2&3", "(2&3)"}
binary operator & at p = 2
lps = Parenthesize("2")
no operators
result = {"(" + "2" + ")"}
return result = {"(2)"}
rps = Parenthesize("3")
no operators
result = {"(" + "3" + ")"}
return result = {"(3)"}
lp = "(2)"
rp = "(3)"
result = result U {"(" + "(2)" + "&" + "(3)" + ")"}
return result = {"((2)&(3))"}
lp = "(1)"
rp = "((2)&(3))"
result = result U {"(" + "(1)" + "|" + "((2)&(3))" + ")"}
binary operator & at p = 4
...
result = result U {"(" + "((1)|(2))" + "&" + "(3)" + ")"}
return result {"((1)|((2)&(3)))", "(((1)|(2))&(3))"}
You will have 2^k unique fully parenthesized expressions (without repeated parentheses) given an input expression with k binary operators.

Retrieve count of total unique values kibana + Elasticsearch

Based on this question & answer "How to retrieve unique count of a field using Kibana + Elastic Search"
I have been able to collect the individual count of the unique IP addresses from our Apache logs, however, What I actually want to do is to be able to display the count of the individual IP addresses, i.e. how many unique visitors.
I think I need to use the terms_stats facet to do this but I don't know what to set as the "value_field"
This is not possible with the current version of the kibana.
but i have what i did to achieve this is created the custom histogram panel.
to create the custom histogram panel, just copy the existing histogram and modify config.js, module.js to change all the path references to the new panel.
override the doSearch function to use the query http://www.elasticsearch.org/blog/count-elasticsearch/
and update the results parsing logic.
look for function
b.get_data = function(a, j, k)
return b.populate_modal(n), p = n.doSearch(), p.then(function(c) {
if (b.panelMeta.loading = !1, 0 === j && (b.legend = [], b.hits = 0, a = [], b.annotations = [], k = b.query_id = (new Date).getTime()), d.isUndefined(c.error)) {
if (b.query_id === k) {
var i, n, p, q = 0;
o = JSON.parse("[{\"query\":\"*\",\"alias\":\"\",\"color\":\"#7EB26D\",\"id\":0,\"pin\":false,\"type\":\"lucene\",\"enable\":true,\"parent\" : 0}]");
d.each(o, function(e) {
//alert(JSON.stringify(c));
//var f = c.aggregations.monthly.buckets[e.id];
if (d.isUndefined(a[q]) || 0 === j) {
var h = {interval: m,start_date: l && l.from,end_date: l && l.to,fill_style: b.panel.derivative ? "null" : b.panel.zerofill ? "minimal" : "no"};
i = new g.ZeroFilled(h), n = 0, p = {}
} else
i = a[q].time_series, n = a[q].hits, p = a[q].counters;
d.each(c.aggregations.monthly.buckets, function(a) {
var c;
n += a.visitor_count.value, b.hits += a.visitor_count.value, p[a.key] = (p[a.key] || 0) + a.visitor_count.value, "count" === b.panel.mode ? c = (i._data[a.key] || 0) + a.visitor_count.value : "mean" === b.panel.mode ? c = ((i._data[a.key] || 0) * (p[a.key] - a.visitor_count.value) + a.mean * a.visitor_count.value) / p[a.key] : "min" === b.panel.mode ? c = d.isUndefined(i._data[a.key]) ? a.min : i._data[a.key] < a.min ? i._data[a.key] : a.min : "max" === b.panel.mode ? c = d.isUndefined(i._data[a.key]) ? a.max : i._data[a.key] > a.max ? i._data[a.key] : a.max : "total" === b.panel.mode && (c = (i._data[a.key] || 0) + a.total), i.addValue(a.key, c)
}), b.legend[q] = {query: e,hits: n}, a[q] = {info: e,time_series: i,hits: n,counters: p}, q++
}), b.panel.annotate.enable && (b.annotations = b.annotations.concat(d.map(c.hits.hits, function(a) {
var c = d.omit(a, "_source", "sort", "_score"), g = d.extend(e.flatten_json(a._source), c);
return {min: a.sort[1],max: a.sort[1],eventType: "annotation",title: null,description: "<small><i class='icon-tag icon-flip-vertical'></i> " + g[b.panel.annotate.field] + "</small><br>" + f(a.sort[1]).format("YYYY-MM-DD HH:mm:ss"),score: a.sort[0]}
})), b.annotations = d.sortBy(b.annotations, function(a) {
return a.score * ("desc" === b.panel.annotate.sort[1] ? -1 : 1)
}), b.annotations = b.annotations.slice(0, b.panel.annotate.size))
}
} else
b.panel.error = b.parse_error(c.error);
b.$emit("render", a), j < h.indices.length - 1 && b.get_data(a, j + 1, k)
})

Resources