WebPPL: return just one result - probability

I have the following:
var rand = Infer({method: 'enumerate'}, function() {
  var A = randomInteger({n: 4})
  var B = randomInteger({n: 4})
  condition(A == B)
  return {A:A, B:B}
})
$ webppl test.wppl
Marginal:
{"A":3,"B":3} : 0.25
{"A":2,"B":2} : 0.25
{"A":1,"B":1} : 0.25
{"A":0,"B":0} : 0.25
I want to test with n > 1000, but then I'd get more than 1000 lines of results.
What should I return to get back just one result, e.g.
{"A":_,"B":_} : 0.25

This works, but it seems hacky:
condition(A == B)
return A == 1 ? {A:A, B:B} : 'rest'
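
If what you're after is simply the probability that A equals B, one option (a sketch in the same style as your script; I haven't checked it against your WebPPL version) is to drop the condition and return the comparison itself, so the marginal collapses to a single boolean:

var rand = Infer({method: 'enumerate'}, function() {
  var A = randomInteger({n: 1000})
  var B = randomInteger({n: 1000})
  return A == B   // no condition needed; the marginal has only two entries
})
// here true gets probability 1/1000 and false gets 999/1000

Note that keeping condition(A == B) and returning a constant would always print a probability of 1, because the marginal is renormalized after conditioning, so dropping the condition is the point.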

Related

barstate.islast invalidates series[1]

Original Question
Apparently, when using barstate.islast, the historical values of a series (offsets greater than zero, e.g. series[1]) are not returned.
Is this the expected behavior, or is it a problem?
//@version=5
indicator("barstate.islast invalidates series[1]", max_bars_back=5000)
index = bar_index
countNonNa(indices) =>
    count = 0
    for i = 0 to 4999 by 1
        index = indices[i]
        if not(na(index))
            count += 1
    count
count00 = countNonNa(index)
plot(count00, "count00", color = color.blue)  // ok
count01 = barstate.isconfirmed ? countNonNa(index) : 0
plot(count01, "count01", color = color.black)  // ok
count02 = barstate.islast ? countNonNa(index) : 0
plot(count02, "count02", color = color.red)  // error: expected 5000 on the last bar, but gets 1
You should execute your function on every bar, as the warning in the console states. When the call is gated by barstate.islast it only executes on the last bar, so the series history the function indexes into is never built on the earlier bars and it sees only one value.
This will work:
//@version=5
indicator("barstate.islast invalidates series[1]", max_bars_back=5000)
index = bar_index
countNonNa(indices) =>
    count = 0
    for i = 0 to 4999 by 1
        idx = indices[i]
        if not(na(idx))
            count += 1
    count
c = countNonNa(index)
count00 = c
plot(count00, "count00", color = color.blue)  // ok
count01 = barstate.isconfirmed ? c : 0
plot(count01, "count01", color = color.black)  // ok
count02 = barstate.islast ? c : 0
plot(count02, "count02", color = color.red)  // ok: shows 5000 on the last bar

Retrieve count of total unique values in Kibana + Elasticsearch

Based on this question & answer: "How to retrieve unique count of a field using Kibana + Elastic Search",
I have been able to collect the counts for each individual IP address from our Apache logs. However, what I actually want is to display the count of distinct IP addresses, i.e. how many unique visitors there are.
I think I need to use the terms_stats facet to do this, but I don't know what to set as the "value_field".
This is not possible with the current version of Kibana. What I did to achieve it was to create a custom histogram panel.
To create the custom histogram panel, copy the existing histogram panel and modify config.js and module.js, changing all the path references to the new panel.
Then override the doSearch function to use the counting approach described at http://www.elasticsearch.org/blog/count-elasticsearch/ and update the result-parsing logic.
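As a rough sketch of that query (written here as a JavaScript object; the clientip field and the monthly interval are placeholders for whatever your logs use), a date_histogram with a cardinality sub-aggregation is what produces the aggregations.monthly.buckets / visitor_count.value structure that the parsing code below relies on:

// Sketch only: "clientip" and the interval are assumptions; adjust to your mapping.
var visitorQuery = {
  size: 0,
  aggs: {
    monthly: {
      date_histogram: { field: "@timestamp", interval: "month" },
      aggs: {
        visitor_count: {
          cardinality: { field: "clientip" }  // approximate count of distinct IPs per bucket
        }
      }
    }
  }
};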
Look for the function:
b.get_data = function(a, j, k) {
return b.populate_modal(n), p = n.doSearch(), p.then(function(c) {
if (b.panelMeta.loading = !1, 0 === j && (b.legend = [], b.hits = 0, a = [], b.annotations = [], k = b.query_id = (new Date).getTime()), d.isUndefined(c.error)) {
if (b.query_id === k) {
var i, n, p, q = 0;
o = JSON.parse("[{\"query\":\"*\",\"alias\":\"\",\"color\":\"#7EB26D\",\"id\":0,\"pin\":false,\"type\":\"lucene\",\"enable\":true,\"parent\" : 0}]");
d.each(o, function(e) {
//alert(JSON.stringify(c));
//var f = c.aggregations.monthly.buckets[e.id];
if (d.isUndefined(a[q]) || 0 === j) {
var h = {interval: m,start_date: l && l.from,end_date: l && l.to,fill_style: b.panel.derivative ? "null" : b.panel.zerofill ? "minimal" : "no"};
i = new g.ZeroFilled(h), n = 0, p = {}
} else
i = a[q].time_series, n = a[q].hits, p = a[q].counters;
d.each(c.aggregations.monthly.buckets, function(a) {
var c;
n += a.visitor_count.value, b.hits += a.visitor_count.value, p[a.key] = (p[a.key] || 0) + a.visitor_count.value, "count" === b.panel.mode ? c = (i._data[a.key] || 0) + a.visitor_count.value : "mean" === b.panel.mode ? c = ((i._data[a.key] || 0) * (p[a.key] - a.visitor_count.value) + a.mean * a.visitor_count.value) / p[a.key] : "min" === b.panel.mode ? c = d.isUndefined(i._data[a.key]) ? a.min : i._data[a.key] < a.min ? i._data[a.key] : a.min : "max" === b.panel.mode ? c = d.isUndefined(i._data[a.key]) ? a.max : i._data[a.key] > a.max ? i._data[a.key] : a.max : "total" === b.panel.mode && (c = (i._data[a.key] || 0) + a.total), i.addValue(a.key, c)
}), b.legend[q] = {query: e,hits: n}, a[q] = {info: e,time_series: i,hits: n,counters: p}, q++
}), b.panel.annotate.enable && (b.annotations = b.annotations.concat(d.map(c.hits.hits, function(a) {
var c = d.omit(a, "_source", "sort", "_score"), g = d.extend(e.flatten_json(a._source), c);
return {min: a.sort[1],max: a.sort[1],eventType: "annotation",title: null,description: "<small><i class='icon-tag icon-flip-vertical'></i> " + g[b.panel.annotate.field] + "</small><br>" + f(a.sort[1]).format("YYYY-MM-DD HH:mm:ss"),score: a.sort[0]}
})), b.annotations = d.sortBy(b.annotations, function(a) {
return a.score * ("desc" === b.panel.annotate.sort[1] ? -1 : 1)
}), b.annotations = b.annotations.slice(0, b.panel.annotate.size))
}
} else
b.panel.error = b.parse_error(c.error);
b.$emit("render", a), j < h.indices.length - 1 && b.get_data(a, j + 1, k)
});
};

D3: use d3.max to get containing object?

I have the following data, an array of objects:
var data = [
  { x: 0, y0: 0, y: 100 },
  { x: 1, y0: 0, y: 150 },
  { x: 2, y0: 50, y: 100 },
  { x: 3, y0: 50, y: 150 }
];
I'd like to find the object with the biggest discrepancy between y and y0, using D3.
I can do this to get the biggest difference:
var max_val = d3.max(data, function(d) { return d.y - d.y0;} );
It returns 150. But what I don't know how to do is get the containing object, and learn that the corresponding value of x is 1.
Any ideas?
Your question asks how to use d3.max to find this object, but an alternative is to use the JavaScript Array sort function to do this for you:
>>> data.sort(function(a, b){ return (b.y - b.y0) - (a.y - a.y0); } )[0]
Object {x: 1, y0: 0, y: 150}
Here I am sorting data using a function that, given two objects a and b, compares them by the difference of their y and y0 properties. By subtracting a's difference from b's, the objects are sorted in descending order, so taking the first element gives the one with the biggest discrepancy.
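One caveat with this approach: sort mutates the array in place, so if the original order of data matters elsewhere, sort a copy instead:

var maxObj = data.slice().sort(function(a, b) {
  return (b.y - b.y0) - (a.y - a.y0);
})[0];   // {x: 1, y0: 0, y: 150} for the data above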
I believe currently there isn't a good way of doing this through purely d3. From the d3 docs it states:
Returns the maximum value in the given array using natural order. If
the array is empty, returns undefined. An optional accessor function
may be specified, which is equivalent to calling array.map(accessor)
before computing the maximum value.
I went into the source code to look at how d3.max is calculated:
d3.max = function(array, f) {
  var i = -1, n = array.length, a, b;
  if (arguments.length === 1) {
    while (++i < n && !((a = array[i]) != null && a <= a)) a = undefined;
    while (++i < n) if ((b = array[i]) != null && b > a) a = b;
  } else {
    while (++i < n && !((a = f.call(array, array[i], i)) != null && a <= a)) a = undefined;
    while (++i < n) if ((b = f.call(array, array[i], i)) != null && b > a) a = b;
  }
  return a;
};
This is largely because your accessor function returns the computed value itself: d3.max compares and returns those values, not the elements they came from. You could probably customize it to return an object instead, but as written d3.max specifically handles the values.
As other people have stated, there are other ways of handling this with pure JavaScript.
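If you want to keep the d3.max-style accessor, a small helper along these lines (plain JavaScript, not part of d3) tracks the element alongside the computed value and returns the element instead:

function maxBy(array, f) {
  var best, bestVal;
  array.forEach(function(d, i) {
    var v = f(d, i);
    if (bestVal === undefined || v > bestVal) {   // keep the element with the largest accessor value
      bestVal = v;
      best = d;
    }
  });
  return best;
}

maxBy(data, function(d) { return d.y - d.y0; });   // {x: 1, y0: 0, y: 150}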
Related questions:
Why is domain not using d3.max(data) in D3?
You can also use the standard array.reduce function to find this without using d3 :
var highestDiscrepencyObject = data.reduce(function(memo, val){
var dis = val.y - val.y0,
memoDis = memo.y - memo.y0;
return (dis > memoDis || memo.y === undefined) ? val : memo;
}, {});
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Reduce

Algorithm to generate a sequence proportional to specified percentage

Given a Map of objects and designated proportions (let's say they add up to 100 to make it easy):
val ss : Map[String,Double] = Map("A"->42, "B"->32, "C"->26)
How can I generate a sequence such that for a subset of size n there are ~42% "A"s, ~32% "B"s and ~26% "C"s? (Obviously, small n will have larger errors).
(Work language is Scala, but I'm just asking for the algorithm.)
UPDATE: I resisted a random approach since, for instance, there's a ~18% chance the sequence would start with AA and a ~10% chance it would start with BB, and for n exactly equal to the sum of the proportions the odds of the distribution coming out perfect would be very low. So, following MvG's answer, I implemented it as follows:
/**
 * Returns the key whose achieved proportions are most below desired proportions
 */
def next[T](proportions : Map[T, Double], achievedToDate : Map[T, Double]) : T = {
  val proportionsSum = proportions.values.sum
  val desiredPercentages = proportions.mapValues(v => v / proportionsSum)
  // Initially no achieved percentages, so avoid / 0
  val toDateTotal = if (achievedToDate.values.sum == 0.0) {
    1
  } else {
    achievedToDate.values.sum
  }
  val achievedPercentages = achievedToDate.mapValues(v => v / toDateTotal)
  val gaps = achievedPercentages.map { case (k, v) =>
    val gap = desiredPercentages(k) - v
    (k -> gap)
  }
  val maxUnder = gaps.values.toList.sortWith(_ > _).head
  //println("Max gap is " + maxUnder)
  val gapsForMaxUnder = gaps.mapValues { v => Math.abs(v - maxUnder) < Double.Epsilon }
  val keysByHasMaxUnder = gapsForMaxUnder.map(_.swap)
  keysByHasMaxUnder(true)
}
/**
 * Stream of most-fair next element
 */
def proportionalStream[T](proportions : Map[T, Double], toDate : Map[T, Double]) : Stream[T] = {
  val nextS = next(proportions, toDate)
  val tailToDate = toDate + (nextS -> (toDate(nextS) + 1.0))
  Stream.cons(
    nextS,
    proportionalStream(proportions, tailToDate)
  )
}
Which, when used like this, for example:
val ss : Map[String,Double] = Map("A"->42, "B"->32, "C"->26)
val none : Map[String,Double] = ss.mapValues(_ => 0.0)
val mySequence = (proportionalStream(ss, none) take 100).toList
println("Desired : " + ss)
println("Achieved : " + mySequence.groupBy(identity).mapValues(_.size))
mySequence.map(s => print(s))
println
produces:
Desired : Map(A -> 42.0, B -> 32.0, C -> 26.0)
Achieved : Map(C -> 26, A -> 42, B -> 32)
ABCABCABACBACABACBABACABCABACBACABABCABACABCABACBA
CABABCABACBACABACBABACABCABACBACABABCABACABCABACBA
For a deterministic approach, the most obvious solution would probably be this:
Keep track of the number of occurrences of each item in the sequence so far.
For the next item, choose that item for which the difference between intended and actual count (or proportion, if you prefer that) is maximal, but only if the intended count (resp. proportion) is greater than the actual one.
If there is a tie, break it in an arbitrary but deterministic way, e.g. choosing the alphabetically lowest item.
This approach would ensure an optimal adherence to the prescribed ratio for every prefix of the infinite sequence generated in this way.
Quick & dirty python proof of concept (don't expect any of the variable “names” to make any sense):
import sys
p = [0.42, 0.32, 0.26]
c = [0, 0, 0]
a = ['A', 'B', 'C']
n = 0
while n < 70*5:
    n += 1
    x = 0
    s = n*p[0] - c[0]
    for i in [1, 2]:
        si = n*p[i] - c[i]
        if si > s:
            x = i
            s = si
    sys.stdout.write(a[x])
    if n % 70 == 0:
        sys.stdout.write('\n')
    c[x] += 1
Generates
ABCABCABACABACBABCAABCABACBACABACBABCABACABACBACBAABCABCABACABACBABCAB
ACABACBACABACBABCABACABACBACBAABCABCABACABACBABCAABCABACBACABACBABCABA
CABACBACBAABCABCABACABACBABCABACABACBACBAACBABCABACABACBACBAABCABCABAC
ABACBABCABACABACBACBAACBABCABACABACBACBAABCABCABACABACBABCABACABACBACB
AACBABCABACABACBACBAABCABCABACABACBABCAABCABACBACBAACBABCABACABACBACBA
For every item of the sequence, compute a (pseudo-)random number r equidistributed between 0 (inclusive) and 100 (exclusive).
If 0 ≤ r < 42, take A
If 42 ≤ r < (42+32), take B
If (42+32) ≤ r < (42+32+26)=100, take C
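In code, that could look roughly like this (plain JavaScript rather than Scala, with illustrative names; the cumulative thresholds come straight from the map's values):

// Sketch: one weighted draw per element of the sequence.
function weightedPick(proportions) {            // e.g. { A: 42, B: 32, C: 26 }
  var keys = Object.keys(proportions);
  var total = keys.reduce(function(s, k) { return s + proportions[k]; }, 0);
  var r = Math.random() * total;                // uniform in [0, total)
  var acc = 0;
  for (var i = 0; i < keys.length; i++) {
    acc += proportions[keys[i]];
    if (r < acc) return keys[i];
  }
  return keys[keys.length - 1];                 // guard against floating-point edge cases
}

var seq = [];
for (var n = 0; n < 100; n++) seq.push(weightedPick({ A: 42, B: 32, C: 26 }));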
The number of each entry in your subset is going to be the same as in your map, but with a scaling factor applied.
The scaling factor is n/100.
So if n was 50, you would have { Ax21, Bx16, Cx13 }.
Randomize the order to your liking.
The simplest "deterministic" solution [in terms of the number of elements of each category, IMO] is: add the elements in a predefined order, and then shuffle the resulting list.
First, add map(x)/100 * n elements for each element x [choose how you handle the integer arithmetic to avoid being off by one element], and then shuffle the resulting list.
Shuffling a list is simple with the Fisher-Yates shuffle, which is implemented in most languages: for example, Java has Collections.shuffle() and C++ has random_shuffle().
In Java, it is as simple as:
int N = 107;
List<String> res = new ArrayList<String>();
for (Entry<String, Integer> e : map.entrySet()) { // map is a predefined Map<String,Integer> of frequencies
    for (int i = 0; i < Math.round(e.getValue() / 100.0 * N); i++) {
        res.add(e.getKey());
    }
}
Collections.shuffle(res);
This is nondeterministic, but it gives a distribution of values close to MvG's. It suffers from the problem that it could give AAA right at the start. I post it here for completeness' sake, given that it shows my dissent with MvG was misplaced (and I don't expect any upvotes).
Now, if someone has an idea for an expand function that is deterministic and doesn't just duplicate MvG's method (rendering the calc function useless), I'm all ears!
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
  "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
  <title>ErikE's answer</title>
</head>
<body>
  <div id="output"></div>
  <script type="text/javascript">
    if (!Array.prototype.each) {
      Array.prototype.each = function(callback) {
        var i, l = this.length;
        for (i = 0; i < l; i += 1) {
          callback(i, this[i]);
        }
      };
    }
    if (!Array.prototype.sum) {
      Array.prototype.sum = function() {
        var sum = 0;
        this.each(function(i, val) {
          sum += val;
        });
        return sum;
      };
    }
    function expand(counts) {
      var
        result = "",
        charlist = [],
        char,
        l,
        index;
      counts.each(function(i, val) {
        char = String.fromCharCode(i + 65);
        for ( ; val > 0; val -= 1) {
          charlist.push(char);
        }
      });
      l = charlist.length;
      for ( ; l > 0; l -= 1) {
        index = Math.floor(Math.random() * l);
        result += charlist[index];
        charlist.splice(index, 1);
      }
      return result;
    }
    function calc(n, proportions) {
      var percents = [],
        counts = [],
        errors = [],
        fnmap = [],
        errorSum,
        worstIndex,
        adjust;
      fnmap[1] = "min";
      fnmap[-1] = "max";
      proportions.each(function(i, val) {
        percents[i] = val / proportions.sum() * n;
        counts[i] = Math.round(percents[i]);
        errors[i] = counts[i] - percents[i];
      });
      errorSum = counts.sum() - n;
      while (errorSum != 0) {
        adjust = errorSum < 0 ? 1 : -1;
        worstIndex = errors.indexOf(Math[fnmap[adjust]].apply(0, errors));
        counts[worstIndex] += adjust;
        errors[worstIndex] = counts[worstIndex] - percents[worstIndex];
        errorSum += adjust;
      }
      return expand(counts);
    }
    document.body.onload = function() {
      document.getElementById('output').innerHTML = calc(99, [25.1, 24.9, 25.9, 24.1]);
    };
  </script>
</body>
</html>

In MongoDB, how can I replicate this simple query using map/reduce in ruby?

So using the regular MongoDB library in Ruby I have the following query to find average filesize across a set of 5001 documents:
avg = 0
total = collection.count()
Rails.logger.info "#{total} asset creation stats in the system"
collection.find().each {|row| avg += (row["filesize"] * (1/total.to_f)) if row["filesize"]}
It's pretty simple, so I'm trying to do the same using map/reduce as a learning exercise. This is what I came up with:
map = 'function(){ emit("filesizes", {size: this.filesize, num: 1}); }'
reduce = 'function(k, vals){
  var result = {size: 0, num: 0};
  for(var x in vals) {
    var new_total = result.num + vals[x].num;
    result.num = new_total;
    result.size = result.size + (vals[x].size * (vals[x].num / new_total));
  }
  return result;
}'
#results = collection.map_reduce(map, reduce)
However, the two queries come back with two different results!
What am I doing wrong?
You're weighting the results by doing the division in every reduce function.
Say you had [{size : 5, num : 1}, {size : 5, num : 1}, {size : 5, num : 1}]. Your reduce would calculate:
result.size = 0 + (5*(1/1)) = 5
result.size = 5 + (5*(1/2)) = 7.5
result.size = 7.5 + (5*(1/3)) ≈ 9.17
As you can see, this weights the results towards the earliest elements.
Fortunately, there's a simple solution. Just add a finalize function, which will be run once after the reduce step is finished.
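A minimal sketch of the idea (note the reduce here is also simplified so it only accumulates totals, which is what makes the single division in finalize valid; in the Ruby driver these would be passed as strings, with finalize supplied through the map_reduce options if your driver version exposes them):

// Sketch only: accumulate in reduce, divide once in finalize.
var map = function() {
  if (this.filesize) emit("filesizes", {size: this.filesize, num: 1});
};
var reduce = function(k, vals) {
  var result = {size: 0, num: 0};
  for (var x in vals) {
    result.size += vals[x].size;   // just sum sizes...
    result.num += vals[x].num;     // ...and counts, keeping the same shape as the emitted values
  }
  return result;
};
var finalize = function(k, val) {
  return val.num > 0 ? val.size / val.num : 0;   // runs once per key, after all reduces
};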

Resources