YAML anchor for sequence? - yaml

Consider the following YAML file:
A:
&B
- x: 1
y: 2
- x: 10
y: 20
C:
<<: *B
which is read into python via:
from ruamel.yaml import YAML
filename = 'data/configs/debug_config.yml'
with open(filename) as f:
c = YAML(typ='safe').load(f)
print(c)
yielding:
{'A': [{'x': 1, 'y': 2}, {'x': 10, 'y': 20}], 'C': {'x': 1, 'y': 2}}
It is seen that the anchor B only includes the first element of the sequence. Why? I would like an anchor that would include the entire sequence, such that the values of A and C in the python dictionary are identical. How can this be done?

The anchor B includes all elements from A, but you are merging them with the merge key << (source):
If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier in the sequence override keys specified in later mapping nodes.
So, the first item from A overrides the second one.
Remove <<: and C will be the same dictionary as A:
A: &B
- x: 1
y: 2
- x: 10
y: 20
C: *B
Yields:
{
"A": [
{
"y": 2,
"x": 1
},
{
"y": 20,
"x": 10
}
],
"C": [
{
"y": 2,
"x": 1
},
{
"y": 20,
"x": 10
}
]
}
You can check the merge order with this example:
A: &B
- x: 1
y: 2
- x: 10
new: 333
C:
<<: *B
Yields:
{
"A": [
{
"y": 2,
"x": 1
},
{
"x": 10,
"new": 333
}
],
"C": {
"y": 2,
"x": 1,
"new": 333
}
}

As @Hadyniak already indicated you are incorrectly using the merge
keys. Since the alias *B is expanded in the
composer step, before the interpretation of the << merge key in the
constructor step, the latter actually receives a list of dicts, which are
combined with the values of keys of earlier occurring dicts taking precedence. If
the order of processing had happened to be different you would get an error, and
IMO the merge key documentation doesn't explicitly specify that aliases should
be expanded first.
Hadyniak's solution however will have you end
up with c['A'] and c['C'] being the same dictionary and that might not be what you want:
import ruamel.yaml
yaml_str = """\
A:
&B
- x: 1
y: 2
- x: 10
y: 20
C:
*B
"""
yaml = ruamel.yaml.YAML()
c = yaml.load(yaml_str)
print(c['A'] is c['C'], c['A'] == c['C'])
c['A'][0]['x'] = 42
print('x:', c['C'][0]['x'])
which gives:
True True
x: 42
If that is not what you want, you can still use merge keys, but on the dictionaries that are the elements of your sequence:
import ruamel.yaml
yaml_str = """\
A:
- &B1
x: 1
y: 2
- &B2
x: 10
y: 20
C:
- <<: *B1
- <<: *B2
"""
yaml = ruamel.yaml.YAML()
c = yaml.load(yaml_str)
print(c['A'] is c['C'], c['A'] == c['C'])
c['A'][0]['x'] = 42
print('x:', c['C'][0]['x'])
which gives:
False True
x: 1
Alternatively you can tell the composer part of ruamel.yaml to expand the aliases instead of using a reference:
import copy
yaml_str = """\
A:
&B
- x: 1
y: 2
- x: 10
y: 20
C:
*B
"""
yaml = ruamel.yaml.YAML()
yaml.composer.return_alias = lambda s: copy.deepcopy(s)
c = yaml.load(yaml_str)
print(c['A'] is c['C'], c['A'] == c['C'])
c['A'][0]['x'] = 42
print('x:', c['C'][0]['x'])
which gives:
False True
x: 1
The above will only work in ruamel.yaml>0.17.2, for older versions you'll need to copy and modify the compose_node method:
import copy
yaml_str = """\
A:
&B
- x: 1
y: 2
- x: 10
y: 20
C:
*B
"""
class MyComposer(ruamel.yaml.composer.Composer):
    """Composer that resolves an alias to a deep copy of the anchored node.

    Because every alias gets its own copy, the objects constructed for the
    anchor and for each alias are equal but not identical.
    """

    def compose_node(self, parent, index):
        # type: (Any, Any) -> Any
        """Compose the next node from the parser's event stream.

        An alias event returns a deep copy of the node stored for that
        anchor; an undefined alias raises ComposerError. Any other event is
        dispatched to the scalar/sequence/mapping composer, after warning if
        its anchor was already defined.
        """
        # Local import so the snippet does not rely on a file-level import.
        import warnings

        if self.parser.check_event(ruamel.yaml.events.AliasEvent):
            event = self.parser.get_event()
            alias = event.anchor
            if alias not in self.anchors:
                # Fully qualified: only `ruamel.yaml` itself is imported here.
                raise ruamel.yaml.composer.ComposerError(
                    None,
                    None,
                    'found undefined alias {alias!r}'.format(alias=alias),
                    event.start_mark,
                )
            # The one intended change: copy instead of returning the same node.
            return copy.deepcopy(self.anchors[alias])
        event = self.parser.peek_event()
        anchor = event.anchor
        if anchor is not None:  # have an anchor
            if anchor in self.anchors:
                ws = (
                    '\nfound duplicate anchor {!r}\nfirst occurrence {}\nsecond occurrence '
                    '{}'.format((anchor), self.anchors[anchor].start_mark, event.start_mark)
                )
                warnings.warn(ws, ruamel.yaml.error.ReusedAnchorWarning)
        self.resolver.descend_resolver(parent, index)
        if self.parser.check_event(ruamel.yaml.events.ScalarEvent):
            node = self.compose_scalar_node(anchor)
        elif self.parser.check_event(ruamel.yaml.events.SequenceStartEvent):
            node = self.compose_sequence_node(anchor)
        elif self.parser.check_event(ruamel.yaml.events.MappingStartEvent):
            node = self.compose_mapping_node(anchor)
        self.resolver.ascend_resolver()
        return node
yaml = ruamel.yaml.YAML()
yaml.Composer = MyComposer
c = yaml.load(yaml_str)
print(c['A'] is c['C'], c['A'] == c['C'])
c['A'][0]['x'] = 42
print('x:', c['C'][0]['x'])
which also gives:
False True
x: 1

Related

How to flip the axis of a plot under ruby?

I wrote a ruby function to display the contents of a Daru dataframe df:
def plot_dataframe(df)
test = df.plot type: :line, x: :value, y: :depth
test.export_html(filename='test')
return
end
This outputs an html file named test.html.
How can I flip the y axis (ordinate) so that the depth starts at 0 and increases downwards?
I am looking for an equivalent to Python's invert_yaxis().
At @Serge de Gosson de Varennes' request, here is an MRE:
require 'json'
require 'daru'
require 'nyaplot'
df = Daru::DataFrame.new(
value: [1.2, 1.4, 1.1, 1.0, 1.0],
depth: [0, 1, 2, 3, 4]
)
test = df.plot type: :line, x: :value, y: :depth, y_reverse: true
test.export_html(filename='MRE')
This outputs:
You can do this in one of two ways:
def plot_dataframe(df)
test = df.plot type: :line, x: :value, y: :depth, y_reverse: true
test.export_html(filename='test')
return
end
or
def plot_dataframe(df)
test = df.plot type: :line, x: :value, y: :depth, y_axis_scale: :reverse
test.export_html(filename='test')
return
end

Simplifying expression trees

I am trying to write a program that simplifies mathematical expressions.
I have already written a parser that converts a string to a binary tree.
For example (1+2)*x will become
*
/ \
+ x
/ \
1 2
The idea I had of simplifying such trees is as follows:
You store a set of trees and their simplified version
For example
* +
/ \ / \
a + and * *
/ \ / \ / \
b c a b a c
(Where a,b,c can be any subtree)
Then, If I find a subtree that matches one of the stored trees, I will
replace it with its simplified version.
If necessary I will repeat the process until the tree is fully simplified.
The problem with this approach is that it can't "combine like terms" in some cases.
For example, if I try to store the tree:
+ *
/ \ and / \
x x 2 x
Then when I try to simplify the expression x+y+x, with the following tree:
+
/ \
x +
/ \
y x
It will not be simplified to 2x+y, because the subtree
+
/ \
x x
Is not contained in the tree, thus the tree will not be simplified.
I tried writing an explicit algorithm that combine like terms but there are too many
cases to consider.
Can anyone please help me find a solution to this problem?
Here is one of the basic ideas which is used in computer algebra systems.
For operators like Plus (+) and Times (*) you can define attributes like Flat (associativity) and Orderless (commutativity). Also don't define Plus and Times as "binary" operators but as "multiple-argument" operators.
So an input like:
Plus(x,Plus(y,x))
in the first step can be transformed (flattened) because of the Flat attribute to
Plus(x,y,x)
in the next step it can be transformed (sorted) because of the Orderless attribute to
Plus(x,x,y)
In your "evaluation" step you can now go through the arguments and "simplify" the expression to:
Plus(Times(2,x),y)
This approach has the advantage that expressions which are "structurally equal" are stored in the same "canonical form" and can, for example, be compared more easily for "object equality" in the programming language used.
We may consider polynomials
we get the '+'-reducer and '*'-reducer for two polynomes in X
Now in the tree, instead of considering either a scalar or x as node, we may consider an "irreducible" polynomial.
Then we apply the '*'-reducer if the node operator is * or the '+'-reducer otherwise; both transform two irreducible polynomials into a new irreducible one.
e.g where P_a, P_b two polynomes and
P_a = {
x0: 1 // term of degree 0 idem 1
x1: 2 // 2x
x3: 4 // 4x^3
}
and P_b = {x1: 3}
we get for sum: P_a + P_b = {x0: 1, x1: 5, x3: 4}
(so tree ['+', P_a, P_b] simplifies as {x0: 1, x1: 5, x3: 4})
we get for multiplication: P_a * P_b = {x1: 3, x2: 6, x4: 12}
At the end of the day, we get an irreducible polynome in X.
We can write back that polynome as a binary tree (which is thus a simplified tree):
for each monome (in X^i), write its associated binary tree (which only contains * operator)
e.g: 5x^3 => ['*', ['*', ['*', x, x], x], 5]
then sum them
e.g: 1 + x + x^2 => ['+', ['+', 1, ['*', x, 1]], ['*', x, x]]
Same idea (idem implementing '+'-reducer/'*'-reducer) can be applied with an expression having polynomes in X, Y or Z, or whatever (so in your case, x, y)
below an example of implementation (you may uncomment and pass the tests using nodejs)
// Add two polynomials of the form {monomialKey: scalar, monomialKey2: scalar2, ...},
// where a monomial key looks like x1y2z2.
// Returns a new object; terms whose coefficients cancel to 0 are dropped.
const add = (a, b) => {
  const sum = { ...a }
  for (const [key, coeff] of Object.entries(b)) {
    const total = (sum[key] || 0) + coeff
    if (total === 0) {
      delete sum[key]
    } else {
      sum[key] = total
    }
  }
  return sum
}
// Parse a monomial key into a {variable: degree} map,
// e.g. 'x1y2z2' => {x: 1, y: 2, z: 2}.
// Throws a TypeError when the key contains no <letters><digits> token.
const parseKey = s => {
  const degrees = {}
  for (const token of s.match(/[a-z]+\d+/g)) {
    const [, name, digits] = /([a-z]+)(\d+)/.exec(token)
    degrees[name] = parseInt(digits, 10)
  }
  return degrees
}
// Serialize a {variable: degree} map into a monomial key, e.g. {x: 1, y: 2} => 'x1y2'.
// Variable names are sorted so the same monomial always gets the same key
// regardless of insertion order; otherwise x*y ('x1y1') and y*x ('y1x1')
// would be stored as different terms and never combined.
const writeKey = o => Object.keys(o).sort().map(varname => varname + o[varname]).join('')
// Multiply two monomial keys by summing the degrees per variable,
// e.g. x1y3 * x1 => x2y3. Variables of degree 0 are left out; if nothing
// remains, the constant key (x0) is returned.
const timesMonomialKey = (iA, iB) => {
  const total = {}
  for (const degrees of [parseKey(iA), parseKey(iB)]) {
    for (const [name, deg] of Object.entries(degrees)) {
      if (deg !== 0) {
        total[name] = (total[name] || 0) + deg
      }
    }
  }
  return Object.keys(total).length === 0 ? writeKey({ x: 0 }) : writeKey(total)
}
// Multiply two polynomials (a and b are both {monomialKey: scalar} maps).
// Every term of a is multiplied with every term of b; coefficients landing
// on the same monomial are summed, and terms that cancel to 0 are dropped.
const times = (a, b) => {
  const product = {}
  for (const [keyA, coeffA] of Object.entries(a)) {
    for (const [keyB, coeffB] of Object.entries(b)) {
      const key = timesMonomialKey(keyA, keyB)
      const coeff = (product[key] || 0) + coeffA * coeffB
      if (coeff === 0) {
        delete product[key]
      } else {
        product[key] = coeff
      }
    }
  }
  return product
}
// Reduce an expression tree to a polynomial.
// A tree is either a leaf (a variable name string or a number) or an array
// [operator, left, right]; '+' adds, any other operator multiplies.
const reduceTree = t => {
  if (Array.isArray(t)) {
    const [op, lhs, rhs] = t
    const left = reduceTree(lhs)
    const right = reduceTree(rhs)
    return op === '+' ? add(left, right) : times(left, right)
  }
  if (typeof t === 'string') {
    return { [writeKey({ [t]: 1 })]: 1 } // x => {x1: 1}
  }
  return { [writeKey({ x: 0 })]: t } // 5 => {x0: 5}
}
// Convert a polynomial ({monomialKey: scalar}) back into a binary expression tree.
// Each monomial becomes a left-nested chain of '*' nodes (variables first, then
// the coefficient unless it is 1); the monomials are then chained with '+'.
// An empty polynomial is written as 0.
const writePolynomial = o => {
  const writeMonomial = ([key, s]) => {
    const factors = Object.entries(parseKey(key)).flatMap(([varname, deg]) =>
      Array.from({ length: deg }).fill(varname)
    )
    // A constant term has no variable factors. Return the scalar directly:
    // with coefficient 1 the factor list would be empty, and reduce() with
    // no initial value throws a TypeError on an empty array.
    if (factors.length === 0) return s
    if (s !== 1) factors.push(s)
    return factors.reduce((t, next) => ['*', t, next])
  }
  const monomials = Object.entries(o).map(writeMonomial)
  if (monomials.length === 0) return 0
  return monomials.reduce((t, next) => ['+', t, next])
}
console.log(writePolynomial(reduceTree(['+', ['+', 'x', 'y'], 'x'])))
//const assert = require('assert')
//assert.deepEqual(parseKey('x0y2z3'), { x: 0, y: 2, z: 3 })
//assert.deepEqual(writeKey({ x: 0, y: 2, z: 3 }), 'x0y2z3')
//assert.deepEqual(timesMonomialKey('x1y2', 'x3z1'), 'x4y2z1')
//assert.deepEqual(timesMonomialKey('x0y0', 'z0'), 'x0')
//assert.deepEqual(timesMonomialKey('x0y0', 'z0x1'), 'x1')
//assert.deepEqual(add({x0: 3, x1: 2}, {x0: 4, x3: 5}), {x0: 7, x1: 2, x3: 5})
//assert.deepEqual(add({x0: 3, y1: 2}, {x0: 4, y2: 5}), {x0: 7, y1: 2, y2: 5})
//assert.deepEqual(add({x0: 1}, {x0: -1}), {})
//assert.deepEqual(times({x0: 3, x1: 2}, {x0: 4, x1: 5}), {x0: 12, x1: 23, x2: 10})
//assert.deepEqual(times(
// {x1y0: 3, x1y1: 2},
// {x1y0: 4, x1y1: 5}),
// {x2: 12, x2y1: 23, x2y2: 10}
//)
//assert.deepEqual(reduceTree('x'), {x1: 1})
//assert.deepEqual(reduceTree(['*', 2, 'x']), {x1: 2})
//assert.deepEqual(reduceTree(['+', 2, 'x']), {x0: 2, x1: 1})
//assert.deepEqual(reduceTree(['+', 'x', ['+', 'y', 'x']]), {x1: 2, y1: 1})
//assert.deepEqual(writePolynomial({ x1y1:1, x1y2: 2}), ['+', ['*', 'x', 'y'], ['*', ['*', ['*', 'x', 'y'], 'y'], 2]])
//assert.deepEqual(writePolynomial(reduceTree(['*', ['*', 'x', 'y'], 0])), 0)
//assert.deepEqual(writePolynomial(reduceTree(['+', ['*', ['*', 'x', 'y'], 0], 2])), 2)
//
//// finally your example :)
//assert.deepEqual(writePolynomial(reduceTree(['+', ['+', 'x', 'y'], 'x'])), ['+', ['*', 'x', 2], 'y'])

Add previous value to each hash value

I have a hash with integer values:
h = {
a: 1,
b: 1,
c: 1,
d: 2,
e: 2,
}
I need to add 100 to the first value, and for the second value and on, I need to add the preceding value to the original value to get:
{
a: 101,
b: 102,
c: 103,
d: 105,
e: 107,
}
Is there a good way to achieve this?
You could use inject to calculate the total sum:
h = { a: 1, b: 1, c: 1, d: 2, e: 2}
h.inject(100) { |s, (k, v)| s + v }
#=> 107
And while doing so, you can also set the hash values to get an accumulated sum:
h.inject(100) { |s, (k, v)| h[k] = s + v }
h #=> {:a=>101, :b=>102, :c=>103, :d=>105, :e=>107}
Immutable solution that does not modify the input:
h.each_with_object({sum: 100, outcome: {}}) do |(k, v), acc|
acc[:outcome][k] = acc[:sum] += v
end
#⇒ {:sum=>107, :outcome=>{:a=>101, :b=>102, :c=>103, :d=>105, :e=>107}}
You can just keep track of the sum as an external variable:
sum = 100
h.transform_values{|v| sum += v} # => {:a=>101, :b=>102, :c=>103, :d=>105, :e=>107}
Maybe this is not the most efficient solution, but it is definitely nice and readable.
# Replace each value of h, in place, with 100 plus the running total of the
# original values up to and including that entry.
accumulated_sum = 0
h.each do |key, value|
  accumulated_sum += value
  # Write back to h: a bare `hash` is Object#hash (an Integer), not the hash
  # being iterated. Reassigning an existing key during iteration is allowed.
  h[key] = 100 + accumulated_sum
end

How can the YAML following references override the front one?

merge:
- &LEFT { x: 1, y: 1, r: 1 }
- &BIG { x: 2, y: 2, r: 2 }
- &SMALL { x: 3, y: 3, r: 3}
- # Override
<< : [ *BIG, *LEFT, *SMALL ]
x: 1
label: big/left/small
I get the output:
{
merge:
[
{ x: 1, y: 1, r: 1 },
{ x: 2, y: 2, r: 2 },
{ x: 3, y: 3, r: 3 },
{ x: 1, y: 2, r: 2, label: 'big/left/small' }
]
}
But the results do not meet my expectation, the last one in the merge object I hope it be
{ x: 1, y: 3, r: 3, label: 'big/left/small' }.
How can I do with the YAML syntax ?
You cannot do this with YAML syntax, and your expectations are unfounded on multiple levels.
An anchored element (whether a sequence element or not) doesn't magically disappear when it is used in merge alias or any other alias nor on the basis of it being an anchor
A toplevel mapping key (merge) doesn't magically disappear because its value is a sequence scalar that contains an element with a merge indicator
The Merge Key Language-Independent Type documentation doesn't indicate such a deletion and neither does the YAML specification. The anchors (and aliases) are not normally preserved in the representation in the language you use for loading your YAML, as per the YAML specs. Therefore it is normally not possible to find the anchored elements and delete them after loading.
A generic solution would be to have another top-level key, default, that "defines" the anchors, and to work only with the value associated with the merge key:
import ruamel.yaml
yaml_str = """\
default:
- &LEFT { x: 1, y: 1, r: 1 }
- &BIG { x: 2, y: 2, r: 2 }
- &SMALL { x: 3, y: 3, r: 3}
merge:
# Override
<< : [ *BIG, *LEFT, *SMALL ]
x: 1
label: big/left/small
"""
# Use the YAML() API with the safe loader: the module-level ruamel.yaml.load()
# is deprecated and unsafe to call on untrusted input without an explicit loader.
data = ruamel.yaml.YAML(typ='safe').load(yaml_str)['merge']
print(data)
gives:
{'x': 1, 'r': 2, 'y': 2, 'label': 'big/left/small'}
(the order of the keys in your output is of course random)

What is the most idiomatic way to perform multiple tests in ruby?

I have the following logic:
some_array.each do |element|
if element[:apples] == another_hash[:apples] &&
element[:oranges] == another_hash[:oranges] &&
element[:pineapple] == another_hash[:pineapple]
match = element
break
end
end
I iterate through a list of key value pairs. If I can match the required keys (3 of 5), then I toss the element in a var for later use. If I find a match, I break out of the loop.
I am looking for the most idiomatic way to optimize this conditional. Thank you in advance.
How about:
# Pick the first element whose three fruit values all equal those of another_hash.
# The key is :pineapple, as in the question; a misspelled key would compare
# nil == nil and always count as a match.
match = some_array.find do |element|
  [:apples, :oranges, :pineapple].all? { |key| element[key] == another_hash[key] }
end
If you want to select any element which has at least 3 matching keys from 5 keys given then:
# Pick the first element that agrees with another_hash on at least three of its keys.
match = some_array.find do |element|
  element.keys.select { |key| element[key] == another_hash[key] }.size > 2
end
This is how I'd do it.
Code
# Return the first hash in some_array whose values for the keys in fruit equal
# those of another_hash, or nil if there is none. Also returns nil when
# another_hash has no value for one of the keys.
def fruit_match(some_array, another_hash, fruit)
  wanted = another_hash.values_at(*fruit)
  if wanted.any?(&:nil?)
    nil
  else
    some_array.detect { |candidate| candidate.values_at(*fruit) == wanted }
  end
end
Examples
some_array = [ { apple: 1, orange: 2, pineapple: 3, plum: 4 },
{ apple: 1, cherry: 7, pineapple: 6, plum: 2 },
{ apple: 6, cherry: 2, pineapple: 8, fig: 3 } ]
another_hash = { apple: 6, cherry: 4, pineapple: 8, quamquat: 5 }
fruit = [:apple, :pineapple]
fruit_match(some_array, another_hash, fruit)
#=> { :apple=>6, :cherry=>2, :pineapple=>8, :fig=>3 }
fruit = [:apple, :plum]
fruit_match(some_array, another_hash, fruit)
#=> nil
[Edit: I didn't notice the "3-5" matches until I saw #7stud's answer. Requiring the number of matches to fall within a given range is an interesting variation. Here's how I would address that requirement.
Code
# Return every hash in some_array whose number of matching values (for the
# keys in fruit, compared against another_hash) lies within limits.
# A value counts as a match only when it is truthy and equal to its counterpart.
def fruit_match(some_array, another_hash, fruit, limits)
  targets = another_hash.values_at(*fruit)
  some_array.select do |candidate|
    hits = candidate.values_at(*fruit).zip(targets).count do |mine, theirs|
      mine && mine == theirs
    end
    limits.cover?(hits)
  end
end
Example
some_array = [ { apple: 1, orange: 2, pineapple: 1, cherry: 1 },
{ apple: 2, cherry: 7, pineapple: 6, plum: 2 },
{ apple: 6, cherry: 1, pineapple: 8, fig: 3 },
{ apple: 1, banana: 2, pineapple: 1, fig: 3 } ]
another_hash = { apple: 1, cherry: 1, pineapple: 1, quamquat: 1 }
fruit = [:apple, :pineapple, :cherry]
limits = (1..2)
fruit_match(some_array, another_hash, fruit, limits)
#=> [{:apple=>6, :cherry=>1, :pineapple=>8, :fig=>3},
# {:apple=>1, :banana=>2, :pineapple=>1, :fig=>3}]
tidE]
If I can match the required keys (3 of 5)
I don't think any of the posted answers addresses that.
target_keys = %i[
apples
oranges
pineapples
strawberries
bananas
]
data = [
{beer: 0, apples: 1, oranges: 2, pineapples: 3, strawberries: 4, bananas: 5},
{beer: 1, apples: 6, oranges: 7, pineapples: 8, strawberries: 9, bananas: 10},
{beer: 2, apples: 6, oranges: 2, pineapples: 3, strawberries: 9, bananas: 10},
]
match_hash = {
apples: 6, oranges: 2, pineapples: 3, strawberries: 9, bananas: 10
}
required_matches = 3
required_values = match_hash.values_at(*target_keys).to_enum
found_match = nil
catch :done do
data.each do |hash|
found_values = hash.values_at(*target_keys).to_enum
match_count = 0
loop do
match_count += 1 if found_values.next == required_values.next
if match_count == required_matches
found_match = hash
throw :done
end
end
required_values.rewind
end
end
p found_match
--output:--
{:beer=>1, :apples=>6, :oranges=>7, :pineapples=>8, :strawberries=>9, :bananas=>10}
A more readable version I can think of uses slice:
# Compare only the relevant keys of each element against another_hash.
# The key is :pineapple, as in the question; a misspelled key would be missing
# from both slices and silently drop that comparison.
keys = [:apples, :oranges, :pineapple]
match = some_array.find { |e| e.slice(*keys) == another_hash.slice(*keys) }
UPDATE
Hash#slice is not a core Ruby method before Ruby 2.5; in older versions it is provided by Rails' ActiveSupport library.
If you don't want to be using Rails, you can just load Active Support.
Add active_support to your Gemfile and require "active_support/core_ext/hash/slice".
Or you could just paste the contents of slice.rb into your app somewhere. The URL can be found here.

Resources