Can not add highlight option of fragments number - elasticsearch

I have some simple elasticsearch-dsl-py code. Everything works, even the tags, except number_of_fragments. The response keeps returning multiple highlights for a single word.
def name_autocomplete(q):
multi_match = MultiMatch(query=q, fields=ngram_fields, fuzziness='AUTO', tie_breaker=1)
s = s.query(multi_match)[0:5] \
.highlight_options(pre_tags='<b>', post_tags='</b>',
number_of_fragments=1, # This is not working
order='score') \
.highlight('name.ngram') \
.execute()
response_dict = s.to_dict()
return Response(response_dict)
What am I doing wrong?
ADDED:
Analyzer:
edge_ngram_completion = analyzer(
"edge_ngram_completion",
tokenizer=tokenizer('trigram', 'nGram', min_gram=3, max_gram=4),
filter=["lowercase"]
)
Document:
class TerminalDocument(Document):
"""Document Elasticsearch document."""
id = fields.IntegerField(attr='id')
name = fields.TextField(
fields={
'suggest': fields.TextField(analyzer=ascii_fold),
'ngram': fields.TextField(analyzer=edge_ngram_completion)
}
)
Example of highlighting for foobar:
"<b>foo</b>something<b>bar</b>"

Related

How to update bokeh active interaction with GeoJSON as data source?

I have made an interactive choropleth map with bokeh, and I'm trying to add active interactions using the dropdown widget (Select). However, most tutorials and SO questions about active interactions use ColumnDataSource, and not GeoJSONDataSource.
The issue is that GeoJSONDataSource doesn't have a .data attribute like ColumnDataSource does, so I don't know exactly how the syntax works when updating it.
My dataset is a dictionary in the form of city_dict = {'Amsterdam': <some data frame>, 'Antwerp': <some data frame>, ...}, where the dataframe is in geojson format. I have already confirmed that this format works when making glyphs.
def update(attr, old, new):
s_value = dropdown.value
p.title.text = '%s', s_value
new_src1 = make_dataset(s_value)
val1 = GeoJSONDataSource(new_src1)
r1.data_source = val1
where make_dataset is a function that transforms my original dataset into a dataset that can feed into the GeoJSONDataSource function. make_dataset requires a string (name of the city) to work eg. 'Amsterdam'. It works on passive interactions.
The main plot code (removed unnecessary stuff) is:
dropdown = Select(value='Amsterdam', options = cities)
controls = WidgetBox(dropdown)
initial_city = 'Amsterdam'
a = make_dataset(initial_city)
src1 = GeoJSONDataSource(a)
p = figure(title = 'Amsterdam', plot_height = 750 , plot_width = 900, toolbar_location = 'right')
r1 = p.patches('xs','ys', source = src1, fill_color = {'field' :'norm', 'transform' : color_mapper})
dropdown.on_change('value', update)
layout = row(controls, p)
curdoc().add_root(layout)
I've added the error I get. error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '1147'}, 'attr': 'value', 'new': 'Antwerp'}], 'references': []}: ValueError("expected a value of type str, got ('%s', 'Antwerp') of type tuple",)

How to sort a list of text+date strings in Groovy

I have a list of strings, each one contains text with date like this:
"foo_6.7.2016"
"foo_5.10.2016"
"foo_6.30.2016"
"foo_6.23.2016"
"foo_6.2.2016"
"foo_5.22.2016"
I need to sort them by Date and get this:
"foo_6.30.2016"
"foo_6.23.2016"
"foo_6.7.2016"
"foo_6.2.2016"
"foo_5.22.2016"
"foo_5.10.2016"
An alternative might be:
// Input: strings of the form "foo_<month>.<day>.<year>".
def foos = [
    "foo_6.7.2016",
    "foo_5.10.2016",
    "foo_6.30.2016",
    "foo_6.23.2016",
    "foo_6.2.2016",
    "foo_5.22.2016"
]
// Sort key: drop the "foo_" prefix and parse what is left as a date.
def toDate = { String entry -> Date.parse('M.d.yyyy', entry - 'foo_') }
// sort(false, ...) returns a sorted copy and leaves foos untouched;
// reverse() then puts the most recent date first.
def sorted = foos.sort(false, toDate).reverse()
For a quick answer that needs substantial cleanup:
// Sort "foo_<M.d.yyyy>" strings by their embedded date, newest first.
def dates = [
    "foo_6.7.2016",
    "foo_5.10.2016",
    "foo_6.30.2016",
    "foo_6.23.2016",
    "foo_6.2.2016",
    "foo_5.22.2016"
]
def prefix = "foo_"
java.text.SimpleDateFormat sdf = new java.text.SimpleDateFormat("M.d.yyyy")
// The ParsePosition makes the parser start right after the prefix,
// so the prefix never has to be stripped from the string.
def sorted_dates = dates.collect {
    sdf.parse(it, new java.text.ParsePosition(prefix.length()))
}.sort().reverse()
// Re-attach the prefix directly; the two interpolations are adjacent so
// no literal " + " ends up in the output.
def newDates = sorted_dates.collect { "${prefix}${sdf.format(it)}" }
println newDates

Rethinkdb, Python, and Filter

So here's a fun reduced snippet of some code I'm working on in Python 2.7 with the latest RethinkDB (1.14). My problem is that I now need to add another condition, and there are too many possible combinations. Can this be done in a single filter statement?
query = r.table('messages').order_by(r.desc('created'))
if tag is not None and read is not None:
query = query.filter(lambda n: (n['user_id'] == user_id) &
(n['tags'].contains(tag)) &
(n['read'] == read))
elif read is not None:
query = query.filter(lambda n: (n['user_id'] == user_id) &
(n['read'] == read))
elif tag is not None:
query = query.filter(lambda n: (n['user_id'] == user_id) &
(n['tags'].contains(tag)))
else:
query = query.filter(lambda n: n['user_id'] == user_id)
fields_list = query.skip(skip)\
.limit(limit)\
.run(g.db_conn)
As a side note, this would be so much easier if chained filters worked, essentially acting as an and. But it seems for now you can only have one filter per RethinkDB query.
Edit: Not sure what was happening before, but chaining filters does work.
Turns out this actually works!
def _(n):
return (n['user_id'] == user_id) & \
(n['tags'].contains(tag) if tag is not None else True) & \
(n['read'] == read if read is not None else True)
query = r.table('messages') \
.order_by(r.desc('created')) \
.filter(_) \
.skip(skip)\
.limit(limit)
fields_list = query.run(g.db_conn)
Edit: Also possible:
query = r.table('messages') \
.order_by(r.desc('created')) \
.filter(r.row['user_id'] == user_id) \
.filter(r.row['tags'].contains(tag) if tag is not None else True) \
.filter(r.row['read'] == read if read is not None else True) \
.skip(skip) \
.limit(limit)
fields_list = query.run(g.db_conn)

How to use XPath in Groovy to return tags and values

I have the following XML, and in SoapUI Groovy I am attempting to capture a set of XML with its tags and values, for example:
<telephoneNumbers>
<telephone>
<id>125042741</id>
<areaCode>0161</areaCode>
<phoneNumber>4804420</phoneNumber>
<extension>1234</extension>
<usage>Work</usage>
</telephone>
</telephoneNumbers>
I'm trying to return the following outcome (tags and values):
<telephone>
<id>125042741</id>
<areaCode>0161</areaCode>
<phoneNumber>4804420</phoneNumber>
<extension>1234</extension>
<usage>Work</usage>
</telephone>
Here is the groovy:
def groovyUtils = new com.eviware.soapui.support.GroovyUtils( context )
def Recall = groovyUtils.getXmlHolder( "Recall#Response" )
def telephone = Recall[ "//telephone//*" ] as String
String returnXml = ""
if ( Recall["//restrict"] != null ) {
returnXml= telephone
}
else
return returnXml
Similar to this answer ("How do I print a groovy Node with namespace preserved?"), you could parse the XML using XmlSlurper and afterwards print the XML nodes you want with StreamingMarkupBuilder.
// Take the raw response XML from the SoapUI XmlHolder.
String xml = Recall.getXml()
// Parse it into a navigable GPathResult. (Invisible zero-width characters
// that trailed this line have been removed; they break compilation.)
def telephoneNumbers = new XmlSlurper().parseText(xml)
def outputBuilder = new groovy.xml.StreamingMarkupBuilder()
// mkp.yield re-emits the selected node(s) as markup, so both the tags and
// their values are kept.
String telephoneXml = outputBuilder.bind { mkp.yield telephoneNumbers.telephone }
I posted a runnable example here: http://groovyconsole.appspot.com/script/1245001 not using SOAPUI but a simple string for demonstration. Hope this helps!

sphinx and multilanguage search || search by attribute

I'm trying to get results from sphinx by attr_string. Here is sphinx configuration:
source db
{
type = mysql
sql_query = \
SELECT id,language,text,page_url \
FROM content
sql_attr_string = language
sql_attr_string = page_url
}
index content
{
source = db
charset_type = utf-8
min_word_len = 3
}
The results that i'm getting are like this:
[matches] => Array
(
[106] => Array
(
[weight] => 4
[attrs] => Array
(
[page_url] => en/example.gtml
[language] => en
)
)
What I want to do is filter all results by "language"=en.
$sphinx->SetFilter() only works with integers, whereas in this case I need to filter by the string "en".
Any help is appreciated!
I found a solution...
In case anybody needs it:
Configure "source" to use crc32, e.g.:
source db
{
type = mysql
sql_query = \
SELECT id,crc32(language) as language,text,page_url \
FROM content
sql_attr_uint = language
sql_attr_string = page_url
}
And in the client, modify the SetFilter call to use crc32(), e.g.:
$s->SetFilter('language',array(crc32('en')));
$result = $s->query('bird is a word','content');
I hope it helps somebody...
more information: http://sphinxsearch.com/docs/current.html#attributes

Resources