Retrieve list of objects based on a key value constraint - elasticsearch

I have an object instance index in ES 6.2, which I can query like this:
POST /_search
{
"query": {
"bool": {
"must": [
{
"match": {
"instanceId" : "I001"
}
}
]
}
}
}
and receive a particular instance query result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 15,
"successful": 15,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 5.7745514,
"hits": [
{
"_index": "instance",
"_type": "searchinstance",
"_id": "I001",
"_score": 5.7745514,
"_source": {
"name": "someInstance",
"uuid": "18fab6a6-0fc9-428e-ad60-a13a6a43e0ea",
"id": "I001",
"createdAt": 1559140971501,
"completedAt": 1559140988024,
"modifiedAt": 1559140988028,
"description": "my description",
"instanceId": "I001",
"status": null,
"attributes": [
{
"name": "response.result",
"value": "0"
},
{
"name": "response.value",
"value": "123"
}
],
"createdBy": null
}
}
]
}
}
How do I query for all such instances (i.e. just a list of instanceId values) having "attributes.name": "response.result" and "attributes.value": "0"?
I've been trying to combine the query_string, match, wildcard and nested query types, but without success. It seems that the issue is specifying the path to the attributes structure correctly. When POSTing:
{
"query": {
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"match": {
"attributes.name": "response.result"
}
},
{
"match": {
"attributes.value": "0"
}
}
]
}
}
}
}
}
I receive the following failure reason:
{
"type": "query_shard_exception",
"reason": "failed to create query: {...}",
"index_uuid": "8Sr_2jvsRvqGmDjK71SFsw",
"index": ".kibana",
"caused_by": {
"type": "illegal_state_exception",
"reason": "[nested] failed to find nested object under path [attributes]"
}
}
Thank you.

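Elasticsearch doesn't have a dedicated array type. In fact, any field of any type is treated as an array of values. The nested query fails because it only works on fields that are explicitly mapped with the nested type. So, assuming your attributes field is of object type, you can query it just as you would normally do for a single object. Be aware that with the object type the two conditions may be satisfied by different elements of the array; if both must hold for the same element, the field has to be mapped as nested and queried with a nested query. For example: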
{
"query": {
"bool": {
"must": [
{
"match": {
"attributes.name": "response.result"
}
},
{
"match": {
"attributes.value": "0"
}
}
]
}
}
}

Related

Is it possible to use a query result into another query in ElasticSearch?

I have two queries that I want to combine, the first one returns a document with some fields.
Now I want to use one of these fields in the new query without creating two separate ones.
Is there a way to combine them in order to accomplish my task?
This is the first query
{
"_source": {
"includes": [
"data.session"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"field1": "9419"
}
},
{
"match": {
"field2": "5387"
}
}
],
"filter": [
{
"range": {
"timestamp": {
"time_zone": "+00:00",
"gte": "2020-10-24 10:16",
"lte": "2020-10-24 11:16"
}
}
}
]
}
},
"size" : 1
}
And this is the response returned:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 109,
"relation": "eq"
},
"max_score": 3.4183793,
"hits": [
{
"_index": "file",
"_type": "_doc",
"_id": "UBYCkgsEzLKoXh",
"_score": 3.4183793,
"_source": {
"data": {
"session": "123456789"
}
}
}
]
}
}
I want to use that "data.session" value in another query, instead of manually rewriting the value of the field by passing in the result of the first query.
{
"_source": {
"includes": [
"data.session"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"data.session": "123456789"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "asc"
}
}
]
}
If you mean to use the result of the first query as an input to the second query, then that's not possible in Elasticsearch. But if you share your query and use case, we might be able to suggest a better way.
ElasticSearch does not allow sub queries or inner queries.

How to combine simplequerystring with bool/must

I have this ElasticSearch query for ES version 7:
{
"from": 0,
"simple_query_string": {
"query": "*"
},
"query": {
"bool": {
"must": [
{
"term": {
"organization_id": "fred"
}
},
{
"term": {
"assigned_user_id": "24584080"
}
}
]
}
},
"size": 50,
"sort": {
"updated": "desc"
},
"terminate_after": 50,
}
but ES gives me back this error:
reason: Unknown key for a START_OBJECT in [simple_query_string]
My goal is to be able to use a query string across multiple fields, and also use term/match with bool/must. Should I abandon the query string and just use bool.must[{match:"my query"}]?
You can use bool to combine multiple queries in this way. The must clause works as a logical AND and makes sure all the conditions are matched.
You need to include the simple_query_string inside the query section.
Here is a working example with sample docs and a search query.
Index Sample Data
{
"organization_id": 1,
"assigned_user_id": 2,
"title": "welcome"
}{
"organization_id": 2,
"assigned_user_id": 21,
"title": "hello"
}{
"organization_id": 3,
"assigned_user_id": 22,
"title": "hello welocome"
}
Search Query :
{
"query": {
"bool": {
"must": [
{
"simple_query_string": {
"fields" : ["title"],
"query" : "welcome"
}
},
{
"match": {
"organization_id": "1"
}
},
{
"match": {
"assigned_user_id": "2"
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "my_index",
"_type": "_doc",
"_id": "1",
"_score": 3.0925694,
"_source": {
"organization_id": 1,
"assigned_user_id": 2,
"title": "welcome"
}
}
]

Elasticsearch search query with nested fields

I am working on a resume database on Elasticsearch. There are nested fields. For example, there is a "skills" section. "skills" is a nested field containing "skill" and "years". I want to be able to do a query that returns a skill with a certain year. For example, I want to get resumes of people with 3 or more years of "python" experience.
I have successfully run a query that does the following:
It returns all the resumes that have "python" as a skills.skill and 3 as a skills.years.
However, this also returns results where python is associated with 2 years of experience, as long as some other skill is associated with 3 years of experience.
GET /resumes/_search
{
"query": {
"bool": {
"must": [
{ "match": { "skills.skill": "python" }},
{ "match": { "skills.years": 3 }}
]
}
}
}
Is there a better way to sort the data where that 3 is more associated with python?
You need to use the nested datatype in your mapping and, correspondingly, a nested query when searching.
What you have in your current model appears to be the basic object model.
I've mentioned sample mapping, sample documents, nested query and response below. This would give you what you are looking for.
Mapping
PUT resumes
{
"mappings": {
"mydocs": {
"properties": {
"skills": {
"type": "nested",
"properties": {
"skill": {
"type": "keyword"
},
"years": {
"type": "integer"
}
}
}
}
}
}
}
Sample Documents:
POST resumes/mydocs/1
{
"skills": [
{
"skill": "python",
"years": 3
},
{
"skill": "java",
"years": 3
}
]
}
POST resumes/mydocs/2
{
"skills": [
{
"skill": "python",
"years": 2
},
{
"skill": "java",
"years": 3
}
]
}
Query
POST resumes/_search
{
"query": {
"nested": {
"path": "skills",
"query": {
"bool": {
"must": [
{
"match": {
"skills.skill": "python"
}
},
{
"match": {
"skills.years": 3
}
}
]
}
}
}
}
}
Query Response:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.6931472,
"hits": [
{
"_index": "resumes",
"_type": "mydocs",
"_id": "1",
"_score": 1.6931472,
"_source": {
"skills": [
{
"skill": "python",
"years": 3
},
{
"skill": "java",
"years": 3
}
]
}
}
]
}
}
Note that you only retrieve the document having id 1 in the above response. Also note that, just for the sake of simplicity, I've made skills.skill a keyword type. You can change it to text depending on your use case.
Hope it helps!

Empty inner_hits in compound Elasticsearch filter

I'm seeing what appears to be aberrant behavior in inner_hits results within nested boolean queries.
Test data (abbreviated for brevity):
# MAPPING
PUT unit_testing
{
"mappings": {
"document": {
"properties": {
"display_name": {"type": "text"},
"metadata": {
"properties": {
"NAME": {"type": "text"}
}
}
}
},
"paragraph": {
"_parent": {"type": "document"},
"_routing": {"required": true},
"properties": {
"checksum": {"type": "text"},
"sentences": {
"type": "nested",
"properties": {
"text": {"type": "text"}
}
}
}
}
}
}
# DOCUMENT X 2 (d0, d1)
PUT unit_testing/document/doc_id_d0
{
"display_name": "Test Document d0",
"paragraphs": [
"para_id_d0p0",
"para_id_d0p1"
],
"metadata": {"NAME": "Test Document d0 Metadata"}
}
# PARAGRAPH X 2 (d0p0, d1p0)
PUT unit_testing/paragraph/para_id_d0p0?parent=doc_id_d0
{
"checksum": "para_checksum_d0p0",
"sentences": [
{"text": "Test sentence d0p0s0"},
{"text": "Test sentence d0p0s1 ODD"},
{"text": "Test sentence d0p0s2 EVEN"},
{"text": "Test sentence d0p0s3 ODD"},
{"text": "Test sentence d0p0s4 EVEN"}
]
}
This initial query behaves as I would expect (I'm aware that the metadata filter isn't actually necessary in this example case):
GET unit_testing/paragraph/_search
{
"_source": "false",
"query": {
"bool": {
"must": [
{
"has_parent": {
"query": {
"match_phrase": {
"metadata.NAME": "Test Document d0 Metadata"
}
},
"type": "document"
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d0p0s0"
}
}
}
}
]
}
}
}
It yields an inner_hits object containing the one sentence that matched the predicate (some fields removed for clarity):
{
"hits": {
"hits": [
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"hits": [
{
"_source": {
"text": "Test sentence d0p0s0"
}
}
]
}
}
}
}
]
}
}
The following query is an attempt to embed the query above within a parent "should" clause, to create a logical OR between the initial query, and an additional query that matches a single sentence:
GET unit_testing/paragraph/_search
{
"_source": "false",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"has_parent": {
"query": {
"match_phrase": {
"metadata.NAME": "Test Document d0 Metadata"
}
},
"type": "document"
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d0p0s0"
}
}
}
}
]
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d1p0s0"
}
}
}
}
]
}
}
}
While the "d1" query outputs the result one would expect, with an inner_hits object containing the matching sentence, the original "d0" query now yields an empty inner_hits object:
{
"hits": {
"hits": [
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"total": 0,
"hits": []
}
}
}
},
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"hits": [
{
"_source": {
"text": "Test sentence d1p0s0"
}
}
]
}
}
}
}
]
}
}
Although I'm using the elasticsearch_dsl Python library to build and combine these queries, and I'm something of a novice with respect to the Query DSL, the query format looks solid to me.
What am I missing?
I think what is missing is the name parameter for inner_hits: you have two inner_hits clauses in two different queries that would end up with the same name. Try giving each inner_hits a distinct name parameter [0].
[0] https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-inner-hits.html#_options

Trying to extract a leaf field from Elasticsearch

I have an object in elasticsearch which resembles something like this:
{
"text": "something something something",
"entities": { "hashtags":["test","test123"]}
}
The problem is that not every document has the entities attribute set. So I want to write a query which:
- must contain a keyword in the text field
- must have the entities field
- extracts the entities.hashtags field
I'm trying to extract a leaf field using following query, the problem is I still get documents which don't have an entities field.
For the second part of the question, I was wondering: How do I only extract the entities.hashtags field? I tried something like "fields": ["entities.hashtags"] but it didn't work.
{
"size": 2000,
"query": {
"filtered": {
"query": {
"match_all": {
}
},
"filter": {
"bool": {
"must": [{
"term": {
"text": "something"
}
},
{
"missing": {
"field": "entities",
"existence": true
}
}]
}
}
}
}
}
This seems to do what you want, if I'm understanding you correctly. A "term" filter on the "text" field and an "exists" filter on the "entities" field filters the docs, and a "terms" aggregation on "entities.hashtags" extracts the values. I'll just post the full example I used:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1
}
}
PUT /test_index/doc/1
{
"text": "something something something",
"entities": { "hashtags": ["test","test123"] }
}
PUT /test_index/doc/2
{
"text": "another doc",
"entities": { "hashtags": ["testagain","testagain123"] }
}
PUT /test_index/doc/3
{
"text": "doc with no entities"
}
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{ "term": { "text": "something" } },
{ "exists": { "field": "entities" } }
]
}
}
}
},
"aggs": {
"hashtags": {
"terms": {
"field": "entities.hashtags"
}
}
}
}
...
{
"took": 35,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"hashtags": {
"buckets": [
{
"key": "test",
"doc_count": 1
},
{
"key": "test123",
"doc_count": 1
}
]
}
}
}

Resources