Get all data of specific fields - elasticsearch

I am using Elasticsearch 5.1.1. How can I retrieve, for all documents, only the data of the fields listed below (FeatureValue, FeatureName)?
sample document
{
"_index": "rawdata",
"_type": "feed",
"_id": "591031",
"_score": 1,
"_source": {
"sourceproductname": "1-5-Size Relays",
"zmfrid": 4,
"sourcetitle": null,
"featurename": "Coil Magnetic System",
"localsourcepath": null,
"sourcingdate": "2017-01-08T22:00:00.000Z",
"migrationstatus": 3,
"featrueunit": null,
"inputkeyword": null,
"#version": "1",
"sourcetype": "DirectFeed",
"id": 591031,
"sourceid": 674,
"partdataid": null,
"partid": null,
"featurecondition": null,
"sourcingstatus": null,
"sourcetaxonomypath": "1-5-Size Relays",
"sourcename": "CrunchBase ",
"tags": [],
"sourceurl": "N/A",
"#timestamp": "2017-01-10T11:51:54.095Z",
"featurevalue": "Non-Polarized, Monostable",
"mfr": "feed",
"partdataattributeid": null,
"supplierfamily": "null",
"partnumber": "4-1617072-5"
}
}
I tried this
POST /rawdata/feed/_search?pretty=true
{
"_source": ["FeatureValue", "FeatureName"],
"query": {
"match_all":{}
}
}
sample result
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 386424,
"max_score": 1,
"hits": [
{
"_index": "rawdata",
"_type": "feed",
"_id": "591031",
"_score": 1,
"_source": {}
}

You simply need to lowercase the field names in the source filter since they are lowercased in your documents
POST /rawdata/feed/_search?pretty=true
{
"_source": ["featurevalue", "featurename"], <--- change this
"query": {
"match_all":{}
}
}

Related

Fuzzy query does not work in Elasticsearch

I am reading the Elasticsearch documentation and trying to use the fuzzy query, but it does not work.
I am following the documentation below:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-fuzzy-query.html
postman
GET https://localhost:0000/myindex/_search
I have the following records:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 4,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "myindex",
"_type": "default",
"_id": "ioSZw4UBBGNKkg-oBT13",
"_score": 1.0,
"_source": {
"name": "Rahul_Patil",
"contents": {
"topic": "Elasticsearch with postgresql configuration",
"details": "test index"
}
}
},
{
"_index": "myindex",
"_type": "default",
"_id": "jYRCxIUBBGNKkg-ojj36",
"_score": 1.0,
"_source": {
"name": "Rahul_Patilt_test_Index",
"contents": {
"topic": "Elasticsearch with postgresql configuration 2",
"details": "test index 3"
}
}
},
{
"_index": "myindex",
"_type": "default",
"_id": "j4RexIUBBGNKkg-owj3w",
"_score": 1.0,
"_source": {
"name": "Rahul_Patil_test_Index_updates_new",
"contents": {
"topic": "Elasticsearch with postgresql configuration update",
"details": "test index update"
}
}
},
{
"_index": "myindex",
"_type": "default",
"_id": "1",
"_score": 1.0,
"_source": {
"name": "abc_Index",
"contents": {
"topic": "abc 2",
"details": "abc 3"
}
}
}
]
}
}
now I change body->raw
GET https://localhost:0000/myindex/_search
body->raw
{
"query": {
"fuzzy": {
"_source.name": {
"value": "Ra"
}
}
}
}
The request succeeds, but it does not return the documents whose name starts with Rahul_Patil:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
}
}

mysql field="value" in elasticsearch

When I search for "google", I want to display only the items whose body is exactly that word.
How can I search only for items that consist of just the word "google"?
Request body
(Request created in postman)
{
"query": {
"bool": {
"must": [
{
"match": {
"body": "google"
}
}
]
}
}
}
Response body
(Request created in postman)
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.6587735,
"hits": [
{
"_index": "s_t",
"_type": "_doc",
"_id": "3",
"_score": 0.6587735,
"_source": {
"body": "google"
}
},
{
"_index": "s_t",
"_type": "_doc",
"_id": "4",
"_score": 0.5155619,
"_source": {
"body": "google map"
}
},
{
"_index": "s_t",
"_type": "_doc",
"_id": "5",
"_score": 0.5155619,
"_source": {
"body": "google-map"
}
}
]
}
}
I need this output
(Request created in postman)
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.69381464,
"hits": [
{
"_index": "s_t",
"_type": "_doc",
"_id": "3",
"_score": 0.69381464,
"_source": {
"body": "google"
}
}
]
}
}
In mysql with this query I reach my goal.
Similar query in mysql:
select * from s_t where body='google'
Well, I assume you rely on automatic mapping or use a text field in your mappings.
Specify .keyword in your query. Note that this is case-sensitive.
{
"query": {
"bool": {
"must": [
{
"match": {
"body.keyword": "google"
}
}
]
}
}
}
If you only want to query your body field using exact matches, you need to reindex it as a keyword field. Take a look at: Exact match in elastic search query

Aggregation in elastic search by field value data

I have the set of data below and I want to aggregate it by status. I am not sure how to compare the value of status with rejected or success and get the count of each result.
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2874,
"max_score": 1,
"hits": [
{
"_index": "testfiles",
"_type": "testfiles",
"_id": "testfile.one",
"_score": 1,
"_source": {
"businessDate": 20171013,
"status": "Success"
}
},
{
"_index": "testfiles",
"_type": "testfiles",
"_id": "testfile.two",
"_score": 1,
"_source": {
"businessDate": 20171013,
"status": "Success"
}
},
{
"_index": "testfiles",
"_type": "testfiles",
"_id": "testfile.three",
"_score": 1,
"_source": {
"businessDate": 20171013,
"status": "Rejected"
}
},
{
"_index": "testfiles",
"_type": "testfiles",
"_id": "testfile.four",
"_score": 1,
"_source": {
"businessDate": 20171013,
"status": "Rejected"
}
}
]
}
}
Can someone help me achieve this with an Elasticsearch aggregation?
Expected response something like below
"aggregations": {
"success_records": 2,
"rejected_records": 2
}
Assuming the status field is of type text, you'll need to update its mapping to a multi-field that has a keyword sub-field (named raw in the query below), which is needed for aggregation. Then query using:
GET my_index/_search
{
"size": 0,
"aggs": {
"statuses": {
"terms": {
"field": "status.raw"
}
}
}
}
If you already have status as keyword field, then change status.raw to status in the above query.

Terms aggregation not giving buckets

I'm trying a simple terms aggregation but the result is not creating buckets. Here is a sample document:
"hits": {
"total": 27330,
"max_score": 0.8293952,
"hits": [
{
"_index": "policy",
"_type": "policy",
"_id": "W0051311PNWO",
"_score": 0.8293952,
"_source": {
"productname": "UK CARGO",
"alternateproductname": "ABC39393939",
"brokername": "Name***",
"agentname": "Name***",
"policyref": "ABC33333",
"client": "International Cargo Limited",
"addressline1": "",
"post/zipcode": "",
"telephone": null,
"bapolicyendorseid": 123334,
"prevcertnum": "",
"policystatus": "Endorsed",
"#version": "1",
"#timestamp": "2015-10-09T11:11:02.018Z"
}
},
Here is the aggregate search (in sense):
get policy/policy/_search
{
"aggs": {
"statuses": {
"terms": {
"field": "policystatus"
}
}
}
}
I'm trying to get the equivalent of:
select policystatus, count(*) from policy group by policystatus
The result is not showing buckets. It is showing regular document results:
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 227398,
"max_score": 1,
"hits": [
{
"_index": "policy",
"_type": "policy",
"_id": "04/QQQ/04UKI0018",
"_score": 1,
"_source": {
"productname": "2 RES 01/09/04",
"alternateproductname": "2 RES 01/09/04",
"brokername": "Blah LTD",
"agentname": "Insurance",
"policyref": "blah",
"client": "blah",
"addressline1": "blah",
"post/zipcode": "blah",
"telephone": null,
"bapolicyendorseid": 21427,
"prevcertnum": "04UKI0018",
"policystatus": "Pending",
"#version": "1",
"#timestamp": "2015-10-09T11:10:10.146Z"
}
},
Try this:
GET /policy/policy/_search?search_type=count
{
"aggs": {
"statuses": {
"terms": {
"field": "policystatus"
}
}
}
}
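That is, write GET in capital letters and add search_type=count to get only the buckets, without the hits. (search_type=count was deprecated in Elasticsearch 2.0 and removed in 5.0; on those versions, set "size": 0 in the request body instead.)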

how to index for specific fields of documents using elasticsearch

My requirement is to index only specific fields of a document in Elasticsearch.
Example:
My document is
{
"name":"stev",
"age":26,
"salary":25000
}
This is my document, but I don't want to index the whole document. I want to store only the name field.
I created an index named emp and wrote the mapping below:
"person" : {
"_all" : {"enabled" : false},
"properties" : {
"name" : {
"type" : "string", "store" : "yes"
}
}
}
When I look at the indexed documents, I see:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "test",
"_id": "AU1_p0xAq8r9iH00jFB_",
"_score": 1,
"_source": { }
}
,
{
"_index": "test",
"_type": "test",
"_id": "AU1_lMDCq8r9iH00jFB-",
"_score": 1,
"_source": { }
}
]
}
}
The name field is not returned. Why?
Can anyone help me?
It's hard to tell what you're doing wrong from what you posted, but I can give you an example that works.
Elasticsearch will, by default, index whatever source documents you give it. Every time it sees a new document field, it will create a mapping field with sensible defaults, and it will index them by default as well. If you want to exclude fields, you can set "index": "no" and "store": "no" in the mapping for each field you want to exclude. If you want that behavior to be the default for every field, you can use the "_default_" property for specifying that fields not be stored (though I couldn't get it to work for not indexing).
You probably also will want to disable "_source", and use the "fields" parameter in your search queries.
Here is an example. The index definition looks like this:
PUT /test_index
{
"mappings": {
"person": {
"_all": {
"enabled": false
},
"_source": {
"enabled": false
},
"properties": {
"name": {
"type": "string",
"index": "analyzed",
"store": "yes"
},
"age": {
"type": "integer",
"index": "no",
"store": "no"
},
"salary": {
"type": "integer",
"index": "no",
"store": "no"
}
}
}
}
}
Then I can add a few documents with the bulk api:
POST /test_index/person/_bulk
{"index":{"_id":1}}
{"name":"stev","age":26,"salary":25000}
{"index":{"_id":2}}
{"name":"bob","age":30,"salary":28000}
{"index":{"_id":3}}
{"name":"joe","age":27,"salary":35000}
Since I disabled "_source", a simple query will return only ids:
POST /test_index/_search
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "person",
"_id": "1",
"_score": 1
},
{
"_index": "test_index",
"_type": "person",
"_id": "2",
"_score": 1
},
{
"_index": "test_index",
"_type": "person",
"_id": "3",
"_score": 1
}
]
}
}
But if I specify that I want the "name" field, I'll get it:
POST /test_index/_search
{
"fields": [
"name"
]
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "person",
"_id": "1",
"_score": 1,
"fields": {
"name": [
"stev"
]
}
},
{
"_index": "test_index",
"_type": "person",
"_id": "2",
"_score": 1,
"fields": {
"name": [
"bob"
]
}
},
{
"_index": "test_index",
"_type": "person",
"_id": "3",
"_score": 1,
"fields": {
"name": [
"joe"
]
}
}
]
}
}
You can prove to yourself that the other fields were not stored by running:
POST /test_index/_search
{
"fields": [
"name", "age", "salary"
]
}
which will return the same result. I can also prove that the "age" field wasn't indexed by running this query, which would return a document if "age" had been indexed:
POST /test_index/_search
{
"fields": [
"name", "age"
],
"query": {
"term": {
"age": {
"value": 27
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
Here is a bunch of code I used for playing around with this. I wanted to use a _default mapping and/or field to handle this without having to specify the settings for each field. I was able to make it work in terms of not storing data, but each field was still indexed.
http://sense.qbox.io/gist/d84967923d6c0757dba5f44240f47257ba2fbe50

Resources