AWS Elasticsearch TransportError(413, '') - elasticsearch

We are using the python elasticsearch API version "elasticsearch-6.8.0"
With AWS Elasticsearch version 6.7
It only happens sometimes, on some machines (even when we make the queries within seconds of each other from different machines, one will succeed and the other will throw the error). Thus this is difficult to reproduce. We are using a paginated search like this:
def scrolled_search_query(self, index, query, size=es_config.ES_SIZE,
scroll=es_config.ES_SCROLL,
request_timeout=es_config.ES_TIMEOUT,
docs_only=False):
data = self.es_conn.search(index=index, size=size, scroll=scroll, body=query,
request_timeout=request_timeout)
while len(data['hits']['hits']) > 0:
for hit in data['hits']['hits']:
if docs_only:
yield hit['_source']
else:
yield hit
data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
request_timeout=request_timeout)
The error message is utterly useless, just basically saying that there's a 413. like this:
TransportError: TransportError(413, '')
From looking around at forums, it seems the problem is because ES is attempting to return a large payload. However, since we're using scroll pagination, I don't know why this would be an issue because each payload should be well below the 100MB limit. Since the error message is so terrible, I can't really see what it's trying to do in the database. It seems like the database is calculating the total data for the query and rejecting it without taking into account pagination. It's normal for us to request large payloads, and paginate them.
The full exception looks like this, what else do you need?
---------------------------------------------------------------------------
TransportError Traceback (most recent call last)
<ipython-input-3-a3fd964bf999> in <module>
41 blah=blah,
42 blah=blah,
---> 43 start_date=start_date)
44
45 df = df.drop(['#blah', '#blah'], axis=1)
~/Documents/Repositories/blah/blah/downloader.py in get_data(self, data_type, blah, blah, device, blah, start_date, end_date)
186 return self.gatorEs.get_blah(es_requests=self.es_requests, blah=blah, blah=blah, blah=blah,
--> 188 start_date=start_date, end_date=end_date)
189
190 if data_type == 'blah':
~/Documents/Repositories/gator_access/gator_access/es_gator_api.py in get_blah(self, es_requests, blah, blah, blah, blah, blah, start_date, end_date)
180 logger=self.logger)
181 if not data: continue
--> 182 es_data += data
183
184 self.logger.info(" ".join(["Download took {t} seconds for p:'{p}' d:'{d}'",
~/Documents/Repositories/gator_core/gator_core/elasticsearch/es_requests.py in scrolled_search_query(self, index, query, size, scroll, request_timeout, docs_only)
144
145 data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
--> 146 request_timeout=request_timeout)
147
148
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/utils.py in _wrapped(*args, **kwargs)
82 if p in kwargs:
83 params[p] = kwargs.pop(p)
---> 84 return func(*args, params=params, **kwargs)
85
86 return _wrapped
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/__init__.py in scroll(self, scroll_id, body, params)
1353 """
1354 return self.transport.perform_request(
-> 1355 "GET", _make_path("_search", "scroll", scroll_id), params=params, body=body
1356 )
1357
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/transport.py in perform_request(self, method, url, headers, params, body)
351 headers=headers,
352 ignore=ignore,
--> 353 timeout=timeout,
354 )
355
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py in perform_request(self, method, url, params, body, timeout, ignore, headers)
139 raw_data,
140 )
--> 141 self._raise_error(response.status_code, raw_data)
142
143 self.log_request_success(
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/base.py in _raise_error(self, status_code, raw_data)
160
161 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
--> 162 status_code, error_message, additional_info
163 )
TransportError: TransportError(413, '')

Related

How to get the shap values for the masked language modeling task using transformer?

I am trying to get the shap values for the masked language modeling task using transformer. I get the error KeyError: 'label' for the code where I input a single data sample to get the explanation. My complete code and error trace are as follows:
import transformers
import shap
from transformers import RobertaTokenizer, RobertaForMaskedLM, pipeline
import torch
model = RobertaForMaskedLM.from_pretrained('microsoft/codebert-base-mlm')
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base-mlm')
code_example = "if (x <mask> 10)"
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
explainer = shap.Explainer(fill_mask)
shap_values = explainer(['x {tokenizer.mask_token} 10'])
Following is the error trace
KeyError Traceback (most recent call last)
[<ipython-input-12-bb3832d1772d>](https://localhost:8080/#) in <module>
6 # explain the model on two sample inputs
7 explainer = shap.Explainer(fill_mask)
----> 8 shap_values = explainer(['x {tokenizer.mask_token} 10'])
9 print(shap_values)
10 # visualize the first prediction's explanation for the POSITIVE output class
5 frames
[/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py](https://localhost:8080/#) in __call__(self, max_evals, fixed_context, main_effects, error_bounds, batch_size, outputs, silent, *args)
136 return super().__call__(
137 *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
--> 138 outputs=outputs, silent=silent
139 )
140
[/usr/local/lib/python3.7/dist-packages/shap/explainers/_explainer.py](https://localhost:8080/#) in __call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args, **kwargs)
266 row_result = self.explain_row(
267 *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
--> 268 batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
269 )
270 values.append(row_result.get("values", None))
[/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py](https://localhost:8080/#) in explain_row(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, fixed_context, *row_args)
159 # if not fixed background or no base value assigned then compute base value for a row
160 if self._curr_base_value is None or not getattr(self.masker, "fixed_background", False):
--> 161 self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
162 f11 = fm(~m00.reshape(1, -1))[0]
163
[/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py](https://localhost:8080/#) in __call__(self, masks, zero_index, batch_size)
65
66 else:
---> 67 return self._full_masking_call(masks, batch_size=batch_size)
68
69 def _full_masking_call(self, masks, zero_index=None, batch_size=None):
[/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py](https://localhost:8080/#) in _full_masking_call(self, masks, zero_index, batch_size)
142
143 joined_masked_inputs = tuple([np.concatenate(v) for v in all_masked_inputs])
--> 144 outputs = self.model(*joined_masked_inputs)
145 _assert_output_input_match(joined_masked_inputs, outputs)
146 all_outputs.append(outputs)
[/usr/local/lib/python3.7/dist-packages/shap/models/_transformers_pipeline.py](https://localhost:8080/#) in __call__(self, strings)
33 val = [val]
34 for obj in val:
---> 35 output[i, self.label2id[obj["label"]]] = sp.special.logit(obj["score"]) if self.rescale_to_logits else obj["score"]
36 return output
KeyError: 'label'

Win Error 5 has occurred when I tried to execute Pytorch code

I'm new to Anaconda. I tried to execute Pytorch Adversarial Neural Network in anaconda. It shows some error that I have no clue. Here is the code that downloads dataset
# MNIST Test dataset and dataloader declaration
test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
transforms.ToTensor(),])),batch_size=1, shuffle=True)
This is the error message I got :
PermissionError Traceback (most recent call last)
<ipython-input-4-59310f6a37f8> in <module>
41
42 # MNIST Test dataset and dataloader declaration
43 test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor(),])),batch_size=1, shuffle=True)
44
45 # Define what device we are using
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in __init__(self, root,
train,
transform, target_transform, download)
77
78 if download:
79 self.download()
80
81 if not self._check_exists():
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in download(self)
138 return
139
140 os.makedirs(self.raw_folder, exist_ok=True)
141 os.makedirs(self.processed_folder, exist_ok=True)
142
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
221 return
222 try:
223 mkdir(name, mode)
224 except OSError:
225 # Cannot rely on checking for EEXIST, since the operating system
PermissionError: [WinError 5] Access is denied: '../data' .
How about changing the '../data' to another directory, like '/Users/***/Downloads/data' or somewhere else.

Kusto Ingest - KustoServiceError 'BadRequest_SyntaxError'

I have the following code for ingesting data into Azure Data Explore using Python in Databricks:
df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
This gives me the error:
BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed
Earlier in my code I created a table using the same data types as this dummy pandas dataframe. Now I'm trying to load the data into the table. Full stack trace:
KustoServiceError Traceback (most recent call last)
<command-3953651275234016> in <module>
1 df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
2 ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
----> 3 connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
4
5 #adx_loadIntoTable(connWrite,db,df,'TestTable_DeleteMe')
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_dataframe(self, df, ingestion_properties)
52 ingestion_properties.format = DataFormat.CSV
53
---> 54 self.ingest_from_file(temp_file_path, ingestion_properties)
55
56 os.unlink(temp_file_path)
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_file(self, file_descriptor, ingestion_properties)
64 :param azure.kusto.ingest.IngestionProperties ingestion_properties: Ingestion properties.
65 """
---> 66 containers = self._resource_manager.get_containers()
67
68 if isinstance(file_descriptor, FileDescriptor):
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in get_containers(self)
121
122 def get_containers(self) -> List[_ResourceUri]:
--> 123 self._refresh_ingest_client_resources()
124 return self._ingest_client_resources.containers
125
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _refresh_ingest_client_resources(self)
79 or not self._ingest_client_resources.is_applicable()
80 ):
---> 81 self._ingest_client_resources = self._get_ingest_client_resources_from_service()
82 self._ingest_client_resources_last_update = datetime.utcnow()
83
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _get_ingest_client_resources_from_service(self)
86
87 def _get_ingest_client_resources_from_service(self):
---> 88 table = self._kusto_client.execute("NetDefaultDB", ".get ingestion resources").primary_results[0]
89
90 secured_ready_for_aggregation_queues = self._get_resource_by_name(table, "SecuredReadyForAggregationQueue")
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute(self, database, query, properties)
553 query = query.strip()
554 if query.startswith("."):
--> 555 return self.execute_mgmt(database, query, properties)
556 return self.execute_query(database, query, properties)
557
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute_mgmt(self, database, query, properties)
578 :rtype: azure.kusto.data.response.KustoResponseDataSet
579 """
--> 580 return self._execute(self._mgmt_endpoint, database, query, None, KustoClient._mgmt_default_timeout, properties)
581
582 def execute_streaming_ingest(
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in _execute(self, endpoint, database, query, payload, timeout, properties)
654 )
655
--> 656 raise KustoServiceError([response.json()], response)
KustoServiceError: (KustoServiceError(...), [{'error': {'code': 'BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed.', '#type': 'Kusto.Data.Exceptions.SyntaxException', '#message': "Syntax error: Query could not be parsed: . Query: '.get ingestion resources'", '#context': {'timestamp': '2020-06-27T21:44:48.0697658Z', 'serviceAlias': 'USCPIRSTASADE01', 'machineName': 'KEngine000000', 'processName': 'Kusto.WinSvc.Svc', 'processId': 7124, 'threadId': 7240, 'appDomainName': 'Kusto.WinSvc.Svc.exe', 'clientRequestId': 'KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c', 'activityId': 'cf41c806-8e15-458e-b388-386613f63952', 'subActivityId': 'df366667-ca8d-487b-a281-723f696a8f68', 'activityType': 'DN.FE.ExecuteControlCommand', 'parentActivityId': 'f8cd0bb8-04e9-48cf-8a84-8b16e1e24197', 'activityStack': '(Activity stack: CRID=KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c ARID=cf41c806-8e15-458e-b388-386613f63952 > DN.Admin.Client.ExecuteControlCommand/7271d9ec-2adf-4714-b19e-69495ad80d65 > P.WCF.Service.ExecuteControlCommandInternal..IAdminClientServiceCommunicationContract/f8cd0bb8-04e9-48cf-8a84-8b16e1e24197 > DN.FE.ExecuteControlCommand/df366667-ca8d-487b-a281-723f696a8f68)'}, '#permanent': True}}])
It is likely that your connection has the engine endpoint instead of the data management endpoint. Can you check that the connection to the cluster starts with "ingest-"? See here an example:
client = KustoIngestClient("https://ingest-{cluster_name}.kusto.windows.net")

KeyError: 'Entity c does not exist in dfs'

when i try to run this code,
ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
relationships=relationship,
target_entity="transactions")
i get such error,
490 featuretools.entityset - WARNING index session_id not found in dataframe, creating new integer column
KeyError Traceback (most recent call last)
<ipython-input-82-d467a36d5254> in <module>()
1 ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
2 relationships=relationshp,
----> 3 target_entity="transactions")
4 frames
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
38 ep.on_error(error=e,
39 runtime=runtime)
---> 40 raise e
41
42 # send return value
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
30 # call function
31 start = time.time()
---> 32 return_value = func(*args, **kwargs)
33 runtime = time.time() - start
34 except Exception as e:
/usr/local/lib/python3.6/dist-packages/featuretools/synthesis/dfs.py
in dfs(entities, relationships, entityset, target_entity, cutoff_time,
instance_ids, agg_primitives, trans_primitives,
groupby_trans_primitives, allowed_paths, max_depth, ignore_entities,
ignore_variables, primitive_options, seed_features, drop_contains,
drop_exact, where_primitives, max_features, cutoff_time_in_index,
save_progress, features_only, training_window, approximate,
chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types,
progress_callback)
225 '''
226 if not isinstance(entityset, EntitySet):
--> 227 entityset = EntitySet("dfs", entities, relationships)
228
229 dfs_object = DeepFeatureSynthesis(target_entity, entityset,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in init(self, id, entities, relationships)
83
84 for relationship in relationships:
---> 85 parent_variable = self[relationship[0]][relationship[1]]
86 child_variable = self[relationship[2]][relationship[3]]
87 self.add_relationship(Relationship(parent_variable,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in getitem(self, entity_id)
124 return self.entity_dict[entity_id]
125 name = self.id or "entity set"
--> 126 raise KeyError('Entity %s does not exist in %s' % (entity_id, name))
127
128 #property
however, this returned KeyError : 'Entity c does not exist in dfs'
any idea what's wrong with my code?

opendaylight: inconsistent api response

I have Opendaylight up and running but when I try to configure that through the REST api,
doing exactly the same request, the 1st time I get an exception, and the 2nd time I get error message that the configuration is present already.
See below an example making the same request 2 times.
One is causing an exception, another one returns a response
And I would like to ask you:
Where can I address this issue ?
I am trying to find some diagnostic
restful GET commands for Opendaylight BGP configuration, for
example; show me all BGP attributes for BGP session/group/neighbor.
Where can I find that ?
In [9]: cmd = """ <neighbor xmlns="urn:opendaylight:params:xml:ns:yang:bgp:openconfig-extensions">
...: <neighbor-address>10.20.14.1</neighbor-address>
...: <afi-safis>
...: <afi-safi>
...: <afi-safi-name>LINKSTATE</afi-safi-name>
...: </afi-safi>
...: </afi-safis>
...: </neighbor>
...: """
In [10]: url = "/restconf/config/openconfig-network-instance:network-instances/network-instance/global-bgp/openconfig-network-instance:protocols/protocol/openconfig-policy-types:BGP/bgp-example/bgp/ne
...: ighbors"
In [11]:
In [11]: requests.post('http://{}:{}{}'.format(odl['api']['ipaddr'],odl['api']['port'],url), auth=auth, headers=headers, data=cmd).json()
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-11-6c5beab7d9fb> in <module>()
----> 1 requests.post('http://{}:{}{}'.format(odl['api']['ipaddr'],odl['api']['port'],url), auth=auth, headers=headers, data=cmd).json()
/usr/lib/python3.6/site-packages/requests/models.py in json(self, **kwargs)
894 # used.
895 pass
--> 896 return complexjson.loads(self.text, **kwargs)
897
898 #property
/usr/lib64/python3.6/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
/usr/lib64/python3.6/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
/usr/lib64/python3.6/json/decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
In [12]: requests.post('http://{}:{}{}'.format(odl['api']['ipaddr'],odl['api']['port'],url), auth=auth, headers=headers, data=cmd).json()
Out[12]:
{'errors': {'error': [{'error-type': 'protocol',
'error-tag': 'data-exists',
'error-message': 'Data already exists for path: /(http://openconfig.net/yang/network-instance?revision=2015-10-18)network-instances/network-instance/network-instance[{(http://openconfig.net/yang/network-instance?revision=2015-10-18)name=global-bgp}]/protocols/protocol/protocol[{(http://openconfig.net/yang/network-instance?revision=2015-10-18)identifier=(http://openconfig.net/yang/policy-types?revision=2015-10-09)BGP, (http://openconfig.net/yang/network-instance?revision=2015-10-18)name=bgp-example}]/AugmentationIdentifier{childNames=[(urn:opendaylight:params:xml:ns:yang:bgp:openconfig-extensions?revision=2017-12-07)bgp]}/(urn:opendaylight:params:xml:ns:yang:bgp:openconfig-extensions?revision=2017-12-07)bgp/neighbors/neighbor/neighbor[{(urn:opendaylight:params:xml:ns:yang:bgp:openconfig-extensions?revision=2017-12-07)neighbor-address=10.20.14.1}]'}]}}
You're using POST which means create-if-not-exist so the second request is correctly returning a "data-exists" error as specified in the RESTCONF RFC.

Resources