I have the following code for ingesting data into Azure Data Explore using Python in Databricks:
df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
This gives me the error:
BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed
Earlier in my code I created a table using the same data types as this dummy pandas dataframe. Now I'm trying to load the data into the table. Full stack trace:
KustoServiceError Traceback (most recent call last)
<command-3953651275234016> in <module>
1 df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
2 ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
----> 3 connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
4
5 #adx_loadIntoTable(connWrite,db,df,'TestTable_DeleteMe')
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_dataframe(self, df, ingestion_properties)
52 ingestion_properties.format = DataFormat.CSV
53
---> 54 self.ingest_from_file(temp_file_path, ingestion_properties)
55
56 os.unlink(temp_file_path)
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_file(self, file_descriptor, ingestion_properties)
64 :param azure.kusto.ingest.IngestionProperties ingestion_properties: Ingestion properties.
65 """
---> 66 containers = self._resource_manager.get_containers()
67
68 if isinstance(file_descriptor, FileDescriptor):
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in get_containers(self)
121
122 def get_containers(self) -> List[_ResourceUri]:
--> 123 self._refresh_ingest_client_resources()
124 return self._ingest_client_resources.containers
125
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _refresh_ingest_client_resources(self)
79 or not self._ingest_client_resources.is_applicable()
80 ):
---> 81 self._ingest_client_resources = self._get_ingest_client_resources_from_service()
82 self._ingest_client_resources_last_update = datetime.utcnow()
83
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _get_ingest_client_resources_from_service(self)
86
87 def _get_ingest_client_resources_from_service(self):
---> 88 table = self._kusto_client.execute("NetDefaultDB", ".get ingestion resources").primary_results[0]
89
90 secured_ready_for_aggregation_queues = self._get_resource_by_name(table, "SecuredReadyForAggregationQueue")
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute(self, database, query, properties)
553 query = query.strip()
554 if query.startswith("."):
--> 555 return self.execute_mgmt(database, query, properties)
556 return self.execute_query(database, query, properties)
557
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute_mgmt(self, database, query, properties)
578 :rtype: azure.kusto.data.response.KustoResponseDataSet
579 """
--> 580 return self._execute(self._mgmt_endpoint, database, query, None, KustoClient._mgmt_default_timeout, properties)
581
582 def execute_streaming_ingest(
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in _execute(self, endpoint, database, query, payload, timeout, properties)
654 )
655
--> 656 raise KustoServiceError([response.json()], response)
KustoServiceError: (KustoServiceError(...), [{'error': {'code': 'BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed.', '#type': 'Kusto.Data.Exceptions.SyntaxException', '#message': "Syntax error: Query could not be parsed: . Query: '.get ingestion resources'", '#context': {'timestamp': '2020-06-27T21:44:48.0697658Z', 'serviceAlias': 'USCPIRSTASADE01', 'machineName': 'KEngine000000', 'processName': 'Kusto.WinSvc.Svc', 'processId': 7124, 'threadId': 7240, 'appDomainName': 'Kusto.WinSvc.Svc.exe', 'clientRequestId': 'KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c', 'activityId': 'cf41c806-8e15-458e-b388-386613f63952', 'subActivityId': 'df366667-ca8d-487b-a281-723f696a8f68', 'activityType': 'DN.FE.ExecuteControlCommand', 'parentActivityId': 'f8cd0bb8-04e9-48cf-8a84-8b16e1e24197', 'activityStack': '(Activity stack: CRID=KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c ARID=cf41c806-8e15-458e-b388-386613f63952 > DN.Admin.Client.ExecuteControlCommand/7271d9ec-2adf-4714-b19e-69495ad80d65 > P.WCF.Service.ExecuteControlCommandInternal..IAdminClientServiceCommunicationContract/f8cd0bb8-04e9-48cf-8a84-8b16e1e24197 > DN.FE.ExecuteControlCommand/df366667-ca8d-487b-a281-723f696a8f68)'}, '#permanent': True}}])
It is likely that your connection has the engine endpoint instead of the data management endpoint. Can you check that the connection to the cluster starts with "ingest-"? See here an example:
client = KustoIngestClient("https://ingest-{cluster_name}.kusto.windows.net")
Related
The code below is triggering a AttributeError: can't set attribute. I'm still new to programming so am having a difficult time figuring out why this error is occurring. Any help is appreciated.
import cimcb_lite as cb
cv = cb.cross_val.kfold(model=cb.model.PLS_SIMPLS,X=XTknn,
Y=Ytrain,
param_dict={'n_components': [1,2,3,4,5]},
folds=5,
bootnum=100)
cv.run()
seeing this error
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/var/folders/rs/f6nsd1894354_821jj157jnr0000gn/T/ipykernel_30013/1292624611.py in <module>
8
9 # run the cross validation
---> 10 cv.run()
11
/opt/anaconda3/lib/python3.9/site-packages/cimcb_lite/cross_val/kfold.py in run(self)
82 def run(self):
83 """Runs all functions prior to plot."""
---> 84 self.calc_ypred()
85 self.calc_stats()
86 if self.bootnum > 1:
/opt/anaconda3/lib/python3.9/site-packages/cimcb_lite/cross_val/kfold.py in calc_ypred(self)
55 model_i = self.model(**params_i)
56 # Full
---> 57 model_i.train(self.X, self.Y)
58 ypred_full_i = model_i.test(self.X)
59 self.ypred_full.append(ypred_full_i)
/opt/anaconda3/lib/python3.9/site-packages/cimcb_lite/model/PLS_SIMPLS.py in train(self, X, Y)
77 # Calculates and store attributes of PLS SIMPLS
78 Xscores, Yscores, Xloadings, Yloadings, Weights, Beta = self.pls_simpls(X, Y, ncomp=self.n_component)
---> 79 self.model.x_scores_ = Xscores
80 self.model.y_scores_ = Yscores
81 self.model.x_loadings_ = Xloadings
AttributeError: can't set attribute
I doing object detection with Pytorch/Torchvision with this tutorial, which is very similar to the official pytorch Tutorial.
https://towardsdatascience.com/building-your-own-object-detector-pytorch-vs-tensorflow-and-how-to-even-get-started-1d314691d4ae
While training the model, execution stops after set epochs and gives me this error:
/usr/local/lib/python3.7/dist-packages/torch/autograd/grad_mode.py in decorate_context(*args, **kwargs)
26 def decorate_context(*args, **kwargs):
27 with self.__class__():
---> 28 return func(*args, **kwargs)
29 return cast(F, decorate_context)
30
/content/Pytorch_OD/My Drive/Pytorch_OD/vision/engine.py in evaluate(model, data_loader, device)
78 header = 'Test:'
79
---> 80 coco = get_coco_api_from_dataset(data_loader.dataset)
81 iou_types = _get_iou_types(model)
82 coco_evaluator = CocoEvaluator(coco, iou_types)
/content/Pytorch_OD/My Drive/Pytorch_OD/vision/coco_utils.py in get_coco_api_from_dataset(dataset)
204 if isinstance(dataset, torchvision.datasets.CocoDetection):
205 return dataset.coco
--> 206 return convert_to_coco_api(dataset)
207
208
/content/Pytorch_OD/My Drive/Pytorch_OD/vision/coco_utils.py in convert_to_coco_api(ds)
157 img_dict = {}
158 img_dict['id'] = image_id
--> 159 img_dict['height'] = img.shape[-2]
160 img_dict['width'] = img.shape[-1]
161 dataset['images'].append(img_dict)
AttributeError: 'Image' object has no attribute 'shape
'Image' is imported via from PIL import Image.
The attribute shape is not being defined in the code in my notebook.
Can it be that itis an error in the source file of coco_utils.py?
Or do I need to provide more information or parts of my code?
I'm new to Anaconda. I tried to execute Pytorch Adversarial Neural Network in anaconda. It shows some error that I have no clue. Here is the code that downloads dataset
# MNIST Test dataset and dataloader declaration
test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([
transforms.ToTensor(),])),batch_size=1, shuffle=True)
This is the error message I got :
PermissionError Traceback (most recent call last)
<ipython-input-4-59310f6a37f8> in <module>
41
42 # MNIST Test dataset and dataloader declaration
43 test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor(),])),batch_size=1, shuffle=True)
44
45 # Define what device we are using
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in __init__(self, root,
train,
transform, target_transform, download)
77
78 if download:
79 self.download()
80
81 if not self._check_exists():
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in download(self)
138 return
139
140 os.makedirs(self.raw_folder, exist_ok=True)
141 os.makedirs(self.processed_folder, exist_ok=True)
142
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
221 return
222 try:
223 mkdir(name, mode)
224 except OSError:
225 # Cannot rely on checking for EEXIST, since the operating system
PermissionError: [WinError 5] Access is denied: '../data' .
How about changing the '../data' to another directory, like '/Users/***/Downloads/data' or somewhere else.
We are using the python elasticsearch API version "elasticsearch-6.8.0"
With AWS Elasticsearch version 6.7
It only happens sometimes, on some machines (even when we make the queries within seconds of each other from different machines, one will succeed and the other will throw the error). Thus this is difficult to reproduce. We are using a paginated search like this:
def scrolled_search_query(self, index, query, size=es_config.ES_SIZE,
scroll=es_config.ES_SCROLL,
request_timeout=es_config.ES_TIMEOUT,
docs_only=False):
data = self.es_conn.search(index=index, size=size, scroll=scroll, body=query,
request_timeout=request_timeout)
while len(data['hits']['hits']) > 0:
for hit in data['hits']['hits']:
if docs_only:
yield hit['_source']
else:
yield hit
data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
request_timeout=request_timeout)
The error message is utterly useless, just basically saying that there's a 413. like this:
TransportError: TransportError(413, '')
From looking around at forums, it seems the problem is because ES is attempting to return a large payload. However, since we're using scroll pagination, I don't know why this would be an issue because each payload should be well below the 100MB limit. Since the error message is so terrible, I can't really see what it's trying to do in the database. It seems like the database is calculating the total data for the query and rejecting it without taking into account pagination. It's normal for us to request large payloads, and paginate them.
The full exception looks like this, what else do you need?
---------------------------------------------------------------------------
TransportError Traceback (most recent call last)
<ipython-input-3-a3fd964bf999> in <module>
41 blah=blah,
42 blah=blah,
---> 43 start_date=start_date)
44
45 df = df.drop(['#blah', '#blah'], axis=1)
~/Documents/Repositories/blah/blah/downloader.py in get_data(self, data_type, blah, blah, device, blah, start_date, end_date)
186 return self.gatorEs.get_blah(es_requests=self.es_requests, blah=blah, blah=blah, blah=blah,
--> 188 start_date=start_date, end_date=end_date)
189
190 if data_type == 'blah':
~/Documents/Repositories/gator_access/gator_access/es_gator_api.py in get_blah(self, es_requests, blah, blah, blah, blah, blah, start_date, end_date)
180 logger=self.logger)
181 if not data: continue
--> 182 es_data += data
183
184 self.logger.info(" ".join(["Download took {t} seconds for p:'{p}' d:'{d}'",
~/Documents/Repositories/gator_core/gator_core/elasticsearch/es_requests.py in scrolled_search_query(self, index, query, size, scroll, request_timeout, docs_only)
144
145 data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
--> 146 request_timeout=request_timeout)
147
148
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/utils.py in _wrapped(*args, **kwargs)
82 if p in kwargs:
83 params[p] = kwargs.pop(p)
---> 84 return func(*args, params=params, **kwargs)
85
86 return _wrapped
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/__init__.py in scroll(self, scroll_id, body, params)
1353 """
1354 return self.transport.perform_request(
-> 1355 "GET", _make_path("_search", "scroll", scroll_id), params=params, body=body
1356 )
1357
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/transport.py in perform_request(self, method, url, headers, params, body)
351 headers=headers,
352 ignore=ignore,
--> 353 timeout=timeout,
354 )
355
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py in perform_request(self, method, url, params, body, timeout, ignore, headers)
139 raw_data,
140 )
--> 141 self._raise_error(response.status_code, raw_data)
142
143 self.log_request_success(
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/base.py in _raise_error(self, status_code, raw_data)
160
161 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
--> 162 status_code, error_message, additional_info
163 )
TransportError: TransportError(413, '')
when i try to run this code,
ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
relationships=relationship,
target_entity="transactions")
i get such error,
490 featuretools.entityset - WARNING index session_id not found in dataframe, creating new integer column
KeyError Traceback (most recent call last)
<ipython-input-82-d467a36d5254> in <module>()
1 ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
2 relationships=relationshp,
----> 3 target_entity="transactions")
4 frames
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
38 ep.on_error(error=e,
39 runtime=runtime)
---> 40 raise e
41
42 # send return value
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
30 # call function
31 start = time.time()
---> 32 return_value = func(*args, **kwargs)
33 runtime = time.time() - start
34 except Exception as e:
/usr/local/lib/python3.6/dist-packages/featuretools/synthesis/dfs.py
in dfs(entities, relationships, entityset, target_entity, cutoff_time,
instance_ids, agg_primitives, trans_primitives,
groupby_trans_primitives, allowed_paths, max_depth, ignore_entities,
ignore_variables, primitive_options, seed_features, drop_contains,
drop_exact, where_primitives, max_features, cutoff_time_in_index,
save_progress, features_only, training_window, approximate,
chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types,
progress_callback)
225 '''
226 if not isinstance(entityset, EntitySet):
--> 227 entityset = EntitySet("dfs", entities, relationships)
228
229 dfs_object = DeepFeatureSynthesis(target_entity, entityset,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in init(self, id, entities, relationships)
83
84 for relationship in relationships:
---> 85 parent_variable = self[relationship[0]][relationship[1]]
86 child_variable = self[relationship[2]][relationship[3]]
87 self.add_relationship(Relationship(parent_variable,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in getitem(self, entity_id)
124 return self.entity_dict[entity_id]
125 name = self.id or "entity set"
--> 126 raise KeyError('Entity %s does not exist in %s' % (entity_id, name))
127
128 #property
however, this returned KeyError : 'Entity c does not exist in dfs'
any idea what's wrong with my code?