Reading HDF files over HTTPS with xarray

I am trying to read HDF files, over an HTTPS connection, from the Harmonized Landsat Sentinel repository (here: https://hls.gsfc.nasa.gov/data/v1.4/).
Ideally, I would use xarray to do this. Here is an example over HTTPS:
xr.open_rasterio('https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf')
<xarray.DataArray (band: 1, y: 3660, x: 3660)>
[13395600 values with dtype=int16]
Coordinates:
* band (band) int64 1
* y (y) float64 4.6e+06 4.6e+06 4.6e+06 ... 4.49e+06 4.49e+06 4.49e+06
* x (x) float64 5e+05 5e+05 5.001e+05 ... 6.097e+05 6.097e+05 6.098e+05
Attributes:
transform: (30.0, -0.0, 499980.0, -0.0, -30.0, 4600020.0)
crs: +init=epsg:32613
res: (30.0, 30.0)
is_tiled: 0
nodatavals: (nan,)
scales: (1.0,)
offsets: (0.0,)
bands: 1
byte_order: 0
coordinate_system_string: PROJCS["UTM_Zone_13N",GEOGCS["GCS_WGS_1984",DA...
data_type: 2
description: HDF Imported into ENVI.
file_type: HDF Scientific Data
header_offset: 0
interleave: bsq
lines: 3660
samples: 3660
Note that these files have multiple datasets/bands, so the single-band result above is incorrect.
xr.open_dataset('https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
194 try:
--> 195 file = self._cache[self._key]
196 except KeyError:
/opt/conda/lib/python3.7/site-packages/xarray/backends/lru_cache.py in __getitem__(self, key)
42 with self._lock:
---> 43 value = self._cache[key]
44 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-85-7765ae565af3> in <module>
----> 1 xr.open_dataset('https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf')
/opt/conda/lib/python3.7/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime)
497 if engine == "netcdf4":
498 store = backends.NetCDF4DataStore.open(
--> 499 filename_or_obj, group=group, lock=lock, **backend_kwargs
500 )
501 elif engine == "scipy":
/opt/conda/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
387 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
388 )
--> 389 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
390
391 def _acquire(self, needs_lock=True):
/opt/conda/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in __init__(self, manager, group, mode, lock, autoclose)
333 self._group = group
334 self._mode = mode
--> 335 self.format = self.ds.data_model
336 self._filename = self.ds.filepath()
337 self.is_remote = is_remote_uri(self._filename)
/opt/conda/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in ds(self)
396 @property
397 def ds(self):
--> 398 return self._acquire()
399
400 def open_store_variable(self, name, var):
/opt/conda/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _acquire(self, needs_lock)
390
391 def _acquire(self, needs_lock=True):
--> 392 with self._manager.acquire_context(needs_lock) as root:
393 ds = _nc4_require_group(root, self._group, self._mode)
394 return ds
/opt/conda/lib/python3.7/contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
/opt/conda/lib/python3.7/site-packages/xarray/backends/file_manager.py in acquire_context(self, needs_lock)
181 def acquire_context(self, needs_lock=True):
182 """Context manager for acquiring a file."""
--> 183 file, cached = self._acquire_with_cache_info(needs_lock)
184 try:
185 yield file
/opt/conda/lib/python3.7/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
199 kwargs = kwargs.copy()
200 kwargs["mode"] = self._mode
--> 201 file = self._opener(*self._args, **kwargs)
202 if self._mode == "w":
203 # ensure file doesn't get overriden when opened again
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -90] NetCDF: file not found: b'https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf'
When read from disk:
xr.open_rasterio('HLS.S30.T13TEF.2017002.v1.4.hdf')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-88-f4ae5075928a> in <module>
----> 1 xr.open_rasterio('HLS.S30.T13TEF.2017002.v1.4.hdf')
/opt/conda/lib/python3.7/site-packages/xarray/backends/rasterio_.py in open_rasterio(filename, parse_coordinates, chunks, cache, lock)
250 # Get bands
251 if riods.count < 1:
--> 252 raise ValueError("Unknown dims")
253 coords["band"] = np.asarray(riods.indexes)
254
ValueError: Unknown dims
and
xr.open_dataset('/home/rowangaffney/Desktop/HLS.S30.T13TEF.2017002.v1.4.hdf')
<xarray.Dataset>
Dimensions: (XDim_Grid: 3660, YDim_Grid: 3660)
Dimensions without coordinates: XDim_Grid, YDim_Grid
Data variables:
B01 (YDim_Grid, XDim_Grid) float32 ...
B02 (YDim_Grid, XDim_Grid) float32 ...
B03 (YDim_Grid, XDim_Grid) float32 ...
B04 (YDim_Grid, XDim_Grid) float32 ...
B05 (YDim_Grid, XDim_Grid) float32 ...
B06 (YDim_Grid, XDim_Grid) float32 ...
B07 (YDim_Grid, XDim_Grid) float32 ...
B08 (YDim_Grid, XDim_Grid) float32 ...
B8A (YDim_Grid, XDim_Grid) float32 ...
B09 (YDim_Grid, XDim_Grid) float32 ...
B10 (YDim_Grid, XDim_Grid) float32 ...
B11 (YDim_Grid, XDim_Grid) float32 ...
B12 (YDim_Grid, XDim_Grid) float32 ...
QA (YDim_Grid, XDim_Grid) float32 ...
Attributes:
PRODUCT_URI: S2A_MSIL1C_20170102T17...
L1C_IMAGE_QUALITY: SENSOR:PASSED GEOMETRI...
SPACECRAFT_NAME: Sentinel-2A
TILE_ID: S2A_OPER_MSI_L1C_TL_SG...
DATASTRIP_ID: S2A_OPER_MSI_L1C_DS_SG...
PROCESSING_BASELINE: 02.04
SENSING_TIME: 2017-01-02T17:58:23.575Z
L1_PROCESSING_TIME: 2017-01-02T21:41:37.84...
HORIZONTAL_CS_NAME: WGS84 / UTM zone 13N
HORIZONTAL_CS_CODE: EPSG:32613
NROWS: 3660
NCOLS: 3660
SPATIAL_RESOLUTION: 30
ULX: 499980.0
ULY: 4600020.0
MEAN_SUN_ZENITH_ANGLE(B01): 65.3577462333765
MEAN_SUN_AZIMUTH_ANGLE(B01): 165.01162242158
MEAN_VIEW_ZENITH_ANGLE(B01): 8.10178275092502
MEAN_VIEW_AZIMUTH_ANGLE(B01): 285.224586475702
spatial_coverage: 89
cloud_coverage: 72
ACCODE: LaSRCS2AV3.5.5
arop_s2_refimg: NONE
arop_ncp: 0
arop_rmse(meters): 0.0
arop_ave_xshift(meters): 0.0
arop_ave_yshift(meters): 0.0
HLS_PROCESSING_TIME: 2018-02-24T18:17:49Z
NBAR_Solar_Zenith: 44.82820466504637
AngleBand: [ 0 1 2 3 4 5 6 ...
MSI band 01 bandpass adjustment slope and offset: 0.995900, -0.000200
MSI band 02 bandpass adjustment slope and offset: 0.977800, -0.004000
MSI band 03 bandpass adjustment slope and offset: 1.005300, -0.000900
MSI band 04 bandpass adjustment slope and offset: 0.976500, 0.000900
MSI band 8a bandpass adjustment slope and offset: 0.998300, -0.000100
MSI band 11 bandpass adjustment slope and offset: 0.998700, -0.001100
MSI band 12 bandpass adjustment slope and offset: 1.003000, -0.001200
StructMetadata.0: GROUP=SwathStructure\n.
Any ideas on best practices for reading these data over HTTPS?
Thanks!

I recommend reading http://matthewrocklin.com/blog/work/2018/02/06/hdf-in-the-cloud to understand why accessing HDF5 files directly over HTTPS is not as easy as it seems. So this is not exactly a solution, but in the short term at least, you'll probably need to download the data and load it from local disk.
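For example, here is a minimal sketch of that download-then-open workflow (the local path is just a placeholder, and it assumes the requests library is available alongside xarray):
import requests
import xarray as xr

url = 'https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf'
local_path = 'HLS.S30.T13TEF.2017002.v1.4.hdf'  # placeholder destination on local disk

# Stream the file to disk so the whole response is not held in memory
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open(local_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)

# Once the file is local, xr.open_dataset works as shown in the question
ds = xr.open_dataset(local_path)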
Oh, and you might want to try using the 'h5netcdf' engine to read the file instead:
xr.open_dataset("HLS.S30.T13TEF.2017002.v1.4.hdf", engine="h5netcdf")
and if you're interested in just one band, do something like this:
xr.open_dataset("HLS.S30.T13TEF.2017002.v1.4.hdf", engine="h5netcdf", group="B01")
Just a note for others though: the code below would work in some cases, if you use xarray with the 'h5netcdf' engine, have the 'h5pyd' library installed, and the URL is served through an HDF REST API interface:
xr.open_dataset(
"https://hls.gsfc.nasa.gov/data/v1.4/S30/2017/13/T/E/F/HLS.S30.T13TEF.2017002.v1.4.hdf",
engine="h5netcdf",
)
But unfortunately, that's not quite the case with these NASA datasets...

Related

How to get the shap values for the masked language modeling task using transformer?

I am trying to get the SHAP values for a masked language modeling task using a transformer. I get the error KeyError: 'label' in the code where I input a single data sample to get the explanation. My complete code and error trace are as follows:
import transformers
import shap
from transformers import RobertaTokenizer, RobertaForMaskedLM, pipeline
import torch
model = RobertaForMaskedLM.from_pretrained('microsoft/codebert-base-mlm')
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base-mlm')
code_example = "if (x <mask> 10)"
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
explainer = shap.Explainer(fill_mask)
shap_values = explainer(['x {tokenizer.mask_token} 10'])
The following is the error trace:
KeyError Traceback (most recent call last)
<ipython-input-12-bb3832d1772d> in <module>
6 # explain the model on two sample inputs
7 explainer = shap.Explainer(fill_mask)
----> 8 shap_values = explainer(['x {tokenizer.mask_token} 10'])
9 print(shap_values)
10 # visualize the first prediction's explanation for the POSITIVE output class
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in __call__(self, max_evals, fixed_context, main_effects, error_bounds, batch_size, outputs, silent, *args)
136 return super().__call__(
137 *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
--> 138 outputs=outputs, silent=silent
139 )
140
/usr/local/lib/python3.7/dist-packages/shap/explainers/_explainer.py in __call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args, **kwargs)
266 row_result = self.explain_row(
267 *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
--> 268 batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
269 )
270 values.append(row_result.get("values", None))
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in explain_row(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, fixed_context, *row_args)
159 # if not fixed background or no base value assigned then compute base value for a row
160 if self._curr_base_value is None or not getattr(self.masker, "fixed_background", False):
--> 161 self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
162 f11 = fm(~m00.reshape(1, -1))[0]
163
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in __call__(self, masks, zero_index, batch_size)
65
66 else:
---> 67 return self._full_masking_call(masks, batch_size=batch_size)
68
69 def _full_masking_call(self, masks, zero_index=None, batch_size=None):
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in _full_masking_call(self, masks, zero_index, batch_size)
142
143 joined_masked_inputs = tuple([np.concatenate(v) for v in all_masked_inputs])
--> 144 outputs = self.model(*joined_masked_inputs)
145 _assert_output_input_match(joined_masked_inputs, outputs)
146 all_outputs.append(outputs)
/usr/local/lib/python3.7/dist-packages/shap/models/_transformers_pipeline.py in __call__(self, strings)
33 val = [val]
34 for obj in val:
---> 35 output[i, self.label2id[obj["label"]]] = sp.special.logit(obj["score"]) if self.rescale_to_logits else obj["score"]
36 return output
KeyError: 'label'

Could ruamel.yaml support type descriptor like "num: !!float 4"?

I am learning to use ruamel.yaml, and I am wondering whether it supports type descriptors like plain YAML does, e.g. "num: !!float 4"?
The file looks like:
num: !!float 4
I tried importing a file like this, but got an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [22], in <cell line: 2>()
1 from ruamel import yaml
2 with open("net.yaml", "r", encoding="utf-8") as yaml_file:
----> 3 yaml_dict = yaml.round_trip_load(yaml_file)
4 yaml_dict
...
File ~/software/python/anaconda/anaconda3/envs/conda-general/lib/python3.10/site-packages/ruamel/yaml/constructor.py:1469, in RoundTripConstructor.construct_mapping(self, node, maptyp, deep)
1462 if not isinstance(key, Hashable):
1463 raise ConstructorError(
1464 'while constructing a mapping',
1465 node.start_mark,
1466 'found unhashable key',
1467 key_node.start_mark,
1468 )
-> 1469 value = self.construct_object(value_node, deep=deep)
1470 if self.check_mapping_key(node, key_node, maptyp, key, value):
1471 if key_node.comment and len(key_node.comment) > 4 and key_node.comment[4]:
File ~/software/python/anaconda/anaconda3/envs/conda-general/lib/python3.10/site-packages/ruamel/yaml/constructor.py:146, in BaseConstructor.construct_object(self, node, deep)
142 # raise ConstructorError(
143 # None, None, 'found unconstructable recursive node', node.start_mark
144 # )
145 self.recursive_objects[node] = None
--> 146 data = self.construct_non_recursive_object(node)
148 self.constructed_objects[node] = data
149 del self.recursive_objects[node]
File ~/software/python/anaconda/anaconda3/envs/conda-general/lib/python3.10/site-packages/ruamel/yaml/constructor.py:181, in BaseConstructor.construct_non_recursive_object(self, node, tag)
179 constructor = self.__class__.construct_mapping
180 if tag_suffix is None:
--> 181 data = constructor(self, node)
182 else:
183 data = constructor(self, tag_suffix, node)
File ~/software/python/anaconda/anaconda3/envs/conda-general/lib/python3.10/site-packages/ruamel/yaml/constructor.py:1271, in RoundTripConstructor.construct_yaml_float(self, node)
1259 return ScalarFloat(
1260 sign * float(value_s),
1261 width=width,
(...)
1268 anchor=node.anchor,
1269 )
1270 width = len(value_so)
-> 1271 prec = value_so.index('.') # you can use index, this would not be float without dot
1272 lead0 = leading_zeros(value_so)
1273 return ScalarFloat(
1274 sign * float(value_s),
1275 width=width,
(...)
1279 anchor=node.anchor,
1280 )
ValueError: substring not found
Why do I get this error, and how do I get rid of it?
That is a bug in ruamel.yaml<=0.17.21. The comment on the offending line (1271) says
# you can use index, this would not be float without dot
Obviously the author of that comment didn't know what he was talking about, as in your case, when using !!float 4 you have a float without a dot...
It is trivial to "fix" that by replacing index with find on line 1271; with that change your document will load and you can dump the data.
But the corresponding representer for dumping doesn't cope with that and outputs the float as 4.0, dropping the tag.
You could temporarily fix this by registering a simpler float constructor (e.g. the simple one from the SafeLoader), although this will affect all floats:
import sys
import ruamel.yaml
yaml_str = """\
num: !!float 4
"""
yaml = ruamel.yaml.YAML()
yaml.constructor.add_constructor(
'tag:yaml.org,2002:float', ruamel.yaml.constructor.SafeConstructor.construct_yaml_float
)
data = yaml.load(yaml_str)
yaml.dump(data, sys.stdout)
which gives:
num: 4.0

Cannot run pool.amap() twice in pathos.multiprocessing

I want to parallelize the simulation of multiple agents. Because I want my results to be instances of a class, I use pathos.multiprocessing instead of multiprocessing to avoid issues with serialization.
I do it like this:
import pathos.multiprocessing as mp
def sim_agent(T):  # simulate a single agent for T periods
    ag = agent()
    for t in range(T):
        ag.step()
    return ag

def simulate_parallel(N, T):
    if __name__ == '__main__':
        pool = mp.ProcessPool()
        results = pool.amap(sim_agent, [T]*N)
        agents = results.get()
    return agents
I can run simulate_parallel once. When I do it again, I get an error:
--> 142 agents = results.get()
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
429 break
430 try:
--> 431 put(task)
432 except Exception as e:
433 job, idx = task[:2]
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/connection.py in send(self, obj)
207 self._check_closed()
208 self._check_writable()
--> 209 self._send_bytes(_ForkingPickler.dumps(obj))
210
211 def recv_bytes(self, maxlength=None):
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/connection.py in _send_bytes(self, buf)
394 n = len(buf)
395 # For wire compatibility with 3.2 and lower
--> 396 header = struct.pack("!i", n)
397 if n > 16384:
398 # The payload is large so Nagle's algorithm won't be triggered
error: 'i' format requires -2147483648 <= number <= 2147483647
I also get the same error when I try to use pool.amap() in other functions. Even when I make N and T small, the behavior is the same: I can run simulate_parallel once but cannot run it a second time. Why could that be? Thanks!

Kusto Ingest - KustoServiceError 'BadRequest_SyntaxError'

I have the following code for ingesting data into Azure Data Explorer using Python in Databricks:
df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
This gives me the error:
BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed
Earlier in my code I created a table using the same data types as this dummy pandas dataframe. Now I'm trying to load the data into the table. Full stack trace:
KustoServiceError Traceback (most recent call last)
<command-3953651275234016> in <module>
1 df=pd.DataFrame({"StringCol": ["123ABC", 'B123', 'C123','D123'],"NumberCol": [1,2,3,4],"DecimalCol": [1,2.2,3.3,4.4],"DateCol": ['1/1/20','2/2/20','3/3/30','4/4/20']})
2 ingestion_props = IngestionProperties(database=db, table='TestTable_DeleteMe')
----> 3 connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
4
5 #adx_loadIntoTable(connWrite,db,df,'TestTable_DeleteMe')
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_dataframe(self, df, ingestion_properties)
52 ingestion_properties.format = DataFormat.CSV
53
---> 54 self.ingest_from_file(temp_file_path, ingestion_properties)
55
56 os.unlink(temp_file_path)
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/ingest_client.py in ingest_from_file(self, file_descriptor, ingestion_properties)
64 :param azure.kusto.ingest.IngestionProperties ingestion_properties: Ingestion properties.
65 """
---> 66 containers = self._resource_manager.get_containers()
67
68 if isinstance(file_descriptor, FileDescriptor):
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in get_containers(self)
121
122 def get_containers(self) -> List[_ResourceUri]:
--> 123 self._refresh_ingest_client_resources()
124 return self._ingest_client_resources.containers
125
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _refresh_ingest_client_resources(self)
79 or not self._ingest_client_resources.is_applicable()
80 ):
---> 81 self._ingest_client_resources = self._get_ingest_client_resources_from_service()
82 self._ingest_client_resources_last_update = datetime.utcnow()
83
/databricks/python/lib/python3.7/site-packages/azure/kusto/ingest/_resource_manager.py in _get_ingest_client_resources_from_service(self)
86
87 def _get_ingest_client_resources_from_service(self):
---> 88 table = self._kusto_client.execute("NetDefaultDB", ".get ingestion resources").primary_results[0]
89
90 secured_ready_for_aggregation_queues = self._get_resource_by_name(table, "SecuredReadyForAggregationQueue")
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute(self, database, query, properties)
553 query = query.strip()
554 if query.startswith("."):
--> 555 return self.execute_mgmt(database, query, properties)
556 return self.execute_query(database, query, properties)
557
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in execute_mgmt(self, database, query, properties)
578 :rtype: azure.kusto.data.response.KustoResponseDataSet
579 """
--> 580 return self._execute(self._mgmt_endpoint, database, query, None, KustoClient._mgmt_default_timeout, properties)
581
582 def execute_streaming_ingest(
/databricks/python/lib/python3.7/site-packages/azure/kusto/data/client.py in _execute(self, endpoint, database, query, payload, timeout, properties)
654 )
655
--> 656 raise KustoServiceError([response.json()], response)
KustoServiceError: (KustoServiceError(...), [{'error': {'code': 'BadRequest_SyntaxError', 'message': 'Request is invalid and cannot be executed.', '#type': 'Kusto.Data.Exceptions.SyntaxException', '#message': "Syntax error: Query could not be parsed: . Query: '.get ingestion resources'", '#context': {'timestamp': '2020-06-27T21:44:48.0697658Z', 'serviceAlias': 'USCPIRSTASADE01', 'machineName': 'KEngine000000', 'processName': 'Kusto.WinSvc.Svc', 'processId': 7124, 'threadId': 7240, 'appDomainName': 'Kusto.WinSvc.Svc.exe', 'clientRequestId': 'KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c', 'activityId': 'cf41c806-8e15-458e-b388-386613f63952', 'subActivityId': 'df366667-ca8d-487b-a281-723f696a8f68', 'activityType': 'DN.FE.ExecuteControlCommand', 'parentActivityId': 'f8cd0bb8-04e9-48cf-8a84-8b16e1e24197', 'activityStack': '(Activity stack: CRID=KPC.execute;0c2173bf-ea69-4253-bbaf-0203f3aa298c ARID=cf41c806-8e15-458e-b388-386613f63952 > DN.Admin.Client.ExecuteControlCommand/7271d9ec-2adf-4714-b19e-69495ad80d65 > P.WCF.Service.ExecuteControlCommandInternal..IAdminClientServiceCommunicationContract/f8cd0bb8-04e9-48cf-8a84-8b16e1e24197 > DN.FE.ExecuteControlCommand/df366667-ca8d-487b-a281-723f696a8f68)'}, '#permanent': True}}])
It is likely that your connection uses the engine endpoint instead of the data management endpoint. Can you check that the connection to the cluster starts with "ingest-"? Here is an example:
client = KustoIngestClient("https://ingest-{cluster_name}.kusto.windows.net")
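To illustrate the distinction, here is a hedged sketch (the cluster name and authentication method are placeholders, and the exact import paths can vary between azure-kusto-* versions): queries and control commands go through the engine endpoint, while the ingest client should point at the "ingest-" data management endpoint.
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.ingest import KustoIngestClient, IngestionProperties

cluster = "mycluster"  # placeholder cluster name

# Engine endpoint: queries and control commands (e.g. creating the table)
engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    f"https://{cluster}.kusto.windows.net"
)
connRead = KustoClient(engine_kcsb)

# Data management endpoint: used by the ingest client
ingest_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    f"https://ingest-{cluster}.kusto.windows.net"
)
connWrite = KustoIngestClient(ingest_kcsb)

ingestion_props = IngestionProperties(database=db, table="TestTable_DeleteMe")
connWrite.ingest_from_dataframe(df, ingestion_properties=ingestion_props)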

KeyError: 'Entity c does not exist in dfs'

When I try to run this code,
ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
                                        relationships=relationship,
                                        target_entity="transactions")
I get this error:
490 featuretools.entityset - WARNING index session_id not found in dataframe, creating new integer column
KeyError Traceback (most recent call last)
<ipython-input-82-d467a36d5254> in <module>()
1 ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
2 relationships=relationshp,
----> 3 target_entity="transactions")
4 frames
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
38 ep.on_error(error=e,
39 runtime=runtime)
---> 40 raise e
41
42 # send return value
/usr/local/lib/python3.6/dist-packages/featuretools/utils/entry_point.py
in function_wrapper(*args, **kwargs)
30 # call function
31 start = time.time()
---> 32 return_value = func(*args, **kwargs)
33 runtime = time.time() - start
34 except Exception as e:
/usr/local/lib/python3.6/dist-packages/featuretools/synthesis/dfs.py
in dfs(entities, relationships, entityset, target_entity, cutoff_time,
instance_ids, agg_primitives, trans_primitives,
groupby_trans_primitives, allowed_paths, max_depth, ignore_entities,
ignore_variables, primitive_options, seed_features, drop_contains,
drop_exact, where_primitives, max_features, cutoff_time_in_index,
save_progress, features_only, training_window, approximate,
chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types,
progress_callback)
225 '''
226 if not isinstance(entityset, EntitySet):
--> 227 entityset = EntitySet("dfs", entities, relationships)
228
229 dfs_object = DeepFeatureSynthesis(target_entity, entityset,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in init(self, id, entities, relationships)
83
84 for relationship in relationships:
---> 85 parent_variable = self[relationship[0]][relationship[1]]
86 child_variable = self[relationship[2]][relationship[3]]
87 self.add_relationship(Relationship(parent_variable,
/usr/local/lib/python3.6/dist-packages/featuretools/entityset/entityset.py
in getitem(self, entity_id)
124 return self.entity_dict[entity_id]
125 name = self.id or "entity set"
--> 126 raise KeyError('Entity %s does not exist in %s' % (entity_id, name))
127
128 @property
However, this returned KeyError: 'Entity c does not exist in dfs'.
Any idea what's wrong with my code?
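For reference, the traceback above indexes each relationship as a 4-tuple of (parent_entity, parent_variable, child_entity, child_variable), and relationships should be a list of such tuples. A hedged sketch of that legacy entities/relationships format, with hypothetical entity and column names:
import featuretools as ft

# Hypothetical entities dictionary: entity_id -> (dataframe, index column)
entities = {
    "customers": (customers_df, "customer_id"),
    "sessions": (sessions_df, "session_id"),
    "transactions": (transactions_df, "transaction_id"),
}

# Each relationship: (parent_entity, parent_variable, child_entity, child_variable)
relationships = [
    ("customers", "customer_id", "sessions", "customer_id"),
    ("sessions", "session_id", "transactions", "session_id"),
]

ftr_mtrx_custmr, features_defs = ft.dfs(entities=entities,
                                        relationships=relationships,
                                        target_entity="transactions")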
