Sagemaker Studio UnknownServiceError for Session - session

When I run the following code in Sagemaker Studio, I get the UnknownServiceError:
import boto3
import sagemaker
sagemaker_session = sagemaker.Session()
role = get_execution_role()
Any advice to overcome this would be greatly appreciated.
The error message is the following:
---------------------------------------------------------------------------
UnknownServiceError Traceback (most recent call last)
<ipython-input-47-2d2ae2d1e577> in <module>
1 import boto3
2 import sagemaker
----> 3 sagemaker_session = sagemaker.Session()
4 role = get_execution_role()
/opt/conda/lib/python3.7/site-packages/sagemaker/session.py in __init__(self, boto_session, sagemaker_client, sagemaker_runtime_client, sagemaker_featurestore_runtime_client, default_bucket)
124 sagemaker_client=sagemaker_client,
125 sagemaker_runtime_client=sagemaker_runtime_client,
--> 126 sagemaker_featurestore_runtime_client=sagemaker_featurestore_runtime_client,
127 )
128
/opt/conda/lib/python3.7/site-packages/sagemaker/session.py in _initialize(self, boto_session, sagemaker_client, sagemaker_runtime_client, sagemaker_featurestore_runtime_client)
164 else:
165 self.sagemaker_featurestore_runtime_client = self.boto_session.client(
--> 166 "sagemaker-featurestore-runtime"
167 )
168
/opt/conda/lib/python3.7/site-packages/boto3/session.py in client(self, service_name, region_name, api_version, use_ssl, verify, endpoint_url, aws_access_key_id, aws_secret_access_key, aws_session_token, config)
261 aws_access_key_id=aws_access_key_id,
262 aws_secret_access_key=aws_secret_access_key,
--> 263 aws_session_token=aws_session_token, config=config)
264
265 def resource(self, service_name, region_name=None, api_version=None,
/opt/conda/lib/python3.7/site-packages/botocore/session.py in create_client(self, service_name, region_name, api_version, use_ssl, verify, endpoint_url, aws_access_key_id, aws_secret_access_key, aws_session_token, config)
833 is_secure=use_ssl, endpoint_url=endpoint_url, verify=verify,
834 credentials=credentials, scoped_config=self.get_scoped_config(),
--> 835 client_config=config, api_version=api_version)
836 monitor = self._get_internal_component('monitor')
837 if monitor is not None:
/opt/conda/lib/python3.7/site-packages/botocore/client.py in create_client(self, service_name, region_name, is_secure, endpoint_url, verify, credentials, scoped_config, api_version, client_config)
76 'choose-service-name', service_name=service_name)
77 service_name = first_non_none_response(responses, default=service_name)
---> 78 service_model = self._load_service_model(service_name, api_version)
79 cls = self._create_client_class(service_name, service_model)
80 endpoint_bridge = ClientEndpointBridge(
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _load_service_model(self, service_name, api_version)
114 def _load_service_model(self, service_name, api_version=None):
115 json_model = self._loader.load_service_model(service_name, 'service-2',
--> 116 api_version=api_version)
117 service_model = ServiceModel(json_model, service_name=service_name)
118 return service_model
/opt/conda/lib/python3.7/site-packages/botocore/loaders.py in _wrapper(self, *args, **kwargs)
130 if key in self._cache:
131 return self._cache[key]
--> 132 data = func(self, *args, **kwargs)
133 self._cache[key] = data
134 return data
/opt/conda/lib/python3.7/site-packages/botocore/loaders.py in load_service_model(self, service_name, type_name, api_version)
376 raise UnknownServiceError(
377 service_name=service_name,
--> 378 known_service_names=', '.join(sorted(known_services)))
379 if api_version is None:
380 api_version = self.determine_latest_version(
UnknownServiceError: Unknown service: 'sagemaker-featurestore-runtime'. Valid service names are: accessanalyzer, acm, acm-pca, alexaforbusiness, amplify, apigateway, apigatewaymanagementapi, apigatewayv2, appconfig, application-autoscaling, application-insights, appmesh, appstream, appsync, athena, autoscaling, autoscaling-plans, backup, batch, braket, budgets, ce, chime, cloud9, clouddirectory, cloudformation, cloudfront, cloudhsm, cloudhsmv2, cloudsearch, cloudsearchdomain, cloudtrail, cloudwatch, codeartifact, codebuild, codecommit, codedeploy, codeguru-reviewer, codeguruprofiler, codepipeline, codestar, codestar-connections, codestar-notifications, cognito-identity, cognito-idp, cognito-sync, comprehend, comprehendmedical, compute-optimizer, config, connect, connectparticipant, cur, dataexchange, datapipeline, datasync, dax, detective, devicefarm, directconnect, discovery, dlm, dms, docdb, ds, dynamodb, dynamodbstreams, ebs, ec2, ec2-instance-connect, ecr, ecs, efs, eks, elastic-inference, elasticache, elasticbeanstalk, elastictranscoder, elb, elbv2, emr, es, events, firehose, fms, forecast, forecastquery, frauddetector, fsx, gamelift, glacier, globalaccelerator, glue, greengrass, groundstation, guardduty, health, honeycode, iam, imagebuilder, importexport, inspector, iot, iot-data, iot-jobs-data, iot1click-devices, iot1click-projects, iotanalytics, iotevents, iotevents-data, iotsecuretunneling, iotsitewise, iotthingsgraph, ivs, kafka, kendra, kinesis, kinesis-video-archived-media, kinesis-video-media, kinesis-video-signaling, kinesisanalytics, kinesisanalyticsv2, kinesisvideo, kms, lakeformation, lambda, lex-models, lex-runtime, license-manager, lightsail, logs, machinelearning, macie, macie2, managedblockchain, marketplace-catalog, marketplace-entitlement, marketplacecommerceanalytics, mediaconnect, mediaconvert, medialive, mediapackage, mediapackage-vod, mediastore, mediastore-data, mediatailor, meteringmarketplace, mgh, migrationhub-config, mobile, mq, 
mturk, neptune, networkmanager, opsworks, opsworkscm, organizations, outposts, personalize, personalize-events, personalize-runtime, pi, pinpoint, pinpoint-email, pinpoint-sms-voice, polly, pricing, qldb, qldb-session, quicksight, ram, rds, rds-data, redshift, rekognition, resource-groups, resourcegroupstaggingapi, robomaker, route53, route53domains, route53resolver, s3, s3control, sagemaker, sagemaker-a2i-runtime, sagemaker-runtime, savingsplans, schemas, sdb, secretsmanager, securityhub, serverlessrepo, service-quotas, servicecatalog, servicediscovery, ses, sesv2, shield, signer, sms, sms-voice, snowball, sns, sqs, ssm, sso, sso-oidc, stepfunctions, storagegateway, sts, support, swf, synthetics, textract, transcribe, transfer, translate, waf, waf-regional, wafv2, workdocs, worklink, workmail, workmailmessageflow, workspaces, xray

I encountered the same issue and pip install sagemaker -U (to 2.20.0) resolved it for me, hopefully that's all you need to get around this.

Related

trainer.train() in Kaggle: StdinNotImplementedError: getpass was called, but this frontend does not support input requests

When saving a version in Kaggle, I get StdinNotImplementedError: getpass was called, but this frontend does not support input requests whenever I use the Transformers.Trainer class. The general code I use:
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(params)
trainer = Trainer(params)
trainer.train()
And the specific cell I am running now:
from transformers import Trainer, TrainingArguments,EarlyStoppingCallback
early_stopping = EarlyStoppingCallback()
training_args = TrainingArguments(
output_dir=OUT_FINETUNED_MODEL_PATH,
num_train_epochs=20,
per_device_train_batch_size=16,
per_device_eval_batch_size=16,
warmup_steps=0,
weight_decay=0.01,
logging_dir='./logs',
logging_steps=100,
evaluation_strategy="steps",
eval_steps=100,
load_best_model_at_end=True,
metric_for_best_model="eval_loss",
greater_is_better=False
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=val_dataset,
callbacks=[early_stopping]
)
trainer.train()
When trainer.train() is called, I get the error below, which I do not get if I train with native PyTorch. I understood that the error arises since I am asked to input a password, but no password is asked when using native PyTorch code, nor when using the same code with trainer.train() on Google Colab.
Any solution would be ok, like:
Avoid being asked the password.
Enable input requests when saving a notebook on Kaggle. After that, if I understood correctly, I would need to go to https://wandb.ai/authorize (after having created an account) and copy the generated key to console. However, I do not understand why wandb should be necessary since I never explicitly used it so far.
wandb: You can find your API key in your browser here: https://wandb.ai/authorize
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 741, in init
wi.setup(kwargs)
File "/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_init.py", line 155, in setup
wandb_login._login(anonymous=anonymous, force=force, _disable_warning=True)
File "/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_login.py", line 210, in _login
wlogin.prompt_api_key()
File "/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_login.py", line 144, in prompt_api_key
no_create=self._settings.force,
File "/opt/conda/lib/python3.7/site-packages/wandb/sdk/lib/apikey.py", line 135, in prompt_api_key
key = input_callback(api_ask).strip()
File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 825, in getpass
"getpass was called, but this frontend does not support input requests."
IPython.core.error.StdinNotImplementedError: getpass was called, but this frontend does not support input requests.
wandb: ERROR Abnormal program exit
---------------------------------------------------------------------------
StdinNotImplementedError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_init.py in init(job_type, dir, config, project, entity, reinit, tags, group, name, notes, magic, config_exclude_keys, config_include_keys, anonymous, mode, allow_val_change, resume, force, tensorboard, sync_tensorboard, monitor_gym, save_code, id, settings)
740 wi = _WandbInit()
--> 741 wi.setup(kwargs)
742 except_exit = wi.settings._except_exit
/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_init.py in setup(self, kwargs)
154 if not settings._offline and not settings._noop:
--> 155 wandb_login._login(anonymous=anonymous, force=force, _disable_warning=True)
156
/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_login.py in _login(anonymous, key, relogin, host, force, _backend, _silent, _disable_warning)
209 if not key:
--> 210 wlogin.prompt_api_key()
211
/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_login.py in prompt_api_key(self)
143 no_offline=self._settings.force,
--> 144 no_create=self._settings.force,
145 )
/opt/conda/lib/python3.7/site-packages/wandb/sdk/lib/apikey.py in prompt_api_key(settings, api, input_callback, browser_callback, no_offline, no_create, local)
134 )
--> 135 key = input_callback(api_ask).strip()
136 write_key(settings, key, api=api)
/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py in getpass(self, prompt, stream)
824 raise StdinNotImplementedError(
--> 825 "getpass was called, but this frontend does not support input requests."
826 )
StdinNotImplementedError: getpass was called, but this frontend does not support input requests.
The above exception was the direct cause of the following exception:
Exception Traceback (most recent call last)
<ipython-input-82-4d1046ab80b8> in <module>
42 )
43
---> 44 trainer.train()
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, **kwargs)
1067 model.zero_grad()
1068
-> 1069 self.control = self.callback_handler.on_train_begin(self.args, self.state, self.control)
1070
1071 # Skip the first epochs_trained epochs to get the random state of the dataloader at the right point.
/opt/conda/lib/python3.7/site-packages/transformers/trainer_callback.py in on_train_begin(self, args, state, control)
338 def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl):
339 control.should_training_stop = False
--> 340 return self.call_event("on_train_begin", args, state, control)
341
342 def on_train_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl):
/opt/conda/lib/python3.7/site-packages/transformers/trainer_callback.py in call_event(self, event, args, state, control, **kwargs)
386 train_dataloader=self.train_dataloader,
387 eval_dataloader=self.eval_dataloader,
--> 388 **kwargs,
389 )
390 # A Callback can skip the return of `control` if it doesn't change it.
/opt/conda/lib/python3.7/site-packages/transformers/integrations.py in on_train_begin(self, args, state, control, model, **kwargs)
627 self._wandb.finish()
628 if not self._initialized:
--> 629 self.setup(args, state, model, **kwargs)
630
631 def on_train_end(self, args, state, control, model=None, tokenizer=None, **kwargs):
/opt/conda/lib/python3.7/site-packages/transformers/integrations.py in setup(self, args, state, model, **kwargs)
604 project=os.getenv("WANDB_PROJECT", "huggingface"),
605 name=run_name,
--> 606 **init_args,
607 )
608 # add config parameters (run may have been created manually)
/opt/conda/lib/python3.7/site-packages/wandb/sdk/wandb_init.py in init(job_type, dir, config, project, entity, reinit, tags, group, name, notes, magic, config_exclude_keys, config_include_keys, anonymous, mode, allow_val_change, resume, force, tensorboard, sync_tensorboard, monitor_gym, save_code, id, settings)
779 if except_exit:
780 os._exit(-1)
--> 781 six.raise_from(Exception("problem"), error_seen)
782 return run
/opt/conda/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
Exception: problem
You may want to try adding report_to="tensorboard" or any other reasonable string array in your TrainingArguments
https://huggingface.co/transformers/main_classes/trainer.html#transformers.TrainingArguments
If you have multiple loggers that you want to use, set report_to="all" (the default value).
Alternatively, try setting os.environ["WANDB_DISABLED"] = "true" so that wandb is always disabled.
see: https://huggingface.co/transformers/main_classes/trainer.html#transformers.TFTrainer.setup_wandb

Register Azure ML Model from DatabricksStep

I'm calculating a model while executing a DatabricksStep in an Azure ML Pipeline, save it on my Blob Storage as .pkl file and upload it to the current Azure ML Run using Run.upload_file (). All this works without any problems.
But as soon as I try to register the model to the Azure ML Workspace using Run.register_model (), the script throws the following error:
UserErrorException: UserErrorException:
Message:
Operation returned an invalid status code 'Forbidden'. The possible reason could be:
You are not authorized to access this resource, or directory listing denied.
you may not login your azure service, or use other subscription, you can check your
default account by running azure cli commend:
'az account list -o table'.
You have multiple objects/login session opened, please close all session and try again.
InnerException None
ErrorResponse
{
"error": {
"code": "UserError",
"message": "\nOperation returned an invalid status code 'Forbidden'. The possible reason could be:\n1. You are not authorized to access this resource, or directory listing denied.\n2. you may not login your azure service, or use other subscription, you can check your\ndefault account by running azure cli commend:\n'az account list -o table'.\n3. You have multiple objects/login session opened, please close all session and try again.\n "
}
}
with the following call stack
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/models_client.py in register_model(self, name, tags, properties, description, url, mime_type, framework, framework_version, unpack, experiment_name, run_id, datasets, sample_input_data, sample_output_data, resource_requirements)
70 return self.
71 _execute_with_workspace_arguments(self._client.ml_models.register, model,
---> 72 custom_headers=ModelsClient.get_modelmanagement_custom_headers())
73
74 #error_with_model_id_handling
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/workspace_client.py in _execute_with_workspace_arguments(self, func, *args, **kwargs)
65
66 def _execute_with_workspace_arguments(self, func, *args, **kwargs):
---> 67 return self._execute_with_arguments(func, copy.deepcopy(self._workspace_arguments), *args, **kwargs)
68
69 def get_or_create_experiment(self, experiment_name, is_async=False):
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/clientbase.py in _execute_with_arguments(self, func, args_list, *args, **kwargs)
536 return self._call_paginated_api(func, *args_list, **kwargs)
537 else:
--> 538 return self._call_api(func, *args_list, **kwargs)
539 except ErrorResponseException as e:
540 raise ServiceException(e)
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/clientbase.py in _call_api(self, func, *args, **kwargs)
234 return AsyncTask(future, _ident=ident, _parent_logger=self._logger)
235 else:
--> 236 return self._execute_with_base_arguments(func, *args, **kwargs)
237
238 def _call_paginated_api(self, func, *args, **kwargs):
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/clientbase.py in _execute_with_base_arguments(self, func, *args, **kwargs)
323 total_retry = 0 if self.retries < 0 else self.retries
324 return ClientBase._execute_func_internal(
--> 325 back_off, total_retry, self._logger, func, _noop_reset, *args, **kwargs)
326
327 #classmethod
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/clientbase.py in _execute_func_internal(cls, back_off, total_retry, logger, func, reset_func, *args, **kwargs)
343 return func(*args, **kwargs)
344 except Exception as error:
--> 345 left_retry = cls._handle_retry(back_off, left_retry, total_retry, error, logger, func)
346
347 reset_func(*args, **kwargs) # reset_func is expected to undo any side effects from a failed func call.
/databricks/python/lib/python3.7/site-packages/azureml/_restclient/clientbase.py in _handle_retry(cls, back_off, left_retry, total_retry, error, logger, func)
384 3. You have multiple objects/login session opened, please close all session and try again.
385 """
--> 386 raise_from(UserErrorException(error_msg), error)
387
388 elif error.response.status_code == 429:
/databricks/python/lib/python3.7/site-packages/six.py in raise_from(value, from_value)
Has anybody experienced the same error and knows what causes it and how to solve it?
Best,
Jonas
UPDATE:
model = sklearn.linear_model.LinearRegression ( )
model_path = "<path to 'model.pkl' in my blob storage>"
joblib.dump(model, model_path)
aml_run = azureml.core.get_context ( )
aml_run.upload_file (name = "model.pkl", path_or_stream = model_path)
# Until this point, everything works fine
aml_run.register_model (model_name = "model.pkl")
# This throws the posted "Forbidden"-Error
To configure the workspace to authenticate to the subscription, please follow the steps in the notebooks.
Persist model (joblib.dump) to a custom folder other than outputs.
Manually run upload_file to upload the model to the AML workspace. Give the destination the same name as your model file.
Then run run.register_model.
or
Let the AML background process automatically upload the content under ./outputs to the AML workspace. Once the upload is complete, call run.register_model, which takes the content from the AML workspace.
The documentation DatabricksStep class and the sample notebook https://aka.ms/pl-databricks should both be helpful.
UserErrorException: UserErrorException: Message: Operation returned an invalid status code 'Forbidden'.
This error might be due to the fact that Azure Databricks Compute is unable to authenticate the Azure Machine Learning Workspace.
I had been facing a similar error, and this is the Microsoft preferred way of solving this issue:
Create an Azure Key Vault.
Create a Service Principal (App registration) inside Azure Active Directory.
Add this Service Principal with Contributor/ Owner access in AML and ADB.
Create an Azure Databricks Scope and link it with the key vault created in Step 1.
Save the Client ID, Directory ID and Client Secret in the Key Vault.
Use ServicePrincipalAuthentication to validate the credentials.
For Step Six use Databricks Secret Scope to get the values. This resource will walk you through this step: Secret Management in Azure Databricks
Some references that will be helpful:
A worked-out example provided by Microsoft.
Microsoft Documentation on ServicePrincipalAuthentication

How do I access SparkContext in Dataproc?

My goal is to use the elasticsearch-hadoop connector to load data directly into ES with pySpark.
I'm quite new to dataproc and pySpark and got stuck quite early.
I run a single node cluster (Image 1.3 ,Debian 9,Hadoop 2.9,Spark 2.3)
and this is my code. I assume I need to install Java.
Thanks!
from pyspark.sql import SQLContext
from pyspark.sql.functions import lit
import os
from pyspark.sql import SparkSession
def install_java():
!apt-get install -y openjdk-8-jdk-headless -qq > /dev/null #install openjdk
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64" #set environment
variable
!java -version #check java version
install_java()
conf = SparkConf().setAppName("testing").setMaster('ip-address')
sc = SparkContext.getOrCreate()
ExceptionTraceback (most recent call last)
<ipython-input-18-df37a24b7514> in <module>()
----> 1 sc = SparkContext.getOrCreate()
/usr/lib/spark/python/pyspark/context.pyc in getOrCreate(cls, conf)
361 with SparkContext._lock:
362 if SparkContext._active_spark_context is None:
--> 363 SparkContext(conf=conf or SparkConf())
364 return SparkContext._active_spark_context
365
/usr/lib/spark/python/pyspark/context.pyc in __init__(self, master, appName, sparkHome,
pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
127 " note this option will be removed in Spark 3.0")
128
--> 129 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
130 try:
131 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize,
serializer,
/usr/lib/spark/python/pyspark/context.pyc in _ensure_initialized(cls, instance, gateway,
conf)
310 with SparkContext._lock:
311 if not SparkContext._gateway:
--> 312 SparkContext._gateway = gateway or launch_gateway(conf)
313 SparkContext._jvm = SparkContext._gateway.jvm
314
/usr/lib/spark/python/pyspark/java_gateway.pyc in launch_gateway(conf)
44 :return: a JVM gateway
45 """
---> 46 return _launch_gateway(conf)
47
48
/usr/lib/spark/python/pyspark/java_gateway.pyc in _launch_gateway(conf, insecure)
106
107 if not os.path.isfile(conn_info_file):
--> 108 raise Exception("Java gateway process exited before sending its port
number")
109
110 with open(conn_info_file, "rb") as info:
Exception: Java gateway process exited before sending its port number
Ok, solved, I needed to stop the current context before I create my new SparkContext.
sc.stop()

AttributeError: 'NoneType' object has no attribute 'ReadAsArray' when loading an image, what could be the cause?

I'm trying to build a convolutional neural network for image classification in Python.
I run my code on CoLab and have loaded my data on Google Drive.
I can see all the files and folders in my google drive from python, but when I try to actually load an image it gives me the error in the title.
I'm using the skimage.io package, I'm actually just running a notebook I found on kaggle so the code should run fine, only difference I noticed is that the kaggle user was probably not working on CoLab with his data in GoogleDrive so I think maybe that's the problem, anyway here's my code:
from skimage.io import imread
img=imread('/content/drive/My Drive/CoLab/Data/chest_xray/train/PNEUMONIA/person53_bacteria_255.jpeg')
Which gives me the following error:
AttributeError: 'NoneType' object has no attribute 'ReadAsArray'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-12-4a64aebb8504> in <module>()
----> 1 img=imread('/content/drive/My Drive/CoLab/Data/chest_xray/train/PNEUMONIA/person53_bacteria_255.jpeg')
4 frames
/usr/local/lib/python3.6/dist-packages/skimage/io/_io.py in imread(fname, as_gray, plugin, flatten, **plugin_args)
59
60 with file_or_url_context(fname) as fname:
---> 61 img = call_plugin('imread', fname, plugin=plugin, **plugin_args)
62
63 if not hasattr(img, 'ndim'):
/usr/local/lib/python3.6/dist-packages/skimage/io/manage_plugins.py in call_plugin(kind, *args, **kwargs)
208 (plugin, kind))
209
--> 210 return func(*args, **kwargs)
211
212
/usr/local/lib/python3.6/dist-packages/imageio/core/functions.py in imread(uri, format, **kwargs)
221 reader = read(uri, format, "i", **kwargs)
222 with reader:
--> 223 return reader.get_data(0)
224
225
/usr/local/lib/python3.6/dist-packages/imageio/core/format.py in get_data(self, index, **kwargs)
345 self._checkClosed()
346 self._BaseReaderWriter_last_index = index
--> 347 im, meta = self._get_data(index, **kwargs)
348 return Array(im, meta) # Array tests im and meta
349
/usr/local/lib/python3.6/dist-packages/imageio/plugins/gdal.py in _get_data(self, index)
64 if index != 0:
65 raise IndexError("Gdal file contains only one dataset")
---> 66 return self._ds.ReadAsArray(), self._get_meta_data(index)
67
68 def _get_meta_data(self, index):
AttributeError: 'NoneType' object has no attribute 'ReadAsArray'
First, instead of My Drive it should be MyDrive (no space).
If it still doesn't work, you can try the following:
%cd /content/drive/MyDrive/CoLab/Data/chest_xray/train/PNEUMONIA
img=imread('person53_bacteria_255.jpeg')

Compatibility issues with H2O.ai Hadoop on MapR 6.0 via python API?

Having apparent compatibility issues running H2O (via the 3.18.0.2 MapR 5.2 driver (trying with the latest driver (3.20.0.7) as recommended in another SO post did not help the problem)) on MapR 6.0.
While able to start an H2O cluster on MapR 6.0 (via something like hadoop jar h2odriver.jar -nodes 3 -mapperXmx 6g -output hdfsOutputDirName
) and seem to be able to access h2o Flow UI, having problems accessing the cluster via python API (pip show h2o confirms matching package version to driver being used).
Is the MapR 5.2 driver (currently the latest MapR driver version offered by H2O) incompatible with MapR 6.0 (would not be asking if not for the fact that seem to be able to use the H2O Flow UI on cluster instance started on MapR 6.0)? Any workaround other than standalone driver version (would like to still be able to leverage YARN on hadoop cluster)?
The code and error seen when trying to connect to the running H2O cluster using the Python API are shown below.
# connect to h2o service
h2o.init(ip=h2o_cnxn_ip)
where the h2o_cnxn_ip is the IP and port generated after starting the h2o cluster on the MapR 6.0 system. Produces error
Checking whether there is an H2O instance running at http://172.18.0.123:54321...
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-5-1728877a03a2> in <module>()
1 # connect to h2o service
----> 2 h2o.init(ip=h2o_cnxn_ip)
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/h2o.pyc in init(url, ip, port, https, insecure, username, password, cookies, proxy, start_h2o, nthreads, ice_root, enable_assertions, max_mem_size, min_mem_size, strict_version_check, ignore_config, extra_classpath, **kwargs)
250 auth=auth, proxy=proxy,cookies=cookies, verbose=True,
251 _msgs=("Checking whether there is an H2O instance running at {url}",
--> 252 "connected.", "not found."))
253 except H2OConnectionError:
254 # Backward compatibility: in init() port parameter really meant "baseport" when starting a local server...
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/backend/connection.pyc in open(server, url, ip, port, https, auth, verify_ssl_certificates, proxy, cookies, verbose, _msgs)
316 conn._stage = 1
317 conn._timeout = 3.0
--> 318 conn._cluster = conn._test_connection(retries, messages=_msgs)
319 # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
320 # setting for now, for no good reason other than to ignore all those bugs :(
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/backend/connection.pyc in _test_connection(self, max_retries, messages)
558 raise H2OServerError("Local server was unable to start")
559 try:
--> 560 cld = self.request("GET /3/Cloud")
561 if cld.consensus and cld.cloud_healthy:
562 self._print(" " + messages[1])
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/backend/connection.pyc in request(self, endpoint, data, json, filename, save_to)
400 auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
401 self._log_end_transaction(start_time, resp)
--> 402 return self._process_response(resp, save_to)
403
404 except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/backend/connection.pyc in _process_response(response, save_to)
711 if content_type == "application/json":
712 try:
--> 713 data = response.json(object_pairs_hook=H2OResponse)
714 except (JSONDecodeError, requests.exceptions.ContentDecodingError) as e:
715 raise H2OServerError("Malformed JSON from server (%s):\n%s" % (str(e), response.text))
/home/me/projects/myproject/lib/python2.7/site-packages/requests/models.pyc in json(self, **kwargs)
882 try:
883 return complexjson.loads(
--> 884 self.content.decode(encoding), **kwargs
885 )
886 except UnicodeDecodeError:
/usr/lib64/python2.7/json/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
349 if parse_constant is not None:
350 kw['parse_constant'] = parse_constant
--> 351 return cls(encoding=encoding, **kw).decode(s)
/usr/lib64/python2.7/json/decoder.pyc in decode(self, s, _w)
364
365 """
--> 366 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
367 end = _w(s, end).end()
368 if end != len(s):
/usr/lib64/python2.7/json/decoder.pyc in raw_decode(self, s, idx)
380 """
381 try:
--> 382 obj, end = self.scan_once(s, idx)
383 except StopIteration:
384 raise ValueError("No JSON object could be decoded")
/home/me/projects/myproject/lib/python2.7/site-packages/h2o/backend/connection.pyc in __new__(cls, keyvals)
823 for k, v in keyvals:
824 if k == "__meta" and isinstance(v, dict):
--> 825 schema = v["schema_name"]
826 break
827 if k == "__schema" and is_type(v, str):
KeyError: u'schema_name'
MapR 6 is not currently supported by H2O. Currently H2O supports up to MapR 5.2.
Please see the downloads page for supported Hadoop versions.

Resources