Related
I am trying to get SHAP values for a masked language modeling task using a transformer model. I get the error KeyError: 'label' in the code where I input a single data sample to get the explanation. My complete code and error trace are as follows:
import transformers
import shap
from transformers import RobertaTokenizer, RobertaForMaskedLM, pipeline
import torch
# load the CodeBERT masked-language model and its tokenizer
model = RobertaForMaskedLM.from_pretrained('microsoft/codebert-base-mlm')
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base-mlm')
code_example = "if (x <mask> 10)"
# wrap them in a fill-mask pipeline and explain a single input with SHAP
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
explainer = shap.Explainer(fill_mask)
shap_values = explainer(['x {tokenizer.mask_token} 10'])
Following is the error trace:
KeyError Traceback (most recent call last)
<ipython-input-12-bb3832d1772d> in <module>
6 # explain the model on two sample inputs
7 explainer = shap.Explainer(fill_mask)
----> 8 shap_values = explainer(['x {tokenizer.mask_token} 10'])
9 print(shap_values)
10 # visualize the first prediction's explanation for the POSITIVE output class
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in __call__(self, max_evals, fixed_context, main_effects, error_bounds, batch_size, outputs, silent, *args)
136 return super().__call__(
137 *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
--> 138 outputs=outputs, silent=silent
139 )
140
/usr/local/lib/python3.7/dist-packages/shap/explainers/_explainer.py in __call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args, **kwargs)
266 row_result = self.explain_row(
267 *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
--> 268 batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
269 )
270 values.append(row_result.get("values", None))
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in explain_row(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, fixed_context, *row_args)
159 # if not fixed background or no base value assigned then compute base value for a row
160 if self._curr_base_value is None or not getattr(self.masker, "fixed_background", False):
--> 161 self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
162 f11 = fm(~m00.reshape(1, -1))[0]
163
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in __call__(self, masks, zero_index, batch_size)
65
66 else:
---> 67 return self._full_masking_call(masks, batch_size=batch_size)
68
69 def _full_masking_call(self, masks, zero_index=None, batch_size=None):
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in _full_masking_call(self, masks, zero_index, batch_size)
142
143 joined_masked_inputs = tuple([np.concatenate(v) for v in all_masked_inputs])
--> 144 outputs = self.model(*joined_masked_inputs)
145 _assert_output_input_match(joined_masked_inputs, outputs)
146 all_outputs.append(outputs)
/usr/local/lib/python3.7/dist-packages/shap/models/_transformers_pipeline.py in __call__(self, strings)
33 val = [val]
34 for obj in val:
---> 35 output[i, self.label2id[obj["label"]]] = sp.special.logit(obj["score"]) if self.rescale_to_logits else obj["score"]
36 return output
KeyError: 'label'
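A quick way to see where the KeyError originates is to call the pipeline directly and inspect its output: a fill-mask pipeline returns dicts with keys such as 'score', 'token', 'token_str', and 'sequence', with no 'label' key, which is the key the shap transformers wrapper indexes in the last frame above. A minimal sketch, reusing the fill_mask and tokenizer objects defined above (note the f prefix so the mask token is actually substituted into the string; the original call passes the braces literally):
# inspect what the fill-mask pipeline actually returns; there is no 'label' key,
# which is what shap's transformers wrapper tries to read in the last frame above
preds = fill_mask(f"x {tokenizer.mask_token} 10")
print(preds[0].keys())  # e.g. dict_keys(['score', 'token', 'token_str', 'sequence'])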
My code is:
from geopandas.tools import sjoin
# left spatial join: keep every airport, attaching attributes of the admin area it falls within
ref_bts_adm = sjoin(Airport, ref_adm, how='left', op='within')
The error message is:
--------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-10-5c31e9843a8a> in <module>
1 from geopandas.tools import sjoin
----> 2 ref_bts_adm = sjoin(Airport, ref_adm, how='left', op = 'within')
~/.local/lib/python3.6/site-packages/geopandas/tools/sjoin.py in sjoin(left_df, right_df, how, op, lsuffix, rsuffix)
87 _basic_checks(left_df, right_df, how, lsuffix, rsuffix)
88
---> 89 indices = _geom_predicate_query(left_df, right_df, op)
90
91 joined = _frame_join(indices, left_df, right_df, how, lsuffix, rsuffix)
~/.local/lib/python3.6/site-packages/geopandas/tools/sjoin.py in _geom_predicate_query(left_df, right_df, op)
171 # see discussion at https://github.com/geopandas/geopandas/pull/1421
172 predicate = "contains"
--> 173 sindex = left_df.sindex
174 input_geoms = right_df.geometry
175 else:
~/.local/lib/python3.6/site-packages/geopandas/base.py in sindex(self)
2628 [2]])
2629 """
-> 2630 return self.geometry.values.sindex
2631
2632     @property
~/.local/lib/python3.6/site-packages/geopandas/array.py in sindex(self)
307 def sindex(self):
308 if self._sindex is None:
--> 309 self._sindex = _get_sindex_class()(self.data)
310 return self._sindex
311
~/.local/lib/python3.6/site-packages/geopandas/sindex.py in _get_sindex_class()
20 return RTreeIndex
21 raise ImportError(
---> 22 "Spatial indexes require either `rtree` or `pygeos`. "
23 "See installation instructions at https://geopandas.org/install.html"
24 )
ImportError: Spatial indexes require either `rtree` or `pygeos`. See installation instructions at https://geopandas.org/install.html
Basically, I have a similar error to this one:
ImportError: Spatial indexes require either `rtree` or `pygeos` in geopanda but rtree is installed
But I don't have sudo rights.
I had a similar issue and solved it by installing libspatialindex via spack, e.g., spack install libspatialindex (note: this installs the library in your homedir and does not require sudo privileges). Then you can pip install pygeos and rtree, as indicated in the other answers.
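Once the libraries are in place, a quick sanity check along these lines should run without raising the ImportError (a sketch only; the two points are just placeholder geometries):
import rtree  # fails at import time if libspatialindex still cannot be found
import geopandas as gpd
from shapely.geometry import Point
# building a spatial index is exactly what raised the ImportError in the trace above
pts = gpd.GeoDataFrame(geometry=[Point(0, 0), Point(1, 1)])
_ = pts.sindex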
I'm trying to do sentiment analysis on Amazon product reviews, using the spaCy module to preprocess the text data. The code I'm using is exactly this. I modified the dataset I'm using according to what's shown in the link. I'm getting this error:
TypeError Traceback (most recent call last)
<ipython-input-139-bcbf2d3c9cce> in <module>
4 ('classifier', classifier)])
5 # Fit our data
----> 6 pipe_countvect.fit(X_train,y_train)
7 # Predicting with a test dataset
8 sample_prediction = pipe_countvect.predict(X_test)
~\.conda\envs\py36\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
328 """
329 fit_params_steps = self._check_fit_params(**fit_params)
--> 330 Xt = self._fit(X, y, **fit_params_steps)
331 with _print_elapsed_time('Pipeline',
332 self._log_message(len(self.steps) - 1)):
~\.conda\envs\py36\lib\site-packages\sklearn\pipeline.py in _fit(self, X, y, **fit_params_steps)
294 message_clsname='Pipeline',
295 message=self._log_message(step_idx),
--> 296 **fit_params_steps[name])
297 # Replace the transformer of the step with the fitted
298 # transformer. This is necessary when loading the transformer
~\.conda\envs\py36\lib\site-packages\joblib\memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
~\.conda\envs\py36\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
738 with _print_elapsed_time(message_clsname, message):
739 if hasattr(transformer, 'fit_transform'):
--> 740 res = transformer.fit_transform(X, y, **fit_params)
741 else:
742 res = transformer.fit(X, y, **fit_params).transform(X)
~\.conda\envs\py36\lib\site-packages\sklearn\feature_extraction\text.py in fit_transform(self, raw_documents, y)
1197
1198 vocabulary, X = self._count_vocab(raw_documents,
-> 1199 self.fixed_vocabulary_)
1200
1201 if self.binary:
~\.conda\envs\py36\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
1108 for doc in raw_documents:
1109 feature_counter = {}
-> 1110 for feature in analyze(doc):
1111 try:
1112 feature_idx = vocabulary[feature]
~\.conda\envs\py36\lib\site-packages\sklearn\feature_extraction\text.py in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
104 doc = preprocessor(doc)
105 if tokenizer is not None:
--> 106 doc = tokenizer(doc)
107 if ngrams is not None:
108 if stop_words is not None:
TypeError: 'str' object is not callable
I'm not sure what's causing this error or how to get rid of it. I'm pretty sure the count vectorizer produces a sparse matrix, not a string. One thing I've considered: the linked code uses the spaCy tokenizer as vectorizer = CountVectorizer(tokenizer = spacy_tokenizer, ngram_range=(1,1)), but when I ran the program it said spacy_tokenizer was undefined, so I used vectorizer = CountVectorizer(tokenizer = 'spacy', ngram_range=(1,1)) instead. But if I remove this, I don't know how to use the spaCy tokenizer, and either way I'm not certain this is the cause of the problem. Please help me out!
The error comes at this line:
doc = tokenizer(doc)
Since it says 'str' object is not callable, and the only thing being called here is the tokenizer object, it looks like your tokenizer is a string for some reason.
Based on the code you linked, it looks like the spacy_tokenizer object is being configured incorrectly, but that variable isn't defined anywhere in the code despite being passed as an option, so the code you linked to looks like it can't possibly run.
It would help if you could put together a minimal example that you can actually paste into the question here.
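In the meantime, here is a minimal sketch of a callable spaCy tokenizer that could be passed to CountVectorizer; it assumes the en_core_web_sm model is installed, and the lemma/stop-word filtering is just one common choice rather than whatever the linked code does:
import spacy
from sklearn.feature_extraction.text import CountVectorizer

nlp = spacy.load("en_core_web_sm")  # assumes this spaCy model has been downloaded

def spacy_tokenizer(text):
    # return lowercased lemmas, dropping stop words and punctuation
    return [tok.lemma_.lower() for tok in nlp(text) if not tok.is_stop and not tok.is_punct]

# pass the function itself, not the string 'spacy'
vectorizer = CountVectorizer(tokenizer=spacy_tokenizer, ngram_range=(1, 1))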
I'm new to Anaconda. I tried to run a PyTorch adversarial neural network example in Anaconda, and it shows an error that I have no clue about. Here is the code that downloads the dataset:
# MNIST Test dataset and dataloader declaration
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, download=True,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=1, shuffle=True)
This is the error message I got:
PermissionError Traceback (most recent call last)
<ipython-input-4-59310f6a37f8> in <module>
41
42 # MNIST Test dataset and dataloader declaration
43 test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor(),])),batch_size=1, shuffle=True)
44
45 # Define what device we are using
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in __init__(self, root, train, transform, target_transform, download)
77
78 if download:
79 self.download()
80
81 if not self._check_exists():
~\anaconda3\lib\site-packages\torchvision\datasets\mnist.py in download(self)
138 return
139
140 os.makedirs(self.raw_folder, exist_ok=True)
141 os.makedirs(self.processed_folder, exist_ok=True)
142
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
211 if head and tail and not path.exists(head):
212 try:
213 makedirs(head, exist_ok=exist_ok)
214 except FileExistsError:
215 # Defeats race condition when another thread created the path
~\anaconda3\lib\os.py in makedirs(name, mode, exist_ok)
221 return
222 try:
223 mkdir(name, mode)
224 except OSError:
225 # Cannot rely on checking for EEXIST, since the operating system
PermissionError: [WinError 5] Access is denied: '../data'
How about changing '../data' to another directory that you have write access to, like '/Users/***/Downloads/data' or somewhere else?
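For example (a sketch reusing the imports from the question; the exact directory is up to you, anywhere you have write permission works):
import os
# hypothetical writable location under the user's home directory
data_root = os.path.join(os.path.expanduser("~"), "mnist_data")

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(data_root, train=False, download=True,
                   transform=transforms.Compose([transforms.ToTensor()])),
    batch_size=1, shuffle=True)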
We are using the Python Elasticsearch API, version "elasticsearch-6.8.0", with AWS Elasticsearch version 6.7. The error only happens sometimes, on some machines (even when we make the queries within seconds of each other from different machines, one will succeed and the other will throw the error), so it is difficult to reproduce. We are using a paginated search like this:
def scrolled_search_query(self, index, query, size=es_config.ES_SIZE,
                          scroll=es_config.ES_SCROLL,
                          request_timeout=es_config.ES_TIMEOUT,
                          docs_only=False):
    # initial search request: returns the first page of hits plus a scroll id
    data = self.es_conn.search(index=index, size=size, scroll=scroll, body=query,
                               request_timeout=request_timeout)
    while len(data['hits']['hits']) > 0:
        for hit in data['hits']['hits']:
            if docs_only:
                yield hit['_source']
            else:
                yield hit
        # fetch the next page using the scroll id from the previous response
        data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
                                   request_timeout=request_timeout)
The error message is utterly useless, basically just saying that there's a 413, like this:
TransportError: TransportError(413, '')
From looking around at forums, it seems the problem is because ES is attempting to return a large payload. However, since we're using scroll pagination, I don't know why this would be an issue because each payload should be well below the 100MB limit. Since the error message is so terrible, I can't really see what it's trying to do in the database. It seems like the database is calculating the total data for the query and rejecting it without taking into account pagination. It's normal for us to request large payloads, and paginate them.
The full exception looks like this; what else do you need?
---------------------------------------------------------------------------
TransportError Traceback (most recent call last)
<ipython-input-3-a3fd964bf999> in <module>
41 blah=blah,
42 blah=blah,
---> 43 start_date=start_date)
44
45 df = df.drop(['#blah', '#blah'], axis=1)
~/Documents/Repositories/blah/blah/downloader.py in get_data(self, data_type, blah, blah, device, blah, start_date, end_date)
186 return self.gatorEs.get_blah(es_requests=self.es_requests, blah=blah, blah=blah, blah=blah,
--> 188 start_date=start_date, end_date=end_date)
189
190 if data_type == 'blah':
~/Documents/Repositories/gator_access/gator_access/es_gator_api.py in get_blah(self, es_requests, blah, blah, blah, blah, blah, start_date, end_date)
180 logger=self.logger)
181 if not data: continue
--> 182 es_data += data
183
184 self.logger.info(" ".join(["Download took {t} seconds for p:'{p}' d:'{d}'",
~/Documents/Repositories/gator_core/gator_core/elasticsearch/es_requests.py in scrolled_search_query(self, index, query, size, scroll, request_timeout, docs_only)
144
145 data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
--> 146 request_timeout=request_timeout)
147
148
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/utils.py in _wrapped(*args, **kwargs)
82 if p in kwargs:
83 params[p] = kwargs.pop(p)
---> 84 return func(*args, params=params, **kwargs)
85
86 return _wrapped
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/__init__.py in scroll(self, scroll_id, body, params)
1353 """
1354 return self.transport.perform_request(
-> 1355 "GET", _make_path("_search", "scroll", scroll_id), params=params, body=body
1356 )
1357
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/transport.py in perform_request(self, method, url, headers, params, body)
351 headers=headers,
352 ignore=ignore,
--> 353 timeout=timeout,
354 )
355
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py in perform_request(self, method, url, params, body, timeout, ignore, headers)
139 raw_data,
140 )
--> 141 self._raise_error(response.status_code, raw_data)
142
143 self.log_request_success(
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/base.py in _raise_error(self, status_code, raw_data)
160
161 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
--> 162 status_code, error_message, additional_info
163 )
TransportError: TransportError(413, '')
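One thing that may be worth checking, since the last frames above show the scroll ID being appended to the URL path via _make_path("_search", "scroll", scroll_id): scroll IDs can grow quite long, and if whatever sits in front of the cluster limits request size, moving the ID into the request body is a common workaround. A hedged sketch of the scroll call rewritten that way (the client's scroll() also accepts a body argument, as its signature in the trace shows); whether this actually avoids the 413 on AWS Elasticsearch would need to be verified:
# sketch: send the scroll id and keep-alive in the request body instead of the URL path
data = self.es_conn.scroll(body={"scroll_id": data["_scroll_id"], "scroll": scroll},
                           request_timeout=request_timeout)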