TypeError: Expected data to be int, Sequence or Mapping, but got method (PyTorch Lightning) - pytorch-lightning

I have this LightningDataModule:
class MTmetricDataModule(pl.LightningDataModule):
    def __init__(self, df):
        super().__init__()
        self.reference = df['reference'].astype(str)
        self.translation = df['translation'].astype(str)
        self.z_score = df['avg-score']
        self.z_score = np.array(self.z_score)

    def setup(self, stage=None):
        (self.reference_train, self.reference_test,
         self.translation_train, self.translation_test,
         self.z_score_train, self.z_score_test) = train_test_split(
            self.reference, self.translation, self.z_score, test_size=0.2)
        (self.reference_test, self.reference_dev,
         self.translation_test, self.translation_dev,
         self.z_score_test, self.z_score_dev) = train_test_split(
            self.reference_test, self.translation_test, self.z_score_test, test_size=0.1)

        self.df_train = pd.DataFrame()
        self.df_train['reference'] = self.reference_train
        self.df_train['translation'] = self.translation_train
        self.df_train['z_score'] = self.z_score_train
        self.train = self.df_train.to_dict("records")

        self.df_dev = pd.DataFrame()
        self.df_dev['reference'] = self.reference_dev
        self.df_dev['translation'] = self.translation_dev
        self.df_dev['z_score'] = self.z_score_dev
        self.dev = self.df_dev.to_dict("records")

        self.df_test = pd.DataFrame()
        self.df_test['reference'] = self.reference_test
        self.df_test['translation'] = self.translation_test
        self.df_test['z_score'] = self.z_score_test
        self.test = self.df_test.to_dict("records")

    def train_dataloader(self) -> DataLoader:
        return DataLoader(
            dataset=self.train,
            batch_size=batch_size
        )

    def val_dataloader(self) -> DataLoader:
        return DataLoader(
            dataset=self.dev,
            batch_size=1
        )

    def test_dataloader(self) -> DataLoader:
        return DataLoader(
            dataset=self.test,
            batch_size=1
        )
I then feed it into the Trainer together with my LightningModule:
data = MTmetricDataModule(df)
model = MTmetric()
trainer = Trainer(gpus=1, progress_bar_refresh_rate=20, max_epochs=2)
trainer.fit(model, data)
But I am getting the error "RuntimeError: Input, output and indices must be on the current device".
I am running it in Colab on a GPU instance and nothing seems to make it work.
Anyone know how to fix it?
Thank you

It means some of your data is on the GPU while some is on the CPU. Please move all of the data onto the same device and run it again.
Currently your data loader produces batches on the CPU while the Trainer runs the model on the GPU, which is likely the cause of the error.

Using .to(device) might be useful, where device is torch.device('cuda') or torch.device('cpu').
This solution on the PyTorch Forums may also be helpful.
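As a minimal sketch of that advice (assuming each batch is a dict of tensors and Python lists, like the records produced by the DataModule above; the helper name move_batch_to_device is made up for illustration):

import torch

def move_batch_to_device(batch, device):
    """Recursively move every tensor in a (possibly nested) batch to `device`."""
    if torch.is_tensor(batch):
        return batch.to(device)
    if isinstance(batch, dict):
        return {k: move_batch_to_device(v, device) for k, v in batch.items()}
    if isinstance(batch, (list, tuple)):
        return type(batch)(move_batch_to_device(v, device) for v in batch)
    return batch  # non-tensor leaves (strings, ints) stay where they are

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch = {'z_score': torch.tensor([0.1, 0.2]), 'reference': ['a', 'b']}
batch = move_batch_to_device(batch, device)  # every tensor now lives on `device`

Note that Lightning normally does this transfer for you when batches are tensors or containers of tensors; a manual .to(device) is usually only needed for tensors created outside the dataloader (inside a LightningModule, self.device tells you where the model lives).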

Related

User warning when I use more than one gpu with trainer function

I am doing text classification, and for training the model I am using the Trainer class from HuggingFace. The code is:
def get_model(name_model):
    model = AutoModelForSequenceClassification.from_pretrained(
        name_model,
        num_labels=2,
        problem_type="single_label_classification"
    )
    return model

model = get_model(name_model)

training_args = TrainingArguments(
    learning_rate=3e-5,
    max_grad_norm=1.0,
    # weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=1,
    logging_steps=300,
    output_dir="./training_output",
    overwrite_output_dir=True,
    seed=42,
    fp16=True,
    remove_unused_columns=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train
)
trainer.args._n_gpu = 2
So, when it finishes training the model (which is a BERT model), it shows a warning.
I am afraid that the model is not correctly trained and that the predictions it makes are not okay.
Do you know how to fix this? With only one GPU there are no warnings.
I tried setting fp16=True because I read in another forum that it could help, and I tried setting is_model_parallel=True, but that didn't fix it. I also tried setting place_model_on_device=True, but it did not work either.

Fine-tune custom pre-trained language model

I am new to fine-tuning transformer models. I am trying to fine-tune this model on my dataset but I got an error. The code:
distil_bert = 'Chramer/remote-sensing-distilbert-cased'

config = transformers.BertConfig(dropout=0.2, attention_dropout=0.2)
config.output_hidden_states = False
transformer_model = transformers.TFBertModel.from_pretrained(distil_bert, config=config, from_pt=True)

input_ids = tf.keras.layers.Input(shape=(128,), name='input_ids', dtype='int32')
attention_mask = tf.keras.layers.Input(shape=(128,), name='attention_mask', dtype='int32')
input_segments = tf.keras.layers.Input(shape=(128,), name='input_segments', dtype='int32')

embedding_layer = transformer_model(input_ids, attention_mask, input_segments)[0]
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True,
                                                       dropout=0.1, recurrent_dropout=0.1))(embedding_layer)
X = tf.keras.layers.GlobalMaxPool1D()(X)
X = tf.keras.layers.Dense(50, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(1, activation='sigmoid')(X)

model = tf.keras.Model(inputs={'input_ids': input_ids, 'attention_mask': attention_mask,
                               'input_segments': input_segments}, outputs=X)
When I run this code I get an error (the message was posted as a screenshot, so its exact text is not available here).
I would be thankful if anyone could help. I am using TensorFlow.

XAI for transformer custom model using AllenNLP

I have been solving an NER problem for a Vietnamese dataset with 15 tags in IO format. I have been using the AllenNLP Interpret toolkit for my model, but I cannot configure it completely.
I have used a pre-trained language model, "xlm-roberta-base", from HuggingFace. I have concatenated the 4 last BERT layers and passed them through a linear layer. You can see the model architecture in the source below.
class BaseBertSoftmax(nn.Module):
    def __init__(self, model, drop_out, num_labels):
        super(BaseBertSoftmax, self).__init__()
        self.num_labels = num_labels
        self.model = model
        self.dropout = nn.Dropout(drop_out)
        self.classifier = nn.Linear(4 * 768, num_labels)  # 4 last layers

    def forward_custom(self, input_ids, attention_mask=None,
                       labels=None, head_mask=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = torch.cat((outputs[1][-1], outputs[1][-2],
                                     outputs[1][-3], outputs[1][-4]), -1)
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)  # bsz, seq_len, num_labels
        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss(ignore_index=0)
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)[active_loss]
                active_labels = labels.view(-1)[active_loss]
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs  # scores, (hidden_states), (attentions)
What steps do I have to take to integrate this model into AllenNLP Interpret?
Could you please help me with this problem?

How to test a model before fine-tuning in Pytorch Lightning?

Doing things on Google Colab.
transformers: 4.10.2
pytorch-lightning: 1.2.7
import glob
import random

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import BertJapaneseTokenizer, BertForSequenceClassification
import pytorch_lightning as pl

dataset_for_loader = [
    {'data': torch.tensor([0, 1]), 'labels': torch.tensor(0)},
    {'data': torch.tensor([2, 3]), 'labels': torch.tensor(1)},
    {'data': torch.tensor([4, 5]), 'labels': torch.tensor(2)},
    {'data': torch.tensor([6, 7]), 'labels': torch.tensor(3)},
]
loader = DataLoader(dataset_for_loader, batch_size=2)
for idx, batch in enumerate(loader):
    print(f'# batch {idx}')
    print(batch)

category_list = [
    'dokujo-tsushin',
    'it-life-hack',
    'kaden-channel',
    'livedoor-homme',
    'movie-enter',
    'peachy',
    'smax',
    'sports-watch',
    'topic-news',
]

# MODEL_NAME is defined elsewhere in the notebook
tokenizer = BertJapaneseTokenizer.from_pretrained(MODEL_NAME)
max_length = 128
dataset_for_loader = []
for label, category in enumerate(tqdm(category_list)):
    # the directory ./text has lots of articles, categorized by category;
    # they are plain text files whose content begins on the fourth line
    for file in glob.glob(f'./text/{category}/{category}*'):
        lines = open(file).read().splitlines()
        text = '\n'.join(lines[3:])
        encoding = tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True
        )
        encoding['labels'] = label
        encoding = {k: torch.tensor(v) for k, v in encoding.items()}
        dataset_for_loader.append(encoding)

SEED = lambda: 0.0
# random.shuffle(dataset_for_loader)  # shuffle randomly
random.shuffle(dataset_for_loader, SEED)  # fixed 'random' function makes the shuffle deterministic

n = len(dataset_for_loader)
n_train = int(0.6 * n)
n_val = int(0.2 * n)
dataset_train = dataset_for_loader[:n_train]
dataset_val = dataset_for_loader[n_train:n_train + n_val]
dataset_test = dataset_for_loader[n_train + n_val:]

dataloader_train = DataLoader(
    dataset_train, batch_size=32, shuffle=True
)
dataloader_val = DataLoader(dataset_val, batch_size=256)
dataloader_test = DataLoader(dataset_test, batch_size=256)
class BertForSequenceClassification_pl(pl.LightningModule):
    def __init__(self, model_name, num_labels, lr):
        super().__init__()
        self.save_hyperparameters()
        self.bert_sc = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        )

    def training_step(self, batch, batch_idx):
        output = self.bert_sc(**batch)
        loss = output.loss
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        output = self.bert_sc(**batch)
        val_loss = output.loss
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        labels = batch.pop('labels')
        output = self.bert_sc(**batch)
        labels_predicted = output.logits.argmax(-1)
        num_correct = (labels_predicted == labels).sum().item()
        accuracy = num_correct / labels.size(0)
        self.log('accuracy', accuracy)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

checkpoint = pl.callbacks.ModelCheckpoint(
    monitor='val_loss',
    mode='min',
    save_top_k=1,
    save_weights_only=True,
    dirpath='model/',
)

trainer = pl.Trainer(
    gpus=1,
    max_epochs=10,
    callbacks=[checkpoint]
)

model = BertForSequenceClassification_pl(
    MODEL_NAME, num_labels=9, lr=1e-5
)

### (a) ###

# I think this is where I am doing fine-tuning
trainer.fit(model, dataloader_train, dataloader_val)

# this is to score after fine-tuning
test = trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
But I am not really sure how to run a test before fine-tuning, so that I can compare the model before and after fine-tuning and show how effective the fine-tuning is.
Inserting the following two lines at ### (a) ###:
test = trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
I got this result:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-c8b2c67f2d5c> in <module>()
9
10 # 6-19
---> 11 test = trainer.test(test_dataloaders=dataloader_test)
12 print(f'Accuracy: {test[0]["accuracy"]:.2f}')
13
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in test(self, model, test_dataloaders, ckpt_path, verbose, datamodule)
896 self.verbose_test = verbose
897
--> 898 self._set_running_stage(RunningStage.TESTING, model or self.lightning_module)
899
900 # If you supply a datamodule you can't supply train_dataloader or val_dataloaders
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _set_running_stage(self, stage, model_ref)
563 the trainer and the model
564 """
--> 565 model_ref.running_stage = stage
566 self._running_stage = stage
567
AttributeError: 'NoneType' object has no attribute 'running_stage'
I noticed that Trainer.fit() can take None as arguments other than model, so I tried this:
trainer.fit(model)
test=trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
The result:
MisconfigurationException: No `train_dataloader()` method defined. Lightning `Trainer` expects as minimum a `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined.
Thanks.
The Trainer needs to have .fit() called first in order to set up a lot of internal state; only then can you call .test() or other methods.
You are right about putting a .fit() just before .test(), but the fit call needs to be a valid one: you have to feed a dataloader/datamodule to it. Since you don't want to do any training/validation in this fit call, just pass limit_[train/val]_batches=0 when constructing the Trainer.
trainer = Trainer(gpus=..., ..., limit_train_batches=0, limit_val_batches=0)
trainer.fit(model, dataloader_train, dataloader_val)
trainer.test(model, dataloader_test) # without fine-tuning
The fit call here will just set things up for you and skip training/validation. Then the testing follows. Next time, run the same code but without the limit_[train/val]_batches arguments; this will do the actual training (fine-tuning) for you:
trainer = Trainer(gpus=..., ...)
trainer.fit(model, dataloader_train, dataloader_val)
trainer.test(model, dataloader_test) # with fine-tuning
Clarifying a bit about .fit() taking None for everything but the model: it's not quite true - you must provide either a DataLoader or a DataModule.
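For completeness, one more way around the MisconfigurationException above: a minimal toy sketch (made-up class name, not from the question) where the LightningModule defines its own train_dataloader(), so that trainer.fit(model) has something to train on even without explicit loaders:

import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl

class LitWithLoaders(pl.LightningModule):
    """Toy module whose dataloaders live on the module itself."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(2, 2)

    def training_step(self, batch, batch_idx):
        logits = self.layer(batch['data'].float())
        return torch.nn.functional.cross_entropy(logits, batch['labels'])

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def train_dataloader(self):
        # With this defined, trainer.fit(model) works without explicit loaders.
        dataset = [{'data': torch.tensor([0, 1]), 'labels': torch.tensor(0)},
                   {'data': torch.tensor([2, 3]), 'labels': torch.tensor(1)}]
        return DataLoader(dataset, batch_size=2)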

Unable to predict when loading a Tensorflow model in Go

I've loaded a TensorFlow model in Go and cannot get predictions: it keeps complaining about a shape mismatch for a simple 2D array. I would appreciate any ideas; thank you so much in advance.
Error running the session with input, err: You must feed a value for placeholder tensor 'theoutput_target' with dtype float
[[Node: theoutput_target = Placeholder[_output_shapes=[[?,?]], dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
The input tensor being sent is a [][]float32{ {1.0}, }:
a := [][]float32{ {1.0}, }
tensor, terr := tf.NewTensor(a)
if terr != nil {
    fmt.Printf("Error creating input tensor: %s\n", terr.Error())
    return
}
result, runErr := model.Session.Run(
    map[tf.Output]*tf.Tensor{
        model.Graph.Operation("theinput").Output(0): tensor,
    },
    []tf.Output{
        model.Graph.Operation("theoutput_target").Output(0),
    },
    nil,
)
and the model is generated via Keras and exported to TF using SavedModelBuilder after:
layer_name_input = "theinput"
layer_name_output = "theoutput"

def get_encoder():
    model = Sequential()
    model.add(Dense(5, input_dim=1))
    model.add(Activation("relu"))
    model.add(Dense(5, input_dim=1))
    return model

inputs = Input(shape=(1,), name=layer_name_input)
encoder = get_encoder()
model = encoder(inputs)
model = Activation("relu")(model)
objective = Dense(1, name=layer_name_output)(model)
model = Model(inputs=[inputs], outputs=objective)
model.compile(loss='mean_squared_error', optimizer='sgd')
EDIT - fixed, it was a problem with exporting from Keras to TF (layer names). Pasting the export here, hopefully helpful for someone else:
def export_to_tf(keras_model_path, export_path, export_version, is_functional=False):
    sess = tf.Session()
    K.set_session(sess)
    K.set_learning_phase(0)
    export_path = os.path.join(export_path, str(export_version))
    model = load_model(keras_model_path)
    config = model.get_config()
    weights = model.get_weights()
    if is_functional == True:
        model = Model.from_config(config)
    else:
        model = Sequential.from_config(config)
    model.set_weights(weights)
    with K.get_session() as sess:
        inputs = [(model_input.name.split(":")[0], model_input)
                  for model_input in model.inputs]
        outputs = [(model_output.name.split(":")[0], model_output)
                   for model_output in model.outputs]
        signature = predict_signature_def(inputs=dict(inputs),
                                          outputs=dict(outputs))
        input_descriptor = [{'name': item[0], 'shape': item[1].shape.as_list()}
                            for item in inputs]
        output_descriptor = [{'name': item[0], 'shape': item[1].shape.as_list()}
                             for item in outputs]
        builder = saved_model_builder.SavedModelBuilder(export_path)
        builder.add_meta_graph_and_variables(
            sess=sess,
            tags=[tag_constants.SERVING],
            signature_def_map={signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature})
        builder.save()
        descriptor = dict()
        descriptor["inputs"] = input_descriptor
        descriptor["outputs"] = output_descriptor
        pprint.pprint(descriptor)
There's something strange in your code and error: TensorFlow is complaining about a missing value for the placeholder named 'theoutput_target', while this placeholder is never defined in the code you posted. Instead, your code defines a placeholder named 'theinput'.
Also, I suggest you use a more complete and easy-to-use wrapper around the TensorFlow API: tfgo
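If it helps anyone debugging a similar name mismatch: a minimal sketch (TF 1.x API, matching the export code above; the export directory path is a placeholder) that prints the input/output tensor names recorded in a SavedModel's signatures, so the Go side can reference the real operation names instead of guessing:

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    # "serve" is tf.saved_model.tag_constants.SERVING
    meta_graph = tf.saved_model.loader.load(sess, ["serve"], "/path/to/export")
    for key, sig in meta_graph.signature_def.items():
        print("signature:", key)
        print("  inputs: ", {k: v.name for k, v in sig.inputs.items()})
        print("  outputs:", {k: v.name for k, v in sig.outputs.items()})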
