Fine-tuning BERT for NER on macOS

I'm using a MacBook Air running macOS Monterey 12.5 (an update to Ventura 13.1 is available).
Python version 3.10.8 (I also tried 3.11).
Pylance pointed out that none of the imports I was trying to execute were being resolved, so I changed the VS Code interpreter to Python 3.10.
Anyway, here's the code:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import BertTokenizerFast
from transformers import BertForTokenClassification
from torch.utils.data import Dataset, DataLoader
df = pd.read_csv('ner.csv')
labels = [i.split() for i in df['labels'].values.tolist()]
unique_labels = set()
for lb in labels:
    [unique_labels.add(i) for i in lb if i not in unique_labels]
# print(unique_labels)
labels_to_ids = {k: v for v, k in enumerate(sorted(unique_labels))}
ids_to_labels = {v: k for v, k in enumerate(sorted(unique_labels))}
# print(labels_to_ids)
text = df['text'].values.tolist()
example = text[36]
#print(example)
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
text_tokenized = tokenizer(example, padding='max_length', max_length=512, truncation=True, return_tensors='pt')
'''
print(text_tokenized)
print(tokenizer.decode(text_tokenized.input_ids[0]))
'''
def align_label_example(tokenized_input, labels):
    word_ids = tokenized_input.word_ids()
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except:
                label_ids.append(-100)
        else:
            label_ids.append(labels_to_ids[labels[word_idx]] if label_all_tokens else -100)
        previous_word_idx = word_idx
    return label_ids
label = labels[36]
label_all_tokens = False
new_label = align_label_example(text_tokenized, label)
'''
print(new_label)
print(tokenizer.convert_ids_to_tokens(text_tokenized['input_ids'][0]))
'''
def align_label(texts, labels):
    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)
    word_ids = tokenized_inputs.word_ids()
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except:
                label_ids.append(-100)
        else:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]] if label_all_tokens else -100)
            except:
                label_ids.append(-100)
        previous_word_idx = word_idx
    return label_ids
class DataSequence(torch.utils.data.Dataset):

    def __init__(self, df):
        lb = [i.split() for i in df['labels'].values.tolist()]
        txt = df['text'].values.tolist()
        self.texts = [tokenizer(str(i), padding='max_length', max_length=512,
                                truncation=True, return_tensors='pt') for i in txt]
        self.labels = [align_label(i, j) for i, j in zip(txt, lb)]

    def __len__(self):
        return len(self.labels)

    def get_batch_data(self, idx):
        # return the tokenized text for one sample (used by __getitem__)
        return self.texts[idx]

    def get_batch_labels(self, idx):
        return torch.LongTensor(self.labels[idx])

    def __getitem__(self, idx):
        batch_data = self.get_batch_data(idx)
        batch_labels = self.get_batch_labels(idx)
        return batch_data, batch_labels
df = df[0:1000]
df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42),
[int(.8 * len(df)), int(.9 * len(df))])
class BertModel(torch.nn.Module):

    def __init__(self):
        super(BertModel, self).__init__()
        self.bert = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=len(unique_labels))

    def forward(self, input_id, mask, label):
        output = self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)
        return output
def train_loop(model, df_train, df_val):
    train_dataset = DataSequence(df_train)
    val_dataset = DataSequence(df_val)
    train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, num_workers=4, batch_size=BATCH_SIZE)
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
    if use_cuda:
        model = model.cuda()
    best_acc = 0
    best_loss = 1000
    for epoch_num in range(EPOCHS):
        total_acc_train = 0
        total_loss_train = 0
        model.train()
        for train_data, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device)
            mask = train_data['attention_mask'].squeeze(1).to(device)
            input_id = train_data['input_ids'].squeeze(1).to(device)
            optimizer.zero_grad()
            loss, logits = model(input_id, mask, train_label)
            for i in range(logits.shape[0]):
                logits_clean = logits[i][train_label[i] != -100]
                label_clean = train_label[i][train_label[i] != -100]
                predictions = logits_clean.argmax(dim=1)
                acc = (predictions == label_clean).float().mean()
                total_acc_train += acc
                total_loss_train += loss.item()
            loss.backward()
            optimizer.step()
        model.eval()
        total_acc_val = 0
        total_loss_val = 0
        for val_data, val_label in val_dataloader:
            val_label = val_label.to(device)
            mask = val_data['attention_mask'].squeeze(1).to(device)
            input_id = val_data['input_ids'].squeeze(1).to(device)
            loss, logits = model(input_id, mask, val_label)
            for i in range(logits.shape[0]):
                logits_clean = logits[i][val_label[i] != -100]
                label_clean = val_label[i][val_label[i] != -100]
                predictions = logits_clean.argmax(dim=1)
                acc = (predictions == label_clean).float().mean()
                total_acc_val += acc
                total_loss_val += loss.item()
        val_accuracy = total_acc_val / len(df_val)
        val_loss = total_loss_val / len(df_val)
        print(
            f'Epochs: {epoch_num + 1} | Loss: {total_loss_train / len(df_train): .3f} | Accuracy: {total_acc_train / len(df_train): .3f} | Val_Loss: {total_loss_val / len(df_val): .3f} | Accuracy: {total_acc_val / len(df_val): .3f}')
LEARNING_RATE = 5e-3
EPOCHS = 5
BATCH_SIZE = 2
model = BertModel()
train_loop(model, df_train, df_val)
And the debugger says:
Exception has occurred: RuntimeError (note: full exception trace is shown but execution is paused at: <module>)
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 141, in train_loop
for train_data, train_label in tqdm(train_dataloader):
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 197, in <module>
train_loop(model, df_train, df_val)
File "<string>", line 1, in <module> (Current frame)
By the way, despite being on a Mac I have Anaconda Navigator installed, but I've been running this code in VS Code. I installed numpy, torch, datasets and other libraries with the pip3 command from the Homebrew Python.
I'm at a loss. I can run the code in a Google Colab or Jupyter notebook, and I know that training models on my humble Mac is not advisable, but I'm only doing this as an exercise so I can later train and use the model on a much more powerful machine.
Please help me with this issue; I've been trying to find a solution for days.
Peace and happy holidays.
I've tried solving the issue by writing:
if __name__ == '__main__':
    freeze_support()
I've tried using this:
import parallelTestModule
extractor = parallelTestModule.ParallelExtractor()
extractor.runInParallel(numProcesses=2, numThreads=4)

So...
It turns out the correct way to solve this is to wrap the model creation and training call in a function and guard it with if __name__ == '__main__':, like so:
def run():
    model = BertModel()
    torch.multiprocessing.freeze_support()
    print('loop')
    train_loop(model, df_train, df_val)

if __name__ == '__main__':
    run()
This replaces the bare train_loop call at the end of the script. Issue solved. For more see this link: https://github.com/pytorch/pytorch/issues/5858
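As a side note on why the guard is needed: macOS starts DataLoader worker processes with spawn, and spawned workers re-import the main module, so any unguarded top-level training code runs again in each child. A minimal sketch of an alternative workaround, assuming single-process data loading is acceptable, is to drop the workers entirely:
# Hypothetical variant of the DataLoader creation inside train_loop:
# num_workers=0 loads batches in the main process, so no child process is
# spawned and the bootstrapping error cannot occur (slower, but simple).
train_dataloader = DataLoader(train_dataset, num_workers=0, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, num_workers=0, batch_size=BATCH_SIZE)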

Related

py_environment 'time_step' doesn't match 'time_step_spec'

I have created a custom PyEnvironment via TF-Agents. However, I can't validate the environment or take steps within it with py_policy.action.
I'm confused as to what is expected from the time_step_specs.
I have tried converting it via tf_py_environment.TFPyEnvironment and was successful in taking actions with a tf_policy, but I'm still confused as to the difference.
import abc
import numpy as np
from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.trajectories import time_step as ts
from tf_agents.policies import random_tf_policy
import tensorflow as tf
import tf_agents

class TicTacToe(py_environment.PyEnvironment):
    def __init__(self, n):
        super(TicTacToe, self).__init__()
        self.n = n
        self.winner = None
        self._episode_ended = False
        self.inital_state = np.zeros((n, n))
        self._state = self.inital_state
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(n, n), dtype='int32', minimum=-1, maximum=1,
            name='TicTacToe board state spec')
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype='int32', minimum=0, maximum=8,
            name='TicTacToe action spec')

    def observation_spec(self):
        return self._observation_spec

    def action_spec(self):
        return self._action_spec

    def _reset(self):
        return ts.restart(self.inital_state)

    def check_game_over(self):
        for i in range(self.n):
            if (sum(self._state[i, :]) == self.n) or (sum(self._state[:, i]) == self.n):
                self.winner = 1
                return True
            elif (sum(self._state[i, :]) == -self.n) or (sum(self._state[:, i]) == -self.n):
                self.winner = -1
                return True
        if (self._state.trace() == self.n) or (self._state[::-1].trace() == self.n):
            self.winner = 1
            return True
        elif (self._state.trace() == -self.n) or (self._state[::-1].trace() == -self.n):
            self.winner = -1
            return True
        if not (0 in self._state):
            return True

    def _step(self, action):
        self._state[action // 3, action % 3] = 1
        self._episode_ended = self.check_game_over
        if self._episode_ended == True:
            if self.winner == 1:
                reward = 1
            elif self.winner == None:
                reward = 0
            else:
                reward = -1
            return ts.termination(self._state, dtype='int32', reward=reward)
        else:
            return ts.transition(self._state, dtype='int32', reward=0.0, discount=0.9)

env = TicTacToe(3)
utils.validate_py_environment(env, episodes=5)
This is the error I get:
ValueError Traceback (most recent call last)
in
----> 1 utils.validate_py_environment(env, episodes=5)
C:\Users\bzhang\AppData\Local\Continuum\anaconda3\lib\site-packages\tf_agents\environments\utils.py in validate_py_environment(environment, episodes)
58 raise ValueError(
59 'Given time_step: %r does not match expected time_step_spec: %r' %
---> 60 (time_step, time_step_spec))
61
62 action = random_policy.action(time_step).action
ValueError: Given time_step: TimeStep(step_type=array(0), reward=array(0., dtype=float32), discount=array(1., dtype=float32), observation=array([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])) does not match expected time_step_spec: TimeStep(step_type=ArraySpec(shape=(), dtype=dtype('int32'), name='step_type'), reward=ArraySpec(shape=(), dtype=dtype('float32'), name='reward'), discount=BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0), observation=BoundedArraySpec(shape=(3, 3), dtype=dtype('int32'), name='TicTacToe board state spec', minimum=-1, maximum=1))
Your observation does not match the spec; you need to pass dtype=np.int32 to the NumPy array to make sure the types match.
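For example, a minimal sketch of that fix against the TicTacToe class above: create the board with the same dtype declared in the observation spec, so the emitted observations match it.
# Board initialised with the dtype declared in the observation spec (int32)
self.inital_state = np.zeros((n, n), dtype=np.int32)
self._state = self.inital_state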

Can't pickle <function <lambda> when using multiprocessing Pool.map()

I'm trying to parallelize my Python script with the multiprocessing library. My function is part of a class, and I used Pool.map.
import numpy as np
import pandas as pd
import netCDF4
import itertools
import multiprocessing as mpp
from tqdm import tqdm

class catch2grid(object):
    def __init__(self):
        """Init of catch2grid."""
        self.pbar = None
        ...

    def main(self, db_Qobs_meta_dir, ens_mean_dir, ens_sd_dir, db_Qobs_dir,
             range_start, range_end):
        """Sequential computing of several flow percentiles for Qobs and Qsim,
        the standard deviation of the flow percentiles of Qsim and the
        KGE alpha.

        db_Qobs_meta_dir -- Path to file with meta information on the
                            catchments
        ens_mean_dir -- Path to file with runoff ensemble mean
        ens_sd_dir -- Path to file with runoff ensemble standard deviation
        db_Qobs_dir -- Path to folder with observed runoff database_Qobs_new
        range_start -- starting value of range
        range_end -- stopping value of range
        """
        range_catch = range(range_start, range_end)
        df_meta = self.import_meta(db_Qobs_meta_dir)
        df_meta = self.select_catchments(df_meta)
        Ens_mean, Ens_mean_Q = self.import_ens_mean(ens_mean_dir)
        Ens_sd, Ens_sd_Q = self.import_ens_sd(ens_sd_dir)
        Grid_lats_cen, Grid_lons_cen = self.grid_cen_arr(Ens_mean)
        df_Qobs_percs = pd.DataFrame(index=range_catch,
                                     columns=['Catch_name', 't_scale_Qobs', 'Time_cov',
                                              'Q_5', 'Q_25', 'Q_50', 'Q_75', 'Q_95'])
        df_Qsim_percs = pd.DataFrame(index=range_catch,
                                     columns=['Catch_name', 'Q_5', 'Q_25', 'Q_50',
                                              'Q_75', 'Q_95'])
        df_sdQsim_percs = pd.DataFrame(index=range_catch,
                                       columns=['Catch_name', 'sdQsim_5', 'sdQsim_25',
                                                'sdQsim_50', 'sdQsim_75', 'sdQsim_95'])
        df_KGE_alpha = pd.DataFrame(index=range_catch, columns=['KGE_alpha'])
        df_Qobs_percs['Catch_name'] = df_meta['Catchments'][range_catch[0]:range_catch[-1]+1]
        df_Qsim_percs['Catch_name'] = df_meta['Catchments'][range_catch[0]:range_catch[-1]+1]
        df_sdQsim_percs['Catch_name'] = df_meta['Catchments'][range_catch[0]:range_catch[-1]+1]
        df_KGE_alpha['Catch_name'] = df_meta['Catchments'][range_catch[0]:range_catch[-1]+1]
        for k in range_catch:
            sum_Lat_bool, sum_Lon_bool, Lat_idx, Lon_idx = self.matchgrid(df_meta,
                                                                          db_Qobs_dir,
                                                                          Grid_lats_cen,
                                                                          Grid_lons_cen,
                                                                          k)
            df_Q, t_scale_Qobs = self.Qsim_to_catch(df_meta, db_Qobs_dir,
                                                    Ens_mean, Ens_mean_Q,
                                                    sum_Lat_bool, sum_Lon_bool,
                                                    Lat_idx, Lon_idx, k)
            df_sdQsim = self.sdQsim_to_catch(df_meta, db_Qobs_dir, Ens_sd,
                                             Ens_sd_Q, sum_Lat_bool,
                                             sum_Lon_bool, Lat_idx, Lon_idx, k)
            df_Qobs_percs['t_scale_Qobs'][k] = t_scale_Qobs
            no_NAs = df_Q['Qobs'].isnull().sum().sum()
            df_Qobs_percs['Time_cov'][k] = 1 - (no_NAs/len(df_Q.index))
            df_Qobs_percs['Q_95'][k] = self.flow_perc(df_Q['Qobs'], perc=95)
            df_Qobs_percs['Q_75'][k] = self.flow_perc(df_Q['Qobs'], perc=75)
            df_Qobs_percs['Q_50'][k] = self.flow_perc(df_Q['Qobs'], perc=50)
            df_Qobs_percs['Q_25'][k] = self.flow_perc(df_Q['Qobs'], perc=25)
            df_Qobs_percs['Q_5'][k] = self.flow_perc(df_Q['Qobs'], perc=5)
            df_Qsim_percs['Q_95'][k] = self.flow_perc(df_Q['Qsim'], perc=95)
            df_Qsim_percs['Q_75'][k] = self.flow_perc(df_Q['Qsim'], perc=75)
            df_Qsim_percs['Q_50'][k] = self.flow_perc(df_Q['Qsim'], perc=50)
            df_Qsim_percs['Q_25'][k] = self.flow_perc(df_Q['Qsim'], perc=25)
            df_Qsim_percs['Q_5'][k] = self.flow_perc(df_Q['Qsim'], perc=5)
            df_sdQsim_percs['sdQsim_95'][k] = self.flow_perc_sd(df_Q['Qsim'], df_sdQsim['sdQsim'], perc=95)
            df_sdQsim_percs['sdQsim_75'][k] = self.flow_perc_sd(df_Q['Qsim'], df_sdQsim['sdQsim'], perc=75)
            df_sdQsim_percs['sdQsim_50'][k] = self.flow_perc_sd(df_Q['Qsim'], df_sdQsim['sdQsim'], perc=50)
            df_sdQsim_percs['sdQsim_25'][k] = self.flow_perc_sd(df_Q['Qsim'], df_sdQsim['sdQsim'], perc=25)
            df_sdQsim_percs['sdQsim_5'][k] = self.flow_perc_sd(df_Q['Qsim'], df_sdQsim['sdQsim'], perc=5)
            df_KGE_alpha['KGE_alpha'][k] = self.KGE_alpha(df_Q['Qsim'], df_Q['Qobs'])
            # display progress
            self.pbar.update(1)
        df_Qobs_percs.index = df_Qobs_percs['Catch_name']
        df_Qsim_percs.index = df_Qsim_percs['Catch_name']
        df_sdQsim_percs.index = df_sdQsim_percs['Catch_name']
        df_KGE_alpha.index = df_KGE_alpha['Catch_name']
        df_Qobs_percs = df_Qobs_percs.loc[:, 'Q_5':'Q_95']
        df_Qsim_percs = df_Qsim_percs.loc[:, 'Q_5':'Q_95']
        df_sdQsim_percs = df_sdQsim_percs.loc[:, 'sdQsim_5':'sdQsim_95']
        df_KGE_alpha = df_KGE_alpha.loc[:, 'KGE_alpha']
        return df_Qobs_percs, df_Qsim_percs, df_sdQsim_percs, df_KGE_alpha

    def main_par(self, db_Qobs_meta_dir, ens_mean_dir, ens_sd_dir, db_Qobs_dir):
        """Parallel computing of several flow percentiles for Qobs and Qsim,
        the standard deviation of the flow percentiles of Qsim and the
        KGE alpha.

        db_Qobs_meta_dir -- Path to file with meta information on the
                            catchments
        ens_mean_dir -- Path to file with runoff ensemble mean
        ens_sd_dir -- Path to file with runoff ensemble standard deviation
        db_Qobs_dir -- Path to folder with observed runoff database_Qobs_new
        """
        cpu_cores = mpp.cpu_count() - 1
        df_meta = self.import_meta(db_Qobs_meta_dir)
        df_meta = self.select_catchments(df_meta)
        # chunking subsets for parallelization
        ll_start = []
        ll_end = []
        lin_dist = np.linspace(0, len(df_meta.index), cpu_cores+1)
        l = len(lin_dist)
        # list of tuples with input arguments for map
        for i in range(len(lin_dist) - 1):
            temp = list(range(int(lin_dist[i]), int(lin_dist[i+1]), 1))
            ll_start.append(temp[0])
            ll_end.append(temp[-1]+1)
        ll_db_Qobs_meta_dir = list(itertools.repeat(db_Qobs_meta_dir, l))
        ll_Ens_mean_dir = list(itertools.repeat(ens_mean_dir, l))
        ll_Ens_sd_dir = list(itertools.repeat(ens_sd_dir, l))
        ll_db_Qobs_dir = list(itertools.repeat(db_Qobs_dir, l))
        subsets = zip(ll_db_Qobs_meta_dir, ll_Ens_mean_dir, ll_Ens_sd_dir,
                      ll_db_Qobs_dir, ll_start, ll_end)
        p = mpp.Pool(cpu_cores)  # launch pool of workers
        res = p.starmap(self.main, subsets)
        p.close()
        p.join()
        res_obs = []
        res_sim = []
        res_simsd = []
        res_kgealpha = []
        # collect dataframes and merge them
        [res_obs.append(res[:][i][0]) for i in range(len(res))]
        [res_sim.append(res[:][i][1]) for i in range(len(res))]
        [res_simsd.append(res[:][i][2]) for i in range(len(res))]
        [res_kgealpha.append(res[:][i][3]) for i in range(len(res))]
        df_Qobs_percs = pd.concat(res_obs[:], ignore_index=True)
        df_Qsim_percs = pd.concat(res_sim[:], ignore_index=True)
        df_sdQsim_percs = pd.concat(res_simsd[:], ignore_index=True)
        df_KGE_alpha = pd.concat(res_kgealpha[:], ignore_index=True)
        return df_Qobs_percs, df_Qsim_percs, df_sdQsim_percs, df_KGE_alpha

    ...

if __name__ == "__main__":
    cpu_cores = mpp.cpu_count() - 1
    c2g = catch2grid()
    p = mpp.Pool(cpu_cores)  # launch pool of workers
    c2g.init_pbar(l)
    ll_range_catch = list(range(0, 5000))
    res = p.map(c2g.main_par, ll_range_catch)
    p.close()
    p.join()
After running it the following error message is displayed:
File "<ipython-input-1-3828921ab3bd>", line 1, in <module>
runfile('/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py', wdir='/Users/robinschwemmle/Desktop/MSc_Thesis/Python')
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py", line 1285, in <module>
c2g.main_par(db_Qobs_meta_dir, Ens_mean_dir, Ens_sd_dir, db_Qobs_dir)
File "/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py", line 798, in main_par
res = p.starmap(self.main, subsets)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 274, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 424, in _handle_tasks
put(task)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
PicklingError: Can't pickle <function <lambda> at 0x1164e42f0>: attribute lookup <lambda> on jupyter_client.session failed
The error occurred just a few days ago; before that the code was working properly. Have there been any changes to the multiprocessing or pickling libraries that I'm not aware of? Or does anyone have advice on which parallel library I could use instead?

Loading test data in batches with TensorFlow

The following code is my pipeline for reading images and labels from files:
import tensorflow as tf
import numpy as np
import tflearn.data_utils
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import sys

# process labels in the input file
def process_label(label):
    info = np.zeros(6)
    ...
    return info

def read_label_file(file):
    f = open(file, "r")
    filepaths = []
    labels = []
    lines = []
    for line in f:
        tokens = line.split(",")
        filepaths.append([tokens[0], tokens[1], tokens[2]])
        labels.append(process_label(tokens[3:]))
        lines.append(line)
    return filepaths, np.vstack(labels), lines

def get_data_batches(params):
    # reading labels and file path
    train_filepaths, train_labels, train_line = read_label_file(params.train_info)
    test_filepaths, test_labels, test_line = read_label_file(params.test_info)
    # convert string into tensors
    train_images = ops.convert_to_tensor(train_filepaths)
    train_labels = ops.convert_to_tensor(train_labels)
    train_line = ops.convert_to_tensor(train_line)
    test_images = ops.convert_to_tensor(test_filepaths)
    test_labels = ops.convert_to_tensor(test_labels)
    test_line = ops.convert_to_tensor(test_line)
    # create input queues
    train_input_queue = tf.train.slice_input_producer([train_images, train_labels, train_line], shuffle=params.shuffle)
    test_input_queue = tf.train.slice_input_producer([test_images, test_labels, test_line], shuffle=False)
    # process path and string tensor into an image and a label
    train_image = None
    for i in range(train_input_queue[0].get_shape()[0]):
        file_content = tf.read_file(params.path_prefix + train_input_queue[0][i])
        train_imageT = (tf.to_float(tf.image.decode_jpeg(file_content, channels=params.num_channels))) * (1.0/255)
        train_imageT = tf.image.resize_images(train_imageT, [params.load_size[0], params.load_size[1]])
        train_imageT = tf.random_crop(train_imageT, size=[params.crop_size[0], params.crop_size[1], params.num_channels])
        train_imageT = tf.image.random_flip_up_down(train_imageT)
        train_imageT = tf.image.per_image_standardization(train_imageT)
        if (i == 0):
            train_image = train_imageT
        else:
            train_image = tf.concat([train_image, train_imageT], 2)
    train_label = train_input_queue[1]
    train_lineInfo = train_input_queue[2]
    test_image = None
    for i in range(test_input_queue[0].get_shape()[0]):
        file_content = tf.read_file(params.path_prefix + test_input_queue[0][i])
        test_imageT = tf.to_float(tf.image.decode_jpeg(file_content, channels=params.num_channels)) * (1.0/255)
        test_imageT = tf.image.resize_images(test_imageT, [params.load_size[0], params.load_size[1]])
        test_imageT = tf.image.central_crop(test_imageT, (params.crop_size[0]+0.0)/params.load_size[0])
        test_imageT = tf.image.per_image_standardization(test_imageT)
        if (i == 0):
            test_image = test_imageT
        else:
            test_image = tf.concat([test_image, test_imageT], 2)
    test_label = test_input_queue[1]
    test_lineInfo = test_input_queue[2]
    # define tensor shape
    train_image.set_shape([params.crop_size[0], params.crop_size[1], params.num_channels*3])
    train_label.set_shape([66])
    test_image.set_shape([params.crop_size[0], params.crop_size[1], params.num_channels*3])
    test_label.set_shape([66])
    # collect batches of images before processing
    train_image_batch, train_label_batch, train_lineno = tf.train.batch([train_image, train_label, train_lineInfo], batch_size=params.batch_size, num_threads=params.num_threads, allow_smaller_final_batch=True)
    test_image_batch, test_label_batch, test_lineno = tf.train.batch([test_image, test_label, test_lineInfo], batch_size=params.test_size, num_threads=params.num_threads, allow_smaller_final_batch=True)
    if (params.loadSlice == 'all'):
        return train_image_batch, train_label_batch, train_lineno, test_image_batch, test_label_batch, test_lineno
    elif params.loadSlice == 'train':
        return train_image_batch, train_label_batch
    elif params.loadSlice == 'test':
        return test_image_batch, test_label_batch
    elif params.loadSlice == 'train_info':
        return train_image_batch, train_label_batch, train_lineno
    elif params.loadSlice == 'test_info':
        return test_image_batch, test_label_batch, test_lineno
    else:
        return train_image_batch, train_label_batch, test_image_batch, test_label_batch
I want to use the same pipeline for loading the test data. The size of my test data is huge and I cannot load all of it at once.
I have 20453 test examples, which is not an integer multiple of the batch size (here 512).
How can I read all of my test examples via this pipeline exactly once and then measure the performance on them?
Currently, I am using the code below for batching my test data, and it does not work. It always reads a full batch from the queue, even when I set allow_smaller_final_batch to True.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "checkpoints2/snapshot-16")
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    more = True
    num_examples = 0
    while (more):
        img_test, lbl_test, lbl_line = sess.run([test_image_batch, test_label_batch, test_lineno])
        print(lbl_test.shape)
        size = lbl_test.shape[0]
        num_examples += size
        if size < args.batch_size:
            more = False
sess.close()
This is the code of my model:
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.normalization import batch_normalization
from tflearn.layers.estimator import regression
from tflearn.activations import relu

def get_alexnet(x, num_output):
    network = conv_2d(x, 64, 11, strides=4)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = conv_2d(network, 192, 5)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = conv_2d(network, 384, 3)
    network = batch_normalization(network, epsilon=0.0001)
    network = relu(network)
    network = conv_2d(network, 256, 3)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = conv_2d(network, 256, 3)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = fully_connected(network, 4096)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 4096)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = dropout(network, 0.5)
    network1 = fully_connected(network, num_output)
    network2 = fully_connected(network, 12)
    network3 = fully_connected(network, 6)
    return network1, network2, network3
This can simply be achieved by setting num_epochs=1 and allow_smaller_final_batch=True!
Another solution is to set batch_size to the size of the test set.
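As an illustration of the first suggestion, a sketch against the pipeline above: limiting the test producer to one epoch makes it raise tf.errors.OutOfRangeError after a single pass over the data, while allow_smaller_final_batch=True still delivers the last, smaller batch.
# Test queue limited to one pass over the data (sketch).
# Note: setting num_epochs creates a local counter variable, so
# tf.local_variables_initializer() must be run before starting the queue runners.
test_input_queue = tf.train.slice_input_producer(
    [test_images, test_labels, test_line], num_epochs=1, shuffle=False)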

Can I sort a QPainter to be drawn on top of other widgets in PyQt4?

I'm making a basic node editor with PyQt4. However, I'm stuck on this: the lines linking nodes are drawn behind the widgets when it should be the other way around.
Is there a way to force this, or should I restart from scratch and try another approach?
The attached code should work and give you 2 nodes that you can move. The line linking them is drawn behind everything instead of on top.
Thank you
from PyQt4 import QtGui
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtCore import Qt
import sys

class Connector():
    def __init__(self, name="", id=id, data=None, connected=None, isInput=True, index=-1, parent=None):
        self.id = id
        self.name = name
        self.data = data
        self.parent = parent
        self.isConnected = not connected
        self.connectedTo = connected
        self.isInput = isInput
        self.index = index
        self.width = 16
        self.height = 16
        self.upperMargin = 30

    def getAbsPosition(self):
        return QPoint(self.parent.pos().x() + self.getPosition().x(), self.parent.pos().y() + self.getPosition().y())

    def getPosition(self):
        if self.parent:
            if self.isInput:
                deltaX = 0 + self.width / 2
            else:
                deltaX = self.parent.width() - self.width
            deltaY = self.upperMargin + (self.index * self.height * 2)
            pos = QPoint(deltaX, deltaY)
        else:
            pos = QPoint(0, 0)
        return pos

class Node(QLineEdit):
    def __init__(self, parent=None, name=""):
        QLineEdit.__init__(self, parent)
        self.size = QSize(200, 300)
        self.setText(name)
        self.resize(self.size.width(), self.size.height())
        self.connectors = []
        self.createInputConnector()
        self.createOutputConnector()

    def getConnectors(self):
        return self.connectors

    def getNewConnectorIndex(self):
        indexInputs = 0
        indexOutputs = 0
        for connector in self.connectors:
            if connector.isInput:
                indexInputs += 1
            else:
                indexOutputs += 1
        return indexInputs, indexOutputs

    def createConnector(self, name, isInput, index):
        newConnector = Connector(name=name, isInput=isInput, index=index, parent=self)
        self.connectors.append(newConnector)
        return newConnector

    def createInputConnector(self, name=""):
        index, ignore = self.getNewConnectorIndex()
        newConnector = self.createConnector(name, isInput=True, index=index)
        return newConnector

    def createOutputConnector(self, name=""):
        ignore, index = self.getNewConnectorIndex()
        newConnector = self.createConnector(name, isInput=False, index=index)
        return newConnector

    def paintEvent(self, event):
        painter = QPainter()
        painter.begin(self)
        painter.path = QPainterPath()
        # disabling next line "solves" the problem but not really.
        painter.setBrush(QColor(122, 163, 39))
        painter.path.addRoundRect(2, 2, self.size.width()-6, self.size.height()-6, 20, 15)
        painter.drawPath(painter.path)
        for connector in self.connectors:
            pos = connector.getPosition()
            cx = pos.x()
            cy = pos.y()
            cw = connector.width
            ch = connector.height
            painter.drawEllipse(cx, cy, cw, ch)
        painter.end()

    def mousePressEvent(self, event):
        self.__mousePressPos = None
        self.__mouseMovePos = None
        if event.button() == Qt.LeftButton:
            self.__mousePressPos = event.globalPos()
            self.__mouseMovePos = event.globalPos()
        super(Node, self).mousePressEvent(event)

    def mouseMoveEvent(self, event):
        if event.buttons() == Qt.LeftButton:
            currPos = self.mapToGlobal(self.pos())
            globalPos = event.globalPos()
            diff = globalPos - self.__mouseMovePos
            newPos = self.mapFromGlobal(currPos + diff)
            self.move(newPos)
            self.__mouseMovePos = globalPos
        super(Node, self).mouseMoveEvent(event)

    def mouseReleaseEvent(self, event):
        if self.__mousePressPos is not None:
            moved = event.globalPos() - self.__mousePressPos
            if moved.manhattanLength() > 3:
                event.ignore()
                return
        super(Node, self).mouseReleaseEvent(event)

class Window(QMainWindow):
    def __init__(self):
        QMainWindow.__init__(self)
        self.resize(640, 480)
        self.entry1 = Node(self, "NODE1")
        self.entry2 = Node(self, "NODE2")

    def paintEvent(self, e):
        qp = QtGui.QPainter()
        qp.begin(self)
        qp.setClipping(False)
        self.doDrawing(qp)
        qp.end()
        self.update()

    def doDrawing(self, qp):
        p1 = self.entry1.getConnectors()[0].getAbsPosition()
        p2 = self.entry2.getConnectors()[0].getAbsPosition()
        qp.drawLine(p1.x(), p1.y(), p2.x(), p2.y())

if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = Window()
    win.show()
    sys.exit(app.exec_())

Graphite / Carbon / Ceres node overlap

I'm working with Graphite monitoring using Carbon and Ceres as the storage method. I have some problems with correcting bad data. It seems that (due to various problems) I've ended up with overlapping files. That is, since Carbon / Ceres stores the data as timestamp#interval.slice, I can have two or more files with overlapping time ranges.
There are two kinds of overlaps:
File A:  +------------+             orig file
File B:      +-----+                subset
File C:            +---------+      overlap
This is causing problems because the existing tools available (ceres-maintenance defrag and rollup) don't cope with these overlaps. Instead, they skip the directory and move on. This is a problem, obviously.
I've created a script that fixes this problem, as follows:
For subsets, just delete the subset file.
For overlaps, use the file-system truncate on the orig file at the point where the next file starts. While it is possible to cut off the start of the overlapping file and rename it properly, I would suggest that this is fraught with danger.
I've found that it's possible to do this in two ways:
Walk the dirs and iterate over the files, fixing as you go: find the file subsets and remove them; or
Walk the dirs and fix all the problems in a dir before moving on. This is BY FAR the faster approach, since the dir walk is hugely time consuming.
Code:
#!/usr/bin/env python2.6
################################################################################
import io
import os
import time
import sys
import string
import logging
import unittest
import datetime
import random
import zmq
import json
import socket
import traceback
import signal
import select
import simplejson
import cPickle as pickle
import re
import shutil
import collections

from pymongo import Connection
from optparse import OptionParser
from pprint import pprint, pformat
################################################################################

class SliceFile(object):
    def __init__(self, fname):
        self.name = fname
        basename = fname.split('/')[-1]
        fnArray = basename.split('#')
        self.timeStart = int(fnArray[0])
        self.freq = int(fnArray[1].split('.')[0])
        self.size = None
        self.numPoints = None
        self.timeEnd = None
        self.deleted = False

    def __repr__(self):
        out = "Name: %s, tstart=%s tEnd=%s, freq=%s, size=%s, npoints=%s." % (
            self.name, self.timeStart, self.timeEnd, self.freq, self.size, self.numPoints)
        return out

    def setVars(self):
        self.size = os.path.getsize(self.name)
        self.numPoints = int(self.size / 8)
        self.timeEnd = self.timeStart + (self.numPoints * self.freq)

################################################################################

class CeresOverlapFixup(object):
    def __del__(self):
        import datetime
        self.writeLog("Ending at %s" % (str(datetime.datetime.today())))
        self.LOGFILE.flush()
        self.LOGFILE.close()

    def __init__(self):
        self.verbose = False
        self.debug = False
        self.LOGFILE = open("ceresOverlapFixup.log", "a")
        self.badFilesList = set()
        self.truncated = 0
        self.subsets = 0
        self.dirsExamined = 0
        self.lastStatusTime = 0

    def getOptionParser(self):
        return OptionParser()

    def getOptions(self):
        parser = self.getOptionParser()
        parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="debug mode for this program, writes debug messages to logfile." )
        parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="verbose mode for this program, prints a lot to stdout." )
        parser.add_option("-b", "--basedir", action="store", type="string", dest="basedir", default=None, help="base directory location to start converting." )
        (options, args) = parser.parse_args()
        self.debug = options.debug
        self.verbose = options.verbose
        self.basedir = options.basedir
        assert self.basedir, "must provide base directory."

    # Examples:
    # ./updateOperations/1346805360#60.slice
    # ./updateOperations/1349556660#60.slice
    # ./updateOperations/1346798040#60.slice

    def getFileData(self, inFilename):
        ret = SliceFile(inFilename)
        ret.setVars()
        return ret

    def removeFile(self, inFilename):
        os.remove(inFilename)
        #self.writeLog("removing file: %s" % (inFilename))
        self.subsets += 1

    def truncateFile(self, fname, newSize):
        if self.verbose:
            self.writeLog("Truncating file, name=%s, newsize=%s" % (pformat(fname), pformat(newSize)))
        IFD = None
        try:
            IFD = os.open(fname, os.O_RDWR|os.O_CREAT)
            os.ftruncate(IFD, newSize)
            os.close(IFD)
            self.truncated += 1
        except:
            self.writeLog("Exception during truncate: %s" % (traceback.format_exc()))
        try:
            os.close(IFD)
        except:
            pass
        return

    def printStatus(self):
        now = self.getNowTime()
        if ((now - self.lastStatusTime) > 10):
            self.writeLog("Status: time=%d, Walked %s dirs, subsetFilesRemoved=%s, truncated %s files." % (now, self.dirsExamined, self.subsets, self.truncated))
            self.lastStatusTime = now

    def fixupThisDir(self, inPath, inFiles):
        # self.writeLog("Fixing files in dir: %s" % (inPath))
        if not '.ceres-node' in inFiles:
            # self.writeLog("--> Not a slice directory, skipping.")
            return
        self.dirsExamined += 1
        sortedFiles = sorted(inFiles)
        sortedFiles = [x for x in sortedFiles if ((x != '.ceres-node') and (x.count('#') > 0)) ]
        lastFile = None
        fileObjList = []
        for thisFile in sortedFiles:
            wholeFilename = os.path.join(inPath, thisFile)
            try:
                curFile = self.getFileData(wholeFilename)
                fileObjList.append(curFile)
            except:
                self.badFilesList.add(wholeFilename)
                self.writeLog("ERROR: file %s, %s" % (wholeFilename, traceback.format_exc()))
        # name is timeStart, really.
        fileObjList = sorted(fileObjList, key=lambda thisObj: thisObj.name)
        while fileObjList:
            self.printStatus()
            changes = False
            firstFile = fileObjList[0]
            removedFiles = []
            for curFile in fileObjList[1:]:
                if (curFile.timeEnd <= firstFile.timeEnd):
                    # have subset file. elim.
                    self.removeFile(curFile.name)
                    removedFiles.append(curFile.name)
                    self.subsets += 1
                    changes = True
                    if self.verbose:
                        self.writeLog("Subset file situation. First=%s, overlap=%s" % (firstFile, curFile))
            fileObjList = [x for x in fileObjList if x.name not in removedFiles]
            if (len(fileObjList) < 2):
                break
            secondFile = fileObjList[1]
            # LT is right. FirstFile's timeEnd is always the first open time after first is done.
            # so, first starts#100, len=2, end=102, positions used=100,101. second start#102 == OK.
            if (secondFile.timeStart < firstFile.timeEnd):
                # truncate first file.
                # file_A (last): +---------+
                # file_B (curr):     +----------+
                # solve by truncating previous file at startpoint of current file.
                newLenFile_A_seconds = int(secondFile.timeStart - firstFile.timeStart)
                newFile_A_datapoints = int(newLenFile_A_seconds / firstFile.freq)
                newFile_A_bytes = int(newFile_A_datapoints) * 8
                if (not newFile_A_bytes):
                    fileObjList = fileObjList[1:]
                    continue
                assert newFile_A_bytes, "Must have size. newLenFile_A_seconds=%s, newFile_A_datapoints=%s, newFile_A_bytes=%s." % (newLenFile_A_seconds, newFile_A_datapoints, newFile_A_bytes)
                self.truncateFile(firstFile.name, newFile_A_bytes)
                if self.verbose:
                    self.writeLog("Truncate situation. First=%s, overlap=%s" % (firstFile, secondFile))
                self.truncated += 1
                fileObjList = fileObjList[1:]
                changes = True
            if not changes:
                fileObjList = fileObjList[1:]

    def getNowTime(self):
        return time.time()

    def walkDirStructure(self):
        startTime = self.getNowTime()
        self.lastStatusTime = startTime
        updateStatsDict = {}
        self.okayFiles = 0
        emptyFiles = 0
        for (thisPath, theseDirs, theseFiles) in os.walk(self.basedir):
            self.printStatus()
            self.fixupThisDir(thisPath, theseFiles)
            self.dirsExamined += 1
        endTime = time.time()
        # time.sleep(11)
        self.printStatus()
        self.writeLog("now = %s, started at %s, elapsed time = %s seconds." % (startTime, endTime, endTime - startTime))
        self.writeLog("Done.")

    def writeLog(self, instring):
        print instring
        print >> self.LOGFILE, instring
        self.LOGFILE.flush()

    def main(self):
        self.getOptions()
        self.walkDirStructure()
