How to properly read the ADC from the ULP while the ESP32 wakes up - esp32

I am continuously measuring ADC data with the ULP coprocessor. Periodically I wake the ESP32 to do some work. When I analysed the data received from the ADC, I found a window of about 100 ms during which the ADC returns 0xFFF while the ESP32 core is waking up (the ULP keeps running continuously).
I am using an Olimex ESP32-DevKit-LiPo and PlatformIO with the Hulp library.
With the sample code below, which measures the ADC and wakes the ESP32 after about 400 measurements, I always receive 4095 for about 100 ms before the ADC values return to the measured signal. In the example the ULP period is set to 4 ms, and there are 24 0xFFF results.
Is this an ESP32 bug (or feature), or am I doing something wrong (and how can it be fixed)?
Below are some results in the format sample number --> ADC result:
0 --> 1891, 1 --> 1893, 2 --> 1890, 3 --> 1882, 4 --> 1881, 5 --> 1883,
. . . . . .
399 --> 1893, 400 --> 1887, 401 --> 1889, 402 --> 1891, 403 --> 1899,
404 --> 4095, 405 --> 4095, 406 --> 4095, 407 --> 4095, 408 --> 4095, 409 --> 4095,
410 --> 4095, 411 --> 4095, 412 --> 4095, 413 --> 4095, 414 --> 4095, 415 --> 4095,
416 --> 4095, 417 --> 4095, 418 --> 4095, 419 --> 4095, 420 --> 4095, 421 --> 4095,
422 --> 4095, 423 --> 4095, 424 --> 4095, 425 --> 4095, 426 --> 4095, 427 --> 4095,
428 --> 1904, 429 --> 1893, 430 --> 1890, 431 --> 1883, 432 --> 1879, 433 --> 1879,
434 --> 1877, 435 --> 1873, 436 --> 1881, 437 --> 1894, 438 --> 1904, 439 --> 1904,
440 --> 1903, 441 --> 1904, 442 --> 1905, 443 --> 1909, 444 --> 1910, 445 --> 1914,
#include "Arduino.h"
#include "hulp_arduino.h"
#define bufor_lnght 400
#define bufor_lnght_max bufor_lnght + 400
typedef union
{
uint16_t d_ADC[bufor_lnght + 200];
uint8_t d_ADC_8[(bufor_lnght + 200) * 2];
} dane_pom_X;
struct paczka_ws
{
uint16_t meas_id;
dane_pom_X dane;
};
paczka_ws dane_ADC_ws;
#define ULP_WAKEUP_INTERVAL_MS (4)
#define ADC_IN GPIO_NUM_34
RTC_DATA_ATTR struct
{
ulp_var_t ULP_meas_cntr;
ulp_var_t ULP_buf_RTC_to_RAM;
ulp_var_t ULP_ADC_result[bufor_lnght_max];
} ulp_vars;
void ulp_init()
{
enum
{
LBL_MEAS_ADC,
LBL_WAKEUP_TRIGGERED,
LBL_HALT,
};
const ulp_insn_t program[] = {
// Set a register to 0 for use with I_GET and I_PUT
I_MOVI(R3, 0),
I_GET(R0, R3, ulp_vars.ULP_meas_cntr),
I_SUBI(R0, R0, bufor_lnght_max),
M_BXF(LBL_MEAS_ADC),
I_MOVI(R0, bufor_lnght_max - 1),
I_PUT(R0, R3, ulp_vars.ULP_meas_cntr),
M_LABEL(LBL_MEAS_ADC),
I_ANALOG_READ(R0, ADC_IN),
I_GET(R1, R3, ulp_vars.ULP_meas_cntr),
I_PUT(R0, R1, ulp_vars.ULP_ADC_result),
I_ADDI(R1, R1, 1),
I_PUT(R1, R3, ulp_vars.ULP_meas_cntr),
I_GET(R0, R3, ulp_vars.ULP_meas_cntr),
I_SUBI(R0, R0, bufor_lnght + 2),
M_BXZ(LBL_WAKEUP_TRIGGERED),
M_BX(LBL_HALT),
M_LABEL(LBL_WAKEUP_TRIGGERED),
I_MOVI(R0, 1),
I_PUT(R0, R3, ulp_vars.ULP_buf_RTC_to_RAM),
M_WAKE_WHEN_READY(),
M_LABEL(LBL_HALT),
I_HALT(),
};
ESP_ERROR_CHECK(hulp_configure_analog_pin(ADC_IN, ADC_ATTEN_DB_11, ADC_WIDTH_BIT_12));
ESP_ERROR_CHECK(hulp_ulp_load(program, sizeof(program), 1000UL * ULP_WAKEUP_INTERVAL_MS, 0));
ESP_ERROR_CHECK(hulp_ulp_run(0));
}
void setup()
{
Serial.begin(115200);
if (hulp_is_deep_sleep_wakeup())
{
if (ulp_vars.ULP_buf_RTC_to_RAM.val == 1)
{
for (uint16_t i = 0; i < bufor_lnght + 100; i++)
{
dane_ADC_ws.dane.d_ADC[i] = ulp_vars.ULP_ADC_result[i].val;
}
ulp_vars.ULP_buf_RTC_to_RAM.val = 0;
for (uint16_t i = 0; i < bufor_lnght + 100; i++)
{
Serial.print(i);
Serial.print(" --> ");
Serial.print(dane_ADC_ws.dane.d_ADC[i]);
Serial.print(", ");
if (i % 6 == 5)
{
Serial.println();
}
}
}
}
else
{
ulp_init();
ulp_vars.ULP_meas_cntr.val = 0;
ulp_vars.ULP_buf_RTC_to_RAM.val = 0;
}
Serial.println("Sleeping...");
esp_sleep_enable_timer_wakeup(10000000);
esp_deep_sleep_disable_rom_logging();
esp_sleep_enable_ulp_wakeup();
esp_deep_sleep_start();
}
void loop()
{
// put your main code here, to run repeatedly:
}
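For reference, one way to keep the application working despite the wakeup artifact is to drop the saturated readings before using the buffer. This is only a sketch of a workaround, not an explanation of the root cause; ADC_INVALID and clean_samples are hypothetical names, not part of the Hulp API:

#include <stdint.h>
#include <stddef.h>

// Hypothetical helper: compact a sample buffer in place, dropping the
// full-scale 0xFFF readings that appear around the wakeup window.
// Returns the number of samples kept.
static const uint16_t ADC_INVALID = 0xFFF; // 4095, saturated reading

size_t clean_samples(uint16_t *buf, size_t n)
{
    size_t kept = 0;
    for (size_t i = 0; i < n; i++)
    {
        if (buf[i] != ADC_INVALID)
        {
            buf[kept++] = buf[i];
        }
    }
    return kept;
}

Note that this also discards genuinely full-scale inputs, so if the signal can legitimately reach 4095 one would instead reject only samples taken within the observed ~100 ms window around the wakeup.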

Related

Fine-tuning with the Hugging Face Trainer when adding a layer on an Electra model

I'm trying to fine-tune my own model with the Hugging Face Trainer module. There was no problem when training plain ElectraForQuestionAnswering; however, when I tried to add an additional layer on top of the model and ran the same process, I got the error below.
from transformers import ElectraForQuestionAnswering
from transformers.modeling_outputs import QuestionAnsweringModelOutput  # import needed for the return type below
from torch import nn

class Jelectra(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = ElectraForQuestionAnswering.from_pretrained("google/electra-small-discriminator")
        self.sm = nn.Softmax(dim=1)

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                start_positions=None,
                end_positions=None,
                output_attentions=None,
                output_hidden_states=None,
                return_dict=None,):
        outputs = self.model(input_ids, token_type_ids, attention_mask, start_positions, end_positions)
        output_start = self.sm(outputs[0])
        output_end = self.sm(outputs[1])
        return QuestionAnsweringModelOutput(start_logits=output_start, end_logits=output_end)

model = Jelectra()

from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./fine_tuned_electra",
    evaluation_strategy="epoch",
    learning_rate=5e-4,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=2,
    weight_decay=0.01,
    gradient_accumulation_steps=2,
    eval_accumulation_steps=1,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_squad["train"],
    eval_dataset=tokenized_squad["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer.train()
trainer.train()
The error is...
RuntimeError Traceback (most recent call last)
Input In [12], in <module>
3 training_args = TrainingArguments(
4 output_dir="./fine_tuned_electra",
5 evaluation_strategy="epoch",
(...)
12 eval_accumulation_steps=1,
13 )
15 trainer = Trainer(
16 model=model,
17 args=training_args,
(...)
21 data_collator=data_collator,
22 )
---> 24 trainer.train()
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/trainer.py:1365, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1363 tr_loss_step = self.training_step(model, inputs)
1364 else:
-> 1365 tr_loss_step = self.training_step(model, inputs)
1367 if (
1368 args.logging_nan_inf_filter
1369 and not is_torch_tpu_available()
1370 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1371 ):
1372 # if loss is nan or inf simply add the average of previous logged losses
1373 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/trainer.py:1940, in Trainer.training_step(self, model, inputs)
1937 return loss_mb.reduce_mean().detach().to(self.args.device)
1939 with self.autocast_smart_context_manager():
-> 1940 loss = self.compute_loss(model, inputs)
1942 if self.args.n_gpu > 1:
1943 loss = loss.mean() # mean() to average on multi-gpu parallel training
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/trainer.py:1972, in Trainer.compute_loss(self, model, inputs, return_outputs)
1970 else:
1971 labels = None
-> 1972 outputs = model(**inputs)
1973 # Save past state if it exists
1974 # TODO: this needs to be fixed and made cleaner later.
1975 if self.args.past_index >= 0:
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/torch/nn/modules/module.py:727, in Module._call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
730 self._forward_hooks.values()):
731 hook_result = hook(self, input, result)
Input In [11], in Jelectra.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict)
9 def forward(self,
10 input_ids=None,
11 attention_mask=None,
(...)
19 output_hidden_states=None,
20 return_dict=None,):
---> 22 outputs = self.model(input_ids, token_type_ids, attention_mask, start_positions, end_positions)
23 output_start = self.sm(outputs[0])
24 output_end = self.sm(outputs[1])
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/torch/nn/modules/module.py:727, in Module._call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
730 self._forward_hooks.values()):
731 hook_result = hook(self, input, result)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/models/electra/modeling_electra.py:1377, in ElectraForQuestionAnswering.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict)
1365 r"""
1366 start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
1367 Labels for position (index) of the start of the labelled span for computing the token classification loss.
(...)
1373 are not taken into account for computing the loss.
1374 """
1375 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-> 1377 discriminator_hidden_states = self.electra(
1378 input_ids,
1379 attention_mask=attention_mask,
1380 token_type_ids=token_type_ids,
1381 position_ids=position_ids,
1382 head_mask=head_mask,
1383 inputs_embeds=inputs_embeds,
1384 output_attentions=output_attentions,
1385 output_hidden_states=output_hidden_states,
1386 )
1388 sequence_output = discriminator_hidden_states[0]
1390 logits = self.qa_outputs(sequence_output)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/torch/nn/modules/module.py:727, in Module._call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
730 self._forward_hooks.values()):
731 hook_result = hook(self, input, result)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/models/electra/modeling_electra.py:905, in ElectraModel.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
901 encoder_extended_attention_mask = None
903 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
--> 905 hidden_states = self.embeddings(
906 input_ids=input_ids,
907 position_ids=position_ids,
908 token_type_ids=token_type_ids,
909 inputs_embeds=inputs_embeds,
910 past_key_values_length=past_key_values_length,
911 )
913 if hasattr(self, "embeddings_project"):
914 hidden_states = self.embeddings_project(hidden_states)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/torch/nn/modules/module.py:727, in Module._call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
730 self._forward_hooks.values()):
731 hook_result = hook(self, input, result)
File ~/anaconda3/envs/domain/lib/python3.8/site-packages/transformers/models/electra/modeling_electra.py:212, in ElectraEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
210 if self.position_embedding_type == "absolute":
211 position_embeddings = self.position_embeddings(position_ids)
--> 212 embeddings += position_embeddings
213 embeddings = self.LayerNorm(embeddings)
214 embeddings = self.dropout(embeddings)
RuntimeError: The size of tensor a (512) must match the size of tensor b (12) at non-singleton dimension 1
How can I solve this? I'm using SQuAD data.
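A likely culprit, judging from the traceback: the inner call passes everything positionally, so token_type_ids lands in the attention_mask slot and start_positions (shape (batch,)) lands in position_ids, which is why the position embeddings come out sized 12 against the sequence length 512. A sketch of the keyword form of that one call (only the argument binding changes):

# Inside Jelectra.forward: bind arguments by name so nothing shifts position.
outputs = self.model(
    input_ids=input_ids,
    attention_mask=attention_mask,
    token_type_ids=token_type_ids,
    start_positions=start_positions,
    end_positions=end_positions,
)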

UnboundLocalError: local variable 'f' referenced before assignment ("f" is a variable that is clearly assigned)

I have calculation and fitting code (part of it is below).
def NFW(self, r, r_s, c):
    x = r/r_s
    delta_c = (200/3) * (c**3 / (log(1+c) - (c/(1+c))))
    surface_mass_density = (c_velocity**2 / (4*pi*G)) * (self.D_s / (self.D_l * self.D_ls))
    rho_c = self.Cosmological_Model.critical_density(self.z_l).to(u.M_sun/(u.Mpc)**3).value
    Kk = ( 2*r_s*delta_c*rho_c ) / surface_mass_density
    # Kappa of NFW
    K_NFW = np.array([])
    for i in range(len(r)):
        if 0 <= x[i] < 1:
            f = ( 1 / (x[i]**2-1) ) * ( 1 - ( 2*atanh( np.sqrt( ( (1-x[i]) / (1+x[i]) ) ) ) / np.sqrt(1-x[i]**2) ) )
        elif x[i] == 1:
            f = 1/3
        elif 1 < x[i]:
            f = ( 1 / (x[i]**2-1) ) * ( 1 - ( 2*atan( np.sqrt( (x[i]-1) / (1+x[i]) ) ) / np.sqrt(x[i]**2-1) ) )
        else:
            print("x[i] can not have negative number!")
        K_NFW = np.append(K_NFW, Kk * f)
And I tried to fit it with scipy's curve_fit. The strange thing is that sometimes it works and sometimes it doesn't, depending on "r", which is one of the input parameters of the NFW function.
Below is the fitting code.
ind_sub_R01 = np.where(self.obs_sub_R_LensPlane>0.10)  # (<-- this)
# If I put '0.10' here it doesn't work, but if I put '0.20' it works.
# This number is the cut radius, i.e. the point where the fit starts.
def NFW_fitting(self, main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds):
    main_popt, main_pcov = curve_fit(self.NFWfunc, self.obs_main_R_LensPlane, self.obs_main_r_T_avg, p0=main_NFW_p0, bounds=main_NFW_bounds)
    sub_popt, sub_pcov = curve_fit(self.NFWfunc, self.obs_sub_R_LensPlane, self.obs_sub_r_T_avg, p0=sub_NFW_p0, bounds=sub_NFW_bounds)
    return main_popt[0], main_popt[1], main_pcov, sub_popt[0], sub_popt[1], sub_pcov
If I put '0.10' there, I get the error below.
---------------------------------------------------------------------------
UnboundLocalError Traceback (most recent call last)
/var/folders/42/grbryvqx3vndy45_5d7lxyqr0000gn/T/ipykernel_34140/3894437891.py in <module>
198 drz_sci_606 = fits.open('bullet_f606w_drz_sci.fits')
199 FITandPLOT = profile_FITandPLOT(reduced_shear_main=reduced_shear_main, reduced_shear_sub=reduced_shear_sub, SISfunc=profile.SIS, NFWfunc=profile.NFW, data_for_WCS=drz_sci_606, D_l=profile.D_l)
--> 200 FITandPLOT.plotting(main_SIS_p0=v_disp_main, main_SIS_bounds=[v_disp_main-5,v_disp_main+5], sub_SIS_p0=v_disp_sub, sub_SIS_bounds=[v_disp_main-5,v_disp_main+5],
201 main_NFW_p0=(r_s_main,c_vir_main), main_NFW_bounds=([r_s_main-5,c_vir_main-5], [r_s_main+5,c_vir_main+5]), sub_NFW_p0=(r_s_sub,c_vir_sub), sub_NFW_bounds=([r_s_sub-5,c_vir_sub-5], [r_s_sub+5,c_vir_sub+5]))
202
/var/folders/42/grbryvqx3vndy45_5d7lxyqr0000gn/T/ipykernel_34140/3894437891.py in plotting(self, main_SIS_p0, main_SIS_bounds, sub_SIS_p0, sub_SIS_bounds, main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds)
59 def plotting(self, main_SIS_p0, main_SIS_bounds, sub_SIS_p0, sub_SIS_bounds, main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds):
60 main_SIS_v_disp, main_SIS_err_v_disp, sub_SIS_v_disp, sub_SIS_err_v_disp = self.SIS_fitting(main_SIS_p0, main_SIS_bounds, sub_SIS_p0, sub_SIS_bounds)
---> 61 main_NFW_r_s, main_NFW_c, main_NFW_err_matrix, sub_NFW_r_s, sub_NFW_c, sub_NFW_err_matrix = self.NFW_fitting(main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds)
62
63 #----main
/var/folders/42/grbryvqx3vndy45_5d7lxyqr0000gn/T/ipykernel_34140/3894437891.py in NFW_fitting(self, main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds)
54 def NFW_fitting(self, main_NFW_p0, main_NFW_bounds, sub_NFW_p0, sub_NFW_bounds):
55 main_popt, main_pcov = curve_fit(self.NFWfunc, self.obs_main_R_LensPlane, self.obs_main_r_T_avg, p0=main_NFW_p0, bounds=main_NFW_bounds)
---> 56 sub_popt, sub_pcov = curve_fit(self.NFWfunc, self.obs_sub_R_LensPlane, self.obs_sub_r_T_avg, p0=sub_NFW_p0, bounds=sub_NFW_bounds)
57 return main_popt[0], main_popt[1], main_pcov, sub_popt[0], sub_popt[1], sub_pcov
58
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
798 kwargs['max_nfev'] = kwargs.pop('maxfev', None)
799
--> 800 res = least_squares(func, p0, jac=jac, bounds=bounds, method=method,
801 **kwargs)
802
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/_lsq/least_squares.py in least_squares(fun, x0, jac, bounds, method, ftol, xtol, gtol, x_scale, loss, f_scale, diff_step, tr_solver, tr_options, jac_sparsity, max_nfev, verbose, args, kwargs)
926
927 elif method == 'trf':
--> 928 result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
929 gtol, max_nfev, x_scale, loss_function, tr_solver,
930 tr_options.copy(), verbose)
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/_lsq/trf.py in trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, loss_function, tr_solver, tr_options, verbose)
121 loss_function, tr_solver, tr_options, verbose)
122 else:
--> 123 return trf_bounds(
124 fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
125 loss_function, tr_solver, tr_options, verbose)
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/_lsq/trf.py in trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, loss_function, tr_solver, tr_options, verbose)
336
337 x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
--> 338 f_new = fun(x_new)
339 nfev += 1
340
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/_lsq/least_squares.py in fun_wrapped(x)
813
814 def fun_wrapped(x):
--> 815 return np.atleast_1d(fun(x, *args, **kwargs))
816
817 if method == 'trf':
~/opt/anaconda3/envs/first_envs/lib/python3.9/site-packages/scipy/optimize/minpack.py in func_wrapped(params)
483 if transform is None:
484 def func_wrapped(params):
--> 485 return func(xdata, *params) - ydata
486 elif transform.ndim == 1:
487 def func_wrapped(params):
/var/folders/42/grbryvqx3vndy45_5d7lxyqr0000gn/T/ipykernel_34140/2654829019.py in NFW(self, r, r_s, c)
142 else:
143 print("x[i] can not have negative number!")
--> 144 K_NFW = np.append(K_NFW, Kk * f)
145
146 # Gamma of NFW
UnboundLocalError: local variable 'f' referenced before assignment
If I put '0.20', '0.09', or '0.25' there, I obtain the fitted parameters without problems. These numbers set the fitting start point: if I put '0.25', I fit only the part of the graph to the right of '0.25'.
I want to know why the error occurs.
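A hint at where to look (a sketch, not a verified diagnosis): f is assigned in the three x[i] branches but never in the final else, which only prints. So if any x[i] is negative, or NaN (NaN makes every comparison False, so it also falls through to else), the very next line uses f before it was ever bound. A minimal guard in the same spirit, with check_x being a hypothetical helper:

import numpy as np

def check_x(x):
    # NaN and negative values both fail this test, because any
    # comparison with NaN is False.
    x = np.asarray(x, dtype=float)
    bad = ~(x >= 0)
    if bad.any():
        raise ValueError("bad x values at indices %s: %s"
                         % (np.where(bad)[0], x[bad]))

Calling check_x(r / r_s) at the top of NFW would turn the silent fall-through into an immediate, descriptive error, which makes it easy to see which r values the 0.10 cut lets through.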

What is the best way to generate a random pattern inside a table?

I've got a table (2D array), c x r. I need to generate a random pattern of connected cells inside it, with no self-crossings and no diagonal moves. See the related picture for an example (ex. 1).
c = 6, r = 7; the pattern is shown in numbers.
I wrote a function for this and it works fine, but I'm looking for serious optimization. In the code below you can see that if the pattern gets into a dead end it simply rebuilds itself from scratch. That is very inefficient when the pattern length is close or equal to the number of cells, c*r (42 in the example). So some smarter solution is needed, like moving the whole pattern symmetrically when it runs out of possible moves, or adding some analysis to the function so it never gets caught in dead ends. Again, for low values of c, r and patternLength my example works fine, but I'm looking for algorithmic soundness and high performance even on pretty large inputs.
function ClassLogic:generatePattern()
    --[[ subfunctions ]]
    -- choosing the next point for the pattern
    local move = function( seq )
        -- getting the last sequence point
        local last = seq[#seq]
        -- checking the nearness of walls
        local wallLeft, wallRight, wallUp, wallDown =
            (last.c==1),
            (last.c==config.tableSize.c),
            (last.r==1),
            (last.r==config.tableSize.r)
        -- checking the nearness of already sequenced points
        local spLeft, spRight, spUp, spDown =
            (utilities.indexOfTable( seq, { c = last.c - 1, r = last.r } )~=-1),
            (utilities.indexOfTable( seq, { c = last.c + 1, r = last.r } )~=-1),
            (utilities.indexOfTable( seq, { c = last.c, r = last.r - 1 } )~=-1),
            (utilities.indexOfTable( seq, { c = last.c, r = last.r + 1 } )~=-1)
        local leftRestricted = (wallLeft or spLeft)
        local rightRestricted = (wallRight or spRight)
        local upRestricted = (wallUp or spUp)
        local downRestricted = (wallDown or spDown)
        if ( leftRestricted and rightRestricted and upRestricted and downRestricted ) then
            -- dead end
            print('d/e')
            return nil
        else
            -- go somewhere possible
            local possibleDirections = {}
            if (not leftRestricted) then possibleDirections[#possibleDirections+1] = 1 end
            if (not rightRestricted) then possibleDirections[#possibleDirections+1] = 2 end
            if (not upRestricted) then possibleDirections[#possibleDirections+1] = 3 end
            if (not downRestricted) then possibleDirections[#possibleDirections+1] = 4 end
            local direction = possibleDirections[math.random( 1, #possibleDirections )]
            if (direction==1) then
                -- next point is left
                return { c = last.c - 1, r = last.r }
            elseif (direction==2) then
                -- next point is right
                return { c = last.c + 1, r = last.r }
            elseif (direction==3) then
                -- next point is up
                return { c = last.c, r = last.r - 1 }
            elseif (direction==4) then
                -- next point is down
                return { c = last.c, r = last.r + 1 }
            end
        end
    end
    --[[ subfunctions end ]]
    -- choose random entry point
    local entry = { c = math.random( 1, config.tableSize.c ),
                    r = math.random( 1, config.tableSize.r ) }
    -- start points sequence
    local pointSequence = { [1] = entry }
    -- building the pattern
    local succeed = false
    while (not succeed) do
        for i = 2, self.patternLength do
            local nextPoint = move( pointSequence )
            if (nextPoint~=nil) then
                pointSequence[i] = nextPoint
                if (i==self.patternLength) then succeed = true end
            else
                pointSequence = { [1] = entry }
                break
            end
        end
    end
    return pointSequence
end
Any ideas or approaches on how this could be realized would be highly appreciated. Maybe a recursive backtracker, a pathfinding algorithm, or a random walk?
Snake-style growing alone is not enough for good performance.
The main idea is to randomly modify the path being generated by adding small detours like the following:
- - 6 - - - - 8 - -
- - 5 - - - 6 7 - -
- - 4 1 - ===> - 5 4 1 -
- - 3 2 - - - 3 2 -
- - - - - - - - - -
(note the two additional cells added to the left of the 4-5 segment)
Such an implementation works very fast for area fill below 95%:
local function generate_path(W, H, L)
    -- W = field width (number of columns)  -- c = 1..W
    -- H = field height (number of rows)    -- r = 1..H
    -- L = path length, must be within range 1..W*H
    assert(L >= 1 and L <= W * H, "Path length is greater than field area")

    local function get_idx(x, y)
        return x >= 1 and x <= W and y >= 1 and y <= H and (y - 1) * W + x
    end

    local function get_x_y(idx)
        local x = (idx - 1) % W + 1
        local y = (idx - x) / W + 1
        return x, y
    end

    local function random_sort(array)
        for last = #array, 2, -1 do
            local pos = math.random(last)
            array[pos], array[last] = array[last], array[pos]
        end
    end

    local path_sum_x = 0
    local path_sum_y = 0
    local path_ctr = 0
    local is_unused = {}  -- [idx] = true/nil (or idx recently swapped with)

    local function mark_as_unused(idx, value)
        local x, y = get_x_y(idx)
        path_sum_x = path_sum_x - x
        path_sum_y = path_sum_y - y
        path_ctr = path_ctr - 1
        is_unused[idx] = value or true
    end

    local function mark_as_path(idx)
        local x, y = get_x_y(idx)
        path_sum_x = path_sum_x + x
        path_sum_y = path_sum_y + y
        path_ctr = path_ctr + 1
        is_unused[idx] = nil
    end

    for x = 1, W do
        for y = 1, H do
            is_unused[get_idx(x, y)] = true
        end
    end

    -- create path of length 1 by selecting random cell
    local idx = get_idx(math.random(W), math.random(H))
    mark_as_path(idx)
    local path = {first = idx, last = idx, [idx] = {}}
    -- path[idx] == {next=next_idx/nil, prev=prev_idx/nil}

    local function grow()
        local variants = {
            {dx=-1, dy=0, origin="last"}, {dx=1, dy=0, origin="last"},
            {dx=0, dy=-1, origin="last"}, {dx=0, dy=1, origin="last"},
            {dx=-1, dy=0, origin="first"}, {dx=1, dy=0, origin="first"},
            {dx=0, dy=-1, origin="first"}, {dx=0, dy=1, origin="first"}
        }
        random_sort(variants)
        for _, vector in ipairs(variants) do
            local x, y = get_x_y(path[vector.origin])
            local idx = get_idx(vector.dx + x, vector.dy + y)
            if is_unused[idx] then
                if vector.origin == 'first' then
                    -- add new first cell of the path
                    local old_first = path.first
                    path[old_first].prev = idx
                    path[idx] = {next = old_first}
                    path.first = idx
                else
                    -- add new last cell of the path
                    local old_last = path.last
                    path[old_last].next = idx
                    path[idx] = {prev = old_last}
                    path.last = idx
                end
                mark_as_path(idx)
                return true
            end
        end
    end

    local function shrink()
        if math.random(2) == 2 then
            -- remove first cell of the path
            local old_first = path.first
            local new_first = assert(path[old_first].next)
            path[old_first] = nil
            path.first = new_first
            path[new_first].prev = nil
            mark_as_unused(old_first)
        else
            -- remove last cell of the path
            local old_last = path.last
            local new_last = assert(path[old_last].prev)
            path[old_last] = nil
            path.last = new_last
            path[new_last].next = nil
            mark_as_unused(old_last)
        end
    end

    local function inflate()
        local variants = {}
        local idx1 = path.first
        repeat
            local idx4 = path[idx1].next
            if idx4 then
                local x1, y1 = get_x_y(idx1)
                local x4, y4 = get_x_y(idx4)
                local dx14, dy14 = x4 - x1, y4 - y1
                local dx, dy = dy14, dx14
                for side = 1, 2 do
                    dx, dy = -dx, -dy
                    local x2, y2 = x1 + dx, y1 + dy
                    local idx2 = get_idx(x2, y2)
                    local idx3 = get_idx(x2 + dx14, y2 + dy14)
                    if is_unused[idx2] and is_unused[idx3] then
                        table.insert(variants, {idx1, idx2, idx3, idx4})
                    end
                end
            end
            idx1 = idx4
        until not idx4
        if #variants > 0 then
            local idx1, idx2, idx3, idx4 =
                (table.unpack or unpack)(variants[math.random(#variants)])
            -- insert idx2 and idx3 between idx1 and idx4
            path[idx1].next = idx2
            path[idx2] = {prev = idx1, next = idx3}
            path[idx3] = {prev = idx2, next = idx4}
            path[idx4].prev = idx3
            mark_as_path(idx2)
            mark_as_path(idx3)
            return true
        end
    end

    local function euclid(dx, dy)
        return dx*dx + dy*dy
    end

    local function swap()
        local variants = {}
        local path_center_x = path_sum_x / path_ctr
        local path_center_y = path_sum_y / path_ctr
        local idx1 = path.first
        repeat
            local idx2 = path[idx1].next
            local idx3 = idx2 and path[idx2].next
            if idx3 then
                local x1, y1 = get_x_y(idx1)
                local x2, y2 = get_x_y(idx2)
                local x3, y3 = get_x_y(idx3)
                local dx12, dy12 = x2 - x1, y2 - y1
                local dx23, dy23 = x3 - x2, y3 - y2
                if dx12 * dx23 + dy12 * dy23 == 0 then
                    local x, y = x1 + dx23, y1 + dy23
                    local idx = get_idx(x, y)
                    local dist2 = euclid(x2 - path_center_x, y2 - path_center_y)
                    local dist = euclid(x - path_center_x, y - path_center_y)
                    if is_unused[idx] and dist2 < dist and is_unused[idx] ~= idx2 then
                        table.insert(variants, {idx1, idx2, idx3, idx})
                    end
                end
            end
            idx1 = idx2
        until not idx3
        if #variants > 0 then
            local idx1, idx2, idx3, idx =
                (table.unpack or unpack)(variants[math.random(#variants)])
            -- swap idx2 and idx
            path[idx1].next = idx
            path[idx] = path[idx2]
            path[idx3].prev = idx
            path[idx2] = nil
            mark_as_unused(idx2, idx)
            mark_as_path(idx)
            return true
        end
    end

    local actions = {grow, inflate, swap}
    repeat
        random_sort(actions)
        local success
        for _, action in ipairs(actions) do
            success = action()
            if success then
                break
            end
        end
        if not success and path_ctr < L then
            -- erase and rewind
            while path_ctr > 1 do
                shrink()
            end
        end
    until path_ctr >= L

    while path_ctr > L do
        shrink()
    end

    local pointSequence = {}
    local idx = path.first
    local step = 0
    repeat
        step = step + 1
        path[idx].step = step
        local x, y = get_x_y(idx)
        pointSequence[step] = {c = x, r = y}
        idx = path[idx].next
    until not idx

    local field = 'W = '..W..', H = '..H..', L = '..L..'\n'
    for y = 1, H do
        for x = 1, W do
            local c = path[get_idx(x, y)]
            field = field..(' '..(c and c.step or '-')):sub(-4)
        end
        field = field..'\n'
    end
    print(field)

    return pointSequence
end
Usage example:
math.randomseed(os.time())
local pointSequence = generate_path(6, 7, 10)
-- pointSequence = {[1]={r=r1,c=c1}, [2]={r=r2,c=c2},...,[10]={r=r10,c=c10}}
Result examples:
W = 5, H = 5, L = 10
- - - 9 10
- 6 7 8 -
- 5 4 1 -
- - 3 2 -
- - - - -
W = 5, H = 5, L = 19
15 16 17 18 19
14 1 2 3 4
13 12 11 6 5
- - 10 7 -
- - 9 8 -
W = 6, H = 7, L = 35
- 35 34 25 24 23
- - 33 26 21 22
- 31 32 27 20 19
- 30 29 28 - 18
- 1 10 11 12 17
3 2 9 8 13 16
4 5 6 7 14 15
W = 19, H = 21, L = 394
77 78 79 84 85 118 119 120 121 122 123 124 125 126 127 128 129 254 255
76 75 80 83 86 117 116 115 114 141 140 139 138 135 134 131 130 253 256
73 74 81 82 87 88 89 112 113 142 145 146 137 136 133 132 - 252 257
72 69 68 67 92 91 90 111 - 143 144 147 148 149 150 151 152 251 258
71 70 65 66 93 108 109 110 163 162 161 160 159 158 157 156 153 250 259
58 59 64 63 94 107 166 165 164 191 192 193 196 197 - 155 154 249 260
57 60 61 62 95 106 167 168 189 190 - 194 195 198 241 242 243 248 261
56 55 54 53 96 105 170 169 188 203 202 201 200 199 240 239 244 247 262
47 48 51 52 97 104 171 172 187 204 205 206 231 232 237 238 245 246 263
46 49 50 99 98 103 174 173 186 209 208 207 230 233 236 267 266 265 264
45 42 41 100 101 102 175 184 185 210 211 228 229 234 235 268 269 270 271
44 43 40 39 38 177 176 183 214 213 212 227 226 225 276 275 274 273 272
33 34 35 36 37 178 179 182 215 216 217 218 223 224 277 278 279 280 281
32 29 28 23 22 - 180 181 12 11 10 219 222 287 286 285 284 283 282
31 30 27 24 21 18 17 14 13 8 9 220 221 288 289 290 291 292 293
380 381 26 25 20 19 16 15 394 7 4 3 304 303 300 299 296 295 294
379 382 383 384 387 388 391 392 393 6 5 2 305 302 301 298 297 312 313
378 371 370 385 386 389 390 347 346 343 342 1 306 307 308 309 310 311 314
377 372 369 364 363 350 349 348 345 344 341 340 333 332 319 318 317 316 315
376 373 368 365 362 351 352 353 354 355 338 339 334 331 320 321 322 323 324
375 374 367 366 361 360 359 358 357 356 337 336 335 330 329 328 327 326 325

MATLAB genetic algorithm optimization returns integer values higher than boundaries and violates inequality constraints. Why?

I'm using the MATLAB R2016a genetic algorithm optimization toolbox to optimize 80 integer values, subject to these constraints:
x(80) > x(79) > x(78) > x(77) > x(76) ... x(5) > x(4) > x(3) > x(2) > x(1)
The range for all integer variables is between 1 and 500. I used this code in MATLAB:
f = @(x)Cost_function(x, my_data);
num_of_var = 80;
for mx = 1:num_of_var-1
    A(mx,:) = [zeros(1,mx-1), 1, -1, zeros(1,num_of_var-mx-1)];
end
b = repmat(-3, [num_of_var-1, 1]);
lb = ones([num_of_var-1, 1]);
up = repmat(500, [num_of_var-1, 1]);
options = optimoptions('ga');
options.Display = 'iter';
options.PopulationSize = 200;
options.UseParallel = 0;
IntCon = 1:1:num_of_var;
[x, fval, exitflag] = ga(f, num_of_var, A, b, [], [], lb, up, [], IntCon, options);
Is this code correct? In some cases it returns integers higher than the boundaries. For example, this is the first returned solution for the cost function:
11 89 129 136 168 191 208 232 267 299 306 312 312 270 270 293 297 296 283 192 188 239 241 239 226 212 212 301 275 231 221 210 179 182 200 224 227 258 270 264 225 204 183 199 202 236 305 310 313 276 272 259 256 336 329 310 303 303 296 289 275 235 233 232 194 196 203 268 294 313 340 336 333 263 260 257 265 275 409 174964160
Also, this output doesn't satisfy the inequality constraints I listed. Why?
Why higher than the boundaries?
I think you are talking about the last number in your result: 174964160. That happens because you use num_of_var-1 instead of num_of_var in the calculation of lb and up, so the last variable ends up with no bounds at all.
Why are the inequality constraints not satisfied?
You may need to run more iterations. Alternatively, you can model this differently: instead of variables x with x(k) <= x(k+1) - 3, use variables dx(k) >= 3 representing the differences between consecutive x(k) and x(k+1).
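A sketch of both suggestions in MATLAB (hedged: names follow the question, and the reformulation assumes Cost_function can be wrapped as shown):

% Fix 1: bounds must cover all 80 variables, not 79.
lb = ones(num_of_var, 1);
up = repmat(500, num_of_var, 1);

% Fix 2 (alternative modelling): optimize the gaps dx(k) directly and
% reconstruct x by a cumulative sum, so x(k+1) - x(k) >= 3 holds by
% construction and no A, b rows are needed for the ordering.
lb_dx = [1; repmat(3, num_of_var-1, 1)];       % x(1) >= 1, every gap >= 3
up_dx = repmat(500, num_of_var, 1);
A_dx  = ones(1, num_of_var);                   % sum(dx) = x(80) <= 500
b_dx  = 500;
g = @(dx) Cost_function(cumsum(dx), my_data);  % wrap the original cost
[dx, fval, exitflag] = ga(g, num_of_var, A_dx, b_dx, [], [], ...
                          lb_dx, up_dx, [], 1:num_of_var, options);
x = cumsum(dx);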

CUDA: Send data from GPU to GPU

I have two Tesla C2070 GPU cards (with P2P & UVA support) between which I want to send and receive data using CUDA.
On GPU A, I have a matrix:
a11 a12 a13 a14
a21 a22 a23 a24
On GPU B, I have another matrix:
b11 b12 b13 b14
b21 b22 b23 b24
I can only send contiguous elements, as in the code below:
int main(void)
{
    float *d_a, *d_b;
    int N = 4;
    int M = 2;
    size_t pitch;
    cudaSetDevice(0);
    cudaMallocPitch((void **)&d_a, &pitch, sizeof(float)*N, M);
    cudaDeviceEnablePeerAccess(1, 0);
    cudaSetDevice(1);
    cudaMallocPitch((void **)&d_b, &pitch, sizeof(float)*N, M);
    cudaDeviceEnablePeerAccess(0, 0);

    // Initialization for d_a
    // Initialization for d_b

    // Copy M*N/2 elements from d_a to d_b, starting from d_a[1]
    cudaMemcpy(&d_b[1], &d_a[1], M*N/2*sizeof(float), cudaMemcpyDefault);

    // Print result d_b
}
How do I send the last two columns of the matrix from GPU A to GPU B directly, so that on GPU B I get:
b11 b12 a13 a14
b21 b22 a23 a24
Similarly, how do I send the first row of the matrix from GPU A to GPU B, so that on GPU B I get:
a11 a12 a13 a14
b21 b22 b23 b24
And if I have a 1-D array as follows: a1 a2 a3 a4 a5 a6 a7 a8 ...
How do I send elements 1, 4, 7, ... (every 3rd element) from GPU A to replace the same ones on GPU B?
The API call you need to look at is cudaMemcpy2D. This allows fairly straightforward copying of all or portions of pitched data, and it is the natural counterpart of cudaMallocPitch.
If we leave aside the multi-GPU aspect of your question for a moment and just focus on the copying of pitched data (on UVA platforms, how GPU-to-GPU transfers are handled is basically an implementation detail you don't need to know about), there are only three things required to do what you want; a sketch follows the list:
Use pointer arithmetic to calculate the starting address of the source and destination memory.
Remember that the pitch of the source and destination memory is always constant (the value returned by cudaMallocPitch). Note that you should keep a pitch for each pointer you allocate: there is no guarantee that the API will return the same pitch for two different allocations of the same size, particularly if the allocations are not on the same device.
Remember that the width of any transfer is specified in bytes, while the number of rows is always a count, not a byte value.
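For instance, applied to the question's 2x4 row-major matrices, the "last two columns" transfer might look like the sketch below. It assumes separate pitches pitch_a and pitch_b were kept from the two cudaMallocPitch calls (the question's code overwrites a single pitch variable):

// Copy columns 2..3 of the 2x4 row-major pitched matrices from d_a to d_b.
// Element (row, col) lives at base + row*pitch + col*sizeof(float), so
// offsetting the base pointer by two floats selects the last two columns.
cudaMemcpy2D(d_b + 2, pitch_b,    // dst start (b13), dst pitch in bytes
             d_a + 2, pitch_a,    // src start (a13), src pitch in bytes
             2 * sizeof(float),   // width: two columns, in bytes
             M,                   // height: number of rows (a count)
             cudaMemcpyDefault);  // UVA works out the peer-to-peer direction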
Here is a concrete example based on the code you posted, which copies a subset of data between two pitched allocations, assuming column-major order. Note that for brevity I have encapsulated most of the addressing mechanics in a simple class which can be used on both the host and the device. Two 5x10 pitched arrays are allocated, and a 3x3 subarray is copied from one to the other. I have used kernel printf to show the copying action:
#include <cstdio>

struct mat
{
    int m, n;
    size_t pitch;
    char *ptr;

    __device__ __host__
    mat(int _m, int _n, size_t _pitch, char *_ptr) : m(_m), n(_n), pitch(_pitch), ptr(_ptr) {};

    __device__ __host__ float * getptr(int i=0, int j=0) {
        float * col = (float*)(ptr + j*pitch);
        return col + i;
    };

    __device__ __host__ float& operator() (int i, int j) {
        return *getptr(i,j);
    };

    __device__ __host__
    void print() {
        for(int i=0; i<m; i++) {
            for(int j=0; j<n; j++) {
                printf("%4.f ", (*this)(i,j));
            }
            printf("\n");
        }
    };
};

__global__ void printmat(struct mat x) { x.print(); }

int main(void)
{
    const int M = 5, N = 10;
    const size_t hostpitch = M * sizeof(float);

    float *a = new float[M*N], *b = new float[M*N];
    mat A(M, N, hostpitch, (char *)(a));
    mat B(M, N, hostpitch, (char *)(b));
    for(int v=0, j=0; j<N; j++) {
        for(int i=0; i<M; i++) {
            A(i,j) = (float)v; B(i,j) = (float)(100+v++);
        }
    }

    char *d_a, *d_b;
    size_t pitch_a, pitch_b;
    cudaMallocPitch((void **)&d_a, &pitch_a, sizeof(float)*M, N);
    cudaMallocPitch((void **)&d_b, &pitch_b, sizeof(float)*M, N);
    mat Ad(M, N, pitch_a, d_a); mat Bd(M, N, pitch_b, d_b);

    cudaMemcpy2D(Ad.getptr(), Ad.pitch, A.getptr(), A.pitch,
                 A.pitch, A.n, cudaMemcpyHostToDevice);
    printmat<<<1,1>>>(Ad);
    cudaMemcpy2D(Bd.getptr(), Bd.pitch, B.getptr(), B.pitch,
                 B.pitch, B.n, cudaMemcpyHostToDevice);
    printmat<<<1,1>>>(Bd);

    // copy a 3x3 subarray starting at (1,1) from Bd to Ad
    int ci = 3, cj = 3;
    cudaMemcpy2D(Ad.getptr(1,1), Ad.pitch, Bd.getptr(1,1), Bd.pitch,
                 ci*sizeof(float), cj, cudaMemcpyDeviceToDevice);
    printmat<<<1,1>>>(Ad); cudaDeviceSynchronize();

    return 0;
}
which does this:
>nvcc -m32 -Xptxas="-v" -arch=sm_21 pitched.cu
pitched.cu
tmpxft_00001348_00000000-5_pitched.cudafe1.gpu
tmpxft_00001348_00000000-10_pitched.cudafe2.gpu
pitched.cu
ptxas : info : 0 bytes gmem, 8 bytes cmem[2]
ptxas : info : Compiling entry function '_Z8printmat3mat' for 'sm_21'
ptxas : info : Function properties for _Z8printmat3mat
8 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas : info : Used 23 registers, 48 bytes cmem[0]
tmpxft_00001348_00000000-5_pitched.cudafe1.cpp
tmpxft_00001348_00000000-15_pitched.ii
>cuda-memcheck a.exe
========= CUDA-MEMCHECK
0 5 10 15 20 25 30 35 40 45
1 6 11 16 21 26 31 36 41 46
2 7 12 17 22 27 32 37 42 47
3 8 13 18 23 28 33 38 43 48
4 9 14 19 24 29 34 39 44 49
100 105 110 115 120 125 130 135 140 145
101 106 111 116 121 126 131 136 141 146
102 107 112 117 122 127 132 137 142 147
103 108 113 118 123 128 133 138 143 148
104 109 114 119 124 129 134 139 144 149
0 5 10 15 20 25 30 35 40 45
1 106 111 116 21 26 31 36 41 46
2 107 112 117 22 27 32 37 42 47
3 108 113 118 23 28 33 38 43 48
4 9 14 19 24 29 34 39 44 49
========= ERROR SUMMARY: 0 errors
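The same call also covers the 1-D strided case in the question: treat the stride as the pitch and a single element as the row width. A sketch, assuming n is the array length and divisible by 3:

// Copy elements 1, 4, 7, ... (every 3rd float) from d_a into the same
// positions in d_b. Each "row" is one float wide, and consecutive rows
// are spaced 3 floats apart in both source and destination.
cudaMemcpy2D(d_b, 3 * sizeof(float),   // dst, dst "pitch" = stride in bytes
             d_a, 3 * sizeof(float),   // src, src "pitch" = stride in bytes
             sizeof(float),            // width: one element, in bytes
             n / 3,                    // height: number of elements copied
             cudaMemcpyDefault);

Narrow strided copies like this are correct but not especially fast; for large transfers a small packing kernel usually wins.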
