Scheduling periodic requests to multiple devices using a shared channel - algorithm

I need to request data periodically from a configurable number of devices at configurable intervals (per device). All devices are connected to a shared data bus, so only one device can send data at a time.
The devices have very little memory, so each device can only keep the data for a certain period of time before it is overwritten by the next chunk. This means I need to make sure to request data from any given device while it is still available, or else it will be lost.
I am looking for an algorithm that, given a list of devices and their respective timing properties, finds a feasible schedule in order to achieve minimal data loss.
I guess each device could be formally described using the following properties:
data_interval: time it takes for the next chunk of data to become available
max_request_interval: maximum amount of time between requests that will not cause data loss
processing_time: time it takes to send a request and fully receive the corresponding response containing the requested data
Basically, I need to make sure to request data from every device once its data is ready and not yet expired, while keeping in mind the deadlines for all other devices.
Is there some sort of algorithm for this kind of problem? I highly doubt I'm the first person to ever encounter a situation like this. Searching for existing solutions online didn't yield many useful results, mainly because scheduling algorithms are mostly used for operating systems and such, where scheduled processes can be paused and resumed at will. I can't do this in my case, however, since the process of requesting and receiving a chunk of data is atomic, i.e. it can only be performed in its entirety or not at all.

I solved this problem using non-preemptive deadline monotonic scheduling (NPDMS): requests are prioritized by their deadlines (shorter deadline = higher priority), and once a request has started it always runs to completion.
Here's some Python code for anyone interested:
"""This module implements non-preemptive deadline monotonic scheduling (NPDMS) to compute a schedule of periodic,
non-preemptable requests to slave devices connected to a shared data bus"""
from math import gcd
from functools import reduce
from typing import List
class Slave:
def __init__(self, name: str, period: int, processing_time: int, offset=0, deadline=None):
self.name = name
self.period = int(period)
self.processing_time = int(processing_time)
self.offset = int(offset)
if self.offset >= self.period:
raise ValueError("Slave %s: offset must be < period" % name)
self.deadline = int(deadline) if deadline else self.period
if self.deadline > self.period:
raise ValueError("Slave %s: deadline must be <= period" % name)

class Request:
    def __init__(self, slave: Slave, start_time: int):
        self.slave = slave
        self.start_time = start_time
        self.end_time = start_time + slave.processing_time
        self.duration = self.end_time - self.start_time

    def overlaps_with(self, other: 'Request'):
        min_duration = self.duration + other.duration
        start = min(other.start_time, self.start_time)
        end = max(other.end_time, self.end_time)
        effective_duration = end - start
        return effective_duration < min_duration

class Scenario:
    def __init__(self, *slaves: Slave):
        self.slaves = list(slaves)
        self.slaves.sort(key=lambda slave: slave.deadline)
        # LCM of all slave periods
        self.cycle_period = reduce(lambda a, b: a * b // gcd(a, b), [slave.period for slave in slaves])

    def compute_schedule(self, resolution=1) -> 'Schedule':
        request_pool = []
        for t in range(0, self.cycle_period, resolution):
            for slave in self.slaves:
                if (t - slave.offset) % slave.period == 0 and t >= slave.offset:
                    request_pool.append(Request(slave, t))
        request_pool.reverse()

        scheduled_requests = []
        current_request = request_pool.pop()
        t = current_request.start_time
        while t < self.cycle_period:
            ongoing_request = Request(current_request.slave, t)
            while ongoing_request.start_time <= t < ongoing_request.end_time:
                t += resolution
            scheduled_requests.append(ongoing_request)
            if len(request_pool):
                current_request = request_pool.pop()
                t = max(current_request.start_time, t)
            else:
                current_request = None
                break
        if current_request:
            request_pool.append(current_request)
        return Schedule(self, scheduled_requests, request_pool)

class Schedule:
    def __init__(self, scenario: Scenario, requests: List[Request], unscheduled: List[Request] = None):
        self.scenario = scenario
        self.requests = requests
        self.unscheduled_requests = unscheduled if unscheduled else []

        self._utilization = 0
        for slave in self.scenario.slaves:
            self._utilization += float(slave.processing_time) / float(slave.period)

        self._missed_deadlines_dict = {}
        for slave in self.scenario.slaves:
            periods = scenario.cycle_period // slave.period
            missed_deadlines = []
            for period in range(periods):
                start = period * slave.period
                end = start + slave.period
                request = self._find_request(slave, start, end)
                if request:
                    if request.start_time < (start + slave.offset) or request.end_time > start + slave.deadline:
                        missed_deadlines.append(request)
            if missed_deadlines:
                self._missed_deadlines_dict[slave] = missed_deadlines

        self._overlapping_requests = []
        for i in range(0, len(requests)):
            if i == 0:
                continue
            previous_request = requests[i - 1]
            current_request = requests[i]
            if current_request.overlaps_with(previous_request):
                self._overlapping_requests.append((current_request, previous_request))

        self._incomplete_requests = []
        for request in self.requests:
            if request.duration < request.slave.processing_time:
                self._incomplete_requests.append(request)

    @property
    def is_feasible(self) -> bool:
        return self.utilization <= 1 \
               and not self.has_missed_deadlines \
               and not self.has_overlapping_requests \
               and not self.has_unscheduled_requests \
               and not self.has_incomplete_requests

    @property
    def utilization(self) -> float:
        return self._utilization

    @property
    def has_missed_deadlines(self) -> bool:
        return len(self._missed_deadlines_dict) > 0

    @property
    def has_overlapping_requests(self) -> bool:
        return len(self._overlapping_requests) > 0

    @property
    def has_unscheduled_requests(self) -> bool:
        return len(self.unscheduled_requests) > 0

    @property
    def has_incomplete_requests(self) -> bool:
        return len(self._incomplete_requests) > 0

    def _find_request(self, slave, start, end) -> Optional[Request]:
        for r in self.requests:
            if r.slave == slave and r.start_time >= start and r.end_time < end:
                return r
        return None

def read_scenario(file) -> Scenario:
    from csv import DictReader
    return Scenario(*[Slave(**row) for row in DictReader(file)])


def write_schedule(schedule: Schedule, file):
    from csv import DictWriter
    writer = DictWriter(file, fieldnames=["name", "start", "end"])
    writer.writeheader()
    for request in schedule.requests:
        writer.writerow({"name": request.slave.name, "start": request.start_time, "end": request.end_time})

if __name__ == '__main__':
    import argparse
    import sys

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     description='Use non-preemptive deadline monotonic scheduling (NPDMS) to\n'
                                                 'compute a schedule of periodic, non-preemptable requests to\n'
                                                 'slave devices connected to a shared data bus.\n\n'
                                                 'Prints the computed schedule to stdout as CSV. Returns with\n'
                                                 'exit code 0 if the schedule is feasible, else 1.')
    parser.add_argument("csv_file", metavar="SCENARIO", type=str,
                        help="A csv file describing the scenario, i.e. a list\n"
                             "of slave devices with the following properties:\n"
                             "* name: name/id of the slave device\n\n"
                             "* period: duration of the period of time during\n"
                             "  which requests must be dispatched\n\n"
                             "* processing_time: amount of time it takes to\n"
                             "  fully process a request (worst-case)\n\n"
                             "* offset: offset for initial phase-shifting\n"
                             "  (default: 0)\n\n"
                             "* deadline: amount of time during which data is\n"
                             "  available after the start of each period\n"
                             "  (default: <period>)")
    parser.add_argument("-r", "--resolution", type=int, default=1,
                        help="The resolution used to simulate the passage of time (default: 1)")
    args = parser.parse_args()

    with open(args.csv_file, 'r') as f:
        schedule = read_scenario(f).compute_schedule(args.resolution)
    write_schedule(schedule, sys.stdout)
    exit(0 if schedule.is_feasible else 1)
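For what it's worth, here is a hypothetical example of a scenario file (device names and numbers are made up; period roughly corresponds to the data_interval from the question and deadline to max_request_interval, all in the same arbitrary time unit as the resolution):

name,period,processing_time,offset,deadline
sensor_a,10,2,0,10
sensor_b,20,3,0,15
sensor_c,40,5,0,40

Assuming the module is saved as npdms.py (the file name is up to you), running python npdms.py scenario.csv prints the computed schedule as name,start,end rows on stdout and exits with code 0 if the schedule is feasible, else 1.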

Related

Improve code result speed by multiprocessing

I'm self-studying Python and this is my first piece of code.
I'm working on analyzing logs from our servers. Usually I need to analyze a full day of logs. I created this script (this is an example with simplified logic) just to check the speed. With straightforward sequential code, analyzing 20 million rows takes about 12-13 minutes. I need to handle 200 million rows in 5 minutes.
What I tried:
Multiprocessing (ran into an issue with shared memory, which I think I fixed). But the result: 300K rows = 20 seconds, no matter how many processes I use. (PS: I also need to control the number of processes in advance.)
Threading (I found that it doesn't give any speedup: 300K rows = 2 sec, but the normal code is also 300K = 2 sec).
asyncio (I thought the script was slow because it needs to read many files). The result is the same as threading: 300K = 2 sec.
In the end I think all three of my scripts are incorrect and don't work properly.
PS: I try to avoid specific Python modules (like pandas) because that would make the script harder to run on different servers. Better to stick to common libraries.
Please help me check the first one - multiprocessing.
import csv
import os
from multiprocessing import Process, Queue, Value, Manager

file = {"hcs.log", "hcs1.log", "hcs2.log", "hcs3.log"}

def argument(m, a, n):
    proc_num = os.getpid()
    a_temp_m = a["vod_miss"]
    a_temp_h = a["vod_hit"]
    with open(os.getcwd() + '/' + m, newline='') as hcs_1:
        hcs_2 = csv.reader(hcs_1, delimiter=' ')
        for j in hcs_2:
            if j[3].find('MISS') != -1:
                a_temp_m[n] = a_temp_m[n] + 1
            elif j[3].find('HIT') != -1:
                a_temp_h[n] = a_temp_h[n] + 1
    a["vod_miss"][n] = a_temp_m[n]
    a["vod_hit"][n] = a_temp_h[n]

if __name__ == '__main__':
    procs = []
    manager = Manager()
    vod_live_cuts = manager.dict()
    i = "vod_hit"
    ii = "vod_miss"
    cpu = 1
    n = 1
    vod_live_cuts[i] = manager.list([0] * cpu)
    vod_live_cuts[ii] = manager.list([0] * cpu)
    for m in file:
        proc = Process(target=argument, args=(m, vod_live_cuts, (n-1)))
        procs.append(proc)
        proc.start()
        if n >= cpu:
            n = 1
            proc.join()
        else:
            n += 1
    [proc.join() for proc in procs]
    [proc.close() for proc in procs]
I expect each file to be processed by an independent process via def argument, and finally all results to be saved in the dict vod_live_cuts. For each process I added an independent list in the dict; I thought it would help with cross-process access to this parameter. But maybe it's the wrong way :(
Using IPC is costly, so only use "shared objects" for saving the final result, not for intermediate results while parsing the file.
Limiting the number of processes is done with a multiprocessing.Pool; the following code uses it to reach the maximum hard-disk speed, so you only need to post-process the results.
You can only parse data as fast as your HDD can read it (typically 30-80 MB/s), so if you need to improve performance further you should use an SSD or RAID0 for higher disk speed; you cannot get much faster than this without changing your hardware.
import csv
import os
from multiprocessing import Process, Queue, Value, Manager, Pool

file = {"hcs.log", "hcs1.log", "hcs2.log", "hcs3.log"}

def argument(m, a):
    proc_num = os.getpid()
    a_temp_m_n = 0  # make it local to process
    a_temp_h_n = 0  # as shared lists use IPC
    with open(os.getcwd() + '/' + m, newline='') as hcs_1:
        hcs_2 = csv.reader(hcs_1, delimiter=' ')
        for j in hcs_2:
            if j[3].find('MISS') != -1:
                a_temp_m_n = a_temp_m_n + 1
            elif j[3].find('HIT') != -1:
                a_temp_h_n = a_temp_h_n + 1
    a["vod_miss"].append(a_temp_m_n)
    a["vod_hit"].append(a_temp_h_n)

if __name__ == '__main__':
    manager = Manager()
    vod_live_cuts = manager.dict()
    i = "vod_hit"
    ii = "vod_miss"
    cpu = 1
    vod_live_cuts[i] = manager.list()
    vod_live_cuts[ii] = manager.list()
    with Pool(cpu) as pool:
        tasks = []
        for m in file:
            task = pool.apply_async(argument, args=(m, vod_live_cuts))
            tasks.append(task)
        for task in tasks:
            task.get()
    print(list(vod_live_cuts[i]))
    print(list(vod_live_cuts[ii]))
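If you want to avoid the Manager (and its IPC) entirely, a minimal sketch of the same idea is to keep all counting local to each worker and only return one tuple per file, aggregating in the parent. The file names and the assumption that the fourth space-separated column contains HIT/MISS are taken from the question:

import csv
import os
from multiprocessing import Pool

files = ["hcs.log", "hcs1.log", "hcs2.log", "hcs3.log"]

def count_hits_misses(path):
    # All counting stays local to the worker; only the final tuple is sent back.
    miss = hit = 0
    with open(os.path.join(os.getcwd(), path), newline='') as f:
        for row in csv.reader(f, delimiter=' '):
            if 'MISS' in row[3]:
                miss += 1
            elif 'HIT' in row[3]:
                hit += 1
    return miss, hit

if __name__ == '__main__':
    with Pool(4) as pool:  # number of worker processes; tune to your CPU/disk
        results = pool.map(count_hits_misses, files)
    print("miss:", sum(m for m, _ in results))
    print("hit:", sum(h for _, h in results))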

How do I get historical candlestick data or kline from Phemex Public API?

I need to be able to extract historical candlestick data (such as Open, Close, High, Low, and Volume) of a candlestick in differing intervals (1m, 3m, 5m, 1H, etc.) at a specified time (timestamps) from Phemex.
Other exchanges, such as Binance or FTX, seem to provide REST or WebSocket APIs for this, yet I can't seem to find one for Phemex. Mind helping me resolve this issue? Thank you so much.
Steps I have taken, yet found no resolution:
Went to https://phemex.com/user-guides/api-overview
Went to https://github.com/phemex/phemex-api-docs/blob/master/Public-Contract-API-en.md
None of the items listed in 'Market Data API List' seem to do the task
This code will get the candles and save them to a CSV file. Hope this helps :)
import csv
import time

import ccxt
import numpy as np

exchange = ccxt.phemex({
    'options': {'defaultType': 'swap'},
    'enableRateLimit': True
})

# Load the markets
markets = exchange.load_markets()

curent_time = int(time.time() * 1000)
one_min = 60000

def get_all_candels(symbol, start_time, stop_time):
    counter = 0
    candel_counter = 0
    data_set = []
    t = 0
    while t < stop_time:
        if data_set == []:
            block = exchange.fetch_ohlcv(symbol, '1m', start_time)
            for candle in block:
                if candle == []:
                    break
                data_set.append(candle)
            last_time_in_block = block[-1][0]
            counter += 1
            candel_counter += len(block)
            print(f'{counter} - {block[0]} - {candel_counter} - {last_time_in_block}')
        if data_set != []:
            t = last_time_in_block + one_min
            block = exchange.fetch_ohlcv(symbol, '1m', t)
            if block == []:
                break
            for candle in block:
                if candle == []:
                    break
                data_set.append(candle)
            last_time_in_block = block[-1][0]
            candel_counter += len(block)
            counter += 1
            print(f'{counter} - {block[0]} - {candel_counter} - {last_time_in_block}')
        time.sleep(1)
    return data_set

data_set = get_all_candels('BTCUSD', 1574726400000, curent_time)
print(np.shape(data_set))

with open('raw.csv', 'w', newline='') as csv_file:
    column_names = ['time', 'open', 'high', 'low', 'close', 'volume']
    csv_writer = csv.DictWriter(csv_file, fieldnames=column_names)
    csv_writer.writeheader()
    for candel in data_set:
        csv_writer.writerow({
            'time': candel[0],
            'open': candel[1],
            'high': candel[2],
            'low': candel[3],
            'close': candel[4],
            'volume': candel[5]
        })
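The loop pages through history by taking the timestamp of the last candle in each fetched block and requesting again from one minute later; the time.sleep(1) (together with enableRateLimit in the ccxt config) keeps the request rate under the exchange's limit. For other intervals (3m, 5m, 1H, ...), pass the corresponding timeframe string to fetch_ohlcv and adjust the increment accordingly.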

Fitting Lightgbm distributed with lgb.train hangs

I'm trying to learn how to use lightgbm distributed.
I wrote a simple hello-world kind of code where I use the iris dataset with 150 rows and split it into train (100 rows) and test (50 rows). The train and test sets are then each split further into two parts, and each part is fed to one of two machines with the appropriate rank.
The problem I see is that lgb.train hangs.
Here is the code:
import argparse
import logging
import lightgbm as lgb
import pandas as pd
from sklearn import datasets
import socket

print('lightgbm', lgb.__version__)

HOST = socket.gethostname()
ip_address = socket.gethostbyname(HOST)
print("IP=", ip_address)

# looks like lightgbm operates only with ip addresses
IPS = ['10.121.22.166', '10.121.22.83']
assert ip_address in IPS

logger = logging.getLogger(__name__)

pd.set_option('display.max_rows', 4)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 10000)
pd.set_option('max_colwidth', 100)
pd.set_option('precision', 5)

def read_train_data(rank):
    iris = datasets.load_iris()
    iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)

    partition = rank
    assert partition < 2

    separate = 100
    train_df = iris_df.iloc[:separate]
    test_df = iris_df.iloc[separate:]

    separate_train = 60
    separate_test = 30

    if partition == 0:
        train_df = train_df.iloc[:separate_train]
        test_df = test_df.iloc[:separate_test]
    else:
        train_df = train_df.iloc[separate_train:]
        test_df = test_df.iloc[separate_test:]

    def get_lgb_dataset(df):
        target_column = df.columns[-1]
        columns = df.columns[:-1]
        assert target_column not in columns

        print('Target column', target_column)
        x = df[columns]
        y = df[target_column]
        print(x)

        ds = lgb.Dataset(free_raw_data=False, data=x, label=y, params={
            "enable_bundle": False
        })
        ds.construct()
        return ds

    dtrain = get_lgb_dataset(train_df)
    dtest = get_lgb_dataset(test_df)
    return dtrain, dtest

def train(args):
    port0 = 56456

    rank = IPS.index(ip_address)
    print("Rank=", rank, HOST)
    print("RR", rank)

    dtrain, dtest = read_train_data(rank=rank)

    params = {'boosting_type': 'gbdt',
              'class_weight': None,
              'colsample_bytree': 1.0,
              'importance_type': 'split',
              'learning_rate': 0.1,
              'max_depth': 2,
              'min_child_samples': 20,
              'min_child_weight': 0.001,
              'min_split_gain': 0.0,
              'n_estimators': 1,
              'num_leaves': 31,
              'objective': 'regression',
              'metric': 'rmse',
              'random_state': None,
              'reg_alpha': 0.0,
              'reg_lambda': 0.0,
              'silent': False,
              'subsample': 1.0,
              'subsample_for_bin': 200000,
              'subsample_freq': 0,
              'tree_learner': 'data_parallel',
              'num_threads': 48,
              'machines': ','.join([f'{machine}:{port0}' for i, machine in enumerate(IPS)]),
              'local_listen_port': port0,
              'time_out': 120,
              'num_machines': len(IPS)
              }
    print(params)

    logging.info("starting to train lgb at node with rank %d", rank)
    evals_result = {}
    if args.scikit == 1:
        print("Using scikit learn")
        bst = lgb.sklearn.LGBMRegressor(**params)
        bst.fit(
            dtrain.data,
            dtrain.label,
            eval_set=[(dtest.data, dtest.label)],
        )
    else:
        print("Using regular LGB")
        bst = lgb.train(params,
                        dtrain,
                        valid_sets=[dtest],
                        evals_result=evals_result)
        print(evals_result)

    logging.info("finish xgboost training at node with rank %d", rank)
    return bst

def main(args):
    logging.info("starting the train job")
    model = train(args)
    pd.set_option('display.max_rows', 500)
    print("OUT", model.__class__)
    try:
        print(model.trees_to_dataframe())
    except:
        print(model.booster_.trees_to_dataframe())

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--scikit',
        help='scikit',
        default=0,
        type=int,
    )

    main(parser.parse_args())
I can run it with the scikit fit interface by running python simple_distributed_lgb_test.py --scikit 1 on the two machines, and it produces a reasonable result.
However, when I use --scikit 0 (which uses lgb.train), the fitting just hangs on both nodes. The last messages before it hangs:
[LightGBM] [Info] Total Bins 22
[LightGBM] [Info] Number of data points in the train set: 40, number of used features: 2
[LightGBM] [Warning] Found whitespace in feature_names, replace with underlines
[LightGBM] [Info] Start training from score 0.873750
Is that a bug or expected behavior? dask.py in lightgbm does use the scikit-learn fit interface.
I use an overnight master version, 3.2.1.99 (commit 5b7a6f3e7150aeb704d1dd2b852d246af3e913a3 from Jul 12, to be exact).
UPDATE 1
I'm trying to dig into the code. So far I see a few things:
The scikit fit interface appears to have an extra synchronization step before fitting the first tree; lgb.train doesn't have it. I don't know yet where it comes from (I see some Network::Allreduce operations).
It appears that with the scikit interface the workers are synchronized - each worker knows the correct sizes of the blocks to send and receive during the reducescatter operations. For example, on the first allreduce worker1 sends 208 blocks and receives 368 blocks of data (in Linkers::SendRecv), while worker2 is the reverse - it sends 368 and receives 208. So the allreduce completes fine.
With lgb.train, on the contrary, the workers are not synchronized - each worker has its own numbers of send and receive blocks for the reducescatter at the first DataParallelTreeLearner::FindBestSplits, and they don't match. Worker1 sends 208 and wants to receive 400; worker2 sends 192 and wants to receive 176. So the worker that wants to receive more just hangs, and the other worker eventually hangs too.
Possibly it has something to do with lgb.Dataset. That thing may need to have the same bins or something. I tried to force that via the forcedbins_filename parameter, but it doesn't seem to help with lgb.train.
UPDATE 2
Success. If I remove the following line from the example:
ds.construct()
everything works. So I guess we can't call construct() on the Dataset when using distributed training.
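For reference, a trimmed version of the dataset helper with that line removed (the prints and assert from the original are omitted here):

def get_lgb_dataset(df):
    target_column = df.columns[-1]
    columns = df.columns[:-1]
    x = df[columns]
    y = df[target_column]
    # No ds.construct() here: let lgb.train construct the Dataset itself,
    # which appears to avoid the mismatched reducescatter block sizes described above.
    ds = lgb.Dataset(free_raw_data=False, data=x, label=y, params={
        "enable_bundle": False
    })
    return ds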

Confused about the use of validation set here

In main.py of the px2graph project, the training and validation part looks like this:
splits = [s for s in ['train', 'valid'] if opt.iters[s] > 0]
start_round = opt.last_round - opt.num_rounds

# Main training loop
for round_idx in range(start_round, opt.last_round):
    for split in splits:

        print("Round %d: %s" % (round_idx, split))
        loader.start_epoch(sess, split, train_flag, opt.iters[split] * opt.batchsize)

        flag_val = split == 'train'

        for step in tqdm(range(opt.iters[split]), ascii=True):
            global_step = step + round_idx * opt.iters[split]
            to_run = [sample_idx, summaries[split], loss, accuracy]

            if split == 'train': to_run += [optim]

            # Do image summaries at the end of each round
            do_image_summary = step == opt.iters[split] - 1
            if do_image_summary: to_run[1] = image_summaries[split]

            # Start with lower learning rate to prevent early divergence
            t = 1/(1+np.exp(-(global_step-5000)/1000))
            lr_start = opt.learning_rate / 15
            lr_end = opt.learning_rate
            tmp_lr = (1-t) * lr_start + t * lr_end

            # Run computation graph
            result = sess.run(to_run, feed_dict={train_flag:flag_val, lr:tmp_lr})

            out_loss = result[2]
            out_accuracy = result[3]

            if sum(out_loss) > 1e5:
                print("Loss diverging...exiting before code freezes due to NaN values.")
                print("If this continues you may need to try a lower learning rate, a")
                print("different optimizer, or a larger batch size.")
                return

            time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, global_step, out_loss, out_accuracy))

            # Log data
            if split == 'valid' or (split == 'train' and step % 20 == 0) or do_image_summary:
                writer.add_summary(result[1], global_step)
                writer.flush()

        # Save training snapshot
        saver.save(sess, 'exp/' + opt.exp_id + '/snapshot')
        with open('exp/' + opt.exp_id + '/last_round', 'w') as f:
            f.write('%d\n' % round_idx)
It seems that the author only gets the result for each batch of the validation set. I am wondering: if I want to observe whether the model is improving or reaching its best performance, should I use the result on the whole validation set?
If the validation set is small enough, you can compute the loss and accuracy on the whole validation set during training to observe the performance. If the validation set is too large, it is better to compute batch-wise validation results and average them over multiple steps.
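A rough sketch of the batch-wise approach (the helper below is my own addition, not part of the px2graph code; it just averages whatever per-batch numbers you collect from sess.run during the 'valid' split):

def mean_metrics(batch_losses, batch_accuracies):
    # Average per-batch validation results into one number per round.
    n = len(batch_losses)
    return sum(batch_losses) / n, sum(batch_accuracies) / n

# Hypothetical usage: append out_loss and out_accuracy to two lists on every
# 'valid' step, then call mean_metrics once the inner loop over
# opt.iters['valid'] steps has finished, and compare that value across rounds.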

Q: Resample bitstamp bars in pyalgotrade

I'm working with an algo on the Bitstamp client that works better with 30-min bars rather than seeing each trade as a bar.
Is there a "right" way to resample those bars into 30-min intervals on the fly?
I can do it no problem with the bitcoincharts feed, but I need the execution from the Bitstamp broker, so I was hoping to do it with that one.
This should help:
from pyalgotrade.bitstamp import barfeed
from pyalgotrade.bitstamp import broker
from pyalgotrade import strategy

class Strategy(strategy.BaseStrategy):
    def __init__(self, feed, brk):
        super(Strategy, self).__init__(feed, brk)
        self._instrument = "BTC"
        self._bid = None
        self._ask = None
        self._resampledBF = self.resampleBarFeed(60, self.onResampledBars)

        # Subscribe to order book update events to get bid/ask prices to trade.
        feed.getOrderBookUpdateEvent().subscribe(self._onOrderBookUpdate)

    def _onOrderBookUpdate(self, orderBookUpdate):
        bid = orderBookUpdate.getBidPrices()[0]
        ask = orderBookUpdate.getAskPrices()[0]

        if bid != self._bid or ask != self._ask:
            self._bid = bid
            self._ask = ask
            self.info("Order book updated. Best bid: %s. Best ask: %s" % (self._bid, self._ask))

    def onResampledBars(self, dt, bars):
        bar = bars[self._instrument]
        self.info("Resampled - Price: %s. Volume: %s." % (bar.getClose(), bar.getVolume()))

    def onBars(self, bars):
        bar = bars[self._instrument]
        self.info("Price: %s. Volume: %s." % (bar.getClose(), bar.getVolume()))

def main():
    barFeed = barfeed.LiveTradeFeed()
    brk = broker.PaperTradingBroker(1000, barFeed)
    strat = Strategy(barFeed, brk)
    strat.run()

if __name__ == "__main__":
    main()
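The example above resamples to 60-second bars. The resample frequency is given in seconds (pyalgotrade's bar.Frequency constants are just second counts), so for the 30-minute bars from the question you would, if I recall the API correctly, use:

        self._resampledBF = self.resampleBarFeed(30 * 60, self.onResampledBars)  # 1800 seconds = 30-minute bars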
