Julia - LoadError on worker with function in the file (main) - parallel-processing

I am starting to experiment with Julia for parallel processing.
I am using the @spawn macro in this example, but I had the same error using the remotecall_fetch function.
Following is the code:
function count_proteins(fpath::String)
    cnt::Int = 0
    if !isfile(fpath)
        write(Base.stderr, "FASTA not found!")
    else
        reader = open(FASTA.Reader, fpath)
        for record in reader
            cnt += 1
        end
    end
    # return the count
    cnt
end
"""Count sequences in parallel."""
function parallel_count_proteins(fPaths::Array{String, 1}, threads::Int16=4)
# initialize workers
addprocs(threads)
fut = Dict{Int, Future}()
# launch the jobs
for (i, fastaPath) in enumerate(fPaths)
r = #spawn count_proteins(fastaPath)
fut[i] = r
end
for (i, res) in fut
s = fetch(res)
end
end
### MAIN ###
flist = ["f1", "f2", "f3", "f4"]
threads = Int16(2)
parallel_count_proteins(flist, threads)
The error happens when I try to fetch the results using fetch():
ERROR: LoadError: On worker 3
...and here is the stacktrace:
Stacktrace:
[1] #remotecall_fetch#149(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.Worker, ::Distributed.RRID) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:379
[2] remotecall_fetch(::Function, ::Distributed.Worker, ::Distributed.RRID, ::Vararg{Any,N} where N) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:371
[3] #remotecall_fetch#152 at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:406 [inlined]
[4] remotecall_fetch at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:406 [inlined]
[5] call_on_owner at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:479 [inlined]
[6] fetch(::Future) at /Users/osx/buildbot/slave/package_osx64/build/usr/share/julia/stdlib/v1.1/Distributed/src/remotecall.jl:511
[7] parallel_count_proteins(::Array{String,1}, ::Int16) at /Users/salvocos/Google_Drive/julia_programming/mcl_graph_to_label.jl:150
[8] top-level scope at none:0
[9] include at ./boot.jl:326 [inlined]
[10] include_relative(::Module, ::String) at ./loading.jl:1038
[11] include(::Module, ::String) at ./sysimg.jl:29
[12] exec_options(::Base.JLOptions) at ./client.jl:267
[13] _start() at ./client.jl:436
I know I need to make all the workers aware of the existence of the function count_proteins, but I am not quite sure how to do it.

As you said, you need to make count_proteins available to all the worker processes.
You can use the @everywhere macro before the function definitions to make them available to all the workers. @everywhere executes a given expression on all workers.
Another way would be to put the functions that should be available to workers inside another .jl file and @everywhere include("my_helper_functions.jl"), or to put your function definitions inside a begin...end block with an @everywhere right before begin and run the block (see the sketch below). You need to do this after the creation of the worker processes. Putting such functions inside a module/package and running @everywhere using MyModule should also work.
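For instance, the begin...end variant looks like this (a minimal sketch; double is a hypothetical helper and the worker count is arbitrary):

using Distributed
addprocs(2)                  # workers must exist before @everywhere runs

@everywhere begin
    # everything in this block is evaluated on every worker (and on the master)
    using Printf
    double(x) = 2x           # hypothetical helper, now defined on all workers
end

fetch(@spawn double(21))     # => 42, runs on whichever worker is chosen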
For your code the solution would be
# addprocs here, before the @everywhere definitions
addprocs(2)

@everywhere function count_proteins(fpath::String)
    cnt::Int = 0
    if !isfile(fpath)
        write(Base.stderr, "FASTA not found!")
    else
        reader = open(FASTA.Reader, fpath)
        for record in reader
            cnt += 1
        end
    end
    # return the count
    cnt
end
"""Count sequences in parallel."""
function parallel_count_proteins(fPaths::Array{String, 1})
fut = Dict{Int, Future}()
# launch the jobs
for (i, fastaPath) in enumerate(fPaths)
r = #spawn count_proteins(fastaPath)
fut[i] = r
end
for (i, res) in fut
s = fetch(res)
end
end
### MAIN ###
flist = ["f1", "f2", "f3", "f4"]
parallel_count_proteins(flist)
As a side note, if I understand what you are trying to do correctly, you can simply use pmap here instead, which will send the tasks one by one to the processes, effectively balancing the load.
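For illustration, a minimal sketch of the pmap approach, assuming count_proteins has already been defined with @everywhere as above:

# pmap sends one path at a time to whichever worker is free,
# so a slow file does not hold up the rest
counts = pmap(count_proteins, flist)
for (fpath, cnt) in zip(flist, counts)
    println(fpath, '\t', cnt)
end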
You might find it useful to read the manual entry regarding code and data availability in parallel computing, and the Parallel Computing section overall. For the data availability part there is also a package called ParallelDataTransfer.jl, which makes moving data between processes a lot easier if you ever need it.

As @hckr nicely explains above, the workers should be deployed (using addprocs(threads)) before using the @everywhere macro.
@everywhere can be called and used in different ways and in different parts of the program.
In my case I am loading the function that I want to run in parallel from a module.
To use this function in parallel from the main script I am using @everywhere include("MyModule.jl").
Following is the code for MyModule:
module MyModule

using Distributed
using Printf: @printf
using Base

"""Count sequences in the input FASTA."""
function count_proteins(fpath::String)::Int
    cnt::Int = 0
    # @show fpath
    if !isfile(fpath)
        write(Base.stderr, "\nInput FASTA not found!")
    else
        open(fpath, "r") do ifd
            for ln in eachline(ifd)
                if ln[1] == '>'
                    # println(ln)
                    cnt += 1
                end
            end
        end
    end
    # return the count
    @printf("%s\t%d\n", fpath, cnt)
    cnt
end

"""Count sequences in parallel."""
function parallel_count_proteins(fPaths::Array{String, 1})
    # spawn the jobs
    for (i, fastaPath) in enumerate(fPaths)
        r = @spawn count_proteins(fastaPath)
        # @show r
        s = fetch(r)
    end
end

end # module MyModule
And following is main.jl, which uses the function parallel_count_proteins from MyModule.
### main.jl ###
using Base
using Distributed
using Printf: @printf

# add the path to the modules directory
push!(LOAD_PATH, dirname(@__FILE__)) # MyModule is in the same directory as main.jl

#### MAIN START ####
# deploy the workers
addprocs(4)
# load the modules with multi-core functions on all workers
@everywhere include(joinpath(dirname(@__FILE__), "MyModule.jl"))

# paths to the 4 input files (all in the same dir as main.jl)
flist = ["tv1", "tv2", "tv3", "tv4"]
# count the proteins
MyModule.parallel_count_proteins(flist)

Related

Ruby GC::Profiler no output

I'm running a Ruby script and trying to see the GC stats on it, but the output is just an empty string. Here are the contents of my script:
class NumberPool
  ...
  attr_accessor :sets

  def initialize
    @sets = []
  end

  def allocate
    allocated_number = Random.rand(min_bound..max_bound)
    sets.each do |set|
      next unless set.range.include?(allocated_number)
      return set.range.delete(allocated_number)
    end
    factor = allocated_number / batch_size
    min = factor * batch_size
    max = min + batch_size
    sub = SubPool.new(min, max)
    sub.range.delete(allocated_number)
    sets.push(sub)
    allocated_number
  end
  ...

def run_test
  GC::Profiler.enable
  a = NumberPool.new
  p a.allocate
  GC::Profiler.report
end

puts run_test
When I run this, the output is:
$ ruby number_pool.rb
1855532
I expected to see something from the GC report in standard out.
This is a guess, but maybe GC hasn't triggered yet (no need to collect garbage because there is still plenty of free memory).
See what happens if you force GC by adding GC.start (modify the code like so):
p a.allocate
GC.start
GC::Profiler.report
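If the report is still empty even after forcing a collection, GC.stat can at least confirm whether any GC runs have happened; this is just a diagnostic aside, not part of the fix itself:

GC.start
GC::Profiler.report
puts GC.stat[:count]  # total number of GC runs in this process so far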

Ruby Parallel each loop

I have the following code:
FTP ... do |ftp|
  files.each do |file|
    ...
    ftp.put(file)
    sleep 1
  end
end
I'd like to process each file in a separate thread, or in some parallel way. What's the correct way to do this? Would this be right?
Here's my attempt with the parallel gem:
FTP ... do |ftp|
  Parallel.map(files) do |file|
    ...
    ftp.put(file)
    sleep 1
  end
end
The issue with parallel is that puts/output from different threads can occur at the same time, like so:
as = [1,2,3,4,5,6,7,8]
results = Parallel.map(as) do |a|
  puts a
end
How can I force puts to occur as they normally would, line-separated?
The whole point of parallelization is to run things at the same time. But if there's some part of the process that you'd like to run sequentially, you can use a mutex, like:
semaphore = Mutex.new
as = [1,2,3,4,5,6,7,8]
results = Parallel.map(as, in_threads: 3) do |a|
  # Parallel stuff
  sleep rand
  semaphore.synchronize {
    # Sequential stuff
    puts a
  }
  # Parallel stuff
  sleep rand
end
You'll see that it prints the output correctly, but not necessarily in the same order. I used in_threads instead of in_processes (the default) because Mutex doesn't work with processes. See below for an alternative if you do need processes.
References:
http://ruby-doc.org/core-2.2.0/Mutex.html
http://dev.housetrip.com/2014/01/28/efficient-cross-processing-locking-in-ruby/
In the interest of keeping it simple, here's what I'd do with built-in Thread:
results = files.map do |file|
  Thread.new do
    ftp.put(file)
  end
end
Note that this code assumes that ftp.put(file) returns safely. If that isn't guaranteed, you'll have to handle it yourself: wrap each call in a timeout block, have each thread return the exception if one is thrown, and then, at the very end, join the threads and check that the results do not contain any exceptions. A sketch of that pattern follows.
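Something like the following (a rough sketch: the 30-second limit is an arbitrary example, and it assumes, like the code above, that sharing one ftp object across threads is acceptable):

require 'timeout'

threads = files.map do |file|
  Thread.new do
    begin
      Timeout.timeout(30) { ftp.put(file) }   # raises Timeout::Error if it hangs
    rescue => e
      e                                       # return the exception as the thread's value
    end
  end
end

results  = threads.map(&:value)               # value joins each thread and returns its result
failures = results.select { |r| r.is_a?(Exception) }
raise "#{failures.size} uploads failed" unless failures.empty?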

Ruby 192 recursive thread lock error

I am working with Ruby 1.9.2-p290; a unit-test script (shown below) throws a ThreadError:
1) Error:
test_orchpr_pass(TC_MyTest):
ThreadError: deadlock; recursive locking
    <internal:prelude>:8:in `lock'
    <internal:prelude>:8:in `synchronize'
    testth.rb:121:in `orchpr_run'
    testth.rb:158:in `test_orchpr_pass'
With Ruby 1.8.7 the error is: Thread tried to join itself.
CODE
def orchpr_run(timeout = 60)
  # used by the update function to signal that a final update was
  # received from all clients
  @update_mutex.lock
  # required since we'll have to act as an observer to the DRb server
  DRb.start_service
  # get configuration objects
  run_config_type = DataLayer.get_run_config_type
  client_daemon = DataLayer.get_client_daemon_by_branch(run_config_type, @branch)
  client_daemon['port_no'] = 9096
  # get the servers for this client_daemon
  servers = DataLayer.get_servers(run_config_type, client_daemon.id)
  servers.each { |server| @pr[server.host_name] = OrchestratedPlatformRun.new(run_config_type, server, timeout) }
  @pr.each_value { |x| x.add_observer(self) }
  @pr.each_value { |x| x.start(@service_command_pass, true) }
  # wait for update to receive notifications from all servers
  # this is the statement causing the error:
  @update_mutex.synchronize {}
end
Another piece of code throwing the same error:
require "thread"
require "timeout"
def calc_fib(n)
if n == 0
0
elsif n == 1
1
else
calc_fib(n-1) + calc_fib(n-2)
end
end
lock = Mutex.new
threads = 20.times.collect do
Thread.new do
20.times do
begin
Timeout.timeout(0.25) do
lock.synchronize{ calc_fib(1000) }
end
rescue ThreadError => e
puts "#{e.class}: #{e.message}:\n" + e.backtrace.join("\n") + "\n\n"
rescue Timeout::Error => e
#puts e.class
nil
end
end
end
end
threads.each{ |t| t.join }
Commenting out the synchronize block makes the error disappear, but then the threads are not able to synchronize. I found some discussion on the net saying this is a bug in Ruby 1.9.2 that requires changes to the files prelude.rb and thread.c regarding Mutex synchronization.
But under a Windows installation I am unable to find the file prelude.rb.
If a mutex is locked by a thread, then an error will be raised if you try to lock it again from the same thread.
This is exactly what you are doing, since synchronize is just a convenience method for locking the mutex, yielding to the block, and then releasing the lock. I'm not sure what you're trying to do, but it feels to me like you might be trying to use mutexes for something other than their intended purpose.
Using threads and locks well is difficult to get right - you might want to look at celluloid for a different approach to concurrency.
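For illustration, the failure reduces to re-entering a single Mutex; if you genuinely need a lock that its owning thread may take again, Ruby's Monitor is reentrant (a sketch, not necessarily the right fix for your design):

require 'monitor'

m = Mutex.new
# m.synchronize { m.synchronize {} }  # ThreadError: deadlock; recursive locking

lock = Monitor.new
lock.synchronize do
  lock.synchronize { puts "ok" }      # fine: a Monitor may be re-entered by its owner
end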

Thread and Queue

I am interested in knowing what would be the best way to implement a thread-based queue.
For example:
I have 10 actions which I want to execute with only 4 threads. I would like to create a queue with all 10 actions placed linearly, start the first 4 actions with 4 threads, and once one of the threads is done executing, start the next one, and so on. So at any time the number of running threads is at most 4.
There is a Queue class in the thread standard library. Using that you can do something like this:
require 'thread'

queue = Queue.new
threads = []

# add work to the queue
queue << work_unit

4.times do
  threads << Thread.new do
    # loop until there are no more things to do
    until queue.empty?
      # pop with the non-blocking flag set; this raises
      # an exception if the queue is empty, in which case
      # work_unit will be set to nil
      work_unit = queue.pop(true) rescue nil
      if work_unit
        # do work
      end
    end
    # when there is no more work, the thread will stop
  end
end

# wait until all threads have completed processing
threads.each { |t| t.join }
The reason I pop with the non-blocking flag is that between the until queue.empty? check and the pop, another thread may have popped the queue, so unless the non-blocking flag is set we could get stuck at that line forever.
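As an aside for later Rubies (2.3+), Queue#close avoids that race entirely: once the queue is closed and drained, a blocking pop returns nil instead of waiting forever. A sketch, with the actual work elided:

queue = Queue.new
10.times { |i| queue << i }
queue.close                        # no more work will be added

threads = 4.times.map do
  Thread.new do
    while (work_unit = queue.pop)  # blocks while work remains, nil once closed and empty
      # do work with work_unit
    end
  end
end
threads.each(&:join)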
If you're using MRI, the default Ruby interpreter, bear in mind that threads will not be absolutely concurrent. If your work is CPU bound you may just as well run single threaded. If you have some operation that blocks on IO you may get some parallelism, but YMMV. Alternatively, you can use an interpreter that allows full concurrency, such as jRuby or Rubinius.
There are a few gems that implement this pattern for you: parallel, peach, and mine, called threach (or jruby_threach under JRuby). It's a drop-in replacement for #each, but it allows you to specify how many threads to run with, using a SizedQueue underneath to keep things from spiraling out of control.
So...
(1..10).threach(4) {|i| do_my_work(i) }
Not pushing my own stuff; there are plenty of good implementations out there to make things easier.
If you're using JRuby, jruby_threach is a much better implementation -- Java just offers a much richer set of threading primitives and data structures to use.
Executable descriptive example:
require 'thread'

p tasks = [
  {:file => 'task1'},
  {:file => 'task2'},
  {:file => 'task3'},
  {:file => 'task4'},
  {:file => 'task5'}
]

tasks_queue = Queue.new
tasks.each { |task| tasks_queue << task }

# run workers
workers_count = 3
workers = []
workers_count.times do |n|
  workers << Thread.new(n+1) do |my_n|
    while (task = tasks_queue.shift(true) rescue nil) do
      delay = rand(0)
      sleep delay
      task[:result] = "done by worker ##{my_n} (in #{delay})"
      p task
    end
  end
end

# wait for all threads
workers.each(&:join)

# output results
puts "all done"
p tasks
You could use a thread pool. It's a fairly common pattern for this type of problem.
http://en.wikipedia.org/wiki/Thread_pool_pattern
Github seems to have a few implementations you could try out:
https://github.com/search?type=Everything&language=Ruby&q=thread+pool
Celluloid has a worker pool example that does this.
I use a gem called work_queue. It's really practical.
Example:
require 'work_queue'
wq = WorkQueue.new 4, 10
(1..10).each do |number|
wq.enqueue_b("Thread#{number}") do |thread_name|
puts "Hello from the #{thread_name}"
end
end
wq.join

Detect number of IDLE processors ruby

I work on shared linux machines with between 4 and 24 cores. To make best use of them, I use the following code to detect the number of processors from my ruby scripts:
return `cat /proc/cpuinfo | grep processor | wc -l`.to_i
(perhaps there is a pure-ruby way of doing this?)
But sometimes a colleague is using six or eight of the 24 cores (as seen via top). How can I get an estimate of the number of currently unused processors that I can use without making anyone upset?
Thanks!
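As a pure-Ruby answer to the parenthetical: on Ruby 2.2 and later, the total count is available without shelling out, via Etc.nprocessors (only the total, not the idle count):

require 'etc'
puts Etc.nprocessors  # e.g. 24 on a 24-core machine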
You can use the data in the /proc filesystem to get CPU affinity info for running processes. The following should give you the number of CPUs currently in use (Note: I don't have a Linux or Ruby box handy so this code is untested, but you can get the idea):
def processors_in_use
  procs = []
  Dir.glob("/proc/*/stat") { |filename|
    next if File.directory?(filename)
    this_proc = []
    File.open(filename) { |file| this_proc = file.gets.split.values_at(2,38) }
    procs << this_proc[1].to_i if this_proc[0] == "R"
  }
  procs.uniq.length
end

def num_processors
  IO.readlines("/proc/cpuinfo").delete_if { |x| x.index("processor") == nil }.length
end

def num_free_processors
  num_processors - processors_in_use
end

def estimate_free_cpus(count, waittime)
  results = []
  count.times {
    results << num_free_processors
    sleep(waittime)
  }
  sum = 0
  results.each { |x| sum += x }
  (sum.to_f / results.length).round
end
Edit: I verified that the above code works (I was using Ruby 1.9)
Inspired by bta's reply, this is what I'm using:
private

def YWSystemTools.numberOfActiveProcessors # internal
  processorForProcs = []
  processFiles = Dir.glob("/proc/*/stat")
  raise IOError, 'Cannot find /proc/*/stat files. Are you sure this is a linux machine?' if processFiles.empty?
  processFiles.each do |filename|
    next if File.directory?(filename) # because /proc/net/stat is a directory
    next if !File.exists?(filename)   # may have disappeared in the meantime
    this_proc = []
    File.open(filename) { |file| this_proc = file.gets.split.values_at(2,38) }
    processorForProcs << this_proc[1].to_i if this_proc[0] == "R"
  end
  processorsInUse = processorForProcs.uniq
  return(processorsInUse.length)
end

public

def YWSystemTools.numberOfAvailableProcessors
  numberOfAttempts = 5
  $log.info("Will determine number of available processors. Wait #{numberOfAttempts.to_s} seconds.")
  # we estimate 5 times because of local fluctuations in processor use; keep the minimum
  estimationsOfNumberOfActiveProcessors = []
  numberOfAttempts.times do
    estimationsOfNumberOfActiveProcessors << YWSystemTools.numberOfActiveProcessors
    sleep(1)
  end
  numberOfActiveProcessors = estimationsOfNumberOfActiveProcessors.min
  numberOfTotalProcessors = number_of_processors()
  raise IOError, '!! # active Processors > # processors' if numberOfActiveProcessors > numberOfTotalProcessors
  numberOfAvailableProcessors = numberOfTotalProcessors - numberOfActiveProcessors
  $log.info("#{numberOfAvailableProcessors} out of #{numberOfTotalProcessors} are available!")
  return(numberOfAvailableProcessors)
end
