The following does not work. The call to resources.next_document within the thread returns nil. The same call without threading works as expected.
Any MongoDB experts out there? :P
# A single cursor over the collection, created once outside the threads.
resources = db[Resource::COLLECTION].find
number_of_threads.times do
threads << Thread.new do
# NOTE(review): every worker pulls from the same `resources` cursor; this is
# the call reported to return nil when it runs inside a thread.
while resource = resources.next_document
puts 'one more doc'
end
end
end
This is the solution I ended up using:
Feedback welcome
pool = DocumentPool.new(db)
# Keep the worker handles so the script can wait for them; threads that are
# never joined are killed when the main thread exits.
workers = Array.new(5) do
  Thread.new do
    # DocumentPool hands out documents through #shift; nil ends the loop.
    while (doc = pool.shift)
      # something cool
    end
  end
end
workers.each(&:join)
# Hands out documents from one shared cursor to any number of worker threads.
#
# The cursor is only ever touched while holding a mutex, so concurrent callers
# each receive a distinct document (or nil once the cursor is exhausted).
class DocumentPool
  COLLECTION = 'some_collection'

  # @param db [#[]] database handle; db[COLLECTION] must return the collection
  def initialize(db)
    @db = db
    @mutex = Mutex.new
    # Prefetch one document on the constructing thread, so the cursor is
    # created before any worker thread asks for a document.
    @first_doc = cursor.next_document
  end

  # @return the collection named by COLLECTION
  def collection
    @db[COLLECTION]
  end

  # @return the memoized cursor over the whole collection
  def cursor
    @cursor ||= collection.find
  end

  # Returns the next unprocessed document, or nil when none are left.
  # Safe to call from several threads at once.
  def shift
    @mutex.synchronize do
      if @first_doc
        doc = @first_doc
        @first_doc = nil
        doc
      else
        cursor.next_document
      end
    end
  end
  # Cursor-style name for #shift, so a pool can be used where a cursor was.
  alias next_document shift

  # @return the number of documents in the collection
  def count
    collection.count
  end
end
Although the driver itself is thread-safe, individual cursors aren't, so you can't reliably process the data in the way you're describing.
One possibility would be to have a single thread that iterates over the documents, handing them off to any number of worker threads for the actual processing.
Related
I have something similar in my project
# Launches the tax-id workers.
class Raj
  # Starts five threads, each driving its own Gopal instance.
  # The threads are not joined; the method returns as soon as they are started.
  def execute
    5.times { Thread.new { run_worker } }
  end

  private

  # One worker's job: open its connections, then enter the tax id.
  def run_worker
    worker = Gopal.new
    worker.db_connection
    worker.enter_tax_id
  end
end
# Per-thread worker: owns one database handle and one browser.
class Gopal
  # Creates the database handle and the browser used by #enter_tax_id.
  def db_connection
    @db = "" # Created db connection here
    @browser = Watir::Browser.new
  end

  # Reads the current maximum tax id and fills in the text field.
  def enter_tax_id
    # NOTE: this mutex is created on every call, so no two callers ever share
    # it and the block below is not actually serialized across threads.
    m = Mutex.new
    m.synchronize do
      data = @db.select_one("select max(tax_id_no) from pcmp.tax_identifier")
      # NOTE(review): this sets the literal string 'data', not the query
      # result held in `data` - confirm intent.
      @browser.text_field(id: 'something').set 'data'
    end
  end
end
The enter_tax_id method pulls information from the database and then enters a value into the text field. This code has a problem when several threads run it at once: when multiple threads attempt to execute the same procedure, an 'executing in another thread' error is raised.
In order to resolve this issue, I used the following strategy:
# Per-thread worker whose database/browser step is serialized by one
# class-level mutex shared by every instance.
class Gopal
  # Class instance variable: created once, when the class body is evaluated.
  @mutex = Mutex.new

  # @return [Mutex] the single mutex shared by all Gopal instances
  def self.mutex_var
    @mutex
  end

  # Creates the database handle and the browser used by #enter_tax_id.
  def db_connection
    @db = "" # Created db connection here
    @browser = Watir::Browser.new
  end

  # Runs the query and fills in the text field while holding the shared mutex,
  # so only one thread at a time executes this section.
  def enter_tax_id
    Gopal.mutex_var.synchronize do
      data = @db.select_one("select max(tax_id)+1 from table")
      # NOTE(review): this sets the literal string 'data', not the query
      # result held in `data` - confirm intent.
      @browser.text_field(id: 'something').set 'data'
    end
  end
end
The mutex is now created once, when the class is loaded, and is shared by every instance, so it still provides protection even when each thread instantiates its own object.
# Visits several cities, one thread per city, keeping the current city in a
# thread-local variable.
class Tour
  # Starts one thread per city, stores the city as that thread's :city local,
  # visits it, and waits for all threads.
  #
  # @return [Array<Thread>] the joined threads
  def destinations
    threads = %i[new_york london sydney].map do |city|
      Thread.new do
        # An explicit receiver is required: a bare `where = city` would only
        # create a block-local variable and never call #where=.
        self.where = city
        goto(where)
      end
    end
    threads.each(&:join)
  end

  # Stores the location in the current thread's :city local.
  def where=(location)
    Thread.current[:city] = location
  end

  # @return the current thread's :city local
  def where
    Thread.current[:city]
  end

  # Prints the visit message for the given city.
  def goto(city)
    puts "I am going to visit #{city}."
  end
end
Tour.new.destinations
In order to access the thread-local variable in the method goto(), it has to be passed in, as in goto(where). If many other similar methods need to act on the current thread-local variable :city, it has to be passed to each of them too.
I guess there is an elegant, Ruby-like way to avoid passing the thread-local variable as an argument; what does that look like?
This seems like you'll trip yourself up a lot. It might be better to initialize a new object for each thread.
# Visits each city on its own thread, one Destination object per thread.
class Tour
  # Spawns a thread per city, waits for all of them, and returns the threads.
  def self.destinations
    %i[new_york london sydney]
      .map { |city| Thread.new { Destination.new(city).go } }
      .each(&:join)
  end
end
# A single place to visit; holds its own location instead of relying on
# thread-local state.
class Destination
  attr_reader :location

  # @param location the place this destination represents
  def initialize(location)
    @location = location
  end

  # Prints the visit message for this destination's location.
  def go
    puts "I am going to visit #{location}."
  end
end
# Tour.destinations
Suggested reading: https://blog.engineyard.com/2011/a-modern-guide-to-threads
I was looking in detail at the Thread class. Basically, I was looking for an elegant mechanism to allow thread-local variables to be inherited as threads are created. For example the functionality I am looking to create would ensure that
# Sketch of the desired behavior: a value stored in the outer thread should be
# readable from a thread created inside it.
# NOTE(review): `self` inside a Thread.new block is the enclosing object, not
# the thread; Thread.current is presumably what is meant.
Thread.new do
self[:foo]="bar"
t1=Thread.new { puts self[:foo] }
end
=> "bar"
i.e. a Thread would inherit its calling thread's thread-local variables.
So I hit upon the idea of redefining Thread.new, so that I could add an extra step to copy the thread-local variables into the new thread from the current thread. Something like this:
class Thread
# Attempted alternative to Thread.new: build the thread by hand, then copy
# every thread-local of the calling thread into it.
# NOTE: this version is reported to fail at `allocate` with
# "allocator undefined for Thread (TypeError)".
def self.another_new(*args)
o=allocate
o.send(:initialize, *args)
# Copy each key/value of the current thread's locals onto the new thread.
Thread.current.keys.each{ |k| o[k]=Thread.current[k] }
o
end
end
But when I try this I get the following error:
:in `allocate': allocator undefined for Thread (TypeError)
I thought that as Thread is a subclass of Object, it should have a working #allocate method. Is this not the case?
Does anyone have any deep insight on this, and on how to achieve the functionality I am looking for.
Thanks in advance
Steve
# Hand the creating thread to the child explicitly so the child can read the
# parent's thread-locals.
outer = Thread.new do
  Thread.current[:foo] = 'bar'
  inner = Thread.new(Thread.current) do |parent|
    puts(parent[:foo] || 'nothing')
  end
  inner.join
end
outer.join
#=> bar
UPDATED:
Try this in irb:
thread_ext.rb
class Thread
  # Like Thread.new, but the new thread starts with a copy of the calling
  # thread's thread-local variables.
  #
  # The locals are snapshotted in the calling thread, before the child starts,
  # so the child never reads the parent's table while the parent is changing it.
  #
  # @param args arguments handed to the block, as with Thread.new
  # @yield [*args] the body of the new thread
  # @return [Thread] the started thread
  # @raise [ThreadError] when no block is given (same as Thread.new)
  def self.another_new(*args, &block)
    raise ThreadError, 'must be called with a block' unless block

    parent = Thread.current
    inherited = parent.keys.map { |key| [key, parent[key]] }
    Thread.new(*args) do |*thread_args|
      inherited.each { |key, value| Thread.current[key] = value }
      block.call(*thread_args)
    end
  end
end
use_case.rb
# Demonstrates inheritance across two levels: A sets :local_a, the B threads
# are created from A, and each C thread (created from a B thread) prints it.
A = Thread.new do
Thread.current[:local_a]="A"
B1 =Thread.another_new do
C1 = Thread.another_new{p Thread.current[:local_a] }.join
end
B2 =Thread.another_new do
C2 = Thread.another_new{p Thread.current[:local_a] }.join
end
# Wait for both branches before A finishes.
[B1, B2].each{|b| b.join }
end.join
output
"A"
"A"
Here is a revised answer based on @CodeGroover's suggestion, with a simple unit test harness.
ext/thread.rb
class Thread
  # Creates a thread that starts with a copy of the calling thread's
  # thread-local variables and runs the block with the given arguments.
  #
  # The locals are snapshotted in the calling thread, before the child starts,
  # so the child never reads the parent's table while the parent is changing it.
  # Inheritance is one-way: locals set in the child are not copied back.
  #
  # @param args arguments handed to the block
  # @yield [*args] the body of the new thread
  # @return [Thread] the started thread
  # @raise [ThreadError] when no block is given (same as Thread.new)
  def self.inherit(*args, &block)
    raise ThreadError, 'must be called with a block' unless block

    parent = Thread.current
    inherited = parent.keys.map { |key| [key, parent[key]] }
    Thread.new(*args) do |*thread_args|
      inherited.each { |key, value| Thread.current[key] = value }
      block.call(*thread_args)
    end
  end
end
test/thread.rb
require 'test/unit'
require 'ext/thread'
# Unit test for Thread.inherit: thread-locals flow from parent to child, block
# arguments are passed through, and child locals do not flow back.
class ThreadTest < Test::Unit::TestCase
def test_inherit
Thread.current[:foo]=1
m=Mutex.new
# check basic inheritance
t1= Thread.inherit do
assert_equal(1, Thread.current[:foo])
end
# check inheritance with parameters - in this case a mutex
t2= Thread.inherit(m) do |m|
assert_not_nil(m)
m.synchronize{ Thread.current[:bar]=2 }
assert_equal(1, Thread.current[:foo])
assert_equal(2, Thread.current[:bar])
sleep 0.1
end
# ensure t2 runs its mutex-synchronized block first
sleep 0.05
# check that the inheritance works downwards only - not back up in reverse
m.synchronize do
assert_nil(Thread.current[:bar])
end
[t1,t2].each{|x| x.join }
end
end
I was looking for the same thing recently and was able to come up with the following answer. Note I am aware the following is a hack and not recommended, but for the sake of answering the specific question on how you could alter the Thread.new functionality, I have done as following:
# Hack: makes every Thread.new inherit the creating thread's thread-local
# variables. This patches a core class globally and is not recommended.
class Thread
  class << self
    # Guarded so that loading this file twice does not alias the patched
    # method onto itself, which would recurse forever.
    alias_method :original_new, :new unless method_defined?(:original_new)

    # Same contract as the built-in Thread.new, plus inheritance of the
    # calling thread's thread-locals.
    #
    # @return [Thread] the started thread
    def new(*args, **options, &block)
      parent = Thread.current
      inherited = parent.keys.map { |key| [key, parent[key]] }

      unless block
        # No block here: either the built-in raises ThreadError, or a subclass
        # supplies the block itself in #initialize. In the latter case the
        # locals can only be copied after the thread exists.
        instance = original_new(*args, **options)
        inherited.each { |key, value| instance[key] = value }
        return instance
      end

      # Copy the locals inside the new thread, before the caller's block runs,
      # so the block never observes a half-initialized set of locals.
      original_new(*args, **options) do |*thread_args, **thread_options|
        inherited.each { |key, value| Thread.current[key] = value }
        block.call(*thread_args, **thread_options)
      end
    end
  end
end
Help me, please.
How can I implement a pmap method for Array that works like map but runs in two processes?
I have code
class Array
  # Sequential starting point for pmap: yields each element in order and
  # returns an array of the block's results.
  def pmap
    map { |element| yield(element) }
  end
end
require 'benchmark'
# Time one pmap call whose block sleeps x seconds for each element x.
seconds = Benchmark.realtime do
[1, 2, 3].pmap do |x|
sleep x
puts x**x
end
end
# Report the measured wall-clock time.
puts "work #{seconds} seconds"
As a result, the benchmark should report 3 seconds.
To get absolutely 2 forks
You don't absolutely need RPC. Marshal + Pipe should usually work.
class Array
  # Maps the block over the array using two forked child processes, each
  # handling one half, and returns the results in the original order.
  #
  # Each child sends its results back over a pipe with Marshal, so the block's
  # return values must be marshalable. Side effects of the block happen in the
  # children and are not visible in the parent.
  #
  # @yield [item] called once per element, inside a child process
  # @return [Array] the block results, in element order
  def pmap
    # take/drop always return arrays, so empty and one-element receivers work.
    mid = (length + 1) / 2
    children = [take(mid), drop(mid)].map do |slice|
      reader, writer = IO.pipe
      pid = fork do
        reader.close
        writer.write(Marshal.dump(slice.map { |item| yield(item) }))
        writer.close
      end
      # The parent only reads; closing its write end lets read see EOF.
      writer.close
      [pid, reader]
    end

    children.flat_map do |pid, reader|
      # Drain the pipe before waiting: a child blocked on a full pipe would
      # never exit, so waiting first can deadlock on large results.
      payload = reader.read
      reader.close
      # Reap only this method's own children, not unrelated ones.
      Process.wait(pid)
      # The payload comes from our own child process, not from external input.
      Marshal.load(payload)
    end
  end
end
Edit
Now using fork
Try the parallel gem.
require 'parallel'
class Array
  # Maps the block over the array through the parallel gem, asking for three
  # worker processes.
  #
  # @return [Array] whatever Parallel.map returns for this array and block
  def pmap(&blk)
    Parallel.map(self, in_processes: 3, &blk)
  end
end
In the situation below the @crawl object DOES RECEIVE the crawl call, but the method mock fails, i.e. the method is not mocked.
Does Thread somehow create its own copy of the @crawl object, escaping the mock?
# Expect crawl to be called twice with a String, then start the threads.
@crawl.should_receive(:crawl).with(an_instance_of(String)).twice.and_return(nil)
threads = @crawl.create_threads
thread creation code:
# Crawl entry point: calls dosomecrawling (a placeholder in this snippet).
# The uri argument is accepted but not used here.
def crawl(uri)
dosomecrawling
end
# Starts five threads, each crawling one URI from someurifeedingmethod.
# The threads are neither kept nor joined; the method returns the range 1..5.
def create_threads
  (1..5).each { Thread.new { crawl(someurifeedingmethod) } }
end
It does not appear from the code posted that you are joining the threads. If so, there is a race condition: Sometimes the test will execute with some or all of the threads not having done their job; The fix is along these lines:
#!/usr/bin/ruby1.9
# Crawls URIs on five worker threads and lets the caller wait for them.
class Crawler
  # Crawl entry point: calls dosomecrawling (a placeholder in this snippet).
  def crawl(uri)
    dosomecrawling
  end

  # Starts five threads, each crawling one URI from someurifeedingmethod, and
  # remembers them so #join can wait for them.
  #
  # @return [Array<Thread>] the started threads
  def create_threads
    @threads = Array.new(5) { Thread.new { crawl(someurifeedingmethod) } }
  end

  # Blocks until every thread started by #create_threads has finished.
  # Does nothing when no threads have been created yet.
  def join
    Array(@threads).each(&:join)
  end
end
# Spec: stubs the URI source and expects crawl to be called once per worker.
describe "the above code" do
it "should crawl five times" do
crawler = Crawler.new
uri = "uri"
crawler.should_receive(:someurifeedingmethod).with(no_args).exactly(5).times.and_return(uri)
crawler.should_receive(:crawl).with(uri).exactly(5).times
crawler.create_threads
# Wait for every worker so the expectations are checked after all calls.
crawler.join
end
end
This code works perfectly.
You can add the expectation 5 times.
# Minimal crawler used to demonstrate mocking across threads.
class Hello
  # Prints the given uri to standard output.
  def crawl(uri)
    $stdout.puts(uri)
  end

  # Starts five threads that each crawl 'http://hello'. The threads are not
  # joined; the method returns the range 1..5.
  def create_threads
    (1..5).each { Thread.new { crawl('http://hello') } }
  end
end
describe 'somting' do
it 'should mock' do
crawl = Hello.new
# Register the expectation five times, one per thread started below.
5.times do
crawl.should_receive(:crawl).with(an_instance_of(String)).and_return(nil)
end
# NOTE(review): the threads started here are not joined before the example ends.
threads = crawl.create_threads
end
end