Reading messages with a Chronicle Queue Tailer (v5.16.11) does not auto-move the index to the next roll cycle

I use CQ v5.16.11 (with OpenJDK 11) to save data with daily roll cycles.
The process runs non-stop from Sun to Fri, so I have 5 cq4 files per week. I ran the process for 1.5 weeks and now have 8 files (3 for the 1st week and 5 for the 2nd).
So the files that I have are:
20181003.cq4 cycle=17807,
20181004.cq4 cycle=17808,
20181005.cq4 cycle=17809,
20181007.cq4 cycle=17811,
20181008.cq4 cycle=17812,
20181009.cq4 cycle=17813,
20181010.cq4 cycle=17814,
20181011.cq4 cycle=17815,
Note the missing file for 20181006.cq4 (cycle=17810) as the process does not run on Saturday.
I use this code to read data:
tailer.toEnd();
lastTailerIndex = tailer.index();
tailer.toStart();
while (tailer.index() <= lastTailerIndex) {
    // read data
    if (tailer.readBytes(data)) {
        // do something with data bytes
    }
    if (tailer.index() == lastTailerIndex) {
        break;
    }
}
This correctly reads the 1st week's data but does not read the 2nd week's data, as it does not auto-roll to the next cycle.
Any idea why this is happening or how to fix it?
The issue is similar to this one, which was reported for an older version:
Reading message from chronicle queue does not move the current index to the next cycle automatically
I have created a single queue with daily rolling
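For reference, the queue is created roughly like this (a minimal sketch, not my exact setup; builder options beyond daily rolling are omitted and the path just matches the logs below):
import net.openhft.chronicle.queue.ExcerptTailer;
import net.openhft.chronicle.queue.RollCycles;
import net.openhft.chronicle.queue.impl.single.SingleChronicleQueue;
import net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder;

public class DailyQueueSetup {
    public static void main(String[] args) {
        // "/site/data" matches the directory that appears in the logs below
        try (SingleChronicleQueue queue = SingleChronicleQueueBuilder.binary("/site/data")
                .rollCycle(RollCycles.DAILY) // one yyyyMMdd.cq4 file per day, e.g. 20181003.cq4
                .build()) {
            ExcerptTailer tailer = queue.createTailer();
            // ... read with the loop shown above ...
        }
    }
}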
Logs:
2018-10-12 12:41:15,784 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/metadata.cq4t took 19.237 ms.
2018-10-12 12:41:15,876 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/20181011.cq4 took 0.063 ms.
2018-10-12 12:41:15,881 DEBUG [main] net.openhft.chronicle.queue.impl.single.PretoucherState - /site/data/20181011.cq4 - Reset pretoucher to pos 4835096 as the underlying MappedBytes changed.
2018-10-12 12:41:15,887 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/20181003.cq4 took 0.065 ms.
2018-10-12 12:41:15,995 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/20181011.cq4 took 0.082 ms.
2018-10-12 12:41:15,996 DEBUG [main] net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder - File released /site/data/20181003.cq4
2018-10-12 12:41:15,997 DEBUG [main] net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder - File released /site/data/20181011.cq4
2018-10-12 12:41:16,418 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/20181004.cq4 took 0.112 ms.
2018-10-12 12:41:16,418 DEBUG [main] net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder - File released /site/data/20181003.cq4
2018-10-12 12:41:16,813 DEBUG [main] net.openhft.chronicle.bytes.MappedFile - Allocation of 0 chunk in /site/data/20181005.cq4 took 0.084 ms.
2018-10-12 12:41:16,813 DEBUG [main] net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder - File released /site/data/20181004.cq4
[Edit 1]:
The same thing happened this last weekend, i.e., as expected there is no new file for Oct 13th. Now I have files from Oct 7th to Oct 15th (with the Oct 13th file missing). If I do tailer.toStart(); while (tailer.readBytes(data)) { ... } it only reads the files from Oct 7th till Oct 12th and does not read Oct 14th and 15th (see the sketch below).
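A minimal sketch of that loop (data is a reusable Bytes buffer; variable names are illustrative):
// assuming the same queue/tailer as above
ExcerptTailer tailer = queue.createTailer();
Bytes<?> data = Bytes.allocateElasticDirect();
tailer.toStart();
while (tailer.readBytes(data)) {
    // process data; in practice this stops after the Oct 12th cycle
    // and never reaches Oct 14th or 15th
    data.clear();
}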
[Edit 2]: Replicated the issue as below; also reported as Chronicle-Queue/issues/537
Setup / Libs: jvm openjdk 11, Ubuntu 16.04, openhft.affinity/3.1.9,
chronicle-map/3.16.0, chronicle-queue/5.16.11,
chronicle-bytes/1.16.23, chronicle-core/1.16.20,
chronicle-wire/1.16.16, chronicle-threads/1.16.3, jna/4.4.0
Steps:
Start WriterProcess - let it finish.
Start ReaderProcess - see the 5 print statements.
Stop ReaderProcess
Wait for some time - 10 mins.
Start WriterProcess again - let it finish or keep running this process.
Start ReaderProcess - it prints only the first 5 print statements and nothing prints after this. Even if the WriterProcess is running/writing to the queue, the tailer in this process does not move forward.
public class WriterProcess {
    public static void main(String[] args) throws InterruptedException {
        final String dir = "/tmp/demo/";
        final LocalTime localTime = LocalTime.of(17, 0);
        final ZoneId zoneID = ZoneId.of("America/New_York");
        final ScheduledExecutorService scheduledExecutorService = Executors.newScheduledThreadPool(2);
        final SingleChronicleQueue queue = SingleChronicleQueueBuilder.binary(dir)
                .blockSize((long) Math.pow(2, 23))
                .rollCycle(RollCycles.MINUTELY)
                .rollTime(localTime, zoneID)
                .build();
        final ExcerptAppender appender = queue.acquireAppender();

        // pre touch
        scheduledExecutorService.scheduleAtFixedRate(appender::pretouch, 0, 30, TimeUnit.SECONDS);

        // write data
        System.out.println("writing data ...");
        writeData(appender, 5);

        // close queue
        System.out.println("shutting down now ...");
        queue.close();
        scheduledExecutorService.shutdown();
        scheduledExecutorService.awaitTermination(1, TimeUnit.SECONDS);
    }

    public static void writeData(ExcerptAppender appender, int count) {
        int ctr = 0;
        String dateStr;
        Date date = new Date();
        while (true) {
            dateStr = date.toString();
            appender.writeText("[" + ctr + "] Written " + dateStr);
            System.out.println("[" + ctr + "] Written " + dateStr);
            ctr++;
            if (ctr >= count) {
                break;
            }
            try {
                Thread.sleep(65_000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
public class ReaderProcess {
    public static void main(String[] args) {
        final String dir = "/tmp/demo/";
        final LocalTime localTime = LocalTime.of(17, 0);
        final ZoneId zoneID = ZoneId.of("America/New_York");
        final SingleChronicleQueue queue = SingleChronicleQueueBuilder.binary(dir)
                .blockSize((long) Math.pow(2, 23))
                .rollCycle(RollCycles.MINUTELY)
                .rollTime(localTime, zoneID)
                .build();
        final ExcerptTailer tailer = queue.createTailer();
        tailer.toStart();

        // read data
        System.out.println("reading data ...");
        readData(tailer, 25);

        // close
        System.out.println("shutting down now ...");
        queue.close();
    }

    public static void readData(ExcerptTailer tailer, int count) {
        int ctr = 0;
        Bytes data = Bytes.allocateDirect(new byte[500]);
        while (true) {
            if (tailer.readBytes(data)) {
                System.out.println("[" + ctr + "] Read {" + data + "}");
                ctr++;
                if (ctr >= count) {
                    break;
                }
            }
        }
    }
}

I have written a slightly simpler version which works with chronicle-bom 2.17 and the versions it uses. The biggest change I made was to clear the Bytes data before each read; otherwise readBytes only appends (so as not to overwrite anything already in the buffer).
import net.openhft.chronicle.bytes.Bytes;
import net.openhft.chronicle.core.OS;
import net.openhft.chronicle.queue.ExcerptAppender;
import net.openhft.chronicle.queue.ExcerptTailer;
import net.openhft.chronicle.queue.RollCycles;
import net.openhft.chronicle.queue.impl.single.SingleChronicleQueue;
import net.openhft.chronicle.queue.impl.single.SingleChronicleQueueBuilder;
import java.time.LocalDateTime;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class WriterProcess {
    static final String dir = OS.TMP + "/demo-" + System.nanoTime() + "/";

    public static void main(String[] args) throws InterruptedException {
        final ScheduledExecutorService scheduledExecutorService = Executors.newScheduledThreadPool(2);
        final SingleChronicleQueue queue = SingleChronicleQueueBuilder.binary(dir)
                .testBlockSize()
                .rollCycle(RollCycles.TEST_SECONDLY)
                .build();
        final ExcerptAppender appender = queue.acquireAppender();

        // pre touch
        scheduledExecutorService.scheduleAtFixedRate(appender::pretouch, 3, 30, TimeUnit.SECONDS);

        new Thread(ReaderProcess::main).start();

        // write data
        System.out.println("writing data ...");
        writeData(appender, 100);

        // close queue
        System.out.println("shutting down now ...");
        queue.close();
        scheduledExecutorService.shutdown();
        scheduledExecutorService.awaitTermination(1, TimeUnit.SECONDS);
    }

    public static void writeData(ExcerptAppender appender, int count) {
        int ctr = 0;
        while (true) {
            LocalDateTime date = LocalDateTime.now();
            appender.writeText("[" + ctr + "] Written " + date);
            System.out.println("[" + ctr + "] Written " + date);
            ctr++;
            if (ctr >= count) {
                break;
            }
            try {
                Thread.sleep(2_200);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
class ReaderProcess {
    public static void main(String... args) {
        final String dir = WriterProcess.dir;
        final SingleChronicleQueue queue = SingleChronicleQueueBuilder.binary(dir)
                .testBlockSize()
                .rollCycle(RollCycles.TEST_SECONDLY)
                .build();
        final ExcerptTailer tailer = queue.createTailer();
        tailer.toStart();

        // read data
        System.out.println("reading data ...");
        readData(tailer, 100);

        // close
        System.out.println("shutting down now ...");
        queue.close();
    }

    public static void readData(ExcerptTailer tailer, int count) {
        int ctr = 0;
        Bytes data = Bytes.allocateDirect(64);
        while (true) {
            data.clear();
            if (tailer.readBytes(data)) {
                System.out.println("[" + ctr + "] Read {" + data + "}");
                ctr++;
                if (ctr >= count) {
                    break;
                }
            }
        }
    }
}

Related

Incorrect file being produced using websockets in helidon

I am trying to upload a file using websockets in Helidon. I think I am doing it the right way, but the code seems to be flaky: the size of the file produced differs from run to run.
How can I make sure that the file size is the same on both ends?
I use a simple protocol for the handshake [code below]:
Step 1: client sends FILESIZE=11000, buffer=5000
Step 2: server sends SENDFILE
Step 3: client >> buffer 1, server >> write 1 (5000)
Step 4: client >> buffer 2, server >> write 2 (5000)
Step 5: client >> buffer 3, server >> write 3 (1000)
Step 6: client sends ENDOFFILE, server >> session.close
//SERVER side OnOpen session below
session.addMessageHandler(new MessageHandler.Whole<String>() {
    @Override
    public void onMessage(String message) {
        System.out.println("Server >> " + message);
        if (message.contains("FILESIZE")) {
            session.getBasicRemote().sendText("SENDFILENOW");
        }
        if (message.contains("ENDOFFILE")) {
            System.out.println("Server >> FILE_SIZE=" + FILE_SIZE);
            finalFileOutputStream.close();
            session.close();
        }
    }
});
session.addMessageHandler(new MessageHandler.Whole<ByteBuffer>() {
    @Override
    public void onMessage(ByteBuffer b) {
        finalFileOutputStream.write(b.array(), 0, b.array().length);
        finalFileOutputStream.flush();
    }
});
//CLIENT OnOpen session below
session.getBasicRemote().sendText("FILESIZE=" + FILE_SIZE);
session.addMessageHandler(new MessageHandler.Whole<String>() {
    @Override
    public void onMessage(String message) {
        long M = FILE_SIZE / BUFFER_SIZE;
        long R = FILE_SIZE % BUFFER_SIZE;
        if (!message.equals("SENDFILENOW"))
            return;
        try {
            System.out.println("Starting File read ... " + path + " " + FILE_SIZE + " " + M + " " + message);
            byte[] buffer = new byte[(int) BUFFER_SIZE];
            while (M > 0) {
                fileInputStream.read(buffer);
                ByteBuffer bytebuffer = ByteBuffer.wrap(buffer);
                session.getBasicRemote().sendBinary(bytebuffer);
                M--;
            }
            buffer = new byte[(int) R];
            fileInputStream.read(buffer, 0, (int) R);
            fileInputStream.close();
            ByteBuffer bytebuffer = ByteBuffer.wrap(buffer);
            session.getBasicRemote().sendBinary(bytebuffer);
            session.getBasicRemote().sendText("FILEREADDONE");
            session.close();
            f.complete(true);
        } catch (IOException e) {
            fail("Unexpected exception " + e);
        }
    }
});
Your solution is unnecessarily built on top of several levels of abstraction just to use websockets. Do you really need that? Helidon is very well equipped to handle huge file uploads directly and much more efficiently:
public class LargeUpload {
    public static void main(String[] args) {
        ExecutorService executor = ThreadPoolSupplier.create("upload-thread-pool").get();
        WebServer server = WebServer.builder(Routing.builder()
                .post("/streamUpload", (req, res) -> req.content()
                        .map(DataChunk::data)
                        .flatMapIterable(Arrays::asList)
                        .to(IoMulti.writeToFile(createFile(req.queryParams().first("fileName").orElse("bigFile.mkv")))
                                .executor(executor)
                                .build())
                        .onError(res::send)
                        .onComplete(() -> {
                            res.status(Http.Status.ACCEPTED_202);
                            res.send();
                        }).ignoreElement())
                .build())
                .port(8080)
                .build()
                .start()
                .await(Duration.ofSeconds(10));

        // Server started - do upload
        // several gigs file
        Path file = Path.of("/home/kec/helidon-kafka.mkv");
        try (FileInputStream fis = new FileInputStream(file.toFile())) {
            WebClient.builder()
                    .baseUri("http://localhost:8080")
                    .build()
                    .post()
                    .path("/streamUpload")
                    .queryParam("fileName", "bigFile_" + System.currentTimeMillis() + ".mkv")
                    .contentType(MediaType.APPLICATION_OCTET_STREAM)
                    .submit(IoMulti.multiFromByteChannelBuilder(fis.getChannel())
                            .bufferCapacity(1024 * 1024 * 4)
                            .build()
                            .map(DataChunk::create))
                    .await(Duration.ofMinutes(10));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        executor.shutdown();
        server.shutdown()
                .await(Duration.ofSeconds(10));
    }

    static Path createFile(String path) {
        try {
            Path filePath = Path.of("/home/kec/tmp/" + path);
            System.out.println("Creating " + filePath);
            return Files.createFile(filePath);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

Update Vaadin Progressbar with push asynchronously

I have a question about calculating the hash of files (e.g. MD5, SHA-1, SHA-256) and setting the progress on a ProgressBar from an asynchronous process.
Thanks to Alejandro Duarte, who shows a very practical example:
https://github.com/newUserRepo/testbar/blob/issueBar/vaadin-upload/src/main/java/com/example/vaadinupload/ProcessingService.java#L44
The only way I can get it to work is to sleep for 4 milliseconds in the method at line 75 while processing the task; then the bar is updated, but it is too slow.
If I do not sleep the thread, the application does not push correctly and the changes are not reflected on the client.
Another way that actually worked was to use the Runnable interface and execute the heavy task in the run() method:
@Override
public void run() {
    calcularHash();
}

public void calcularHash() {
    System.out.println("Path tmp archivo: " + tmpPath.toFile().getAbsolutePath());
    for (int f = 0; f < hashType.size(); f++) {
        try (InputStream bis = new BufferedInputStream(Files.newInputStream(tmpPath))) {
            t.initTime();
            byte[] buffer = new byte[1024];
            MessageDigest messageDigest = MessageDigest.getInstance(hashType.get(f));
            int dataRead = 0;
            long largo = tmpPath.toFile().length();
            Long acum = 0L;
            while ((dataRead = bis.read(buffer)) != -1) {
                messageDigest.update(buffer, 0, dataRead);
                acum += dataRead;
                Float per = ((float) acum / largo);
                bar.setValue(per);
                System.out.println(per * 100);
                //textFieldPercent.setValue(Types.formatPercentaje(per * 100));
            }
            final byte[] bytesDigest = messageDigest.digest();
            final StringBuilder sb = new StringBuilder();
            for (int c = 0; c < bytesDigest.length; c++) {
                sb.append(Integer.toString((bytesDigest[c] & 0xFF) + 0x100, 16).substring(1));
            }
            final String hashObtenido = sb.toString();
            t.finishTime();
            final String totalTime = t.getFinalTimeSec() + "seg " + t.getFinalTimeMs() + "ms";
            final String large = Types.getLargeFileFormat(largo);
            System.out.println(hashObtenido);
            ui.access(() -> {
                checksumTransactions.initData(messageDigest.getAlgorithm(), sb.toString(), large, totalTime);
            });
            //Files.delete(tmpPath); //fixme delete from the grid or UI
        } catch (IOException e) {
            e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    }
}
With a simple public void run() the ProgressBar is updated well, but we know the application may have memory leaks and this is not good practice; the ideal would be to know how to execute that background thread properly.
I still do not know the best way to achieve this.
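Something along these lines is what I am aiming for (a sketch only, assuming Vaadin 8 APIs as in the code above and @Push enabled on the UI; the class and method names are just illustrative):
import com.vaadin.ui.ProgressBar;
import com.vaadin.ui.UI;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class HashBackgroundRunner {

    // One shared executor instead of ad-hoc threads; shut it down when the app stops.
    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    public void startHashing(UI ui, ProgressBar bar) {
        executor.submit(() -> {
            // ... heavy work from calcularHash(): read the file, update the MessageDigest ...
            float progress = 0.42f; // e.g. bytesRead / (float) fileLength
            // All UI mutations go through ui.access(); with @Push on the UI class the
            // change reaches the browser without sleeping the worker thread.
            ui.access(() -> bar.setValue(progress));
        });
    }

    public void shutdown() {
        executor.shutdown();
    }
}
This way the worker thread never sleeps just to let the UI catch up; ui.access() schedules the update and push delivers it to the client.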

Confluent HDFS Connector is losing messages

Community, could you please help me understand why ~3% of my messages don't end up in HDFS? I wrote a simple producer in Java to generate 10 million messages.
public static final String TEST_SCHEMA = "{"
        + "\"type\":\"record\","
        + "\"name\":\"myrecord\","
        + "\"fields\":["
        + "  { \"name\":\"str1\", \"type\":\"string\" },"
        + "  { \"name\":\"str2\", \"type\":\"string\" },"
        + "  { \"name\":\"int1\", \"type\":\"int\" }"
        + "]}";

public KafkaProducerWrapper(String topic) throws UnknownHostException {
    // store topic name
    this.topic = topic;
    // initialize kafka producer
    Properties config = new Properties();
    config.put("client.id", InetAddress.getLocalHost().getHostName());
    config.put("bootstrap.servers", "myserver-1:9092");
    config.put("key.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
    config.put("value.serializer", "io.confluent.kafka.serializers.KafkaAvroSerializer");
    config.put("schema.registry.url", "http://myserver-1:8089");
    config.put("acks", "all");
    producer = new KafkaProducer(config);
    // parse schema
    Schema.Parser parser = new Schema.Parser();
    schema = parser.parse(TEST_SCHEMA);
}

public void send() {
    // generate key
    int key = (int) (Math.random() * 20);
    // generate record
    GenericData.Record r = new GenericData.Record(schema);
    r.put("str1", "text" + key);
    r.put("str2", "text2" + key);
    r.put("int1", key);
    final ProducerRecord<String, GenericRecord> record = new ProducerRecord<>(topic, "K" + key, (GenericRecord) r);
    producer.send(record, new Callback() {
        public void onCompletion(RecordMetadata metadata, Exception e) {
            if (e != null) {
                logger.error("Send failed for record {}", record, e);
                messageErrorCounter++;
                return;
            }
            logger.debug("Send succeeded for record {}", record);
            messageCounter++;
        }
    });
}

public String getStats() { return "Messages sent: " + messageCounter + "/" + messageErrorCounter; }

public long getMessageCounter() {
    return messageCounter + messageErrorCounter;
}

public void close() {
    producer.close();
}

public static void main(String[] args) throws InterruptedException, UnknownHostException {
    // initialize kafka producer
    KafkaProducerWrapper kafkaProducerWrapper = new KafkaProducerWrapper("my-test-topic");
    long max = 10000000L;
    for (long i = 0; i < max; i++) {
        kafkaProducerWrapper.send();
    }
    logger.info("producer-demo sent all messages");
    while (kafkaProducerWrapper.getMessageCounter() < max) {
        logger.info(kafkaProducerWrapper.getStats());
        Thread.sleep(2000);
    }
    logger.info(kafkaProducerWrapper.getStats());
    kafkaProducerWrapper.close();
}
And I use the Confluent HDFS Connector in standalone mode to write data to HDFS. The configuration is as follows:
name=hdfs-consumer-test
connector.class=io.confluent.connect.hdfs.HdfsSinkConnector
tasks.max=1
topics=my-test-topic
hdfs.url=hdfs://my-cluster/kafka-test
hadoop.conf.dir=/etc/hadoop/conf/
flush.size=100000
rotate.interval.ms=20000
# increase timeouts to avoid CommitFailedException
consumer.session.timeout.ms=300000
consumer.request.timeout.ms=310000
heartbeat.interval.ms=60000
session.timeout.ms=100000
The connector writes the data into HDFS, but even after waiting for more than 20000 ms (the rotate.interval.ms), not all messages are there:
scala> spark.read.avro("/kafka-test/topics/my-test-topic/partition=*/my-test-topic*")
.count()
res0: Long = 9749015
Any idea what is the reason for this behavior? Where is my mistake? I'm using Confluent 3.0.1 / Kafka 0.10.0.1.
Are you seeing that only the last few messages are not moved to HDFS? If so, it's likely you are running into the issue described here: https://github.com/confluentinc/kafka-connect-hdfs/pull/100
Try sending one more message to the topic after rotate.interval.ms has expired, to validate that this is what you are running into. If you need to rotate based on time, it's probably a good idea to upgrade to pick up the fix.
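For example, something along these lines could be used to check it (a rough sketch reusing the KafkaProducerWrapper class from the question; the class name and timings are just illustrative):
public class RotationCheck {
    public static void main(String[] args) throws Exception {
        // Hypothetical check: reuses the KafkaProducerWrapper defined in the question.
        KafkaProducerWrapper wrapper = new KafkaProducerWrapper("my-test-topic");
        // Wait longer than rotate.interval.ms (20000 ms in the connector config) ...
        Thread.sleep(25_000);
        // ... then send one more record; when it arrives, the connector should notice the
        // elapsed interval, rotate the open file and commit the previously buffered records.
        wrapper.send();
        Thread.sleep(2_000); // give the async send a moment to complete
        wrapper.close();
    }
}
If the missing records show up in HDFS after this extra message, the behavior matches the linked issue.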

What is an efficient algorithm to implement a stack?

I have come across a problem: I need to implement a stack with push and pop operations.
Input
The first line of the input file contains a single integer number N (1 <= N <= 10^6) – the number of test cases.
The next N lines describe the operations: + means push, - means pop. For each pop I need to print the popped element.
Example
Input:
6
+ 1
+ 10
-
+ 2
+ 1234
-

Output:
10
1234
I have written the following code:
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.Scanner;

public class Main {
    public static void main(String[] args) throws FileNotFoundException {
        Scanner sc = new Scanner(new File("stack.in"));
        PrintWriter pw = new PrintWriter(new File("stack.out"));
        int n = sc.nextInt();
        int[] stack = new int[n];
        int i = 0;
        while (n-- > 0) {
            String s = sc.next();
            if (s.equals("+")) {
                stack[i++] = sc.nextInt();
            } else {
                pw.println(stack[--i]);
            }
        }
        sc.close();
        pw.close();
    }
}
This program is giving me Time Limit Exceeded.
Please suggest me an efficient algorithm to solve this.
For each input file:
Time limit: 2 seconds
Memory limit: 256 megabytes
A rule of thumb: if you're solving a competitive programming style problem and the input is large (say, 10^5 numbers or more), the Scanner is too slow.
You can use a StringTokenizer on top of a BufferedReader to speed up the input.
It can look like this:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.StringTokenizer;

class FastScanner {
    private StringTokenizer tokenizer;
    private BufferedReader reader;

    public FastScanner(InputStream inputStream) {
        reader = new BufferedReader(new InputStreamReader(inputStream));
    }

    public String next() {
        while (tokenizer == null || !tokenizer.hasMoreTokens()) {
            String line;
            try {
                line = reader.readLine();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            if (line == null)
                return null;
            tokenizer = new StringTokenizer(line);
        }
        return tokenizer.nextToken();
    }

    public int nextInt() {
        return Integer.parseInt(next());
    }
}
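For reference, the original Main could then look something like this (a sketch; it keeps the question's file names and array-backed stack, and also buffers the output writer, which is another common speedup):
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

public class Main {
    public static void main(String[] args) throws IOException {
        FastScanner sc = new FastScanner(new FileInputStream("stack.in"));
        // Buffer the writer so a million println calls don't each hit the file directly.
        PrintWriter pw = new PrintWriter(new BufferedWriter(new FileWriter("stack.out")));
        int n = sc.nextInt();
        int[] stack = new int[n]; // at most n pushes, so n slots are enough
        int top = 0;
        while (n-- > 0) {
            String s = sc.next();
            if (s.equals("+")) {
                stack[top++] = sc.nextInt();
            } else {
                pw.println(stack[--top]);
            }
        }
        pw.close();
    }
}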

Running Multiple threads in queue using BlockingCollections

My program has 3 functions. Each function takes a list of Items and fills in certain information.
For example:
class Item {
    String sku, upc, competitorName;
    double price;
}
Function F1 takes a List and fills in upc.
Function F2 takes a List (the output of F1) and fills in price.
Function F3 takes a List (the output of F2) and fills in competitorName.
F1 can process 5 items at a time,
F2 can process 20 items at a time,
F3 also 20.
Right now I am running F1 -> F2 -> F3 serially, because F2 needs info (the UPC code) from F1 and F3 needs the price from F2.
I would like to make this process more efficient by having F1 run continuously instead of waiting for F2 and F3 to complete: F1 executes and outputs into a queue, then F2 takes 20 items at a time and processes them, and F3 follows.
How can I achieve this using BlockingCollection and a queue?
This is a typical use case for Apache Storm, in case you have continuous items coming in to F1. You can implement this in Storm in a matter of minutes and you'll have a fast and fully parallel system in place. Your F1, F2 and F3 will become bolts and your Items producer will become a spout.
Since you asked how to do it using BlockingCollections, here is an implementation. You'll need 3 threads in total.
ItemsProducer: produces 5 items at a time and feeds them to F1.
F2ExecutorThread: consumes 20 items at a time and feeds them to F2.
F3ExecutorThread: consumes 20 items at a time and feeds them to F3.
You also have 2 blocking queues: one is used to transfer data from F1 -> F2 and one from F2 -> F3. You can also have a queue to feed data to F1 in a similar fashion if required; it depends on how you are getting the items. I've used Thread.sleep to simulate the time required to execute each function.
Each function keeps looking for items in its assigned queue, irrespective of what the other functions are doing, and waits until the queue has items. Once it has processed an item it puts it in the next queue for the next function, waiting if that queue is full.
Since all your functions are running in different threads, F1 won't be waiting for F2 or F3 to finish. If your F2 and F3 are significantly faster than F1, you can assign more threads to F1 and keep pushing to the same f2Queue.
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class App {
    final BlockingQueue<Item> f2Queue = new ArrayBlockingQueue<>(100);
    final BlockingQueue<Item> f3Queue = new ArrayBlockingQueue<>(100);

    public static void main(String[] args) throws InterruptedException {
        App app = new App();
        app.start();
    }

    public void start() throws InterruptedException {
        Thread t1 = new ItemsProducer(f2Queue);
        Thread t2 = new F2ExecutorThread(f2Queue, f3Queue);
        Thread t3 = new F3ExecutorThread(f3Queue);
        t1.start();
        t2.start();
        t3.start();
        t1.join();
        t2.join();
        t3.join();
    }
}
/**
 * Thread producing 5 items at a time and feeding it to f1()
 */
class ItemsProducer extends Thread {
    private BlockingQueue<Item> f2Queue;
    private static final int F1_BATCH_SIZE = 5;

    public ItemsProducer(BlockingQueue<Item> f2Queue) {
        this.f2Queue = f2Queue;
    }

    public void run() {
        Random random = new Random();
        while (true) {
            try {
                List<Item> items = new ArrayList<>();
                for (int i = 0; i < F1_BATCH_SIZE; i++) {
                    Item item = new Item(String.valueOf(random.nextInt(100)));
                    Thread.sleep(20);
                    items.add(item);
                    System.out.println("Item produced: " + item);
                }
                // Feed items to f1
                f1(items);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    void f1(List<Item> items) throws InterruptedException {
        Random random = new Random();
        for (Item item : items) {
            Thread.sleep(100);
            item.upc = String.valueOf(random.nextInt(100));
            f2Queue.put(item);
        }
    }
}
/**
 * Thread consuming items produced by f1(). It takes 20 items at a time, but if they are not
 * available it waits and starts processing as soon as one gets available
 */
class F2ExecutorThread extends Thread {
    static final int F2_BATCH_SIZE = 20;
    private BlockingQueue<Item> f2Queue;
    private BlockingQueue<Item> f3Queue;

    public F2ExecutorThread(BlockingQueue<Item> f2Queue, BlockingQueue<Item> f3Queue) {
        this.f2Queue = f2Queue;
        this.f3Queue = f3Queue;
    }

    public void run() {
        try {
            List<Item> items = new ArrayList<>();
            while (true) {
                items.clear();
                if (f2Queue.drainTo(items, F2_BATCH_SIZE) == 0) {
                    items.add(f2Queue.take());
                }
                f2(items);
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    void f2(List<Item> items) throws InterruptedException {
        Random random = new Random();
        for (Item item : items) {
            Thread.sleep(100);
            item.price = random.nextInt(100);
            f3Queue.put(item);
        }
    }
}
/**
 * Thread consuming items produced by f2(). It takes 20 items at a time, but if they are not
 * available it waits and starts processing as soon as one gets available.
 */
class F3ExecutorThread extends Thread {
    static final int F3_BATCH_SIZE = 20;
    private BlockingQueue<Item> f3Queue;

    public F3ExecutorThread(BlockingQueue<Item> f3Queue) {
        this.f3Queue = f3Queue;
    }

    public void run() {
        try {
            List<Item> items = new ArrayList<>();
            while (true) {
                items.clear();
                if (f3Queue.drainTo(items, F3_BATCH_SIZE) == 0) {
                    items.add(f3Queue.take());
                }
                f3(items);
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    private void f3(List<Item> items) throws InterruptedException {
        Random random = new Random();
        for (Item item : items) {
            Thread.sleep(100);
            item.competitorName = String.valueOf(random.nextInt(100));
            System.out.println("Item done: " + item);
        }
    }
}
class Item {
    String sku, upc, competitorName;
    double price;

    public Item(String sku) {
        this.sku = sku;
    }

    public String toString() {
        return "sku: " + sku + " upc: " + upc + " price: " + price + " compName: " + competitorName;
    }
}
I guess you can follow the exact same approach in .NET as well. For a better understanding I suggest you go through the basic architecture of Storm: http://storm.apache.org/releases/current/Tutorial.html
I tried to do the same thing in .NET and I think it is working:
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace BlockingCollectionExample
{
    class Program
    {
        static void Main(string[] args)
        {
            BlockingCollection<Listing> needUPCJobs = new BlockingCollection<Listing>();
            BlockingCollection<Listing> needPricingJobs = new BlockingCollection<Listing>();

            // This will have final output
            List<Listing> output = new List<Listing>();

            // start executor 1 which waits for data until available
            var executor1 = Task.Factory.StartNew(() =>
            {
                int maxSimutenousLimit = 5;
                int gg = 0;
                while (true)
                {
                    while (needUPCJobs.Count >= maxSimutenousLimit)
                    {
                        List<Listing> tempListings = new List<Listing>();
                        for (int i = 0; i < maxSimutenousLimit; i++)
                        {
                            Listing listing = new Listing();
                            if (needUPCJobs.TryTake(out listing))
                                tempListings.Add(listing);
                        }
                        // Simulating some delay for first executor
                        Thread.Sleep(1000);
                        foreach (var eachId in tempListings)
                        {
                            eachId.UPC = gg.ToString();
                            gg++;
                            needPricingJobs.Add(eachId);
                        }
                    }
                    if (needUPCJobs.IsAddingCompleted)
                    {
                        if (needUPCJobs.Count == 0)
                            break;
                        else
                            maxSimutenousLimit = needUPCJobs.Count;
                    }
                }
                needPricingJobs.CompleteAdding();
            });
            // start executor 2 which waits for data until available
            var executor2 = Task.Factory.StartNew(() =>
            {
                int maxSimutenousLimit = 10;
                int gg = 10;
                while (true)
                {
                    while (needPricingJobs.Count >= maxSimutenousLimit)
                    {
                        List<Listing> tempListings = new List<Listing>();
                        for (int i = 0; i < maxSimutenousLimit; i++)
                        {
                            Listing listing = new Listing();
                            if (needPricingJobs.TryTake(out listing))
                                tempListings.Add(listing);
                        }
                        // Simulating more delay for second executor
                        Thread.Sleep(10000);
                        foreach (var eachId in tempListings)
                        {
                            eachId.Price = gg;
                            gg++;
                            output.Add(eachId);
                        }
                    }
                    if (needPricingJobs.IsAddingCompleted)
                    {
                        if (needPricingJobs.Count == 0)
                            break;
                        else
                            maxSimutenousLimit = needPricingJobs.Count;
                    }
                }
            });

            // producer thread
            var producer = Task.Factory.StartNew(() =>
            {
                for (int i = 0; i < 100; i++)
                {
                    needUPCJobs.Add(new Listing() { ID = i });
                }
                needUPCJobs.CompleteAdding();
            });

            // wait for producer to finish producing
            producer.Wait();

            // wait for all executors to finish executing
            Task.WaitAll(executor1, executor2);

            Console.WriteLine();
            Console.WriteLine();
        }
    }

    public class Listing
    {
        public int ID;
        public string UPC;
        public double Price;

        public Listing() { }
    }
}
