Java 8 Stream , convert List<File> to Map<Integer, List<FIle>>

Java 8 Stream , convert List<File> to Map<Integer, List<FIle>> - java-8

I have below code in traditional java loop. Would like to use Java 8 Stream instead.
I have a sorted list of files(Sorted by file size). I group these files together in a way that the total size of all files does not exceed the given max size and put them in a Map with the key 1,2,3,... so on. Here is the code.
List<File> allFilesSortedBySize = getListOfFiles();
Map<Integer, List<File>> filesGroupedByMaxSizeMap = new HashMap<Integer, List<File>>();
double totalLength = 0L;
int count = 0;
List<File> filesWithSizeTotalMaxSize = Lists.newArrayList();
//group the files to be zipped together as per maximum allowable size in a map
for (File file : allFilesSortedBySize) {
long sizeInBytes = file.length();
double sizeInMb = (double)sizeInBytes / (1024 * 1024);
totalLength = totalLength + sizeInMb;
if(totalLength <= maxSize) {
filesWithSizeTotalMaxSize.add(file);
} else {
count = count + 1;
filesGroupedByMaxSizeMap.put(count, filesWithSizeTotalMaxSize);
filesWithSizeTotalMaxSize = Lists.newArrayList();
filesWithSizeTotalMaxSize.add(file);
totalLength = sizeInMb;
}
}
filesGroupedByMaxSizeMap.put(count+1, filesWithSizeTotalMaxSize);
return filesGroupedByMaxSizeMap;

after reading,I found the solution using Collectors.groupBy instead.
Code using java8 lambda expression
private final long MB = 1024 * 1024;
private Map<Integer, List<File>> grouping(List<File> files, long maxSize) {
AtomicInteger group = new AtomicInteger(0);
AtomicLong groupSize = new AtomicLong();
return files.stream().collect(groupingBy((file) -> {
if (groupSize.addAndGet(file.length()) <= maxSize * MB) {
return group.get() == 0 ? group.incrementAndGet() : group.get();
}
groupSize.set(file.length());
return group.incrementAndGet();
}));
}
Code provided by #Holger then you are free to checking group whether equals 0
private static final long MB = 1024 * 1024;
private Map<Integer, List<File>> grouping(List<File> files, long maxSize) {
AtomicInteger group = new AtomicInteger(0);
//force initializing group starts with 1 even if the first file is empty.
AtomicLong groupSize = new AtomicLong(maxSize * MB + 1);
return files.stream().collect(groupingBy((file) -> {
if (groupSize.addAndGet(file.length()) <= maxSize * MB) {
return group.get();
}
groupSize.set(file.length());
return group.incrementAndGet();
}));
}
Code using anonymous class
inspired by #Holger, All “solutions” using a grouping function that modifies external state are hacks abusing the API,so you can use anonymous class to manage the grouping logic state in class.
private static final long MB = 1024 * 1024;
private Map<Integer, List<File>> grouping(List<File> files, long maxSize) {
return files.stream().collect(groupingBy(groupSize(maxSize)));
}
private Function<File, Integer> groupSize(final long maxSize) {
long maxBytesSize = maxSize * MB;
return new Function<File, Integer>() {
private int group;
private long groupSize = maxBytesSize + 1;
#Override
public Integer apply(File file) {
return hasRemainingFor(file) ? current(file) : next(file);
}
private boolean hasRemainingFor(File file) {
return (groupSize += file.length()) <= maxBytesSize;
}
private int next(File file) {
groupSize = file.length();
return ++group;
}
private int current(File file) {
return group;
}
};
}
Test
import org.junit.jupiter.api.Test;
import java.io.File;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import static java.util.Arrays.asList;
import static java.util.Collections.singletonList;
import static java.util.stream.Collectors.groupingBy;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
/**
* Created by holi on 3/24/17.
*/
public class StreamGroupingTest {
private final File FILE_1MB = file(1);
private final File FILE_2MB = file(2);
private final File FILE_3MB = file(3);
#Test
void eachFileInIndividualGroupIfEachFileSizeGreaterThanMaxSize() {
Map<Integer, List<File>> groups = grouping(asList(FILE_2MB, FILE_3MB), 1);
assertThat(groups.size(), equalTo(2));
assertThat(groups.get(1), equalTo(singletonList(FILE_2MB)));
assertThat(groups.get(2), equalTo(singletonList(FILE_3MB)));
}
#Test
void allFilesInAGroupIfTotalSizeOfFilesLessThanOrEqualMaxSize() {
Map<Integer, List<File>> groups = grouping(asList(FILE_2MB, FILE_3MB), 5);
assertThat(groups.size(), equalTo(1));
assertThat(groups.get(1), equalTo(asList(FILE_2MB, FILE_3MB)));
}
#Test
void allNeighboringFilesInAGroupThatTotalOfTheirSizeLessThanOrEqualMaxSize() {
Map<Integer, List<File>> groups = grouping(asList(FILE_1MB, FILE_2MB, FILE_3MB), 3);
assertThat(groups.size(), equalTo(2));
assertThat(groups.get(1), equalTo(asList(FILE_1MB, FILE_2MB)));
assertThat(groups.get(2), equalTo(singletonList(FILE_3MB)));
}
#Test
void eachFileInIndividualGroupIfTheFirstFileAndTotalOfEachNeighboringFilesSizeGreaterThanMaxSize() {
Map<Integer, List<File>> groups = grouping(asList(FILE_2MB, FILE_1MB, FILE_3MB), 2);
assertThat(groups.size(), equalTo(3));
assertThat(groups.get(1), equalTo(singletonList(FILE_2MB)));
assertThat(groups.get(2), equalTo(singletonList(FILE_1MB)));
assertThat(groups.get(3), equalTo(singletonList(FILE_3MB)));
}
#Test
void theFirstEmptyFileInGroup1() throws Throwable {
File emptyFile = file(0);
Map<Integer, List<File>> groups = grouping(singletonList(emptyFile), 2);
assertThat(groups.get(1), equalTo(singletonList(emptyFile)));
}
private static final long MB = 1024 * 1024;
private Map<Integer, List<File>> grouping(List<File> files, long maxSize) {
AtomicInteger group = new AtomicInteger(0);
AtomicLong groupSize = new AtomicLong(maxSize * MB + 1);
return files.stream().collect(groupingBy((file) -> {
if (groupSize.addAndGet(file.length()) <= maxSize * MB) {
return group.get();
}
groupSize.set(file.length());
return group.incrementAndGet();
}));
}
private Function<File, Integer> groupSize(final long maxSize) {
long maxBytesSize = maxSize * MB;
return new Function<File, Integer>() {
private int group;
private long groupSize = maxBytesSize + 1;
#Override
public Integer apply(File file) {
return hasRemainingFor(file) ? current(file) : next(file);
}
private boolean hasRemainingFor(File file) {
return (groupSize += file.length()) <= maxBytesSize;
}
private int next(File file) {
groupSize = file.length();
return ++group;
}
private int current(File file) {
return group;
}
};
}
private File file(int sizeOfMB) {
return new File(String.format("%dMB file", sizeOfMB)) {
#Override
public long length() {
return sizeOfMB * MB;
}
#Override
public boolean equals(Object obj) {
File that = (File) obj;
return length() == that.length();
}
};
}
}

Since the processing of each element highly depends on the previous’ processing, this task is not suitable for streams. You still can achieve it using a custom collector, but the implementation would be much more complicated than the loop solution.
In other words, there is no improvement when you rewrite this as a stream operation. Stay with the loop.
However, there are still some things you can improve.
List<File> allFilesSortedBySize = getListOfFiles();
// get maxSize in bytes ONCE, instead of converting EACH size to MiB
long maxSizeBytes = (long)(maxSize * 1024 * 1024);
// use "diamond operator"
Map<Integer, List<File>> filesGroupedByMaxSizeMap = new HashMap<>();
// start with "create new list" condition to avoid code duplication
long totalLength = maxSizeBytes;
// count is obsolete, the map maintains a size
// the initial "totalLength = maxSizeBytes" forces creating a new list within the loop
List<File> filesWithSizeTotalMaxSize = null;
for(File file: allFilesSortedBySize) {
long length = file.length();
if(maxSizeBytes-totalLength <= length) {
filesWithSizeTotalMaxSize = new ArrayList<>(); // no utility method needed
// store each list immediately, so no action after the loop needed
filesGroupedByMaxSizeMap.put(filesGroupedByMaxSizeMap.size()+1,
filesWithSizeTotalMaxSize);
totalLength = 0;
}
totalLength += length;
filesWithSizeTotalMaxSize.add(file);
}
return filesGroupedByMaxSizeMap;
You may further replace
filesWithSizeTotalMaxSize = new ArrayList<>();
filesGroupedByMaxSizeMap.put(filesGroupedByMaxSizeMap.size()+1,
filesWithSizeTotalMaxSize);
with
filesWithSizeTotalMaxSize = filesGroupedByMaxSizeMap.computeIfAbsent(
filesGroupedByMaxSizeMap.size()+1, x -> new ArrayList<>());
but there might be different opinions whether this is an improvement.

The simplest solution to the problem I could think of is to use an AtomicLong wrapper for the size and a AtomicInteger wrapper for length. These have some useful methods for performing basic arithmetic operations on them which are very useful in this particular case.
List<File> files = getListOfFiles();
AtomicLong length = new AtomicLong();
AtomicInteger index = new AtomicInteger(1);
long maxLength = SOME_ARBITRARY_NUMBER;
Map<Integer, List<File>> collect = files.stream().collect(Collectors.groupingBy(
file -> {
if (length.addAndGet(file.length()) <= maxLength) {
return index.get();
}
length.set(file.length());
return index.incrementAndGet();
}
));
return collect;
Basically what Collectors.groupingBy does the work which you Intended.

Related

Memory grows until OOM crash when using a WindowStore in Kafka streams

I built a stream that does a windowed join, when deploying on production all is fine in terms of memory and performance.
However, I needed Deduplication, hence implemented a Transformer that does that with the help of a WindowStore.
After deploying it, we are getting the data results that was expected, but memory keeps growing until the pod crashes with OOM.
After doing research I implemented many tricks to reduce memory usage, but they didn't help, below is the code.
It's clear to me that using the WindowStore is causing this issue, but how to limit it?
The Store:
var storeBuilder = Stores.windowStoreBuilder(
Stores.persistentWindowStore(
storeName,
Duration.ofSeconds(6),
Duration.ofSeconds(5),
false
),
Serdes.String(),
SerdeFactory.JsonSerde(valueDataClass)
);
The stream:
var leftStream = builder.stream("leftTopic").filter(...);
var rightStream = builder.stream("rightTopic").filter(...);
leftStream.join(rightStream, joiner, JoinWindows
.of(Duration.ofSeconds(5))
.grace(Duration.ofSeconds(1))
.until(
Duration.ofSeconds(6)
)
.transformValues(
() ->
new DeduplicationTransformer<>(
storeName,
Duration.ofSeconds(6),
(key, value) -> value.id
),
storeName
)
.filter((k, v) -> v != null)
.to("targetTopic");
Deduplication Transformer:
public class DeduplicationTransformer<K, V extends StreamModel>
implements ValueTransformerWithKey<K, V, V> {
private ProcessorContext context;
private String storeName;
private WindowStore<K, V> eventIdStore;
private final long leftDurationMs;
private final KeyValueMapper<K, V, K> idExtractor;
public DeduplicationTransformer(
String storeName,
long maintainDurationPerEventInMs,
final KeyValueMapper<K, V, K> idExtractor
) {
if (maintainDurationPerEventInMs < 2) {
throw new IllegalArgumentException(
"maintain duration per event must be > 1"
);
}
leftDurationMs = maintainDurationPerEventInMs;
this.idExtractor = idExtractor;
this.storeName = storeName;
}
#Override
public void init(final ProcessorContext context) {
this.context = context;
eventIdStore = (WindowStore<K, V>) context.getStateStore(storeName);
Duration interval = Duration.ofMillis(leftDurationMs);
this.context.schedule(
interval,
PunctuationType.WALL_CLOCK_TIME,
timestamp -> {
Instant from = Instant.ofEpochMilli(
System.currentTimeMillis() - leftDurationMs * 2
);
Instant to = Instant.ofEpochMilli(
System.currentTimeMillis() - leftDurationMs
);
KeyValueIterator<Windowed<K>, V> iterator = eventIdStore.fetchAll(
from,
to
);
while (iterator.hasNext()) {
KeyValue<Windowed<K>, V> entry = iterator.next();
eventIdStore.put(entry.key.key(), null, entry.key.window().start());
}
iterator.close();
context.commit();
}
);
}
#Override
public V transform(final K key, final V value) {
try {
final K eventId = idExtractor.apply(key, value);
if (eventId == null) {
return value;
} else {
final V output;
if (isDuplicate(eventId)) {
output = null;
} else {
output = value;
rememberNewEvent(eventId, value, context.timestamp());
}
return output;
}
} catch (Exception e) {
return null;
}
}
private boolean isDuplicate(final K eventId) {
final long eventTime = context.timestamp();
final WindowStoreIterator<V> timeIterator = eventIdStore.fetch(
eventId,
eventTime - leftDurationMs,
eventTime
);
final boolean isDuplicate = timeIterator.hasNext();
timeIterator.close();
return isDuplicate;
}
private void rememberNewEvent(final K eventId, V v, final long timestamp) {
eventIdStore.put(eventId, v, timestamp);
}
#Override
public void close() {}
}
RocksDB config:
public class BoundedMemoryRocksDBConfig implements RocksDBConfigSetter {
private Cache cache = new LRUCache(5 * 1024 * 1204L);
private Filter filter = new BloomFilter();
private WriteBufferManager writeBufferManager = new WriteBufferManager(
4 * 1024 * 1204L,
cache
);
#Override
public void setConfig(
final String storeName,
final Options options,
final Map<String, Object> configs
) {
BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
tableConfig.setBlockCache(cache);
tableConfig.setCacheIndexAndFilterBlocks(true);
options.setWriteBufferManager(writeBufferManager);
tableConfig.setCacheIndexAndFilterBlocksWithHighPriority(false);
tableConfig.setPinTopLevelIndexAndFilter(true);
tableConfig.setBlockSize(4 * 1024L);
options.setMaxWriteBufferNumber(1);
options.setWriteBufferSize(1024 * 1024L);
options.setTableFormatConfig(tableConfig);
}
#Override
public void close(final String storeName, final Options options) {
cache.close();
filter.close();
}
}
Config:
props.put(
StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG,
0
);
props.put(
StreamsConfig.ROCKSDB_CONFIG_SETTER_CLASS_CONFIG,
BoundedMemoryRocksDBConfig.class
);
Things I've tried so far:
Using a bounded RocksDB config setter
Using jemalloc instead of malloc
Reducing the retention period to 5 seconds
Reducing the number of partitions of the topics (this only slowed the rate of the memory leak)
used in-memory stores instead of persistent, and memory was very stable, but the app startup takes around 10 minutes on each deployment.

how to use Java 8 for long function definitions?

I have a an object Product,
and code as below , hashSetProducts is LinkedHashSet of Products. How can I write all below using Java 8 stream function ? I understand that value of remianing will be replaced each time. I want the final value after the for loop exits.
int getRemaining(int remaining){
for(Product P : hashSetProducts){
remaining = calculate(p.qty(), p.price(), remaining, location); //
use Java 8 stream here
}
return remaining
}
private int calculate(int qty, double price, int rem, Location location){
if(rem== 0){
return 0;
}
int avail = location.get(qty, rem);
if(avail > 0){
rem = avail - rem;
}
return rem;
}

mapToLong will execute arbitrary code that returns a long. Here is an MCVE that uses your calculation verbatim:
import java.util.LinkedHashSet;
public class HelloWorld{
public static class Product {
private int qty;
private double price;
private int used;
public Product(int qty, double price, int used) {
this.qty = qty;
this.price =price;
this.used = used;
}
public int qty() {return qty;}
public double price() {return price;}
public int used() {return used;}
};
public static class Location {
public long get(int qty, int used) { return 0; };
};
public static void main(String []args) {
LinkedHashSet<Product> hashSetProducts = new LinkedHashSet();
hashSetProducts.add(new Product(1,1.0,1));
hashSetProducts.add(new Product(2,2.0,2));
hashSetProducts.add(new Product(3,3.0,3));
Location location = new Location();
long remaining = hashSetProducts.stream().mapToLong(p -> {
int qty = p.qty();
int used = p.used();
if( used == 0 )
return 0;
long rem = location.get(qty, used);
if( qty > 0)
rem = used - rem;
return rem;
}).sum();
System.out.println(remaining);
}
}

Java8 Stream Collectors - Splitting a list based on sum of values

I am trying partition a list into multiple sublists based on a condition that sum of a particular field should be less than 'x'. Below is sameple code:
public class TestGrouping {
public static class Transaction{
String txnId;
String comment;
Amount amount;
public Transaction(String txnId, String comment, Amount amount) {
this.txnId = txnId;
this.comment = comment;
this.amount = amount;
}
}
public static class Amount{
String amountValue;
public Amount(String amountValue) {
this.amountValue = amountValue;
}
}
public static void main(String[] args) {
List<Transaction> transactionList = new ArrayList<>();
Transaction txn1 = new Transaction("T1","comment1",new Amount("81"));
Transaction txn2 = new Transaction("T2","comment2",new Amount("5"));
Transaction txn3 = new Transaction("T3","comment3",new Amount("12"));
Transaction txn4 = new Transaction("T4","comment4",new Amount("28"));
transactionList.add(txn1);
transactionList.add(txn2);
transactionList.add(txn3);
transactionList.add(txn4);
//below is what i thought might work
// transactionList.stream().collect(groupingBy (r->Collectors.summingInt(Integer.valueOf(r.amount.amountValue)),Collectors.mapping(t -> t, toList())));
}
The goal is to split the transactionList into 2 (or more) sublists - where the sum of 'amount' is less than 100. So i could have a sublist have only txn1 - having amount as 81; and the other sublist have txn2, txn3, txn4 (as sum of these is less 100). Other possibility is - have sublist1 having txn1, txn2, txn3; and another sublist with just txn4. Not trying to create the most 'optimal' lists basically, just that sum of amounts should be less than 100.
Any clues?

The Idea is to use a custom collector to generate a list of pair(amountSum, transactions), the list should initialy be sorted. The accumulator method (here Accumulator.accept) do the grouping logic, I didn't implement combine because there is no need for a combiner in non parallel stream.
Bellow the code snippet, hope it helps.
public class TestStream {
public class Transaction {
String txnId;
String comment;
Amount amount;
public Transaction(String txnId, String comment, Amount amount) {
this.txnId = txnId;
this.comment = comment;
this.amount = amount;
}
}
public class Amount {
String amountValue;
public Amount(String amountValue) {
this.amountValue = amountValue;
}
}
#Test
public void test() {
List<Transaction> transactionList = new ArrayList<>();
Transaction txn1 = new Transaction("T1", "comment1", new Amount("81"));
Transaction txn2 = new Transaction("T2", "comment2", new Amount("5"));
Transaction txn3 = new Transaction("T3", "comment3", new Amount("12"));
Transaction txn4 = new Transaction("T4", "comment4", new Amount("28"));
transactionList.add(txn1);
transactionList.add(txn2);
transactionList.add(txn3);
transactionList.add(txn4);
transactionList.stream()
.sorted(Comparator.comparing(tr -> Integer.valueOf(tr.amount.amountValue)))
.collect(ArrayList<Pair<Integer, List<Transaction>>>::new, Accumulator::accept, (x, y) -> {
})
.forEach(t -> {
System.out.println(t.left);
});
}
static class Accumulator {
public static void accept(List<Pair<Integer, List<Transaction>>> lPair, Transaction tr) {
Pair<Integer, List<Transaction>> lastPair = lPair.isEmpty() ? null : lPair.get(lPair.size() - 1);
Integer amount = Integer.valueOf(tr.amount.amountValue);
if (Objects.isNull(lastPair) || lastPair.left + amount > 100) {
lPair.add(
new TestStream().new Pair<Integer, List<Transaction>>(amount,
Arrays.asList(tr)));
} else {
List<Transaction> newList = new ArrayList<>();
newList.addAll(lastPair.getRight());
newList.add(tr);
lastPair.setLeft(lastPair.getLeft() + amount);
lastPair.setRight(newList);
}
}
}
class Pair<T, V> {
private T left;
private V right;
/**
*
*/
public Pair(T left, V right) {
this.left = left;
this.right = right;
}
public V getRight() {
return right;
}
public T getLeft() {
return left;
}
public void setLeft(T left) {
this.left = left;
}
public void setRight(V right) {
this.right = right;
}
}
}

Chronicle Queue V3. Can Entries be lost on data block roll-over?

I have an application that writes entries to a Chronicle Queue (V3) that also retains excerpt entry index values in other (Chronicle)Maps by way of providing indexed access in the queue. Sometimes we fail to find a given entry that we've earlier saved and I believe it maybe related to data-block roll-over.
Below is a stand-alone test program that reproduces such use-cases at small-scale. It repeatedly writes an entry and immediately attempts to find the resulting index value up using a separate ExcerptTailer. All is well for a while until the first data-block is used up and a second data file is assigned, then the retrieval failures start. If the data block size is increased to avoid roll-overs, then no entries are lost. Also using a small index data-block size, causing multiple index files to be created, doesn't cause a problem.
The test program also tries using an ExcerptListener running in parallel to see if the entries apparently 'lost' by the writer, are ever received by the reader thread - they're not. Also tries to re-read the resulting queue from start until end, which reconfirms that they really are lost.
Stepping thru' the code, I see that when looking up a 'missing entry', within AbstractVanillarExcerpt#index, it appears to successfully locate the correct VanillaMappedBytes object from the dataCache, but determines that there is no entry and the data-offset as the len == 0. In addition to the entries not being found, at some point after the problems start occurring post-roll-over, an NPE is thrown from within the VanillaMappedFile#fileChannel method due to it having been passed a null File path. The code-path assumes that when resolving a entry looked up successfully in the index that a file will always have been found, but isn't in this case.
Is it possible to reliably use Chronicle Queue across data-block roll-overs, and if so, what am I doing that maybe causing the problem I'm experiencing?
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import net.openhft.affinity.AffinitySupport;
import net.openhft.chronicle.Chronicle;
import net.openhft.chronicle.ChronicleQueueBuilder;
import net.openhft.chronicle.ExcerptAppender;
import net.openhft.chronicle.ExcerptCommon;
import net.openhft.chronicle.ExcerptTailer;
import net.openhft.chronicle.VanillaChronicle;
public class ChronicleTests {
private static final int CQ_LEN = VanillaChronicle.Cycle.DAYS.length();
private static final long CQ_ENT = VanillaChronicle.Cycle.DAYS.entries();
private static final String ROOT_DIR = System.getProperty(ChronicleTests.class.getName() + ".ROOT_DIR",
"C:/Temp/chronicle/");
private static final String QDIR = System.getProperty(ChronicleTests.class.getName() + ".QDIR", "chronicleTests");
private static final int DATA_SIZE = Integer
.parseInt(System.getProperty(ChronicleTests.class.getName() + ".DATA_SIZE", "100000"));
// Chunk file size of CQ index
private static final int INDX_SIZE = Integer
.parseInt(System.getProperty(ChronicleTests.class.getName() + ".INDX_SIZE", "10000"));
private static final int Q_ENTRIES = Integer
.parseInt(System.getProperty(ChronicleTests.class.getName() + ".Q_ENTRIES", "5000"));
// Data type id
protected static final byte FSYNC_DATA = 1;
protected static final byte NORMAL_DATA = 0;
protected static final byte TH_START_DATA = -1;
protected static final byte TH_END_DATA = -2;
protected static final byte CQ_START_DATA = -3;
private static final long MAX_RUNTIME_MILLISECONDS = 30000;
private static String PAYLOAD_STRING = "1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
private static byte PAYLOAD_BYTES[] = PAYLOAD_STRING.getBytes();
private Chronicle _chronicle;
private String _cqPath = ROOT_DIR + QDIR;
#Before
public void init() {
buildCQ();
}
#Test
public void test() throws IOException, InterruptedException {
boolean passed = true;
Collection<Long> missingEntries = new LinkedList<Long>();
long sent = 0;
Thread listener = listen();
try {
listener.start();
// Write entries to CQ,
for (int i = 0; i < Q_ENTRIES; i++) {
long entry = writeQEntry(PAYLOAD_BYTES, (i % 100) == 0);
sent++;
// check each entry can be looked up
boolean found = checkEntry(i, entry);
if (!found)
missingEntries.add(entry);
passed &= found;
}
// Wait awhile for the listener
listener.join(MAX_RUNTIME_MILLISECONDS);
if (listener.isAlive())
listener.interrupt();
} finally {
if (listener.isAlive()) { // => exception raised so wait for listener
log("Give listener a chance....");
sleep(MAX_RUNTIME_MILLISECONDS);
listener.interrupt();
}
log("Sent: " + sent + " Received: " + _receivedEntries.size());
// Look for missing entries in receivedEntries
missingEntries.forEach(me -> checkMissingEntry(me));
log("All passed? " + passed);
// Try to find missing entries by searching from the start...
searchFromStartFor(missingEntries);
_chronicle.close();
_chronicle = null;
// Re-initialise CQ and look for missing entries again...
log("Re-initialise");
init();
searchFromStartFor(missingEntries);
}
}
private void buildCQ() {
try {
// build chronicle queue
_chronicle = ChronicleQueueBuilder.vanilla(_cqPath).cycleLength(CQ_LEN).entriesPerCycle(CQ_ENT)
.indexBlockSize(INDX_SIZE).dataBlockSize(DATA_SIZE).build();
} catch (IOException e) {
throw new InitializationException("Failed to initialize Active Trade Store.", e);
}
}
private long writeQEntry(byte dataArray[], boolean fsync) throws IOException {
ExcerptAppender appender = _chronicle.createAppender();
return writeData(appender, dataArray, fsync);
}
private boolean checkEntry(int seqNo, long entry) throws IOException {
ExcerptTailer tailer = _chronicle.createTailer();
if (!tailer.index(entry)) {
log("SeqNo: " + seqNo + " for entry + " + entry + " not found");
return false;
}
boolean isMarker = isMarker(tailer);
boolean isFsyncData = isFsyncData(tailer);
boolean isNormalData = isNormalData(tailer);
String type = isMarker ? "MARKER" : isFsyncData ? "FSYNC" : isNormalData ? "NORMALDATA" : "UNKNOWN";
log("Entry: " + entry + "(" + seqNo + ") is " + type);
return true;
}
private void log(String string) {
System.out.println(string);
}
private void searchFromStartFor(Collection<Long> missingEntries) throws IOException {
Set<Long> foundEntries = new HashSet<Long>(Q_ENTRIES);
ExcerptTailer tailer = _chronicle.createTailer();
tailer.toStart();
while (tailer.nextIndex())
foundEntries.add(tailer.index());
Iterator<Long> iter = missingEntries.iterator();
long foundCount = 0;
while (iter.hasNext()) {
long me = iter.next();
if (foundEntries.contains(me)) {
log("Found missing entry: " + me);
foundCount++;
}
}
log("searchFromStartFor Found: " + foundCount + " of: " + missingEntries.size() + " missing entries");
}
private void checkMissingEntry(long missingEntry) {
if (_receivedEntries.contains(missingEntry))
log("Received missing entry:" + missingEntry);
}
Set<Long> _receivedEntries = new HashSet<Long>(Q_ENTRIES);
private Thread listen() {
Thread returnVal = new Thread("Listener") {
public void run() {
try {
int receivedCount = 0;
ExcerptTailer tailer = _chronicle.createTailer();
tailer.toStart();
while (receivedCount < Q_ENTRIES) {
if (tailer.nextIndex()) {
_receivedEntries.add(tailer.index());
} else {
ChronicleTests.this.sleep(1);
}
}
log("listener complete");
} catch (IOException e) {
log("Interupted before receiving all entries");
}
}
};
return returnVal;
}
private void sleep(long interval) {
try {
Thread.sleep(interval);
} catch (InterruptedException e) {
// No action required
}
}
protected static final int THREAD_ID_LEN = Integer.SIZE / Byte.SIZE;
protected static final int DATA_TYPE_LEN = Byte.SIZE / Byte.SIZE;
protected static final int TIMESTAMP_LEN = Long.SIZE / Byte.SIZE;
protected static final int CRC_LEN = Long.SIZE / Byte.SIZE;
protected static long writeData(ExcerptAppender appender, byte dataArray[],
boolean fsync) {
appender.startExcerpt(DATA_TYPE_LEN + THREAD_ID_LEN + dataArray.length
+ CRC_LEN);
appender.nextSynchronous(fsync);
if (fsync) {
appender.writeByte(FSYNC_DATA);
} else {
appender.writeByte(NORMAL_DATA);
}
appender.writeInt(AffinitySupport.getThreadId());
appender.write(dataArray);
appender.writeLong(CRCCalculator.calcDataAreaCRC(appender));
appender.finish();
return appender.lastWrittenIndex();
}
protected static boolean isMarker(ExcerptCommon excerpt) {
if (isCqStartMarker(excerpt) || isStartMarker(excerpt) || isEndMarker(excerpt)) {
return true;
}
return false;
}
protected static boolean isCqStartMarker(ExcerptCommon excerpt) {
return isDataTypeMatched(excerpt, CQ_START_DATA);
}
protected static boolean isStartMarker(ExcerptCommon excerpt) {
return isDataTypeMatched(excerpt, TH_START_DATA);
}
protected static boolean isEndMarker(ExcerptCommon excerpt) {
return isDataTypeMatched(excerpt, TH_END_DATA);
}
protected static boolean isData(ExcerptTailer tailer, long index) {
if (!tailer.index(index)) {
return false;
}
return isData(tailer);
}
private static void movePosition(ExcerptCommon excerpt, long position) {
if (excerpt.position() != position)
excerpt.position(position);
}
private static void moveToFsyncFlagPos(ExcerptCommon excerpt) {
movePosition(excerpt, 0);
}
private static boolean isDataTypeMatched(ExcerptCommon excerpt, byte type) {
moveToFsyncFlagPos(excerpt);
byte b = excerpt.readByte();
if (b == type) {
return true;
}
return false;
}
protected static boolean isNormalData(ExcerptCommon excerpt) {
return isDataTypeMatched(excerpt, NORMAL_DATA);
}
protected static boolean isFsyncData(ExcerptCommon excerpt) {
return isDataTypeMatched(excerpt, FSYNC_DATA);
}
/**
* Check if this entry is Data
*
* #param excerpt
* #return true if the entry is data
*/
protected static boolean isData(ExcerptCommon excerpt) {
if (isNormalData(excerpt) || isFsyncData(excerpt)) {
return true;
}
return false;
}
}

The problem only occurs when initialising the data-block size with a value that is not a power of two. The built-in configurations on IndexedChronicleQueueBuilder (small(), medium(), large()) take care to initialise using powers of two which provided the clue as to the appropriate usage.
Notwithstanding the above response regarding support, which I totally appreciate, it would be useful if a knowledgeable Chronicle user could confirm that the integrity of Chronicle Queue depends on using a data-block size of a power of two.

Spliterator Java 8

I have a number from 1 to 10,000 stored in an array of long. When adding them sequentially it will give a result of 50,005,000.
I have writing an Spliterator where if a size of array is longer than 1000, it will be splitted to another array.
Here is my code. But when I run it, the result from addition is far greater than 50,005,000. Can someone tell me what is wrong with my code?
Thank you so much.
import java.util.Arrays;
import java.util.Optional;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.LongStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
public class SumSpliterator implements Spliterator<Long> {
private final long[] numbers;
private int currentPosition = 0;
public SumSpliterator(long[] numbers) {
super();
this.numbers = numbers;
}
#Override
public boolean tryAdvance(Consumer<? super Long> action) {
action.accept(numbers[currentPosition++]);
return currentPosition < numbers.length;
}
#Override
public long estimateSize() {
return numbers.length - currentPosition;
}
#Override
public int characteristics() {
return SUBSIZED;
}
#Override
public Spliterator<Long> trySplit() {
int currentSize = numbers.length - currentPosition;
if( currentSize <= 1_000){
return null;
}else{
currentPosition = currentPosition + 1_000;
return new SumSpliterator(Arrays.copyOfRange(numbers, 1_000, numbers.length));
}
}
public static void main(String[] args) {
long[] twoThousandNumbers = LongStream.rangeClosed(1, 10_000).toArray();
Spliterator<Long> spliterator = new SumSpliterator(twoThousandNumbers);
Stream<Long> stream = StreamSupport.stream(spliterator, false);
System.out.println( sumValues(stream) );
}
private static long sumValues(Stream<Long> stream){
Optional<Long> optional = stream.reduce( ( t, u) -> t + u );
return optional.get() != null ? optional.get() : Long.valueOf(0);
}
}

I have the strong feeling that you didn’t get the purpose of splitting right. It’s not meant to copy the underlying data but just provide access to a range of it. Keep in mind that spliterators provide read-only access. So you should pass the original array to the new spliterator and configure it with an appropriate position and length instead of copying the array.
But besides the inefficiency of copying, the logic is obviously wrong: You pass Arrays.copyOfRange(numbers, 1_000, numbers.length) to the new spliterator, so the new spliterator contains the elements from position 1000 to the end of the array and you advance the current spliterator’s position by 1000, so the old spliterator covers the elements from currentPosition + 1_000 to the end of the array. So both spliterators will cover elements at the end of the array while at the same time, depending on the previous value of currentPosition, elements at the beginning might not be covered at all. So when you want to advance the currentPosition by 1_000 the skipped range is expressed by Arrays.copyOfRange(numbers, currentPosition, 1_000) instead, referring to the currentPosition before advancing.
It’s should also be noted, that a spliterator should attempt to split balanced, that is, in the middle if the size is known. So splitting off thousand elements is not the right strategy for an array.
Further, your tryAdvance method is wrong. It should not test after calling the consumer but before, returning false if there are no more elements, which also implies that the consumer has not been called.
Putting it all together, the implementation may look like
public class MyArraySpliterator implements Spliterator<Long> {
private final long[] numbers;
private int currentPosition, endPosition;
public MyArraySpliterator(long[] numbers) {
this(numbers, 0, numbers.length);
}
public MyArraySpliterator(long[] numbers, int start, int end) {
this.numbers = numbers;
currentPosition=start;
endPosition=end;
}
#Override
public boolean tryAdvance(Consumer<? super Long> action) {
if(currentPosition < endPosition) {
action.accept(numbers[currentPosition++]);
return true;
}
return false;
}
#Override
public long estimateSize() {
return endPosition - currentPosition;
}
#Override
public int characteristics() {
return ORDERED|NONNULL|SIZED|SUBSIZED;
}
#Override
public Spliterator<Long> trySplit() {
if(estimateSize()<=1000) return null;
int middle = (endPosition + currentPosition)>>>1;
MyArraySpliterator prefix
= new MyArraySpliterator(numbers, currentPosition, middle);
currentPosition=middle;
return prefix;
}
}
But of course, it’s recommended to provide a specialized forEachRemaining implementation, where possible:
#Override
public void forEachRemaining(Consumer<? super Long> action) {
int pos=currentPosition, end=endPosition;
currentPosition=end;
for(;pos<end; pos++) action.accept(numbers[pos]);
}
As a final note, for the task of summing longs from an array, a Spliterator.OfLong and a LongStream is preferred and that work has already been done, see Arrays.spliterator() and LongStream.sum(), making the whole task as simple as Arrays.stream(numbers).sum().

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Java 8 Stream , convert List<File> to Map<Integer, List<FIle>> - java-8

Related

Memory grows until OOM crash when using a WindowStore in Kafka streams

how to use Java 8 for long function definitions?

Java8 Stream Collectors - Splitting a list based on sum of values

Chronicle Queue V3. Can Entries be lost on data block roll-over?

Spliterator Java 8

Categories

Resources