O(1) lookup in non-contiguous memory? - performance

Is there any known data structure that provides O(1) random access, without using a contiguous block of memory of size O(N) or greater? This was inspired by this answer and is being asked for curiosity's sake rather than for any specific practical use case, though it might hypothetically be useful in cases of a severely fragmented heap.

Yes, here's an example in C++:
template<class T>
struct Deque {
struct Block {
enum {
B = 4*1024 / sizeof(T), // use any strategy you want
// this gives you ~4KiB blocks
length = B
};
T data[length];
};
std::vector<Block*> blocks;
T& operator[](int n) {
return blocks[n / Block::length]->data[n % Block::length]; // O(1)
}
// many things left out for clarity and brevity
};
The main difference from std::deque is this has O(n) push_front instead of O(1), and in fact there's a bit of a problem implementing std::deque to have all of:
O(1) push_front
O(1) push_back
O(1) op[]
Perhaps I misinterpreted "without using a contiguous block of memory of size O(N) or greater", which seems awkward. Could you clarify what you want? I've interpreted as "no single allocation that contains one item for every item in the represented sequence", such as would be helpful to avoid large allocations. (Even though I do have a single allocation of size N/B for the vector.)
If my answer doesn't fit your definition, then nothing will, unless you artificially limit the container's max size. (I can limit you to LONG_MAX items, store the above blocks in a tree instead, and call that O(1) lookup, for example.)

You can use a trie where the length of the key is bounded. As lookup in a trie with a key of length m is O(m), if we bound the length of the keys then we bound m and now lookup is O(1).
So think of the a trie where the keys are strings on the alphabet { 0, 1 } (i.e., we are thinking of keys as being the binary representation of integers). If we bound the length of the keys to say 32 letters, we have a structure that we can think of as being indexed by 32-bit integers and is randomly-accessible in O(1) time.
Here is an implementation in C#:
class TrieArray<T> {
TrieArrayNode<T> _root;
public TrieArray(int length) {
this.Length = length;
_root = new TrieArrayNode<T>();
for (int i = 0; i < length; i++) {
Insert(i);
}
}
TrieArrayNode<T> Insert(int n) {
return Insert(IntToBinaryString(n));
}
TrieArrayNode<T> Insert(string s) {
TrieArrayNode<T> node = _root;
foreach (char c in s.ToCharArray()) {
node = Insert(c, node);
}
return _root;
}
TrieArrayNode<T> Insert(char c, TrieArrayNode<T> node) {
if (node.Contains(c)) {
return node.GetChild(c);
}
else {
TrieArrayNode<T> child = new TrieArray<T>.TrieArrayNode<T>();
node.Nodes[GetIndex(c)] = child;
return child;
}
}
internal static int GetIndex(char c) {
return (int)(c - '0');
}
static string IntToBinaryString(int n) {
return Convert.ToString(n, 2);
}
public int Length { get; set; }
TrieArrayNode<T> Find(int n) {
return Find(IntToBinaryString(n));
}
TrieArrayNode<T> Find(string s) {
TrieArrayNode<T> node = _root;
foreach (char c in s.ToCharArray()) {
node = Find(c, node);
}
return node;
}
TrieArrayNode<T> Find(char c, TrieArrayNode<T> node) {
if (node.Contains(c)) {
return node.GetChild(c);
}
else {
throw new InvalidOperationException();
}
}
public T this[int index] {
get {
CheckIndex(index);
return Find(index).Value;
}
set {
CheckIndex(index);
Find(index).Value = value;
}
}
void CheckIndex(int index) {
if (index < 0 || index >= this.Length) {
throw new ArgumentOutOfRangeException("index");
}
}
class TrieArrayNode<TNested> {
public TrieArrayNode<TNested>[] Nodes { get; set; }
public T Value { get; set; }
public TrieArrayNode() {
Nodes = new TrieArrayNode<TNested>[2];
}
public bool Contains(char c) {
return Nodes[TrieArray<TNested>.GetIndex(c)] != null;
}
public TrieArrayNode<TNested> GetChild(char c) {
return Nodes[TrieArray<TNested>.GetIndex(c)];
}
}
}
Here is sample usage:
class Program {
static void Main(string[] args) {
int length = 10;
TrieArray<int> array = new TrieArray<int>(length);
for (int i = 0; i < length; i++) {
array[i] = i * i;
}
for (int i = 0; i < length; i++) {
Console.WriteLine(array[i]);
}
}
}

Well, since I've spent time thinking about it, and it could be argued that all hashtables are either a contiguous block of size >N or have a bucket list proportional to N, and Roger's top-level array of Blocks is O(N) with a coefficient less than 1, and I proposed a fix to that in the comments to his question, here goes:
int magnitude( size_t x ) { // many platforms have an insn for this
for ( int m = 0; x >>= 1; ++ m ) ; // return 0 for input 0 or 1
return m;
}
template< class T >
struct half_power_deque {
vector< vector< T > > blocks; // max log(N) blocks of increasing size
int half_first_block_mag; // blocks one, two have same size >= 2
T &operator[]( size_t index ) {
int index_magnitude = magnitude( index );
size_t block_index = max( 0, index_magnitude - half_first_block_mag );
vector< T > &block = blocks[ block_index ];
size_t elem_index = index;
if ( block_index != 0 ) elem_index &= ( 1<< index_magnitude ) - 1;
return block[ elem_index ];
}
};
template< class T >
struct power_deque {
half_power_deque forward, backward;
ptrdiff_t begin_offset; // == - backward.size() or indexes into forward
T &operator[]( size_t index ) {
ptrdiff_t real_offset = index + begin_offset;
if ( real_offset < 0 ) return backward[ - real_offset - 1 ];
return forward[ real_offset ];
}
};
half_power_deque implements erasing all but the last block, altering half_first_block_mag appropriately. This allows O(max over time N) memory use, amortized O(1) insertions on both ends, never invalidating references, and O(1) lookup.

How about a map/dictionary? Last I checked, that's O(1) performance.

Related

Big O of union method of dynamic connectivity problem

i faced a paradox to analyse this function, Why the time complexity of this function is N^2 and not N?
public void union(int a, int b) {
int aid = ids[a];
int bid = ids[b];
for (int i = 0; i < ids.length; i++) {
if (ids[i] == aid) {
ids[i] = bid;
}
}
}
Its an implementation of eager approach, to solve dynamic connectivity problem , complete code is:
// Union method has N^2 time complexity!!
class EagerApproach extends UnionFind {
protected int[] ids;
EagerApproach(int[] input) {
super(input);
ids = new int[input.length];
System.arraycopy(input, 0, ids, 0, input.length);
}
public boolean connected(int a, int b) {
return ids[a] == ids[b];
}
public void union(int a, int b) {
int aid = ids[a];
int bid = ids[b];
for (int i = 0; i < ids.length; i++) {
if (ids[i] == aid) {
ids[i] = bid;
}
}
}
public int[] getIds() {
return ids;
}
}
Provided your array access ids[x] is in constant time O(1), the time complexity of the union method is linear in the length of the array ids. So
O(ids.length)
or O(n) if we define n as ids.length.
Be careful with the definition of n and ids though. If, in your specific application, n was defined as ids.length = n * n, then this is obviously O(n^2) with n being sqrt(ids.length).

To merge k sorted lists, is it bad to merge all the lists and then sort at the end? Is this cheating? [duplicate]

A 2-way merge is widely studied as a part of Mergesort algorithm.
But I am interested to find out the best way one can perform an N-way merge?
Lets say, I have N files which have sorted 1 million integers each.
I have to merge them into 1 single file which will have those 100 million sorted integers.
Please keep in mind that use case for this problem is actually external sorting which is disk based. Therefore, in real scenarios there would be memory limitation as well. So a naive approach of merging 2 files at a time (99 times) won't work. Lets say we have only a small sliding window of memory available for each array.
I am not sure if there is already a standardized solution to this N-way merge. (Googling didn't tell me much).
But if you know if a good n-way merge algorithm, please post algo/link.
Time complexity: If we greatly increase the number of files (N) to be merged, how would that affect the time complexity of your algorithm?
Thanks for your answers.
I haven't been asked this anywhere, but I felt this could be an interesting interview question. Therefore tagged.
How about the following idea:
Create a priority queue
Iterate through each file f
enqueue the pair (nextNumberIn(f), f) using the first value as priority key
While queue not empty
dequeue head (m, f) of queue
output m
if f not depleted
enqueue (nextNumberIn(f), f)
Since adding elements to a priority queue can be done in logarithmic time, item 2 is O(N × log N). Since (almost all) iterations of the while loop adds an element, the whole while-loop is O(M × log N) where M is the total number of numbers to sort.
Assuming all files have a non-empty sequence of numbers, we have M > N and thus the whole algorithm should be O(M × log N).
Search for "Polyphase merge", check out classics - Donald Knuth & E.H.Friend.
Also, you may want to take a look at the proposed Smart Block Merging by Seyedafsari & Hasanzadeh, that, similarly to earlier suggestions, uses priority queues.
Another interesting reasonsing is In Place Merging Algorithm by Kim & Kutzner.
I also recommend this paper by Vitter: External memory algorithms and data structures: dealing with massive data.
One simple idea is to keep a priority queue of the ranges to merge, stored in such a way that the range with the smallest first element is removed first from the queue. You can then do an N-way merge as follows:
Insert all of the ranges into the priority queue, excluding empty ranges.
While the priority queue is not empty:
Dequeue the smallest element from the queue.
Append the first element of this range to the output sequence.
If it's nonempty, insert the rest of the sequence back into the priority queue.
The correctness of this algorithm is essentially a generalization of the proof that a 2-way merge works correctly - if you always add the smallest element from any range, and all the ranges are sorted, you end up with the sequence as a whole sorted.
The runtime complexity of this algorithm can be found as follows. Let M be the total number of elements in all the sequences. If we use a binary heap, then we do at most O(M) insertions and O(M) deletions from the priority queue, since for each element written to the output sequence there's a dequeue to pull out the smallest sequence, followed by an enqueue to put the rest of the sequence back into the queue. Each of these steps takes O(lg N) operations, because insertion or deletion from a binary heap with N elements in it takes O(lg N) time. This gives a net runtime of O(M lg N), which grows less than linearly with the number of input sequences.
There may be a way to get this even faster, but this seems like a pretty good solution. The memory usage is O(N) because we need O(N) overhead for the binary heap. If we implement the binary heap by storing pointers to the sequences rather than the sequences themselves, this shouldn't be too much of a problem unless you have a truly ridiculous number of sequences to merge. In that case, just merge them in groups that do fit into memory, then merge all the results.
Hope this helps!
A simple approach to Merging k sorted arrays (each of length n) requires O(n k^2) time and not O(nk) time. As when you merge first 2 arrays it takes 2n time, then when you merge third with the output , it takes 3n time as now we are merging two array of length 2n and n. Now when we merge this output with the fourth one,this merge requires 4n time.Thus the last merge (when we are adding the kth array to our already sorted array ) requires k*n time.Thus total time required is 2n+ 3n + 4n +...k*n which is O(n k^2).
It looks like we can do it in O(kn) time but it is not so because each time our array which we are merging is increasing in size.
Though we can achieve a better bound using divide and conquer. I am still working on that and post a solution if I find one.
See http://en.wikipedia.org/wiki/External_sorting. Here is my take on the heap based k-way merge, using a buffered read from the sources to emulate I/O reduction:
public class KWayMerger<T>
{
private readonly IList<T[]> _sources;
private readonly int _bufferSize;
private readonly MinHeap<MergeValue<T>> _mergeHeap;
private readonly int[] _indices;
public KWayMerger(IList<T[]> sources, int bufferSize, Comparer<T> comparer = null)
{
if (sources == null) throw new ArgumentNullException("sources");
_sources = sources;
_bufferSize = bufferSize;
_mergeHeap = new MinHeap<MergeValue<T>>(
new MergeComparer<T>(comparer ?? Comparer<T>.Default));
_indices = new int[sources.Count];
}
public T[] Merge()
{
for (int i = 0; i <= _sources.Count - 1; i++)
AddToMergeHeap(i);
var merged = new T[_sources.Sum(s => s.Length)];
int mergeIndex = 0;
while (_mergeHeap.Count > 0)
{
var min = _mergeHeap.ExtractDominating();
merged[mergeIndex++] = min.Value;
if (min.Source != -1) //the last item of the source was extracted
AddToMergeHeap(min.Source);
}
return merged;
}
private void AddToMergeHeap(int sourceIndex)
{
var source = _sources[sourceIndex];
var start = _indices[sourceIndex];
var end = Math.Min(start + _bufferSize - 1, source.Length - 1);
if (start > source.Length - 1)
return; //we're done with this source
for (int i = start; i <= end - 1; i++)
_mergeHeap.Add(new MergeValue<T>(-1, source[i]));
//only the last item should trigger the next buffered read
_mergeHeap.Add(new MergeValue<T>(sourceIndex, source[end]));
_indices[sourceIndex] += _bufferSize; //we may have added less items,
//but if we did we've reached the end of the source so it doesn't matter
}
}
internal class MergeValue<T>
{
public int Source { get; private set; }
public T Value { get; private set; }
public MergeValue(int source, T value)
{
Value = value;
Source = source;
}
}
internal class MergeComparer<T> : IComparer<MergeValue<T>>
{
public Comparer<T> Comparer { get; private set; }
public MergeComparer(Comparer<T> comparer)
{
if (comparer == null) throw new ArgumentNullException("comparer");
Comparer = comparer;
}
public int Compare(MergeValue<T> x, MergeValue<T> y)
{
Debug.Assert(x != null && y != null);
return Comparer.Compare(x.Value, y.Value);
}
}
Here is one possible implementation of MinHeap<T>. Some tests:
[TestMethod]
public void TestKWaySort()
{
var rand = new Random();
for (int i = 0; i < 10; i++)
AssertKwayMerge(rand);
}
private static void AssertKwayMerge(Random rand)
{
var sources = new[]
{
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
};
Assert.IsTrue(new KWayMerger<int>(sources, 20).Merge().SequenceEqual(sources.SelectMany(s => s).OrderBy(i => i)));
}
public static IEnumerable<int> GenerateRandomCollection(Random rand, int minLength, int maxLength, int min = 0, int max = int.MaxValue)
{
return Enumerable.Repeat(0, rand.Next(minLength, maxLength)).Select(i => rand.Next(min, max));
}
I wrote this STL-style piece of code that does N-way merge and thought I'd post it here to help prevent others from reinventing the wheel. :)
Warning: it's only mildly tested. Test before use. :)
You can use it like this:
#include <vector>
int main()
{
std::vector<std::vector<int> > v;
std::vector<std::vector<int>::iterator> vout;
std::vector<int> v1;
std::vector<int> v2;
v1.push_back(1);
v1.push_back(2);
v1.push_back(3);
v2.push_back(0);
v2.push_back(1);
v2.push_back(2);
v.push_back(v1);
v.push_back(v2);
multiway_merge(v.begin(), v.end(), std::back_inserter(vout), false);
}
It also allows using pairs of iterators instead of the containers themselves.
If you use Boost.Range, you can remove some of the boilerplate code.
The code:
#include <algorithm>
#include <functional> // std::less
#include <iterator>
#include <queue> // std::priority_queue
#include <utility> // std::pair
#include <vector>
template<class OutIt>
struct multiway_merge_value_insert_iterator : public std::iterator<
std::output_iterator_tag, OutIt, ptrdiff_t
>
{
OutIt it;
multiway_merge_value_insert_iterator(OutIt const it = OutIt())
: it(it) { }
multiway_merge_value_insert_iterator &operator++(int)
{ return *this; }
multiway_merge_value_insert_iterator &operator++()
{ return *this; }
multiway_merge_value_insert_iterator &operator *()
{ return *this; }
template<class It>
multiway_merge_value_insert_iterator &operator =(It const i)
{
*this->it = *i;
++this->it;
return *this;
}
};
template<class OutIt>
multiway_merge_value_insert_iterator<OutIt>
multiway_merge_value_inserter(OutIt const it)
{ return multiway_merge_value_insert_iterator<OutIt>(it); };
template<class Less>
struct multiway_merge_value_less : private Less
{
multiway_merge_value_less(Less const &less) : Less(less) { }
template<class It1, class It2>
bool operator()(
std::pair<It1, It1> const &b /* inverted */,
std::pair<It2, It2> const &a) const
{
return b.first != b.second && (
a.first == a.second ||
this->Less::operator()(*a.first, *b.first));
}
};
struct multiway_merge_default_less
{
template<class T>
bool operator()(T const &a, T const &b) const
{ return std::less<T>()(a, b); }
};
template<class R>
struct multiway_merge_range_iterator
{ typedef typename R::iterator type; };
template<class R>
struct multiway_merge_range_iterator<R const>
{ typedef typename R::const_iterator type; };
template<class It>
struct multiway_merge_range_iterator<std::pair<It, It> >
{ typedef It type; };
template<class R>
typename R::iterator multiway_merge_range_begin(R &r)
{ return r.begin(); }
template<class R>
typename R::iterator multiway_merge_range_end(R &r)
{ return r.end(); }
template<class R>
typename R::const_iterator multiway_merge_range_begin(R const &r)
{ return r.begin(); }
template<class R>
typename R::const_iterator multiway_merge_range_end(R const &r)
{ return r.end(); }
template<class It>
It multiway_merge_range_begin(std::pair<It, It> const &r)
{ return r.first; }
template<class It>
It multiway_merge_range_end(std::pair<It, It> const &r)
{ return r.second; }
template<class It, class OutIt, class Less, class PQ>
OutIt multiway_merge(
It begin, It const end, OutIt out, Less const &less,
PQ &pq, bool const distinct = false)
{
while (begin != end)
{
pq.push(typename PQ::value_type(
multiway_merge_range_begin(*begin),
multiway_merge_range_end(*begin)));
++begin;
}
while (!pq.empty())
{
typename PQ::value_type top = pq.top();
pq.pop();
if (top.first != top.second)
{
while (!pq.empty() && pq.top().first == pq.top().second)
{ pq.pop(); }
if (!distinct ||
pq.empty() ||
less(*pq.top().first, *top.first) ||
less(*top.first, *pq.top().first))
{
*out = top.first;
++out;
}
++top.first;
pq.push(top);
}
}
return out;
}
template<class It, class OutIt, class Less>
OutIt multiway_merge(
It const begin, It const end, OutIt out, Less const &less,
bool const distinct = false)
{
typedef typename multiway_merge_range_iterator<
typename std::iterator_traits<It>::value_type
>::type SubIt;
if (std::distance(begin, end) < 16)
{
typedef std::vector<std::pair<SubIt, SubIt> > Remaining;
Remaining remaining;
remaining.reserve(
static_cast<size_t>(std::distance(begin, end)));
for (It i = begin; i != end; ++i)
{
if (multiway_merge_range_begin(*i) !=
multiway_merge_range_end(*i))
{
remaining.push_back(std::make_pair(
multiway_merge_range_begin(*i),
multiway_merge_range_end(*i)));
}
}
while (!remaining.empty())
{
typename Remaining::iterator smallest =
remaining.begin();
for (typename Remaining::iterator
i = remaining.begin();
i != remaining.end();
)
{
if (less(*i->first, *smallest->first))
{
smallest = i;
++i;
}
else if (distinct && i != smallest &&
!less(
*smallest->first,
*i->first))
{
i = remaining.erase(i);
}
else { ++i; }
}
*out = smallest->first;
++out;
++smallest->first;
if (smallest->first == smallest->second)
{ smallest = remaining.erase(smallest); }
}
return out;
}
else
{
std::priority_queue<
std::pair<SubIt, SubIt>,
std::vector<std::pair<SubIt, SubIt> >,
multiway_merge_value_less<Less>
> q((multiway_merge_value_less<Less>(less)));
return multiway_merge(begin, end, out, less, q, distinct);
}
}
template<class It, class OutIt>
OutIt multiway_merge(
It const begin, It const end, OutIt const out,
bool const distinct = false)
{
return multiway_merge(
begin, end, out,
multiway_merge_default_less(), distinct);
}
Here is my implementation using MinHeap...
package merging;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
public class N_Way_Merge {
int No_of_files=0;
String[] listString;
int[] listIndex;
PrintWriter pw;
private String fileDir = "D:\\XMLParsing_Files\\Extracted_Data";
private File[] fileList;
private BufferedReader[] readers;
public static void main(String[] args) throws IOException {
N_Way_Merge nwm=new N_Way_Merge();
long start= System.currentTimeMillis();
try {
nwm.createFileList();
nwm.createReaders();
nwm.createMinHeap();
}
finally {
nwm.pw.flush();
nwm.pw.close();
for (BufferedReader readers : nwm.readers) {
readers.close();
}
}
long end = System.currentTimeMillis();
System.out.println("Files merged into a single file.\nTime taken: "+((end-start)/1000)+"secs");
}
public void createFileList() throws IOException {
//creates a list of sorted files present in a particular directory
File folder = new File(fileDir);
fileList = folder.listFiles();
No_of_files=fileList.length;
assign();
System.out.println("No. of files - "+ No_of_files);
}
public void assign() throws IOException
{
listString = new String[No_of_files];
listIndex = new int[No_of_files];
pw = new PrintWriter(new BufferedWriter(new FileWriter("D:\\XMLParsing_Files\\Final.txt", true)));
}
public void createReaders() throws IOException {
//creates array of BufferedReaders to read the files
readers = new BufferedReader[No_of_files];
for(int i=0;i<No_of_files;++i)
{
readers[i]=new BufferedReader(new FileReader(fileList[i]));
}
}
public void createMinHeap() throws IOException {
for(int i=0;i<No_of_files;i++)
{
listString[i]=readers[i].readLine();
listIndex[i]=i;
}
WriteToFile(listString,listIndex);
}
public void WriteToFile(String[] listString,int[] listIndex) throws IOException{
BuildHeap_forFirstTime(listString, listIndex);
while(!(listString[0].equals("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")))
{
pw.println(listString[0]);
listString[0]=readers[listIndex[0]].readLine();
MinHeapify(listString,listIndex,0);
}
}
public void BuildHeap_forFirstTime(String[] listString,int[] listIndex){
for(int i=(No_of_files/2)-1;i>=0;--i)
MinHeapify(listString,listIndex,i);
}
public void MinHeapify(String[] listString,int[] listIndex,int index){
int left=index*2 + 1;
int right=left + 1;
int smallest=index;
int HeapSize=No_of_files;
if(left <= HeapSize-1 && listString[left]!=null && (listString[left].compareTo(listString[index])) < 0)
smallest = left;
if(right <= HeapSize-1 && listString[right]!=null && (listString[right].compareTo(listString[smallest])) < 0)
smallest=right;
if(smallest!=index)
{
String temp=listString[index];
listString[index]=listString[smallest];
listString[smallest]=temp;
listIndex[smallest]^=listIndex[index];
listIndex[index]^=listIndex[smallest];
listIndex[smallest]^=listIndex[index];
MinHeapify(listString,listIndex,smallest);
}
}
}
Java implementation of min heap algorithm for merging k sorted arrays:
public class MergeKSorted {
/**
* helper object to store min value of each array in a priority queue,
* the kth array and the index into kth array
*
*/
static class PQNode implements Comparable<PQNode>{
int value;
int kth = 0;
int indexKth = 0;
public PQNode(int value, int kth, int indexKth) {
this.value = value;
this.kth = kth;
this.indexKth = indexKth;
}
#Override
public int compareTo(PQNode o) {
if(o != null) {
return Integer.valueOf(value).compareTo(Integer.valueOf(o.value));
}
else return 0;
}
#Override
public String toString() {
return value+" "+kth+" "+indexKth;
}
}
public static void mergeKSorted(int[][] sortedArrays) {
int k = sortedArrays.length;
int resultCtr = 0;
int totalSize = 0;
PriorityQueue<PQNode> pq = new PriorityQueue<>();
for(int i=0; i<k; i++) {
int[] kthArray = sortedArrays[i];
totalSize+=kthArray.length;
if(kthArray.length > 0) {
PQNode temp = new PQNode(kthArray[0], i, 0);
pq.add(temp);
}
}
int[] result = new int[totalSize];
while(!pq.isEmpty()) {
PQNode temp = pq.poll();
int[] kthArray = sortedArrays[temp.kth];
result[resultCtr] = temp.value;
resultCtr++;
temp.indexKth++;
if(temp.indexKth < kthArray.length) {
temp = new PQNode(kthArray[temp.indexKth], temp.kth, temp.indexKth);
pq.add(temp);
}
}
print(result);
}
public static void print(int[] a) {
StringBuilder sb = new StringBuilder();
for(int v : a) {
sb.append(v).append(" ");
}
System.out.println(sb);
}
public static void main(String[] args) {
int[][] sortedA = {
{3,4,6,9},
{4,6,8,9,12},
{3,4,9},
{1,4,9}
};
mergeKSorted(sortedA);
}
}

Puzzle: Find the order of n persons standing in a line (based on their heights)

Saw this question on Careercup.com:
Given heights of n persons standing in a line and a list of numbers corresponding to each person (p) that gives the number of persons who are taller than p and standing in front of p. For example,
Heights: 5 3 2 6 1 4
InFronts:0 1 2 0 3 2
Means that the actual actual order is: 5 3 2 1 6 4
The question gets the two lists of Heights and InFronts, and should generate the order standing in line.
My solution:
It could be solved by first sorting the list in descending order. Obviously, to sort, we need to define an object Person (with two attributes of Height and InFront) and then sort Persons based on their height. Then, I would use two stacks, a main stack and a temp one, to build up the order.
Starting from the tallest, put it in the main stack. If the next person had an InFront value of greater than the person on top of the stack, that means the new person should be added before the person on top. Therefore, we need to pop persons from the main stack, insert the new person, and then return the persons popped out in the first step (back to the main stack from temp one). I would use a temp stack to keep the order of the popped out persons. But how many should be popped out? Since the list is sorted, we need to pop exactly the number of persons in front of the new person, i.e. corresponding InFront.
I think this solution works. But the worst case order would be O(n^2) -- when putting a person in place needs popping out all previous ones.
Is there any other solutions? possibly in O(n)?
The O(nlogn) algoritm is possible.
First assume that all heights are different.
Sort people by heights. Then iterate from shortest to tallest. In each step you need an efficient way to put the next person to the correct position. Notice that people we've already placed are not taller that the current person. And the people we place after are taller than the current. So we have to find a place such that the number of empty positions in the front is equal to the inFronts value of this person. This task can be done using a data structure called interval tree in O(logn) time. So the total time of an algorithm is O(nlogn).
This algorithm works well in case where there's no ties. As it may be safely assumed that empty places up to front will be filled by taller people.
In case when ties are possible, we need to assure that people of the same height are placed in increasing order of their positions. It can be achieved if we will process people by non-decreasing inFronts value. So, in case of possible ties we should also consider inFronts values when sorting people.
And if at some step we can't find a position for next person then the answer it "it's impossible to satisfy problem constraints".
There exists an algorithm with O(nlogn) average complexity, however worst case complexity is still O(n²).
To achieve this you can use a variation of a binary tree. The idea is, in this tree, each node corresponds to a person and each node keeps track of how many people are in front of him (which is the size of the left subtree) as nodes are inserted.
Start iterating the persons array in decreasing height order and insert each person into the tree starting from the root. Insertion is as follows:
Compare the frontCount of the person with the current node's (root at the beginning) value.
If it is smaller than it insert the node to the left with value 1. Increase the current node's value by 1.
Else, descend to the right by decreasing the person's frontCount by current node's value. This enables the node to be placed in the correct location.
After all nodes finished, an inorder traversal gives the correct order of people.
Let the code speak for itself:
public static void arrange(int[] heights, int[] frontCounts) {
Person[] persons = new Person[heights.length];
for (int i = 0; i < persons.length; i++)
persons[i] = new Person(heights[i], frontCounts[i]);
Arrays.sort(persons, (p1, p2) -> {
return Integer.compare(p2.height, p1.height);
});
Node root = new Node(persons[0]);
for (int i = 1; i < persons.length; i++) {
insert(root, persons[i]);
}
inOrderPrint(root);
}
private static void insert(Node root, Person p) {
insert(root, p, p.frontCount);
}
private static void insert(Node root, Person p, int value) {
if (value < root.value) { // should insert to the left
if (root.left == null) {
root.left = new Node(p);
} else {
insert(root.left, p, value);
}
root.value++; // Increase the current node value while descending left!
} else { // insert to the right
if (root.right == null) {
root.right = new Node(p);
} else {
insert(root.right, p, value - root.value);
}
}
}
private static void inOrderPrint(Node root) {
if (root == null)
return;
inOrderPrint(root.left);
System.out.print(root.person.height);
inOrderPrint(root.right);
}
private static class Node {
Node left, right;
int value;
public final Person person;
public Node(Person person) {
this.value = 1;
this.person = person;
}
}
private static class Person {
public final int height;
public final int frontCount;
Person(int height, int frontCount) {
this.height = height;
this.frontCount = frontCount;
}
}
public static void main(String[] args) {
int[] heights = {5, 3, 2, 6, 1, 4};
int[] frontCounts = {0, 1, 2, 0, 3, 2};
arrange(heights, frontCounts);
}
I think one approach can be the following. Although it again seems to be O(n^2) at present.
Sort the Height array and corresponding 'p' array in ascending order of heights (in O(nlogn)). Pick the first element in the list. Put that element in the final array in the position given by the p index.
For example after sorting,
H - 1, 2, 3, 4, 5, 6
p - 3, 2, 1, 2, 0, 0.
1st element should go in position 3. Hence final array becomes:
---1--
2nd element shall go in position 2. Hence final array becomes:
--21--
3rd element should go in position 1. Hence final array becomes:
-321--
4th element shall go in position 2. This is the position among the empty ones. Hence final array becomes:
-321-4
5th element shall go in position 0. Hence final array becomes:
5321-4
6th element should go in position 0. Hence final array becomes:
532164
I think the approach indicated above is correct. However a critical piece missing in the solutions above are.
Infronts is the number of taller candidate before the current person. So after sorting the persons based on height(Ascending), when placing person 3 with infront=2, if person 1 and 2 was in front placed at 0, 1 position respectively, you need to discount their position and place 3 at position 4, I.E 2 taller candidates will take position 2,3.
As some indicated interval tree is the right structure. However a dynamic sized container, with available position will do the job.(code below)
struct Person{
int h, ct;
Person(int ht, int c){
h = ht;
ct = c;
}
};
struct comp{
bool operator()(const Person& lhs, const Person& rhs){
return (lhs.h < rhs.h);
}
};
vector<int> heightOrder(vector<int> &heights, vector<int> &infronts) {
if(heights.size() != infronts.size()){
return {};
}
vector<int> result(infronts.size(), -1);
vector<Person> persons;
vector<int> countSet;
for(int i= 0; i< heights.size(); i++){
persons.emplace_back(Person(heights[i], infronts[i]));
countSet.emplace_back(i);
}
sort(persons.begin(), persons.end(), comp());
for(size_t i=0; i<persons.size(); i++){
Person p = persons[i];
if(countSet.size() > p.ct){
int curr = countSet[p.ct];
//cout << "the index to place height=" << p.h << " , is at pos=" << curr << endl;
result[curr] = p.h;
countSet.erase(countSet.begin() + p.ct);
}
}
return result;
}
I'm using LinkedList for the this. Sort the tallCount[] in ascending order and accordingly re-position the items in heights[]. This is capable of handling the duplicate elements also.
public class FindHeightOrder {
public int[] findOrder(final int[] heights, final int[] tallCount) {
if (heights == null || heights.length == 0 || tallCount == null
|| tallCount.length == 0 || tallCount.length != heights.length) {
return null;
}
LinkedList list = new LinkedList();
list.insertAtStart(heights[0]);
for (int i = 1; i < heights.length; i++) {
if (tallCount[i] == 0) {
Link temp = list.getHead();
while (temp != null && temp.getData() <= heights[i]) {
temp = temp.getLink();
}
if (temp != null) {
if (temp.getData() <= heights[i]) {
list.insertAfterElement(temp.getData(), heights[i]);
} else {
list.insertAtStart(heights[i]);
}
} else {
list.insertAtEnd(heights[i]);
}
} else {
Link temp = list.getHead();
int pos = tallCount[i];
while (temp != null
&& (temp.getData() <= heights[i] || pos-- > 0)) {
temp = temp.getLink();
}
if (temp != null) {
if (temp.getData() <= heights[i]) {
list.insertAfterElement(temp.getData(), heights[i]);
} else {
list.insertBeforeElement(temp.getData(), heights[i]);
}
} else {
list.insertAtEnd(heights[i]);
}
}
}
Link fin = list.getHead();
int i = 0;
while (fin != null) {
heights[i++] = fin.getData();
fin = fin.getLink();
}
return heights;
}
public class Link {
private int data;
private Link link;
public Link(int data) {
this.data = data;
}
public int getData() {
return data;
}
public void setData(int data) {
this.data = data;
}
public Link getLink() {
return link;
}
public void setLink(Link link) {
this.link = link;
}
#Override
public String toString() {
return this.data + " -> "
+ (this.link != null ? this.link : "null");
}
}
public class LinkedList {
private Link head;
public Link getHead() {
return head;
}
public void insertAtStart(int data) {
if (head == null) {
head = new Link(data);
head.setLink(null);
} else {
Link link = new Link(data);
link.setLink(head);
head = link;
}
}
public void insertAtEnd(int data) {
if (head != null) {
Link temp = head;
while (temp != null && temp.getLink() != null) {
temp = temp.getLink();
}
temp.setLink(new Link(data));
} else {
head = new Link(data);
}
}
public void insertAfterElement(int after, int data) {
if (head != null) {
Link temp = head;
while (temp != null) {
if (temp.getData() == after) {
Link link = new Link(data);
link.setLink(temp.getLink());
temp.setLink(link);
break;
} else {
temp = temp.getLink();
}
}
}
}
public void insertBeforeElement(int before, int data) {
if (head != null) {
Link current = head;
Link previous = null;
Link ins = new Link(data);
while (current != null) {
if (current.getData() == before) {
ins.setLink(current);
break;
} else {
previous = current;
current = current.getLink();
if (current != null && current.getData() == before) {
previous.setLink(ins);
ins.setLink(current);
break;
}
}
}
}
}
#Override
public String toString() {
return "LinkedList [head=" + this.head + "]";
}
}
}
As people already corrected for original input:
Heights : A[] = { 5 3 2 6 1 4 }
InFronts: B[] = { 0 1 2 0 3 2 }
Output should look like: X[] = { 5 3 1 6 2 4 }
Here is the O(N*logN) way to approach solution (with assumption that there are no ties).
Iterate over array B and build chain of inequalities (by placing items into a right spot on each iteration, here we can use hashtable for O(1) lookups):
b0 > b1
b0 > b1 > b2
b3 > b0 > b1 > b2
b3 > b0 > b1 > b4 > b2
b3 > b0 > b5 > b1 > b4 > b2
Sort array A and reverse it
Initialize output array X, iterate over chain from #1 and fill array X by placing items from A into a position defined in a chain
Steps #1 and #3 are O(N), step #2 is the most expensive O(N*logN).
And obviously reversing sorted array A (in step #2) is not required.
This is the implementation for the idea provided by user1990169. Complexity being O(N^2).
public class Solution {
class Person implements Comparator<Person>{
int height;
int infront;
public Person(){
}
public Person(int height, int infront){
this.height = height;
this.infront = infront;
}
public int compare(Person p1, Person p2){
return p1.height - p2.height;
}
}
public ArrayList<Integer> order(ArrayList<Integer> heights, ArrayList<Integer> infronts) {
int n = heights.size();
Person[] people = new Person[n];
for(int i = 0; i < n; i++){
people[i] = new Person(heights.get(i), infronts.get(i));
}
Arrays.sort(people, new Person());
Person[] rst = new Person[n];
for(Person p : people){
int count = 0;
for(int i = 0; i < n ; i++){
if(count == p.infront){
while(rst[i] != null && i < n - 1){
i++;
}
rst[i] = p;
break;
}
if(rst[i] == null) count++;
}
}
ArrayList<Integer> heightrst = new ArrayList<Integer>();
for(int i = 0; i < n; i++){
heightrst.add(rst[i].height);
}
return heightrst;
}
}
Was solving this problem today, here is what I came up with:
The idea is to sort the heights array in descending order. Once, we have this sorted array - pick up an element from this element and place it in the resultant array at the corresponding index (I am using an ArrayList for the same, it would be nice to use LinkedList) :
public class Solution {
public ArrayList<Integer> order(ArrayList<Integer> heights, ArrayList<Integer> infronts) {
Person[] persons = new Person[heights.size()];
ArrayList<Integer> res = new ArrayList<>();
for (int i = 0; i < persons.length; i++) {
persons[i] = new Person(heights.get(i), infronts.get(i));
}
Arrays.sort(persons, (p1, p2) -> {
return Integer.compare(p2.height, p1.height);
});
for (int i = 0; i < persons.length; i++) {
//System.out.println("adding "+persons[i].height+" "+persons[i].count);
res.add(persons[i].count, persons[i].height);
}
return res;
}
private static class Person {
public final int height;
public final int count;
public Person(int h, int c) {
height = h;
count = c;
}
}
}
I found this kind of problem on SPOJ. I created a binary tree with little variation. When a new height is inserted, if the front is smaller than the root's front then it goes to the left otherwise right.
Here is the C++ implementation:
#include<bits/stdc++.h>
using namespace std;
struct TreeNode1
{
int val;
int _front;
TreeNode1* left;
TreeNode1*right;
};
TreeNode1* Add(int x, int v)
{
TreeNode1* p= (TreeNode1*) malloc(sizeof(TreeNode1));
p->left=NULL;
p->right=NULL;
p->val=x;
p->_front=v;
return p;
}
TreeNode1* _insert(TreeNode1* root, int x, int _front)
{
if(root==NULL) return Add(x,_front);
if(root->_front >=_front)
{
root->left=_insert(root->left,x,_front);
root->_front+=1;
}
else
{
root->right=_insert(root->right,x,_front-root->_front);
}
return root;
}
bool comp(pair<int,int> a, pair<int,int> b)
{
return a.first>b.first;
}
void in_order(TreeNode1 * root, vector<int>&v)
{
if(root==NULL) return ;
in_order(root->left,v);
v.push_back(root->val);
in_order(root->right,v);
}
vector<int>soln(vector<int>h, vector<int>in )
{
vector<pair<int , int> >vc;
for(int i=0;i<h.size();i++) vc.push_back( make_pair( h[i],in[i] ) );
sort(vc.begin(),vc.end(),comp);
TreeNode1* root=NULL;
for(int i=0;i<vc.size();i++)
root=_insert(root,vc[i].first,vc[i].second);
vector<int>v;
in_order(root,v);
return v;
}
int main()
{
int t;
scanf("%d",&t);
while(t--)
{
int n;
scanf("%d",&n);
vector<int>h;
vector<int>in;
for(int i=0;i<n;i++) {int x;
cin>>x;
h.push_back(x);}
for(int i=0;i<n;i++) {int x; cin>>x;
in.push_back(x);}
vector<int>v=soln(h,in);
for(int i=0;i<n-1;i++) cout<<v[i]<<" ";
cout<<v[n-1]<<endl;
h.clear();
in.clear();
}
}
Here is a Python solution that uses only elementary list functions and takes care of ties.
def solution(heights, infronts):
person = list(zip(heights, infronts))
person.sort(key=lambda x: (x[0] == 0, x[1], -x[0]))
output = []
for p in person:
extended_output = output + [p]
extended_output.sort(key=lambda x: (x[0], -x[1]))
output_position = [p for p in extended_output].index(p) + p[1]
output.insert(output_position, p)
for c, p in enumerate(output):
taller_infronts = [infront for infront in output[0:c] if infront[0] >= p[0]]
assert len(taller_infronts) == p[1]
return output
Simple O(n^2) solution for this in Java:
Algorith:
If the position of the shortest person is i, i-1 taller people will be in front of him.
We fix the position of shortest person and then move to second shortest person.
Sort people by heights. Then iterate from shortest to tallest. In each step you need an efficient way to put the next person to the correct position.
We can optimise this solution even more by using segment tree. See this link.
class Person implements Comparable<Person>{
int height;
int pos;
Person(int height, int pos) {
this.height = height;
this.pos = pos;
}
#Override
public int compareTo(Person person) {
return this.height - person.height;
}
}
public class Solution {
public int[] order(int[] heights, int[] positions) {
int n = heights.length;
int[] ans = new int[n];
PriorityQueue<Person> pq = new PriorityQueue<Person>();
for( int i=0; i<n; i++) {
pq.offer(new Person(heights[i], positions[i]) );
}
for(int i=0; i<n; i++) {
Person person = pq.poll();
int vacantTillNow = 0;
int index = 0;
while(index < n) {
if( ans[index] == 0) vacantTillNow++;
if( vacantTillNow > person.pos) break;
index++;
}
ans[index] = person.height;
}
return ans;
}
}
Segment tree can be used to solve this in O(nlog n) if there are no ties in heights.
Please look for approach 3 in this link for a clear explanation of this method.
https://www.codingninjas.com/codestudio/problem-details/order-of-people-heights_1170764
Below is my code for the same approach in python
def findEmptySlot(tree, root, left, right, K, result):
tree[root]-=1
if left==right:
return left
if tree[2*root+1] >= K:
return findEmptySlot(tree, 2*root+1, left, (left+right)//2, K, result)
else:
return findEmptySlot(tree, 2*root+2, (left+right)//2+1, right, K-tree[2*root+1], result)
def buildsegtree(tree, pos, start, end):
if start==end:
tree[pos]=1
return tree[pos]
mid=(start+end)//2
left = buildsegtree(tree, 2*pos+1,start, mid)
right = buildsegtree(tree,2*pos+2,mid+1, end)
tree[pos]=left+right
return tree[pos]
class Solution:
# #param A : list of integers
# #param B : list of integers
# #return a list of integers
def order(self, A, B):
n=len(A)
people=[(A[i],B[i]) for i in range(len(A))]
people.sort(key=lambda x: (x[0], x[1]))
result=[0]*n
tree=[0]*(4*n)
buildsegtree(tree,0, 0, n-1)
for i in range(n):
idx=findEmptySlot(tree, 0, 0, n-1, people[i][1]+1, result)
result[idx]=people[i][0]
return result

Algorithm for N-way merge

A 2-way merge is widely studied as a part of Mergesort algorithm.
But I am interested to find out the best way one can perform an N-way merge?
Lets say, I have N files which have sorted 1 million integers each.
I have to merge them into 1 single file which will have those 100 million sorted integers.
Please keep in mind that use case for this problem is actually external sorting which is disk based. Therefore, in real scenarios there would be memory limitation as well. So a naive approach of merging 2 files at a time (99 times) won't work. Lets say we have only a small sliding window of memory available for each array.
I am not sure if there is already a standardized solution to this N-way merge. (Googling didn't tell me much).
But if you know if a good n-way merge algorithm, please post algo/link.
Time complexity: If we greatly increase the number of files (N) to be merged, how would that affect the time complexity of your algorithm?
Thanks for your answers.
I haven't been asked this anywhere, but I felt this could be an interesting interview question. Therefore tagged.
How about the following idea:
Create a priority queue
Iterate through each file f
enqueue the pair (nextNumberIn(f), f) using the first value as priority key
While queue not empty
dequeue head (m, f) of queue
output m
if f not depleted
enqueue (nextNumberIn(f), f)
Since adding elements to a priority queue can be done in logarithmic time, item 2 is O(N × log N). Since (almost all) iterations of the while loop adds an element, the whole while-loop is O(M × log N) where M is the total number of numbers to sort.
Assuming all files have a non-empty sequence of numbers, we have M > N and thus the whole algorithm should be O(M × log N).
Search for "Polyphase merge", check out classics - Donald Knuth & E.H.Friend.
Also, you may want to take a look at the proposed Smart Block Merging by Seyedafsari & Hasanzadeh, that, similarly to earlier suggestions, uses priority queues.
Another interesting reasonsing is In Place Merging Algorithm by Kim & Kutzner.
I also recommend this paper by Vitter: External memory algorithms and data structures: dealing with massive data.
One simple idea is to keep a priority queue of the ranges to merge, stored in such a way that the range with the smallest first element is removed first from the queue. You can then do an N-way merge as follows:
Insert all of the ranges into the priority queue, excluding empty ranges.
While the priority queue is not empty:
Dequeue the smallest element from the queue.
Append the first element of this range to the output sequence.
If it's nonempty, insert the rest of the sequence back into the priority queue.
The correctness of this algorithm is essentially a generalization of the proof that a 2-way merge works correctly - if you always add the smallest element from any range, and all the ranges are sorted, you end up with the sequence as a whole sorted.
The runtime complexity of this algorithm can be found as follows. Let M be the total number of elements in all the sequences. If we use a binary heap, then we do at most O(M) insertions and O(M) deletions from the priority queue, since for each element written to the output sequence there's a dequeue to pull out the smallest sequence, followed by an enqueue to put the rest of the sequence back into the queue. Each of these steps takes O(lg N) operations, because insertion or deletion from a binary heap with N elements in it takes O(lg N) time. This gives a net runtime of O(M lg N), which grows less than linearly with the number of input sequences.
There may be a way to get this even faster, but this seems like a pretty good solution. The memory usage is O(N) because we need O(N) overhead for the binary heap. If we implement the binary heap by storing pointers to the sequences rather than the sequences themselves, this shouldn't be too much of a problem unless you have a truly ridiculous number of sequences to merge. In that case, just merge them in groups that do fit into memory, then merge all the results.
Hope this helps!
A simple approach to Merging k sorted arrays (each of length n) requires O(n k^2) time and not O(nk) time. As when you merge first 2 arrays it takes 2n time, then when you merge third with the output , it takes 3n time as now we are merging two array of length 2n and n. Now when we merge this output with the fourth one,this merge requires 4n time.Thus the last merge (when we are adding the kth array to our already sorted array ) requires k*n time.Thus total time required is 2n+ 3n + 4n +...k*n which is O(n k^2).
It looks like we can do it in O(kn) time but it is not so because each time our array which we are merging is increasing in size.
Though we can achieve a better bound using divide and conquer. I am still working on that and post a solution if I find one.
See http://en.wikipedia.org/wiki/External_sorting. Here is my take on the heap based k-way merge, using a buffered read from the sources to emulate I/O reduction:
public class KWayMerger<T>
{
private readonly IList<T[]> _sources;
private readonly int _bufferSize;
private readonly MinHeap<MergeValue<T>> _mergeHeap;
private readonly int[] _indices;
public KWayMerger(IList<T[]> sources, int bufferSize, Comparer<T> comparer = null)
{
if (sources == null) throw new ArgumentNullException("sources");
_sources = sources;
_bufferSize = bufferSize;
_mergeHeap = new MinHeap<MergeValue<T>>(
new MergeComparer<T>(comparer ?? Comparer<T>.Default));
_indices = new int[sources.Count];
}
public T[] Merge()
{
for (int i = 0; i <= _sources.Count - 1; i++)
AddToMergeHeap(i);
var merged = new T[_sources.Sum(s => s.Length)];
int mergeIndex = 0;
while (_mergeHeap.Count > 0)
{
var min = _mergeHeap.ExtractDominating();
merged[mergeIndex++] = min.Value;
if (min.Source != -1) //the last item of the source was extracted
AddToMergeHeap(min.Source);
}
return merged;
}
private void AddToMergeHeap(int sourceIndex)
{
var source = _sources[sourceIndex];
var start = _indices[sourceIndex];
var end = Math.Min(start + _bufferSize - 1, source.Length - 1);
if (start > source.Length - 1)
return; //we're done with this source
for (int i = start; i <= end - 1; i++)
_mergeHeap.Add(new MergeValue<T>(-1, source[i]));
//only the last item should trigger the next buffered read
_mergeHeap.Add(new MergeValue<T>(sourceIndex, source[end]));
_indices[sourceIndex] += _bufferSize; //we may have added less items,
//but if we did we've reached the end of the source so it doesn't matter
}
}
internal class MergeValue<T>
{
public int Source { get; private set; }
public T Value { get; private set; }
public MergeValue(int source, T value)
{
Value = value;
Source = source;
}
}
internal class MergeComparer<T> : IComparer<MergeValue<T>>
{
public Comparer<T> Comparer { get; private set; }
public MergeComparer(Comparer<T> comparer)
{
if (comparer == null) throw new ArgumentNullException("comparer");
Comparer = comparer;
}
public int Compare(MergeValue<T> x, MergeValue<T> y)
{
Debug.Assert(x != null && y != null);
return Comparer.Compare(x.Value, y.Value);
}
}
Here is one possible implementation of MinHeap<T>. Some tests:
[TestMethod]
public void TestKWaySort()
{
var rand = new Random();
for (int i = 0; i < 10; i++)
AssertKwayMerge(rand);
}
private static void AssertKwayMerge(Random rand)
{
var sources = new[]
{
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
GenerateRandomCollection(rand, 10, 30, 0, 30).OrderBy(i => i).ToArray(),
};
Assert.IsTrue(new KWayMerger<int>(sources, 20).Merge().SequenceEqual(sources.SelectMany(s => s).OrderBy(i => i)));
}
public static IEnumerable<int> GenerateRandomCollection(Random rand, int minLength, int maxLength, int min = 0, int max = int.MaxValue)
{
return Enumerable.Repeat(0, rand.Next(minLength, maxLength)).Select(i => rand.Next(min, max));
}
I wrote this STL-style piece of code that does N-way merge and thought I'd post it here to help prevent others from reinventing the wheel. :)
Warning: it's only mildly tested. Test before use. :)
You can use it like this:
#include <vector>
int main()
{
std::vector<std::vector<int> > v;
std::vector<std::vector<int>::iterator> vout;
std::vector<int> v1;
std::vector<int> v2;
v1.push_back(1);
v1.push_back(2);
v1.push_back(3);
v2.push_back(0);
v2.push_back(1);
v2.push_back(2);
v.push_back(v1);
v.push_back(v2);
multiway_merge(v.begin(), v.end(), std::back_inserter(vout), false);
}
It also allows using pairs of iterators instead of the containers themselves.
If you use Boost.Range, you can remove some of the boilerplate code.
The code:
#include <algorithm>
#include <functional> // std::less
#include <iterator>
#include <queue> // std::priority_queue
#include <utility> // std::pair
#include <vector>
template<class OutIt>
struct multiway_merge_value_insert_iterator : public std::iterator<
std::output_iterator_tag, OutIt, ptrdiff_t
>
{
OutIt it;
multiway_merge_value_insert_iterator(OutIt const it = OutIt())
: it(it) { }
multiway_merge_value_insert_iterator &operator++(int)
{ return *this; }
multiway_merge_value_insert_iterator &operator++()
{ return *this; }
multiway_merge_value_insert_iterator &operator *()
{ return *this; }
template<class It>
multiway_merge_value_insert_iterator &operator =(It const i)
{
*this->it = *i;
++this->it;
return *this;
}
};
template<class OutIt>
multiway_merge_value_insert_iterator<OutIt>
multiway_merge_value_inserter(OutIt const it)
{ return multiway_merge_value_insert_iterator<OutIt>(it); };
template<class Less>
struct multiway_merge_value_less : private Less
{
multiway_merge_value_less(Less const &less) : Less(less) { }
template<class It1, class It2>
bool operator()(
std::pair<It1, It1> const &b /* inverted */,
std::pair<It2, It2> const &a) const
{
return b.first != b.second && (
a.first == a.second ||
this->Less::operator()(*a.first, *b.first));
}
};
struct multiway_merge_default_less
{
template<class T>
bool operator()(T const &a, T const &b) const
{ return std::less<T>()(a, b); }
};
template<class R>
struct multiway_merge_range_iterator
{ typedef typename R::iterator type; };
template<class R>
struct multiway_merge_range_iterator<R const>
{ typedef typename R::const_iterator type; };
template<class It>
struct multiway_merge_range_iterator<std::pair<It, It> >
{ typedef It type; };
template<class R>
typename R::iterator multiway_merge_range_begin(R &r)
{ return r.begin(); }
template<class R>
typename R::iterator multiway_merge_range_end(R &r)
{ return r.end(); }
template<class R>
typename R::const_iterator multiway_merge_range_begin(R const &r)
{ return r.begin(); }
template<class R>
typename R::const_iterator multiway_merge_range_end(R const &r)
{ return r.end(); }
template<class It>
It multiway_merge_range_begin(std::pair<It, It> const &r)
{ return r.first; }
template<class It>
It multiway_merge_range_end(std::pair<It, It> const &r)
{ return r.second; }
template<class It, class OutIt, class Less, class PQ>
OutIt multiway_merge(
It begin, It const end, OutIt out, Less const &less,
PQ &pq, bool const distinct = false)
{
while (begin != end)
{
pq.push(typename PQ::value_type(
multiway_merge_range_begin(*begin),
multiway_merge_range_end(*begin)));
++begin;
}
while (!pq.empty())
{
typename PQ::value_type top = pq.top();
pq.pop();
if (top.first != top.second)
{
while (!pq.empty() && pq.top().first == pq.top().second)
{ pq.pop(); }
if (!distinct ||
pq.empty() ||
less(*pq.top().first, *top.first) ||
less(*top.first, *pq.top().first))
{
*out = top.first;
++out;
}
++top.first;
pq.push(top);
}
}
return out;
}
template<class It, class OutIt, class Less>
OutIt multiway_merge(
It const begin, It const end, OutIt out, Less const &less,
bool const distinct = false)
{
typedef typename multiway_merge_range_iterator<
typename std::iterator_traits<It>::value_type
>::type SubIt;
if (std::distance(begin, end) < 16)
{
typedef std::vector<std::pair<SubIt, SubIt> > Remaining;
Remaining remaining;
remaining.reserve(
static_cast<size_t>(std::distance(begin, end)));
for (It i = begin; i != end; ++i)
{
if (multiway_merge_range_begin(*i) !=
multiway_merge_range_end(*i))
{
remaining.push_back(std::make_pair(
multiway_merge_range_begin(*i),
multiway_merge_range_end(*i)));
}
}
while (!remaining.empty())
{
typename Remaining::iterator smallest =
remaining.begin();
for (typename Remaining::iterator
i = remaining.begin();
i != remaining.end();
)
{
if (less(*i->first, *smallest->first))
{
smallest = i;
++i;
}
else if (distinct && i != smallest &&
!less(
*smallest->first,
*i->first))
{
i = remaining.erase(i);
}
else { ++i; }
}
*out = smallest->first;
++out;
++smallest->first;
if (smallest->first == smallest->second)
{ smallest = remaining.erase(smallest); }
}
return out;
}
else
{
std::priority_queue<
std::pair<SubIt, SubIt>,
std::vector<std::pair<SubIt, SubIt> >,
multiway_merge_value_less<Less>
> q((multiway_merge_value_less<Less>(less)));
return multiway_merge(begin, end, out, less, q, distinct);
}
}
template<class It, class OutIt>
OutIt multiway_merge(
It const begin, It const end, OutIt const out,
bool const distinct = false)
{
return multiway_merge(
begin, end, out,
multiway_merge_default_less(), distinct);
}
Here is my implementation using MinHeap...
package merging;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
public class N_Way_Merge {
int No_of_files=0;
String[] listString;
int[] listIndex;
PrintWriter pw;
private String fileDir = "D:\\XMLParsing_Files\\Extracted_Data";
private File[] fileList;
private BufferedReader[] readers;
public static void main(String[] args) throws IOException {
N_Way_Merge nwm=new N_Way_Merge();
long start= System.currentTimeMillis();
try {
nwm.createFileList();
nwm.createReaders();
nwm.createMinHeap();
}
finally {
nwm.pw.flush();
nwm.pw.close();
for (BufferedReader readers : nwm.readers) {
readers.close();
}
}
long end = System.currentTimeMillis();
System.out.println("Files merged into a single file.\nTime taken: "+((end-start)/1000)+"secs");
}
public void createFileList() throws IOException {
//creates a list of sorted files present in a particular directory
File folder = new File(fileDir);
fileList = folder.listFiles();
No_of_files=fileList.length;
assign();
System.out.println("No. of files - "+ No_of_files);
}
public void assign() throws IOException
{
listString = new String[No_of_files];
listIndex = new int[No_of_files];
pw = new PrintWriter(new BufferedWriter(new FileWriter("D:\\XMLParsing_Files\\Final.txt", true)));
}
public void createReaders() throws IOException {
//creates array of BufferedReaders to read the files
readers = new BufferedReader[No_of_files];
for(int i=0;i<No_of_files;++i)
{
readers[i]=new BufferedReader(new FileReader(fileList[i]));
}
}
public void createMinHeap() throws IOException {
for(int i=0;i<No_of_files;i++)
{
listString[i]=readers[i].readLine();
listIndex[i]=i;
}
WriteToFile(listString,listIndex);
}
public void WriteToFile(String[] listString,int[] listIndex) throws IOException{
BuildHeap_forFirstTime(listString, listIndex);
while(!(listString[0].equals("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")))
{
pw.println(listString[0]);
listString[0]=readers[listIndex[0]].readLine();
MinHeapify(listString,listIndex,0);
}
}
public void BuildHeap_forFirstTime(String[] listString,int[] listIndex){
for(int i=(No_of_files/2)-1;i>=0;--i)
MinHeapify(listString,listIndex,i);
}
public void MinHeapify(String[] listString,int[] listIndex,int index){
int left=index*2 + 1;
int right=left + 1;
int smallest=index;
int HeapSize=No_of_files;
if(left <= HeapSize-1 && listString[left]!=null && (listString[left].compareTo(listString[index])) < 0)
smallest = left;
if(right <= HeapSize-1 && listString[right]!=null && (listString[right].compareTo(listString[smallest])) < 0)
smallest=right;
if(smallest!=index)
{
String temp=listString[index];
listString[index]=listString[smallest];
listString[smallest]=temp;
listIndex[smallest]^=listIndex[index];
listIndex[index]^=listIndex[smallest];
listIndex[smallest]^=listIndex[index];
MinHeapify(listString,listIndex,smallest);
}
}
}
Java implementation of min heap algorithm for merging k sorted arrays:
public class MergeKSorted {
/**
* helper object to store min value of each array in a priority queue,
* the kth array and the index into kth array
*
*/
static class PQNode implements Comparable<PQNode>{
int value;
int kth = 0;
int indexKth = 0;
public PQNode(int value, int kth, int indexKth) {
this.value = value;
this.kth = kth;
this.indexKth = indexKth;
}
#Override
public int compareTo(PQNode o) {
if(o != null) {
return Integer.valueOf(value).compareTo(Integer.valueOf(o.value));
}
else return 0;
}
#Override
public String toString() {
return value+" "+kth+" "+indexKth;
}
}
public static void mergeKSorted(int[][] sortedArrays) {
int k = sortedArrays.length;
int resultCtr = 0;
int totalSize = 0;
PriorityQueue<PQNode> pq = new PriorityQueue<>();
for(int i=0; i<k; i++) {
int[] kthArray = sortedArrays[i];
totalSize+=kthArray.length;
if(kthArray.length > 0) {
PQNode temp = new PQNode(kthArray[0], i, 0);
pq.add(temp);
}
}
int[] result = new int[totalSize];
while(!pq.isEmpty()) {
PQNode temp = pq.poll();
int[] kthArray = sortedArrays[temp.kth];
result[resultCtr] = temp.value;
resultCtr++;
temp.indexKth++;
if(temp.indexKth < kthArray.length) {
temp = new PQNode(kthArray[temp.indexKth], temp.kth, temp.indexKth);
pq.add(temp);
}
}
print(result);
}
public static void print(int[] a) {
StringBuilder sb = new StringBuilder();
for(int v : a) {
sb.append(v).append(" ");
}
System.out.println(sb);
}
public static void main(String[] args) {
int[][] sortedA = {
{3,4,6,9},
{4,6,8,9,12},
{3,4,9},
{1,4,9}
};
mergeKSorted(sortedA);
}
}

Looking at Sorts - Quicksort Iterative?

I'm looking at all different sorts. Note that this is not homework (I'm in the midst of finals) I'm just looking to be prepared if that sort of thing would pop up.
I was unable to find a reliable method of doing a quicksort iteratively. Is it possible and, if so, how?
I'll try to give a more general answer in addition to the actual implementations given in the other posts.
Is it possible and, if so, how?
Let us first of all take a look at what can be meant by making a recursive algorithm iterative.
For example, we want to have some function sum(n) that sums up the numbers from 0 to n.
Surely, this is
sum(n) =
if n = 0
then return 0
else return n + sum(n - 1)
As we try to compute something like sum(100000), we'll soon see this recursive algorithm has it's limits - a stack overflow will occur.
So, as a solution, we use an iterative algorithm to solve the same problem.
sum(n) =
s <- 0
for i in 0..n do
s <- s + i
return s
However, it's important to note that this implementation is an entirely different algorithm than the recursive sum above. We didn't in some way modify the original one to obtain the iterative version, we basically just found a non-recursive algorithm - with different and arguably better performance characteristics - that solves the same problem.
This is the first aspect of making an algorithm iterative: Finding a different, iterative algorithm that solves the same problem.
In some cases, there simply might not be such an iterative version.
The second one however is applicable to every recursive algorithm. You can turn any recursion into iteration by explicitly introducing the stack the recursion uses implicitly. Now this algorithm will have the exact same characteristics as the original one - and the stack will grow with O(n) like in the recursive version. It won't that easily overflow since it uses conventional memory instead of the call stack, and its iterative, but it's still the same algorithm.
As to quick sort: There is no different formulation what works without storing the data needed for recursion. But of course you can use an explicit stack for them like Ehsan showed. Thus you can - as always - produce an iterative version.
#include <stdio.h>
#include <conio.h>
#define MAXELT 100
#define INFINITY 32760 // numbers in list should not exceed
// this. change the value to suit your
// needs
#define SMALLSIZE 10 // not less than 3
#define STACKSIZE 100 // should be ceiling(lg(MAXSIZE)+1)
int list[MAXELT+1]; // one extra, to hold INFINITY
struct { // stack element.
int a,b;
} stack[STACKSIZE];
int top=-1; // initialise stack
int main() // overhead!
{
int i=-1,j,n;
char t[10];
void quicksort(int);
do {
if (i!=-1)
list[i++]=n;
else
i++;
printf("Enter the numbers <End by #>: ");
fflush(stdin);
scanf("%[^\n]",t);
if (sscanf(t,"%d",&n)<1)
break;
} while (1);
quicksort(i-1);
printf("\nThe list obtained is ");
for (j=0;j<i;j++)
printf("\n %d",list[j]);
printf("\n\nProgram over.");
getch();
return 0; // successful termination.
}
void interchange(int *x,int *y) // swap
{
int temp;
temp=*x;
*x=*y;
*y=temp;
}
void split(int first,int last,int *splitpoint)
{
int x,i,j,s,g;
// here, atleast three elements are needed
if (list[first]<list[(first+last)/2]) { // find median
s=first;
g=(first+last)/2;
}
else {
g=first;
s=(first+last)/2;
}
if (list[last]<=list[s])
x=s;
else if (list[last]<=list[g])
x=last;
else
x=g;
interchange(&list[x],&list[first]); // swap the split-point element
// with the first
x=list[first];
i=first+1; // initialise
j=last+1;
while (i<j) {
do { // find j
j--;
} while (list[j]>x);
do {
i++; // find i
} while (list[i]<x);
interchange(&list[i],&list[j]); // swap
}
interchange(&list[i],&list[j]); // undo the extra swap
interchange(&list[first],&list[j]); // bring the split-point
// element to the first
*splitpoint=j;
}
void push(int a,int b) // push
{
top++;
stack[top].a=a;
stack[top].b=b;
}
void pop(int *a,int *b) // pop
{
*a=stack[top].a;
*b=stack[top].b;
top--;
}
void insertion_sort(int first,int last)
{
int i,j,c;
for (i=first;i<=last;i++) {
j=list[i];
c=i;
while ((list[c-1]>j)&&(c>first)) {
list[c]=list[c-1];
c--;
}
list[c]=j;
}
}
void quicksort(int n)
{
int first,last,splitpoint;
push(0,n);
while (top!=-1) {
pop(&first,&last);
for (;;) {
if (last-first>SMALLSIZE) {
// find the larger sub-list
split(first,last,&splitpoint);
// push the smaller list
if (last-splitpoint<splitpoint-first) {
push(first,splitpoint-1);
first=splitpoint+1;
}
else {
push(splitpoint+1,last);
last=splitpoint-1;
}
}
else { // sort the smaller sub-lists
// through insertion sort
insertion_sort(first,last);
break;
}
}
} // iterate for larger list
}
// End of code.
taken from here
I was unable to find a reliable method of doing a quicksort iteratively
Have you tried google ?
It is just common quicksort, when recursion is realized with array.
This is my effort. Tell me if there is any improvement possible.
This code is done from the book "Data Structures, Seymour Lipschutz(Page-173), Mc GrawHill, Schaum's Outline Series."
#include <stdio.h>
#include <conio.h>
#include <math.h>
#define SIZE 12
struct StackItem
{
int StartIndex;
int EndIndex;
};
struct StackItem myStack[SIZE * SIZE];
int stackPointer = 0;
int myArray[SIZE] = {44,33,11,55,77,90,40,60,99,22,88,66};
void Push(struct StackItem item)
{
myStack[stackPointer] = item;
stackPointer++;
}
struct StackItem Pop()
{
stackPointer--;
return myStack[stackPointer];
}
int StackHasItem()
{
if(stackPointer>0)
{
return 1;
}
else
{
return 0;
}
}
void ShowStack()
{
int i =0;
printf("\n");
for(i=0; i<stackPointer ; i++)
{
printf("(%d, %d), ", myStack[i].StartIndex, myStack[i].EndIndex);
}
printf("\n");
}
void ShowArray()
{
int i=0;
printf("\n");
for(i=0 ; i<SIZE ; i++)
{
printf("%d, ", myArray[i]);
}
printf("\n");
}
void Swap(int * a, int *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
int Scan(int *startIndex, int *endIndex)
{
int partition = 0;
int i = 0;
if(*startIndex > *endIndex)
{
for(i=*startIndex ; i>=*endIndex ; i--)
{
//printf("%d->", myArray[i]);
if(myArray[i]<myArray[*endIndex])
{
//printf("\nSwapping %d, %d", myArray[i], myArray[*endIndex]);
Swap(&myArray[i], &myArray[*endIndex]);
*startIndex = *endIndex;
*endIndex = i;
partition = i;
break;
}
if(i==*endIndex)
{
*startIndex = *endIndex;
*endIndex = i;
partition = i;
}
}
}
else if(*startIndex < *endIndex)
{
for(i=*startIndex ; i<=*endIndex ; i++)
{
//printf("%d->", myArray[i]);
if(myArray[i]>myArray[*endIndex])
{
//printf("\nSwapping %d, %d", myArray[i], myArray[*endIndex]);
Swap(&myArray[i], &myArray[*endIndex]);
*startIndex = *endIndex;
*endIndex = i;
partition = i;
break;
}
if(i==*endIndex)
{
*startIndex = *endIndex;
*endIndex = i;
partition = i;
}
}
}
return partition;
}
int GetFinalPosition(struct StackItem item1)
{
struct StackItem item = {0};
int StartIndex = item1.StartIndex ;
int EndIndex = item1.EndIndex;
int PivotIndex = -99;
while(StartIndex != EndIndex)
{
PivotIndex = Scan(&EndIndex, &StartIndex);
printf("\n");
}
return PivotIndex;
}
void QuickSort()
{
int median = 0;
struct StackItem item;
struct StackItem item1={0};
struct StackItem item2={0};
item.StartIndex = 0;
item.EndIndex = SIZE-1;
Push(item);
while(StackHasItem())
{
item = Pop();
median = GetFinalPosition(item);
if(median>=0 && median<=(SIZE-1))
{
if(item.StartIndex<=(median-1))
{
item1.StartIndex = item.StartIndex;
item1.EndIndex = median-1;
Push(item1);
}
if(median+1<=(item.EndIndex))
{
item2.StartIndex = median+1;
item2.EndIndex = item.EndIndex;
Push(item2);
}
}
ShowStack();
}
}
main()
{
ShowArray();
QuickSort();
ShowArray();
}

Resources