Mergesort implementation is slow - algorithm

I'am doing a report about different sorting algorithms in C++. What baffles me is that my mergesort seems to be slower than heapsort in both of the languages. What I've seen is that heapsort is supposed to be slower.
My mergesort sorts an unsorted array with size 100000 at a speed of 19.8 ms meanwhile heapsort sorts it at 9.7 ms. The code for my mergesort function in C++ is as follows:
void merge(int *array, int low, int mid, int high) {
int i, j, k;
int lowLength = mid - low + 1;
int highLength = high - mid;
int *lowArray = new int[lowLength];
int *highArray = new int[highLength];
for (i = 0; i < lowLength; i++)
lowArray[i] = array[low + i];
for (j = 0; j < highLength; j++)
highArray[j] = array[mid + 1 + j];
i = 0;
j = 0;
k = low;
while (i < lowLength && j < highLength) {
if (lowArray[i] <= highArray[j]) {
array[k] = lowArray[i];
i++;
} else {
array[k] = highArray[j];
j++;
}
k++;
}
while (i < lowLength) {
array[k] = lowArray[i];
i++;
k++;
}
while (j < highLength) {
array[k] = highArray[j];
j++;
k++;
}
}
void mergeSort(int *array, int low, int high) {
if (low < high) {
int mid = low + (high - low) / 2;
mergeSort(array, low, mid);
mergeSort(array, mid + 1, high);
merge(array, low, mid, high);
}
}

The example merge sort is doing allocation and copying of data in merge(), and both can be eliminated with a more efficient merge sort. A single allocation for the temp array can be done in a helper / entry function, and the copy is avoided by changing the direction of merge depending on level of recursion either by using two mutually recursive functions (as in example below) or with a boolean parameter.
Here is an example of a C++ top down merge sort that is reasonably optimized. A bottom up merge sort would be slightly faster, and on a system with 16 registers, a 4 way bottom merge sort a bit faster still, about as fast or faster than quick sort.
// prototypes
void TopDownSplitMergeAtoA(int a[], int b[], size_t ll, size_t ee);
void TopDownSplitMergeAtoB(int a[], int b[], size_t ll, size_t ee);
void TopDownMerge(int a[], int b[], size_t ll, size_t rr, size_t ee);
void MergeSort(int a[], size_t n) // entry function
{
if(n < 2) // if size < 2 return
return;
int *b = new int[n];
TopDownSplitMergeAtoA(a, b, 0, n);
delete[] b;
}
void TopDownSplitMergeAtoA(int a[], int b[], size_t ll, size_t ee)
{
if((ee - ll) == 1) // if size == 1 return
return;
size_t rr = (ll + ee)>>1; // midpoint, start of right half
TopDownSplitMergeAtoB(a, b, ll, rr);
TopDownSplitMergeAtoB(a, b, rr, ee);
TopDownMerge(b, a, ll, rr, ee); // merge b to a
}
void TopDownSplitMergeAtoB(int a[], int b[], size_t ll, size_t ee)
{
if((ee - ll) == 1){ // if size == 1 copy a to b
b[ll] = a[ll];
return;
}
size_t rr = (ll + ee)>>1; // midpoint, start of right half
TopDownSplitMergeAtoA(a, b, ll, rr);
TopDownSplitMergeAtoA(a, b, rr, ee);
TopDownMerge(a, b, ll, rr, ee); // merge a to b
}
void TopDownMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
while(r < ee) // else copy rest of right run
b[o++] = a[r++];
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
while(l < rr) // else copy rest of left run
b[o++] = a[l++];
break; // and return
}
}
}

Related

Quick sort -- What am i doing wrong?

Trying to do Quick sort.
logic -> maintaining two variables to place pivot element at correct index. Taking 1st element as pivot. int i for RHS of pivot and Int j for LHS, if they cross each other then j is correct index for pivot.
#include<iostream>
using namespace std;
int partition(int arr[], int low, int high){
int pivot = arr[low];
int i = low+1;
int j = high;
while (i<j)
{
while(arr[i]<=pivot) i++;
while(arr[j]> pivot) j--;
if(i<j) {
swap(arr[i], arr[j]);
}
swap(arr[j], arr[low]);
return j;
}
}
void QuickSort(int arr[], int low , int high){
if(low >= high ) return;
if(high>low){
int pivotindx = partition(arr, low , high);
QuickSort(arr,low, pivotindx-1);
QuickSort( arr, pivotindx+1, high);
}
}
void printquicksort(int arr[] , int n){
cout << " Quick SORT IS HERE BROOOO " << endl;
for (int i = 0; i < n; i++)
{
cout << " " << arr[i] << " " ;
}
}
int main()
{
int arr []={3,4,5,1};
int n= sizeof (arr)/ sizeof (arr[0]);
QuickSort(arr,0,n-1);
printquicksort(arr,n);
return 0;
}
Using i and j for LHS and RHS is type of Hoare partition scheme. The code has a potential issue when using low for the pivot, the while(arr[i]<=pivot) i++; may never encounter an element > pivot and scan past the end of the array. For Hoare partition scheme, the pivot and elements equal to the pivot can end up anywhere, and the partition index separate elements <= pivot and elements >= pivot, so the index needs to be included in one of the recursive calls. Example of a post-increment and post-decrement version of Hoare with the partition code included in QuickSort:
void QuickSort(int *a, int lo, int hi)
{
int i, j;
int p, t;
if(lo >= hi)
return;
p = a[lo + (hi-lo)/2];
i = lo;
j = hi;
while (i <= j){
while (a[i] < p)i++;
while (a[j] > p)j--;
if (i > j)
break;
t = a[i]; // swap
a[i] = a[j];
a[j] = t;
i++;
j--;
}
QuickSort(a, lo, j);
QuickSort(a, i, hi);
}
Example of a classic pre-increment and pre-decrement version of Hoare with the partition code included in QuickSort:
void QuickSort(int a[], int lo, int hi)
{
if(lo >= hi)
return;
int p = a[lo+(hi-lo)/2];
int i = lo-1;
int j = hi+1;
int t;
while(1){
while (a[++i] < p);
while (a[--j] > p);
if(i >= j)
break;
t = a[i]; // swap
a[i] = a[j];
a[j] = t;
}
i = j++;
QuickSort(a, lo, i);
QuickSort(a, j, hi);
}

Iterative/ Non-Recursive Merge Sort

I was trying iterative merge sort , but am stuck at at conditions when input length is not 2^x.
like int[] A ={4,5,1,254,66,75,12,8,65,4,87,63,53,8,99,54,12,34};
public class MergeSort {
public static void sort(int[] A) {
System.out.println("Log(A.len):"+log(A.length, 2));
for (int i = 0; i < log(A.length, 2); i++) { //log A.len
int r = 2 << i; //2^i
int mid = r >>> 1;
for (int j = 0; j+r < A.length; j = j + r) {
System.out.print("offset:" + j + " mid:" + (j + mid) + " r:" + (j + r));
merge(A, j, (j + mid), (j + r));
}
}
}
public static void merge(int[] A, int offset, int mid, int n) {
mid = mid - offset;
n = n - offset;
int[] L = new int[mid];
int[] R = new int[n - mid];
for (int i = 0; i < mid; i++) {
L[i] = A[i + offset];
R[i] = A[mid + i + offset];
}
System.out.print("\nL:");
print_array(L);
System.out.print("\nR:");
print_array(R);
int l = 0;
int r = 0; //left right pointer
int k = offset;
while (l < mid && r < mid) {
if (L[l] < R[r]) {
// System.out.println("in left");
A[k] = L[l];
l++;
} else {
// System.out.println("in right");
A[k] = R[r];
r++;
}
k++;
}
while (l < mid) {
A[k] = L[l];
l++;
k++;
}
while (r < mid) {
A[k] = R[r];
r++;
k++;
}
System.out.print("\nA:");
print_array(A);
System.out.print("\n\n");
}
public static void main(String[] args) {
int[] A ={4,5,1,254,66,75,12,8,65,4,87,63,53,8,99,54,12,34};
sort(A);
}
public static void print_array(int[] A) {
for (int i = 0; i < A.length; i++) {
System.out.print(A[i] + " ");
}
}
static int log(int x, int base) {
return (int) (Math.log(x) / Math.log(base));
}
}
It works fine when input length is 2^x.
Also is there any better way to implement iterative version , this looks a lot messy.
C++ example of bottom up merge sort. a[] is array to sort, b[] is temp array. It includes a check for number of merge passes and swaps in place if the number of passes would be odd, in order to end up with the sorted data in a[].
void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee);
void BottomUpCopy(int a[], int b[], size_t ll, size_t rr);
size_t GetPassCount(size_t n);
void BottomUpMergeSort(int a[], int b[], size_t n)
{
size_t s = 1; // run size
if(GetPassCount(n) & 1){ // if odd number of passes
for(s = 1; s < n; s += 2) // swap in place for 1st pass
if(a[s] < a[s-1])
std::swap(a[s], a[s-1]);
s = 2;
}
while(s < n){ // while not done
size_t ee = 0; // reset end index
while(ee < n){ // merge pairs of runs
size_t ll = ee; // ll = start of left run
size_t rr = ll+s; // rr = start of right run
if(rr >= n){ // if only left run
rr = n;
BottomUpCopy(a, b, ll, rr); // copy left run
break; // end of pass
}
ee = rr+s; // ee = end of right run
if(ee > n)
ee = n;
BottomUpMerge(a, b, ll, rr, ee);
}
std::swap(a, b); // swap a and b
s <<= 1; // double the run size
}
}
void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
do // else copy rest of right run
b[o++] = a[r++];
while(r < ee);
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
do // else copy rest of left run
b[o++] = a[l++];
while(l < rr);
break; // and return
}
}
}
void BottomUpCopy(int a[], int b[], size_t ll, size_t rr)
{
do // copy left run
b[ll] = a[ll];
while(++ll < rr);
}
size_t GetPassCount(size_t n) // return # passes
{
size_t i = 0;
for(size_t s = 1; s < n; s <<= 1)
i += 1;
return(i);
}

Count the number of occurences of a key in a sorted array recursively

I was trying to solve this problem recursively http://www.geeksforgeeks.org/count-number-of-occurrences-in-a-sorted-array/.
The code I have till now uses a stupid little hack with static variable. Although this works, it would fail if you call the function repeatedly with different keys(as the static variable would still remember the previous set value).
int FindCount(const vector< int > &A, int l, int r, int B)
{
static int count =0;
// cout<<l<<' '<<r<<endl;
if(l <= r)
{
int mid = (l+r)/2;
// cout<<mid<<endl;
if(A[mid] == B)
{
count++;
FindCount(A, l, mid-1, B);
FindCount(A, mid+1, r, B);
}
else if (A[mid] < B)
{
FindCount(A, mid+1, r, B);
}
else
{
FindCount(A, l, mid-1, B);
}
}
return count;
}
I can figure out how it should work but have a hard time converting that into code. It should be something like this, once you find the particular key then return 1 and the continue to recusively search the left and right of the key.
Could you help me do this recusively without the use of static variable with a cleaner code :)
int FindCount(const vector< int > &A, int l, int r, int B)
{
int count = 0;
if(l <= r)
{
int mid = (l+r)/2;
if(A[mid] == B)
{
count++;
count += FindCount(A, l, mid-1, B);
count += FindCount(A, mid+1, r, B);
}
else if (A[mid] < B)
{
count = FindCount(A, mid+1, r, B);
}
else
{
count = FindCount(A, l, mid-1, B);
}
}
return count;
}
This should work, although it is still a O(n) algorithm, not very efficient.
You yet cast away the return value of all invocations but that at the bottom of the recursion stack (a stack grows upwards); instead of the static count you can just add the return value of the recursions to an automatic local variable count.
The code contains a serious bug: you should use size_t and not int. The result could overflow. Indexers and counts should be size_t - which is a unsigned 32-bit integer on 32-bit platforms and a unsigned 64-bit integer on 64-bit platforms.
u_seem_surprised has a perfectly valid answer. Another way to solve this problem is to use lambdas and capture the count variable:
#include <vector>
#include <functional>
size_t FindCount(const std::vector<int> &A, size_t l, size_t r, int B)
{
using namespace std;
size_t count = 0;
function<void(const vector<int>&, size_t, size_t, int)> impl;
impl = [&count, &impl](const vector<int> &A, size_t l, size_t r, int B)
{
if (l <= r)
{
auto mid = (l + r) / 2;
if (A[mid] == B)
{
count++;
impl(A, l, mid - 1, B);
impl(A, mid + 1, r, B);
}
else if (A[mid] < B)
{
impl(A, mid + 1, r, B);
}
else
{
impl(A, l, mid - 1, B);
}
}
};
impl(A, l, r, B);
return count;
}

Why Insertion sort faster than Merge sort?

I created on jsperf.com test for 3 sorting methods: Bubble, Insertion and Merge. Link
Before test I create unsorted array with random number from 0 to 1Mln.
Each time test shows that Insertion sort faster than Merge one.
What's reason for such result, if Merge sort time O(n log(n)) while Insertion and Bubble sorts have O(n^2)
test result here
Without more testing, a tentative answer:
Your insertion sort is fairly optimised - you are only switching elements. Your merge sort instantiates new arrays using [], and creates new arrays using slice and concat, which is a large memory-management overhead, not to mention that concat and slice have implicit loops inside them (although in native code). Merge sort is efficient when it is done in-place; with all the copying going on, that should slow you down a lot.
As commented by Amadan, it would be best for merge sort to do a one time allocation of the same size as the array to be sorted. Top down merge sort uses recursion to generate the indices used by merge, while bottom up skips the recursion and use iteration to generate the indices. Most of the time will be spent doing the actual merging of sub-arrays, so top down's excess overhead on larger arrays (1 million element or more) is only about 5%.
Example C++ code for a somewhat optimized bottom up merge sort.
void MergeSort(int a[], size_t n) // entry function
{
if(n < 2) // if size < 2 return
return;
int *b = new int[n];
BottomUpMergeSort(a, b, n);
delete[] b;
}
size_t GetPassCount(size_t n) // return # passes
{
size_t i = 0;
for(size_t s = 1; s < n; s <<= 1)
i += 1;
return(i);
}
void BottomUpMergeSort(int a[], int b[], size_t n)
{
size_t s = 1; // run size
if(GetPassCount(n) & 1){ // if odd number of passes
for(s = 1; s < n; s += 2) // swap in place for 1st pass
if(a[s] < a[s-1])
std::swap(a[s], a[s-1]);
s = 2;
}
while(s < n){ // while not done
size_t ee = 0; // reset end index
while(ee < n){ // merge pairs of runs
size_t ll = ee; // ll = start of left run
size_t rr = ll+s; // rr = start of right run
if(rr >= n){ // if only left run
rr = n;
BottomUpCopy(a, b, ll, rr); // copy left run
break; // end of pass
}
ee = rr+s; // ee = end of right run
if(ee > n)
ee = n;
// merge a pair of runs
BottomUpMerge(a, b, ll, rr, ee);
}
std::swap(a, b); // swap a and b
s <<= 1; // double the run size
}
}
void BottomUpCopy(int a[], int b[], size_t ll, size_t rr)
{
while(ll < rr){ // copy left run
b[ll] = a[ll];
ll++;
}
}
void BottomUpMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
while(r < ee) // else copy rest of right run
b[o++] = a[r++];
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
while(l < rr) // else copy rest of left run
b[o++] = a[l++];
break; // and return
}
}
}

Large values in segment tree for product range queries

I wrote code for firing product range queries on an array.
Note: This question not a duplicate to Multiplication in a range. My problem is something different
I wrote the code for this,
// Program to show segment tree operations like construction, query and update
#include <stdio.h>
#include <math.h>
#define R 1000000000
typedef unsigned long long ull;
int getMid(int s, int e) { return s + (e -s)/2; }
ull getProdUtil(ull *st, int ss, int se, int qs, int qe, int index)
{
if (qs <= ss && qe >= se)
return st[index];
if (se < qs || ss > qe)
return 1;
int mid = getMid(ss, se);
return (getProdUtil(st, ss, mid, qs, qe, 2*index+1) %R *
getProdUtil(st, mid+1, se, qs, qe, 2*index+2) % R )%R;
}
ull getProd(ull *st, int n, int qs, int qe)
{
if (qs < 0 || qe > n-1 || qs > qe)
{
return 0;
}
return getProdUtil(st, 0, n-1, qs, qe, 0) % R;
}
ull constructSTUtil(ull arr[], int ss, int se, ull *st, int si)
{
if (ss == se)
{
st[si] = arr[ss];
return arr[ss];
}
int mid = getMid(ss, se);
ull l = (ull)constructSTUtil(arr, ss, mid, st, si*2+1)%R;
ull r = (ull)constructSTUtil(arr, mid+1, se, st, si*2+2)%R;
st[si] = (l * r) % R;
return st[si];
}
void init_array(ull *st, ull size){
for(ull i=0;i< size;i++){
st[i] = 1;
}
}
ull *constructST(ull arr[], int n)
{
int x = (int)(ceil(log2(n))); //Height of segment tree
int max_size = 2*(int)pow(2, x) - 1; //Maximum size of segment tree
ull *st = new ull[max_size];
init_array(st, max_size);
constructSTUtil(arr, 0, n-1, st, 0);
return st;
}
void print_array(ull *array,int size){
printf("\n");
for(int i=0; i< size;i++){
printf("<%d,%llu> ", i+1,array[i]);
}
}
int main()
{
int n;
scanf("%d",&n);
ull arr[n];
for(int i=0;i<n;i++){
scanf("%llu",&arr[i]);
}
ull *st = constructST(arr, n);
int t;
scanf("%d", &t);
while(t--){
int l,m,r;
scanf("%d %d %d", &l,&r,&m);
ull result = getProd(st, n, l-1, r-1);
printf("%lld\n",result % m);
//int sizes = 100;
//printf("%llu\n",st[0]);
//print_array(st, sizes);
}
return 0;
}
The constraints are,
1 ≤ N ≤ 100,000 (size of array)
1 ≤ Ai ≤ 100 (array elements)
1 ≤ T ≤ 100,000 (number of test cases)
1 ≤ Li ≤ Ri ≤ N (Left and right ranges in query)
1 ≤ Mi ≤ 10<sup>9</sup> (Modulus for each test case. Each test case has to be modulu of this number)
Im pretty sure that R , that I have chosen is failing some test case. I tried R with 1018 as well. But still same problem. Dont know why this is happening?
My question is , is it the problem of R I have chosen or is it the problem of different M being passed in each test case.
Note: Truly not expecting a solution, just expecting a clue
Regards

Resources