Find the maximum number of pieces a rope can be cut into - algorithm

Here is the complete problem statement:
Given a rope of length n, you need to find the maximum number of pieces
you can make such that the length of every piece is in set {a, b, c} for
the given three values a, b, c
I know that the optimal solution can be achieved through Dynamic Programming, however, I have not learned that topic yet and I need to solve this problem recursively. With recursion, the main thing is to identify a subproblem and that's what I'm mainly having difficulty with doing. Can anyone give me an intuitive way to think of this problem? Sort of like a higher level description of the recursion if that makes sense. Is there an easier problem similar to this that I can try first that would help me solve this?
Thanks in advance.

It's already quite simple: with recursion we can just check all possibilities. In one step we can cut away a piece of length a, b, or c, so from a problem of size n we get a sub-problem of smaller size n-x.
Of course we need a base case: when n = 0 we have succeeded, so we can return 0; in case of n < 0 we have failed, so we can return some negative-infinity constant.
Sample pseudo-code:
int solve(int n){
    if(n < 0) return -123456789; // -Infinity
    if(n == 0) return 0;
    return 1 + max(solve(n-a), solve(n-b), solve(n-c));
}
Going to dynamic programming is as simple as setting up a memo lookup table:
int solve(int n){
    if(n < 0) return -123456789; // -Infinity
    if(n == 0) return 0;
    if(n in memo) return memo[n];
    return memo[n] = 1 + max(solve(n-a), solve(n-b), solve(n-c));
}

int maxcut(int n, int a, int b, int c)
{
    if(n == 0) return 0;
    if(n < 0) return -1;
    int result = max(maxcut(n-a, a, b, c), maxcut(n-b, a, b, c), maxcut(n-c, a, b, c));
    if(result == -1) return -1;
    return result + 1;
}

The way we should tackle a recursion problem is:
Finding the recursion case (finding the subproblems)
Finding the base case (the last subproblem, which we cannot break into further subproblems)
Specific to this problem:
Recursion case: cutting the rope at all the possible values until we cannot break it into a smaller subproblem.
Base case: a. It can be completely cut. (valid try)
b. It can't be completely cut. (invalid try)
int maxcut(int n, int a, int b, int c)
{
    if(n == 0) return 0;  // base case a.
    if(n < 0) return -1;  // base case b.
    int result = max(maxcut(n-a, a, b, c), maxcut(n-b, a, b, c), maxcut(n-c, a, b, c)); // subproblems for all the cases
    if(result == -1) return -1;  // boundary condition
    return result + 1;  // count this valid cut and return to the parent
}

Here is the complete code for your problem:
#include <iostream>
using namespace std;

int max(int a, int b, int c)
{
    if (a > b)
    {
        if (a > c)
        {
            return a;
        }
        else
        {
            return c;
        }
    }
    else
    {
        if (b > c)
        {
            return b;
        }
        else
        {
            return c;
        }
    }
}

int maxpiece(int l, int a, int b, int c)
{
    int r;
    if (l == 0)
    {
        return 0;
    }
    if (l < 0)
    {
        return -1;
    }
    r = max(maxpiece(l-a, a, b, c), maxpiece(l-b, a, b, c), maxpiece(l-c, a, b, c));
    if (r == -1)
        return -1;
    return r + 1;
}

int main()
{
    int length;
    cout << "enter rope length ";
    cin >> length;
    int p1, p2, p3;
    cout << endl
         << "enter the only three parameters in which rope can be cut ";
    cin >> p1 >> p2 >> p3;
    cout << endl
         << "ans = " << maxpiece(length, p1, p2, p3);
}
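For example, with rope length 23 and piece lengths 11, 9 and 12, the program should print ans = 2, since 23 = 11 + 12.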

#include <iostream>
#include <algorithm>
using namespace std;

int cutRope(int n, int a, int b, int c){
    // Base cases
    if(n == 0) return 0;
    if(n < 0) return -1;
    int res = max(max(cutRope(n - a, a, b, c), cutRope(n - b, a, b, c)), cutRope(n - c, a, b, c));
    if(res == -1) return -1;
    return res + 1;
}

int main() {
    cout << cutRope(23, 11, 9, 12) << endl;  // prints 2 (23 = 11 + 12)
    return 0;
}

Related

0-1 knapsack TLE

I was solving the 0-1 knapsack problem (src: https://www.interviewbit.com/problems/0-1-knapsack/)
and would like to understand why I got TLE and how to get rid of it.
My solution (which showed TLE in the hard case):
int knapsack(vector<int> A, vector<int> B, int weight, int n, vector<vector<int>> &dp){
    if(weight==0 || n==0){
        // dp[weight][n] = 0;
        return 0;
    }
    if(dp[weight][n]!=(-1)){
        return dp[weight][n];
    }
    if(B[n-1]<=weight){
        dp[weight][n] = max( (knapsack(A,B,weight,n-1,dp)) , (A[n-1] + knapsack(A,B,weight-B[n-1],n-1,dp)) );
        return max( (knapsack(A,B,weight,n-1,dp)) , (A[n-1] + knapsack(A,B,weight-B[n-1],n-1,dp)) );
    }
    // if(B[n-1]>weight){
    else{
        dp[weight][n] = knapsack(A,B,weight,n-1,dp);
        return knapsack(A,B,weight,n-1,dp);
    }
}

int Solution::solve(vector<int> &A, vector<int> &B, int C) {
    int N = A.size();
    // n rows and weights written vertically in columns
    vector<vector<int>> dp(C+1, vector<int> (N+1,-1));
    return knapsack(A,B,C,N,dp);
}
One solution which I found in the discussion tab, which does not get TLE and looks exactly the same as my solution:
int knapsack(vector<int>& wt, vector<int>& val, int W, int n, vector<vector<int>>& dp)
{
    if(n == 0 || W == 0)
        return 0;
    if(dp[n][W] != -1)
        return dp[n][W];
    if(wt[n-1] <= W)
        return dp[n][W] = max(val[n-1] + knapsack(wt, val, W-wt[n-1], n-1, dp), knapsack(wt, val, W, n-1, dp));
    else
        return dp[n][W] = knapsack(wt, val, W, n-1, dp);
}

int Solution::solve(vector<int> &val, vector<int> &wt, int W)
{
    int n = wt.size();
    vector<vector<int>> dp(n+1, vector<int> (W+1, -1));
    return knapsack(wt, val, W, n, dp);
}
Is it possible that using bigger variable names caused the TLE in the hard case?
You do too many recursive calls.
knapsack is not a pure function. It has the side effect of modifying dp, and the compiler is not smart enough to figure out that the second call in the else branch
dp[weight][n] = knapsack(A,B,weight,n-1,dp);
return knapsack(A,B,weight,n-1,dp);
is redundant. Help the compiler and optimize it out manually:
dp[weight][n] = knapsack(A,B,weight,n-1,dp);
return dp[weight][n];
or, just as in the other solution
return dp[weight][n] = knapsack(A,B,weight,n-1,dp);
Ditto for the if branch.
(And no, variable names do not affect performance.)
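Putting both fixes together, and also passing the vectors by reference (the original signature copies A and B on every call, which costs extra time too), the asker's function might look like this sketch:

int knapsack(vector<int> &A, vector<int> &B, int weight, int n, vector<vector<int>> &dp){
    if(weight == 0 || n == 0) return 0;
    if(dp[weight][n] != -1) return dp[weight][n];
    if(B[n-1] <= weight)
        return dp[weight][n] = max(knapsack(A, B, weight, n-1, dp),
                                   A[n-1] + knapsack(A, B, weight - B[n-1], n-1, dp));
    return dp[weight][n] = knapsack(A, B, weight, n-1, dp);
}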

Count the number of occurences of a key in a sorted array recursively

I was trying to solve this problem recursively http://www.geeksforgeeks.org/count-number-of-occurrences-in-a-sorted-array/.
The code I have till now uses a stupid little hack with a static variable. Although this works, it would fail if you call the function repeatedly with different keys (as the static variable would still remember the previously set value).
int FindCount(const vector<int> &A, int l, int r, int B)
{
    static int count = 0;
    // cout<<l<<' '<<r<<endl;
    if(l <= r)
    {
        int mid = (l+r)/2;
        // cout<<mid<<endl;
        if(A[mid] == B)
        {
            count++;
            FindCount(A, l, mid-1, B);
            FindCount(A, mid+1, r, B);
        }
        else if (A[mid] < B)
        {
            FindCount(A, mid+1, r, B);
        }
        else
        {
            FindCount(A, l, mid-1, B);
        }
    }
    return count;
}
I can figure out how it should work but have a hard time converting that into code. It should be something like this: once you find the particular key, return 1 and then continue to recursively search to the left and right of the key.
Could you help me do this recursively, without the use of a static variable, with cleaner code :)
int FindCount(const vector<int> &A, int l, int r, int B)
{
    int count = 0;
    if(l <= r)
    {
        int mid = (l+r)/2;
        if(A[mid] == B)
        {
            count++;
            count += FindCount(A, l, mid-1, B);
            count += FindCount(A, mid+1, r, B);
        }
        else if (A[mid] < B)
        {
            count = FindCount(A, mid+1, r, B);
        }
        else
        {
            count = FindCount(A, l, mid-1, B);
        }
    }
    return count;
}
This should work, although it is still an O(n) algorithm in the worst case, not very efficient.
You currently cast away the return value of all invocations except the one at the bottom of the recursion stack; instead of the static count you can just add the return values of the recursive calls to an automatic local variable count.
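As an aside (not part of the original answers): if you want the count in O(log n) instead of O(n), binary-search the first and the last occurrence separately and subtract the indices. A sketch with half-open bounds:

#include <vector>

// first index in [l, r) whose value is >= B
static int lowerIdx(const std::vector<int> &A, int l, int r, int B) {
    while (l < r) {
        int mid = l + (r - l) / 2;
        if (A[mid] < B) l = mid + 1; else r = mid;
    }
    return l;
}

// first index in [l, r) whose value is > B
static int upperIdx(const std::vector<int> &A, int l, int r, int B) {
    while (l < r) {
        int mid = l + (r - l) / 2;
        if (A[mid] <= B) l = mid + 1; else r = mid;
    }
    return l;
}

int FindCountLog(const std::vector<int> &A, int B) {
    int n = (int)A.size();
    return upperIdx(A, 0, n, B) - lowerIdx(A, 0, n, B);
}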
The code contains a serious bug: you should use size_t and not int, since the result could overflow. Indexes and counts should be size_t, which is an unsigned 32-bit integer on 32-bit platforms and an unsigned 64-bit integer on 64-bit platforms.
u_seem_surprised has a perfectly valid answer. Another way to solve this problem is to use lambdas and capture the count variable:
#include <vector>
#include <functional>

size_t FindCount(const std::vector<int> &A, size_t l, size_t r, int B)
{
    using namespace std;
    size_t count = 0;
    function<void(const vector<int>&, size_t, size_t, int)> impl;
    impl = [&count, &impl](const vector<int> &A, size_t l, size_t r, int B)
    {
        if (l <= r)
        {
            auto mid = (l + r) / 2;
            if (A[mid] == B)
            {
                count++;
                if (mid > 0)          // guard: mid - 1 would underflow for size_t
                    impl(A, l, mid - 1, B);
                impl(A, mid + 1, r, B);
            }
            else if (A[mid] < B)
            {
                impl(A, mid + 1, r, B);
            }
            else if (mid > 0)         // same underflow guard on the left branch
            {
                impl(A, l, mid - 1, B);
            }
        }
    };
    impl(A, l, r, B);
    return count;
}
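Called like the original, e.g. FindCount(v, 0, v.size() - 1, 2) on v = {1, 2, 2, 2, 3} returns 3.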

DP memoized approach for Longest common substring

Can anyone provide the memoized approach for the longest common substring between two strings? I know the bottom-up solution but I am not able to think of it in a top-down manner.
Expected time complexity: O(n^2)
TOP-DOWN APPROACH
#include <iostream>
#include <algorithm>
#include <cstring>
using namespace std;

string X, Y;            // input strings
int ans, dp[105][105];  // ans : answer

int LCS(int n, int m)   // our function returns the value of the (n,m) state
{                       // so that we can use the result in the (n+1,m+1) state
    if(n == 0 || m == 0) return 0;  // in case of match in the (n+1,m+1) state
    if(dp[n][m] != -1) return dp[n][m];
    LCS(n-1, m);        // to visit all n*m states (try example: X: ASDF,
    LCS(n, m-1);        // we call these states first         Y: ASDFF)
    if(X[n-1] == Y[m-1])
    {
        dp[n][m] = LCS(n-1, m-1) + 1;
        ans = max(ans, dp[n][m]);
        return dp[n][m];
    }
    return dp[n][m] = 0;
}

int main()
{
    int t; cin >> t;
    while(t--)
    {
        int n, m; cin >> n >> m;  // lengths of strings
        cin >> X >> Y;
        memset(dp, -1, sizeof dp);
        ans = 0;
        LCS(n, m);
        cout << ans << '\n';
    }
    return 0;
}
Memoization with recursion works with a top-down approach.
Taking the LCS example using DP from Cormen into consideration, below is pseudo-code describing how it will work.
MEMOIZED-LCS-LENGTH(X,Y)
    m <- length[X]
    n <- length[Y]
    for(i <- 1 to m)
        do for(j <- 1 to n)
            c[i,j] <- -1
    for(i <- 0 to m)
        c[i,0] <- 0
    for(j <- 0 to n)
        c[0,j] <- 0
    return RECURSIVE-LCS-LENGTH(X,Y,m,n)

RECURSIVE-LCS-LENGTH(X,Y,i,j)
    if(c[i,j] != -1)
        return c[i,j]
    // The two lines above fetch the result if already present, instead of computing it again.
    if(x[i] == y[j])
        then c[i,j] <- RECURSIVE-LCS-LENGTH(X,Y,i-1,j-1) + 1
    else
        c1 <- RECURSIVE-LCS-LENGTH(X,Y,i-1,j)
        c2 <- RECURSIVE-LCS-LENGTH(X,Y,i,j-1)
        if(c1 > c2)
            then c[i,j] <- c1
            else c[i,j] <- c2
    return c[i,j]
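A direct C++ translation of the pseudo-code above, as a sketch (the fixed 105x105 table size is an assumption mirroring the earlier answer; note that this, like the Cormen pseudo-code, computes the longest common subsequence, not substring):

#include <cstring>
#include <string>

int c[105][105];
std::string x, y;

int recursiveLcsLength(int i, int j) {
    if (i == 0 || j == 0) return 0;        // empty prefix
    if (c[i][j] != -1) return c[i][j];     // fetch a cached result
    if (x[i-1] == y[j-1])
        return c[i][j] = recursiveLcsLength(i-1, j-1) + 1;
    int c1 = recursiveLcsLength(i-1, j);
    int c2 = recursiveLcsLength(i, j-1);
    return c[i][j] = (c1 > c2) ? c1 : c2;
}

int memoizedLcsLength(const std::string &X, const std::string &Y) {
    x = X; y = Y;
    std::memset(c, -1, sizeof c);
    return recursiveLcsLength((int)x.size(), (int)y.size());
}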
Java Solution:
import java.util.Arrays;

class Solution {
    public int findLength(int[] A, int[] B) {
        int[][] cache = new int[A.length][B.length];
        Arrays.stream(cache).forEach(a -> Arrays.fill(a, -1));
        int[] res = new int[1];
        findLength(0, 0, A, B, cache, res);
        return res[0];
    }

    public static int findLength(int a, int b, int[] A, int[] B, int[][] cache, int[] res){
        if( a >= A.length || b >= B.length )
            return 0;
        if(cache[a][b] != -1){
            return cache[a][b];
        }
        if(A[a] == B[b]){
            cache[a][b] = 1 + findLength(a+1, b+1, A, B, cache, res);
            // remember you can not return here: why? see case: s1 = 1,2,3 s2 = 1,4,1,2,3
        }
        // try out other possibilities and update cache
        findLength(a+1, b, A, B, cache, res);
        findLength(a, b+1, A, B, cache, res);
        // you can avoid this and find the max value at the end in cache
        res[0] = Math.max(res[0], cache[a][b]);
        // at this point cache might have -1 or an updated value; if it's -1 make it 0,
        // as this location is visited and no common substring starts from here
        cache[a][b] = Math.max(0, cache[a][b]);
        return cache[a][b];
    }
}
Recursion plus memoization in Python. Please note this code is only partially accepted on HackerEarth and GeeksforGeeks; for larger test cases it gives MLE.
import sys
sys.setrecursionlimit(1000000)

maxlen = 0
t = None

def solve(s1, s2, n, m):
    global maxlen, t
    if n <= 0 or m <= 0:
        return 0
    if t[n][m] != -1:
        return t[n][m]
    if s1[n-1] == s2[m-1]:
        temp = 1 + solve(s1, s2, n-1, m-1)
        maxlen = max(maxlen, temp)
        t[n][m] = temp
        return temp
    t[n][m] = 0
    return 0

class Solution:
    def longestCommonSubstr(self, S1, S2, n, m):
        global maxlen, t
        maxlen = 0
        t = [[-1]*(m+1) for i in range(n+1)]
        for i in range(n+1):
            for j in range(m+1):
                solve(S1, S2, i, j)
        return maxlen

if __name__ == '__main__':
    S1 = input().strip()
    S2 = input().strip()
    n = len(S1)
    m = len(S2)
    ob = Solution()
    print(ob.longestCommonSubstr(S1, S2, n, m))
An easy solution is described below. Here memo[n][m] does not store the length of the greatest substring, but you can store the greatest substring length in the pointer maxi as follows:
#include <iostream>
#include <string>
using namespace std;

int lcs(string X, string Y, int n, int m, int *maxi, int memo[][8]) {
    if(n == 0 || m == 0) {
        return 0;
    }
    if(memo[n-1][m-1] != -1) {
        return memo[n-1][m-1];
    }
    if(X[n-1] == Y[m-1]) {
        memo[n-1][m-1] = 1 + lcs(X, Y, n-1, m-1, maxi, memo);
        if(*maxi < memo[n-1][m-1])
            *maxi = memo[n-1][m-1];
    }
    else {
        memo[n-1][m-1] = 0;
    }
    lcs(X, Y, n-1, m, maxi, memo);
    lcs(X, Y, n, m-1, maxi, memo);
    return memo[n-1][m-1];
}

int main()
{
    int n, m;
    string X = "abcdxyze";
    //string X = "abcd";
    string Y = "xyzabcde";
    n = X.length();
    m = Y.length();
    int memo[n][8];
    for(int i = 0; i < n; i++) {
        for(int j = 0; j < m; j++) {
            memo[i][j] = -1;
        }
    }
    int maxi = 0;
    lcs(X, Y, n, m, &maxi, memo);
    cout << maxi;
    return 0;
}
class Solution {
public:
    int t[1005][1005];
    int maxC = 0;

    int recur_memo(vector<int>& nums1, vector<int>& nums2, int m, int n) {
        if(t[m][n] != -1)
            return t[m][n];
        if(m == 0 || n == 0)
            return 0;
        int max_substring_ending_here = 0;
        // Example: "abcdezf" "abcdelf"
        // You see that wowww, string1[m-1] = string2[n-1] = 'f' and you happily
        // go for (m-1, n-1).
        // But you see, in future after a gap of 'l' and 'z', you will find
        // "abcde" and "abcde".
        if(nums1[m-1] == nums2[n-1]) {
            max_substring_ending_here = 1 + recur_memo(nums1, nums2, m-1, n-1);
        }
        // Maybe you find better results if you do (m-1, n) and you end up
        // updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH.
        int decrease_m = recur_memo(nums1, nums2, m-1, n); // stage (m-1, n)
        // OR,
        // maybe you find better results if you do (m, n-1) and you end up
        // updating maxC with some LAAARGEST COMMON SUBSTRING LENGTH.
        int decrease_n = recur_memo(nums1, nums2, m, n-1); // stage (m, n-1)
        // Like I said, you need to keep on finding the maxC in every call you
        // make throughout your journey.
        maxC = max({maxC, max_substring_ending_here, decrease_m, decrease_n});
        // BUT BUT BUT, you need to return the best you found at this stage (m, n).
        return t[m][n] = max_substring_ending_here;
    }

    int findLength(vector<int>& nums1, vector<int>& nums2) {
        int m = nums1.size();
        int n = nums2.size();
        memset(t, -1, sizeof(t));
        recur_memo(nums1, nums2, m, n); // recursive + memoization
        return maxC;
    }
};
Link : https://leetcode.com/problems/maximum-length-of-repeated-subarray/discuss/1169215/(1)-Recursive%2BMemo-(2)-Bottom-Up-(C%2B%2B)
Here is a recursive and top-down approach:
public int lcsSubstr(char[] s1, char[] s2, int m, int n, int c) {
    if (m == 0 || n == 0) {
        return c;
    }
    int c2 = 0;
    if (s1[m-1] == s2[n-1]) {
        c = lcsSubstr(s1, s2, m-1, n-1, c+1);
    } else {
        c2 = Math.max(lcsSubstr(s1, s2, m, n-1, 0), lcsSubstr(s1, s2, m-1, n, 0));
    }
    return Math.max(c, c2);
}

public int lcsSubstrMemo(char[] s1, char[] s2, int m, int n, int c, int[][] t) {
    if (m == 0 || n == 0) {
        return c;
    }
    if (t[m-1][n-1] != -1) return t[m-1][n-1];
    int c2 = 0;
    if (s1[m-1] == s2[n-1]) {
        c = lcsSubstr(s1, s2, m-1, n-1, c+1);
    } else {
        c2 = Math.max(lcsSubstr(s1, s2, m, n-1, 0), lcsSubstr(s1, s2, m-1, n, 0));
    }
    t[m-1][n-1] = Math.max(c, c2);
    return t[m-1][n-1];
}
Memoization refers to caching the solutions to subproblems in order to use them later. In the longest common subsequence problem, you try to match substrings of two subsequences to see if they match, maintaining in memory the longest one yet found. Here is the solution in Java you are looking for (a memoized version of LCS):
import java.util.HashMap;
import java.util.Scanner;

public class LongestCommonSubsequence {
    private static HashMap<Container, Integer> cache = new HashMap<>();
    private static int count = 0, total = 0;

    public static void main(String sargs[]){
        Scanner scanner = new Scanner(System.in);
        String x = scanner.nextLine();
        String y = scanner.nextLine();
        int max = 0;
        String longest = "";
        for(int j = 0; j < x.length(); j++){
            String common = commonSubsequence(j, 0, x, y);
            if(max < common.length()){
                max = common.length();
                longest = common;
            }
        }
        for(int j = 0; j < y.length(); j++){
            String common = commonSubsequence(j, 0, y, x);
            if(max < common.length()){
                max = common.length();
                longest = common;
            }
        }
        System.out.println(longest);
        System.out.println("cache used " + count + " / " + total);
    }

    public static String commonSubsequence(final int startPositionX, int startPositionY, String x, String y){
        StringBuilder commonSubsequence = new StringBuilder();
        for(int i = startPositionX; i < x.length(); i++){
            Integer index = find(x.charAt(i), startPositionY, y);
            if(index != null){
                commonSubsequence.append(x.charAt(i));
                if(index != y.length()-1)
                    startPositionY = index + 1;
                else
                    break;
            }
        }
        return commonSubsequence.toString();
    }

    public static Integer find(char query, int startIndex, String target){
        Integer pos = cache.get(new Container(query, startIndex));
        total++;
        if(pos != null){
            count++;
            return pos;
        } else {
            for(int i = startIndex; i < target.length(); i++){
                if(target.charAt(i) == query){
                    cache.put(new Container(query, startIndex), i);
                    return i;
                }
            }
            return null;
        }
    }
}
class Container {
    private Character toMatch;
    private Integer indexToStartMatch;

    public Container(char t, int i){
        toMatch = t;
        indexToStartMatch = i;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result
                + ((indexToStartMatch == null) ? 0 : indexToStartMatch.hashCode());
        result = prime * result + ((toMatch == null) ? 0 : toMatch.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        Container other = (Container) obj;
        if (indexToStartMatch == null) {
            if (other.indexToStartMatch != null)
                return false;
        } else if (!indexToStartMatch.equals(other.indexToStartMatch))
            return false;
        if (toMatch == null) {
            if (other.toMatch != null)
                return false;
        } else if (!toMatch.equals(other.toMatch))
            return false;
        return true;
    }
}

Power with divide & conquer algorithm

I want to find out how many multiplications occur when calculating X^46 with the optimal D&C approach for computing a power.
I think this is the most optimal code for calculating a power with the divide & conquer approach:
int power(int x, unsigned int y)
{
    int temp;
    if (y == 0)
        return 1;
    temp = power(x, y/2);
    if (y%2 == 0)
        return temp*temp;
    else
        return x*temp*temp;
}
One note says that calculating X^46 with the optimal power code in D&C needs 8 multiplications, but in my code there are 10. Can anyone correct me?
Edit:
The final code is:
int power(int x, unsigned int y)
{
    int temp;
    if (y == 0)
        return 1;
    if (y == 1)
        return x;
    temp = power(x, y/2);
    if (y%2 == 0)
        return temp*temp;
    else
        return x*temp*temp;
}
You left out the optimizing base case of

if (y == 1)
    return x;

and instead require extra multiplications from

temp = power(x, 0);
return x * temp * temp;

The extra pair of multiplications comes from the unnecessary final recursive call.
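Concretely, the call chain for x^46 with the y == 1 base case is y = 46 → 23 → 11 → 5 → 2 → 1. The even steps (46 and 2) cost one multiplication each and the odd steps (23, 11 and 5) cost two each, so the total is 1 + 2 + 2 + 2 + 1 = 8. Without that base case the recursion continues 1 → 0, and the step at y = 1 takes the odd branch (x*temp*temp with temp = power(x, 0) = 1), adding the two extra multiplications for a total of 10.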
You have redundant multiplies because you don't return early when y == 1. When y == 1, you execute the last line:

return x*temp*temp;

which simplifies to:

return x*1*1;

Adding a special case for y == 1 will get rid of the two additional multiplies.
int power(int x, unsigned int y)
{
    int temp;
    if (y == 0)    // guard so that a direct call with y == 0 terminates
        return 1;
    if (y == 1)
        return x;
    if (y%2 == 0){
        temp = power(x, y/2);
        return temp*temp;
    }
    else{
        temp = power(x, (y-1)/2);
        return x*temp*temp;
    }
}
The best way is to use the divide and conquer strategy; it takes O(log N) time to complete the execution, and it also works for negative exponents.
I'm doing this in C++:
#include <iostream>
using namespace std;

float power(int a, int b)
{
    if (b == 0)
    {
        return 1;
    }
    else
    {
        float temp = power(a, b / 2);
        if (b > 0)
        {
            if (b % 2 == 0)
            {
                return temp * temp;
            }
            else
            {
                return a * temp * temp;
            }
        }
        else
        {
            return 1 / power(a, -b);
        }
    }
}

int main()
{
    int a, b;
    cout << "Enter a Number:"; cin >> a;
    cout << "Enter its exponential:"; cin >> b;
    cout << power(a, b);
}

How do I write merge in place? [duplicate]

I know the question is not too specific.
All I want is someone to tell me how to convert a normal merge sort into an in-place merge sort (or a merge sort with constant extra space overhead).
All I can find (on the net) is pages saying "it is too complex" or "out of scope of this text".
The only known ways to merge in-place (without any extra space) are too complex to be reduced to practical program. (taken from here)
Even if it is too complex, what is the basic concept of how to make the merge sort in-place?
Knuth left this as an exercise (Vol 3, 5.2.5). There do exist in-place merge sorts. They must be implemented carefully.
First, naive in-place merge such as described here isn't the right solution. It downgrades the performance to O(N²).
The idea is to sort part of the array while using the rest as a working area for merging, for example with the following merge function.
void wmerge(Key* xs, int i, int m, int j, int n, int w) {
    while (i < m && j < n)
        swap(xs, w++, xs[i] < xs[j] ? i++ : j++);
    while (i < m)
        swap(xs, w++, i++);
    while (j < n)
        swap(xs, w++, j++);
}
It takes the array xs; the two sorted sub-arrays are represented as the ranges [i, m) and [j, n) respectively. The working area starts from w. Compared with the standard merge algorithm given in most textbooks, this one exchanges the contents of the sorted sub-arrays and the working area. As a result, the working area ends up holding the merged sorted elements, while the elements previously stored in the working area are moved into the two sub-arrays.
However, there are two constraints that must be satisfied:
The work area should be within the bounds of the array. In other words, it should be big enough to hold the elements exchanged into it without causing any out-of-bounds error.
The work area may overlap with either of the two sorted arrays; however, we must ensure that none of the unmerged elements are overwritten.
With this merging algorithm defined, it's easy to imagine a solution which sorts half of the array. The next question is how to deal with the rest, the unsorted part stored in the work area, as shown below:
... unsorted 1/2 array ... | ... sorted 1/2 array ...
One intuitive idea is to recursively sort the other half of the working area, so that only 1/4 of the elements haven't been sorted yet.
... unsorted 1/4 array ... | sorted 1/4 array B | sorted 1/2 array A ...
The key point at this stage is that we must merge the sorted 1/4 elements B with the sorted 1/2 elements A sooner or later. Is the working area left, which only holds 1/4 of the elements, big enough to merge A and B? Unfortunately, it isn't.
However, the second constraint mentioned above gives us a hint: we can exploit it by arranging the working area to overlap with either sub-array, as long as we can order the merging so that the unmerged elements won't be overwritten.
Actually, instead of sorting the second half of the working area, we can sort the first half, and put the working area between the two sorted arrays like this:
... sorted 1/4 array B | unsorted work area | ... sorted 1/2 array A ...
This setup effectively arranges the work area to overlap with sub-array A. This idea is proposed in [Jyrki Katajainen, Tomi Pasanen, Jukka Teuhola. ``Practical in-place mergesort''. Nordic Journal of Computing, 1996].
So the only thing left is to repeat the above step, which reduces the working area from 1/2 to 1/4, 1/8, ... When the working area becomes small enough (for example, only two elements left), we can switch to a trivial insertion sort to end the algorithm.
Here is the implementation in ANSI C based on this paper.
void imsort(Key* xs, int l, int u);

void swap(Key* xs, int i, int j) {
    Key tmp = xs[i]; xs[i] = xs[j]; xs[j] = tmp;
}

/*
 * sort xs[l, u), and put result to working area w.
 * constraint, len(w) == u - l
 */
void wsort(Key* xs, int l, int u, int w) {
    int m;
    if (u - l > 1) {
        m = l + (u - l) / 2;
        imsort(xs, l, m);
        imsort(xs, m, u);
        wmerge(xs, l, m, m, u, w);
    }
    else
        while (l < u)
            swap(xs, l++, w++);
}

void imsort(Key* xs, int l, int u) {
    int m, n, w;
    if (u - l > 1) {
        m = l + (u - l) / 2;
        w = l + u - m;
        wsort(xs, l, m, w); /* the last half contains sorted elements */
        while (w - l > 2) {
            n = w;
            w = l + (n - l + 1) / 2;
            wsort(xs, w, n, l); /* the first half of the previous working area contains sorted elements */
            wmerge(xs, l, l + n - w, n, u, w);
        }
        for (n = w; n > l; --n) /* switch to insertion sort */
            for (m = n; m < u && xs[m] < xs[m-1]; ++m)
                swap(xs, m, m - 1);
    }
}
Where wmerge is defined previously.
The full source code can be found here and the detailed explanation can be found here
By the way, this version isn't the fastest merge sort, because it needs more swap operations. According to my tests, it's faster than the standard version, which allocates extra space in every recursion, but slower than the optimized version, which doubles the original array in advance and uses it for further merging.
Including its "big result", this paper describes a couple of variants of in-place merge sort (PDF):
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.5514&rep=rep1&type=pdf
In-place sorting with fewer moves
Jyrki Katajainen, Tomi A. Pasanen
It is shown that an array of n elements can be sorted using O(1) extra space, O(n log n / log log n) element moves, and n log₂ n + O(n log log n) comparisons. This is the first in-place sorting algorithm requiring o(n log n) moves in the worst case while guaranteeing O(n log n) comparisons, but due to the constant factors involved the algorithm is predominantly of theoretical interest.
I think this is relevant too. I have a printout of it lying around, passed on to me by a colleague, but I haven't read it. It seems to cover basic theory, but I'm not familiar enough with the topic to judge how comprehensively:
http://comjnl.oxfordjournals.org/cgi/content/abstract/38/8/681
Optimal Stable Merging
Antonios Symvonis
This paper shows how to stably merge two sequences A and B of sizes m and n, m ≤ n, respectively, with O(m+n) assignments, O(m log(n/m + 1)) comparisons and using only a constant amount of additional space. This result matches all known lower bounds...
It really isn't easy or efficient, and I suggest you don't do it unless you really have to (and you probably don't have to unless this is homework, since the applications of in-place merging are mostly theoretical). Can't you use quicksort instead? Quicksort will be faster anyway with a few simpler optimizations, and its extra memory is O(log N).
Anyway, if you must do it then you must. Here's what I found: one and two. I'm not familiar with the in-place merge sort, but it seems like the basic idea is to use rotations to facilitate merging two arrays without using extra memory.
Note that this is slower even than the classic merge sort that's not in-place.
The critical step is getting the merge itself to be in-place. It's not as difficult as those sources make out, but you lose something when you try.
Looking at one step of the merge:
[...list-sorted...|x...list-A...|y...list-B...]
We know that the sorted sequence is less than everything else, that x is less than everything else in A, and that y is less than everything else in B. In the case where x is less than or equal to y, you just advance the pointer at the start of A by one. In the case where y is less than x, you've got to shuffle y past the whole of A into the sorted section. That last step is what makes this expensive (except in degenerate cases).
It's generally cheaper (especially when the arrays only actually contain single words per element, e.g., a pointer to a string or structure) to trade off some space for time and have a separate temporary array that you sort back and forth between.
An example of bufferless mergesort in C.
#define SWAP(type, a, b) \
    do { type t=(a);(a)=(b);(b)=t; } while (0)

static void reverse_(int* a, int* b)
{
    for ( --b; a < b; a++, b-- )
        SWAP(int, *a, *b);
}

static int* rotate_(int* a, int* b, int* c)
/* swap the sequence [a,b) with [b,c). */
{
    if (a != b && b != c)
    {
        reverse_(a, b);
        reverse_(b, c);
        reverse_(a, c);
    }
    return a + (c - b);
}

static int* lower_bound_(int* a, int* b, const int key)
/* find first element not less than #p key in sorted sequence or end of
 * sequence (#p b) if not found. */
{
    int i;
    for ( i = b-a; i != 0; i /= 2 )
    {
        int* mid = a + i/2;
        if (*mid < key)
            a = mid + 1, i--;
    }
    return a;
}

static int* upper_bound_(int* a, int* b, const int key)
/* find first element greater than #p key in sorted sequence or end of
 * sequence (#p b) if not found. */
{
    int i;
    for ( i = b-a; i != 0; i /= 2 )
    {
        int* mid = a + i/2;
        if (*mid <= key)
            a = mid + 1, i--;
    }
    return a;
}

static void ip_merge_(int* a, int* b, int* c)
/* inplace merge. */
{
    int n1 = b - a;
    int n2 = c - b;
    if (n1 == 0 || n2 == 0)
        return;
    if (n1 == 1 && n2 == 1)
    {
        if (*b < *a)
            SWAP(int, *a, *b);
    }
    else
    {
        int* p, * q;
        if (n1 <= n2)
            p = upper_bound_(a, b, *(q = b+n2/2));
        else
            q = lower_bound_(b, c, *(p = a+n1/2));
        b = rotate_(p, b, q);
        ip_merge_(a, p, b);
        ip_merge_(b, q, c);
    }
}

void mergesort(int* v, int n)
{
    if (n > 1)
    {
        int h = n/2;
        mergesort(v, h); mergesort(v+h, n-h);
        ip_merge_(v, v+h, v+n);
    }
}
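A quick driver to exercise it, compiled together with the functions above (the driver is my addition, not part of the original answer):

#include <stdio.h>

int main(void)
{
    int v[] = { 5, 1, 4, 2, 8, 3, 7, 6 };
    int n = (int)(sizeof v / sizeof v[0]);
    mergesort(v, n);
    for (int i = 0; i < n; i++)
        printf("%d ", v[i]);   /* prints: 1 2 3 4 5 6 7 8 */
    printf("\n");
    return 0;
}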
An example of adaptive mergesort (optimized).
It adds support code and modifications to accelerate the merge when an auxiliary buffer of any size is available (it still works without additional memory). It uses forward and backward merging, ring rotation, small-sequence merging and sorting, and iterative mergesort.
#include <stdlib.h>
#include <string.h>

static int* copy_(const int* a, const int* b, int* out)
{
    int count = b - a;
    if (a != out)
        memcpy(out, a, count*sizeof(int));
    return out + count;
}

static int* copy_backward_(const int* a, const int* b, int* out)
{
    int count = b - a;
    if (b != out)
        memmove(out - count, a, count*sizeof(int));
    return out - count;
}

static int* merge_(const int* a1, const int* b1, const int* a2,
                   const int* b2, int* out)
{
    while ( a1 != b1 && a2 != b2 )
        *out++ = (*a1 <= *a2) ? *a1++ : *a2++;
    return copy_(a2, b2, copy_(a1, b1, out));
}

static int* merge_backward_(const int* a1, const int* b1,
                            const int* a2, const int* b2, int* out)
{
    while ( a1 != b1 && a2 != b2 )
        *--out = (*(b1-1) > *(b2-1)) ? *--b1 : *--b2;
    return copy_backward_(a1, b1, copy_backward_(a2, b2, out));
}

static unsigned int gcd_(unsigned int m, unsigned int n)
{
    while ( n != 0 )
    {
        unsigned int t = m % n;
        m = n;
        n = t;
    }
    return m;
}

static void rotate_inner_(const int length, const int stride,
                          int* first, int* last)
{
    int* p, * next = first, x = *first;
    while ( 1 )
    {
        p = next;
        if ((next += stride) >= last)
            next -= length;
        if (next == first)
            break;
        *p = *next;
    }
    *p = x;
}

static int* rotate_(int* a, int* b, int* c)
/* swap the sequence [a,b) with [b,c). */
{
    if (a != b && b != c)
    {
        int n1 = c - a;
        int n2 = b - a;
        int* i = a;
        int* j = a + gcd_(n1, n2);
        for ( ; i != j; i++ )
            rotate_inner_(n1, n2, i, c);
    }
    return a + (c - b);
}

static void ip_merge_small_(int* a, int* b, int* c)
/* inplace merge.
 * #note faster for small sequences. */
{
    while ( a != b && b != c )
        if (*a <= *b)
            a++;
        else
        {
            int* p = b+1;
            while ( p != c && *p < *a )
                p++;
            rotate_(a, b, p);
            b = p;
        }
}

static void ip_merge_(int* a, int* b, int* c, int* t, const int ts)
/* inplace merge.
 * #note works with or without additional memory. */
{
    int n1 = b - a;
    int n2 = c - b;
    if (n1 <= n2 && n1 <= ts)
    {
        merge_(t, copy_(a, b, t), b, c, a);
    }
    else if (n2 <= ts)
    {
        merge_backward_(a, b, t, copy_(b, c, t), c);
    }
    /* merge without buffer. */
    else if (n1 + n2 < 48)
    {
        ip_merge_small_(a, b, c);
    }
    else
    {
        int* p, * q;
        if (n1 <= n2)
            p = upper_bound_(a, b, *(q = b+n2/2));
        else
            q = lower_bound_(b, c, *(p = a+n1/2));
        b = rotate_(p, b, q);
        ip_merge_(a, p, b, t, ts);
        ip_merge_(b, q, c, t, ts);
    }
}

static void ip_merge_chunk_(const int cs, int* a, int* b, int* t,
                            const int ts)
{
    int* p = a + cs*2;
    for ( ; p <= b; a = p, p += cs*2 )
        ip_merge_(a, a+cs, p, t, ts);
    if (a+cs < b)
        ip_merge_(a, a+cs, b, t, ts);
}

static void smallsort_(int* a, int* b)
/* insertion sort.
 * #note any stable sort with low setup cost will do. */
{
    int* p, * q;
    for ( p = a+1; p < b; p++ )
    {
        int x = *p;
        for ( q = p; a < q && x < *(q-1); q-- )
            *q = *(q-1);
        *q = x;
    }
}

static void smallsort_chunk_(const int cs, int* a, int* b)
{
    int* p = a + cs;
    for ( ; p <= b; a = p, p += cs )
        smallsort_(a, p);
    smallsort_(a, b);
}

static void mergesort_lower_(int* v, int n, int* t, const int ts)
{
    int cs = 16;
    smallsort_chunk_(cs, v, v+n);
    for ( ; cs < n; cs *= 2 )
        ip_merge_chunk_(cs, v, v+n, t, ts);
}

static void* get_buffer_(int size, int* final)
{
    void* p = NULL;
    while ( size != 0 && (p = malloc(size)) == NULL )
        size /= 2;
    *final = size;
    return p;
}

void mergesort(int* v, int n)
{
    /* #note buffer size may be in the range [0,(n+1)/2]. */
    int request = (n+1)/2 * sizeof(int);
    int actual;
    int* t = (int*) get_buffer_(request, &actual);
    /* #note allocation failure okay. */
    int tsize = actual / sizeof(int);
    mergesort_lower_(v, n, t, tsize);
    free(t);
}
This answer has a code example, which implements the algorithm described in the paper Practical In-Place Merging by Bing-Chao Huang and Michael A. Langston. I have to admit that I do not understand the details, but the given complexity of the merge step is O(n).
From a practical perspective, there is evidence that pure in-place implementations do not perform better in real-world scenarios. For example, the C++ standard defines std::inplace_merge, which is, as the name implies, an in-place merge operation.
Assuming that C++ libraries are typically very well optimized, it is interesting to see how it is implemented:
1) libstdc++ (part of the GCC code base): std::inplace_merge
The implementation delegates to __inplace_merge, which dodges the problem by trying to allocate a temporary buffer:
typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
_TmpBuf __buf(__first, __len1 + __len2);

if (__buf.begin() == 0)
    std::__merge_without_buffer
        (__first, __middle, __last, __len1, __len2, __comp);
else
    std::__merge_adaptive
        (__first, __middle, __last, __len1, __len2, __buf.begin(),
         _DistanceType(__buf.size()), __comp);
Otherwise, it falls back to an implementation (__merge_without_buffer), which requires no extra memory, but no longer runs in O(n) time.
2) libc++ (part of the Clang code base): std::inplace_merge
Looks similar. It delegates to a function which also tries to allocate a buffer. Depending on whether it gets enough space, it chooses the implementation. The constant-memory fallback function is called __buffered_inplace_merge.
Maybe even the fallback is still O(n) time, but the point is that they do not use the constant-memory implementation if temporary memory is available.
Note that the C++ standard explicitly gives implementations the freedom to choose this approach by lowering the required complexity from O(N) to O(N log N):
Complexity:
Exactly N-1 comparisons if enough additional memory is available. If the memory is insufficient, O(N log N) comparisons.
Of course, this cannot be taken as a proof that constant space in-place merges in O(n) time should never be used. On the other hand, if it would be faster, the optimized C++ libraries would probably switch to that type of implementation.
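For completeness, a minimal usage sketch of std::inplace_merge itself:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
    std::vector<int> v{1, 4, 7, 2, 3, 9};   // two sorted runs: [1,4,7] and [2,3,9]
    std::inplace_merge(v.begin(), v.begin() + 3, v.end());
    for (int x : v) std::cout << x << ' ';  // prints: 1 2 3 4 7 9
    return 0;
}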
This is my C version:
void merge(int *a, int sizei, int sizej);

void mergesort(int *a, int len) {
    int listsize, xsize;
    for (listsize = 1; listsize <= len; listsize *= 2) {
        for (int i = 0, j = listsize; (j + listsize) <= len; i += (listsize*2), j += (listsize*2)) {
            merge(&a[i], listsize, listsize);
        }
    }
    listsize /= 2;
    xsize = len % listsize;
    if (xsize > 1)
        mergesort(&a[len-xsize], xsize);
    merge(a, listsize, xsize);
}

void merge(int *a, int sizei, int sizej) {
    int temp;
    int ii = 0;
    int ji = sizei;
    int flength = sizei + sizej;
    for (int f = 0; f < (flength-1); f++) {
        if (sizei == 0 || sizej == 0)
            break;
        if (a[ii] < a[ji]) {
            ii++;
            sizei--;
        }
        else {
            temp = a[ji];
            for (int z = (ji-1); z >= ii; z--)
                a[z+1] = a[z];
            ii++;
            a[f] = temp;
            ji++;
            sizej--;
        }
    }
}
I know I'm late to the game, but here's a solution I wrote yesterday. I also posted this elsewhere, but this appears to be the most popular merge-in-place thread on SO. I've also not seen this algorithm posted anywhere else, so hopefully this helps some people.
This algorithm is in its most simple form so that it can be understood. It can be significantly tweaked for extra speed. Average time complexity is: O(n.log₂n) for the stable in-place array merge, and O(n.(log₂n)²) for the overall sort.
// Stable Merge In Place Sort
//
// The following code is written to illustrate the base algorithm. A good
// number of optimizations can be applied to boost its overall speed.
// For all its simplicity, it does still perform somewhat decently.
// Average case time complexity appears to be: O(n.(log₂n)²)

#include <stddef.h>
#include <stdio.h>

#define swap(x, y) (t=(x), (x)=(y), (y)=t)

// Both sorted sub-arrays must be adjacent in 'a'
// Assumes that both 'an' and 'bn' are always non-zero
// 'an' is the length of the first sorted section in 'a', referred to as A
// 'bn' is the length of the second sorted section in 'a', referred to as B
static void
merge_inplace(int A[], size_t an, size_t bn)
{
    int t, *B = &A[an];
    size_t pa, pb; // Swap partition pointers within A and B

    // Find the portion to swap. We're looking for how much from the
    // start of B can swap with the end of A, such that every element
    // in A is less than or equal to any element in B. This is quite
    // simple when both sub-arrays come at us pre-sorted
    for (pa = an, pb = 0; pa > 0 && pb < bn && B[pb] < A[pa-1]; pa--, pb++);

    // Now swap the last part of A with the first part of B according to the
    // indices we found
    for (size_t index = pa; index < an; index++)
        swap(A[index], B[index-pa]);

    // Now merge the two sub-array pairings. We need to check that either array
    // didn't wholly swap out the other and cause the remaining portion to be zero
    if (pa > 0 && (an-pa) > 0)
        merge_inplace(A, pa, an-pa);
    if (pb > 0 && (bn-pb) > 0)
        merge_inplace(B, pb, bn-pb);
} // merge_inplace

// Implements a recursive merge-sort algorithm with an optional
// insertion sort for when the splits get too small. 'n' must
// ALWAYS be 2 or more. It enforces this when calling itself
static void
merge_sort(int a[], size_t n)
{
    size_t m = n/2;

    // Sort first and second halves only if the target 'n' will be > 1
    if (m > 1)
        merge_sort(a, m);
    if ((n-m) > 1)
        merge_sort(a+m, n-m);

    // Now merge the two sorted sub-arrays together. We know that since
    // n > 1, then both m and n-m MUST be non-zero, and so we will never
    // violate the condition of not passing in zero length sub-arrays
    merge_inplace(a, m, n-m);
} // merge_sort

// Print an array
static void
print_array(int a[], size_t size)
{
    if (size > 0) {
        printf("%d", a[0]);
        for (size_t i = 1; i < size; i++)
            printf(" %d", a[i]);
    }
    printf("\n");
} // print_array

// Test driver
int
main()
{
    int a[] = { 17, 3, 16, 5, 14, 8, 10, 7, 15, 1, 13, 4, 9, 12, 11, 6, 2 };
    size_t n = sizeof(a) / sizeof(a[0]);
    merge_sort(a, n);
    print_array(a, n);
    return 0;
} // main
Leveraging C++ std::inplace_merge, in-place merge sort can be implemented as follows:
template< class _Type >
inline void merge_sort_inplace(_Type* src, size_t l, size_t r)
{
    if (r <= l) return;
    size_t m = l + (r - l) / 2;  // computes the average without overflow
    merge_sort_inplace(src, l, m);
    merge_sort_inplace(src, m + 1, r);
    std::inplace_merge(src + l, src + m + 1, src + r + 1);
}
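The bounds are inclusive, so a whole array of n elements is sorted with l = 0 and r = n - 1. A minimal usage sketch (assumes <algorithm> is included for std::inplace_merge):

int main()
{
    int a[] = { 5, 1, 4, 2, 3 };
    merge_sort_inplace(a, 0, 4);  // inclusive bounds: sorts a[0..4] to { 1, 2, 3, 4, 5 }
    return 0;
}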
More sorting algorithms, including parallel implementations, are available in https://github.com/DragonSpit/ParallelAlgorithms repo, which is open source and free.
I just tried an in-place merge algorithm for merge sort in Java, using the insertion sort algorithm and the following steps.
1) Two sorted arrays are available.
2) Compare the first values of each array and place the smaller value into the first array.
3) Place the larger value into the second array by using insertion sort (traverse from left to right).
4) Then again compare the second value of the first array and the first value of the second array, and do the same. But when a swap happens, there is a clue that lets you skip comparing further items; only the swap is required.
I have made some optimization here to keep the number of comparisons in the insertion sort down. The only drawback I found with this solution is that it needs a lot of swapping of array elements in the second array.
e.g.)
First___Array : 3, 7, 8, 9
Second Array : 1, 2, 4, 5
Then 7, 8, 9 each make the second array swap (move left by one) all of its elements to place themselves at the end.
So the assumption here is that swapping items is negligible compared to comparing two items.
https://github.com/skanagavelu/algorithams/blob/master/src/sorting/MergeSort.java
package sorting;

import java.util.Arrays;

public class MergeSort {

    public static void main(String[] args) {
        int[] array = { 5, 6, 10, 3, 9, 2, 12, 1, 8, 7 };
        mergeSort(array, 0, array.length - 1);
        System.out.println(Arrays.toString(array));

        int[] array1 = {4, 7, 2};
        System.out.println(Arrays.toString(array1));
        mergeSort(array1, 0, array1.length - 1);
        System.out.println(Arrays.toString(array1));
        System.out.println("\n\n");

        int[] array2 = {4, 7, 9};
        System.out.println(Arrays.toString(array2));
        mergeSort(array2, 0, array2.length - 1);
        System.out.println(Arrays.toString(array2));
        System.out.println("\n\n");

        int[] array3 = {4, 7, 5};
        System.out.println(Arrays.toString(array3));
        mergeSort(array3, 0, array3.length - 1);
        System.out.println(Arrays.toString(array3));
        System.out.println("\n\n");

        int[] array4 = {7, 4, 2};
        System.out.println(Arrays.toString(array4));
        mergeSort(array4, 0, array4.length - 1);
        System.out.println(Arrays.toString(array4));
        System.out.println("\n\n");

        int[] array5 = {7, 4, 9};
        System.out.println(Arrays.toString(array5));
        mergeSort(array5, 0, array5.length - 1);
        System.out.println(Arrays.toString(array5));
        System.out.println("\n\n");

        int[] array6 = {7, 4, 5};
        System.out.println(Arrays.toString(array6));
        mergeSort(array6, 0, array6.length - 1);
        System.out.println(Arrays.toString(array6));
        System.out.println("\n\n");

        // Handling array of size two
        int[] array7 = {7, 4};
        System.out.println(Arrays.toString(array7));
        mergeSort(array7, 0, array7.length - 1);
        System.out.println(Arrays.toString(array7));
        System.out.println("\n\n");

        int input1[] = {1};
        int input2[] = {4, 2};
        int input3[] = {6, 2, 9};
        int input4[] = {6, -1, 10, 4, 11, 14, 19, 12, 18};

        System.out.println(Arrays.toString(input1));
        mergeSort(input1, 0, input1.length - 1);
        System.out.println(Arrays.toString(input1));
        System.out.println("\n\n");

        System.out.println(Arrays.toString(input2));
        mergeSort(input2, 0, input2.length - 1);
        System.out.println(Arrays.toString(input2));
        System.out.println("\n\n");

        System.out.println(Arrays.toString(input3));
        mergeSort(input3, 0, input3.length - 1);
        System.out.println(Arrays.toString(input3));
        System.out.println("\n\n");

        System.out.println(Arrays.toString(input4));
        mergeSort(input4, 0, input4.length - 1);
        System.out.println(Arrays.toString(input4));
        System.out.println("\n\n");
    }

    private static void mergeSort(int[] array, int p, int r) {
        // Both ways of finding the mid below are fine.
        int mid = (r - p)/2 + p;
        int mid1 = (r + p)/2;
        if (mid != mid1) {
            System.out.println(" Mid is mismatching:" + mid + "/" + mid1 + " for p:" + p + " r:" + r);
        }
        if (p < r) {
            mergeSort(array, p, mid);
            mergeSort(array, mid + 1, r);
            // merge(array, p, mid, r);
            inPlaceMerge(array, p, mid, r);
        }
    }

    // Regular merge
    private static void merge(int[] array, int p, int mid, int r) {
        int lengthOfLeftArray = mid - p + 1; // This is important to add +1.
        int lengthOfRightArray = r - mid;
        int[] left = new int[lengthOfLeftArray];
        int[] right = new int[lengthOfRightArray];
        for (int i = p, j = 0; i <= mid; ) {
            left[j++] = array[i++];
        }
        for (int i = mid + 1, j = 0; i <= r; ) {
            right[j++] = array[i++];
        }
        int i = 0, j = 0;
        for (; i < left.length && j < right.length; ) {
            if (left[i] < right[j]) {
                array[p++] = left[i++];
            } else {
                array[p++] = right[j++];
            }
        }
        while (j < right.length) {
            array[p++] = right[j++];
        }
        while (i < left.length) {
            array[p++] = left[i++];
        }
    }

    // In-place merge, no extra array
    private static void inPlaceMerge(int[] array, int p, int mid, int r) {
        int secondArrayStart = mid + 1;
        int prevPlaced = mid + 1;
        int q = mid + 1;
        while (p < mid + 1 && q <= r) {
            boolean swapped = false;
            if (array[p] > array[q]) {
                swap(array, p, q);
                swapped = true;
            }
            if (q != secondArrayStart && array[p] > array[secondArrayStart]) {
                swap(array, p, secondArrayStart);
                swapped = true;
            }
            // Check the swapped value is in the right place of the second sorted array
            if (swapped && secondArrayStart + 1 <= r && array[secondArrayStart + 1] < array[secondArrayStart]) {
                prevPlaced = placeInOrder(array, secondArrayStart, prevPlaced);
            }
            p++;
            if (q < r) { // q+1 <= r
                q++;
            }
        }
    }

    private static int placeInOrder(int[] array, int secondArrayStart, int prevPlaced) {
        int i = secondArrayStart;
        for (; i < array.length; i++) {
            // Simply swap till the prevPlaced position
            if (secondArrayStart < prevPlaced) {
                swap(array, secondArrayStart, secondArrayStart + 1);
                secondArrayStart++;
                continue;
            }
            if (array[i] < array[secondArrayStart]) {
                swap(array, i, secondArrayStart);
                secondArrayStart++;
            } else if (i != secondArrayStart && array[i] > array[secondArrayStart]) {
                break;
            }
        }
        return secondArrayStart;
    }

    private static void swap(int[] array, int m, int n) {
        int temp = array[m];
        array[m] = array[n];
        array[n] = temp;
    }
}
