Set Trailing Zeros to X FAST - performance

I have a 2D array of integers. There may be 0s in the middle of each row, and each row ends with some number of trailing 0s.
How do I set all the trailing zeros to some integer X?
import numpy as np
def generateTestData(N, K, INDEX_SIZE):
# Start with a flat array to easily place zeros inside
data1 = np.random.randint(0, INDEX_SIZE, N*K)
# Add zeros at random locations
idx = np.random.randint(0, N*K, int(N*K/3))
data1[idx] = 0
# Make data1 a (N,K) array
data1 = np.reshape(data1, (N,K))
# Add trailing zeros
for i in range(N):
data1[i,np.random.randint(0,K):] = 0
return data1
if __name__=='__main__':
N = 10000; K = 150; INDEX_SIZE = 500; X = -1
# Test data
data1 = generateTestData(N, K, INDEX_SIZE)
# Save a copy for the test
data2 = np.copy(data1)
for i in range(N):
for j in reversed(range(K)):
if data1[i,j] == 0:
data1[i,j] = X
else:
break
# Faster code here on 'data2'
# ...
def diff(a,b):
return np.mean(np.abs(a-b))
# Verification:
print('Diff(data1,data2) = '+str(diff(data2,data1)))

Here's a vectorized solution leveraging broadcasting -
def replace_trailing_num(a, compare_val=0, assign_val=-1):
idx = a.shape[1] - (a[:,::-1]!=compare_val).argmax(axis=1)
idx[(a==compare_val).all(1)] = 0
mask = np.arange(a.shape[1]) >= idx[:,None]
a[mask] = assign_val
return a
Sample run -
In [60]: a
Out[60]:
array([[2, 3, 0, 4, 6, 0, 0],
[0, 5, 8, 0, 0, 0, 0]])
In [61]: replace_trailing_num(a, compare_val=0, assign_val=-1)
Out[61]:
array([[ 2, 3, 0, 4, 6, -1, -1],
[ 0, 5, 8, -1, -1, -1, -1]])
Alternatively, we can use np.minimum.accumulate to get the mask -
mask = np.minimum.accumulate(a[:,::-1]==compare_val,axis=1)[:,::-1]
If you aren't looping enough or the number of columns is a comparatively bigger number than number of rows, a loopy one based on slicing might be better, one of which is listed below -
def replace_trailing_num_loopy(a, compare_val=0, assign_val=-1):
idx = (a[:,::-1]!=compare_val).argmax(axis=1)
for i,c in enumerate(idx):
a[i,-c:] = assign_val
return a

Related

How to solve flow game using Google OR tools?

I tried to make solver for flow game using google-OR tools.
I made a few rules for the corner to only contains corner pipes, but other than that, i can not figure out how to make the pipe connected to each other nor how to tell the model to make a pipe that is connecting to each other.
A few snippet
pipe_types = {
0: " ",
1: "-",
2: "|",
3: "┗" ,
4: "┛" ,
5: "┓",
6: "┏",
7: "●"
}
model = cp_model.CpModel()
filled_map = [[0,0,0,0],
[0,0,7,0],
[0,0,0,0],
[0,7,0,0]]
mesh_size = int(np.sqrt(len(np.array(filled_map).flatten())))
target_map = [[model.NewIntVar(1, 6, 'column: %i' % i) for i in range(mesh_size)] for j in range(mesh_size)]
flow_map = init_map(model, target_map, filled_map)
for i in range(len(flow_map)):
for j in range(len(flow_map[0])):
# check if top or bottom side
if (i == 0) or (i == len(flow_map)-1):
model.Add(flow_map[i][j] != 2)
# check if left or right side
if (j == 0) or (j == len(flow_map[0])-1):
model.Add(flow_map[i][j] != 1)
# left up corner
if (i == 0) & (j == 0):
model.Add(flow_map[i][j] != 3)
model.Add(flow_map[i][j] != 4)
model.Add(flow_map[i][j] != 5)
# right up corner
if (i == 0) & (j == len(flow_map[0])-1):
model.Add(flow_map[i][j] != 3)
model.Add(flow_map[i][j] != 4)
model.Add(flow_map[i][j] != 6)
# left bottom corner
if (i == len(flow_map)-1) & (j == 0):
model.Add(flow_map[i][j] != 4)
model.Add(flow_map[i][j] != 5)
model.Add(flow_map[i][j] != 6)
# right bottom corner
if (i == len(flow_map)-1) & (j == len(flow_map[0])-1):
model.Add(flow_map[i][j] != 3)
model.Add(flow_map[i][j] != 5)
model.Add(flow_map[i][j] != 6)
# Solving
status = solver.Solve(model)
res = []
if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
for i in range(len(flow_map)):
for j in range(len(flow_map[0])):
res.append(solver.Value(flow_map[i][j]))
print(solver.Value(flow_map[i][j]), end=" ")
print()
This would results horizontal pipes on the center of the mesh. Later on, i would have to figure out how to add color and such on this too.
Is there any pointer on how to make this on OR tools?
Edit 1:
Based on David Eisenstat's answer, I can find solution. Visualizing this solution based on JohanC's answer, I get this result.
Can I get the pathing made from google-OR tools?
Edit 2:
Using hamilton path from "Hamiltonian" path using Python
I could generate somewhat correct pathing.
But it feels so weird since OR tools already calculate the pathing, and I have to recalculate the path. The path generated from "Hamiltonian" path using Python doesn't show all possible combinations. If I can take the path from OR tools, I think that would be my best interest.
As I don't have experience with OR-tools, here is an approach with Z3.
The initial board is represented by numbers for the end points, one number for each color. The idea is a bit similar to how Sudoku is represented.
Each other cell on the board will get either a value for zero, or a number. This number should be equal to exactly two of its neighbors.
The initial endpoints should have exactly one neighbor with its color.
from z3 import Solver, Sum, Int, If, And, Or, sat
def plot_solution(S):
import matplotlib.pyplot as plt
ax = plt.gca()
colors = plt.cm.tab10.colors
for i in range(M):
for j in range(N):
if board[i][j] != 0:
ax.scatter(j, i, s=500, color=colors[board[i][j]])
if S[i][j] != 0:
for k in range(M):
for l in range(N):
if abs(k - i) + abs(l - j) == 1 and S[i][j] == S[k][l]:
ax.plot([j, l], [i, k], color=colors[S[i][j]], lw=15)
ax.set_ylim(M - 0.5, -0.5)
ax.set_xlim(-0.5, N - 0.5)
ax.set_aspect('equal')
ax.set_facecolor('black')
ax.set_yticks([i + 0.5 for i in range(M - 1)], minor=True)
ax.set_xticks([j + 0.5 for j in range(N - 1)], minor=True)
ax.grid(b=True, which='minor', color='white')
ax.set_xticks([])
ax.set_yticks([])
ax.tick_params(axis='both', which='both', length=0)
plt.show()
board = [[1, 0, 0, 2, 3],
[0, 0, 0, 4, 0],
[0, 0, 4, 0, 0],
[0, 2, 3, 0, 5],
[0, 1, 5, 0, 0]]
M = len(board)
N = len(board[0])
B = [[Int(f'B_{i}_{j}') for j in range(N)] for i in range(M)]
s = Solver()
s.add(([If(board[i][j] != 0, B[i][j] == board[i][j], And(B[i][j] >= 0, B[i][j] < 10))
for j in range(N) for i in range(M)]))
for i in range(M):
for j in range(N):
same_neighs_ij = Sum([If(B[i][j] == B[k][l], 1, 0)
for k in range(M) for l in range(N) if abs(k - i) + abs(l - j) == 1])
if board[i][j] != 0:
s.add(same_neighs_ij == 1)
else:
s.add(Or(same_neighs_ij == 2, B[i][j] == 0))
if s.check() == sat:
m = s.model()
S = [[m[B[i][j]].as_long() for j in range(N)] for i in range(M)]
print(S)
plot_solution(S)
Solution:
[[1, 2, 2, 2, 3],
[1, 2, 4, 4, 3],
[1, 2, 4, 3, 3],
[1, 2, 3, 3, 5],
[1, 1, 5, 5, 5]]
As mentioned in the comments, a possible requirement is that all cells would need to be colored. This would need a more complicated approach. Here is an example of such a configuration for which the above code could create a solution that connects all end points without touching all cells:
board = [[0, 1, 2, 0, 0, 0, 0],
[1, 3, 4, 0, 3, 5, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 2, 0, 4, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 5, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0]]
The best way is probably with AddCircuit. This constraint takes a directed graph where each arc is labeled with a literal and requires that the arcs labeled true form a subgraph where each node has in- and out-degree 1, and further that there is at most one cycle that is not a self-loop. By forcing an arc from the end to the beginning, we can use this constraint type to require that there is a single path from the beginning to the end.
The documentation is somewhat poor, so here's a working code sample. I'll leave the drawing part to you.
import collections
from ortools.sat.python import cp_model
def validate_board_and_count_colors(board):
assert isinstance(board, list)
assert all(isinstance(row, list) for row in board)
assert len(set(map(len, board))) == 1
colors = collections.Counter(square for row in board for square in row)
del colors[0]
assert all(count == 2 for count in colors.values())
num_colors = len(colors)
assert set(colors.keys()) == set(range(1, num_colors + 1))
return num_colors
def main(board):
num_colors = validate_board_and_count_colors(board)
model = cp_model.CpModel()
solution = [
[square or model.NewIntVar(1, num_colors, "") for (j, square) in enumerate(row)]
for (i, row) in enumerate(board)
]
true = model.NewBoolVar("")
model.AddBoolOr([true])
for color in range(1, num_colors + 1):
endpoints = []
arcs = []
for i, row in enumerate(board):
for j, square in enumerate(row):
if square == color:
endpoints.append((i, j))
else:
arcs.append(((i, j), (i, j)))
if i < len(board) - 1:
arcs.append(((i, j), (i + 1, j)))
if j < len(row) - 1:
arcs.append(((i, j), (i, j + 1)))
(i1, j1), (i2, j2) = endpoints
k1 = i1 * len(row) + j1
k2 = i2 * len(row) + j2
arc_variables = [(k2, k1, true)]
for (i1, j1), (i2, j2) in arcs:
k1 = i1 * len(row) + j1
k2 = i2 * len(row) + j2
edge = model.NewBoolVar("")
if k1 == k2:
model.Add(solution[i1][j1] != color).OnlyEnforceIf(edge)
arc_variables.append((k1, k1, edge))
else:
model.Add(solution[i1][j1] == color).OnlyEnforceIf(edge)
model.Add(solution[i2][j2] == color).OnlyEnforceIf(edge)
forward = model.NewBoolVar("")
backward = model.NewBoolVar("")
model.AddBoolOr([edge, forward.Not()])
model.AddBoolOr([edge, backward.Not()])
model.AddBoolOr([edge.Not(), forward, backward])
model.AddBoolOr([forward.Not(), backward.Not()])
arc_variables.append((k1, k2, forward))
arc_variables.append((k2, k1, backward))
model.AddCircuit(arc_variables)
solver = cp_model.CpSolver()
status = solver.Solve(model)
if status == cp_model.OPTIMAL:
for row in solution:
print("".join(str(solver.Value(x)) for x in row))
if __name__ == "__main__":
main(
[
[1, 0, 0, 2, 3],
[0, 0, 0, 4, 0],
[0, 0, 4, 0, 0],
[0, 2, 3, 0, 5],
[0, 1, 5, 0, 0],
]
)

What is the best approach to solve this problem?

If an array contained [1, 10, 3, 5, 2, 7] and k = 2, combine the set as {110, 35, 27}, sort the set {27, 35, 110} and split the set into array as [2, 7, 3, 5, 1, 10]
Here is a way to implement this in JavaScript:
const k = 2;
const arr = [1, 10, 3, 5, 2, 7];
// STEP 1 - Combine the set by k pair number
const setCombined = []
for(let i = 0; i < arr.length; ++i) {
if(i % k === 0) {
setCombined.push(parseInt(arr.slice(i, i + k ).join('')))
}
}
console.log('STEP1 - combined: \n', setCombined);
// STEP 2 - Sort
const sortedArray = setCombined.sort((a, b) => a - b)
console.log('STEP2 - sorted: \n', sortedArray);
// STEP 3 - Split sorted
const splitArray = sortedArray.join('').split('').map(e => parseInt(e))
console.log('STEP3 - split: \n', splitArray);
I was not sure though when you said to combine set, if you really ment to keep only unique values or not... Let me know

How to sort array of 0 and 1 of length n in O(n) time and O(1) space? And can we generalize this to array of 0, 1, 2, ...?

I would like to sort an array of 0 and 1. I have to sort it in linear time and in constant space. How can I do this without explicitly counting the number of 0 and 1?
I did something like this:
sort(array):
Q0 = Queue()
Q1 = Queue()
for i in (0, n-1):
if array[i] == 0:
Q0.push(array[i])
if array[i] == 1:
Q1.push(array[i])
j = 0
while Q0:
array[j] = Q0.pop()
j += 1
while Q1:
array[j] = Q1.pop()
j += 1
I think my solution is correct and has O(n) time but I am not sure of O(1) space. Any help?
Also, can we generalize the sorting to 0, 1, 2 arrays?
Here is (tested/working) Python:
# sort array of length n containing values of only 0 or 1
# in time O(n) and space O(1)
a = [1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
first = 0
last = len(a)-1
print a
# note: don't need temp since values are only 0 and 1
while(last>first):
if a[first] == 1:
a[first] = a[last]
a[last] = 1
last -= 1
else:
first += 1
print a
The idea is to swap all the ones to the end of the array and the zeros to the beginning of the array by keeping two pointers. i points to the first index that has a 1.
Here is a pseudo-code:
i = 1
for (j = 1 to n)
if(a[j] == 0)
swap(a[i], a[j])
i++

Generating integer partition by its number

I'm trying to generate decent partition of given integer number N numbered K in lexicographical order, e.g. for N = 5, K = 3 we got:
5 = 1 + 1 + 1 + 1 + 1
5 = 1 + 1 + 1 + 2
5 = 1 + 1 + 3
5 = 1 + 2 + 2
5 = 1 + 4
5 = 2 + 3
5 = 5
And the third one is 1 + 1 + 3.
How can I generate this without generating every partition(in C language, but most of all I need algorithm)?
Going to find maximal number in partition(assuming we can find number of partitions d[i][j], where i is number and j is maximal integer in its partition), then decrease the original number and number we are looking for. So yes, I'm trying to use dynamic programming. Still working on code.
This doesn't work at all:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
FILE *F1, *F2;
main()
{
long long i, j, p, n, k, l, m[102][102];
short c[102];
F1 = fopen("num2part.in", "r");
F2 = fopen ("num2part.out", "w");
n = 0;
fscanf (F1, "%lld %lld", &n, &k);
p = 0;
m[0][0] = 1;
for ( i = 0; i <= n; i++)
{
for (j = 1; j <= i; j++)
{
m[i][j] = m[i - j][j] + m[i][j - 1];
}
for (j = i + 1; j <= n; j++)
{
m[i][j] = m[i][i];
}
}
l = n;
p = n;
j = n;
while (k > 0)
{
while ( k < m[l][j])
{
if (j == 0)
{
while (l > 0)
{
c[p] = 1;
p--;
l--;
}
break;
}
j--;
}
k -=m[l][j];
c[p] = j + 1;
p--;
l -= c[p + 1];
}
//printing answer here, answer is contained in array from c[p] to c[n]
}
Here is some example Python code that generates the partitions:
cache = {}
def p3(n,val=1):
"""Returns number of ascending partitions of n if all values are >= val"""
if n==0:
return 1 # No choice in partitioning
key = n,val
if key in cache:
return cache[key]
# Choose next value x
r = sum(p3(n-x,x) for x in xrange(val,n+1))
cache[key]=r
return r
def ascending_partition(n,k):
"""Generate the k lexicographically ordered partition of n into integer parts"""
P = []
val = 1 # All values must be greater than this
while n:
# Choose the next number
for x in xrange(val,n+1):
count = p3(n-x,x)
if k >= count:
# Keep trying to find the correct digit
k -= count
elif count: # Check that there are some valid positions with this digit
# This must be the correct digit for this location
P.append(x)
n -= x
val = x
break
return P
n=5
for k in range(p3(n)):
print k,ascending_partition(n,k)
It prints:
0 [1, 1, 1, 1, 1]
1 [1, 1, 1, 2]
2 [1, 1, 3]
3 [1, 2, 2]
4 [1, 4]
5 [2, 3]
6 [5]
This can be used to generate an arbitrary partition without generating all the intermediate ones. For example, there are 9253082936723602 partitions of 300.
print ascending_partition(300,10**15)
prints
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 5, 7, 8, 8, 11, 12, 13, 14, 14, 17, 17, 48, 52]
def _yieldParts(num,lt):
''' It generate a comination set'''
if not num:
yield ()
for i in range(min(num,lt),0,-1):
for parts in _yieldParts(num-i,i):
yield (i,)+parts
def patition(number,kSum,maxIntInTupple):
''' It generates a comination set with sum of kSum is equal to number
maxIntInTupple is for maximum integer can be in tupple'''
for p in _yieldParts(number,maxIntInTupple):
if(len(p) <=kSum):
if(len(p)<kSum):
while len(p) < kSum:
p+=(0,)
print p
patition(40,8,40)
Output:
-------
(40,0,0,0,0,0,0,0)
(39,1,0,0,0,0,0,0)
.
.
.
.

How to find the insertion point in an array using binary search?

The basic idea of binary search in an array is simple, but it might return an "approximate" index if the search fails to find the exact item. (we might sometimes get back an index for which the value is larger or smaller than the searched value).
For looking for the exact insertion point, it seems that after we got the approximate location, we might need to "scan" to left or right for the exact insertion location, so that, say, in Ruby, we can do arr.insert(exact_index, value)
I have the following solution, but the handling for the part when begin_index >= end_index is a bit messy. I wonder if a more elegant solution can be used?
(this solution doesn't care to scan for multiple matches if an exact match is found, so the index returned for an exact match may point to any index that correspond to the value... but I think if they are all integers, we can always search for a - 1 after we know an exact match is found, to find the left boundary, or search for a + 1 for the right boundary.)
My solution:
DEBUGGING = true
def binary_search_helper(arr, a, begin_index, end_index)
middle_index = (begin_index + end_index) / 2
puts "a = #{a}, arr[middle_index] = #{arr[middle_index]}, " +
"begin_index = #{begin_index}, end_index = #{end_index}, " +
"middle_index = #{middle_index}" if DEBUGGING
if arr[middle_index] == a
return middle_index
elsif begin_index >= end_index
index = [begin_index, end_index].min
return index if a < arr[index] && index >= 0 #careful because -1 means end of array
index = [begin_index, end_index].max
return index if a < arr[index] && index >= 0
return index + 1
elsif a > arr[middle_index]
return binary_search_helper(arr, a, middle_index + 1, end_index)
else
return binary_search_helper(arr, a, begin_index, middle_index - 1)
end
end
# for [1,3,5,7,9], searching for 6 will return index for 7 for insertion
# if exact match is found, then return that index
def binary_search(arr, a)
puts "\nSearching for #{a} in #{arr}" if DEBUGGING
return 0 if arr.empty?
result = binary_search_helper(arr, a, 0, arr.length - 1)
puts "the result is #{result}, the index for value #{arr[result].inspect}" if DEBUGGING
return result
end
arr = [1,3,5,7,9]
b = 6
arr.insert(binary_search(arr, b), b)
p arr
arr = [1,3,5,7,9,11]
b = 6
arr.insert(binary_search(arr, b), b)
p arr
arr = [1,3,5,7,9]
b = 60
arr.insert(binary_search(arr, b), b)
p arr
arr = [1,3,5,7,9,11]
b = 60
arr.insert(binary_search(arr, b), b)
p arr
arr = [1,3,5,7,9]
b = -60
arr.insert(binary_search(arr, b), b)
p arr
arr = [1,3,5,7,9,11]
b = -60
arr.insert(binary_search(arr, b), b)
p arr
arr = [1]
b = -60
arr.insert(binary_search(arr, b), b)
p arr
arr = [1]
b = 60
arr.insert(binary_search(arr, b), b)
p arr
arr = []
b = 60
arr.insert(binary_search(arr, b), b)
p arr
and result:
Searching for 6 in [1, 3, 5, 7, 9]
a = 6, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = 6, arr[middle_index] = 7, begin_index = 3, end_index = 4, middle_index = 3
a = 6, arr[middle_index] = 5, begin_index = 3, end_index = 2, middle_index = 2
the result is 3, the index for value 7
[1, 3, 5, 6, 7, 9]
Searching for 6 in [1, 3, 5, 7, 9, 11]
a = 6, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = 6, arr[middle_index] = 9, begin_index = 3, end_index = 5, middle_index = 4
a = 6, arr[middle_index] = 7, begin_index = 3, end_index = 3, middle_index = 3
the result is 3, the index for value 7
[1, 3, 5, 6, 7, 9, 11]
Searching for 60 in [1, 3, 5, 7, 9]
a = 60, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = 60, arr[middle_index] = 7, begin_index = 3, end_index = 4, middle_index = 3
a = 60, arr[middle_index] = 9, begin_index = 4, end_index = 4, middle_index = 4
the result is 5, the index for value nil
[1, 3, 5, 7, 9, 60]
Searching for 60 in [1, 3, 5, 7, 9, 11]
a = 60, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = 60, arr[middle_index] = 9, begin_index = 3, end_index = 5, middle_index = 4
a = 60, arr[middle_index] = 11, begin_index = 5, end_index = 5, middle_index = 5
the result is 6, the index for value nil
[1, 3, 5, 7, 9, 11, 60]
Searching for -60 in [1, 3, 5, 7, 9]
a = -60, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 1, middle_index = 0
a = -60, arr[middle_index] = 9, begin_index = 0, end_index = -1, middle_index = -1
the result is 0, the index for value 1
[-60, 1, 3, 5, 7, 9]
Searching for -60 in [1, 3, 5, 7, 9, 11]
a = -60, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 1, middle_index = 0
a = -60, arr[middle_index] = 11, begin_index = 0, end_index = -1, middle_index = -1
the result is 0, the index for value 1
[-60, 1, 3, 5, 7, 9, 11]
Searching for -60 in [1]
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 0, middle_index = 0
the result is 0, the index for value 1
[-60, 1]
Searching for 60 in [1]
a = 60, arr[middle_index] = 1, begin_index = 0, end_index = 0, middle_index = 0
the result is 1, the index for value nil
[1, 60]
Searching for 60 in []
[60]
This is the code from Java's java.util.Arrays.binarySearch as included in Oracles Java:
/**
* Searches the specified array of ints for the specified value using the
* binary search algorithm. The array must be sorted (as
* by the {#link #sort(int[])} method) prior to making this call. If it
* is not sorted, the results are undefined. If the array contains
* multiple elements with the specified value, there is no guarantee which
* one will be found.
*
* #param a the array to be searched
* #param key the value to be searched for
* #return index of the search key, if it is contained in the array;
* otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>. The
* <i>insertion point</i> is defined as the point at which the
* key would be inserted into the array: the index of the first
* element greater than the key, or <tt>a.length</tt> if all
* elements in the array are less than the specified key. Note
* that this guarantees that the return value will be >= 0 if
* and only if the key is found.
*/
public static int binarySearch(int[] a, int key) {
return binarySearch0(a, 0, a.length, key);
}
// Like public version, but without range checks.
private static int binarySearch0(int[] a, int fromIndex, int toIndex,
int key) {
int low = fromIndex;
int high = toIndex - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
int midVal = a[mid];
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return mid; // key found
}
return -(low + 1); // key not found.
}
The algorithm has proven to be appropriate and I like the fact, that you instantly know from the result whether it is an exact match or a hint on the insertion point.
This is how I would translate this into ruby:
# Inserts the specified value into the specified array using the binary
# search algorithm. The array must be sorted prior to making this call.
# If it is not sorted, the results are undefined. If the array contains
# multiple elements with the specified value, there is no guarantee
# which one will be found.
#
# #param [Array] array the ordered array into which value should be inserted
# #param [Object] value the value to insert
# #param [Fixnum|Bignum] from_index ordered sub-array starts at
# #param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# #return [Array] the resulting array
def self.insert(array, value, from_index=0, to_index=array.length)
array.insert insertion_point(array, value, from_index, to_index), value
end
# Searches the specified array for an insertion point ot the specified value
# using the binary search algorithm. The array must be sorted prior to making
# this call. If it is not sorted, the results are undefined. If the array
# contains multiple elements with the specified value, there is no guarantee
# which one will be found.
#
# #param [Array] array the ordered array into which value should be inserted
# #param [Object] value the value to insert
# #param [Fixnum|Bignum] from_index ordered sub-array starts at
# #param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# #return [Fixnum|Bignum] the position where value should be inserted
def self.insertion_point(array, value, from_index=0, to_index=array.length)
raise(ArgumentError, 'Invalid Range') if from_index < 0 || from_index > array.length || from_index > to_index || to_index > array.length
binary_search = _binary_search(array, value, from_index, to_index)
if binary_search < 0
-(binary_search + 1)
else
binary_search
end
end
# Searches the specified array for the specified value using the binary
# search algorithm. The array must be sorted prior to making this call.
# If it is not sorted, the results are undefined. If the array contains
# multiple elements with the specified value, there is no guarantee which
# one will be found.
#
# #param [Array] array the ordered array in which the value should be searched
# #param [Object] value the value to search for
# #param [Fixnum|Bignum] from_index ordered sub-array starts at
# #param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# #return [Fixnum|Bignum] if > 0 position of value, otherwise -(insertion_point + 1)
def self.binary_search(array, value, from_index=0, to_index=array.length)
raise(ArgumentError, 'Invalid Range') if from_index < 0 || from_index > array.length || from_index > to_index || to_index > array.length
_binary_search(array, value, from_index, to_index)
end
private
# Like binary_search, but without range checks.
#
# #param [Array] array the ordered array in which the value should be searched
# #param [Object] value the value to search for
# #param [Fixnum|Bignum] from_index ordered sub-array starts at
# #param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# #return [Fixnum|Bignum] if > 0 position of value, otherwise -(insertion_point + 1)
def self._binary_search(array, value, from_index, to_index)
low = from_index
high = to_index - 1
while low <= high do
mid = (low + high) / 2
mid_val = array[mid]
if mid_val < value
low = mid + 1
elsif mid_val > value
high = mid - 1
else
return mid # value found
end
end
-(low + 1) # value not found.
end
Code returns the same values as OP provided for his test data.
Update 2020
Actually, the insertion problem of binary search has been well researched. There is a left insertion point and right insertion point. Code can be found on Wikipedia and Rosetta Code. For example, to find the left insertion point, the code is:
BinarySearch_Left(A[0..N-1], value) {
low = 0
high = N - 1
while (low <= high) {
// invariants: value > A[i] for all i < low
value <= A[i] for all i > high
mid = (low + high) / 2
if (A[mid] >= value)
high = mid - 1
else
low = mid + 1
}
return low
}
One note is about the overflow bug, so mid really should be found as low + floor((high - low) / 2).
Earlier answer:
Actually, instead of checking for begin_index >= end_index, it can be better handled using begin_index > end_index, and the solution is much cleaner:
def binary_search_helper(arr, a, begin_index, end_index)
if begin_index > end_index
return begin_index
else
middle_index = (begin_index + end_index) / 2
if arr[middle_index] == a
return middle_index
elsif a > arr[middle_index]
return binary_search_helper(arr, a, middle_index + 1, end_index)
else
return binary_search_helper(arr, a, begin_index, middle_index - 1)
end
end
end
# for [1,3,5,7,9], searching for 6 will return index for 7 for insertion
# if exact match is found, then return that index
def binary_search(arr, a)
return binary_search_helper(arr, a, 0, arr.length - 1)
end
And using iteration instead of recursion may be faster and have less worry for stack overflow.
sample for left insertion:
def binary_search(arr, target):
left = 0
right = len(arr) - 1
while left <= right:
mid = (left + right) >> 1
if arr[mid] < target:
left = mid + 1
else:
right = mid - 1
return left
sample for right insertion:
def binary_search(arr, target):
left = 0
right = len(arr) - 1
while left <= right:
mid = (left + right) >> 1
if arr[mid] <= target:
left = mid + 1
else:
right = mid - 1
return left # add - 1 for right most index of target
sample for is present:
def binary_search(arr, target):
left = 0
right = len(arr) - 1
while left <= right:
mid = (left + right) >> 1
if n < target:
left = mid + 1
elif n > target:
right = mid - 1
else:
return True # or return mid for index
return False # or return -1 for not found
sample test case:
arr = [1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 10]
result = binary_search(arr, 5)

Resources