A* Search implementation for the "Travelling Salesman Problem" algorithm

I've been struggling for a long time after writing my A* Search algorithm with the fact that when the number of cities is greater than 8, the algorithm won't return any answer (or is ridiculously slow).
The cities are stored in a 2-d array where point cityList[x][y] is the distance between city x and city y (it is also the same as cityList[y][x]).
It's slightly messy as I had to use both the city class and the beginning of each route to remember route lengths and which routes had already been attempted.
A new route is also created for each city that is added on.
Could anyone help to optimize it or ensure it can work on an increasingly large number of cities.
class city(object):
    """A city in the tour, tracking which other cities may still be tried
    as the next stop after this one (``possibleChildren``).
    """

    def __init__(self, number, num_cities=None):
        """Create city ``number`` with every other city as a candidate next stop.

        number     -- this city's index in the distance matrix
        num_cities -- total number of cities; defaults to the module-level
                      global ``size`` for backward compatibility with
                      existing callers.
        """
        global size
        self.number = number
        if num_cities is None:
            num_cities = size  # original behaviour: read the global
        # Every other city starts out as a candidate next stop.
        self.possibleChildren = [i for i in range(num_cities) if i != number]

    def getNo(self):
        """Return this city's number."""
        return self.number

    def getSize(self):
        """Return how many candidate next stops remain."""
        return len(self.possibleChildren)

    def getOptions(self, i):
        """Return the i-th remaining candidate next stop.

        Fixed: no longer stores ``i`` on the instance — a getter should not
        mutate state.
        """
        return self.possibleChildren[i]

    def deleteNo(self, option):
        """Remove ``option`` from the candidates, if present."""
        if option in self.possibleChildren:
            self.possibleChildren.remove(option)

    def addFinalStep(self, beginning):
        """Re-add the tour's first city so a completed route can close the loop."""
        self.possibleChildren.append(beginning)
def Astar():
    """Build a tour over the global ``cityList`` distance matrix.

    Reads module globals: ``size`` (number of cities), ``cityList``
    (symmetric distance matrix) and the ``city`` class.

    Each route in routeList is [distance so far, cities visited, city, city, ...].
    Prints and returns the length of the first tour that loops back to its
    starting city.
    """
    routeList = []
    #routeList[i][0] = distance travelled, routeList[i][1] = number of cities past
    for i in range(0, size):
        newRoute = []
        newCity = city(i)
        newRoute.append(0)
        newRoute.append(1)
        newRoute.append(newCity)
        routeList.append(newRoute)
    while True:
        toUse = 0
        smallest = -2  # sentinel: "no candidate found yet"
        #Now search through the routeList to find the shortest route length
        for i in range(0, len(routeList)):
            #getSize() checks if there are any cities that can be visited by
            #this route that have not already been tried, this list is
            #stored in the city class
            if routeList[i][1 + routeList[i][1]].getSize() != 0:
                if routeList[i][0] < smallest or smallest == -2:
                    smallest = routeList[i][0]
                    toUse = i
            elif routeList[i][1 + routeList[i][1]].getSize() == 0:
                # NOTE(review): list.remove() deletes by VALUE, not index —
                # this either raises ValueError or deletes the wrong entry;
                # `del routeList[i]` was probably intended, and mutating the
                # list while iterating over its indices is itself suspect.
                routeList.remove(i)
        routeRec = []
        #Creates the new route (a copy of the chosen route)
        for i in range (0, len(routeList[toUse])):
            routeRec.append(routeList[toUse][i])
        # The route's current end city lives at index 1 + <cities visited>.
        currentCity = routeRec[1 + routeRec[1]]
        possibleChildren = []
        for i in range(0, currentCity.getSize()):
            possibleChildren.append(currentCity.getOptions(i))
        smallest = 0  # reused here as "shortest edge found" (0 = none yet)
        newCityNo = 2
        for i in range(0, size):
            if(i in possibleChildren):
                #Finds the closest city
                if smallest > cityList[currentCity.getNo()][i] or smallest == 0:
                    newCityNo = i
                    smallest = cityList[currentCity.getNo()][i]
        #If the new city to visit is the same as the first, the algorithm
        #has looped to the beginning and finished
        if newCityNo == routeRec[2].getNo():
            print("Tour Length")
            print(routeRec[0] + smallest)
            print("Route: ")
            for i in range(2, len(routeRec)):
                print(routeRec[i].getNo())
            return(routeRec[0] + smallest)
        #deletes all cities that have been tried
        routeList[toUse][1 + routeRec[1]].deleteNo(newCityNo)
        nextCity = city(newCityNo)
        #deletes all cities that are already in the route
        for i in range(2, 2 + routeRec[1]):
            nextCity.deleteNo(routeRec[i].getNo())
        #When the route is full, it can return to the first city
        routeRec[1] = routeRec[1] + 1
        print(routeRec[1])  # NOTE(review): debug output left in
        if routeRec[1] == size:
            #first city added to potential cities to visit
            nextCity.addFinalStep(routeRec[2].getNo())
        routeRec.append(nextCity)
        routeRec[0] = routeRec[0] + smallest
        routeList.append(routeRec)

You need to do some decomposition here.
You need data structure for saving and getting nodes to expand, in this case heap.
You also need some heuristic which you can pass it as a parameter.
Implementation of A*
# A* Algorithm
# heuristic - Heuristic function expecting a state as operand
# world - Global context, passed through to expandfn
# sstate - Start state
# endstate - End state, state to find
# expandfn - function(world, state) returning an iterable of tuples
# (nstate, cost) where nstate is a state reached from the expanded state
# and cost is the 'size' of the edge.
def Astar(heuristic, world, sstate, endstate, expandfn):
    """Best-first search from sstate to endstate.

    Returns (path, pathlen) where path lists the states AFTER sstate up to
    and including endstate (the start state is excluded, matching the
    original behaviour), or None when endstate is unreachable.
    """
    import heapq  # stdlib binary heap used as the priority queue

    been = set()  # states already expanded
    # Heap entries are (priority, tie, node) with node = (state, pathlen,
    # parent node). `tie` is a monotonically increasing counter so the heap
    # never needs to compare two node tuples (states may not be orderable).
    heap = []
    tie = 0
    heapq.heappush(heap, (0, tie, (sstate, 0, None)))
    end = None
    while heap:
        _, _, node = heapq.heappop(heap)
        state, pathlen, _parent = node
        # Skip stale duplicate entries for already-expanded states.
        if state in been:
            continue
        # Goal reached. (Fixed: the original used the invalid operator `===`.)
        if state == endstate:
            end = node
            break
        been.add(state)
        # For each state reached from the current state, enqueue it with
        # weight 'path length + heuristic'.
        for nstate, cost in expandfn(world, state):
            newlen = pathlen + cost
            tie += 1
            heapq.heappush(heap, (newlen + heuristic(nstate), tie, (nstate, newlen, node)))
    if end is None:
        return None
    # Walk the parent links back to the start to recover the path.
    pathlen = end[1]
    res = []
    while end[2] is not None:
        res.append(end[0])
        end = end[2]
    res.reverse()
    return res, pathlen

Related

Create a space-efficient Snapshot Set

I received this interview question that I didn't know how to solve.
Design a snapshot set functionality.
Once the snapshot is taken, the iterator of the class should only return values that were present in the set at the moment the snapshot was taken.
The class should provide add, remove, and contains functionality. The iterator always returns elements that were present in the snapshot even though the element might be removed from set after the snapshot.
The snapshot of the set is taken when the iterator function is called.
interface SnapshotSet {
void add(int num);
void remove(int num);
boolean contains(int num);
Iterator<Integer> iterator(); // the first call to this function should trigger a snapshot of the set
}
The interviewer said that the space requirement is that we cannot create a copy (snapshot) of the entire list of keys when calling iterator.
The first step is to handle only one iterator being created and being iterated over at a time. The followup question: how to handle the scenario of multiple iterators?
An example:
SnapshotSet set = new SnapshotSet();
set.add(1);
set.add(2);
set.add(3);
set.add(4);
Iterator<Integer> itr1 = set.iterator(); // iterator should return 1, 2, 3, 4 (in any order) when next() is called.
set.remove(1);
set.contains(1); // returns false; because 1 was removed.
Iterator<Integer> itr2 = set.iterator(); // iterator should return 2, 3, 4 (in any order) when next() is called.
I came up with an O(n) space solution where I created a copy of the entire list of keys when calling iterator. The interviewer said this was not space efficient enough.
I think it is fine to have a solution that focuses on reducing space at the cost of time complexity (but the time complexity should still be as efficient as possible).
Here is a solution that makes all operations reasonably fast. So it is like a set that has all history, all the time.
First we'll need to review the idea of a skiplist. Without the snapshot functionality.
What we do is start with a linked list on the bottom which will always be kept in sorted order. Draw that in a line. Half the values are randomly selected to also be part of another linked list that you draw above the first. Then half of those are selected to be part of another linked list, and so on. If the bottom layer has size n, the whole structure usually requires around 2n nodes. (Because 1 + 1/2 + 1/4 + 1/8 + ... = 2.) Each node in the entire 2-dimensional structure has the following data:
value: the value of the node
height: the height of the node in the skip list
next: the next node at the current level (is null at the end)
down: the same value node, one level down (is null at height 0)
And now your set is represented by a stack of nodes whose values are ignored, that points at the starting node at each level.
Here is a basic picture:
set
|
start(3) -> 2
| |
start(2) -> 2 -> 5 -> 9
| | | |
start(1) -> 2 -> 4 -> 5 -> 9
| | | | |
start(0) -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10
Now suppose I want to find whether 8 is in the set. What I do is start from the set, find the topmost start, then:
while True:
if node.next is null or 8 < node.next.value:
if node.down is null:
return False
else:
node = node.down
elif 8 == node.next.value:
return True
else:
node = node.next
In this case we go from set to start(3) to the top 2, down one to 2, forward to 5, down 2x to 5, then go 6, 7, and find 8.
That's contains. To remove we follow the same search idea, but if we find that node.next.value == 5 then we assign node.next = node.next.next, then continue searching.
To add we randomly choose a height (which can be int(-log(random())/log(2))). And then we search forward until we've arrived at that height at a node whose node.next should be our desired new value. Then we do something complicated.
prev_added = null
while node is not null:
if node.next is null or new_value < node.next.value:
if node.height <= desired_height:
adding_node = Node(new_value, node.height, node.next, null)
node.next = adding_node
if prev_added is not null:
prev_added.down = adding_node
prev_added = adding_node
node = node.down
else:
node = node.next
You can verify that expected performance of all three operations is O(log(n)).
So, how do we add snapshotting to this?
First we add a version to the set data structure. This will be tied to snapshot. Next, we replace every single pointer with a linked list of pointers and versions. And now instead of directly modifying pointers, if the top one has an older version than we're now inserting, you add to the head of the list and leave the older version be.
And NOW we can implement a snapshot as follows.
set.version = set.version+1
node = set.start
while node.down is not null:
node = node.down
snapshot = Snapshot(set, set.version, node)
Now snapshotting is very quick. And to traverse a particular past version of the set (including simply iterating over a snapshot) for any pointer we need to traverse back until we get past any too new pointers, and find an old enough one. It turns out that any given pointer will tend to have a fairly small number of pointers, so this has only a modest amount of overhead.
Traversal of the current version of the set is just a question of always looking at the most recent version of a pointer. So it is just an additional layer of indirection, but same expected performance.
And now we have a version of this with all snapshotted versions available forever. It is possible to add garbage collection to reduce how much of a problem that is. But this description is long enough already.
This is a very different but ultimately much better answer than the one I gave at first. The idea is simply to have the data structure be a read-only reasonably well balanced sorted tree. Since it is read-only, it is easy to iterate over it.
But then how do you make modifications? Well, you simply create a new copy of the tree from the modification on up to the root. This will be O(log(n)) new nodes. Better yet the O(log(n)) old nodes that were replaced can be trivially garbage collected if they are not in use.
All operations are O(log(n)) except iteration which is O(n). I also included both an explicit iterator using callbacks, and an implicit one using Python's generators.
And for fun I coded it up in Python.
class TreeNode:
    """Node of an immutable, weight-balanced sorted tree.

    All "mutating" operations (add, remove, merge_right, attach_*) return
    NEW nodes and never modify existing ones, so any old root keeps
    describing the tree exactly as it was — which is what makes cheap
    snapshots possible.

    Attributes:
        value: the payload stored at this node
        count: number of nodes in the subtree rooted here (including self)
        left, right: child subtrees, or None
    """
    def __init__ (self, value, left=None, right=None):
        self.value = value
        # Cache the subtree size so balance decisions are O(1).
        count = 1
        if left is not None:
            count += left.count
        if right is not None:
            count += right.count
        self.count = count
        self.left = left
        self.right = right
    def left_count (self):
        # Size of the left subtree, 0 when there is none.
        if self.left is None:
            return 0
        else:
            return self.left.count
    def right_count (self):
        # Size of the right subtree, 0 when there is none.
        if self.right is None:
            return 0
        else:
            return self.right.count
    def attach_left (self, child):
        # New node for balanced tree with self.left replaced by child.
        if id(child) == id(self.left):
            # Unchanged child: reuse this node (cheap structural sharing).
            return self
        elif child is None:
            return TreeNode(self.value).attach_right(self.right)
        elif child.left_count() < child.right_count() + self.right_count():
            # Balanced enough: just rebuild this node.
            return TreeNode(self.value, child, self.right)
        else:
            # Left side too heavy: rotate so `child` becomes the new root.
            new_right = TreeNode(self.value, child.right, self.right)
            return TreeNode(child.value, child.left, new_right)
    def attach_right (self, child):
        # New node for balanced tree with self.right replaced by child.
        if id(child) == id(self.right):
            return self
        elif child is None:
            return TreeNode(self.value).attach_left(self.left)
        elif child.right_count() < child.left_count() + self.left_count():
            return TreeNode(self.value, self.left, child)
        else:
            # Right side too heavy: rotate so `child` becomes the new root.
            new_left = TreeNode(self.value, self.left, child.left)
            return TreeNode(child.value, new_left, child.right)
    def merge_right (self, other):
        # New node for balanced tree with all of self, then all of other.
        # (Assumes every value in self sorts before every value in other.)
        if other is None:
            return self
        elif self.right is None:
            return self.attach_right(other)
        elif other.left is None:
            return other.attach_left(self)
        else:
            # Merge the two inner edges, then hang the result under the
            # heavier of the two roots to keep the result balanced.
            child = self.right.merge_right(other.left)
            if self.left_count() < other.right_count():
                child = self.attach_right(child)
                return other.attach_left(child)
            else:
                child = other.attach_left(child)
                return self.attach_right(child)
    def add (self, value):
        # Return a tree that also contains `value`; self is unchanged.
        if value < self.value:
            if self.left is None:
                child = TreeNode(value)
            else:
                child = self.left.add(value)
            return self.attach_left(child)
        elif self.value < value:
            if self.right is None:
                child = TreeNode(value)
            else:
                child = self.right.add(value)
            return self.attach_right(child)
        else:
            # Already present: reuse this node unchanged.
            return self
    def remove (self, value):
        # Return a tree without `value`; self is unchanged.
        if value < self.value:
            if self.left is None:
                return self
            else:
                return self.attach_left(self.left.remove(value))
        elif self.value < value:
            if self.right is None:
                return self
            else:
                return self.attach_right(self.right.remove(value))
        else:
            # Removing this node: splice its children together.
            if self.left is None:
                return self.right
            elif self.right is None:
                return self.left
            else:
                return self.left.merge_right(self.right)
    def __str__ (self):
        # Multi-line rendering: left subtree above, right subtree below,
        # each indented one space per level.
        if self.left is None:
            left_lines = []
        else:
            left_lines = str(self.left).split("\n")
            left_lines.pop()
            left_lines = [" " + l for l in left_lines]
        if self.right is None:
            right_lines = []
        else:
            right_lines = str(self.right).split("\n")
            right_lines.pop()
            right_lines = [" " + l for l in right_lines]
        return "\n".join(left_lines + [str(self.value)] + right_lines) + "\n"
    # Pythonic iterator.
    def __iter__ (self):
        # In-order traversal: yields the values in sorted order.
        if self.left is not None:
            yield from self.left
        yield self.value
        if self.right is not None:
            yield from self.right
class SnapshottableSet:
    """Sorted-set facade over immutable TreeNode trees.

    Because TreeNode operations return new roots and never mutate old
    nodes, holding on to a root (as iterator() does) is a free snapshot.
    """

    def __init__ (self, root=None):
        self.root = root

    def contains (self, value):
        """Return True when `value` is in the current version of the set."""
        cursor = self.root
        while cursor is not None:
            if value < cursor.value:
                cursor = cursor.left
            elif cursor.value < value:
                cursor = cursor.right
            else:
                return True
        return False

    def add (self, value):
        """Insert `value` (no-op when already present)."""
        self.root = TreeNode(value) if self.root is None else self.root.add(value)

    def remove (self, value):
        """Delete `value` (no-op when absent or the set is empty)."""
        if self.root is not None:
            self.root = self.root.remove(value)

    # Pythonic built-in approach
    def __iter__ (self):
        root = self.root
        if root is not None:
            yield from root

    # And explicit approach
    def iterator (self):
        """Return a callable yielding the snapshot's values in order.

        Captures the root at call time, so later add/remove calls do not
        affect the values produced. Raises StopIteration when exhausted.
        """
        pending = []
        # Seed the stack with the leftmost spine of the captured root.
        cursor = self.root
        while cursor is not None:
            pending.append(cursor)
            cursor = cursor.left

        def next_value ():
            if not pending:
                raise StopIteration
            top = pending.pop()
            result = top.value
            # Push the leftmost spine of the right subtree for next time.
            walker = top.right
            while walker is not None:
                pending.append(walker)
                walker = walker.left
            return result

        return next_value
# Demo: build a set, snapshot it via iterator(), then mutate the set.
s = SnapshottableSet()
for i in range(10):
    s.add(i)
# iterator() captures the current root, so the removals below do not
# affect what it yields.
it = s.iterator()
for i in range(5):
    s.remove(2*i)
print("Current contents")
for v in s:
    print(v)
print("Original contents")
# NOTE(review): next_value() raises StopIteration when exhausted, so this
# demo deliberately ends with an uncaught-exception traceback.
while True:
    print(it())

Implementing iterative solution in a functionally recursive way with memoization

I am trying to solve the following problem on leetcode: Coin Change 2
Input: amount = 5, coins = [1, 2,5]
Output: 4 Explanation: there are four ways to make up the amount:
5=5
5=2+2+1
5=2+1+1+1
5=1+1+1+1+1
I am trying to implement an iterative solution which essentially simulates/mimic recursion using stack. I have managed to implement it and the solution works, but it exceeds time limit.
I have noticed that the recursive solutions make use of memoization to optimize. I would like to incorporate that in my iterative solution as well, but I am lost on how to proceed.
My solution so far:
# stack to simulate recursion
stack = []
# add starting indexes and sum to stack
#Tuple(x,y) where x is sum, y is index of the coins array input
for i in range(0, len(coins)):
if coins[i]<=amount:
stack.append((coins[i], i))
result = 0
while len(stack)!=0:
c = stack.pop()
currentsum = c[0]
currentindex = c[1]
# can't explore further
if currentsum >amount:
continue
# condition met, increment result
if currentsum == amount:
result = result+1
continue
# add coin at current index to sum if doesn't exceed amount (append call to stack)
if (currentsum+coins[currentindex])<=amount:
stack.append((currentsum+coins[currentindex], currentindex))
#skip coin at current index (append call to stack)
if (currentindex+1)<=len(coins)-1:
stack.append((currentsum, currentindex+1))
return result
I have tried using dictionary to record appends to the stack as follows:
#if the call has not already happened, add to dictionary
if dictionary.get((currentsum, currentindex+1), None) == None:
stack.append((currentsum, currentindex+1))
dictionary[(currentsum, currentindex+1)] = 'visited'
Example, if call (2,1) of sum = 2 and coin-array-index = 1 is made, I append it to dictionary. If the same call is encountered again, I don't append it again. However, it does not work as different combinations can have same sum and index.
Is there anyway I can incorporate memoization in my iterative solution above. I want to do it in a way such that it is functionally same as the recursive solution.
I have managed to figure out the solution. Essentially, I used post order traversal and used a state variable to record the stage of recursion the current call is in. Using the stage, I have managed to go bottom up after going top down.
The solution I came up with is as follows:
def change(self, amount: int, coins: List[int]) -> int:
    """Count coin-change combinations (LeetCode 518) iteratively.

    Simulates the recursion ways(s, i) = ways(s + coins[i], i) + ways(s, i + 1)
    with an explicit stack. Each frame carries an instruction: 'ENTER'
    schedules the children, 'EXIT' combines their memoized results.
    Results are memoized in `d`, keyed by (currentsum, index).
    """
    if amount<=0:
        return 1
    if len(coins) == 0:
        return 0
    d= dict()
    #currentsum, index, instruction
    coins.sort(reverse=True)
    stack = [(0, 0, 'ENTER')]
    calls = 0
    while len(stack)!=0:
        currentsum, index, instruction = stack.pop()
        # Base case: target reached — exactly one way from this state.
        if currentsum == amount:
            d[(currentsum, index)] = 1
            continue
        elif instruction == 'ENTER':
            # Post-order: schedule our own EXIT first, then the children,
            # so both children are computed before we combine them.
            stack.append((currentsum, index, 'EXIT'))
            if (index+1)<=(len(coins)-1):
                if d.get((currentsum, index+1), None) == None:
                    stack.append((currentsum, index+1, 'ENTER'))
            newsum = currentsum + coins[index]
            if newsum<=amount:
                if d.get((newsum, index), None) == None:
                    stack.append((newsum, index, 'ENTER'))
        elif instruction == 'EXIT':
            # Combine: use the coin at `index` again (left) or skip it (right).
            # Missing memo entries mean the branch was pruned -> 0 ways.
            newsum = currentsum + coins[index]
            left = 0 if d.get((newsum, index), None) == None else d.get((newsum, index))
            right = 0 if d.get((currentsum, index+1), None) == None else d.get((currentsum, index+1))
            d[(currentsum, index)] = left+right
            calls = calls+1  # debug counter: number of combined subproblems
    # Debug output left in by the author. Indentation reconstructed from a
    # flattened paste — assumed to print once after the loop; TODO confirm.
    print(calls)
    return d[(0,0)]

Implementing a PriorityQueue Algorithm

Here is my implementation of a PriorityQueue algorithm. I have a feeling that my pop function is wrong. But I am not sure where exactly it is wrong. I have checked multiple times on where my logic went wrong but it seems to be perfectly correct(checked with CLRS pseudo code).
class PriorityQueue:
    """Array-based priority queue implementation.

    NOTE(review): as the answers below explain, parent() is wrong for a
    0-based heap (it should halve i - 1), which silently breaks the heap
    invariant for some insertion orders.
    """
    def __init__(self):
        """Initially empty priority queue."""
        self.queue = []
        # min_index is only ever 0 or None — see _find_min / pop.
        self.min_index = None
    def parent(self, i):
        # BUG (see answer below): with children at 2i+1 and 2i+2, the parent
        # of i is (i-1)//2; e.g. parent(2) should be 0 but this returns 1.
        return int(i/2)
    def left(self, i):
        # Left child index (0-based heap layout).
        return 2*i+1
    def right(self, i):
        # Right child index (0-based heap layout).
        return 2*i+2
    def min_heapify(self, heap_size, i):
        #Min heapify as written in CLRS
        # Sift the element at i down until both children are larger.
        smallest = i
        l = self.left(i)
        r = self.right(i)
        #print([l,r,len(self.queue),heap_size])
        # NOTE(review): doubly guarded against running off the end — both
        # `<= heap_size` (off by one; should be `<`) AND try/except.
        # One of the two guards should go.
        try:
            if l <= heap_size and self.queue[l] < self.queue[i]:
                smallest = l
            else:
                smallest = i  # NOTE(review): redundant, smallest is already i
        except IndexError:
            pass
        try:
            if r <= heap_size and self.queue[r] < self.queue[smallest]:
                smallest = r
        except IndexError:
            pass
        if smallest != i:
            self.queue[i], self.queue[smallest] = self.queue[smallest], self.queue[i]
            self.min_heapify(heap_size, smallest)
    def heap_decrease_key(self, i, key):
        #Implemented as specified in CLRS
        if key > self.queue[i]:
            raise ValueError("new key is larger than current key")
        #self.queue[i] = key
        # Bubble the element up while it is smaller than its parent.
        # (Relies on the buggy parent() above.)
        while i > 0 and self.queue[self.parent(i)] > self.queue[i]:
            self.queue[i], self.queue[self.parent(i)] = self.queue[self.parent(i)], self.queue[i]
            i = self.parent(i)
    def __len__(self):
        # Number of elements in the queue.
        return len(self.queue)
    def append(self, key):
        """Inserts an element in the priority queue."""
        if key is None:
            raise ValueError('Cannot insert None in the queue')
        self.queue.append(key)
        heap_size = len(self.queue)
        self.heap_decrease_key(heap_size - 1, key)
    def min(self):
        """The smallest element in the queue."""
        if len(self.queue) == 0:
            return None
        return self.queue[0]
    def pop(self):
        """Removes the minimum element in the queue.
        Returns:
            The value of the removed element.
        """
        if len(self.queue) == 0:
            return None
        self._find_min()
        popped_key = self.queue[self.min_index]
        # Move the last element to the root, shrink, and restore the heap.
        self.queue[0] = self.queue[len(self.queue)-1]
        del self.queue[-1]
        self.min_index = None
        self.min_heapify(len(self.queue), 0)
        return popped_key
    def _find_min(self):
        # Computes the index of the minimum element in the queue.
        #
        # This method may crash if called when the queue is empty.
        if self.min_index is not None:
            return
        # NOTE(review): `min` shadows the builtin and is assigned but never
        # used; min_index only ever becomes 0 here.
        min = self.queue[0]
        self.min_index = 0
Any hint or input will be highly appreciated
The main issue is that the parent function is wrong. As it should do the opposite from the left and right methods, you should first subtract 1 from i before halving that value:
def parent(self, i):
return int((i-1)/2)
Other things to note:
You don't really have a good use for the member self.min_index. It is either 0 or None, and the difference is not really used in your code, as it follows directly from whether the heap is empty or not. This also means you don't need the method _find_min, (which in itself is strange: you assign to min, but never use that). Any way, drop that method, and the line where you call it. Also drop the line where you assign None to self.min_index, and the only other place where you read the value, just use 0.
You have two ways to protect against index errors in the min_heapify method: <= heapsize and a try block. The first protection should really have < instead of <=, but you should use only one way, not two. So either test the less-than, or trap the exception.
The else block with smallest = i is unnecessary, because at that time smallest == i.
min_heapify has a first parameter that always receives the full size of the heap. So it is an unnecessary parameter. It would also not make sense to ever call this method with another value for it. So drop that argument from the method definition and all calls. And then define heap_size = len(self.queue) as a local name in that function
In heap_decrease_key you commented out the assignment #self.queue[i] = key, which is fine as long as you never call this method to really decrease a key. But although you never do that from "inside" the class, the user of the class may well want to use it in that way (since that is what the method's name is suggesting). So better uncomment that assignment.
With the above changes, your instance would only have queue as its data property. This is fine, but you could consider to let PriorityQueue inherit from list, so that you don't need this property either, and can just work with the list that you inherit. By consequence, you should then replace self.queue with self throughout your code, and you can drop the __init__ and __len__ methods, since the list implementation of those is just what you need. A bit of care is needed in the case where you want to call a list original method, when you have overridden it, like append. In that case use super().append.
With all of the above changes applied:
class PriorityQueue(list):
    """Array-based min-priority queue stored directly in the inherited list."""

    def parent(self, i):
        """Index of i's parent in the 0-based heap layout."""
        return int((i-1)/2)

    def left(self, i):
        """Index of i's left child."""
        return i * 2 + 1

    def right(self, i):
        """Index of i's right child."""
        return i * 2 + 2

    def min_heapify(self, i):
        """Sift the element at index i down until the heap property holds.

        Iterative form of the CLRS MIN-HEAPIFY recursion.
        """
        size = len(self)
        while True:
            smallest = i
            child = self.left(i)
            if child < size and self[child] < self[smallest]:
                smallest = child
            child = self.right(i)
            if child < size and self[child] < self[smallest]:
                smallest = child
            if smallest == i:
                return
            self[i], self[smallest] = self[smallest], self[i]
            i = smallest

    def heap_decrease_key(self, i, key):
        """Set position i to `key` (must not be larger) and sift it up (CLRS)."""
        if key > self[i]:
            raise ValueError("new key is larger than current key")
        self[i] = key
        up = self.parent(i)
        while i > 0 and self[up] > self[i]:
            self[i], self[up] = self[up], self[i]
            i = up
            up = self.parent(i)

    def append(self, key):
        """Insert `key`, keeping the heap property. Rejects None."""
        if key is None:
            raise ValueError('Cannot insert None in the queue')
        super().append(key)
        self.heap_decrease_key(len(self) - 1, key)

    def min(self):
        """The smallest element in the queue, or None when empty."""
        return self[0] if self else None

    def pop(self):
        """Remove and return the smallest element, or None when empty."""
        if not self:
            return None
        smallest = self[0]
        # Move the last element to the root and restore the heap.
        last = super().pop()
        if self:
            self[0] = last
            self.min_heapify(0)
        return smallest
Your parent function is already wrong.
The root element of your heap is stored in array index 0, the children are in 1 and 2. The parent of 1 is 0, that is correct, but the parent of 2 should also be 0, whereas your function returns 1.
Usually the underlying array of a heap does not use the index 0, instead the root element is at index 1. This way you can compute parent and children like this:
parent(i): i // 2
left_child(i): 2 * i
right_child(i): 2 * i + 1

algo class question: compare n no. of sequence showing their comparison which leads to the particular sequence

Comparing 3 numbers can end in any of 6 results, and similarly 4 numbers gives 24 — the number of permutations of the inputs.
The task is to compare n no. of sequence showing their comparison which leads to the particular sequence
For example your input is a,b,c
If a<b
If b<c
Abc
Else
If a<c
Acb
Else a>c
cab
Else b>c
Cba
Else
If a<c
Bac
Else
Bca
Else
Cba
The task is to print all the comparisons which took place to lead that sequence for n no.s and
confirm that there is no duplication.
Here is Python code that outputs valid Python code to assign to answer the sorted values.
The sorting algorithm here is mergesort. Which is not going to give the smallest possible decision tree, but it will be pretty good.
Here is Python code that outputs valid Python code to assign to answer the sorted values.
The sorting algorithm here is mergesort. Which is not going to give the smallest possible decision tree, but it will be pretty good.
#! /usr/bin/env python
import sys
class Tree:
    """A node of a comparison decision tree.

    node_type is 'answer' (value1 holds the final ordering) or 'comparison'
    (value1 is the pair [x, y] being compared, value2 the subtree taken when
    x < y, value3 the subtree taken otherwise).
    """

    def __init__ (self, node_type, value1=None, value2=None, value3=None):
        self.node_type = node_type
        self.value1 = value1
        self.value2 = value2
        self.value3 = value3

    def output (self, indent='', is_continue=False):
        """Render this subtree as Python source lines (no trailing newlines)."""
        lines = []
        if self.node_type == 'answer':
            # Leaf: assign the decided ordering.
            lines.append("{}answer = [{}]".format(indent, ', '.join(self.value1)))
        elif self.node_type == 'comparison':
            # Chain onto the caller's if-ladder when asked to continue.
            if is_continue:
                lines.append('{}elif {} < {}:'.format(indent, self.value1[0], self.value1[1]))
            else:
                lines.append('{}if {} < {}:'.format(indent, self.value1[0], self.value1[1]))
            lines += self.value2.output(indent + ' ')
            if self.value3.node_type == 'answer':
                # A leaf needs its own else: block.
                lines.append('{}else:'.format(indent))
                lines += self.value3.output(indent + ' ')
            else:
                # A comparison continues the ladder as elif at this level.
                lines += self.value3.output(indent, True)
        return lines
# This call captures a state in the merging.
# chains       - list of sorted runs (lists of variable names) still to merge
# first/second - the two runs currently being merged (None when idle)
# output       - merged prefix accumulated so far for the current merge
def _merge_tree (chains, first=None, second=None, output=None):
    """Build a comparison decision Tree by simulating bottom-up mergesort.

    Wherever the simulation would compare two concrete values it emits a
    'comparison' node whose subtrees continue the merge for either outcome;
    when a single run remains, it emits an 'answer' leaf.
    """
    if first is None and second is None and output is None:
        # Not currently merging: either finished, or start the next pair.
        if len(chains) < 2:
            return Tree('answer', chains[0])
        else:
            return _merge_tree(chains[2:], chains[0], chains[1], [])
    elif first is None:
        # Current merge finished; queue its result as a new run.
        return _merge_tree(chains + [output])
    elif len(first) == 0:
        # `first` exhausted; promote `second` so it gets drained below.
        return _merge_tree(chains, second, None, output)
    elif second is None:
        # Only one run left in this merge: flush it onto the output.
        return _merge_tree(chains + [output + first])
    elif len(second) < len(first):
        # Normalise so `first` is never the longer run (keeps cases symmetric).
        return _merge_tree(chains, second, first, output)
    else:
        # Branch on comparing the heads of the two runs.
        subtree1 = _merge_tree(chains, first[1:], second, output + [first[0]])
        subtree2 = _merge_tree(chains, first, second[1:], output + [second[0]])
        return Tree('comparison', [first[0], second[0]], subtree1, subtree2)
def merge_tree (variables):
    """Decision tree for sorting `variables` by mergesort (see _merge_tree)."""
    # Seed the merge with one single-element run per variable.
    singletons = [[name] for name in variables]
    return _merge_tree(singletons)
# This captures the moment when you're about to compare the next
# variable with the already sorted variable at position 'position'.
def insertion_tree (variables, prev_sorted=None, current_variable=None, position=None):
    """Build a comparison decision Tree by simulating insertion sort.

    variables        -- names still to be inserted
    prev_sorted      -- names already placed, in sorted order
    current_variable -- the name currently being inserted (None: take next)
    position         -- index in prev_sorted to compare against next
    """
    if prev_sorted is None:
        prev_sorted = []
    if current_variable is None:
        # Take the next variable, or emit the finished ordering.
        if len(variables) == 0:
            return Tree('answer', prev_sorted)
        else:
            return insertion_tree(variables[1:], prev_sorted, variables[0], len(prev_sorted))
    elif position < 1:
        # Scanned past the front: current variable is the smallest so far.
        return insertion_tree(variables, [current_variable] + prev_sorted)
    else:
        position = position - 1
        # Branch on comparing with the sorted element at `position`.
        subtree1 = insertion_tree(variables, prev_sorted, current_variable, position)
        # NOTE(review): this slice inserts current_variable BEFORE
        # prev_sorted[position] on the >= branch; it looks like it may be
        # off by one (position+1 expected) — TODO confirm against a run.
        subtree2 = insertion_tree(variables, prev_sorted[0:position] + [current_variable] + prev_sorted[position:])
        return Tree('comparison', [current_variable, prev_sorted[position]], subtree1, subtree2)
# Default to three variables; command-line arguments override them.
args = ['a', 'b', 'c']
if 1 < len(sys.argv):
    args = sys.argv[1:]
# Emit the generated decision-tree code, one line at a time.
for line in merge_tree(args).output():
    print(line)
For giggles and grins, you can get insertion sort by switching the final call to merge_tree to insertion_tree.
In principle you could repeat the exercise for any sort algorithm, but it gets really tricky, really fast. (For quicksort you have to do continuation passing. For heapsort and bubble sort you have to insert fancy logic to only consider parts of the decision tree that you could actually arrive at. It is a fun exercise if you want to engage in it.)

Algorithm for finding all points within distance of another point

I had this problem for an entry test for a job. I did not pass the test. I am disguising the question in deference to the company.
Imagine you have N number of people in a park of A X B space. If a person has no other person within 50 feet, he enjoys his privacy. Otherwise, his personal space is violated. Given a set of (x, y), how many people will have their space violated?
For example, give this list in Python:
people = [(0,0), (1,1), (1000, 1000)]
We would find 2 people who are having their space violated: 1, 2.
We don't need to find all sets of people; just the total number of unique people.
You can't use a brute method to solve the problem. In other words, you can't use a simple array within an array.
I have been working on this problem off and on for a few weeks, and although I have gotten a solution faster than n^2, have not come up with a problem that scales.
I think the only correct way to solve this problem is by using Fortune's algorithm?
Here's what I have in Python (not using Fortune's algorithm):
import math
import random
random.seed(1) # Setting random number generator seed for repeatability
# Simulation parameters.
# NOTE(review): the question talks about 50 feet; these constants use
# metres and larger values — presumably scaled-up test data, confirm.
TEST = True
NUM_PEOPLE = 10000
PARK_SIZE = 128000 # Meters.
CONFLICT_RADIUS = 500 # Meters.
def _get_distance(x1, y1, x2, y2):
"""
require: x1, y1, x2, y2: all integers
return: a distance as a float
"""
distance = math.sqrt(math.pow((x1 - x2), 2) + math.pow((y1 - y2),2))
return distance
def check_real_distance(people1, people2, conflict_radius):
    """
    Decide whether two people are in conflict.
    Returns True only when their distance is strictly below conflict_radius.
    """
    # Cheap rejection on the y-gap before paying for the full distance.
    # (Note: the check is one-directional — no abs() — as in the sweep usage.)
    if people2[1] - people1[1] > conflict_radius:
        return False
    separation = _get_distance(people1[0], people1[1], people2[0], people2[1])
    return separation < conflict_radius
def check_for_conflicts(peoples, conflict_radius):
    """Count how many distinct people have someone else within conflict_radius.

    Sweep over the points sorted by x: for each person, only look ahead while
    the x-gap stays within the radius, then do the exact distance test.
    Returns the number of unique people involved in at least one conflict.
    """
    # sort people
    def sort_func1(the_tuple):
        # Sort key: the x coordinate.
        return the_tuple[0]
    # Tag each person with their original index so conflicts can be counted
    # per-person after sorting.
    _peoples = []
    index = 0
    for people in peoples:
        _peoples.append((people[0], people[1], index))
        index += 1
    peoples = _peoples
    peoples = sorted(peoples, key = sort_func1)
    conflicts_dict = {}
    i = 0
    # use a type of sweep strategy
    while i < len(peoples) - 1:
        x_len = peoples[i + 1][0] - peoples[i][0]
        conflict = False
        # NOTE(review): conflicts_list is built but never used.
        conflicts_list =[peoples[i]]
        j = i + 1
        # NOTE(review): the loop condition tests the x_len computed for the
        # PREVIOUS j, so the window extends one element past the radius.
        while x_len <= conflict_radius and j < len(peoples):
            x_len = peoples[j][0] - peoples[i][0]
            conflict = check_real_distance(peoples[i], peoples[j], conflict_radius)
            if conflict:
                # Record both participants by their original indices.
                people1 = peoples[i][2]
                people2 = peoples[j][2]
                conflicts_dict[people1] = True
                conflicts_dict[people2] = True
            j += 1
        i += 1
    return len(conflicts_dict.keys())
def gen_coord():
    """Return a uniform random integer coordinate in [0, PARK_SIZE)."""
    scaled = random.random() * PARK_SIZE
    return int(scaled)
if __name__ == '__main__':
    # Scatter NUM_PEOPLE random (x, y) points across the park, then
    # report how many distinct people stand within CONFLICT_RADIUS of
    # someone else.
    people_positions = [[gen_coord(), gen_coord()] for i in range(NUM_PEOPLE)]
    conflicts = check_for_conflicts(people_positions, CONFLICT_RADIUS)
    print("people in conflict: {}".format(conflicts))
As you can see from the comments, there's lots of approaches to this problem. In an interview situation you'd probably want to list as many as you can and say what the strengths and weaknesses of each one are.
For the problem as stated, where you have a fixed radius, the simplest approach is probably rounding and hashing. k-d trees and the like are powerful data structures, but they're also quite complex and if you don't need to repeatedly query them or add and remove objects they might be overkill for this. Hashing can achieve linear time, versus spatial trees which are n log n, although it might depend on the distribution of points.
To understand hashing and rounding, just think of it as partitioning your space up into a grid of squares with sides of length equal to the radius you want to check against. Each square is given its own "zip code" which you can use as a hash key to store values in that square. You can compute the zip code of a point by dividing the x and y co-ordinates by the radius, and rounding down, like this:
def get_zip_code(x, y, radius):
    """Return a unique string key for the grid square of side *radius*
    that contains the point (x, y)."""
    cell_x = int(math.floor(x / radius))
    cell_y = int(math.floor(y / radius))
    return str(cell_x) + "_" + str(cell_y)
I'm using strings because it's simple, but you can use anything as long as you generate a unique zip code for each square.
Create a dictionary, where the keys are the zip codes, and the values are lists of all the people in that zip code. To check for conflicts, add the people one at a time, and before adding each one, test for conflicts with all the people in the same zip code, and the zip code's 8 neighbours. I've reused your method for keeping track of conflicts:
def check_for_conflicts(peoples, conflict_radius):
    """Count unique people in conflict using spatial hashing.

    The park is partitioned into grid squares with side
    *conflict_radius*; each square's "zip code" keys a bucket dict.
    Before inserting a person we test them only against occupants of
    their own square and its 8 neighbours, which is expected O(n) when
    point density is bounded.

    peoples: iterable of [x, y] pairs.
    Returns: int, the number of distinct people in conflict.

    Fixes in this revision: hand-rolled index counter -> enumerate,
    "not zip_code in d" -> setdefault, and the double dict lookup when
    probing a bucket.
    """
    buckets = {}         # zip code -> list of [x, y, original index]
    conflicts_dict = {}  # original index -> True once in conflict
    for index, person in enumerate(peoples):
        # Probe this person's square and the 8 surrounding squares.
        for offset_x in range(-1, 2):
            for offset_y in range(-1, 2):
                offset_zip_code = get_zip_code(
                    person[0] + (offset_x * conflict_radius),
                    person[1] + (offset_y * conflict_radius),
                    conflict_radius,
                )
                # Empty tuple default: nothing to test in unused squares.
                for other_person in buckets.get(offset_zip_code, ()):
                    if check_real_distance(person, other_person, conflict_radius):
                        conflicts_dict[index] = True
                        conflicts_dict[other_person[2]] = True
        # Add the new person to their own square's bucket.
        zip_code = get_zip_code(person[0], person[1], conflict_radius)
        buckets.setdefault(zip_code, []).append([person[0], person[1], index])
    return len(conflicts_dict)
The time complexity of this depends on a couple of things. If you increase the number of people, but don't increase the size of the space you are distributing them in, then it will be O(N^2) because the number of conflicts is going to increase quadratically and you have to count them all. However if you increase the space along with the number of people, so that the density is the same, it will be closer to O(N).
If you're just counting unique people, you can keep a count of how many people in each zip code have at least 1 conflict. If it's equal to everyone in the zip code, you can early out of the loop that checks for conflicts in a given zip after the first conflict with the new person, since no more uniques will be found. You could also loop through twice, adding all people on the first loop, and testing on the second, breaking out of the loop when you find the first conflict for each person.
You can see this topcoder link and section 'Closest pair'. You can modify the closest pair algorithm so that the distance h is always 50.
So , what you basically do is ,
Sort the people by X coordinate
Sweep from left to right.
Keep a balanced binary tree and keep all the points within 50 radii in the binary tree. The key of the binary tree would be the Y coordinates of the point
Select the points with Y-50 and Y+50 , this can be done with the binary tree in lg(n) time.
So the overall complexity becomes nlg(n)
Be sure to mark the points you find to skip those points in the future.
You can use set in C++ as the binary tree. But I couldn't find whether the Python set supports range queries or upper_bound and lower_bound. If someone knows, please point that out in the comments.
Here's my solution to this interesting problem:
from math import sqrt
import math
import random
class Person():
    """A park visitor at an (x, y) position.

    ``valid`` starts True and flips to False once a conflict with
    another person is detected. ``radius`` holds the SQUARED conflict
    radius so distance comparisons can skip the sqrt.
    """

    def __init__(self, x, y, conflict_radius=500):
        self.position = [x, y]
        self.valid = True
        self.radius = conflict_radius ** 2

    def validate_people(self, people):
        """Mark self and any member of *people* within the conflict
        radius as invalid.

        *people* is assumed sorted by ascending x; scanning it in
        reverse lets us stop as soon as the x-gap alone exceeds the
        radius, since everyone earlier is even farther away in x.
        """
        my_x, my_y = self.position
        for other in reversed(people):
            other_x, other_y = other.position
            gap_x_sq = (other_x - my_x) * (other_x - my_x)
            if gap_x_sq > self.radius:
                break  # no earlier person can be close enough
            gap_y_sq = (other_y - my_y) * (other_y - my_y)
            if gap_x_sq + gap_y_sq <= self.radius:
                self.valid = False
                other.valid = False

    def __str__(self):
        x, y = self.position
        return "{0}:{1} - {2}".format(x, y, self.valid)
class Park():
    """A square park that scatters a fixed number of visitors at
    random integer coordinates.

    Seeds the RNG with a constant on construction so repeated runs
    produce the same layout.
    """

    def __init__(self, num_people=10000, park_size=128000):
        random.seed(1)
        self.num_people = num_people
        self.park_size = park_size

    def gen_coord(self):
        """One uniform random integer coordinate in [0, park_size)."""
        return int(random.random() * self.park_size)

    def generate(self):
        """Build the visitor list as [x, y] pairs."""
        positions = []
        for _ in range(self.num_people):
            x = self.gen_coord()
            y = self.gen_coord()
            positions.append([x, y])
        return positions
def naive_solution(data):
    """Sort raw [x, y] positions by x, build a Person per position,
    and validate each new person against all previously added ones.

    data: iterable of [x, y] pairs.
    Returns: list of Person objects with their ``valid`` flags set
    (``valid`` stays True when no conflict was found for that person).

    Fix in this revision: the Python 2 ``print`` statement (a
    SyntaxError on Python 3, and inconsistent with the rest of the
    file, which uses the print function).
    """
    sorted_data = sorted(data, key=lambda pos: pos[0])
    total = len(sorted_data)
    result = []
    for index, pos in enumerate(sorted_data):
        # Progress trace: current index / total - people kept so far.
        print("{0}/{1} - {2}".format(index, total, len(result)))
        person = Person(pos[0], pos[1])
        person.validate_people(result)
        result.append(person)
    return result
if __name__ == '__main__':
    people_positions = Park().generate()
    # Fixes: (1) len(filter(...)) raises TypeError on Python 3, where
    # filter() is lazy; (2) naive_solution was run twice on the same
    # data, doubling the work for identical results.
    people = naive_solution(people_positions)
    # NOTE(review): Person.valid is True when NO conflict was found,
    # so these labels look swapped relative to the flag -- kept exactly
    # as the original reported them; confirm intended meaning.
    with_conflicts = sum(1 for person in people if person.valid)
    without_conflicts = sum(1 for person in people if not person.valid)
    print("people with conflicts: {}".format(with_conflicts))
    print("people without conflicts: {}".format(without_conflicts))
I'm sure the code can be still optimized further
I found a relatively simple solution to the problem. Sort the list of coordinates by the X value. Then look at each X value, one at a time. Sweep right, checking the position with the next position, until the end of the sweep area is reached (500 meters), or a conflict is found.
If no conflict is found, sweep left in the same manner. This method avoids unnecessary checks. For example, if there are 1,000,000 people in the park, then all of them will be in conflict. The algorithm will only check each person one time: once a conflict is found the search stops.
My time seems to be O(N).
Here is the code:
import math
import random
random.seed(1) # Setting random number generator seed for repeatability
NUM_PEOPLE = 10000  # number of random visitors scattered in the park
PARK_SIZE = 128000 # Meters.
CONFLICT_RADIUS = 500 # Meters.
def check_real_distance(conflict_radius, people1, people2):
    """Return True when the two points are within *conflict_radius*
    (inclusive) of each other.

    people1, people2: indexables with x at [0] and y at [1].
    Quick rejection on the y gap first, then a squared-distance
    comparison (no sqrt needed). PEP 8 (E731): the assigned lambda is
    replaced with a def of the same name and signature.
    """
    if people2[1] - people1[1] > conflict_radius:
        return False
    dx = people1[0] - people2[0]
    dy = people1[1] - people2[1]
    return dx * dx + dy * dy <= conflict_radius * conflict_radius
def check_for_conflicts(peoples, conflict_radius):
    """Count the distinct people within *conflict_radius* of someone.

    peoples: list of [x, y] pairs; NOTE: sorted in place by x.
    Returns: int, the number of unique people in conflict.

    For each person, sweep right then left through the x-sorted list,
    stopping each sweep as soon as the x-gap exceeds the radius, a
    conflict is found, or this person is already marked.

    Fixes in this revision: the per-check average divided by the
    unrelated global NUM_PEOPLE instead of the actual input size (and
    would truncate under Python 2 integer division); duplicated "is"
    in the second print; zero-division guard for empty input.
    """
    peoples.sort(key=lambda p: p[0])
    n = len(peoples)
    conflicts_dict = {}  # index into sorted list -> True once in conflict
    num_checks = 0
    for i in range(n):
        conflict = False
        # Sweep right.
        j = i + 1
        while (j < n and peoples[j][0] - peoples[i][0] <= conflict_radius
               and not conflict and not conflicts_dict.get(i)):
            num_checks += 1
            conflict = check_real_distance(conflict_radius, peoples[i], peoples[j])
            if conflict:
                conflicts_dict[i] = True
                conflicts_dict[j] = True
            j += 1
        # Sweep left only while this person is still conflict-free.
        j = i - 1
        while (j >= 0 and peoples[i][0] - peoples[j][0] <= conflict_radius
               and not conflict and not conflicts_dict.get(i)):
            num_checks += 1
            conflict = check_real_distance(conflict_radius, peoples[j], peoples[i])
            if conflict:
                conflicts_dict[i] = True
                conflicts_dict[j] = True
            j -= 1
    print("num checks is {0}".format(num_checks))
    print("num checks per size is {0}".format(num_checks / float(n) if n else 0.0))
    return len(conflicts_dict)
def gen_coord():
    """Return a uniform random integer coordinate in [0, PARK_SIZE)."""
    return int(PARK_SIZE * random.random())
if __name__ == '__main__':
    # Scatter NUM_PEOPLE random (x, y) points across the park, then
    # report how many distinct people stand within CONFLICT_RADIUS of
    # someone else.
    people_positions = [[gen_coord(), gen_coord()] for i in range(NUM_PEOPLE)]
    conflicts = check_for_conflicts(people_positions, CONFLICT_RADIUS)
    print("people in conflict: {}".format(conflicts))
I came up with an answer that seems to take O(N) time. The strategy is to sort the array by X values. For each X value, sweep right until a conflict is found, or the distance exceeds the conflict distance (500 M). If no conflict is found, sweep left in the same manner. With this technique, you limit the amount of searching.
Here is the code:
import math
import random
random.seed(1) # Setting random number generator seed for repeatability
NUM_PEOPLE = 10000  # number of random visitors scattered in the park
PARK_SIZE = 128000 # Meters.
CONFLICT_RADIUS = 500 # Meters.
def check_real_distance(conflict_radius, people1, people2):
    """Return True when the two points lie within *conflict_radius*
    (inclusive) of each other.

    people1, people2: indexables with x at [0] and y at [1].
    Rejects on the y gap first, then compares squared distances so no
    sqrt is needed. PEP 8 (E731): the assigned lambda is replaced with
    a def of the same name and signature.
    """
    if people2[1] - people1[1] > conflict_radius:
        return False
    dx = people1[0] - people2[0]
    dy = people1[1] - people2[1]
    return dx * dx + dy * dy <= conflict_radius * conflict_radius
def check_for_conflicts(peoples, conflict_radius):
    """Count the distinct people within *conflict_radius* of someone.

    peoples: list of [x, y] pairs; NOTE: sorted in place by x.
    Returns: int, the number of unique people in conflict.

    For each person, sweep right then left through the x-sorted list,
    stopping each sweep as soon as the x-gap exceeds the radius, a
    conflict is found, or this person is already marked.

    Fixes in this revision: the per-check average divided by the
    unrelated global NUM_PEOPLE instead of the actual input size (and
    would truncate under Python 2 integer division); duplicated "is"
    in the second print; zero-division guard for empty input.
    """
    peoples.sort(key=lambda p: p[0])
    n = len(peoples)
    conflicts_dict = {}  # index into sorted list -> True once in conflict
    num_checks = 0
    for i in range(n):
        conflict = False
        # Sweep right.
        j = i + 1
        while (j < n and peoples[j][0] - peoples[i][0] <= conflict_radius
               and not conflict and not conflicts_dict.get(i)):
            num_checks += 1
            conflict = check_real_distance(conflict_radius, peoples[i], peoples[j])
            if conflict:
                conflicts_dict[i] = True
                conflicts_dict[j] = True
            j += 1
        # Sweep left only while this person is still conflict-free.
        j = i - 1
        while (j >= 0 and peoples[i][0] - peoples[j][0] <= conflict_radius
               and not conflict and not conflicts_dict.get(i)):
            num_checks += 1
            conflict = check_real_distance(conflict_radius, peoples[j], peoples[i])
            if conflict:
                conflicts_dict[i] = True
                conflicts_dict[j] = True
            j -= 1
    print("num checks is {0}".format(num_checks))
    print("num checks per size is {0}".format(num_checks / float(n) if n else 0.0))
    return len(conflicts_dict)
def gen_coord():
    """Draw one random integer coordinate inside the park, [0, PARK_SIZE)."""
    position = random.random() * PARK_SIZE
    return int(position)
if __name__ == '__main__':
    # Scatter NUM_PEOPLE random (x, y) points across the park, then
    # report how many distinct people stand within CONFLICT_RADIUS of
    # someone else.
    people_positions = [[gen_coord(), gen_coord()] for i in range(NUM_PEOPLE)]
    conflicts = check_for_conflicts(people_positions, CONFLICT_RADIUS)
    print("people in conflict: {}".format(conflicts))

Resources