Dafny set membership implications

I'm verifying another simple problem here, for the following TypeScript code:
function containsDuplicate(nums: number[]): boolean {
    let m = new Set();
    for (let elem of nums) {
        if (m.has(elem)) {
            return true;
        }
        m.add(elem);
    }
    return false;
}
I run into a situation where I can get the method to verify based on seqSet, but not on the variable windowSet.
function method seqSet(nums: seq<int>, index: nat): set<int> {
    set x | 0 <= x < index < |nums| :: nums[x]
}
method containsDuplicateI(nums: seq<int>) returns (containsDuplicate: bool)
    ensures containsDuplicate ==> exists i, j :: 0 <= i < j < |nums| && nums[i] == nums[j]
{
    var windowGhost: set<int> := {};
    var windowSet: set<int> := {};
    for i := 0 to |nums|
        invariant 0 <= i <= |nums|
        invariant forall j :: 0 <= j < i < |nums| ==> nums[j] in windowSet
        invariant forall x :: x in windowSet ==> x in nums
        invariant seqSet(nums, i) <= windowSet
    {
        windowGhost := windowSet;
        // if nums[i] in windowSet { // does not verify
        if nums[i] in seqSet(nums, i) { // verifies
            return true;
        }
        windowSet := windowSet + {nums[i]};
    }
    return false;
}
It seems that, because windowSet is updated inside the loop, it cannot be shown equal to the result of seqSet; at least, asserting so fails. How do I change the invariants so that the method verifies using the membership test on the non-ghost variable rather than on the function-computed set?

After changing invariant forall x :: x in windowSet ==> x in nums to invariant forall x :: x in windowSet ==> x in nums[0..i], I am able to verify.
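For reference, here is the loop with that single invariant swapped in; the rest of the method is unchanged. It works because the new invariant ties every element of windowSet to the prefix nums[0..i], so when nums[i] in windowSet holds the verifier gets a witness j < i with nums[j] == nums[i], which is exactly what the postcondition needs.

for i := 0 to |nums|
    invariant 0 <= i <= |nums|
    invariant forall j :: 0 <= j < i < |nums| ==> nums[j] in windowSet
    invariant forall x :: x in windowSet ==> x in nums[0..i] // the changed line
    invariant seqSet(nums, i) <= windowSet
{
    windowGhost := windowSet;
    if nums[i] in windowSet { // now verifies
        return true;
    }
    windowSet := windowSet + {nums[i]};
}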

Related

Longest sequence that holds a property in Dafny

In Dafny I am trying to write a polymorphic, higher-order Max function that, given a sequence and a predicate, returns the longest subsequence that satisfies it. For instance, the longest increasing subsequence, or the longest subsequence in which all the elements are zero.
To do so, I designed a slow algorithm (given the predicate P and a sequence S):
1. Start a pivot i at the left and a pivot j in the same place.
2. Start with max_sequence = [] and max_sequence_length = 0.
3. While i < S.length:
       counter = 0
       j = i
       While S[i..j] satisfies P and j < S.length:
           If counter > max_sequence_length:
               max_sequence_length = counter
               max_sequence = S[i..j]
           Increment j and counter
       Increment i
4. Return max_sequence
You can see it implemented:
method maxPropertySequence<T>(P: seq<T> -> bool, sequ: seq<T>) returns (max_seq: seq<T>)
{
    var i := 0;
    var j := 0;
    var longest := 0;
    var the_sgmt := sequ;
    var fresh_segmnt := sequ;
    var counter := longest;
    while i < |sequ|
        decreases |sequ| - i
    {
        j := i;
        counter := 0;
        fresh_segmnt := [sequ[i]];
        if P(fresh_segmnt)
        {
            j := j + 1;
            counter := counter + 1;
            if counter > longest {
                longest := counter;
                the_sgmt := fresh_segmnt;
            }
            while P(fresh_segmnt) && j < |sequ|
                decreases |sequ| - j
            {
                fresh_segmnt := fresh_segmnt + [sequ[j]];
                j := j + 1;
                counter := counter + 1;
                if counter > longest {
                    longest := counter;
                    the_sgmt := fresh_segmnt;
                }
            }
        }
        i := i + 1;
    }
    return the_sgmt;
}
My question is: how can I verify that the Max function behaves as I expect? More concretely, which ensures clauses do I have to add?
I have thought of something like: over all the subsequences of the original sequence, there is no subsequence that satisfies P and is longer than the_sgmt. But I do not know how to express it properly.
Thanks!
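One way to phrase those postconditions is sketched below. This is only a specification sketch, not something the implementation above verifies against as-is: proving it needs corresponding loop invariants, the inner loop currently commits fresh_segmnt to the_sgmt before re-checking P, and the_sgmt is initialised to sequ, so you also have to decide what to return when no slice satisfies P. The helper predicate IsSlice is my own name, and the spec talks about contiguous slices, which is what the implementation actually computes.

predicate IsSlice<T>(sub: seq<T>, sequ: seq<T>)
{
    exists lo, hi :: 0 <= lo <= hi <= |sequ| && sub == sequ[lo..hi]
}

method maxPropertySequence<T>(P: seq<T> -> bool, sequ: seq<T>) returns (max_seq: seq<T>)
    ensures IsSlice(max_seq, sequ)
    ensures P(max_seq)
    // "no slice of sequ that satisfies P is longer than max_seq":
    ensures forall lo, hi :: 0 <= lo <= hi <= |sequ| && P(sequ[lo..hi]) ==> hi - lo <= |max_seq|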
I wrote code for finding the (leftmost) longest subsequence of zeros from a given integer array. Since you can map sequ using the predicate, these two are almost identical problems.
// For a given integer array, let's find the longest subsequence of 0s.
// sz: size, pos: position. a[pos..(pos+sz)] will be all zeros.
method longestZero(a: array<int>) returns (sz: int, pos: int)
    requires 1 <= a.Length
    ensures 0 <= sz <= a.Length
    ensures 0 <= pos < a.Length
    ensures pos + sz <= a.Length
    ensures forall i: int :: pos <= i < pos + sz ==> a[i] == 0
    ensures forall i, j :: (0 <= i < j < a.Length && getSize(i, j) > sz) ==> exists k :: i <= k <= j && a[k] != 0
{
    var b := new int[a.Length]; // if b[i] == n, then a[i], a[i-1], ..., a[i-n+1] are all zeros and (i - n == -1 or a[i-n] != 0)
    if a[0] == 0
    { b[0] := 1; }
    else
    { b[0] := 0; }
    var idx: int := 0;
    while idx < a.Length - 1 // idx goes from 0 to a.Length - 2
        invariant 0 <= idx <= a.Length - 1
        invariant forall i: int :: 0 <= i <= idx ==> 0 <= b[i] <= a.Length
        invariant forall i: int :: 0 <= i <= idx ==> -1 <= i - b[i]
        invariant forall i: int :: 0 <= i <= idx ==> (forall j: int :: i - b[i] < j <= i ==> a[j] == 0)
        invariant forall i: int :: 0 <= i <= idx ==> (0 <= i - b[i] ==> a[i - b[i]] != 0)
    {
        if a[idx + 1] == 0
        { b[idx + 1] := b[idx] + 1; }
        else
        { b[idx + 1] := 0; }
        idx := idx + 1;
    }
    idx := 1;
    sz := b[0];
    pos := 0;
    // Let's find the maximum of array b. That is the desired sz.
    while idx < a.Length
        invariant 1 <= idx <= b.Length
        invariant 0 <= sz <= a.Length
        invariant 0 <= pos < a.Length
        invariant pos + sz <= a.Length
        invariant forall i: int :: 0 <= i < idx ==> b[i] <= sz
        invariant forall i: int :: pos <= i < pos + sz ==> a[i] == 0
        invariant forall i, j: int :: (0 <= i < j < idx && getSize(i, j) > sz) ==> a[j - b[j]] != 0
    {
        // find max
        if b[idx] > sz
        {
            sz := b[idx];
            pos := idx - b[idx] + 1;
        }
        idx := idx + 1;
    }
}
function getSize(i: int, j: int): int
{
    j - i + 1
}
Since I am new to Dafny, any comments on style or anything else are appreciated.

How can I figure out loop invariant in my binary search implementation?

bool binsearch(int x) {
    int i = 0, j = N;
    while (i < j) {
        int m = (i + j) / 2;
        if (arr[m] <= x) {
            if (arr[m] == x)
                return true;
            i = m + 1;
        }
        else {
            j = m;
        }
    }
    return false;
}
This is my implementation of binary search, which returns true if x is in arr[0:N-1] and false otherwise.
I'm wondering how I can figure out the right loop invariant to prove that this implementation is correct.
How can I solve this problem?
Thanks a lot :D
Think about the variables holding state within your loop. In your case, they are i and j. You start from the fact that every element at an index below i is less than the value you are searching for (x), and every element at index j or above is greater than x. That is the invariant you are trying to maintain.
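Since the earlier questions on this page are about Dafny, here is a sketch of that invariant written out so it can be machine-checked. The method name, the sortedness precondition and the postconditions are my additions; the loop body mirrors the C code above.

method BinSearch(arr: array<int>, x: int) returns (found: bool)
    requires forall p, q :: 0 <= p < q < arr.Length ==> arr[p] <= arr[q]
    ensures found ==> exists k :: 0 <= k < arr.Length && arr[k] == x
    ensures !found ==> forall k :: 0 <= k < arr.Length ==> arr[k] != x
{
    var i, j := 0, arr.Length;
    while i < j
        invariant 0 <= i <= j <= arr.Length
        invariant forall k :: 0 <= k < i ==> arr[k] < x          // everything left of i is too small
        invariant forall k :: j <= k < arr.Length ==> arr[k] > x // everything from j on is too big
        decreases j - i
    {
        var m := (i + j) / 2;
        if arr[m] <= x {
            if arr[m] == x { return true; }
            i := m + 1;
        } else {
            j := m;
        }
    }
    return false;
}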

Binary searching via bitmasking?

I have used this algorithm many times to binary search over Ints or Longs. Basically, I start from Long.MinValue and Long.MaxValue and decide whether to set the bit at the i-th position depending on the value of the function I am maximizing (or minimizing). In practice, this turns out to be faster (exactly 63*2 bitwise operations), easier to code, and it avoids many of the gotchas of traditional binary search implementations.
Here is my algorithm in Scala:
/**
 * @return Some(x) such that x is the largest number for which f(x) is true
 *         If no such x is found, return None
 */
def bitBinSearch(f: Long => Boolean): Option[Long] = {
  var n = 1L << 63
  var p = 0L
  for (i <- 62 to 0 by -1) {
    val t = 1L << i
    if (f(n + t)) n += t
    if (f(p + t)) p += t
  }
  if (f(p)) Some(p) else if (f(n)) Some(n) else None
}
I have 3 questions:
1. What is this algorithm called in the literature? Surely I can't be the inventor of this, but I did not find anything when I tried googling various combinations of binary-search + bit-masking/toggling. I have been personally calling it "bitBinSearch". I have not seen it mentioned at all in articles going over binary search on an Int or Long domain, where this would be trivial to write.
2. Can the code be improved or shortened in any way? Right now I keep track of the negative and positive solutions in n and p. Is there any clever way I can merge them into a single variable? Here are some sample test cases before you attempt an answer: http://scalafiddle.net/console/70a3e3e59bc61c8eb7acfbba1073980c
3. Is there a version that can be made to work with Doubles and Floats?
As long as you're bit-twiddling (a popular pastime in some circles) why not go all the way? I don't know if there's any efficiency to be gained, but I think it actually makes the algorithm a little clearer.
def bitBinSearch(f: Long => Boolean): Option[Long] = {
  var n = Long.MinValue
  var p = 0L
  var t = n >>> 1
  while (t > 0) {
    if (f(n | t)) n |= t
    if (f(p | t)) p |= t
    t >>= 1
  }
  List(p, n).find(f)
}
Of course, if you go recursive you can eliminate those nasty vars.
import scala.annotation.tailrec

@tailrec
def bitBinSearch( f: Long => Boolean
                , n: Long = Long.MinValue
                , p: Long = 0L
                , t: Long = Long.MinValue >>> 1 ): Option[Long] = {
  if (t > 0) bitBinSearch(f
                         , if (f(n | t)) n | t else n
                         , if (f(p | t)) p | t else p
                         , t >> 1
                         )
  else List(p, n).find(f)
}
Again, probably not more efficient, but perhaps a bit more Scala-like.
UPDATE
Your comment about Int/Long got me wondering if one function could do it all.
After traveling down a few dead-ends I finally came up with this (which is, oddly, actually pretty close to your original code).
import Integral.Implicits._
import Ordering.Implicits._

def bitBinSearch[I](f: I => Boolean)(implicit ev: Integral[I]): Option[I] = {
  def topBit(x: I = ev.one): I = if (x + x < ev.zero) x else topBit(x + x)
  var t: I = topBit()
  var p: I = ev.zero
  var n: I = t + t
  while (t > ev.zero) {
    if (f(p + t)) p += t
    if (f(n + t)) n += t
    t /= (ev.one + ev.one)
  }
  List(p, n).find(f)
}
This passes the following tests.
assert(bitBinSearch[Byte] (_ <= 0) == Some(0))
assert(bitBinSearch[Byte] (_ <= 1) == Some(1))
assert(bitBinSearch[Byte] (_ <= -1) == Some(-1))
assert(bitBinSearch[Byte] (_ <= 100) == Some(100))
assert(bitBinSearch[Byte] (_ <= -100) == Some(-100))
assert(bitBinSearch[Short](_ <= 10000) == Some(10000))
assert(bitBinSearch[Short](_ <= -10000) == Some(-10000))
assert(bitBinSearch[Int] (_ <= Int.MinValue) == Some(Int.MinValue))
assert(bitBinSearch[Int] (_ <= Int.MaxValue) == Some(Int.MaxValue))
assert(bitBinSearch[Long] (_ <= Long.MinValue) == Some(Long.MinValue))
assert(bitBinSearch[Long] (_ <= Long.MaxValue) == Some(Long.MaxValue))
assert(bitBinSearch[Long] (_ < Long.MinValue) == None)
I don't know Scala, but this is my version of binary searching via bitmasking in Java.
My algorithm works like this:
We start with the highest power of 2 that is <= N and go down to 2^0. At each step, if A[index | i] <= item, we update index to index | i.
After the iteration, index is the position of item if it is present in the array, and otherwise the position of its floor value in A.
int find(int[] A, int item) { // A uses 1-based indexing
    int index = 0;
    int N = A.length;
    for (int i = Integer.highestOneBit(N); i > 0; i >>= 1) {
        int j = index | i;
        if (j < N && A[j] <= item) {
            index = j;
            if (A[j] == item) break;
        }
    }
    return item == A[index] ? index : -1;
}

Longest slice of a binary array that can be split into two parts

How do I find the longest slice of a binary array that can be split into two parts, such that in the left part 0 is the leader and in the right part 1 is the leader?
For example:
[1,1,0,1,0,0,1,1] should return 7, so that the first part is [1,0,1,0,0] and the second part is [1,1].
I tried the following solution and it succeeds in some test cases, but I think it is not efficient:
public static int solution(int[] A)
{
    int length = A.Length;
    if (length < 2 || length > 100000)
        return 0;
    if (length == 2 && A[0] != A[1])
        return 0;
    if (length == 2 && A[0] == A[1])
        return 2;
    int zerosCount = 0;
    int OnesCount = 0;
    int start = 0;
    int end = 0;
    int count = 0;
    // left-hand side
    for (int i = 0; i < length; i++)
    {
        end = i;
        if (A[i] == 0)
            zerosCount++;
        if (A[i] == 1)
            OnesCount++;
        count = i;
        if (zerosCount == OnesCount)
        {
            start++;
            break;
        }
    }
    int zeros = 0;
    int ones = 0;
    // right-hand side
    for (int j = end + 1; j < length; j++)
    {
        count++;
        if (A[j] == 0)
            zeros++;
        if (A[j] == 1)
            ones++;
        if (zeros == ones)
        {
            end--;
            break;
        }
    }
    return count;
}
I agree that brute force has O(n^3) time complexity.
But this can be solved in linear time. I've implemented it in C; here is the code:
int f4(int* src, int n)
{
    int i;
    int sum;
    int min;
    int sta;
    int mid;
    int end;
    // Find middle
    sum = 0;
    mid = -1;
    for (i = 0; i < n - 1; i++)
    {
        if (src[i]) sum++;
        else sum--;
        if (src[i] == 0 && src[i + 1] == 1)
        {
            if (mid == -1 || sum < min)
            {
                min = sum;
                mid = i + 1;
            }
        }
    }
    if (mid == -1) return 0;
    // Find start
    sum = 0;
    for (i = mid - 1; i >= 0; i--)
    {
        if (src[i]) sum++;
        else sum--;
        if (sum < 0) sta = i;
    }
    // Find end
    sum = 0;
    for (i = mid; i < n; i++)
    {
        if (src[i]) sum++;
        else sum--;
        if (sum > 0) end = i + 1;
    }
    return end - sta;
}
This code is tested against brute-force results, and they give the same output. I tested all 10-element binary arrays (1024 combinations).
If you liked this answer, don't forget to vote up :)
As promised, here's the update:
I've found a simple algorithm with linear time complexity to solve the problem.
The math:
Defining the input as int[] bits, we can define this function:
f(x) = -1 if bits[x] == 0, and +1 if bits[x] == 1
The next step is to build a basic integral of this function for the given input:
F(x) = f(x) + F(x - 1)
F(-1) = 0
This integral runs from 0 to x.
F(x) is simply count(bits, 1, 0, x + 1) - count(bits, 0, 0, x + 1), i.e. the number of 1s minus the number of 0s in bits[0..x]. It can be used to define F(x, y) = F(y) - F(x), which equals count(bits, 1, x, y + 1) - count(bits, 0, x, y + 1): the number of 1s minus the number of 0s in the range (x, y]. This is just to show how the algorithm basically works.
The searched slice must fulfil the following condition: in the left part 0 must be leading, in the right part 1 must be leading, and end - start + 1 must be as big as possible. So the searched mid must satisfy F(mid) < F(start) AND F(mid) < F(end). The first step is therefore to search for the minimum of F(x); its position is mid (no other point lies below the minimum, so any other choice of mid results in a smaller, or at best equally big, range end - start + 1). NOTE: this search can be optimized by taking the following into account: f(x) is always either +1 or -1, so if F(x) has risen by n since the last minimum, at least n more steps are required before a new minimum can be reached, and those steps can be skipped.
Given the x for the minimum of F(x), we can simply find start and end: the smallest s and the biggest b in [0, length(bits) - 1] such that F(s) > F(mid) and F(b) > F(mid). Both can be found in linear time.
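As a quick sanity check, here is the example from the question worked through with these definitions. For bits = [1,1,0,1,0,0,1,1], the values F(0), ..., F(7) are 1, 2, 1, 2, 1, 0, 1, 2 (with F(-1) = 0). The minimum is F(5) = 0, so mid = 5; the smallest s with F(s) > F(mid) is s = 0 and the biggest e with F(e) > F(mid) is e = 7, giving end - start = 7. That matches the expected result: the slice is bits[1..7] = [1,0,1,0,0,1,1], which splits into [1,0,1,0,0] (0 leads) and [1,1] (1 leads).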
Pseudocode:
input: int[] bits
output: int

// input verification left out

// transform the input into F(x); F(-1) = 0 is kept implicit
int[] F = new int[length(bits)]
int temp = 0
for int i in [0, length(bits) - 1]
    if bits[i] == 0
        --temp
    else
        ++temp
    F[i] = temp

// search the minimum of F(x)
int midIndex = 0
int mid = F[0]
for int i in [1, length(bits) - 1]
    if F[i] < mid
        midIndex = i
        mid = F[i]
    // optional optimization (see above): when F[i] is n above the current
    // minimum, the next possible minimum is at least n steps away, so those
    // steps may be skipped

// search for the end index: the biggest e > midIndex with F(e) > mid
int end = -1
for int e in [length(bits) - 1, midIndex + 1], descending
    if F[e] > mid
        end = e
        break

// search for the start index: the smallest s < midIndex with F(s) > mid,
// where s == -1 stands for the empty prefix with F(-1) = 0
int start = length(bits)
for int s in [-1, midIndex - 1]
    if (s == -1 ? 0 : F[s]) > mid
        start = s
        break

if end == -1 or start == length(bits)
    return 0 // no valid split around the minimum (e.g. only 1s in the array)
return end - start

Debug binary search code

I came across this interview question. It says we have to do a binary search on a sorted array. Following is the code for that. The code has a bug, so it doesn't give the right answer. You have to change the code so that it produces the correct output.
Condition: you are not allowed to add lines, and you can change only three lines in the code.
int solution(int[] A, int X) {
    int N = A.length;
    if (N == 0) {
        return -1;
    }
    int l = 0;
    int r = N;
    while (l < r) {
        int m = (l + r) / 2;
        if (A[m] > X) {
            r = m - 1;
        } else {
            l = m + 1;
        }
    }
    if (A[r] == X) {
        return r;
    }
    return -1;
}
I tried a lot on my own but kept failing on some test cases.
I hate this question, it's one of those "unnecessary constraint" questions. As others have mentioned, the problem is that you're not returning the value if you find it. Since the stupid instructions say you can't add any code, you can hack it like this:
if (A[m] >= X) {
    r = m;      // keep m in range: A[m] itself may be X
} else {
    l = m + 1;  // leave this line as in the original, otherwise the loop may never terminate
}
This loses the early exit, so it always runs the full log-N iterations, but it converges on the first element >= X. Note that r can still end up equal to N, so the final check also needs a guard (e.g. if (r < N && A[r] == X)), which uses up the third allowed change.
You need to check for the searched value inside the loop, so you can exit early if it's found.
Sample Code:
int solution(int[] A, int X) {
    int N = A.length;
    if (N == 0) {
        return -1;
    }
    int l = 0;
    int r = N - 1;       // changed: with l <= r below, r must start inside the array
    while (l <= r) {     // changed: the element still has to be checked when l == r;
                         // this is the principal problem of your code
        int m = (l + r) / 2;
        if (A[m] == X) { // new code: on every iteration check whether the middle
            return m;    // element is the searched element, for an early exit
        } else if (A[m] > X) {
            r = m - 1;
        } else {
            l = m + 1;
        }
    }
    return -1;
}
Another problem is that you are testing more elements than you need to when the element is in the array.
Try this:
int l = 0;
int r = N - 1; // changed
while (l <= r) { // changed
You have to understand the method that is used. You are looking for the first element >= X.
You want k with i < k <=> A[i] < X.
L is for left. It is the lower limit for k. You have i < l => A[i] < X.
R is for right. It is the upper limit for k. You have i >= r => A[i] >= X.
Your target is to reduce the range and have l = r. To do so you check the value in the middle, at m = (r+l)/2.
If A[m] >= X then m satisfies the conditions for r. You can set r = m.
If A[m] < X then A[m] belongs to the part left of l. So you can set l to the right of m, l = m+1.
Each loop reduces the range between l and r. When you reach l==r, you have found the point I called k. A[k] is the smallest number >= X. You only need to check if it is == X or > X.
From there you should be able to fix the code.
PS: Note that k (a.k.a. l or r) can end up equal to A.length. You need to check for that before reading A[k].
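To tie this back to the loop invariants discussed elsewhere on this page, the two facts above (i < l ==> A[i] < X and i >= r ==> A[i] >= X) can be written down and machine-checked in Dafny. This is only a sketch of the scheme described here, not the three-changed-lines interview answer; the method name and the postconditions are my additions.

method LowerBoundSearch(A: array<int>, X: int) returns (idx: int)
    requires forall p, q :: 0 <= p < q < A.Length ==> A[p] <= A[q]
    ensures idx != -1 ==> 0 <= idx < A.Length && A[idx] == X
    ensures idx == -1 ==> forall k :: 0 <= k < A.Length ==> A[k] != X
{
    var l, r := 0, A.Length;
    while l < r
        invariant 0 <= l <= r <= A.Length
        invariant forall k :: 0 <= k < l ==> A[k] < X            // i < l  ==> A[i] < X
        invariant forall k :: r <= k < A.Length ==> A[k] >= X    // i >= r ==> A[i] >= X
        decreases r - l
    {
        var m := (l + r) / 2;
        if A[m] >= X { r := m; } else { l := m + 1; }
    }
    // l == r is the k from the explanation: the first index whose element is >= X
    if l < A.Length && A[l] == X { idx := l; } else { idx := -1; }
}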
