Segfault when trying to get a byte from memory address - algorithm

Im trying to make Sieve of Eratosthenes work with big numbers
The problem is that it segfaults, and I don't know why.
It works up until about 100k
But the algorithm works if I replace cmp byte [rbx], 0 with cmp dword [rbx], 0
I'm very confused as to why that happens, since all the values in the array are 0s and 1s, so a byte should be enough.
btw with cmp dword [rbx], 0 the results are incorrect so I cant use that
X86 64
; crossOut array, startIndex, step, length   (version from the question)
;   %1 = array base, %2 = starting index, %3 = step, %4 = array length
;   Starting at array+startIndex, advances by `step` bytes at a time and
;   sets every byte that is still 0 to 1.
;   On exit: rdi = number of bytes changed from 0 to 1, and the flags
;   reflect "cmp rdi, 0".
;   Overwrites rax, rbx, rcx, rbp and rdi.
; NOTE(review): rcx starts at 0 while rbx starts at array+%2, so the bound
;   check "rcx < length" ignores the starting index; with a non-zero %2 the
;   pointer in rbx can move past array+length before the loop exits.
%macro crossOut 4
xor rdi, rdi ;edi keeps track of how many numbers were crossed out
;if 0 end loop
mov rbx, %1 ;array
add rbx, %2 ;move position to starting index
mov rax, %3 ;every nth number to be crossed out
mov rbp, %4 ; array length
mov rcx, 0 ;counter
%%loop:
add rcx, rax
cmp rcx, rbp
jge %%exit
add rbx, rax
cmp byte [rbx], 0
je %%crossout
jmp %%loop
%%crossout:
mov byte [rbx], 1
inc rdi
jmp %%loop
%%exit:
cmp rdi, 0
%endmacro

See this CodeReview question for the OP's full program.
You are reading past the end of the numbers buffer because your code does not take into account the 2nd macro parameter (an offset into the array)!
The first time the offset in the array is 0, and the code will run ok. But later you add to the offset while keeping the array length the same, and so memory that does not belong to the array is addressed.
<---- RBP=10 ----->
0,0,0,0,0,0,0,0,0,0
First time RCX allows 4 iterations {2,4,6,8} less than 10:
<---- RBP=10 ----->
RBX
v
0,0,1,0,1,0,1,0,1,0
    ^   ^   ^   ^
    1°  2°  3°  4°
Second time RCX allows 3 iterations {3,6,9} less than 10:
<---- RBP=10 ----->
  RBX
  v
0,0,1,0,1,0,1,1,1,0,?
        ^     ^     ^
        1°    2°    3°
The 3° is past the buffer and, depending on total array length, at some point this buffer overrun will segfault!
The quick fix is to initialize RCX at the value for the starting index %2 instead of zeroing it.
; crossOut array, startIndex, step, length   (index-bounded variant)
;   %1 = array base, %2 = starting index, %3 = step, %4 = array length
;   Marks (sets to 1) every still-zero byte at startIndex+step,
;   startIndex+2*step, ... while that index is below `length`.
;   On exit: rdi = number of bytes newly marked; flags from "cmp rdi, 0".
;   Overwrites rax, rbx, rcx, rbp and rdi.
%macro crossOut 4
        xor     edi, edi                ; newly-marked count = 0 (32-bit write clears all of rdi)
        mov     rbx, %1                 ; rbx -> array base
        add     rbx, %2                 ; rbx -> array[startIndex]
        mov     rax, %3                 ; rax = step
        mov     rbp, %4                 ; rbp = array length
        mov     rcx, %2                 ; rcx = index rbx points at (startIndex, not 0)
%%step:
        add     rcx, rax                ; index of the next candidate
        cmp     rcx, rbp
        jge     %%finished              ; index reached the array length: stop
        add     rbx, rax                ; rbx -> array[rcx]
        cmp     byte [rbx], 0
        jne     %%step                  ; already marked, nothing to count
        mov     byte [rbx], 1           ; mark it
        inc     rdi
        jmp     %%step
%%finished:
        cmp     rdi, 0                  ; ZF=1 when this pass marked nothing
%endmacro
A better fix is to establish an absolute last address that you have RBX compare against:
; crossOut array, startIndex, step, length   (end-address variant)
;   %1 = array base, %2 = starting index, %3 = step, %4 = array length
;   Marks (sets to 1) every still-zero byte reached by stepping `step`
;   bytes at a time from array+startIndex, stopping as soon as the pointer
;   is at or beyond array+length.
;   On exit: rdi = number of bytes newly marked; flags from "cmp rdi, 0".
;   Overwrites rax, rbx, rcx and rdi.
%macro crossOut 4
        xor     edi, edi                ; newly-marked count = 0
        mov     rbx, %1                 ; rbx -> array base
        mov     rcx, %4                 ; rcx = array length
        lea     rcx, [rcx + rbx]        ; rcx -> one past the last array byte
        add     rbx, %2                 ; rbx -> array[startIndex]
        mov     rax, %3                 ; rax = step
%%advance:
        add     rbx, rax                ; move to the next candidate
        cmp     rbx, rcx
        jnb     %%done                  ; unsigned: pointer reached the end address
        cmp     byte [rbx], 0
        jne     %%advance               ; already marked
        mov     byte [rbx], 1           ; mark it
        inc     rdi
        jmp     %%advance
%%done:
        cmp     rdi, 0                  ; ZF=1 when this pass marked nothing
%endmacro
Please notice that
je %%crossout
jmp %%loop
%%crossout:
is better written as
jne %%loop
%%crossout:

Related

segmentation fault in x86 trying to do bubble sort

I am trying to implement bubble sort in assembly. Here is my code. I keep getting segmentation fault. I have a function down below. I have been trying to figure this out but I couldn't find a compiler for x86 and I have been checking with my code to check what is wrong but to no avail.
here is my function code:
; bubble -- bubble-sort attempt from the question (32-bit, stack arguments).
;   [ebp+8]  = address of the array (indexed as dwords via edi*4)
;   [ebp+12] = number of elements
; NOTE(review): this is the asker's non-working code, reproduced as posted.
bubble:
push ebp
mov ebp, esp
push ecx
push edx
push edi
push esi
push ebx
push eax
; NOTE(review): six registers are pushed above, but only five are popped at
; "done", and the first pop (ebx) takes the slot that holds the saved eax.
mov eax, 0
mov ecx, [ebp+12] ; number of elements in the array
mov edx, [ebp+8]; address of array
mov edi, 0
mov esi, 0
dec ecx; n - 1
mov esi, 1
sortOuter:
cmp esi, 0
jg sort
; NOTE(review): the jump target is the very next instruction, so execution
; continues at "sort" whether or not the jump is taken.
sort:
mov esi, 0 ;count
check:
cmp edi, ecx ; i < n - 1
jl sortInner
; NOTE(review): same pattern -- "sortInner" is reached on both outcomes, so
; this comparison never ends the loop.
sortInner:
mov ebx, [edx+edi*4] ; mov array[i+1] to ebx
cmp [edx], ebx ; cmp array[i] to array[i+1]
jle noswap
swap:
mov eax, ebx ; mov array[i+1] to eax
mov ebx, [edx] ; mov array[i] to array[i+1]
mov [edx], eax ; mov array[i+1] to array[i]
inc esi ; count++
noswap:
inc edi ; i++
jmp check
; NOTE(review): the unconditional "jmp check" above means the next jump is
; never executed, and nothing in this routine jumps to "done".
jmp sortOuter
done:
call print_nl
pop ebx
pop esi
pop edi
pop edx
pop ecx
mov esp, ebp
pop ebp
ret
The segmentation error comes from the infinite loop that you have created with
check:
cmp edi, ecx ; i < n - 1
jl sortInner
sortInner:
If EDI is less than ECX you jump to sortInner, but if it isn't you fall-through into sortInner. No matter, you always end up running the code at sortInner and because the memory addresses that the code uses keep growing, at some point you will be trying to read memory that you don't have access to, hence the segmentation error.
Now if you were to correct this, then there's a second infinite loop waiting.
sortOuter:
cmp esi, 0
jg sort
sort:
Other errors include:
Resetting ESI instead of EDI at the start of the inner loop
Not swapping elements at all but always writing the smallest value in the first array element
Forgetting to restore the register EAX
This is a working BubbleSort. Don't just copy it, but find out how it functions!
In an array with N elements we have to do N-1 comparisons at most (to have the greatest value bubble towards the rear).
Because the InnerLoop uses a counter that gets initialized from the ever decreasing OuterLoop counter, with each iteration of the OuterLoop the portion of the array that is processed in the InnerLoop gets smaller. The portion of the array that we then no longer process contains the greatest elements that have bubbled towards the end of the array, hence the name BubbleSort.
Provisions have been made for an empty array or an array that has but 1 element. Always include some code for the special cases!
; bubble -- in-place ascending bubble sort of an array of signed dwords.
;   Stack arguments (addressed through ebp after the prologue):
;     [ebp + 8]  = address of the array
;     [ebp + 12] = number of elements (N)
;   Every general-purpose register used as scratch (eax, ebx, ecx, edx, esi)
;   is saved on entry and restored on exit. The arguments stay on the stack
;   (plain ret). Flags are not preserved.
;   N = 0 and N = 1 return immediately without touching the array.
bubble:
        push    ebp
        mov     ebp, esp
        push    eax                     ; save every register modified below
        push    ebx
        push    ecx
        push    edx
        push    esi
        mov     ecx, [ebp + 12]         ; Number of elements in the array
        sub     ecx, 1                  ; First time we do n = N-1
        jbe     Done                    ; Array is empty or has but 1 element
OuterLoop:
        mov     edx, ecx                ; Current value of the OuterLoop counter (n)
        mov     esi, [ebp + 8]          ; Address of array
InnerLoop:
        mov     eax, [esi]              ; eax = current element
        mov     ebx, [esi + 4]          ; ebx = its right-hand neighbour
        cmp     eax, ebx
        jng     NoSwap                  ; signed: already in order
        mov     [esi], ebx              ; swap so the larger value moves right
        mov     [esi + 4], eax
NoSwap:
        add     esi, 4                  ; advance to the next pair
        dec     edx
        jnz     InnerLoop
        dec     ecx                     ; Next times we do n = n-1
        jnz     OuterLoop
Done:
        pop     esi                     ; restore in reverse order of the pushes
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        pop     ebp
        ret

Open file, delete zeros, sort it - NASM

I am currently working on some problems and this is the one I am having trouble with. To make it all clear, I am a beginner, so any help is more than welcome.
Problem:
Sort the content of a binary file in descending order. The name of the file is passed as a command line argument. File content is interpreted as four-byte positive integers, where value 0, when found, is not written into the file. The result must be written in the same file that has been read.
The way I understand is that I have to have a binary file. Open it. Get its content. Find all characters while keeping in mind those are positive, four-byte integers, find zeros, get rid of zeros, sort the rest of the numbers.
We are allowed to use glibc, so this was my attempt:
section .data
warning db 'File does not exist!', 10, 0
argument db 'Enter your argument.', 10, 0
mode dd 'r+'
opened db 'File is open. Time to read.', 10, 0
section .bss
content resd 10
counter resb 1
section .text
extern printf, fopen, fgets, fputc
global main
; main -- asker's attempt: open the file named by the first command-line
; argument with fopen, read from it with fgets, then walk the data with lodsd.
; Entered with rdi = argument count and rsi = argument vector (the code
; compares rdi with 2 and loads the file name from [rsi + 8]).
; NOTE(review): this is the asker's non-working code, reproduced as posted.
main:
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
;location of argument's address
push rsi
cmp rdi, 2
je .openfile
mov rdi, argument
mov rax, 0
call printf
jmp .end
.openfile:
pop rbx
;First real argument of command line
mov rdi, [rbx + 8]
mov rsi, mode
mov rax, 0
call fopen
; NOTE(review): only the low byte of the value returned in rax is tested.
cmp al, 0
je .end
push rax
mov rdi, opened
mov rax, 0
call printf
.readfromfile:
mov rdi, content
mov rsi, 12 ;I wrote 10 numbers in my file
pop rdx
mov rax, 0
call fgets
; NOTE(review): again only the low byte of the returned value is tested.
cmp al, 0
je .end
push rax
; NOTE(review): "tekst" is not defined anywhere in the code shown.
mov rsi, tekst
pop rdi
.loop:
lodsd
inc byte[counter]
; NOTE(review): '0' is the character constant (0x30), not the number 0.
cmp eax, '0'
jne .loop
;this is the part where I am not sure what to do.
;I am trying to delete the zero with backspace, then use space and
;backspace again - I saw it here somewhere as a solution
; NOTE(review): esi is overwritten below, yet the lodsd at .loop keeps
; reading through rsi when control jumps back there.
mov esi, 0x08
call fputc
mov esi, 0x20
call fputc
mov esi, 0x08
call fputc
cmp eax, 0
je .end
jmp .loop
.end:
pop rdi
pop rsi
pop rbx
mov rsp, rbp
pop rbp
ret
So, my idea was to open the file, find zero, delete it by using backspace and space, then backspace again; Continue until I get to the end of the file, then sort it. As it can be seen I did not attempt to sort the content because I cannot get program to do the first part for me. I have been trying this for couple of days now and everything is getting foggy.
If someone can help me out, I would be very grateful. If there is something similar to this problem, feel free to link it to me. Anything that could help, I am ready to read and learn.
I am also unsure about how much information I have to give. If something is unclear, please point it out to me.
Thank you
For my own selfish fun, an example of memory area being "collapsed" when dword zero value is detected:
to build in linux with NASM for target ELF64 executable:
nasm -f elf64 so_64b_collapseZeroDword.asm -l so_64b_collapseZeroDword.lst -w+all
ld -b elf64-x86-64 -o so_64b_collapseZeroDword so_64b_collapseZeroDword.o
And for debugger I'm using edb (built from sources) (the executable doesn't do anything observable by user, when it works correctly, it's supposed to be run in debugger single-stepping over instructions and having memory view over the .data segment to see how the values are moved around in memory).
source file so_64b_collapseZeroDword.asm
segment .text
collapseZeroDwords:
; Compacts the dword array [rsi, rdx) in place by dropping every zero element
; while keeping the remaining values in their original order.
; Custom register convention (meant for calls from assembly only):
;   in:       rsi = address of the first element
;             rdx = address just past the last element (an "end()" pointer)
;   out:      rdi = new "just past the last element" address
;   clobbers: rax, rsi, rdi
; Memory between the new end and the old end is left as it was (not cleared).
; lodsd/stosd are used to step forward 4 bytes at a time, which relies on the
; direction flag being clear.

; Phase 1: skip the leading elements up to the first zero -- they stay put.
.scanForZero:
        cmp     rsi, rdx
        jnb     .nothingRemoved         ; reached end() without seeing a zero
        lodsd                           ; eax = [rsi], rsi += 4
        test    eax, eax
        jnz     .scanForZero
; Phase 2: a zero was found; later non-zero values are copied down over it.
        lea     rdi, [rsi-4]            ; write cursor = slot of that first zero
.compact:
        cmp     rsi, rdx
        jnb     .finished               ; read cursor hit end(): rdi is the new end
        lodsd                           ; eax = [rsi], rsi += 4
        test    eax, eax
        jz      .compact                ; zero: drop it (skip the store)
        stosd                           ; [rdi] = eax, rdi += 4
        jmp     .compact
.nothingRemoved:
        mov     rdi, rdx                ; array unchanged: return the original end()
.finished:
        ret
; Test driver for collapseZeroDwords: runs six hard-coded cases and exits.
; Nothing is printed; each case ends with a cmp whose result (ZF) is meant to
; be inspected in a debugger, together with the .data contents.

TEST_ELEMS      equ 4                   ; every test array holds 4 elements
CANARY          equ 0xCCCCCCCC          ; guard value to spot over-read/over-write
SYS_EXIT        equ 60                  ; Linux x86-64 exit syscall number

; collapseCase array, expectedCount
;   Calls collapseZeroDwords on the TEST_ELEMS-element array %1, then compares
;   the returned end pointer (rdi) with array + expectedCount*4.
%macro collapseCase 2
        lea     rsi, [%1]
        lea     rdx, [%1+TEST_ELEMS*4]
        call    collapseZeroDwords
        cmp     rdi, %1+%2*4
%endmacro

global _start
_start:
        collapseCase test1, 4           ; no zero collapsed
        collapseCase test2, 3           ; one zero (last element)
        collapseCase test3, 3           ; one zero (first element)
        collapseCase test4, 2           ; two zeros
        collapseCase test5, 2           ; two zeros
        collapseCase test6, 0           ; four zeros

        ; exit back to linux with status 0
        mov     eax, SYS_EXIT
        xor     edi, edi
        syscall

segment .data
        dd      CANARY                  ; guard in front of the first array
; each array is followed by its own guard value
test1   dd      71, 72, 73, 74, CANARY
test2   dd      71, 72, 73, 0, CANARY
test3   dd      0, 71, 72, 73, CANARY
test4   dd      0, 71, 0, 72, CANARY
test5   dd      71, 0, 72, 0, CANARY
test6   dd      0, 0, 0, 0, CANARY
I tried to comment it extensively to show what/why/how it is doing, but feel free to ask about any particular part. The code was written with simplicity in mind, so it doesn't use any aggressive performance optimizations (like a vectorized search for the first zero value, etc.).

Mach-O 64-bit format does not support 32-bit absolute addresses, cannot put byte in resb buffer [duplicate]

This question already has an answer here:
Mach-O 64-bit format does not support 32-bit absolute addresses. NASM Accessing Array
(1 answer)
Closed 4 years ago.
%include "along64.inc"
default rel
section .data
EnterMsg : db "Enter a number to add to the list; enter 0 when you done" ,0ah,0
ExitMsg : db "The sorted list: " ,0ah,0
InputError1 : db "Input Error; Integers Only" ,0ah,0
InputError2 : db "Input Error; Maximum list is 100" ,0ah,0
InputError3 : db "Input Error; Enter AT LEAST ONE Int" ,0ah,0
segment .bss
a: resq 100 ;creates a new array with 100 values set to 0
section .text
global main
; main -- asker's program: reads integers with ReadInt until 0 is entered,
; stores them in the array "a", then attempts to sort and print them.
; NOTE(review): this is the asker's non-working code, reproduced as posted;
; the trailing comments are the asker's own and do not always match the
; instruction they sit next to.
main:
mov rbx,0
mov rcx,0 ;sets counter to 0
mov rdx, EnterMsg ;moves EnterMsg to rdx to be called by WriteString
call WriteString ;prints EnterMsg
jmp read ;calls readInput label
read: ;reads the input
call ReadInt ;reads integer
jo invalidInput ;jumps if not an integer value
cmp rcx, 100 ;compares rcx and 100
jg tooManyInts ;if rcx is already 100, then cannot add int to array, so jump to error
cmp rax,0 ;tests for the 0 input
je Loop1 ;jumps to Loop1 if zero
; NOTE(review): "a" is reserved with resq (8-byte elements), but it is indexed
; with a scale of 4 while a full 64-bit register is stored, so neighbouring
; entries overlap.
mov [a +(rcx*4)], rax ;adds read integer to array
inc rcx ;increments counter
jmp read ;loops if input is not 0
Loop1:
cmp rcx, 2 ;compares rcx to 2
; NOTE(review): jmp is unconditional, so the result of the cmp is ignored and
; the sorting code below is never reached by falling through.
jmp badInput ;jumps to badinput if less than 2
push rcx ;pushes rcx for number of numbers to print
dec rcx ;decrements rcx because 0-based indexing
Loop2:
push rcx ;saves outer loop count
mov rbx, 0 ;sets rbx to 0 because 0 based indexing
Loop3:
mov rax,qword[a +(rbx * 4)] ;moves current value of array, as determined by value of
;rbx*4, to rax
cmp [a + (rbx * 4) + 4], rax ;compares next value of array with rax
jg Loop4 ;if greater, jumps to L4
xchg rax,qword[a + (rbx*4)+4] ;if less, exchanges values
mov qword[a + (rbx * 4)], rax ;and moves new value of rax to the current value of the array
Loop4:
inc rbx ;increments rbx to iterate through array
loop Loop3 ;inner loop iterates through array values once
pop rcx ;pops rcx that was previously pushed to reset count for outer loop
loop Loop2 ;loops back to L2 to iterate through all values of array again
SetWrite:
mov rbx, 0 ;resets rbx to 0
pop rcx ;pops intial count for rcx, to determine how many times
;writeint should be called
call Crlf ;prints new line
mov rdx, ExitMsg ;moves ExitMsg to rdx
call WriteString ;writes ExitMsg
WriteArray:
; NOTE(review): the destination operand comes first, so this stores rax INTO
; the array rather than loading from it; it also indexes with rcx, not rbx.
mov [a + rcx],rax ;moves values of array to rax, where WriteInt calls from
call WriteInt ;writes current value of rax
call Crlf ;prints new line
add rbx, 4 ;increments rbx by 4
loop WriteArray ;loops rcx times
jmp exit ;jumps to exit
WriteArrayOne:
call Crlf ;prints new line
mov rdx, ExitMsg ;moves ExitMsg to rdx
call WriteString ;writes ExitMsg
; NOTE(review): as in WriteArray, this is a store into the array, not a load.
mov qword[a +rbx],rax ;moves first value of array to rax, where WriteInt calls from
call WriteInt ;writes value of rax
call Crlf ;prints new line
jmp exit ;jumps to exit
invalidInput: ;jumps here if input is not an integer
mov rdx,InputError1 ;moves InputError1 to rdx
call WriteString ;prints InputError1
jmp exit ;exits
tooManyInts:
mov rdx, InputError2 ;moves InputError2 to rdx
call WriteString ;writes InputError2
jmp exit ;exits
badInput:
cmp rcx, 1 ;if rcx == 1, prints that one value
; NOTE(review): unconditional jump again -- the InputError3 path below is
; never executed.
jmp WriteArrayOne ;jumps to WriteOne which writes first int in the array
mov rdx, InputError3 ;if zero, moves InputError3 to rdx
call WriteString ;writes InputError3
jmp exit ;exits
exit: ;exits program
; NOTE(review): no syscall number is loaded into a register before this
; interrupt in the code shown -- confirm what is intended here.
int 80h
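Your [a+rbx*4] and similar operands all assemble to absolute addressing with a 32-bit displacement, which the Mach-O 64-bit format does not support. `default rel` does not help here, because RIP-relative addressing cannot be combined with an index register. You should load the address into a register first, then apply the indexing. For example: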
lea rdx, [a] ; rdx = address of the array a (no index register involved)
mov [rdx + rcx*8], rax ; store rax into the rcx-th 8-byte element
Note that a qword is 8 bytes, so you should scale by that, or, if you have 4 byte integers you need to change to 32 bit register.

Finding Smallest Number in List

My goal in this code is to find the smallest number in the list. I used bubble sort method in this case; unfortunately, the code is not giving me the smallest/minimum number. Please take a look, Thanks:
include irvine32.inc
.data
input byte 100 dup(0)
stringinput byte "Enter any string: ",0
totallength byte "The total length is: ",0
minimum byte "The minimum value is: ",0
.code
; stringLength -- counts bytes starting at the address passed on the stack
; until a zero is read; the count is returned in eax.
;   [ebp+8] = address of the string
;   "ret 4" removes the 4-byte argument from the stack on return.
;   ebx and ecx are saved and restored.
; NOTE(review): this is the asker's code, reproduced as posted.
stringLength proc
push ebp
mov ebp, esp
push ebx
push ecx
mov eax, 0
mov ebx, [ebp+8]
L1:
; NOTE(review): this loads four bytes at once although ebx advances one byte
; per iteration, so the loop ends only when four consecutive bytes are zero.
mov ecx, [ebx] ;you can use ecx, cx, ch, cl
cmp ecx, 0 ;you can use ecx, cx, ch, cl
JE L2
add ebx, 1
add eax, 1
jmp L1
L2:
pop ecx
pop ebx
; NOTE(review): this copies esp into ebp (same direction as the prologue);
; an epilogue would normally be "mov esp, ebp" -- confirm intent.
mov ebp, esp
pop ebp
ret 4
stringLength endp
; BubbleSort -- makes 50 outer passes over the bytes of "input"; each pass
; performs 14 compare steps, and every step compares the byte at [esi] with
; the byte at [esi+1], swaps them when the first is greater (signed compare),
; and then advances esi by 2.
;   The buffer is addressed directly through OFFSET input; the stack argument
;   is not read, only removed by "ret 4".
;   Overwrites ebx and esi (edx is saved and restored; ECX is in "uses").
; NOTE(review): this is the asker's code, reproduced as posted. Because esi
; advances by 2, the pairs (1,2), (3,4), ... are never compared.
BubbleSort PROC uses ECX
push edx
xor ecx,ecx
mov ecx, 50
OUTER_LOOP:
push ecx
xor ecx,ecx
mov ecx,14
mov esi, OFFSET input
COMPARE:
xor ebx,ebx
xor edx,edx
mov bl, byte ptr ds:[esi]
mov dl, byte ptr ds:[esi+1]
cmp bl,dl
jg SWAP1
CONTINUE:
add esi,2
loop COMPARE
mov esi, OFFSET input
pop ecx
loop OUTER_LOOP
jmp FINISHED
SWAP1:
xchg bl,dl
mov byte ptr ds:[esi+1],dl
mov byte ptr ds:[esi],bl
jmp CONTINUE
FINISHED:
pop edx
ret 4
BubbleSort ENDP
; main -- asker's driver: prompts, reads a string into "input", prints the
; two labels, calls BubbleSort and prints "input" again.
; NOTE(review): this is the asker's code, reproduced as posted.
main proc
call clrscr
mov edx, offset stringinput
call writeString
mov edx, offset input
call writeString
; NOTE(review): stringLength reads its argument from [ebp+8] and returns with
; "ret 4", but nothing is pushed before this call; it also runs before
; readstring has filled "input".
call stringLength
mov edx, offset input
mov ecx, sizeof input
call readstring
call crlf
mov edx,offset totallength
call writestring
call writedec
call crlf
mov edx, offset minimum
call crlf
call writeString
push offset input
call BubbleSort
; NOTE(review): the whole "input" buffer is passed to writeString here, not a
; single minimum value.
mov edx, offset input
call writeString
call crlf
exit
main endp
end main
I haven't looked over your code, because sorting is an overcomplicated method for what you want to do. Not only that, but most of us don't pay much attention to uncommented code; it just takes too long to figure out what you're trying to do.
Simply iterate through the entire list, starting with 255 (0FFh) in AL, say. Each time you come across a number that is smaller than the one in AL, replace AL with that number; when the loop is finished, AL will hold the lowest value.
If you need to know where it is in the list, you could maybe use AH which would be the difference between start address and current address. Knowledge of the instruction set is essential as finding the length of the string can be simplified by;
; Length of the NUL-terminated string at "input", returned in CX.
; Requires ES:DI to address the buffer and the direction flag to be clear.
; REPNE (not plain REP, which encodes REPE for SCAS) keeps scanning while the
; byte differs from AL. After scanning n characters plus the terminator,
; CX = -(n+2), so NOT gives n+1 and DEC gives n.
mov di, input ; Point to beginning of buffer
mov cx, -1 ; for a maximum of 65535 characters
xor al, al ; Looking for NULL
repne scasb ; advance DI until the NUL has been scanned (or CX reaches 0)
not cx ; CX = bytes scanned, terminator included
dec cx ; CX = length of string.
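Remember, ES needs to point to the data segment, because SCASB compares AL with the byte at ES:DI, and the direction flag must be clear so that DI advances.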

First macro assembler program, can't figure our unhandled exception

This is my first assembler program in masm32, using Visual Studio 2012. This is just one procedure in a program that converts input into an ASCII chart with decimal, hex and ASCII output. I've been trying to figure this out for 8 hours, and I know it's going to be something really simple.
It gets through all the computation, but during the pop and return phase it crashes into an unhandled exception when accessing the EIP(i think). Also, all my registers are set to 0 except ebx and I don't know why, but it may have something to do with it.
This is just the procedure to convert from the input string to a decimal value.*
My inputStr is:
inputStr db 16 DUP(0)
.code
; main -- clears eax/ebx/ecx/edx, calls PrintString with esi = outputStr1,
; calls GetString with esi = inputStr, then calls StrtoNum and exits.
; PrintString, GetString and outputStr1 are not part of the code shown.
; NOTE(review): StrtoNum reads its input through esi; presumably GetString
; leaves esi pointing at inputStr -- confirm.
main proc
xor eax, eax
xor ebx, ebx
xor ecx, ecx
xor edx, edx
lea esi, outputStr1
call PrintString
lea esi, inputStr
call GetString
call StrtoNum
invoke ExitProcess, 0 ;****This is the next line when it crashes***
main endp
; StrtoNum -- asker's attempt to convert the decimal digit string at [esi]
; into a number. All registers and the flags are saved with pushad/pushfd on
; entry and restored with popfd/popad before returning.
; NOTE(review): this is the asker's code, reproduced as posted. Points that
; follow directly from the instructions below:
;   * At the terminating NUL the first "je ConvertDone" is taken, so the
;     second test of al against 0 can never succeed and ConvertDec is never
;     entered (the only other jump to it is inside ConvertDec itself).
;   * Every digit is pushed with "push ax", and only ConvertDec pops those
;     words. When ConvertDone is reached after at least one digit, the pushed
;     digits are still on top of the stack, so popfd, popad and ret take
;     their values from the wrong stack slots.
;   * The running total is kept in dx, but popad reloads edx (and ebx, making
;     the "mov ebx, 0" ineffective), so no result reaches the caller.
StrtoNum proc ;going to hex first
pushad
pushfd
mov bl, 1 ;mov 1 into bl for later multiplying
whilemorechar:
mov al,byte ptr [esi]
cmp al, 0 ;stuff
je ConvertDone ;if null then we are done here
;esi is pointing to the string to be converted
;cmp bl,0
;jnz decrement
cmp al, 0h
je ConvertDec
sub al, 30h ;get first string byte to dec number 0-9
push ax ;push last digit to stack
inc esi ;gets to next string byte
inc cl ;make note of decimal position in string
jmp whilemorechar ;jump for next place in string
ConvertDec: ;reverse is done, now turn into decimal number
cmp cl, 0 ;compare counter to 0
jz ConvertDone ;if counter is 0, start comparing numbers
pop ax ;pop last on stack
mul bl ;multiply place value by input byte
add dx, ax ;add decimal value into dl
mov al, 10d ;move 10 into al
mul bx ;multiply 10 and bl value to get next
mov bx, ax ;mov decimal place into bl for next loop
dec cl ;decrement counter
jmp ConvertDec ;loop through again for next decimal place
ConvertDone:
mov ebx, 0
popfd ;pop flags
popad ;pop registers
ret ;return to caller
StrtoNum endp

Resources