increasing the PPA limitation of a design - vhdl

i finished creating a design in vhdl, of the algorithm sha256.
now im trying to get my design level higher by understanding how to change the code so i will get higher result of power, performance and area. the end game goal is trying to get the best netlist in my designs so i can get them into a chip.
so for my design: i got max frequency of 85 mhz in cyclone 4 FPGA with the usage of 8,500 total logic elements, 55% of the FPGA.
the main issue that i think made my design so big is that i wrote the code in a hierarchy manner, a lot of "elsif" and variables. and one other thing that could be better, i think, is if the quartus would implement my memory design as a memory and not with logic element, even that its only array of 16 words of 32 bits.
what you guys think i can improve ?
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
USE ieee.numeric_std.ALL;
entity padding is
port( clk : in std_logic;
rst : in std_logic;
ward : in std_logic_vector(31 downto 0);
ready : out std_logic;
hash : out std_logic_vector(255 downto 0));
end;
architecture padding of padding is
component sha256
port ( clk : in std_logic;
rst : in std_logic;
enable : in std_logic;
ward : in std_logic_vector(31 downto 0);
k : in std_logic_vector(31 downto 0);
h0 : in std_logic_vector(31 downto 0);
h1 : in std_logic_vector(31 downto 0);
h2 : in std_logic_vector(31 downto 0);
h3 : in std_logic_vector(31 downto 0);
h4 : in std_logic_vector(31 downto 0);
h5 : in std_logic_vector(31 downto 0);
h6 : in std_logic_vector(31 downto 0);
h7 : in std_logic_vector(31 downto 0);
ready : out std_logic;
digest : out std_logic_vector(255 downto 0));
end component;
type kconst is array ( 0 to 63 ) of std_logic_vector(31 downto 0);
type mem is array ( 0 to 15 ) of std_logic_vector(31 downto 0);
signal k : kconst := (x"428a2f98", x"71374491", x"b5c0fbcf", x"e9b5dba5", x"3956c25b", x"59f111f1", x"923f82a4", x"ab1c5ed5",
x"d807aa98", x"12835b01", x"243185be", x"550c7dc3", x"72be5d74", x"80deb1fe", x"9bdc06a7", x"c19bf174",
x"e49b69c1", x"efbe4786", x"0fc19dc6", x"240ca1cc", x"2de92c6f", x"4a7484aa", x"5cb0a9dc", x"76f988da",
x"983e5152", x"a831c66d", x"b00327c8", x"bf597fc7", x"c6e00bf3", x"d5a79147", x"06ca6351", x"14292967",
x"27b70a85", x"2e1b2138", x"4d2c6dfc", x"53380d13", x"650a7354", x"766a0abb", x"81c2c92e", x"92722c85",
x"a2bfe8a1", x"a81a664b", x"c24b8b70", x"c76c51a3", x"d192e819", x"d6990624", x"f40e3585", x"106aa070",
x"19a4c116", x"1e376c08", x"2748774c", x"34b0bcb5", x"391c0cb3", x"4ed8aa4a", x"5b9cca4f", x"682e6ff3",
x"748f82ee", x"78a5636f", x"84c87814", x"8cc70208", x"90befffa", x"a4506ceb", x"bef9a3f7", x"c67178f2");
signal first_mem : mem:= ( x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000",
x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000");
signal second_mem : mem:= ( x"00000000", x"00000000", x"00000000", x"00000000", x"80000000", x"00000000", x"00000000", x"00000000",
x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000000", x"00000280");
signal enable : std_logic;
signal enable1 : std_logic;
signal enable2 : std_logic;
signal r_d : std_logic;
signal k_in : std_logic_vector(31 downto 0);
signal ward_in : std_logic_vector(31 downto 0);
signal ward_in1 : std_logic_vector(31 downto 0);
signal ward_in2 : std_logic_vector(31 downto 0);
signal h0,h1,h2,h3 : std_logic_vector(31 downto 0);
signal h4,h5,h6,h7 : std_logic_vector(31 downto 0);
signal temp : std_logic_vector(255 downto 0);
signal temp1 : std_logic_vector(255 downto 0);
signal gama0 : std_logic_vector(31 downto 0);
signal gama1 : std_logic_vector(31 downto 0);
signal gama2 : std_logic_vector(31 downto 0);
signal gama3 : std_logic_vector(31 downto 0);
signal gama4 : std_logic_vector(31 downto 0);
signal gama5 : std_logic_vector(31 downto 0);
begin
sha1: sha256 port map(
clk ,
rst ,
enable ,
ward_in ,
k_in ,
h0 ,
h1 ,
h2 ,
h3 ,
h4 ,
h5 ,
h6 ,
h7 ,
enable1 ,
temp );
sha2: sha256 port map(
clk ,
rst ,
enable1 ,
ward_in1 ,
k_in ,
temp(255 downto 224),
temp(223 downto 192),
temp(191 downto 160),
temp(159 downto 128),
temp(127 downto 96 ),
temp(95 downto 64 ),
temp(63 downto 32 ),
temp(31 downto 0 ),
enable2 ,
temp1 );
sha3: sha256 port map(
clk ,
rst ,
r_d ,
ward_in2 ,
k_in ,
h0 ,
h1 ,
h2 ,
h3 ,
h4 ,
h5 ,
h6 ,
h7 ,
ready ,
hash );
h0 <= x"6a09e667";
h1 <= x"bb67ae85";
h2 <= x"3c6ef372";
h3 <= x"a54ff53a";
h4 <= x"510e527f";
h5 <= x"9b05688c";
h6 <= x"1f83d9ab";
h7 <= x"5be0cd19";
process (clk,rst)
variable i : integer;
variable j : integer;
variable m : integer;
variable n : integer;
variable l : integer;
begin
if rst = '0' then
enable <= '0';
i := 0;
j := 0;
m := 9;
n := 15;
l := 8;
elsif clk'event and clk = '1' then
if j = 16 then
j := 0;
end if;
if m = 16 then
m := 0;
end if;
if n = 16 then
n := 0;
end if;
if l = 16 then
l := 0;
end if;
if i = 193 then
i := 0;
elsif i > 144 then
first_mem(n) <= gama4 + first_mem(l) + gama5 + first_mem(n);
ward_in2 <= gama4 + first_mem(l) + gama5 + first_mem(n);
k_in <= k(i-129);
elsif i > 136 then
ward_in2 <= first_mem(n);
k_in <= k(i-129);
elsif i = 136 then
first_mem(n) <= temp1(31 downto 0);
ward_in2 <= temp1(31 downto 0);
k_in <= k(i-129);
elsif i = 135 then
first_mem(n) <= temp1(63 downto 32);
ward_in2 <= temp1(63 downto 32);
k_in <= k(i-129);
elsif i = 134 then
first_mem(n) <= temp1(95 downto 64);
ward_in2 <= temp1(95 downto 64);
k_in <= k(i-129);
elsif i = 133 then
first_mem(n) <= temp1(127 downto 96);
ward_in2 <= temp1(127 downto 96);
k_in <= k(i-129);
elsif i = 132 then
first_mem(n) <= temp1(159 downto 128);
ward_in2 <= temp1(159 downto 128);
k_in <= k(i-129);
elsif i = 131 then
first_mem(n) <= temp1(191 downto 160);
ward_in2 <= temp1(191 downto 160);
k_in <= k(i-129);
elsif i = 130 then
first_mem(n) <= temp1(223 downto 192);
ward_in2 <= temp1(223 downto 192);
k_in <= k(i-129);
elsif i = 129 then
first_mem(15) <= x"00000100";
first_mem(14) <= x"00000000";
first_mem(13) <= x"00000000";
first_mem(12) <= x"00000000";
first_mem(11) <= x"00000000";
first_mem(10) <= x"00000000";
first_mem(9) <= x"00000000";
first_mem(8) <= x"80000000";
first_mem(n) <= temp1(255 downto 224);
ward_in2 <= temp1(255 downto 224);
k_in <= k(i-129);
elsif i = 128 then
elsif i > 79 then
second_mem(j) <= gama2 + second_mem(m) + gama3 + second_mem(j);
ward_in1 <= gama2 + second_mem(m) + gama3 + second_mem(j);
k_in <= k(i-64);
elsif i > 63 then
enable <= '0';
ward_in1 <= second_mem(j);
k_in <= k(i-64);
elsif i > 19 then
first_mem(j) <= gama0 + first_mem(m) + gama1 + first_mem(j);
ward_in <= gama0 + first_mem(m) + gama1 + first_mem(j);
k_in <= k(i);
enable <= '1';
elsif i > 15 then
second_mem(j)<= ward;
first_mem(j) <= gama0 + first_mem(m) + gama1 + first_mem(j);
ward_in <= gama0 + first_mem(m) + gama1 + first_mem(j);
k_in <= k(i);
enable <= '1';
elsif i >= 0 then
first_mem(i) <= ward;
ward_in <= ward;
k_in <= k(i);
enable <= '1';
end if;
i := i + 1;
j := j + 1;
m := m + 1;
n := n + 1;
l := l + 1;
end if;
end process;
process (clk, rst)
begin
if rst = '0' then
r_d <= '0';
elsif clk'event and clk = '1' then
r_d <= enable2;
end if;
end process;
process (clk, rst)
variable f: integer;
variable j: integer;
variable l: integer;
variable m: integer;
begin
if rst = '0' then
f := 2;
j := 15;
l := 1;
m := 14;
elsif clk'event and clk = '1' then
if j = 16 then
j := 0;
end if;
if f = 16 then
f := 0;
end if;
if l = 16 then
l := 0;
end if;
if m = 16 then
m := 0;
end if;
gama0 <= ((first_mem(f)(6 downto 0) & first_mem(f)(31 downto 7)) xor (first_mem(f)(17 downto 0) & first_mem(f)(31 downto 18)) xor ("000" & first_mem(f)(31 downto 3)));
gama1 <= ((first_mem(j)(16 downto 0) & first_mem(j)(31 downto 17)) xor (first_mem(j)(18 downto 0) & first_mem(j)(31 downto 19)) xor ("0000000000" & first_mem(j)(31 downto 10)));
gama4 <= ((first_mem(l)(6 downto 0) & first_mem(l)(31 downto 7)) xor (first_mem(l)(17 downto 0) & first_mem(l)(31 downto 18)) xor ("000" & first_mem(l)(31 downto 3)));
gama5 <= ((first_mem(m)(16 downto 0) & first_mem(m)(31 downto 17)) xor (first_mem(m)(18 downto 0) & first_mem(m)(31 downto 19)) xor ("0000000000" & first_mem(m)(31 downto 10)));
gama2 <= ((second_mem(f)(6 downto 0) & second_mem(f)(31 downto 7)) xor (second_mem(f)(17 downto 0) & second_mem(f)(31 downto 18)) xor ("000" & second_mem(f)(31 downto 3)));
gama3 <= ((second_mem(j)(16 downto 0) & second_mem(j)(31 downto 17)) xor (second_mem(j)(18 downto 0) & second_mem(j)(31 downto 19)) xor ("0000000000" & second_mem(j)(31 downto 10)));
f := f + 1;
j := j + 1;
l := l + 1;
m := m + 1;
end if;
end process;
end;

elsif, i.e. "priority en/decoding", will effect the frequency of your design. With all the available logic resources you have left, you might consider a case statement... unless you actually have a need for priority en/decoding. Even then, if you can afford latency trade-offs you can do the decoding in several clock cycles (pipeline the decode) and your design will likely increase frequency... ultimately, you need to run a timing report and look at the slow paths to understand the bottlenecks.
If you really want to use RAM instead of FFs, you could infer a RAM (create an array), or if that doesn't work for you, you could then manually instantiate a device-specific RAM.... and, then, of course, add the control logic for it. if primitive, blackbox it to swap later for "same" ASIC library primitive
As far as "variables", the discussion is the same as "VHDL" vs. "Verilog", or "synch" vs "asynch" resets, mostly just opinions, and mine is, "I am not a fan of variables in synthesizable RTL"... they are legal for synthesis, but they "disappear" during syn, so that if you ever want to look at a netlist and compare to your RTL, you get to manually trace the connections. There is usually not a good reason to have variables, as they represent nothing in terms of Hardware, and obfuscate the design vs. netlist. I like to see logic types of wire/net/regs, so that it is clear what you are creating in HW. But, as you please, i just tend to cringe when i see them.
Along the same lines, in terms of arrays, I am not a big fan of "bundling signals into arrays"... folks will argue it is "faster" and "easier" to deal with, but to me, it further obfuscates the design. Again, not illegal, but when it comes to OPC (Other people's code) it can be very annoying trying to trace signals, not only within a module, but, arrays across ports too... and then, if they slice those arrays, or decimate them otherwise, it gets even more annoying. Kind of like this rant :)
Ultimately, you can do whatever you want, and especially in an FPGA, some folks tend to be less attention-to-detail about what will be created versus an ASIC. If you are designing an ASIC, I would say you should err on the side of being more pedantic and be able to look at your RTL and know what (to some extent) is going to be created and as such be able to estimate the gate count if you need. To this end, I highly recommend taking the time to draw your design in a drawing program (e.g. visio) to include gates, FFs, decoders, Muxes, FSMs, pseudo-code where appropriate, details of your clock and reset trees, and all CDC crossing logic, etc., including signal names. Once you have that, it is just a matter of translating to RTL... likely, too, as a bonus for those that share my opinion on variables, you will find no variables in your drawing and thus none in your RTL. :)

Related

Spikes or glitches in Modelsim

I've been learning VHDL for a while and I'm making a project right now. I made a NCO (Numerically controlled Oscillator) and a cordic algorithm to produce sine and cosine with a certain frequency.
I don't know why I get spikes on my waves in Modelsim. I guess they are caused by the case statement when it changes the quadrant for the cordic algorithm to work with angles between -180 and +180.
I've read that it could be a "normal" behavior of Modelsim because of the iterations of the simulator (VHDL delta cycles). But I don't know how and if I should fix them.
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;
ENTITY phase_accumulator_module IS
GENERIC (
LGTBL: INTEGER := 16; --lunghezza table log base 2
W: INTEGER := 32;
OW: INTEGER := 16
);
PORT (
clk: IN STD_LOGIC;
reset: IN STD_LOGIC;
i_dphase: IN STD_LOGIC_VECTOR(W-1 DOWNTO 0);
o_val: BUFFER STD_LOGIC_VECTOR(W-1 DOWNTO 0);
o_val_test:BUFFER STD_LOGIC_VECTOR(OW-1 DOWNTO 0);
o_val_laser: BUFFER STD_LOGIC_VECTOR(W-1 DOWNTO 0);
quadrant: BUFFER STD_LOGIC_VECTOR(1 DOWNTO 0)
);
END phase_accumulator_module;
ARCHITECTURE behave OF phase_accumulator_module IS
SIGNAL r_step,r_step_laser,r_step_test: STD_LOGIC_VECTOR(W-1 DOWNTO 0) := (OTHERS => '0');
SIGNAL r_phase,r_phase_laser,r_phase_test: STD_LOGIC_VECTOR(W-1 DOWNTO 0) := (OTHERS => '0');
SIGNAL r_phase_pipe,r_phase_laser_pipe: STD_LOGIC_VECTOR(W-1 DOWNTO 0) := (OTHERS => '0');
CONSTANT P: INTEGER := LGTBL;
BEGIN
R_Step_pro: PROCESS(clk,reset)
BEGIN
IF(reset='1') THEN
r_step <= (OTHERS=>'0');
r_step_test <= (OTHERS=>'0');
r_step_laser <= (OTHERS=>'0');
ELSE IF(rising_edge(clk)) THEN
r_step <= i_dphase; -- 2^W*f/fs
r_step_test <= i_dphase; --test signal
r_step_laser <= STD_LOGIC_VECTOR(SHIFT_RIGHT(UNSIGNED(i_dphase),1));
END IF;
END IF;
END PROCESS R_Step_pro;
R_phase_pro: PROCESS(clk,reset)
BEGIN
IF(reset='1') THEN
r_phase <= (OTHERS=>'0');
r_phase_test <= (OTHERS=>'0');
r_phase_laser <= (OTHERS=>'0');
o_val <= (OTHERS=>'0');
o_val_test <= (OTHERS=>'0');
o_val_laser <= (OTHERS=>'0');
ELSE IF(rising_edge(clk)) THEN
r_phase <= STD_LOGIC_VECTOR(UNSIGNED(r_phase) + UNSIGNED(r_step));
r_phase_test <= STD_LOGIC_VECTOR(UNSIGNED(r_phase_test) + UNSIGNED(r_step_test)); --test signal
r_phase_laser <= STD_LOGIC_VECTOR(UNSIGNED(r_phase_laser) + UNSIGNED(r_step_laser));
quadrant <= r_phase(W-1 DOWNTO w-2);
CASE quadrant IS
WHEN "00" | "11" =>
r_phase_pipe <= r_phase;
r_phase_laser_pipe <= r_phase_laser;
WHEN "01" =>
r_phase_pipe <= "00" & r_phase(W-3 DOWNTO 0);
r_phase_laser_pipe <= "00" & r_phase_laser(W-3 DOWNTO 0);
WHEN "10" =>
r_phase_pipe <= "11" & r_phase(W-3 DOWNTO 0);
r_phase_laser_pipe <= "11" & r_phase_laser(W-3 DOWNTO 0);
WHEN OTHERS =>
null;
END CASE;
o_val_test <= r_phase_test(W-1 DOWNTO W-P);
o_val <= r_phase_pipe;
o_val_laser <= r_phase_laser_pipe;
END IF;
END IF;
END PROCESS R_phase_pro;
END behave;
This is the cordic algotithm vhdl file:
LIBRARY ieee;
USE ieee.std_logic_1164.all;
USE ieee.numeric_std.all;
ENTITY signal_gen_cordic_module IS
GENERIC (
bits: INTEGER := 16;
bits_out_c: INTEGER := 32;
iter : INTEGER := 32
);
PORT (
clk: IN STD_LOGIC;
reset: IN STD_LOGIC;
locked: IN STD_LOGIC;
z0: IN STD_LOGIC_VECTOR (bits_out_c-1 DOWNTO 0);
quadrant: IN STD_LOGIC_VECTOR(1 DOWNTO 0);
sine,cosine: BUFFER STD_LOGIC_VECTOR(bits-1 DOWNTO 0)
);
END signal_gen_cordic_module;
ARCHITECTURE behave OF signal_gen_cordic_module IS
TYPE temp IS ARRAY (0 TO iter-1) OF STD_LOGIC_VECTOR(bits_out_c-1 DOWNTO 0);
SIGNAL x_temp,y_temp,z_temp: temp;
CONSTANT x0: SIGNED(bits_out_c-1 DOWNTO 0) := "00000000000000000010111100011010"; --0.6072*2^16
CONSTANT y0: SIGNED(bits_out_c-1 DOWNTO 0) := "00000000000000000000000000000000";
SIGNAL x00,y00: SIGNED(bits_out_c-1 DOWNTO 0);
TYPE atand IS ARRAY (0 TO iter-1) OF STD_LOGIC_VECTOR(bits_out_c-1 DOWNTO 0);
CONSTANT arctan: atand :=
(
x"20000000",
x"12E4051E",
x"09FB385B",
x"051111D4",
x"028B0D43",
x"0145D7E1",
x"00A2F61E",
x"00517C55",
x"0028BE53",
x"00145F2F",
x"000A2F98",
x"000517CC",
x"00028BE6",
x"000145F3",
x"0000A2FA",
x"0000517D",
x"000028BE",
x"0000145F",
x"00000A30",
x"00000518",
x"0000028C",
x"00000146",
x"000000A3",
x"00000051",
x"00000029",
x"00000014",
x"0000000A",
x"00000005",
x"00000003",
x"00000001",
x"00000001",
x"00000000"
);
BEGIN
PROCESS(clk,reset)
BEGIN
IF(reset='1') THEN
cosine <= (OTHERS=>'0');
sine <= (OTHERS=>'0');
FOR i IN iter-1 DOWNTO 0 LOOP
x_temp(i) <= (OTHERS=>'0');
y_temp(i) <= (OTHERS=>'0');
z_temp(i) <= (OTHERS=>'0');
END LOOP;
ELSE IF(rising_edge(clk)) THEN
IF(locked='1') THEN
IF(quadrant="00" OR quadrant="11") THEN
x00 <= x0;
y00 <= y0;
ELSE IF(quadrant="01") THEN
x00 <= -y0;
y00 <= x0;
ELSE
x00 <= y0;
y00 <= -x0;
END IF;
END IF;
x_temp(0) <= STD_LOGIC_VECTOR(x00);
y_temp(0) <= STD_LOGIC_VECTOR(y00);
z_temp(0) <= z0;
FOR i IN 0 TO iter-2 LOOP
IF(z_temp(i)(z0'HIGH)='1') THEN
x_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(x_temp(i)) + SHIFT_RIGHT(SIGNED(y_temp(i)),i));
y_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(y_temp(i)) - SHIFT_RIGHT(SIGNED(x_temp(i)),i));
z_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(z_temp(i)) + SIGNED(arctan(i)));
ELSE
x_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(x_temp(i)) - SHIFT_RIGHT(SIGNED(y_temp(i)),i));
y_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(y_temp(i)) + SHIFT_RIGHT(SIGNED(x_temp(i)),i));
z_temp(i+1) <= STD_LOGIC_VECTOR(SIGNED(z_temp(i)) - SIGNED(arctan(i)));
END IF;
END LOOP;
cosine <= STD_LOGIC_VECTOR(RESIZE(SIGNED(x_temp(iter-1)),bits));
sine <= STD_LOGIC_VECTOR(RESIZE(SIGNED(y_temp(iter-1)),bits));
END IF;
END IF;
END IF;
END PROCESS;
END behave;
My modelsim version is 10.5b. Thank you for any help! :)

VHDL - Want to create a simple divider

I'm using Vivado 2018.2
I want to make a simple divider, say the input is 153 and the constant is 53. So with 153/53, I want to see 2 and the remainder 47.
The code I have so far errors out (sequential).
entity divider_main is
port(
dividend: in std_logic_vector(7 downto 0);
remainder: out std_logic_vector(5 downto 0);
quotient: out std_logic_vector(2 downto 0)
);
end divider_main;
architecture Behavioral of divider_main is
signal dividend_signal: signed(7 downto 0);
signal remainder_signal: std_logic_vector(5 downto 0);
signal fifty_three: signed(7 downto 0);
signal count: unsigned(2 downto 0);
begin
dividend_signal <= signed(dividend);
fifty_three <= "00011101";
count <= "000";
process(dividend, dividend_signal) is
begin
if dividend_signal < fifty_three then
remainder(5 downto 0) <= std_logic_vector(dividend_signal(5 downto 0));
quotient <= std_logic_vector(count);
dividend_signal <= "00000000";
count(2 downto 0) <= "000";
else
count <= count + 1;
dividend_signal <= dividend_signal - fifty_three;
quotient(2 downto 0) <= "000";
remainder <= "000000";
end if;
end process;
end Behavioral;
I'm new to vhdl so let me know what I am doing wrong!

i have a vhdl code that take a 16 bit binary converts it to 5 bcd(4bit) how can i connect each bcd with a display?

i have a code that take a 16 bit binary converts it to 5 bcd(4bit)
how can i connect each bcd with a display?
the bcd 0 has to connect to the display 0 (seven segment display) so that the binary number is going to convert to decimal and display it on 5 seven segment displays
library ieee;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
entity binary_bcd is
generic(N: positive := 16);
port(
clk, reset: in std_logic;
binary_in: in std_logic_vector(N-1 downto 0);
bcd0, bcd1, bcd2, bcd3, bcd4: out std_logic_vector(3 downto 0)
);
end binary_bcd ;
architecture behaviour of binary_bcd is
type states is (start, shift, done);
signal state, state_next: states;
signal binary, binary_next: std_logic_vector(N-1 downto 0);
signal bcds, bcds_reg, bcds_next: std_logic_vector(19 downto 0);
-- output register keep output constant during conversion
signal bcds_out_reg, bcds_out_reg_next: std_logic_vector(19 downto 0);
-- need to keep track of shifts
signal shift_counter, shift_counter_next: natural range 0 to N;
begin
process(clk, reset)
begin
if reset = '1' then
binary <= (others => '0');
bcds <= (others => '0');
state <= start;
bcds_out_reg <= (others => '0');
shift_counter <= 0;
elsif falling_edge(clk) then
binary <= binary_next;
bcds <= bcds_next;
state <= state_next;
bcds_out_reg <= bcds_out_reg_next;
shift_counter <= shift_counter_next;
end if;
end process;
convert:
process(state, binary, binary_in, bcds, bcds_reg, shift_counter)
begin
state_next <= state;
bcds_next <= bcds;
binary_next <= binary;
shift_counter_next <= shift_counter;
case state is
when start =>
state_next <= shift;
binary_next <= binary_in;
bcds_next <= (others => '0');
shift_counter_next <= 0;
when shift =>
if shift_counter = N then
state_next <= done;
else
binary_next <= binary(N-2 downto 0) & 'L';
bcds_next <= bcds_reg(18 downto 0) & binary(N-1);
shift_counter_next <= shift_counter + 1;
end if;
when done =>
state_next <= start;
end case;
end process;
bcds_reg(19 downto 16) <= bcds(19 downto 16) + 3 when bcds(19 downto 16) > 4 else
bcds(19 downto 16);
bcds_reg(15 downto 12) <= bcds(15 downto 12) + 3 when bcds(15 downto 12) > 4 else
bcds(15 downto 12);
bcds_reg(11 downto 8) <= bcds(11 downto 8) + 3 when bcds(11 downto 8) > 4 else
bcds(11 downto 8);
bcds_reg(7 downto 4) <= bcds(7 downto 4) + 3 when bcds(7 downto 4) > 4 else
bcds(7 downto 4);
bcds_reg(3 downto 0) <= bcds(3 downto 0) + 3 when bcds(3 downto 0) > 4 else
bcds(3 downto 0);
bcds_out_reg_next <= bcds when state = done else
bcds_out_reg;
bcd4 <= bcds_out_reg(19 downto 16);
bcd3 <= bcds_out_reg(15 downto 12);
bcd2 <= bcds_out_reg(11 downto 8);
bcd1 <= bcds_out_reg(7 downto 4);
bcd0 <= bcds_out_reg(3 downto 0);
end behaviour;
test bench
LIBRARY ieee;
USE ieee.std_logic_1164.ALL;
ENTITY tb_bcd IS
END tb_bcd;
ARCHITECTURE behavior OF tb_bcd IS
-- Component Declaration for the Unit Under Test (UUT)
COMPONENT binary_bcd
PORT(
clk : IN std_logic;
reset : IN std_logic;
binary_in : IN std_logic_vector(15 downto 0);
bcd0 : OUT std_logic_vector(3 downto 0);
bcd1 : OUT std_logic_vector(3 downto 0);
bcd2 : OUT std_logic_vector(3 downto 0);
bcd3 : OUT std_logic_vector(3 downto 0);
bcd4 : OUT std_logic_vector(3 downto 0)
);
END COMPONENT;
--Inputs
signal clk : std_logic := '0';
signal reset : std_logic := '0';
signal binary_in : std_logic_vector(15 downto 0) := (others => '0');
--Outputs
signal bcd0 : std_logic_vector(3 downto 0);
signal bcd1 : std_logic_vector(3 downto 0);
signal bcd2 : std_logic_vector(3 downto 0);
signal bcd3 : std_logic_vector(3 downto 0);
signal bcd4 : std_logic_vector(3 downto 0);
-- Clock period definitions
constant clk_period : time := 10 ns;
BEGIN
-- Instantiate the Unit Under Test (UUT)
uut: binary_bcd PORT MAP (
clk => clk,
reset => reset,
binary_in => binary_in,
bcd0 => bcd0,
bcd1 => bcd1,
bcd2 => bcd2,
bcd3 => bcd3,
bcd4 => bcd4
);
-- Clock process definitions
clk_process :process
begin
clk <= '0';
wait for clk_period/2;
clk <= '1';
wait for clk_period/2;
end process;
-- Stimulus process
stim_proc: process
begin
-- hold reset state for 100 ns.
reset <= '1';
wait for 100 ns;
reset <= '0';
binary_in <= "0000000000001111";
wait for 200 ns;
binary_in <= "0000000001001111";
wait for 200 ns;
binary_in <= "0000000001111111";
wait for 200 ns;
binary_in <= "0000111101001111";
wait for 2000 ns;
end process;
END;

VHDL code to find square root of number?

Is there is any in built function or any library that can be included in the design to find square root of a number?
Restoring square root algorithm is easy to implement on fpga, wikipedia has an example.
FPGA vendors should have cores available, it hides inside the general purpose CORDIC core on Xilinx. They also have square root cores for floating points, if that's what you need.
For non-synthesizable (simulation/test-bench only) operation, square root for real can be done with:
y := math_real.sqrt(x)
For synthesizable operation, see answer from Jonathan Drolet.
This one worked for me.
library ieee;
use ieee.std_logic_1164.all;
use IEEE.STD_LOGIC_unsigned.ALL;
entity squart is port(
clock : in std_logic;
data_in : in std_logic_vector(7 downto 0);
data_out : out std_logic_vector(3 downto 0)); end squart;
architecture behaviour of squart is
signal part_done : std_logic := '0';
signal part_count : integer := 3;
signal result : std_logic_vector(4 downto 0) := "00000";
signal partialq : std_logic_vector(5 downto 0) := "000000";
begin
part_done_1: process(clock, data_in, part_done)
begin
if(clock'event and clock='1')then
if(part_done='0')then
if(part_count>=0)then
partialq(1 downto 0) <= data_in((part_count*2)+ 1 downto part_count*2);
part_done <= '1'; else
data_out <= result(3 downto 0);
end if;
part_count <= part_count - 1;
elsif(part_done='1')then
if((result(3 downto 0) & "01") <= partialq)then
result <= result(3 downto 0) & '1';
partialq(5 downto 2) <= partialq(3 downto 0) - (result(1 downto 0)&"01");
else
result <= result(3 downto 0) & '0';
partialq(5 downto 2) <= partialq(3 downto 0);
end if;
part_done <= '0';
end if;
end if;
end process;
end behaviour;
Check this one:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
entity SQRT is
Generic ( b : natural range 4 to 32 := 16 );
Port ( value : in STD_LOGIC_VECTOR (15 downto 0);
result : out STD_LOGIC_VECTOR (7 downto 0));
end SQRT;
architecture Behave of SQRT is
begin
process (value)
variable vop : unsigned(b-1 downto 0);
variable vres : unsigned(b-1 downto 0);
variable vone : unsigned(b-1 downto 0);
begin
vone := to_unsigned(2**(b-2),b);
vop := unsigned(value);
vres := (others=>'0');
while (vone /= 0) loop
if (vop >= vres+vone) then
vop := vop - (vres+vone);
vres := vres/2 + vone;
else
vres := vres/2;
end if;
vone := vone/4;
end loop;
result <= std_logic_vector(vres(result'range));
end process;
end;

My VHDL ALU code behave awkward

I have a problem with VHDL ALU code. I have to make simple ALU with 4 operations with 4-bit operands. I implemented these operations correctly and they work well. For executing I use E2LP board. For choosing the operation I selected 4 JOY buttons,one for each operation. Problem is that when I press button to execute operation and depress it I want result to stay on LEDs while I don't select any other operation, but that's not happening. For first 5 LEDs this works fine, but upper 3 not.This only works for one operation. My simulation results are correct. Here is code an schema of project.Thank you in advance.
---------------------------------------------------------------------------------- Control logic
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
Port ( --clk : in STD_LOGIC;
in_saberi : in STD_LOGIC;
in_mnozi : in STD_LOGIC;
in_ili : in STD_LOGIC;
in_rotiraj : in STD_LOGIC;
out_saberi : out STD_LOGIC;
out_mnozi : out STD_LOGIC;
out_ili : out STD_LOGIC;
out_rotiraj : out STD_LOGIC);
end upravljanje;
architecture Behavioral of upravljanje is
signal tmps : std_logic := '1';
signal tmpm : std_logic := '1';
signal tmpi : std_logic := '1';
signal tmpr : std_logic := '1';
begin
logika : process(in_saberi,in_mnozi,in_ili,in_rotiraj)
begin
if (in_saberi='0' and in_mnozi='1' and in_ili='1' and in_rotiraj='1') then
tmps <= in_saberi;
tmpm <= in_mnozi;
tmpi <= in_ili;
tmpr <= in_rotiraj;
elsif (in_mnozi='0' and in_saberi='1' and in_ili='1' and in_rotiraj='1') then
tmps <= in_saberi;
tmpm <= in_mnozi;
tmpi <= in_ili;
tmpr <= in_rotiraj;
elsif (in_saberi='1' and in_mnozi='1' and in_ili='0' and in_rotiraj='1') then
tmps <= in_saberi;
tmpm <= in_mnozi;
tmpi <= in_ili;
tmpr <= in_rotiraj;
elsif (in_saberi='1' and in_mnozi='1' and in_ili='1' and in_rotiraj='0') then
tmps <= in_saberi;
tmpm <= in_mnozi;
tmpi <= in_ili;
tmpr <= in_rotiraj;
elsif (in_saberi='1' and in_mnozi='1' and in_ili='1' and in_rotiraj='1') then
tmps <= tmps;
tmpm <= tmpm;
tmpi <= tmpi;
tmpr <= tmpr;
else
tmps <= '1';
tmpm <= '1';
tmpi <= '1';
tmpr <= '1';
end if;
end process logika;
out_saberi <= tmps;
out_mnozi <= tmpm;
out_ili <= tmpi;
out_rotiraj <= tmpr;
end Behavioral;
--------------------------------------------------------------------------
-- this is for operation add
entity sabirac is
Port ( clk : in STD_LOGIC;
data1 : in STD_LOGIC_VECTOR (3 downto 0);
data2 : in STD_LOGIC_VECTOR (3 downto 0);
saberi : in STD_LOGIC;
result : out STD_LOGIC_VECTOR (7 downto 0));
end sabirac;
architecture Behavioral of sabirac is
signal c : std_logic_vector (5 downto 0) := "000000";
signal tmp : std_logic_vector (7 downto 0) := "00000000";
begin
sabiranje : process(clk,saberi)
begin
if (saberi='0') then
tmp(0) <= data1(0) xor data2(0);
c(0) <= data1(0) and data2(0);
tmp(1) <= data1(1) xor data2(1) xor c(0);
c(1) <= (data1(1) and data2(1)) or (data1(1) and c(0)) or (data2(1) and c(0));
tmp(2) <= data1(2) xor data2(2) xor c(1);
c(2) <= (data1(2) and data2(2)) or (data1(2) and c(1)) or (data2(2) and c(1));
tmp(3) <= data1(3) xor data2(3) xor c(2);
if(data1(3) = data2(3)) then
c(3) <= (data1(3) and data2(3)) or (data1(3) and c(2)) or (data2(3) and c(2));
tmp(4) <= c(3);
tmp(5) <= c(3);
tmp(6) <= c(3);
tmp(7) <= c(3);
else
c(3) <= data1(3) xor data2(3) xor c(2);
tmp(4) <= c(3);
tmp(5) <= c(3);
tmp(6) <= c(3);
tmp(7) <= c(3);
end if;
else
tmp <= "ZZZZZZZZ";
end if;
end process sabiranje;
result <= tmp;
end Behavioral;
-----------------------------------------------------------------------------
entity mul is
Port (
clk : in STD_LOGIC;
pomnozi : in STD_LOGIC;
data1 : in STD_LOGIC_VECTOR (3 downto 0);
data2 : in STD_LOGIC_VECTOR (3 downto 0);
result : out STD_LOGIC_VECTOR (7 downto 0));
end mul;
architecture Behavioral of mul is
begin
mnozenje : process (clk,pomnozi)
begin
if (pomnozi='0') then
result <= std_logic_vector(signed(data1) * signed(data2));
else
result <= "ZZZZZZZZ";
end if;
end process mnozenje;
end Behavioral;
--------------------------------------------------------------------------
entity rotate is
Port ( clk : in STD_LOGIC;
rotiraj : in STD_LOGIC;
data1 : in STD_LOGIC_VECTOR (3 downto 0);
data2 : in STD_LOGIC_VECTOR (3 downto 0);
result : out STD_LOGIC_VECTOR (7 downto 0));
end rotate;
architecture Behavioral of rotate is
signal tmp : std_logic_vector (3 downto 0) := "0000";
signal tmp2 : std_logic_vector (7 downto 0) := "00000000";
begin
rotacija : process(clk,rotiraj)
begin
if (rotiraj='0') then
tmp <= std_logic_vector(rotate_left(unsigned(data1),to_integer(unsigned(data2))));
tmp2(0) <= tmp(0);
tmp2(1) <= tmp(1);
tmp2(2) <= tmp(2);
tmp2(3) <= tmp(3);
tmp2(4) <= '0';
tmp2(5) <= '0';
tmp2(6) <= '0';
tmp2(7) <= '0';
else
tmp2 <= "ZZZZZZZZ";
end if;
end process rotacija;
result <= tmp2;
end Behavioral;
--------------------------------------------------------------------------
-- Logic OR operation
entity logicko_ILI is
Port ( clk : in STD_LOGIC;
data1 : in STD_LOGIC_VECTOR (3 downto 0);
data2 : in STD_LOGIC_VECTOR (3 downto 0);
logili : in STD_LOGIC;
result : out STD_LOGIC_VECTOR (7 downto 0));
end logicko_ILI;
architecture Behavioral of logicko_ILI is
signal c : std_logic_vector (5 downto 0) := "000000";
signal tmp : std_logic_vector (7 downto 0) := "00000000";
begin
logicko : process(clk,logili)
begin
if (logili = '0') then
tmp(0) <= data1(0) or data2(0);
tmp(1) <= data1(1) or data2(1);
tmp(2) <= data1(2) or data2(2);
tmp(3) <= data1(3) or data2(3);
tmp(4) <= '0';
tmp(5) <= '1';
tmp(6) <= '1';
tmp(7) <= '1';
else
tmp <= "ZZZZZZZZ";
end if;
end process logicko;
result <= tmp;
end Behavioral;
I think you should even use your clk and reset signals in process. Your design is completely asynchron! This is a very bad idea.
A synchron process with asynchron reset look like this:
test : process (clk,reset)
begin
if (reset) then
c = 0;
elsif (rising_edge(clk)) then
c = a + b;
end if;
end process:
None of your sensitivity lists are correct. This does not comply with IEEE standard on syntesizable RTL. It poses a a high risk of getting synthesis results that are different from your simulation results.
line: 24 Incomplete sensitivity list. Missing signals: tmpm, tmps, tmpr, tmpi
line: 86 Incomplete sensitivity list. Missing signals: data1, data2, c
line: 137 Incomplete sensitivity list. Missing signals: data1, data2
line: 166 Incomplete sensitivity list. Missing signals: tmp, data1, data2
line: 205 Incomplete sensitivity list. Missing signals: data1, data2,
(line numbers might be slightly off because I had to add use/library clauses for ieee.std_logic_1164)
Please check your synthesis results for warnings, or use a VHDL code checker before your synthesize.

Resources