1、逐位比较法-二进制
《 FPGA篇(一) 基于verilog的定点开方运算(1)-逐次逼近算法 》该篇文章中有详细描述
假设被开方数rad_i[7:0],则结果qout[3:0]位宽为4位,从最高位到最低位依次置1,用乘法器平方后与被开方数比较,>rad_i则当前位=0,<rad_i则当前位=1;详细说明见原文。
总结:该算法在计算大数据时占用大量乘法器,时间为位宽一半,可流水计算
二进制思维:依次改变的每一个bit的值,下方的sqrt_shift中并不关心bit的0,1,以十进制参考.
module sqrt_1
#(
parameter d_width = 58,
parameter q_width = d_width/2 - 1,
parameter r_width = d_width/2)
(
input wire clk,
input wire rst,
input wire i_vaild,
input wire [d_width-1:0] data_i,
output reg o_vaild,
output reg [q_width:0] data_o,
output reg [r_width:0] data_r
);
reg [d_width-1:0] D [r_width:1];
reg [q_width:0] Q_z [r_width:1];
reg [q_width:0] Q_q [r_width:1];
reg ivalid_t [r_width:1];
always@(posedge clk or posedge rst)
begin
if(rst)
begin
D[r_width] <= 0;
Q_z[r_width] <= 0;
Q_q[r_width] <= 0;
ivalid_t[r_width] <= 0;
end
else if(i_vaild)
begin
D[r_width] <= data_i;
Q_z[r_width] <= {1'b1,{q_width{1'b0}}};
Q_q[r_width] <= 0;
ivalid_t[r_width] <= 1;
end
else
begin
D[r_width] <= 0;
Q_z[r_width] <= 0;
Q_q[r_width] <= 0;
ivalid_t[r_width] <= 0;
end
end
generate
genvar i;
for(i=r_width-1;i>=1;i=i-1)
begin:U
always@(posedge clk or posedge rst)
begin
if(rst)
begin
D[i] <= 0;
Q_z[i] <= 0;
Q_q[i] <= 0;
ivalid_t[i] <= 0;
end
else if(ivalid_t[i+1])
begin
if(Q_z[i+1]*Q_z[i+1] > D[i+1])
begin
Q_z[i] <= {Q_q[i+1][q_width:i],1'b1,{{i-1}{1'b0}}};
Q_q[i] <= Q_q[i+1];
end
else
begin
Q_z[i] <= {Q_z[i+1][q_width:i],1'b1,{{i-1}{1'b0}}};
Q_q[i] <= Q_z[i+1];
end
D[i] <= D[i+1];
ivalid_t[i] <= 1;
end
else
begin
ivalid_t[i] <= 0;
D[i] <= 0;
Q_q[i] <= 0;
Q_z[i] <= 0;
end
end
end
endgenerate
always@(posedge clk or posedge rst)
begin
if(rst)
begin
data_o <= 0;
data_r <= 0;
o_vaild <= 0;
end
else if(ivalid_t[1])
begin
if(Q_z[1]*Q_z[1] > D[1])
begin
data_o <= Q_q[1];
data_r <= D[1] - Q_q[1]*Q_q[1];
o_vaild <= 1;
end
else
begin
data_o <= {Q_q[1][q_width:1],Q_z[1][0]};
data_r <= D[1] - {Q_q[1][q_width:1],Q_z[1][0]}*{Q_q[1][q_width:1],Q_z[1][0]};
o_vaild <= 1;
end
end
else
begin
data_o <= 0;
data_r <= 0;
o_vaild <= 0;
end
end
endmodule
2、逐位比较-十进制
十进制思维:
把结果按照位宽拆分为多份,以每一位的十进制结果为参考,通过由大块到小块的叠加减,从最高位粗调,低位细调得到最终精确的十进制值。并不以二进制方式逐位确定0、1.
求rad_i[51:0],把结果[25:0]的每个bit依次从高到低查询,(以十进制方式)并累加减得到最终结果。
结果可以拆分为
关键算式:
b_2是b的平方,b表示对应bit的十进制值;
r0_2是r0的平方,r0表示当前结果的十进制值;
(r0 + b)^2 = r0^2 + b^2 + r0b2 = r0_2 + b_2 + r0<<c,r0 + b表示当前结果与下一个要判断的bit代表值的和。
在调整结果和的同时,调整和的平方值,用于与被开方数比较。
总结:
sqrt_shift从开方值一半开始移位相加比较;当前代码不可流水,可修改设计为流水。
```c
-------------------------------------------------------------------------------
--
-- Project: <Floating Point Unit Core>
--
-- Description: square-root entity for the square-root unit
-------------------------------------------------------------------------------
-- Author: Jidan Al-eryani
-- E-mail: jidan@gmx.net
--
-- Copyright (C) 2006
--
-- This source file may be used and distributed without
-- restriction provided that this copyright statement is not
-- removed from the file and that any derivative work contains
-- the original copyright notice and the associated disclaimer.
--
-- THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-- FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR
-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
-- GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-- BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-- POSSIBILITY OF SUCH DAMAGE.
--把结果[25:0]的每个bit依次从高到低查询,并累加得到最终结果。 结果可以拆分为[25]*2^25 + [24]*2^24 +... +[0]*2^0
--关键算式:b_2是b的平方,b表示对应bit的十进制值;r0_2是r0的平方,r0表示当前结果的十进制值;(r0 + b)^2 = r0^2 + b^2 + r0*b*2 = r0_2 + b_2 + r0<<c,r0 + b表示当前结果与下一个要判断的bit代表值的和。
library ieee ;
use ieee.std_logic_1164.all;
use ieee.std_logic_unsigned.all;
entity sqrt_shift is
generic (RD_WIDTH: integer:=52; SQ_WIDTH: integer:=26); -- SQ_WIDTH = RD_WIDTH/2 (+ 1 if odd)
port(
clk_i : in std_logic;
rad_i : in std_logic_vector(RD_WIDTH-1 downto 0); -- hidden(1) & fraction(23)
start_i : in std_logic;
ready_o : out std_logic;
sqr_o : out std_logic_vector(SQ_WIDTH-1 downto 0);
ine_o : out std_logic
);
end sqrt_shift;
architecture rtl of sqrt_shift is
signal s_rad_i: std_logic_vector(RD_WIDTH-1 downto 0);
signal s_start_i, s_ready_o : std_logic;
signal s_sqr_o: std_logic_vector(RD_WIDTH-1 downto 0);
signal s_ine_o : std_logic;
constant ITERATIONS : integer:= RD_WIDTH/2; -- iterations = N/2
constant WIDTH_C : integer:= 5; -- log2(ITERATIONS)
--0000000000000000000000000000000000000000000000000000
constant CONST_B : std_logic_vector(RD_WIDTH-1 downto 0) :="0000000000000000000000000010000000000000000000000000"; -- b = 2^(N/2 - 1) 最大开方值的一半 0x2000000 * 2 = 0x4000000
constant CONST_B_2: std_logic_vector(RD_WIDTH-1 downto 0):="0100000000000000000000000000000000000000000000000000"; -- b^2 最大开方值的一半 的平方
constant CONST_C : std_logic_vector(WIDTH_C-1 downto 0):= "11010"; -- c = N/2
signal s_count : integer range 0 to ITERATIONS;
type t_state is (waiting,busy);
signal s_state : t_state;
signal b, b_2, r0, r0_2 : std_logic_vector(RD_WIDTH-1 downto 0);
signal c : std_logic_vector(WIDTH_C-1 downto 0);
signal s_op1, s_op2, s_sum1a, s_sum1b, s_sum2a, s_sum2b : std_logic_vector(RD_WIDTH-1 downto 0);
begin
-- Input Register
process(clk_i)
begin
if rising_edge(clk_i) then
s_rad_i <= rad_i;
s_start_i <= start_i;
end if;
end process;
-- Output Register
process(clk_i)
begin
if rising_edge(clk_i) then
sqr_o <= s_sqr_o(SQ_WIDTH-1 downto 0);
ine_o <= s_ine_o;
ready_o <= s_ready_o;
end if;
end process;
-- FSM
process(clk_i)
begin
if rising_edge(clk_i) then
if s_start_i ='1' then
s_state <= busy;
s_count <= ITERATIONS; --26 迭代次数为被开方数宽度的一半
elsif s_count=0 and s_state=busy then
s_state <= waiting;
s_ready_o <= '1';--计算完成
s_count <=ITERATIONS;
elsif s_state=busy then
s_count <= s_count - 1;--长度0~26
else
s_state <= waiting;
s_ready_o <= '0';
end if;
end if;
end process;
process(clk_i)
begin
if rising_edge(clk_i) then
if s_start_i='1' then
b <= CONST_B; -- "0000000000000000000000000010000000000000000000000000"; -- b = 2^(N/2 - 1) = 2^25
b_2 <= CONST_B_2;-- "0100000000000000000000000000000000000000000000000000"; -- b^2 = 2^50 b_2一直是b的平方
c <= CONST_C; -- 26
else
b <= '0' &b (RD_WIDTH-1 downto 1);-- shr 1 每个clk右移1位= /2
b_2 <= "00"&b_2(RD_WIDTH-1 downto 2);-- shr 2 每个clk右移2位= /4
c <= c - '1';
end if;
end if;
end process;
-- (r0 + b)^2 = r0^2 + b^2 + r0*b*2 = r0_2 + b_2 + r0<<c
s_op1 <= r0_2 + b_2;
s_op2 <= shl(r0, c);--r0左移c位 因为b = 2^(位号 - 1),所以r0*b*2 =r0*2^c
-- r0
s_sum1a <= "00000000000000000000000000"& (r0(25 downto 0) - b(25 downto 0));
s_sum2a <= "00000000000000000000000000"& (r0(25 downto 0) + b(25 downto 0));
-- r0_2 > s_rad_i 取 s_sum1b
s_sum1b <= s_op1 - s_op2;
s_sum2b <= s_op1 + s_op2;
process(clk_i)
-- variable v_r1, v_r1_2 : std_logic_vector(RD_WIDTH-1 downto 0);
begin
if rising_edge(clk_i) then
if s_start_i='1' then
r0 <= (others =>'0');
r0_2 <= (others =>'0');
elsif s_state=busy then
if r0_2 > s_rad_i then--与被开方数比较,被开方数是一个平方值 平方的乘法运算转换为移位,节省乘法器
r0 <= s_sum1a;--r0 <= r0 - b
r0_2 <= s_sum1b;--r0_2 <= r0_2 + b_2 - r0<<c
else
r0 <= s_sum2a;--r0 <= r0 + b (r0 + b)^2 = r0^2 + b^2 + r0*b*2 = r0_2 + b_2 + r0<<c
r0_2 <= s_sum2b;--r0_2 <= r0_2 + b_2 + r0<<c 初始值为b_2的初值 CONST_B_2 r0_2是r0的平方值r0<<c = r0*b*2
end if;
-- r0 <= v_r1;
-- r0_2 <= v_r1_2;
-- r0 <= v_r1;
-- r0_2 <= v_r1_2;
end if;
end if;
end process;
process(clk_i)
begin
if rising_edge(clk_i) then
if s_start_i = '1' then
s_sqr_o <= (others =>'0');
elsif s_count=0 then
if r0_2 > s_rad_i then--判定最后一个[0]bit
s_sqr_o <= r0 - '1';
else
s_sqr_o <= r0;--最终结果
end if;
end if;
end if;
end process;
-- check if result is inexact. In this way we saved 1 clk cycle!
process(clk_i)
variable v_r1_2 : std_logic_vector(RD_WIDTH-1 downto 0);
begin
if rising_edge(clk_i) then
v_r1_2 := r0_2 - (r0(RD_WIDTH-2 downto 0)&"0") + '1';-- = r0_2 + 1^2 - (r0(RD_WIDTH-2 downto 0)&"0") = ( r0 - 1)^2
if s_count=0 then
if r0_2 = s_rad_i or v_r1_2=s_rad_i then--r0^2 或 ( r0 - 1)^2
s_ine_o <= '0';--正好开方完,没有余数
else
s_ine_o <= '1';
end if;
end if;
end if;
end process;
end rtl;
仿真时序如下:
3、安路TD软件自带的开方IP
精度准确,可流水计算。其原理与逐位比较-十进制 相同,改进为流水计算。
4、安路TD软件自带的CORDIC算复数有效值、角度
极坐标方式计算膜误差很大,当膜本身很大时,误差小,角度值计算误差也小。
角度=scaled_radians_angle/2^32*180。
当角度的模式选择为radians时,值是scaled的π倍。1064850062/338952294=3.14159
5、开源CORDIC算法
《基于FPGA的CORDIC算法实现——Verilog版》 本文对CORDIC算法的原理做了详细的介绍
基本原理是坐标旋转,把角度分为16份,从45°开始通过16次迭代逼近,最终结果用系数做校正。
(采用20次迭代与16次精度相差不大,个别情况误差更大,可能因为在终点的震荡导致)
module Cordic_Sqrt
(
CLK_50M,RST_N,
Phase,Quadrant_in,
Sin,Cos,Error,
x_in,y_in,angle,length
);
input CLK_50M;
input RST_N;
input [31:0] Phase;
input [1:0] Quadrant_in;
output [31:0] Sin;
output [31:0] Cos;
output [31:0] Error;
input [31:0] x_in;
input [31:0] y_in;
output reg [31:0] angle;
output reg [31:0] length;
`define rot0 32'd2949120
`define rot1 32'd1740967
`define rot2 32'd919879
`define rot3 32'd466945
`define rot4 32'd234379
`define rot5 32'd117304
`define rot6 32'd58666
`define rot7 32'd29335
`define rot8 32'd14668
`define rot9 32'd7334
`define rot10 32'd3667
`define rot11 32'd1833
`define rot12 32'd917
`define rot13 32'd458
`define rot14 32'd229
`define rot15 32'd115
`define rot16 32'd57
`define rot17 32'd29
`define rot18 32'd14
`define rot19 32'd7
parameter Pipeline = 16;
parameter K = 32'h09b75; //K=0.6072529351*2^16,32'd39797, 16次迭代,20次迭代都一样
reg signed [31:0] Sin;
reg signed [31:0] Cos;
reg signed [31:0] Error;
reg signed [31:0] x[0:Pipeline];
reg signed [31:0] y[0:Pipeline];
reg signed [31:0] z[0:Pipeline];
reg [ 1:0] Quadrant [Pipeline:0];
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[0] <= 1'b0;
y[0] <= 1'b0;
z[0] <= 1'b0;
end
else
begin
x[0] <= x_in;
y[0] <= y_in;
z[0] <= 0;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[1] <= 1'b0;
y[1] <= 1'b0;
z[1] <= 1'b0;
end
else if(!y[0][31])
begin
x[1] <= x[0] + y[0];
y[1] <= y[0] - x[0];
z[1] <= z[0] + `rot0;
end
else
begin
x[1] <= x[0] - y[0];
y[1] <= y[0] + x[0];
z[1] <= z[0] - `rot0;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[2] <= 1'b0;
y[2] <= 1'b0;
z[2] <= 1'b0;
end
else if(!y[1][31])
begin
x[2] <= x[1] + (y[1] >>> 1);
y[2] <= y[1] - (x[1] >>> 1);
z[2] <= z[1] + `rot1;
end
else
begin
x[2] <= x[1] - (y[1] >>> 1);
y[2] <= y[1] + (x[1] >>> 1);
z[2] <= z[1] - `rot1;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[3] <= 1'b0;
y[3] <= 1'b0;
z[3] <= 1'b0;
end
else if(!y[2][31])
begin
x[3] <= x[2] + (y[2] >>> 2);
y[3] <= y[2] - (x[2] >>> 2);
z[3] <= z[2] + `rot2;
end
else
begin
x[3] <= x[2] - (y[2] >>> 2);
y[3] <= y[2] + (x[2] >>> 2);
z[3] <= z[2] - `rot2;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[4] <= 1'b0;
y[4] <= 1'b0;
z[4] <= 1'b0;
end
else if(!y[3][31])
begin
x[4] <= x[3] + (y[3] >>> 3);
y[4] <= y[3] - (x[3] >>> 3);
z[4] <= z[3] + `rot3;
end
else
begin
x[4] <= x[3] - (y[3] >>> 3);
y[4] <= y[3] + (x[3] >>> 3);
z[4] <= z[3] - `rot3;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[5] <= 1'b0;
y[5] <= 1'b0;
z[5] <= 1'b0;
end
else if(!y[4][31])
begin
x[5] <= x[4] + (y[4] >>> 4);
y[5] <= y[4] - (x[4] >>> 4);
z[5] <= z[4] + `rot4;
end
else
begin
x[5] <= x[4] - (y[4] >>> 4);
y[5] <= y[4] + (x[4] >>> 4);
z[5] <= z[4] - `rot4;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[6] <= 1'b0;
y[6] <= 1'b0;
z[6] <= 1'b0;
end
else if(!y[5][31])
begin
x[6] <= x[5] + (y[5] >>> 5);
y[6] <= y[5] - (x[5] >>> 5);
z[6] <= z[5] + `rot5;
end
else
begin
x[6] <= x[5] - (y[5] >>> 5);
y[6] <= y[5] + (x[5] >>> 5);
z[6] <= z[5] - `rot5;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[7] <= 1'b0;
y[7] <= 1'b0;
z[7] <= 1'b0;
end
else if(!y[6][31])
begin
x[7] <= x[6] + (y[6] >>> 6);
y[7] <= y[6] - (x[6] >>> 6);
z[7] <= z[6] + `rot6;
end
else
begin
x[7] <= x[6] - (y[6] >>> 6);
y[7] <= y[6] + (x[6] >>> 6);
z[7] <= z[6] - `rot6;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[8] <= 1'b0;
y[8] <= 1'b0;
z[8] <= 1'b0;
end
else if(!y[7][31])
begin
x[8] <= x[7] + (y[7] >>> 7);
y[8] <= y[7] - (x[7] >>> 7);
z[8] <= z[7] + `rot7;
end
else
begin
x[8] <= x[7] - (y[7] >>> 7);
y[8] <= y[7] + (x[7] >>> 7);
z[8] <= z[7] - `rot7;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[9] <= 1'b0;
y[9] <= 1'b0;
z[9] <= 1'b0;
end
else if(!y[8][31])
begin
x[9] <= x[8] + (y[8] >>> 8);
y[9] <= y[8] - (x[8] >>> 8);
z[9] <= z[8] + `rot8;
end
else
begin
x[9] <= x[8] - (y[8] >>> 8);
y[9] <= y[8] + (x[8] >>> 8);
z[9] <= z[8] - `rot8;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[10] <= 1'b0;
y[10] <= 1'b0;
z[10] <= 1'b0;
end
else if(!y[9][31])
begin
x[10] <= x[9] + (y[9] >>> 9);
y[10] <= y[9] - (x[9] >>> 9);
z[10] <= z[9] + `rot9;
end
else
begin
x[10] <= x[9] - (y[9] >>> 9);
y[10] <= y[9] + (x[9] >>> 9);
z[10] <= z[9] - `rot9;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[11] <= 1'b0;
y[11] <= 1'b0;
z[11] <= 1'b0;
end
else if(!y[10][31])
begin
x[11] <= x[10] + (y[10] >>> 10);
y[11] <= y[10] - (x[10] >>> 10);
z[11] <= z[10] + `rot10;
end
else
begin
x[11] <= x[10] - (y[10] >>> 10);
y[11] <= y[10] + (x[10] >>> 10);
z[11] <= z[10] - `rot10;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[12] <= 1'b0;
y[12] <= 1'b0;
z[12] <= 1'b0;
end
else if(!y[11][31])
begin
x[12] <= x[11] + (y[11] >>> 11);
y[12] <= y[11] - (x[11] >>> 11);
z[12] <= z[11] + `rot11;
end
else
begin
x[12] <= x[11] - (y[11] >>> 11);
y[12] <= y[11] + (x[11] >>> 11);
z[12] <= z[11] - `rot11;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[13] <= 1'b0;
y[13] <= 1'b0;
z[13] <= 1'b0;
end
else if(!y[12][31])
begin
x[13] <= x[12] + (y[12] >>> 12);
y[13] <= y[12] - (x[12] >>> 12);
z[13] <= z[12] + `rot12;
end
else
begin
x[13] <= x[12] - (y[12] >>> 12);
y[13] <= y[12] + (x[12] >>> 12);
z[13] <= z[12] - `rot12;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[14] <= 1'b0;
y[14] <= 1'b0;
z[14] <= 1'b0;
end
else if(!y[13][31])
begin
x[14] <= x[13] + (y[13] >>> 13);
y[14] <= y[13] - (x[13] >>> 13);
z[14] <= z[13] + `rot13;
end
else
begin
x[14] <= x[13] - (y[13] >>> 13);
y[14] <= y[13] + (x[13] >>> 13);
z[14] <= z[13] - `rot13;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[15] <= 1'b0;
y[15] <= 1'b0;
z[15] <= 1'b0;
end
else if(!y[14][31])
begin
x[15] <= x[14] + (y[14] >>> 14);
y[15] <= y[14] - (x[14] >>> 14);
z[15] <= z[14] + `rot14;
end
else
begin
x[15] <= x[14] - (y[14] >>> 14);
y[15] <= y[14] + (x[14] >>> 14);
z[15] <= z[14] - `rot14;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[16] <= 1'b0;
y[16] <= 1'b0;
z[16] <= 1'b0;
end
else if(!y[15][31])
begin
x[16] <= x[15] + (y[15] >>> 15);
y[16] <= y[15] - (x[15] >>> 15);
z[16] <= z[15] + `rot15;
end
else
begin
x[16] <= x[15] - (y[15] >>> 15);
y[16] <= y[15] + (x[15] >>> 15);
z[16] <= z[15] - `rot15;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[17] <= 1'b0;
y[17] <= 1'b0;
z[17] <= 1'b0;
end
else if(!y[16][31])
begin
x[17] <= x[16] + (y[16] >>> 16);
y[17] <= y[16] - (x[16] >>> 16);
z[17] <= z[16] + `rot16;
end
else
begin
x[17] <= x[16] - (y[16] >>> 16);
y[17] <= y[16] + (x[16] >>> 16);
z[17] <= z[16] - `rot16;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[18] <= 1'b0;
y[18] <= 1'b0;
z[18] <= 1'b0;
end
else if(!y[17][31])
begin
x[18] <= x[17] + (y[17] >>> 17);
y[18] <= y[17] - (x[17] >>> 17);
z[18] <= z[17] + `rot17;
end
else
begin
x[18] <= x[17] - (y[17] >>> 17);
y[18] <= y[17] + (x[17] >>> 17);
z[18] <= z[17] - `rot17;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[19] <= 1'b0;
y[19] <= 1'b0;
z[19] <= 1'b0;
end
else if(!y[18][31])
begin
x[19] <= x[18] + (y[18] >>> 18);
y[19] <= y[18] - (x[18] >>> 18);
z[19] <= z[18] + `rot18;
end
else
begin
x[19] <= x[18] - (y[18] >>> 18);
y[19] <= y[18] + (x[18] >>> 18);
z[19] <= z[18] - `rot18;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
x[20] <= 1'b0;
y[20] <= 1'b0;
z[20] <= 1'b0;
end
else if(!y[19][31])
begin
x[20] <= x[19] + (y[19] >>> 19);
y[20] <= y[19] - (x[19] >>> 19);
z[20] <= z[19] + `rot19;
end
else
begin
x[20] <= x[19] - (y[19] >>> 19);
y[20] <= y[19] + (x[19] >>> 19);
z[20] <= z[19] - `rot19;
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
Quadrant[0] <= 1'b0;
Quadrant[1] <= 1'b0;
Quadrant[2] <= 1'b0;
Quadrant[3] <= 1'b0;
Quadrant[4] <= 1'b0;
Quadrant[5] <= 1'b0;
Quadrant[6] <= 1'b0;
Quadrant[7] <= 1'b0;
Quadrant[8] <= 1'b0;
Quadrant[9] <= 1'b0;
Quadrant[10] <= 1'b0;
Quadrant[11] <= 1'b0;
Quadrant[12] <= 1'b0;
Quadrant[13] <= 1'b0;
Quadrant[14] <= 1'b0;
Quadrant[15] <= 1'b0;
Quadrant[16] <= 1'b0;
Quadrant[17] <= 1'b0;
Quadrant[18] <= 1'b0;
Quadrant[19] <= 1'b0;
Quadrant[20] <= 1'b0;
end
else
begin
Quadrant[0] <= Quadrant_in;
Quadrant[1] <= Quadrant[0];
Quadrant[2] <= Quadrant[1];
Quadrant[3] <= Quadrant[2];
Quadrant[4] <= Quadrant[3];
Quadrant[5] <= Quadrant[4];
Quadrant[6] <= Quadrant[5];
Quadrant[7] <= Quadrant[6];
Quadrant[8] <= Quadrant[7];
Quadrant[9] <= Quadrant[8];
Quadrant[10] <= Quadrant[9];
Quadrant[11] <= Quadrant[10];
Quadrant[12] <= Quadrant[11];
Quadrant[13] <= Quadrant[12];
Quadrant[14] <= Quadrant[13];
Quadrant[15] <= Quadrant[14];
Quadrant[16] <= Quadrant[15];
Quadrant[17] <= Quadrant[16];
Quadrant[18] <= Quadrant[17];
Quadrant[19] <= Quadrant[18];
Quadrant[20] <= Quadrant[19];
end
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
begin
angle <= 0;
length <= 0;
Error <= 0;
end
else
begin
Error <= y[Pipeline];
length <= x[Pipeline];
angle <= z[Pipeline];
end
end
endmodul
6、仿真 对比TD CORDIC与开源CORDIC精度
结论:在被开方数在百万级时,两者误差率都不超过1/1000,开源精度更高。角度计算值两者基本相等,相差大约0.01°左右。两者的计算结果都与TD的开方IP结果对比。
开源算法的误差检查代码:
TD CORDIC误差检查:
误差仿真:TD的误差稍大一些。
开源CORDIC算法的细节:
TD CORDIC的算法细节:
角度计算值两者对比:
`timescale 1 ns/ 1 ns
module Cordic_Sqrt_tb;
glbl glbl();
reg CLK_50M;
reg RST_N;
reg [31:0] cnt;
reg [31:0] cnt_n;
reg [31:0] Phase;
reg [31:0] Phase_n;
wire [31:0] Sin;
wire [31:0] Cos;
wire [31:0] Error;
reg [33:0] x;
reg [33:0] y;
reg [1:0] Quadrant_in;
wire [31:0] angle;
wire [31:0] length;
wire [33:0] rout_td;
wire [34:0] angle_td;
wire done_td ;
reg start_td =0 ;
wire [51:0] rad_i ;
wire start_i ;
wire ready_o ;
wire [25:0] sqr_o ;
wire ine_o ;
wire done_o ;
wire [48:0] sqrt_o ;
reg [51:0] rad_ishift;
reg start_ishift ;
tdcordic_sqrt tdcordic_sqrt
(
.clk (CLK_50M),
.num (rad_i),
.rst (!RST_N),
.start (start_td),
.done (done_o),
.sqrt (sqrt_o)
);
reg [31:0] td_sqrt;
reg [31:0] td_sqrt_temp [0:11];
always @ (posedge CLK_50M )
begin
td_sqrt_temp[0] <= sqrt_o[48:24];
td_sqrt_temp[1] <= td_sqrt_temp[0];
td_sqrt_temp[2] <= td_sqrt_temp[1];
td_sqrt_temp[3] <= td_sqrt_temp[2];
td_sqrt_temp[4] <= td_sqrt_temp[3];
td_sqrt <= td_sqrt_temp[4];
end
cordic cordic
(
.clk (CLK_50M),
.xin (x),
.yin (y ),
.rst(!RST_N),
.start (start_td),
.done (done_td),
.rout (rout_td),
.angle (angle_td)
);
wire [31:0]tderror_rate,tderror_num;
wire tderror_bit,tdnum_bit;
assign tderror_num = (rout_td > td_sqrt) ? (rout_td - td_sqrt) : (td_sqrt - rout_td);
assign tdnum_bit = (tderror_num >= 6) ? 1 : 0;
assign tderror_rate = (tderror_num* 100000)/td_sqrt;
assign tderror_bit = (tderror_rate >= 1) ? 1 : 0;
wire [63:0] angle_td_final,angle_td_final1,angle_td_final2;
assign angle_td_final1 = angle_td * 180 * 100 ;
assign angle_td_final2 = angle_td_final1/3.1415926 ;
assign angle_td_final = angle_td_final2 >> 32;
Cordic_Sqrt uut
(
.CLK_50M (CLK_50M ),
.RST_N (RST_N ),
.Phase (Phase ),
.Sin (Sin ),
.Cos (Cos ),
.Error (Error ),
.x_in(x),
.y_in(y),
.angle(angle),
.length(length),
.Quadrant_in(Quadrant_in)
);
reg [31:0] real_length,real_angle;
reg [31:0] length_temp [0:11];
reg [31:0] angle_temp [0:11];
always @ (posedge CLK_50M )
begin
length_temp[0] <= length * 0.6072529351;
length_temp[1] <= length_temp[0];
length_temp[2] <= length_temp[1];
length_temp[3] <= length_temp[2];
length_temp[4] <= length_temp[3];
length_temp[5] <= length_temp[4];
length_temp[6] <= length_temp[5];
length_temp[7] <= length_temp[6];
length_temp[8] <= length_temp[7];
length_temp[9] <= length_temp[8];
real_length <= length_temp[9];
angle_temp[0] <= angle*100 / 65536 ;
angle_temp[1] <= angle_temp[0];
angle_temp[2] <= angle_temp[1];
angle_temp[3] <= angle_temp[2];
angle_temp[4] <= angle_temp[3];
angle_temp[5] <= angle_temp[4];
angle_temp[6] <= angle_temp[5];
angle_temp[7] <= angle_temp[6];
angle_temp[8] <= angle_temp[7];
angle_temp[9] <= angle_temp[8];
real_angle <= angle_temp[9];
end
wire [31:0]error_rate,error_num;
wire error_bit,num_bit;
assign error_num = (real_length > sqrt_o[48:24]) ? (real_length - sqrt_o[48:24]) : (sqrt_o[48:24] - real_length);
assign num_bit = (error_num >= 6) ? 1 : 0;
assign error_rate = (error_num* 100000)/sqrt_o[48:24] ;
assign error_bit = (error_rate >= 1) ? 1 : 0;
sqrt_shift sqrt_shift
(
.clk_i (CLK_50M),
.rad_i (rad_ishift),
.start_i (start_ishift),
.ready_o (ready_o),
.sqr_o (sqr_o),
.ine_o (ine_o)
);
initial
begin
#0 CLK_50M = 1'b0;
RST_N = 1'b0;
x=34'd2_000_000;
y=34'd 000_000;
Quadrant_in=0;
#6 RST_N = 1'b1;
end
always #10
begin
CLK_50M = ~CLK_50M;
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
cnt <= 1'b0;
else
cnt <= cnt_n;
end
always @ ( * )
begin
if(cnt == 16'd359)
cnt_n = 1'b0;
else
cnt_n = cnt + 1'b1;
end
always @ (posedge CLK_50M or negedge RST_N)
begin
if(!RST_N)
Phase <= 1'b0;
else
Phase <= Phase_n;
end
assign rad_i = x*x + y*y;
always @ ( posedge CLK_50M )
begin
if(cnt >= 16'd4 && cnt <= 16'd364) begin
start_td <= 1;
x <= x - 34'd2857;
y <= y + 34'd2243;
Quadrant_in <= 2'b00;
end
else
start_td <= 0;
if(cnt == 16'd4) begin
rad_ishift <= 52'd3988585193498;
start_ishift <= 1;
end
else
start_ishift <= 0;
end
endmodule
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)