47 // parameter M_WIDTH = 43, // actual multiplier width (== A_WIDTH + B_WIDTH) 48 parameter ROUND_OUT =
8,
// cut this number of LSBs on the output, round result (in addition to COSINE_SHIFT) 51 parameter COS_1_16 =
128553,
// (1<<17) * cos(1*pi/16) 52 parameter COS_2_16 =
121095,
// (1<<17) * cos(2*pi/16) 53 parameter COS_3_16 =
108982,
// (1<<17) * cos(3*pi/16) 54 parameter COS_4_16 =
92682,
// (1<<17) * cos(4*pi/16) 55 parameter COS_5_16 =
72820,
// (1<<17) * cos(5*pi/16) 56 parameter COS_6_16 =
50159,
// (1<<17) * cos(6*pi/16) 57 parameter COS_7_16 =
25570 // (1<<17) * cos(7*pi/16) 62 input [
2 *
WIDTH -
1:
0]
d10_32_76_54,
// Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]} 63 input start,
// {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]} 65 output reg pre2_start_out,
// 2 clock cycles before F4 output, full dout sequence 66 // start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7 67 output reg en_out // valid at the same time slot as pre2_start_out (goes active with pre2_start_out) 103 // Multiplier A/D inputs before shift 136 // wire simd_ceas45; // second stage A registers CE 150 // wire [OUT_WIDTH -1:0] dout1_w; 151 // wire [OUT_WIDTH -1:0] dout2_w; 158 reg [
2:
0]
per_type;
// idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out) 161 // Temporarily adding 1 extra latency cycle for rounding/saturation. TODO: Remove when moved to DSP itself 163 // start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7 164 reg pre_en_out;
// valid at the same time slot as pre2_start_out (goes active with pre2_start_out) 169 // .ain ({simd_a1,simd_a0}), // input[47:0] 170 // .bin ({simd_b1,simd_b0}), // input[47:0] 171 // dsp_addsub_simd1_i input connections 182 // assign simd_cep01 = phase[2] | phase[3] | phase[5] | phase[6]; 185 // dsp_addsub_simd2_i input connections 196 // assign simd_cep23 = phase[0] | phase[3] | phase[4] | phase[7]; 199 assign simd_a4 =
simd_p3;
// only at phase[6], other phases - don't care 200 assign simd_a5 =
simd_p0;
// only at phase[6], other phases - don't care 202 // dsp_addsub_reg2_simd_i input connections 203 // assign simd_b4 = dsp_ma_p_1[M_WIDTH-1 -: WIDTH]; // only at phase[6], other phases - don't care. TODO: add symmetric rounding here? 204 // assign simd_b5 = dsp_ma_p_1[M_WIDTH-1 -: WIDTH]; // only at phase[2], other phases - don't care. TODO: add symmetric rounding here? 215 // assign simd_ceas45 = phase[2]; 219 // assign simd_cep45 = phase[2] | phase[3] | phase[4] | phase[5]; 222 // dsp_ma1_i control connections 235 // dsp_ma1_i data input connections 236 /* assign dsp_ma_ain24_1 = ({WIDTH{phase[6]}} & simd_p1) | 237 ({WIDTH{phase[1]}} & simd_p2) | 238 ({WIDTH{phase[2]}} & simd_p0) | 239 ({WIDTH{phase[3]}} & simd_p2) ; // Other - don't care **/ 240 // Swapping A and d for pre-added (it is D-A, not A-D) 241 // assign dsp_ma_ain24_1 = phase[6] ? simd_p1 : (phase[2] ? simd_p0 : simd_p2); 242 // assign dsp_ma_din24_1 = phase[6] ? simd_p2 : simd_p1; 244 // assign dsp_ma_din24_1 = phase[6] ? simd_p1 : simd_p1; 247 // dsp_ma2_i control connections 258 // dsp_ma2_i data input connections 262 // Shift adder outputs to the MSB of the multiplier inputs 263 // assign dsp_ma_ain_1 = {dsp_ma_ain24_1, {A_WIDTH-WIDTH{1'b0}}}; 264 // assign dsp_ma_din_1 = {dsp_ma_din24_1, {A_WIDTH-WIDTH{1'b0}}}; 265 // assign dsp_ma_ain_2 = {dsp_ma_ain24_2, {A_WIDTH-WIDTH{1'b0}}}; 266 // assign dsp_ma_din_2 = {dsp_ma_din24_2, {A_WIDTH-WIDTH{1'b0}}}; 267 // Extend sign for A and D of the multiplier inputs (24bits->25 bits) 273 // Shift DSP outputs to match output results 276 // assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added) 277 // assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added) 280 // assign dout1_w = dsp_ma_p_1[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added) 281 // assign dout2_w = dsp_ma_p_2[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two 
MPY outputs are added) 285 // Saturation (only if BEFORE_SAT_WIDTH > OUT_WIDTH) 288 if (
TRIM_MSB <
0)
begin // should never happen 292 end else begin //! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks? 299 // to possibly remove registers with generate 304 // wire dout_round_c; 305 // wire [OUT_WIDTH -1:0] dout_round_w; 308 //phase_cnt[0] ? dout1_w : dout2_w; 311 always @ (
posedge clk)
begin 314 else if (
phase[
7])
begin 323 // Cosine table, defined to fit into 17 bits for 18-bit signed DSP B-operand 334 // dout_r <= phase_cnt[0] ? dout1_w : dout2_w; 345 else if (
phase[
3])
begin 357 )
dsp_addsub_simd1_i (
372 )
dsp_addsub_simd2_i (
384 dsp_addsub_reg2_simd #( 387 ) dsp_addsub_reg2_simd_i ( 390 .ain ({simd_a5,simd_a4}), // input[47:0] 391 .bin ({simd_b5,simd_b4}), // input[47:0] 392 .cea1 (simd_ceaf45), // input 393 .cea2 (simd_ceas45), // input 394 .ceb (simd_ceb45), // input 395 .subtract (simd_sub45), // input 396 .cep (simd_cep45), // input 397 .pout ({simd_p5,simd_p4}) // output[47:0] 403 )
dsp_addsub_simd3_i (
431 .
cead (
1'b1),
// input 465 dly01_16 dly01_16_i ( 468 .dly (4'h4), // input[3:0] 469 .din (phase[7]), // input 470 .dout (pre2_start_out) // output
3211dout_rreg[OUT_WIDTH-1:0]
3175dsp_ma_ain24_1wire[WIDTH-1:0]
3185simd_b0wire[WIDTH-1:0]
signed [B_WIDTH-1:0] 3383bin
[2 * WIDTH -1:0] 3138d10_32_76_54
3189simd_b4wire[WIDTH-1:0]
3160dsp_ma_p_1wire[P_WIDTH-1:0]
signed [A_WIDTH-1:0] 3390din
signed [A_WIDTH-1:0] 3368din
[OUT_WIDTH -1:0] 3140dout
3178dsp_ma_din24_2wire[WIDTH-1:0]
dsp_addsub_simd3_i dsp_addsub_simd
3144BEFORE_SAT_WIDTHP_WIDTH - TOTAL_RSHIFT
[NUM_DATA * WIDTH -1:0] 3352pout
3164dsp_ma_ain_2wire[A_WIDTH-1:0]
signed [P_WIDTH-1:0] 3399pout
3195simd_p4wire[WIDTH-1:0]
3181simd_a2wire[WIDTH-1:0]
3194simd_p3wire[WIDTH-1:0]
3190simd_b5wire[WIDTH-1:0]
3196simd_p5wire[WIDTH-1:0]
3187simd_b2wire[WIDTH-1:0]
3215dout_sat_wwire[OUT_WIDTH-1:0]
3174dsp_ma_p_muxwire[P_WIDTH-1:0]
3216dout_roundwire[BEFORE_SAT_WIDTH-1:0]
3180simd_a1wire[WIDTH-1:0]
signed [A_WIDTH-1:0] 3387ain
signed [B_WIDTH-1:0] 3361bin
3182simd_a3wire[WIDTH-1:0]
[NUM_DATA * WIDTH -1:0] 3346ain
signed [A_WIDTH-1:0] 3365ain
3152dsp_ma_din_1wire[A_WIDTH-1:0]
3167dsp_ma_din_2wire[A_WIDTH-1:0]
[NUM_DATA * WIDTH -1:0] 3347bin
3176dsp_ma_din24_1wire[WIDTH-1:0]
3188simd_b3wire[WIDTH-1:0]
3192simd_p1wire[WIDTH-1:0]
3220TRIM_MSBBEFORE_SAT_WIDTH - OUT_WIDTH
3145dsp_ma_binreg[B_WIDTH-1:0]
3149dsp_ma_ain_1wire[A_WIDTH-1:0]
3214dout_round_rreg[BEFORE_SAT_WIDTH-1:0]
3186simd_b1wire[WIDTH-1:0]
3173dsp_ma_p_2wire[P_WIDTH-1:0]
3191simd_p0wire[WIDTH-1:0]
signed [P_WIDTH-1:0] 3374pout
3143TOTAL_RSHIFTCOSINE_SHIFT + ROUND_OUT
3193simd_p2wire[WIDTH-1:0]
3179simd_a0wire[WIDTH-1:0]
3184simd_a5wire[WIDTH-1:0]
3183simd_a4wire[WIDTH-1:0]
3213dout_round_wwire[BEFORE_SAT_WIDTH-1:0]
3177dsp_ma_ain24_2wire[WIDTH-1:0]