基本算法

ROM乘法器的算法比较简单,即使用一个ROM保存乘法的结果,在需要运算的时候直接到相应的地址去查表即可。例如计算两个4位二进制数的乘法a*b,那么只需要一个八位输入八位输出的ROM存储计算结果,其地址与存储数据的关系为:地址{a,b}(位拼接)存储a*b(例如地址8'b00010010存储的结果就是0001*0010=8'b00000010)。
这种情况下使用的ROM比较大,所以在时序要求不严格的时候可以用时间换面积。例如对于8位×8位的ROM乘法器,我们将乘数1拆成高4位和低4位,乘数2也拆成高4位和低4位,然后两两相乘:高四位和高四位相乘的结果左移8位,高四位和低四位相乘的结果(共两项)左移4位,低四位和低四位相乘的结果不移位,最后将四个部分积累加(就是手算乘法常用的套路),最少四个时钟周期后即可得到结果。这样使用的ROM可由16位地址、16位数据(8位×8位乘法表)降到8位地址、8位数据(4位×4位乘法表)。

单个ROM乘法器

Python生成器

单个ROM在Verilog中可以使用case语句模拟。手写这种重复度很高的case语句无疑效率很低,因此本次使用Python脚本生成。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class ROMGenerator(object):
    """Generate a Verilog lookup-table multiplier module named ``ROM_<Width>``.

    The emitted module has a ``2 * Width``-bit ``addr`` input and a
    ``2 * Width``-bit ``dout`` output.  Address ``{a, b}`` (two ``Width``-bit
    operands concatenated, ``a`` in the upper bits) holds the product
    ``a * b``, written out as one ``case`` item per address.
    """

    def __init__(self, Width):
        """Remember the operand width in bits.

        Args:
            Width: number of bits of each multiplier operand.

        Raises:
            ValueError: if ``Width`` is smaller than 1 (the generated port
                ranges would be ``[-1:0]``, which is not a usable ROM).
        """
        super(ROMGenerator, self).__init__()
        if Width < 1:
            raise ValueError(
                "Width must be a positive integer, got %r" % (Width,))
        self.Width = Width

    def _render(self):
        """Return the complete Verilog source text without touching the disk."""
        width = self.Width
        bus_bits = 2 * width
        operand_count = 2 ** width

        # Template text is reproduced exactly as in the original listing
        # (the leading blank line and the un-indented header are intentional).
        header = (
            "\n"
            "module ROM_%s (\n"
            "input [%s:0]addr,\n"
            "output reg [%s:0]dout\n"
            ");\n"
            "\n"
            "always @(*) begin\n"
            "case(addr)"
        ) % (width, bus_bits - 1, bus_bits - 1)

        # One case item per {a, b} pair; the address is a * 2**Width + b.
        items = [
            "\t\t%s'd%s:dout = %s'd%s;" % (
                bus_bits, a * operand_count + b, bus_bits, a * b)
            for a in range(operand_count)
            for b in range(operand_count)
        ]

        footer = (
            "\t\tdefault:dout = 'b0;\n"
            "endcase\n"
            "end\n"
            "endmodule\n"
        )
        return "\n".join([header] + items + [footer])

    def GeneratorROM(self, FileName):
        """Write the ROM module to ``./<FileName>.v`` and return its text.

        Args:
            FileName: output file name without the ``.v`` extension; the file
                is created in (or overwritten in) the current directory.

        Returns:
            The Verilog source that was written, as a single string.
        """
        text = self._render()
        with open("./%s.v" % FileName, "w") as filepoint:
            filepoint.write(text)
        return text

if __name__ == '__main__':
    # Generate ROM_4.v (4-bit x 4-bit lookup multiplier) in the current
    # directory and echo the generated Verilog to stdout.
    test = ROMGenerator(4)
    print(test.GeneratorROM("ROM_4"))

代码很简单,除了开头和结尾以外,就是按照模板 \t\t%s\'d%s:dout = %s\'d%s; 批量生成case语句的各个分支。

测试平台

测试时使用SystemVerilog编写的测试平台,使用*运算符和自己的模块的输出相比较

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Testbench for the single-ROM multiplier.
// Drives random operand pairs into ROM_4 on falling clock edges and, on each
// rising edge, compares the ROM output against the built-in `*` operator.
// The DUT is hard-wired to ROM_4, so WIDTH must stay 4 unless the
// instantiated module is changed as well.
module mult_tb (
);

    parameter WIDTH = 4;

    logic clk,rst_n;
    logic [WIDTH - 1:0]multiplier1;
    logic [WIDTH - 1:0]multiplier2;

    logic [2 * WIDTH - 1:0]product;

    // Address is {multiplier1, multiplier2}; dout is the looked-up product.
    ROM_4 dut(
        .addr({multiplier1,multiplier2}),
        .dout(product)
    );

    // Free-running clock, toggling every 50 time units.
    initial begin
        clk = 1'b0;
        forever begin
            #50 clk = ~clk;
        end
    end

    // Reset pulse; not connected to the DUT in this testbench.
    initial begin
        rst_n = 1'b1;
        #5 rst_n = 1'b0;
        #10 rst_n = 1'b1;
    end

    // Stimulus: 100 random operand pairs, changed on the falling edge so they
    // are stable when the checker samples on the rising edge.
    initial begin
        {multiplier1,multiplier2} = 'b0;
        repeat(100) begin
            @(negedge clk);
            // $urandom_range bounds are inclusive: the upper bound must be
            // 2**WIDTH - 1, otherwise 2**WIDTH is drawn and truncates to 0.
            multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
            multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
        end
        // Wait one more cycle so the checker also samples the last vector.
        @(negedge clk);
        $stop();
    end

    // Checker: expected value from the `*` operator vs. the ROM output.
    logic [2 * WIDTH - 1:0]exp;
    initial begin
        exp = 'b0;
        forever begin
            @(posedge clk);
            exp = multiplier1 * multiplier2;
            if(exp == product) begin
                $display("successful");
            end else begin
                $display("fail");
            end
        end
    end
endmodule

分时复用ROM乘法器

RTL代码

核心部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Time-multiplexed ROM multiplier core.
// Multiplies two (2 * HALF_WIDTH)-bit operands by looking up the four
// HALF_WIDTH x HALF_WIDTH partial products in one external ROM
// (rom_address / rom_dout) and accumulating them into dout.
module serial_multrom_mult_core #(
    parameter HALF_WIDTH = 4
)(
    input clk,    // Clock
    input rst_n,  // Asynchronous reset active low

    input [2 * HALF_WIDTH - 1:0]mult1,mult2,

    input start,
    input [2 * HALF_WIDTH - 1:0]rom_dout,
    output reg [2 * HALF_WIDTH - 1:0]rom_address,
    output reg [4 * HALF_WIDTH - 1:0]dout
);

// State encodings are internal constants, so they are localparams: they must
// not be overridable from outside the module.
localparam INIT = 1'b0,
           WORK = 1'b1;
reg mode;
reg [1:0]counte_4_decay2;

// Mode register: INIT -> WORK when start is high; WORK -> INIT once
// counte_4_decay2 reaches 3.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        mode <= INIT;
    end else begin
        case (mode)
            INIT:begin
                if(start == 1'b1) begin
                    mode <= WORK;
                end else begin
                    mode <= INIT;
                end
            end
            WORK:begin
                if(counte_4_decay2 == 2'd3) begin
                    mode <= INIT;
                end else begin
                    mode <= WORK;
                end
            end
            default:mode <= INIT;
        endcase
    end
end

到这里是一个状态机的状态部分,开始信号有效时状态变为WORK,运算结束状态变为INIT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Step counter: increments every clock while mode is WORK, otherwise held at 0.
reg [1:0]counte_4;
always @(posedge clk or negedge rst_n) begin : proc_counte_4
    if(~rst_n)
        counte_4 <= 'b0;
    else if(mode == WORK)
        counte_4 <= counte_4 + 1'b1;
    else
        counte_4 <= 'b0;
end

// Operand latch: both operands are captured whenever start is high and keep
// their value otherwise.
reg [2 * HALF_WIDTH - 1:0]mult1_lock,mult2_lock;
always @(posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        mult1_lock <= 'b0;
        mult2_lock <= 'b0;
    end else if(start == 1'b1) begin
        mult1_lock <= mult1;
        mult2_lock <= mult2;
    end
end

// Upper / lower halves of the latched operands.
wire [HALF_WIDTH - 1:0]m1_lo = mult1_lock[HALF_WIDTH - 1:0];
wire [HALF_WIDTH - 1:0]m1_hi = mult1_lock[2 * HALF_WIDTH - 1:HALF_WIDTH];
wire [HALF_WIDTH - 1:0]m2_lo = mult2_lock[HALF_WIDTH - 1:0];
wire [HALF_WIDTH - 1:0]m2_hi = mult2_lock[2 * HALF_WIDTH - 1:HALF_WIDTH];

// ROM address mux: counte_4 selects which pair of halves is looked up.
// counte_4_decay is counte_4 delayed by one clock; both registers are
// cleared by reset and by start.
reg [1:0]counte_4_decay;
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        rom_address    <= 'b0;
        counte_4_decay <= 'b0;
    end else if(start == 1'b1) begin
        rom_address    <= 'b0;
        counte_4_decay <= 'b0;
    end else begin
        counte_4_decay <= counte_4;
        case (counte_4)
            2'd0:    rom_address <= {m1_lo,m2_lo};
            2'd1:    rom_address <= {m1_hi,m2_lo};
            2'd2:    rom_address <= {m1_lo,m2_hi};
            2'd3:    rom_address <= {m1_hi,m2_hi};
            default: rom_address <= 'b0;
        endcase
    end
end

以上是输入控制部分,将乘数1高四位低四位,乘数2高四位低四位位拼接后送入ROM,获取乘积
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// ROM output zero-extended to the full product width.  Written as an explicit
// concatenation: the original '{rom_dout} is an assignment pattern, which is
// not legal Verilog and does not express a zero-extension.
wire [4 * HALF_WIDTH - 1:0]rom_dout_ex = {{(2 * HALF_WIDTH){1'b0}}, rom_dout};
reg [4 * HALF_WIDTH - 1:0]rom_dout_lock;

// Partial-product alignment: counte_4_decay selects how far the current ROM
// word is shifted (0, HALF_WIDTH, HALF_WIDTH or 2 * HALF_WIDTH bits).
// counte_4_decay2 is counte_4_decay delayed by one more clock.  Both
// registers are cleared by reset and by start.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        {rom_dout_lock,counte_4_decay2} <= 'b0;
    end else if(start == 1'b1) begin
        {rom_dout_lock,counte_4_decay2} <= 'b0;
    end else begin
        case (counte_4_decay)
            2'd0:rom_dout_lock <= rom_dout_ex;
            2'd1:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;
            2'd2:rom_dout_lock <= rom_dout_ex << HALF_WIDTH;
            2'd3:rom_dout_lock <= rom_dout_ex << (2 * HALF_WIDTH);
            default:rom_dout_lock <= 'b0;
        endcase
        counte_4_decay2 <= counte_4_decay;
    end
end

// Accumulator: adds the aligned partial product every clock while mode is
// WORK, is cleared by start otherwise, and holds its value when idle.
// Note: mode == WORK is tested before start, so a start pulse that arrives
// while mode is WORK does not clear dout.
always @ (posedge clk or negedge rst_n) begin
    if(~rst_n) begin
        dout <= 'b0;
    end else if(mode == WORK) begin
        dout <= dout + rom_dout_lock;
    end else if(start == 1'b1) begin
        dout <= 'b0;
    end else begin
        dout <= dout;
    end
end

endmodule

从ROM获取数据后按乘数组成位移相应位数后累加

顶层部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Top level of the time-multiplexed ROM multiplier: wires the control core to
// the lookup ROM.
// The ROM instance is hard-wired to ROM_4, so HALF_WIDTH must be 4 for the
// rom_address / rom_dout buses to match the ROM ports.  The default is
// therefore 4 (it was 2, which does not match ROM_4).
module serial_multrom_mult_top #(
    parameter HALF_WIDTH = 4
)(
    input clk,    // Clock
    input rst_n,  // Asynchronous reset active low

    input start,
    input [2 * HALF_WIDTH - 1:0]mult1,mult2,
    output [4 * HALF_WIDTH - 1:0]dout
);

    wire [2 * HALF_WIDTH - 1:0]rom_dout;
    wire [2 * HALF_WIDTH - 1:0]rom_address;

    // Control core: sequences the four partial-product lookups and
    // accumulates them into dout.
    serial_multrom_mult_core #(
        .HALF_WIDTH(HALF_WIDTH)
    ) u_serial_multrom_mult_core (
        .clk(clk),      // Clock
        .rst_n(rst_n),  // Asynchronous reset active low

        .mult1(mult1),
        .mult2(mult2),

        .start(start),
        .rom_dout(rom_dout),
        .rom_address(rom_address),
        .dout(dout)
    );

    // Lookup table addressed by the core.
    ROM_4 u_ROM_4(
        .addr(rom_address),
        .dout(rom_dout)
    );
endmodule

Testbench

testbench由单个ROM的Testbench加入时钟和开始信号等改进而来

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
`timescale 1ns/1ps
// Testbench for the time-multiplexed ROM multiplier.
// For each of 100 random operand pairs: pulse start for one clock, wait a
// further 11 clocks, then compare the DUT output against the `*` operator.
module mult_tb (
);

    parameter HALF_WIDTH = 4;
    parameter WIDTH = HALF_WIDTH * 2;

    logic clk,rst_n;
    logic start;
    logic [WIDTH - 1:0]multiplier1;
    logic [WIDTH - 1:0]multiplier2;

    logic [2 * WIDTH - 1:0]product;

    serial_multrom_mult_top #(
        .HALF_WIDTH(HALF_WIDTH)
    ) dut (
        .clk(clk),      // Clock
        .rst_n(rst_n),  // Asynchronous reset active low

        .start(start),
        .mult1(multiplier1),
        .mult2(multiplier2),
        .dout(product)
    );

    // Free-running clock, toggling every 50 ns.
    initial begin
        clk = 1'b0;
        forever begin
            #50 clk = ~clk;
        end
    end

    // Asynchronous active-low reset pulse shortly after time 0.
    initial begin
        rst_n = 1'b1;
        #5 rst_n = 1'b0;
        #10 rst_n = 1'b1;
    end

    logic [2 * WIDTH - 1:0]exp;
    initial begin
        // Drive start low from time 0 so the DUT never samples an X on it.
        start = 1'b0;
        {multiplier1,multiplier2} = 'b0;
        repeat(100) begin
            @(negedge clk);
            start = 1'b1;
            // $urandom_range bounds are inclusive: the upper bound must be
            // 2**WIDTH - 1, otherwise 2**WIDTH is drawn and truncates to 0.
            multiplier1 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
            multiplier2 = (WIDTH)'($urandom_range(0,2 ** WIDTH - 1));
            exp = multiplier1 * multiplier2;
            // start is high for one clock, then low while the DUT works.
            repeat(12) begin
                @(negedge clk);
                start = 'b0;
            end
            if(product == exp) begin
                $display("successful");
            end else begin
                $display("fail");
            end
        end
        $stop();
    end

endmodule

需要注意的是,使用modelsim仿真的时候如果出现错误代码211,这时关闭波形优化功能即可正常仿真