2.4 简单流水线计算机核设计
写到这里,大家是否已经能够理解2.1节中那个能够执行自己设计的程序的简单实例了呢?现将其适当扩充,设计一个简单的16位流水线结构计算机核。作为练习,这里给出的是一个4级非透明流水线设计,主要用于说明流水线的一般设计方法,没有对“相关”(冒险)问题进行相应处理,有兴趣的读者可以自行补上。此实例在Quartus II上设计,文后给出的两个例题程序都进行过时序仿真验证,并下载到教学计算机开发板上运行验证,结果正确无误。
2.4.1 指令设计与指令格式
指令格式说明:指令类编码占高4位(第15~12位),源寄存器编号占3位(第11~9位),目的寄存器编号占3位(第8~6位),功能码在低6位(第5~0位);如果是访存指令,则低12位为地址编码。
指令设计如下:
2.4.2 流水线核的Verilog HDL程序描述
// L_cpu: 16-bit, 4-stage pipelined CPU core (teaching example).
//
// Instruction word layout as decoded below:
//   [15:12] instruction class
//   class 0000 (register instructions):
//       [11:9] source register, [8:6] destination register, [5:0] function
//       000000 nop    000001 push   000010 pop    000011 ldar (reg -> da)
//       000100 strr (da -> reg)     000101 add    000110 sub
//       000111 out    001000 mov    001001 mult   001011 x -> da
//       111110 ret    111111 stp    (001010 divi is not implemented)
//   other classes:
//       0001 lda   0010 str   1001 sdal  1010 sdah
//       1011 jmp   1100 jz    1101 jn    1110 call
//       jmp/jz/jn/call take their target from [11:0]; sdal/sdah use [7:0].
//
// Every clock the instruction word moves q_w -> aq_w -> bq_w -> cq_w -> dq_w.
// q_w is decoded by the "analysis/preparation" step, aq_w by the "execute"
// step and bq_w by the "access" step.  No hazard ("dependency") handling is
// implemented: all steps run in the same clocked block and, when two steps
// write the same register in one cycle, the assignment that appears later
// in the block wins.
//
// NOTE(review): mar and ddata are never loaded by any instruction in this
// module, so lda/str always address location 0 with write data 0 - the
// memory-access instructions look unfinished; confirm before relying on them.
// NOTE(review): the stack memory port is 12 bits wide (pc_back / pc_next), so
// push/pop only preserve the low 12 bits of a 16-bit register.
module L_cpu(
clock, // system clock
clr_n, // initial reset, active low
idata, // input data (wired to the data port of the program memory)
odata, // output data (register loaded by the out instruction)
w,      // instruction word read from the program memory (q_w)
aq,     // pipeline register aq_w
bq,     // pipeline register bq_w
cq,     // pipeline register cq_w
opc,    // program counter
ocall,  // call flag register
pcback, // pc_back, the value written to the stack
osp,    // stack pointer
oret,   // ret flag register
oswren, // stack write enable
odwren, // data memory write enable
oda,    // da register
oadd,   // add flag register
osub,   // sub flag register
or0,    // register r_0
or1,or7 ); // registers r_1 and r_7
input clock,clr_n;
input [15:0] idata;
output [15:0] odata;
output [15:0] w,aq,bq,cq,oda,or0,or1,or7;
output ocall,oret,oswren,odwren,oadd,osub;
output [11:0] opc,pcback,osp;

// General registers r_0..r_7; x receives the high half of a product.
reg [15:0] r_0,r_1,r_2,r_3,r_4,r_5,r_6,r_7,x;
// Instruction flag lines.  In this module they are only cleared on reset
// (stp is the one exception: it is set by the stp instruction).
reg lda,add,sub,out,str,mov,xtda,mult,divi,sdal,sdah,
    ldar,strr,jmp,jz,jn,call,ret,push,pop,stp;
wire [15:0] q_w,q_data;   // program memory / data memory read data
wire [11:0] pc_next;      // stack read data
reg [15:0] aq_w,bq_w,cq_w,dq_w,ddata,da,outd;
reg [11:0] pc,sp,pc_back,mar;
reg [0:0] jp,iwren,dwren,swren,f_da,f_call,f_ret;
// Combinational operand values for the instruction currently in q_w.
reg [15:0] rs_val,rd_val,alu_y;

altsyncram0 iram(.address(pc),.clock(clock),.data(idata),.wren(iwren),.q(q_w)); // program memory
altsyncram1 dram(.address(mar),.clock(clock),.data(ddata),.wren(dwren),.q(q_data)); // data memory
altsyncram2 sram(.address(sp),.clock(clock),.data(pc_back),.wren(swren),.q(pc_next)); // stack

assign oda    = da;
assign odata  = outd;
assign w      = q_w;
assign aq     = aq_w;
assign bq     = bq_w;
assign cq     = cq_w;    // was commented out, which left the cq port undriven
assign opc    = pc;
assign ocall  = call;
assign pcback = pc_back;
assign osp    = sp;
assign oret   = ret;
assign oswren = swren;
assign odwren = dwren;   // previously undriven
assign oadd   = add;     // previously undriven; mirrors the add flag register
assign osub   = sub;     // previously undriven; mirrors the sub flag register
assign or0    = r_0;
assign or1    = r_1;
assign or7    = r_7;

// Source operand: register selected by q_w[11:9].
always @(q_w or r_0 or r_1 or r_2 or r_3 or r_4 or r_5 or r_6 or r_7)
    case (q_w[11:9])
        3'b000:  rs_val = r_0;
        3'b001:  rs_val = r_1;
        3'b010:  rs_val = r_2;
        3'b011:  rs_val = r_3;
        3'b100:  rs_val = r_4;
        3'b101:  rs_val = r_5;
        3'b110:  rs_val = r_6;
        default: rs_val = r_7;
    endcase

// Destination operand: register selected by q_w[8:6].
always @(q_w or r_0 or r_1 or r_2 or r_3 or r_4 or r_5 or r_6 or r_7)
    case (q_w[8:6])
        3'b000:  rd_val = r_0;
        3'b001:  rd_val = r_1;
        3'b010:  rd_val = r_2;
        3'b011:  rd_val = r_3;
        3'b100:  rd_val = r_4;
        3'b101:  rd_val = r_5;
        3'b110:  rd_val = r_6;
        default: rd_val = r_7;
    endcase

// Result written to the destination register by add / sub / mov:
//   add: dst <= src + dst,  sub: dst <= dst - src,  mov: dst <= src.
always @(q_w or rs_val or rd_val)
    case (q_w[5:0])
        6'b000101: alu_y = rs_val + rd_val;
        6'b000110: alu_y = rd_val - rs_val;
        default:   alu_y = rs_val;
    endcase

always @(posedge clock or negedge clr_n) // fetch the instruction and pass it down the pipeline
begin
    if (!clr_n)
    begin
        aq_w <= 16'h0000;
        bq_w <= 16'h0000;
        cq_w <= 16'h0000;
        dq_w <= 16'h0000;
        jp <= 0;
        pc <= 0;
        sp <= 0; // bottom of the stack
        dwren <= 0;
        swren <= 0;
        // These three drive memory ports but are never assigned elsewhere;
        // clearing them keeps the program-memory write enable and the data
        // memory address/data from being X in RTL simulation.
        iwren <= 0;
        mar <= 0;
        ddata <= 0;
        // instruction lines
        lda <= 0; // da loads data
        str <= 0;
        ldar <= 0;
        strr <= 0;
        add <= 0;
        sub <= 0;
        out <= 0;
        mov <= 0;
        xtda <= 0;
        mult <= 0;
        divi <= 0;
        sdal <= 0;
        sdah <= 0;
        jmp <= 0;
        jz <= 0;
        jn <= 0;
        call <= 0;
        ret <= 0;
        push <= 0;
        pop <= 0;
        stp <= 0;
        // flag lines
        f_da <= 0; // 1 while da is being written
        f_call <= 0;
        f_ret <= 0;
    end
    else
    begin
        // 1. Fetch, transfer and beat control.
        if (stp)
        begin
            // Stopped: hold pc at 0 and keep the pipeline registers empty.
            pc <= 0;
            aq_w <= 0;
            bq_w <= 0;
            cq_w <= 0;
            dq_w <= 0;
        end
        else
        begin
            pc <= pc+1;
            aq_w <= q_w;  // aq_w actually changes on the 1st beat
            bq_w <= aq_w; // bq_w actually changes on the 2nd beat
            cq_w <= bq_w; // cq_w actually changes on the 3rd beat
            dq_w <= cq_w; // dq_w actually changes on the 4th beat
        end
        // From here on q_w is passed backwards one register per clock.

        // 2. Analysis / preparation: decode q_w.
        case (q_w[15:12])
        4'b0000:
            case (q_w[5:0])
            6'b000001: pc_back <= rs_val[11:0]; // push: only the low 12 bits fit the stack port
            6'b000010: sp <= sp-1;              // pop: point at the entry to read
            6'b000011: da <= rs_val;            // ldar: register -> da
            6'b000100:                          // strr: da -> register q_w[11:9]
                case (q_w[11:9])
                3'b000: r_0 <= da;
                3'b001: r_1 <= da;
                3'b010: r_2 <= da;
                3'b011: r_3 <= da;
                3'b100: r_4 <= da;
                3'b101: r_5 <= da;
                3'b110: r_6 <= da;
                3'b111: r_7 <= da;
                endcase
            6'b000101,                          // add
            6'b000110,                          // sub
            6'b001000:                          // mov
                // Write the ALU result to destination register q_w[8:6].
                // For mov with src == dst this rewrites the register with
                // its own value, i.e. it is a no-op.
                case (q_w[8:6])
                3'b000: r_0 <= alu_y;
                3'b001: r_1 <= alu_y;
                3'b010: r_2 <= alu_y;
                3'b011: r_3 <= alu_y;
                3'b100: r_4 <= alu_y;
                3'b101: r_5 <= alu_y;
                3'b110: r_6 <= alu_y;
                3'b111: r_7 <= alu_y;
                endcase
            6'b000111: outd <= rs_val;          // out
            6'b001001: {x,da} <= da*rs_val;     // mult: 32-bit product, high half in x
            // 6'b001010 (divi) is not implemented; the disabled original
            // computed da <= da / reg and x <= da % reg.
            6'b001011: da <= x;                 // x -> da
            6'b111110: sp <= sp-1;              // ret: prepare to pop
            6'b111111: begin                    // stp
                stp <= 1;
                pc <= 0;
            end
            default: begin end                  // nop (000000) and unused codes
            endcase
        4'b1001: begin // sdal
            da <= {{8{q_w[7]}},q_w[7:0]}; // sign-extend the 8-bit immediate to 16 bits
        end
        4'b1010: begin // sdah
            da[15:0] <= {q_w[7:0],da[7:0]}; // low 8 bits were loaded beforehand by sdal
        end
        4'b1011: begin // jmp
            pc <= q_w[11:0];
        end
        4'b1100: begin // jz
            if (da==0) pc <= q_w[11:0];
        end
        4'b1101: begin // jn
            // da is an unsigned reg, so the former test (da<0) could never
            // be true; "negative" is the sign bit, matching sdal's sign
            // extension.
            if (da[15]) pc <= q_w[11:0];
        end
        4'b1110: begin // call
            pc_back <= pc+1; // return address to be pushed
            pc <= q_w[11:0];
        end
        default: begin
        end
        endcase

        // 3. Execute: decode aq_w.
        case (aq_w[15:12])
        4'b0000:
            case (aq_w[5:0])
            6'b000001: // push
                begin
                    swren <= 1; // the push is carried out on the 3rd beat
                end
            6'b000010: // pop: the value is taken from the stack on the 3rd beat
                // pc_next is 12 bits wide and is zero-extended into the register.
                case (aq_w[11:9])
                3'b000: r_0 <= pc_next;
                3'b001: r_1 <= pc_next;
                3'b010: r_2 <= pc_next;
                3'b011: r_3 <= pc_next;
                3'b100: r_4 <= pc_next;
                3'b101: r_5 <= pc_next;
                3'b110: r_6 <= pc_next;
                3'b111: r_7 <= pc_next;
                endcase
            default: begin end
            endcase
        4'b0001: begin // lda
            da <= q_data; // the read is carried out on the 3rd beat
        end
        4'b0010: begin // str
            dwren <= 1; // the write is carried out on the 3rd beat
        end
        4'b1110: begin // call
            swren <= 1; // push pc_back
        end
        default: begin
        end
        endcase

        // 4. Access: decode bq_w.
        case (bq_w[15:12])
        4'b0000:
            case (bq_w[5:0])
            6'b000001: // push
                begin
                    swren <= 0;
                    sp <= sp+1;
                end
            6'b111110: pc <= pc_next; // ret: the jump must happen on the 4th beat
            6'b000010: // pop: nothing left to do
                begin
                end
            endcase
        4'b0010: begin // str
            dwren <= 0;
        end
        4'b1110: begin // call
            swren <= 0;
            sp <= sp+1; // point at the new top of stack
        end
        default: begin
        end
        endcase
    end
end
endmodule
//////////////////////////////////////////////////////////////
//例题1:求1+2+...+300。数据文件imem16_1.mif
// 汇编 编译
//Start: sdal 1 9001
// Strr 1 0204
// Sdal 0 9000
// Strr 0 0004
// Sdal 44 902c
// sdah 1 a001 //
// Strr 7 0e04
//Loop: ldar 7 0e03
// Jz exit c00e
// Add 7,0 0e05
// ldar 7 0e03
// Sub 1,7 03c6
// Jmp loop b007
// nop 0000
//Exit: out 0 0007
// stp 003f
//
//结果为:hb05e=45150
//
//用标志控制正确转移
//call a
//call b
//ret
//ret
//
//例题2:求8! 数据文件imem16_2.mif
//
//0 main: sdal 1 9001 ;1
//1 Strr 1 0204 ;送1号寄存器
//2 strr 2 0404 ;2号寄存器(部分积)
//3 Sdal 0 9000 ;0
//4 Strr 0 0004 ;送0号寄存器
//5 sdal 8 9008 ;
//6 sdah 0 a000 ;高位置0
//7 strr 7 0e04 ;8送7号寄存器
//8 call make e00c ;
//9 nop 0000 ;
//a out 2 0407 ;输出结果
//b stp 003f ;停机
//c make: ldar 7 0e03 ;
//d jz exit c015 ;
//e mult 2 0409 ;乘法
//f strr 2 0404 ;
//10 sub 1,7 03c6 ;
//11 call test e019 ;二层调用
//12 nop 0000 ;
//13 jmp make b00c ;
//14 nop 0000 ;
//15 exit: ret 003e ;ret后面要加3个nop断流,其他转移指令后加1个nop.
//16 nop 0000 ;
//17 nop 0000 ;
//18 nop 0000 ;
//19 test: nop 0000 ;不让call连接ret
//1a ret 003e ;
//1b nop 0000 ;
//1c nop 0000 ;
//
//结果:h9d80=40320
//编程注意事项:
//由于call要到第4拍才完成入栈,ret也要到第4拍才能实现pc转移,所以call之后不能立即接ret
程序初始化文件如图2-16所示。