CMP设计分享 http://blog.sciencenet.cn/u/accsys 没有逆向思维就没有科技原创。 不自信是科技创新的大敌。

博文

CPU设计我要让更多的人掌握它(续7)

已有 4383 次阅读 2010-1-13 13:34 |个人分类:计算机核|系统分类:科研笔记| 动态计算机, 核重组, 执行核, 动态关系总线

2.4 简单流水线计算机核设计
 
写到这里,大家是否就能够理解2.1那个简单能够执行自己设计程序的实例了呢?现将其适当扩充,做一个简单的16位的流水线结构计算机核。作为练习,这里给出的是一个4级非透明流水线设计,主要解释流水线的一般设计方法,没有对“相关”问题进行相应处理,读者有兴趣可以自己补上。此实例在Quartus II上设计,文末的两个例题程序都进行过时序仿真验证,并下载到教学计算机开发板上运行验证,结果正确无误。
 
2.4.1 指令设计与指令格式
 
指令格式说明:指令类编码在高4位,源寄存器3位,目的寄存器3位,功能码在低6位;如果是访存指令,地址编码12位。
 
指令设计如下:
 
 
 2.4.2 流水线核的Verilog HDL程序描述
 
// L_cpu: 16-bit pipelined CPU core with separate program, data and stack RAMs.
//
// Every clock the instruction word read from program memory (q_w) is shifted
// through aq_w -> bq_w -> cq_w -> dq_w.  Three decoders work in parallel:
//   stage 2 decodes q_w  (register/ALU operations, immediates, jumps, call),
//   stage 3 decodes aq_w (stack/data write enables, pop, lda),
//   stage 4 decodes bq_w (write-enable release, stack pointer update, ret).
// No hazard detection or forwarding logic is present; programs must insert
// nop instructions themselves.
//
// Instruction word layout, as decoded below:
//   [15:12] opcode class
//   [11:9]  source register       (register-type instructions, class 0000)
//   [8:6]   destination register  (add / sub / mov)
//   [5:0]   function code         (class 0000)
//   [11:0]  target address        (jmp / jz / jn / call)
//   [7:0]   immediate             (sdal / sdah)
//
// The general registers r[0..7], x, da, outd and pc_back have no reset value.
module L_cpu(

clock, // system clock

clr_n, // asynchronous reset, active low

idata, // data input of the program memory write port

odata, // value latched by the `out` instruction

w,      // debug: instruction word currently read from program memory

aq,     // debug: pipeline register aq_w

bq,     // debug: pipeline register bq_w

cq,     // debug: pipeline register cq_w

opc,    // debug: program counter

ocall,  // debug: `call` flag register

pcback, // debug: value presented to the stack RAM data port

osp,    // debug: stack pointer

oret,   // debug: `ret` flag register

oswren, // debug: stack RAM write enable

odwren, // debug: data RAM write enable

oda,    // debug: accumulator da

oadd,   // debug: `add` flag register

osub,   // debug: `sub` flag register

or0,    // debug: general register 0

or1,or7 ); // debug: general registers 1 and 7

input clock,clr_n;

input [15:0] idata;

output [15:0] odata;

output [15:0] w,aq,bq,cq,oda,or0,or1,or7;

output ocall,oret,oswren,odwren,oadd,osub;

output [11:0] opc,pcback,osp;

// General register file r[0]..r[7]; x receives the upper half of a product.
reg [15:0] r [0:7];

reg [15:0] x;

// Per-instruction flag registers.  In this module they are only cleared on
// reset (stp is additionally set by the stp instruction).
reg lda,add,sub,out,str,mov,xtda,mult,divi,sdal,sdah,

ldar,strr,jmp,jz,jn,call,ret,push,pop,stp;

wire [15:0] q_w,q_data; // program memory / data memory read data

wire [11:0] pc_next;    // stack RAM read data

reg [15:0] aq_w,bq_w,cq_w,dq_w,ddata,da,outd;

reg [11:0] pc,sp,pc_back,mar;

reg [0:0] jp,iwren,dwren,swren,f_da,f_call,f_ret;

altsyncram0 iram(.address(pc),.clock(clock),.data(idata),.wren(iwren),.q(q_w)); // program memory

altsyncram1 dram(.address(mar),.clock(clock),.data(ddata),.wren(dwren),.q(q_data)); // data memory

altsyncram2 sram(.address(sp),.clock(clock),.data(pc_back),.wren(swren),.q(pc_next)); // stack

assign oda=da;

assign odata =outd;

assign w=q_w;

assign aq=aq_w;

assign bq=bq_w;

assign cq=cq_w; // was commented out, which left the cq port undriven

assign opc=pc;

assign ocall = call;

assign pcback = pc_back;

assign osp = sp;

assign oret = ret;

assign oswren = swren;

assign odwren = dwren; // these three outputs were declared but never driven

assign oadd = add;

assign osub = sub;

assign or0=r[0];

assign or1=r[1];

assign or7=r[7];

always @(posedge clock or negedge clr_n) // instruction fetch and pipeline shift

begin

    if (!clr_n)

    begin

        aq_w <= 16'h0000;

        bq_w <= 16'h0000;

        cq_w <= 16'h0000;

        dq_w <= 16'h0000;

        jp <= 0;

        pc <= 0;

        sp <= 0; // bottom of stack

        dwren <= 0;

        swren <= 0;

        // These three drive RAM ports but were never assigned anywhere, so
        // they were X in RTL simulation.  Give them a defined idle value.
        // NOTE(review): nothing in this module ever loads mar or ddata, so
        // lda/str can only reach data address 0 - confirm the intended
        // addressing and RAM timing before relying on them.
        iwren <= 0;

        mar <= 0;

        ddata <= 0;

        // instruction flags

        lda <= 0;

        str <= 0;

        ldar <= 0;

        strr <= 0;

        add <= 0;

        sub <= 0;

        out <= 0;

        mov <= 0;

        xtda <= 0;

        mult <= 0;

        divi <= 0;

        sdal <= 0;

        sdah <= 0;

        jmp <= 0;

        jz <= 0;

        jn <= 0;

        call <= 0;

        ret <= 0;

        push <= 0;

        pop <= 0;

        stp <= 0;

        // status flags

        f_da <= 0;

        f_call <= 0;

        f_ret <= 0;

    end

    else

    begin

        // ---- Stage 1: fetch / pipeline shift --------------------------------
        // Once stp has been latched the pipeline is flushed and pc is held at 0.

        if (stp)

        begin

            pc <= 0;

            aq_w <= 0;

            bq_w <= 0;

            cq_w <= 0;

            dq_w <= 0;

        end

        else

        begin

            pc <= pc+1;

            aq_w <= q_w;  // q_w as seen one clock ago

            bq_w <= aq_w; // two clocks ago

            cq_w <= bq_w; // three clocks ago

            dq_w <= cq_w; // four clocks ago

        end

        // ---- Stage 2: decode q_w and execute register operations ------------
        // Assignments below are later in the block than stage 1, so a jump
        // target written to pc here overrides the pc+1 increment above.

        case (q_w[15:12])

        4'b0000:

            case (q_w[5:0])

            6'b000001: pc_back <= r[q_w[11:9]]; // push: low 12 bits of the register go to the stack data port

            6'b000010: sp <= sp-1;              // pop: step back to the stored entry

            6'b000011: da <= r[q_w[11:9]];      // ldar: register -> da

            6'b000100: r[q_w[11:9]] <= da;      // strr: da -> register

            6'b000101: // add: dst <= src + dst

                r[q_w[8:6]] <= r[q_w[11:9]] + r[q_w[8:6]];

            6'b000110: // sub: dst <= dst - src

                r[q_w[8:6]] <= r[q_w[8:6]] - r[q_w[11:9]];

            6'b000111: outd <= r[q_w[11:9]];    // out: register -> output latch

            6'b001000: // mov: dst <= src (src == dst leaves the register unchanged)

                r[q_w[8:6]] <= r[q_w[11:9]];

            6'b001001: // mult: 32-bit product, high half to x, low half to da

                {x,da} <= da*r[q_w[11:9]];

            // 6'b001010 (divi) is disabled; that code falls through to default.

            6'b001011: da <= x;                 // x -> da

            6'b000000: begin end                // nop

            6'b111110: sp <= sp-1;              // ret: prepare to read the return address

            6'b111111: begin                    // stp: halt

                stp <= 1;

                pc <= 0;

            end

            default: begin end

            endcase

        4'b1001: begin // sdal: sign-extend the 8-bit immediate into da

            da <= {{8{q_w[7]}},q_w[7:0]};

        end

        4'b1010: begin // sdah: immediate into the high byte, low byte kept (set by a prior sdal)

            da[15:0] <= {q_w[7:0],da[7:0]};

        end

        4'b1011: begin // jmp

            pc <= q_w[11:0];

        end

        4'b1100: begin // jz: jump when da is zero

            if (da==0) pc <= q_w[11:0];

        end

        4'b1101: begin // jn: jump when da is negative

            // da is an unsigned reg, so the former `da<0` was always false and
            // the branch could never be taken.  Test the sign bit instead.
            if (da[15]) pc <= q_w[11:0];

        end

        4'b1110: begin // call

            pc_back <= pc+1; // return address for the stack

            pc <= q_w[11:0];

        end

        default: begin

        end

        endcase

        // ---- Stage 3: decode aq_w -------------------------------------------
        // Register writes here come after stage 2 in the block and therefore
        // win if both stages target the same register in one clock.

        case (aq_w[15:12])

        4'b0000:

            case (aq_w[5:0])

            6'b000001: swren <= 1;                // push: open the stack write

            6'b000010: r[aq_w[11:9]] <= pc_next;  // pop: stack read data -> register (zero-extended)

            default: begin end

            endcase

        4'b0001: begin // lda: data memory read data -> da

            da <= q_data;

        end

        4'b0010: begin // str: open the data memory write

            dwren <= 1;

        end

        4'b1110: begin // call: write pc_back to the stack

            swren <= 1;

        end

        default: begin

        end

        endcase

        // ---- Stage 4: decode bq_w -------------------------------------------

        case (bq_w[15:12])

        4'b0000:

            case (bq_w[5:0])

            6'b000001: begin // push: close the write, advance to the next free slot

                swren <= 0;

                sp<= sp+1;

            end

            6'b111110: pc <= pc_next; // ret: the jump happens in this stage

            6'b000010: begin end      // pop: nothing left to do

            default: begin end

            endcase

        4'b0010: begin // str: close the data memory write

            dwren <= 0;

        end

        4'b1110: begin // call: close the write, advance to the next free slot

            swren <= 0;

            sp <= sp+1;

        end

        default: begin

        end

        endcase

    end

end

endmodule
 
//////////////////////////////////////////////////////////////
 
//例题1:求1+2+...+300。数据文件imem16_1.mif
 
//        汇编                                         编译
 
//Start: sdal 1                                  9001
 
//           Strr 1                                   0204
 
//           Sdal 0                                 9000
 
//           Strr 0                                  0004
 
//            Sdal 44                             902c
 
//            sdah 1                              a001 //
 
//            Strr 7                                0e04
 
//Loop: ldar 7                                 0e03
 
//           Jz exit                                 c00e
 
//            Add 7,0                             0e05
 
//           ldar 7                                 0e03
 
//           Sub 1,7                              03c6
 
//           Jmp loop                            b007
 
//            nop                                     0000
 
//Exit:  out 0                                      0007
 
//          stp                                        003f
 
//
 
//结果为:hb05e=45150
 
//
 
//用标志控制正确转移
 
//call a
 
//call b
 
//ret
 
//ret
 
//
 
//例题2:求8! 数据文件imem16_2.mif
 
//
 
//0    main: sdal 1                       9001 ;1
 
//1               Strr 1                         0204 ;送1号寄存器
 
//2                strr 2                        0404 ;2号寄存器(部分积)
 
//3               Sdal 0                       9000 ;0
 
//4               Strr 0                         0004 ;送0号寄存器
 
//5               sdal 8                       9008 ;
 
//6               sdah 0                      a000 ;高位置0
 
//7               strr 7                         0e04 ;/8送7号寄存器
 
//8               call make                e00c ;
 
//9               nop                           0000 ;
 
//a               out 2                         0407 ;输出结果
 
//b                stp                           003f ;停机
 
//c     make: ldar 7                      0e03 ;
 
//d                 jz exit                      c015 ;
 
//e                 mult 2                    0409 ;乘法
 
//f                  strr 2                       0404 ;
 
//10               sub 1,7                  03c6 ;
 
//11               call test                  e019 ;二层调用
 
//12               nop                        0000 ;
 
//13              jmp make              b00c ;
 
//14              nop                          0000 ;
 
//15              exit: ret                    003e ;ret后面要加3个nop断流,其他转移指令后加1个nop.
 
//16              nop                         0000 ;
 
//17              nop                         0000 ;
 
//18              nop                         0000 ;
 
//19     test: nop                          0000 ;不让call连接ret
 
//1a               ret                           003e ;
 
//1b               nop                         0000 ;
 
//1c               nop                         0000 ;
 
//
 
//结果:h9d80=40320
 
//编程注意事项:
 
//由于call第4拍实现入栈、ret都要在第4拍才能实现pc转移,所以不能call立即连接ret
 
程序初始化文件如图 2-16 所示。
 
 
                                   图 2-16 程序初始化文件
 
实用RISC及流水线编程可参考MIPS编码,关键要理解好各部分的作用。
 
后续将转入动态计算机的思想方法讨论。
 
--- 待续 ---
 


https://blog.sciencenet.cn/blog-340399-286801.html

上一篇:CPU设计我要让更多的人掌握它(续6)
下一篇:CPU设计我要让更多的人掌握它(续8)
收藏 IP: .*| 热度|

0

发表评论 评论 (1 个评论)

数据加载中...
扫一扫,分享此博文

Archiver|手机版|科学网 ( 京ICP备07017567号-12 )

GMT+8, 2024-4-24 16:18

Powered by ScienceNet.cn

Copyright © 2007- 中国科学报社

返回顶部