Once the RISC-V core module is complete, it reads the machine code of the user's software from the ITCM module and executes each instruction through the stages of instruction fetch, decode, execute, memory access, and write-back. Not every instruction goes through all five stages; some finish early. For example, addi does not access external memory. The current version, RISC-V V2.01, is implemented as a state machine. In later versions it will be changed to a pipelined design.
Related reference articles:
RISC-V teaching plan
The code is shown below:
// fii_rv32i_core
//
// Top level of the state-machine based RV32I core. It contains:
//   * the instruction sequencing state machine (IDLE / fetch / execute /
//     load-store / write-back, plus two extra states used when a load reads
//     its data from the instruction memory),
//   * the program counter register (i_fch_PC),
//   * the execution unit instance (rv32I_exu),
//   * the dual-port instruction memory (TDP_RAM_INSTR): port A is the code
//     download interface, port B is read by the core.
module fii_rv32i_core
#(
    parameter [31:0] TMR_BASEADDR  = 32'h0200_0000,
    parameter [31:0] PLIC_BASEADDR = 32'h0c00_0000,
    parameter [31:0] ITCM_BASEADDR = 32'h8000_0000,
    parameter [31:0] DTCM_BASEADDR = 32'h9000_0000,
    parameter [31:0] UART_BASEADDR = 32'he000_0000,
    parameter [31:0] GPIO_BASEADDR = 32'hf000_0000
)
(
    input             sys_clk,        // system clock

    //===================================================================
    input  [31:0]     i_GPIO_dina,    // GPIO group A input data
    output [31:0]     o_GPIO_douta,   // GPIO group A output data
    output [31:0]     o_GPIO_ta,      // GPIO group A direction selection

    input  [31:0]     i_GPIO_dinb,    // GPIO group B input data
    output [31:0]     o_GPIO_doutb,   // GPIO group B output data
    output [31:0]     o_GPIO_tb,      // GPIO group B direction selection

    input  [31:0]     i_GPIO_dinc,    // GPIO group C input data
    output [31:0]     o_GPIO_doutc,   // GPIO group C output data
    output [31:0]     o_GPIO_tc,      // GPIO group C direction selection

    input  [31:0]     i_GPIO_dind,    // GPIO group D input data
    output [31:0]     o_GPIO_doutd,   // GPIO group D output data
    output [31:0]     o_GPIO_td,      // GPIO group D direction selection

    output            txd_start,      // tells the UART PHY to start sending a byte
    output [7:0]      txd_data,       // byte to be sent by the UART PHY
    input             txd_done,       // UART PHY reports that the byte was sent

    //===================================================================
    output [31:0]     o_sft_int_v,    // software interrupt register

    output [31:0]     o_timer_l,      // timer register, low 32 bits
    output [31:0]     o_timer_h,      // timer register, high 32 bits
    input  [31:0]     i_timer_l,      // current timer value, low 32 bits
    input  [31:0]     i_timer_h,      // current timer value, high 32 bits
    output [31:0]     o_tcmp_l,       // timer compare register, low 32 bits
    output [31:0]     o_tcmp_h,       // timer compare register, high 32 bits
    output [1:0]      o_timer_valid,  // timer valid
    output [31:0]     o_tm_ctrl,      // timer control register

    input  [11:0]     code_addr,      // code download: machine code address
    input  [31:0]     code_din,       // code download: machine code data
    input             code_wea,       // code download: write enable

    //===================================================================
    input             i_ext_irq,      // external interrupt from an external module
    input             i_sft_irq,      // software interrupt from the software module
    input             i_tmr_irq,      // timer interrupt from the timer module

    output            o_meie,         // machine external interrupt enable (CSR)
    output            o_msie,         // machine software interrupt enable (CSR)
    output            o_mtie,         // machine timer interrupt enable (CSR)
    output            o_glb_irq,      // machine global interrupt enable (CSR)

    //===================================================================
    input             i_cpu_reset,    // CPU reset (active high)
    input             rst_n           // global reset (active low)
);

//=======================================================================
// State encoding. The literals are 3 bits wide to match the declared
// localparam range and the 3-bit state registers.
localparam [2:0] IDLE  = 3'd0,
                 I_FCH = 3'd1,
                 I_EXE = 3'd2,
                 I_LS  = 3'd3,
                 I_WB  = 3'd4,
                 I_RO1 = 3'd5,
                 I_RO2 = 3'd6,
                 I_RO3 = 3'd7;

wire ls_need;   // from the execution unit: instruction needs a load/store step
wire wb_need;   // from the execution unit: instruction needs a write-back step
wire ls_rdy;    // from the execution unit: load/store step finished
wire wb_rdy;    // from the execution unit: write-back step finished

// Load of program data: the execution unit asserts o_CPU_cs and supplies the
// address on o_CPU_PC when a load has to read from the instruction memory.
(* mark_debug = "yes" *) wire o_CPU_cs;
wire [31:0] o_CPU_PC;

//=======================================================================
// Instruction sequencing state machine: one clocked block for the current
// state and one combinational block for the next state.
(* mark_debug = "yes" *) reg [2:0] instr_st = 0;
reg [2:0] instr_st_nxt;

// Current state register. Both rst_n and i_cpu_reset act synchronously here.
always @( posedge sys_clk )
    if ( ( rst_n == 1'b0 ) | i_cpu_reset )
        instr_st <= IDLE;
    else
        instr_st <= instr_st_nxt;

// Next state logic.
always @( * )
begin
    case ( instr_st )
        IDLE:   // 0: initial state; left as soon as the CPU reset is released
        begin
            if ( !i_cpu_reset )
                instr_st_nxt = I_FCH;
            else
                instr_st_nxt = IDLE;
        end

        I_FCH:  // 1: instruction fetch
        begin
            if ( i_cpu_reset )
                instr_st_nxt = IDLE;
            else
                instr_st_nxt = I_EXE;
        end

        I_EXE:  // 2: decode and execute
        begin
            if ( ls_need )
            begin
                if ( o_CPU_cs )         // load targets the instruction memory
                    instr_st_nxt = I_RO1;
                else                    // load/store to data memory or peripherals
                    instr_st_nxt = I_LS;
            end
            else if ( wb_need )         // wb_enable
                instr_st_nxt = I_WB;
            else                        // no write-back needed: still passes through I_WB
                instr_st_nxt = I_WB;
        end

        I_LS:   // 3: load/store
        begin
            if ( ls_rdy )               // always 1
            begin
                if ( wb_need )          // wb_enable
                    instr_st_nxt = I_WB;
                else
                    instr_st_nxt = I_FCH;
            end
            else
                instr_st_nxt = I_LS;
        end

        I_WB:   // 4: write-back to the general purpose registers
        begin
            if ( wb_rdy )               // always 1
                instr_st_nxt = I_FCH;
            else
                instr_st_nxt = I_WB;
        end

        I_RO1:  // 5: wait cycle while the instruction memory is read as data
        begin
            instr_st_nxt = I_RO2;
        end

        I_RO2:  // 6: data read from the instruction memory is captured
        begin
            instr_st_nxt = I_FCH;
            // instr_st_nxt = I_EXE;
        end

        I_RO3:  // 7: not entered from any other state
        begin
            instr_st_nxt = I_FCH;
        end

        default : instr_st_nxt = IDLE;
    endcase
end

//=======================================================================
/*
reg [2:0] pc_load_r = 0;
always @ (posedge sys_clk or negedge rst_n)
if(~rst_n) pc_load_r <= 0;
else pc_load_r <= {pc_load_r[1:0], o_CPU_cs};
*/

// Interrupt request latch: the OR of the three interrupt inputs is sampled
// only while the state machine is in the fetch state.
(* mark_debug = "yes" *) reg irq_fch_r = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( ~rst_n )
        irq_fch_r <= 0;
    else if ( instr_st == I_FCH )
        irq_fch_r <= ( i_ext_irq | i_sft_irq | i_tmr_irq );

// The latched request is masked while o_CPU_cs is high, i.e. while a load
// from the instruction memory is in progress.
(* mark_debug = "yes" *) wire w_irq_src   = o_CPU_cs ? 1'b0 : irq_fch_r;
// Jump to the interrupt entry only if the global interrupt enable is set.
(* mark_debug = "yes" *) wire jump_irq_pc = w_irq_src & o_glb_irq;

wire w_exp_src = 0;                      // exception source, tied off
wire irq_exp   = w_irq_src | w_exp_src;  // not read anywhere in this module

wire        mret;       // execution unit: current instruction is mret
wire [31:0] mepc;       // execution unit: PC to return to on mret
wire [31:0] w_irq_pc;   // execution unit: interrupt entry address

(* mark_debug = "yes" *) reg [31:0] i_fch_PC = 0;   // program counter
reg  [31:0] res_PC = 0;   // PC saved while i_fch_PC is used as a data address
wire [31:0] w_exe_PC;     // execution unit: next PC

// Program counter update.
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n )
    begin
        i_fch_PC <= ITCM_BASEADDR;
        // res_PC is reset together with i_fch_PC so that rst_n does not end
        // up acting as a clock enable on it; it is always rewritten in I_EXE
        // before it is read in I_RO1.
        res_PC   <= 32'b0;
    end
    else
    begin
        if ( instr_st == IDLE )          // 0: hold the PC at the start of the ITCM
            i_fch_PC <= ITCM_BASEADDR;
        else if ( instr_st == I_EXE )    // 2
        begin
            if ( mret )                  // return from interrupt
                i_fch_PC <= mepc;
            else if ( jump_irq_pc )      // take the interrupt
                i_fch_PC <= w_irq_pc;
            else if ( o_CPU_cs )         // load reads data from the instruction memory
            begin
                i_fch_PC <= o_CPU_PC;    // use the PC as the data address
                res_PC   <= w_exe_PC;    // remember where to continue afterwards
            end
            else                         // PC + 4 or jump/branch target
                i_fch_PC <= w_exe_PC;
        end
        else if ( instr_st == I_RO1 )    // restore the PC saved above
        begin
            i_fch_PC <= res_PC;
        end
    end

//=======================================================================
// EXE_vld is high for the clock cycle that follows a fetch cycle.
(* mark_debug = "yes" *) reg EXE_vld = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n )
        EXE_vld <= 0;
    else
    begin
        if ( instr_st == I_FCH )         // 1
            EXE_vld <= 1'b1;
        else
            EXE_vld <= 1'b0;
    end

// Instruction register: the memory output is captured in the fetch state.
wire [31:0] instr;
(* mark_debug = "yes" *) reg [31:0] exe_instr = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n )
        exe_instr <= 0;
    else if ( instr_st == I_FCH )
        exe_instr <= instr;

// Load data read from the instruction memory: captured in I_RO2 and flagged
// with a one-cycle valid pulse.
(* mark_debug = "true" *) reg        i_CPU_load_vld  = 0;
(* mark_debug = "true" *) reg [31:0] i_CPU_load_data = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n )
    begin
        i_CPU_load_data <= 0;
        i_CPU_load_vld  <= 0;
    end
    else if ( instr_st == I_RO2 )
    begin
        i_CPU_load_data <= instr;
        i_CPU_load_vld  <= 1'b1;
    end
    else
        i_CPU_load_vld <= 1'b0;

//=======================================================================
(* mark_debug = "yes" *) wire [31:0] instr_PC = i_fch_PC;

rv32I_exu
#(
    .TMR_BASEADDR   ( TMR_BASEADDR ),
    .PLIC_BASEADDR  ( PLIC_BASEADDR ),
    .CPU_BASEADDR   ( ITCM_BASEADDR ),
    .MEM_BASEADDR   ( DTCM_BASEADDR ),
    .UART_BASEADDR  ( UART_BASEADDR ),
    .GPIO_BASEADDR  ( GPIO_BASEADDR )
)
rv32I_exu_inst
(
    .sys_clk         ( sys_clk ),

    .i_ir            ( exe_instr ),        // instruction register
    .i_PC            ( instr_PC ),         // PC register
    .i_EXE_vld       ( EXE_vld ),

    .i_CPU_load_vld  ( i_CPU_load_vld ),
    .i_CPU_load_data ( i_CPU_load_data ),

    .o_ls_need       ( ls_need ),
    .o_wb_need       ( wb_need ),
    .o_wb_rdy        ( wb_rdy ),
    .o_ls_rdy        ( ls_rdy ),

    .o_exe_PC        ( w_exe_PC ),

    // load program data
    .o_CPU_cs        ( o_CPU_cs ),
    .o_CPU_PC        ( o_CPU_PC ),

    .i_ext_irq       ( i_ext_irq ),
    .i_sft_irq       ( i_sft_irq ),
    .i_tmr_irq       ( i_tmr_irq ),

    .o_meie          ( o_meie ),
    .o_msie          ( o_msie ),
    // NOTE(review): the original listing connected ".o_msie" twice, both to
    // an undeclared net "o_tiem", leaving o_msie and o_mtie undriven. The
    // port name "o_mtie" on rv32I_exu is assumed by analogy with this
    // module's own port list; confirm against rv32I_exu.
    .o_mtie          ( o_mtie ),
    .o_glb_irq       ( o_glb_irq ),

    .i_irq_src       ( w_irq_src ),
    .i_exp_src       ( w_exp_src ),
    .o_mret          ( mret ),
    .o_irq_pc        ( w_irq_pc ),
    .o_mepc          ( mepc ),

    .i_GPIO_dina     ( i_GPIO_dina ),
    .o_GPIO_douta    ( o_GPIO_douta ),
    .o_GPIO_ta       ( o_GPIO_ta ),

    .i_GPIO_dinb     ( i_GPIO_dinb ),
    .o_GPIO_doutb    ( o_GPIO_doutb ),
    .o_GPIO_tb       ( o_GPIO_tb ),

    .i_GPIO_dinc     ( i_GPIO_dinc ),
    .o_GPIO_doutc    ( o_GPIO_doutc ),
    .o_GPIO_tc       ( o_GPIO_tc ),

    .i_GPIO_dind     ( i_GPIO_dind ),
    .o_GPIO_doutd    ( o_GPIO_doutd ),
    .o_GPIO_td       ( o_GPIO_td ),

    .txd_start       ( txd_start ),
    .txd_data        ( txd_data ),
    .txd_done        ( txd_done ),

    .o_sft_int_v     ( o_sft_int_v ),
    .i_timer_l       ( i_timer_l ),
    .i_timer_h       ( i_timer_h ),
    .o_timer_l       ( o_timer_l ),
    .o_timer_h       ( o_timer_h ),
    .o_tcmp_l        ( o_tcmp_l ),
    .o_tcmp_h        ( o_tcmp_h ),
    .o_timer_valid   ( o_timer_valid ),
    .o_tm_ctrl       ( o_tm_ctrl ),

    .i_cpu_reset     ( i_cpu_reset ),
    .rst_n           ( rst_n )
);

//=======================================================================
// Port B of the instruction memory is enabled only when the upper 16 bits of
// the PC match the upper 16 bits of the ITCM base address.
wire instr_ena = ( instr_PC[31:16] == ITCM_BASEADDR[31:16] ) ? 1'b1 : 1'b0;

TDP_RAM_INSTR program_inst
(
    // Port A: code download interface
    .clka   ( sys_clk ),
    .ena    ( 1'b1 ),
    .wea    ( code_wea ),
    .addra  ( code_addr ),
    .dina   ( code_din ),
    .douta  ( ),

    // Port B: read-only instruction/data fetch by the core
    .clkb   ( sys_clk ),
    .enb    ( instr_ena ),
    .web    ( 1'b0 ),
    // 12-bit word address, i.e. 4K x 32-bit words (16 KB) reachable from
    // here. NOTE(review): the original comment read "8K 32bits, 32K byte";
    // confirm the actual depth of TDP_RAM_INSTR.
    .addrb  ( instr_PC[13:2] ),
    .dinb   ( 32'b0 ),
    .doutb  ( instr )
);

//=======================================================================

endmodule
Module introduction:
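The execution state machine is written in the two-block style: one clocked always block holds the current state, and one combinational always block computes the next state.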
// Instruction sequencing state machine (excerpt).
// instr_st is the registered current state, instr_st_nxt the combinational
// next state.
reg [ 2: 0 ] instr_st = 0;
reg [ 2: 0 ] instr_st_nxt = 0;

// Current state register; rst_n and i_cpu_reset both force IDLE synchronously.
always@( posedge sys_clk )
    if (( rst_n == 1'b0 ) | i_cpu_reset ) instr_st <= IDLE;
    else instr_st <= instr_st_nxt ;

// Next state logic.
always @ ( * )
begin
    case ( instr_st )
        IDLE: // 0: initial state. It is held while the CPU reset is asserted (used while the machine code is downloaded over the UART) and is not visited during normal operation.
        begin
            if(!i_cpu_reset)
                instr_st_nxt = I_FCH;
            else
                instr_st_nxt = IDLE;
        end
        I_FCH: // 1: fetch state; the instruction at the current PC is fetched.
        begin
            if(i_cpu_reset) instr_st_nxt = IDLE;
            else instr_st_nxt = I_EXE;
        end
        I_EXE: // 2: execution state; decoding and execution both happen here.
        begin
            if ( ls_need ) // LOAD/STORE instruction: go to the matching memory access state.
            begin
                if ( o_CPU_cs ) // Princeton-style access: the data lives in the instruction memory, so the PC is pointed at the data address, the data is read, and the PC is restored afterwards.
                    instr_st_nxt = I_RO1;
                else // Harvard-style access: the data comes from the data area (DTCM, peripherals).
                    instr_st_nxt = I_LS;
            end
            else if ( wb_need ) // Other instructions (ADD, XOR, ...) that write a result back to the 32 general purpose registers.
                instr_st_nxt = I_WB;
            else // No write-back needed: the state machine still passes through I_WB.
                instr_st_nxt = I_WB;
        end
        I_LS: // 3: LOAD/STORE operation.
        begin
            if ( ls_rdy ) // always 1
            begin
                if ( wb_need ) // wb_enable: a write-back is needed, go to the write-back state.
                    instr_st_nxt = I_WB;
                else // No write-back is needed, return to the fetch state.
                    instr_st_nxt = I_FCH;
            end
            else
                instr_st_nxt = I_LS;
        end
        I_WB: // 4: write the result back to the 32 general purpose registers.
        begin
            if ( wb_rdy ) // always 1
                instr_st_nxt = I_FCH;
            else // never taken while wb_rdy is always 1
                instr_st_nxt = I_WB;
        end
        I_RO1: // 5: wait cycle; reading the dual-port RAM takes 2 clock cycles.
        begin
            instr_st_nxt = I_RO2;
        end
        I_RO2: // 6: the data of a Princeton-style LOAD is available.
        begin
            instr_st_nxt = I_FCH;
            // instr_st_nxt = I_EXE;
        end
        I_RO3: // 7: not used.
        begin
            instr_st_nxt = I_FCH;
        end
        default : instr_st_nxt = IDLE;
    endcase
end
// Interrupt request latch: holds the OR of the external, software and timer
// interrupt inputs as sampled during the most recent fetch state.
reg irq_fch_r = 0;
always @ (posedge sys_clk or negedge rst_n)
if(~rst_n) irq_fch_r <= 0; // asynchronous, active-low reset clears the latch
else if(instr_st == I_FCH) irq_fch_r <= (i_ext_irq | i_sft_irq | i_tmr_irq); // updated only in the fetch state; holds its value otherwise
The interrupt sources (external interrupt, software interrupt, and timer interrupt) are latched during the instruction fetch state.
// The latched interrupt request is masked while o_CPU_cs is high.
wire w_irq_src = o_CPU_cs ? 1'b0 : irq_fch_r ;
// The jump to the interrupt entry also requires the global interrupt enable.
wire jump_irq_pc = w_irq_src & o_glb_irq;
While a Princeton-style load from the ITCM is in progress (o_CPU_cs is high), the latched interrupt is masked so that this instruction can finish first. Otherwise, if an interrupt has been latched and the global interrupt enable (o_glb_irq) set in the CSR register is active, jump_irq_pc is asserted.
// Program counter update logic (excerpt).
wire mret; // current instruction is mret (return from interrupt)
wire [31:0] mepc; // PC to restore on mret
wire [31:0] w_irq_pc; // interrupt entry address
reg [ 31: 0 ] i_fch_PC = 0; // program counter
reg [ 31: 0 ] res_PC = 0; // PC saved while i_fch_PC is used as a data address
wire [31:0] w_exe_PC; // next PC
always @( posedge sys_clk or negedge rst_n )
if ( !rst_n ) i_fch_PC <= ITCM_BASEADDR;
else
begin
if ( instr_st == IDLE ) // 0: while idle (e.g. while code is downloaded over the UART) the PC is held at the start of the ITCM.
i_fch_PC <= ITCM_BASEADDR;
else if ( instr_st == I_EXE ) // 2: the PC is updated in the execution state.
begin
if( mret ) // The current instruction is mret (return from interrupt): restore the PC that was saved before the interrupt.
i_fch_PC <= mepc;
else if ( jump_irq_pc) // An interrupt is taken: load the interrupt entry address into the PC.
i_fch_PC <= w_irq_pc;
else if( o_CPU_cs ) // Princeton-style load: the PC is set to the address of the data. Note that what is read at this address is not an instruction but the data requested by the load.
begin
i_fch_PC <= o_CPU_PC;
res_PC <= w_exe_PC; // Save the next PC so that execution can continue from it once the load is finished.
end
else i_fch_PC <= w_exe_PC; // Normal case: either pc = pc + 4 or the target of a jump/branch instruction (JAL, JALR, BRANCH).
end
else if ( instr_st == I_RO1 ) // During the Princeton-style load, put back the PC that was saved in res_PC.
begin
i_fch_PC <= res_PC;
end
end
// EXE_vld is high for exactly the clock cycle that follows a fetch state.
reg EXE_vld = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n ) EXE_vld <= 0;
    else
    begin
        if ( instr_st == I_FCH ) //1
            EXE_vld <= 1'b1;
        else
            EXE_vld <= 1'b0;
    end
EXE_vld is asserted in step with instr_st == I_EXE; it indicates that the current cycle is the execution cycle.
// Instruction register: the instruction memory output is captured during
// the fetch state and held until the next fetch.
wire [ 31: 0 ] instr; // read data from port B of the instruction memory
reg [ 31: 0 ] exe_instr = 0; // instruction handed to the execution unit
always @( posedge sys_clk or negedge rst_n )
if ( !rst_n ) exe_instr <= 0; // asynchronous, active-low reset
else if ( instr_st == I_FCH )
exe_instr <= instr;
At the same time, the fetched instruction is latched into exe_instr and passed to the execution module, where it is decoded and executed.
// Data of a load that reads from the instruction memory: captured in state
// I_RO2 together with a one-cycle valid pulse.
reg i_CPU_load_vld = 0;
reg [ 31: 0 ] i_CPU_load_data = 0;
always @( posedge sys_clk or negedge rst_n )
    if ( !rst_n )
    begin
        i_CPU_load_data <= 0;
        i_CPU_load_vld <= 0;
    end
    else if ( instr_st == I_RO2 )
    begin
        i_CPU_load_data <= instr;
        i_CPU_load_vld <= 1'b1;
    end
    else i_CPU_load_vld <= 1'b0; // the data register keeps its value; only the valid flag is cleared
Under the Princeton architecture, the data read by the load instruction is sent to the LSU module.
// Port B of the instruction memory is enabled only when the upper 16 bits of
// the PC match the upper 16 bits of the ITCM base address.
wire instr_ena = ( instr_PC[ 31: 16 ] == ITCM_BASEADDR[31: 16] ) ? 1'b1 : 1'b0;
TDP_RAM_INSTR program_inst
(
    // Port A: code download interface
    .clka ( sys_clk ),
    .ena ( 1'b1 ),
    .wea ( code_wea ),
    .addra ( code_addr ),
    .dina ( code_din ),
    .douta ( ),
    // Port B: read-only access by the core
    .clkb ( sys_clk ),
    .enb ( instr_ena ),
    .web ( 1'b0 ),
    // 12-bit word address, i.e. 4K x 32-bit words (16 KB) reachable from here.
    // NOTE(review): the original comment read "8K 32bits, 32K byte"; confirm
    // the actual depth of TDP_RAM_INSTR.
    .addrb ( instr_PC[ 13: 2 ] ),
    .dinb ( 32'b0 ),
    .doutb ( instr )
);
Reference article: RISC-V address space
The ITCM address space is selected by instr_ena.
The ITCM is a dual-port RAM. Port A: the user can download the machine code of the software into the ITCM through the UART port. Port B: instr_ena selects the ITCM, instr_PC[13:2] is the word address taken from the current PC, and instr returns the instruction read from the ITCM (under the Princeton architecture it can also return data).