The RISC-V ALU module implements the instruction-execution stage. It processes the instructions fetched from the ITCM, using the fields extracted by the decode module. Its work includes computing the values written back to the 32 general-purpose registers, computing the addresses used for memory accesses, and computing changes to the PC. Related reference articles:
RISC-V teaching plan
RISC-V cpu architecture:
ALU module code:
//==============================================================================
// exu_alu - combinational execute-stage ALU for an RV32I core.
//
// Selects two operands from the decoded instruction fields, computes the
// add/sub/compare, logic and shift results in parallel, and multiplexes the
// value to be written back to the destination register. The adder result is
// also exported as the load/store address (o_D_PC) and the jump target
// (o_J_PC).
//
// The instruction-group inputs (i_OP_IMM, i_OP, i_LUI, ...) and the one-hot
// member vectors (i_opimm_instr, i_op_instr) are expected to come from the
// decoder; the operand selection below assumes at most one group is active
// at a time and that the one-hot vectors are zero when their group is not
// active -- NOTE(review): confirm against the decoder.
//
// sys_clk and rst_n are part of the port list but are not used: the module
// is purely combinational.
//==============================================================================
module exu_alu (
    input               sys_clk,        // system clock (unused here)
    input               rst_n,          // reset (unused here)

    input      [ 4: 0 ] i_rd_idx,       // destination register index (rd)
    input      [31: 0 ] i_rs1_val,      // value of the register selected by rs1
    input      [31: 0 ] i_PC,           // PC of the instruction being executed

    input               i_OP_IMM,       // OP_IMM instruction group
    input               i_LOAD,         // LOAD instruction group
    input      [ 8: 0 ] i_opimm_instr,  // one-hot: {SRAI, SRLI, SLLI, ANDI, ORI, XORI, SLTIU, SLTI, ADDI}
    input      [31: 0 ] i_I_imm,        // I-type immediate

    input               i_OP,           // OP instruction group
    input      [ 9: 0 ] i_op_instr,     // one-hot: {SRA, SUB, SRL, SLL, XOR, OR, AND, SLTU, SLT, ADD}
    input      [31: 0 ] i_rs2_val,      // value of the register selected by rs2

    input               i_LUI,          // LUI instruction
    input               i_AUIPC,        // AUIPC instruction
    input      [31: 0 ] i_U_imm,        // U-type immediate

    input               i_JAL,          // JAL instruction
    input               i_JALR,         // JALR instruction
    input      [31: 0 ] i_J_imm,        // J-type immediate

    input               i_STORE,        // STORE instruction group
    input      [31: 0 ] i_S_imm,        // S-type immediate
    //==========================================================================
    output              o_J_vld,        // jump valid
    output     [31: 0 ] o_J_PC,         // jump target PC
    output     [31: 0 ] o_D_PC,         // adder result for the LOAD/STORE unit (data address)
    output              o_rd_wen,       // register write-back enable
    output     [ 4: 0 ] o_wb_rd_idx,    // register index to write back
    output reg [31: 0 ] o_wb_data       // value to write back
);

    //--------------------------------------------------------------------------
    // Operand selection
    //--------------------------------------------------------------------------
    // First operand: the PC for AUIPC/JAL, rs1 for everything else.
    wire [31:0] opd1 = (i_AUIPC | i_JAL) ? i_PC : i_rs1_val;

    // Second operand: AND-OR mux keyed by instruction group.
    wire [31:0] opd2 = ( {32{i_OP_IMM | i_JALR | i_LOAD}} & i_I_imm   )
                     | ( {32{i_OP}}                       & i_rs2_val )
                     | ( {32{i_JAL}}                      & i_J_imm   )
                     | ( {32{i_AUIPC}}                    & i_U_imm   )
                     | ( {32{i_STORE}}                    & i_S_imm   );

    //--------------------------------------------------------------------------
    // 34-bit adder / subtractor / comparator
    //--------------------------------------------------------------------------
    // SLTI / SLT compare signed values, so both operands are sign-extended;
    // every other instruction zero-extends.
    wire        is_signed_cmp = i_opimm_instr[1] | i_op_instr[1];
    wire [33:0] ext_opd1 = is_signed_cmp ? {opd1[31], opd1[31], opd1} : {2'b00, opd1};
    wire [33:0] ext_opd2 = is_signed_cmp ? {opd2[31], opd2[31], opd2} : {2'b00, opd2};

    // Two's complement of the extended second operand.
    wire [33:0] comp_opd2 = ~ext_opd2 + 34'd1;

    // SUB, SLTIU, SLTI, SLTU and SLT subtract; everything else adds.
    wire        is_subtract = ( {i_op_instr[8], i_opimm_instr[2:1], i_op_instr[2:1]} != 5'b0 );
    wire [33:0] comp_ext_opd2 = is_subtract ? comp_opd2 : ext_opd2;

    // Bit 33 of the difference is 1 exactly when opd1 < opd2 (signed or
    // unsigned according to the extension chosen above).
    wire [33:0] add_res = ext_opd1 + comp_ext_opd2;

    //--------------------------------------------------------------------------
    // Logic and shift results
    //--------------------------------------------------------------------------
    wire [31:0] xor_res = i_rs1_val ^  opd2;
    wire [31:0] or_res  = i_rs1_val |  opd2;
    wire [31:0] and_res = i_rs1_val &  opd2;
    wire [31:0] sll_res = i_rs1_val << opd2[4:0];
    wire [31:0] srl_res = i_rs1_val >> opd2[4:0];

    // Arithmetic right shift: x[rd] = x[rs1] >>s shamt.
    // eff_mask has ones where the logical-shift result holds real data; the
    // vacated upper bits are filled with the sign bit of rs1.
    // Equivalent to: $signed(i_rs1_val) >>> opd2[4:0]
    wire [31:0] eff_mask = ( ~32'b0 ) >> opd2[4:0];
    wire [31:0] sra_res  = ( srl_res & eff_mask ) | ( {32{i_rs1_val[31]}} & ~eff_mask );

    //--------------------------------------------------------------------------
    // Write-back data multiplexer (combinational, so blocking assignments)
    //--------------------------------------------------------------------------
    always @( * ) begin
        o_wb_data = 32'b0;

        // i_opimm_instr = {srai, srli, slli, andi, ori, xori, sltiu, slti, addi}
        if ( i_OP_IMM ) begin
            case ( i_opimm_instr )                        // one-hot decode
                9'h001:  o_wb_data = add_res[31:0];                   // addi
                9'h002:  o_wb_data = add_res[33] ? 32'b1 : 32'b0;     // slti
                9'h004:  o_wb_data = add_res[33] ? 32'b1 : 32'b0;     // sltiu
                9'h008:  o_wb_data = xor_res;                         // xori
                9'h010:  o_wb_data = or_res;                          // ori
                9'h020:  o_wb_data = and_res;                         // andi
                9'h040:  o_wb_data = sll_res;                         // slli
                9'h080:  o_wb_data = srl_res;                         // srli
                9'h100:  o_wb_data = sra_res;                         // srai
                default: ;
            endcase
        end

        // i_op_instr = {sra, sub, srl, sll, xor, or, and, sltu, slt, add}
        // Guarded by i_OP so a stray bit in i_op_instr cannot override the
        // result of another instruction group.
        if ( i_OP ) begin
            case ( i_op_instr )                           // one-hot decode
                10'h001: o_wb_data = add_res[31:0];                   // add
                10'h002: o_wb_data = add_res[33] ? 32'b1 : 32'b0;     // slt
                10'h004: o_wb_data = add_res[33] ? 32'b1 : 32'b0;     // sltu
                10'h008: o_wb_data = and_res;                         // and
                10'h010: o_wb_data = or_res;                          // or
                10'h020: o_wb_data = xor_res;                         // xor
                10'h040: o_wb_data = sll_res;                         // sll
                10'h080: o_wb_data = srl_res;                         // srl
                10'h100: o_wb_data = add_res[31:0];                   // sub
                10'h200: o_wb_data = sra_res;                         // sra
                default: ;
            endcase
        end

        if ( i_LUI )
            o_wb_data = i_U_imm;

        if ( i_AUIPC )
            o_wb_data = add_res[31:0];            // PC + U-immediate

        if ( i_JAL | i_JALR )
            o_wb_data = i_PC + 32'd4;             // link address
    end

    //--------------------------------------------------------------------------
    // Outputs
    //--------------------------------------------------------------------------
    assign o_wb_rd_idx = i_rd_idx;
    assign o_rd_wen    = i_OP_IMM | i_OP | i_LUI | i_AUIPC | i_JAL | i_JALR;
    assign o_D_PC      = add_res[31:0];

    // JAL:  PC  + J-immediate.
    // JALR: (rs1 + I-immediate) with bit 0 forced to zero, as the RISC-V
    //       specification requires.
    assign o_J_PC      = i_JALR ? {add_res[31:1], 1'b0} : add_res[31:0];
    assign o_J_vld     = i_JAL | i_JALR;

endmodule
Port description:
input sys_clk , // system clock
input rst_n ,
input [ 4: 0 ] i_rd_idx , // one of the 32 general-purpose registers, specified by rd in the assembly instruction
input [ 31: 0 ] i_rs1_val , // register value specified by rs1 in the assembly instruction
input [ 31: 0 ] i_PC , // PC of the instruction currently being executed
input i_OP_IMM , // op_imm instruction group
input i_LOAD , // load instruction group
input [ 8: 0 ] i_opimm_instr , // op_imm instruction group members: SRAI, SRLI, SLLI, ANDI, ORI, XORI, SLTIU, SLTI, ADDI
input [ 31: 0 ] i_I_imm , // I-type immediate
input i_OP , // op instruction group
input [ 9: 0 ] i_op_instr , // op instruction group members: SRA, SUB, SRL, SLL, XOR, OR, AND, SLTU, SLT, ADD
input [ 31: 0 ] i_rs2_val , // register value specified by rs2 in the assembly instruction
input i_LUI , // LUI instruction
input i_AUIPC , // AUIPC instruction
input [ 31: 0 ] i_U_imm , // U-type immediate
input i_JAL , // JAL instruction
input i_JALR , // JALR instruction
input [ 31: 0 ] i_J_imm , // J-type immediate
input i_STORE , // STORE instruction
input [ 31: 0 ] i_S_imm , // S-type immediate
//=================================================================================
output o_J_vld , // jump valid
output [ 31: 0 ] o_J_PC , // jump to new PC
// output addition result to LOAD/STORE unit
output [ 31: 0 ] o_D_PC , // memory access address (adder result)
output o_rd_wen , // write-back enable
output [ 4: 0 ] o_wb_rd_idx , // rd register to write back
output reg [ 31: 0 ] o_wb_data // data value to write back
Every RISC-V instruction handled here operates on at most two operands. (Instructions with more than two operands are discussed in the RISC-V standard but are not currently implemented.)
Example:
The nop instruction, addi x0, x0, 0, has two operands;
x[rd] = x[rs1] + x[rs2] has only two operands;
x[rd] = x[rs1] + imm has only two operands.
So in an ALU operation: there are at most two operands.
wire [ 31: 0 ] opd1 = ( i_AUIPC | i_JAL ) ? i_PC : i_rs1_val;
wire [ 31: 0 ] opd2 = ( { 32{ i_OP_IMM | i_JALR | i_LOAD} } & i_I_imm ) |
( { 32{ i_OP } } & i_rs2_val ) |
( { 32{ i_JAL } } & i_J_imm ) |
( { 32{ i_AUIPC } } & i_U_imm ) |
( { 32{ i_STORE } } & i_S_imm ) ; //the second operand;
For opd1 (operand 1): going through the RISC-V instruction set, we find that the PC is used as the first operand only for AUIPC and JAL; for every other instruction the first operand is the value of rs1.
In opd2 (operand 2):
When the instruction belongs to the OP_IMM instruction group, is JALR, or belongs to the LOAD instruction group, I_imm will be used;
When the instruction is the OP instruction group, rs2_val will be used;
When the instruction is a JAL instruction, J_imm will be used;
When the instruction is an AUIPC instruction, U_imm will be used;
When the instruction is the STORE instruction group, S_imm will be used;
In any given clock cycle only one of these instructions is decoded; multiple instructions are never valid at the same time.
Arrange operands 1, 2:
// rv32i_slti rv32i_slt
wire [ 33: 0 ] ext_opd1 = ({i_opimm_instr[1],i_op_instr[1]} != 0) ? {opd1[31],opd1[31],opd1} : {2'b00, opd1} ;
wire [ 33: 0 ] ext_opd2 = ({i_opimm_instr[1],i_op_instr[1]} != 0) ? {opd2[31],opd2[31],opd2} : {2'b00, opd2};
Extend each operand to 34 bits, that is, the 32-bit operand + carry bit + sign bit. slti and slt need sign extension; for all other instructions the operands are zero-extended (treated as non-negative).
Negative operation:
wire [ 33: 0 ] comp_opd2 = ~ext_opd2 + 32'b1; //2's complement
Negate operand 2; this is equivalent to comp_opd2 = -ext_opd2;
// rv32i_sub rv32i_sltiu, rv32i_slti rv32i_sltu, rv32i_slt
wire [ 33: 0 ] comp_ext_opd2 = ({i_op_instr[8], i_opimm_instr[2:1], i_op_instr[2:1]} != 0) ? comp_opd2 : ext_opd2;
If the current instruction is sub, sltiu, slti, sltu, or slt, operand 2 must be negated so that the adder performs a subtraction; other instructions use operand 2 unchanged.
Addition operation:
wire[33:0] add_res = ext_opd1 + comp_ext_opd2;
XOR operation:
wire[31:0] xor_res = i_rs1_val ^ opd2;
or operation:
wire [ 31: 0 ] or_res = i_rs1_val | opd2;
AND operation:
wire [ 31: 0 ] and_res = i_rs1_val & opd2;
Logical left shift operation:
wire [ 31: 0 ] sll_res = i_rs1_val << opd2[ 4: 0 ];
Logical right shift operation:
wire [ 31: 0 ] srl_res = i_rs1_val >> opd2[ 4: 0 ];
Arithmetic right shift operation:
wire [ 31: 0 ] eff_mask = ( ~( 32'b0 ) ) >> opd2[ 4: 0 ];
wire [ 31: 0 ] sra_res = ( srl_res & eff_mask ) | ( { 32{ i_rs1_val[ 31 ] } } & ( ~eff_mask ) );
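Equivalent to sra_res = $signed(i_rs1_val) >>> opd2[4:0]; (the $signed cast is required, because >>> applied to an unsigned operand performs a logical shift).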
assign o_rd_wen = i_OP_IMM | i_OP | i_LUI | i_AUIPC | i_JAL | i_JALR;
These instructions write a result back to one of the 32 general-purpose registers.
assign o_J_vld = i_JAL | i_JALR;
When the instruction is JAL or JALR, o_J_vld must be asserted.