=====================
== Rase's basement ==
=====================
Simple and functional is beautiful

Intel 8086 - Simulating immediate MOV instructions

In this post I’m going to go through my thought process implementing the brains for my intel 8086 decoder. Currently, I’ve implemented the decoding of the bits of the compiled executable. Next I have to simulate the logic of executing for example mov ax, 50.

I’m writing this post as I go to better give people my thought process. In this blog post I’m going to be implementing the logic for an immediate to register mov instruction.

Currently, we just decode the bits and then format the printed instructions accordingly:

/// Entry point of the decoder: reads the binary named by the first command-line argument
/// and prints one decoded instruction per line.
///
/// Each loop iteration decodes the instruction that starts at byte offset `i`, formats it
/// with `format_instruction`, prints it, and then advances `i` by the size reported by
/// `determine_instruction_byte_size`.
fn main() {
	// NOTE(review): `args[1]` and `unwrap()` panic when no path is given or the file cannot be read.
	let args: Vec<String> = env::args().collect();
	let binary_path = &args[1];
    let binary_contents = fs::read(binary_path).unwrap();
	let op_codes = construct_opcodes();
	// Register state for the upcoming simulation step; it is not read yet in this function.
	let registers = construct_registers();
	
	let mut i: usize = 0;
	// Only incremented below; its sole reader is the commented-out debug print at the end of the loop.
	let mut instruction_count: usize = 1;
	while i < binary_contents.len() {
	    let first_byte = binary_contents[i];
	    // NOTE(review): indexing `i + 1` panics if only a single byte remains in the input.
	    let second_byte = binary_contents[i + 1];
	
	    let instruction = determine_instruction(&op_codes, first_byte);
	    let mnemonic = get_mnemonic(first_byte, second_byte, instruction);
	    let is_word_size = is_word_size(first_byte, instruction);
	    let memory_mode = determine_memory_mode(second_byte);
	    let is_s_bit_set = first_byte & S_BIT_M as u8 == 0b00000010;
	    let instruction_size = determine_instruction_byte_size(instruction, is_word_size, memory_mode, mnemonic, is_s_bit_set);
	    let reg_is_dest = first_byte & D_BITS as u8 != 0;
	
	    // Each of these strings ends up holding either a register name or an immediate rendered as text.
	    let mut reg_or_immediate = String::new();
	    let mut rm_or_immediate = String::new();
	
	    // An ImmediateToRegisterMemory instruction (the non-MOV-to-register form) has no REG register:
	    // the immediate value is always moved into the R/M operand, so `reg_or_immediate` holds the immediate.
	
	    if instruction == ImmediateToRegisterMemory {
	        if !is_word_size {
	            // TODO: Do we have to handle 8 and 16-bit memory modes here in its own branch?
	            let third_byte = binary_contents[i + 2];
	            reg_or_immediate = (third_byte as usize).to_string();
	        } else { // is_word_size
	            // MOV doesn't care about the s_bit. CMP, SUB, ADD do.
	            // if w=1 and s=0 and mnemonic is sub/add/cmp, it's a 16-bit immediate.
	            match (mnemonic, is_s_bit_set) {
	                ("mov", _) | ("cmp", false) | ("add", false) | ("sub", false) => {
	                    if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
	                        // The immediate is 16 bits wide here: either the mnemonic is mov (which ignores
	                        // the s bit) or the s bit is 0. It is read from the fifth and sixth bytes.
	                        // NOTE(review): the 8-bit displacement mode shares these offsets with the 16-bit
	                        // modes; with a one-byte displacement the immediate would presumably start at
	                        // i + 3 — confirm against determine_instruction_byte_size.
	                        let fifth_byte = binary_contents[i + 4];
	                        let sixth_byte = binary_contents[i + 5];
	                        let combined = combine_bytes(sixth_byte, fifth_byte);
	                        reg_or_immediate = (combined as usize).to_string();
	                    } else {
	                        // No displacement bytes: the 16-bit immediate follows the first two bytes directly.
	                        let third_byte = binary_contents[i + 2];
	                        let fourth_byte = binary_contents[i + 3];
	                        let combined = combine_bytes(fourth_byte, third_byte);
	                        reg_or_immediate = (combined as usize).to_string();
	                    }
	                },
	                ("cmp", true) | ("add", true) | ("sub", true) => {
	                    if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
	                        // In this branch the s bit IS set (together with w = 1), so only a single
	                        // immediate byte is read; here it is taken from the fifth byte.
	                        // NOTE(review): on the 8086 an s=1/w=1 immediate is presumably sign-extended to
	                        // 16 bits; this reads it as an unsigned byte — confirm that is intended.
	                        let fifth_byte = binary_contents[i + 4];
	                        reg_or_immediate = (fifth_byte as usize).to_string();
	                    }
	                    else {
	                        let third_byte = binary_contents[i + 2];
	                        reg_or_immediate = (third_byte as usize).to_string();
	                    }
	                }
	                _ => panic!("Unknown (mnemonic, s_bit_is_set): ({}, {})", mnemonic, is_s_bit_set)
	            }
	        }
	    } else if instruction == ImmediateToAccumulatorADD || instruction == ImmediateToAccumulatorSUB || instruction == ImmediateToAccumulatorCMP{
	        // Immediate-to-accumulator forms: the immediate starts at the second byte
	        // (and continues into the third byte when word sized).
	        if is_word_size {
	            let third_byte = binary_contents[i + 2];
	            let combined = combine_bytes(third_byte, second_byte);
	            reg_or_immediate = (combined as usize).to_string();
	        } else {
	            reg_or_immediate = (second_byte as usize).to_string();
	        }
	    }
	    else {
	        // In this case it's actually not an immediate; the string gets populated with the REG register name.
	        reg_or_immediate = get_register(true, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
	    }
	
	    // This case is the complete opposite of the previous one.
	    // The immediate to register MOV instruction does not have the R/M register
	    // but has the REG register, which is where the immediate value is moved to.
	    if instruction == ImmediateToRegisterMOV {
	        // The R/M register is not used at all with the MOV immediate instruction,
	        // so `rm_or_immediate` carries the immediate instead.
	
	        // With the immediate to register mov instruction, the immediate is stored in the second (and third byte if word sized).
	        if is_word_size {
	            let third_byte = binary_contents[i + 2];
	            let combined = combine_bytes(third_byte, second_byte);
	            rm_or_immediate = (combined as usize).to_string();
	        } else {
	            rm_or_immediate = (second_byte as usize).to_string();
	        }
	    } else {
	        // In this case it's actually not an immediate; the string gets populated with the R/M register name.
	        rm_or_immediate = get_register(false, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
	    }
	
	    let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_or_immediate, &rm_or_immediate);
	    println!("{}", formatted_instruction);
	    instruction_count += 1;
	    // Move on to the first byte of the next instruction.
	    i += instruction_size;
	
	    // print!("size: {}, count: {} - ", instruction_size, instruction_count);
	}
}

After the i += instruction_size line at the end of the loop, we want to update the registers with the immediates that were moved into them.

My current thought process is that I will have all the registers in a compile time array, and I will construct a Register struct from each one. This collection would then be linearly looped through, matching the register and then retrieving the corresponding value from it and updating if necessary.

/// State tracked for a single named register.
///
/// Both values are kept as text for now: `update_register_value` writes
/// `updated_value`, while `get_register_value` reads `original_value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Register {
    /// Register name, e.g. `"ax"` or `"al"`.
    pub register: &'static str,
    /// Value after the most recent update, stored as text.
    pub updated_value: &'static str,
    /// Value before the most recent update, stored as text.
    pub original_value: &'static str,
}

// Names of every register the simulator keeps state for: the eight word-sized
// registers on the first row, followed by the eight byte-sized registers.
// Each row is listed in the same order as the 8086 REG field encodings (000..111);
// NOTE(review): confirm whether any caller relies on that ordering.
const REGISTERS: [&str; 16] = [
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
];

 
/// Builds the initial register state: one [`Register`] per name in `REGISTERS`,
/// with both the updated and the original value set to `"0"`.
///
/// Meant to be called once at the start of the program so that the state of
/// every possible register is available while instructions are processed.
pub fn construct_registers() -> Vec<Register> {
    REGISTERS
        .iter()
        // `iter()` yields `&&'static str`; the `&register` pattern copies out the
        // `&'static str` so the struct field receives exactly the type it declares.
        .map(|&register| Register {
            register,
            updated_value: "0",
            original_value: "0",
        })
        .collect()
}

/// Returns the numeric value stored in `original_value` for the register named `register`.
///
/// The register table keeps values as text, so the stored string is parsed into
/// a `usize` before it is returned.
///
/// # Panics
///
/// Panics if no entry in `registers` has the given name, or if the stored value
/// is not a valid unsigned number.
pub fn get_register_value(register: &'static str, registers: &Vec<Register>) -> usize {
    match registers.iter().find(|reg| reg.register == register) {
        Some(reg) => reg.original_value.parse::<usize>().unwrap_or_else(|err| {
            panic!(
                "Register {} holds a non-numeric value {:?}: {}",
                register, reg.original_value, err
            )
        }),
        None => panic!("Register not found, this should never happen. Register that was not found was {}", register),
    }
}
/// Stores `value` as the `updated_value` of the first register named `register_to_update`.
///
/// # Panics
///
/// Panics if `registers` contains no entry with that name.
pub fn update_register_value(register_to_update: &'static str, value: &'static str, registers: &mut Vec<Register>) -> () {
    let target = registers
        .iter_mut()
        .find(|entry| entry.register == register_to_update);

    match target {
        Some(entry) => entry.updated_value = value,
        None => panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update),
    }
}

So I’m thinking that we will use these functions as follows:

  1. Figure out which register was used as the destination by looking at the D bit in the instruction stream.
  2. Linearly loop over the Vec<Register> to figure out the value in the source register that was moved into the destination register. The correct source register will be figured out by looking at the D bit once again.
  3. Call update_register_value with the destination registers name, passing the return value of get_register_value(src_reg, register) as the value argument to update_register_value. This would in theory correctly hold the state of the mov register, immediate instruction.

Things I haven’t figured out yet:

  1. Update the original value in the struct, but where?
  2. The updated_value and original_value struct members are string slices. This is pretty bad once we have to do arithmetic: every operation would need a conversion to usize, and because the same kind of variable is used for both register names and numeric values, a conversion can fail and lead to a crash or to ugly error handling code. This might need to be refactored somehow. It will require changes in a lot of places in our existing code 😐.

Hmm, what if the operation is an immediate to register move? We don’t want to loop over the registers then, since the source value we want to update with is never going to be in a register; instead it’s provided directly as the immediate. This is an edge case we want to handle.

Also, what if the operation is a conditional jump? The register state won’t change. Well, technically the instruction pointer and certain flags will be modified, but we are currently not handling this.

So for our case we might just want to skip the update_register_value function call if it’s a conditional jump. In the future if we want to handle this, we will.

Ehh, now that I think about it, I think get_register_value should become get_register_state and return the struct instead of the value. Returning the struct will give us more flexibility in the caller.

get_register_value turned into this:

/// Looks up the register named `register` and returns a reference to its entry.
///
/// The returned reference borrows from `registers`, so it stays valid for as
/// long as the register table itself is borrowed.
///
/// # Panics
///
/// Panics if `registers` contains no entry with that name.
pub fn get_register_state<'a>(register: &str, registers: &'a Vec<Register>) -> &'a Register {
    registers
        .iter()
        .find(|reg| reg.register == register)
        .unwrap_or_else(|| panic!("Register not found, this should never happen. Register that was not found was {}", register))
}

Ok so in the caller we want to check if the instruction is an immediate move or a conditional jump, well, we have an enum for this called InstructionType and it looks like this:

/// InstructionType lists all the possible instruction forms that we are trying to decode.
#[derive(PartialEq, Debug, Clone, Copy)]
pub enum InstructionType {
    /// Instruction that uses both the REG and the R/M operand.
    RegisterMemory,
    /// Immediate written to the R/M operand; this form has no REG register.
    ImmediateToRegisterMemory,
    /// MOV of an immediate into the REG register; this form has no R/M operand.
    ImmediateToRegisterMOV,
    /// ADD of an immediate to the accumulator (ax when word sized, otherwise al).
    ImmediateToAccumulatorADD,
    /// SUB of an immediate from the accumulator (ax when word sized, otherwise al).
    ImmediateToAccumulatorSUB,
    /// CMP of an immediate with the accumulator (ax when word sized, otherwise al).
    ImmediateToAccumulatorCMP,
    // The remaining variants are the jump and loop instructions; all of them are
    // reported as conditional jumps by `instruction_is_conditional_jump`.
    JE_JUMP,
    JL_JUMP,
    JLE_JUMP,
    JB_JUMP,
    JBE_JUMP,
    JP_JUMP,
    JO_JUMP,
    JS_JUMP,
    JNE_JUMP,
    JNL_JUMP,
    JNLE_JUMP,
    JNB_JUMP,
    JNBE_JUMP,
    JNP_JUMP,
    JNO_JUMP,
    JNS,
    LOOP,
    LOOPZ,
    LOOPNZ,
    JCXZ,
}

I’m going to be matching the instruction against this enum. To avoid ugly three-page-long if statements, I’m going to write a couple of little helper functions for this.

/// Reports whether `instruction` is one of the jump or loop variants
/// (`JE_JUMP` through `JCXZ`) of [`InstructionType`].
pub fn instruction_is_conditional_jump(instruction: InstructionType) -> bool {
    matches!(
        instruction,
        InstructionType::JE_JUMP
            | InstructionType::JL_JUMP
            | InstructionType::JLE_JUMP
            | InstructionType::JB_JUMP
            | InstructionType::JBE_JUMP
            | InstructionType::JP_JUMP
            | InstructionType::JO_JUMP
            | InstructionType::JS_JUMP
            | InstructionType::JNE_JUMP
            | InstructionType::JNL_JUMP
            | InstructionType::JNLE_JUMP
            | InstructionType::JNB_JUMP
            | InstructionType::JNBE_JUMP
            | InstructionType::JNP_JUMP
            | InstructionType::JNO_JUMP
            | InstructionType::JNS
            | InstructionType::LOOP
            | InstructionType::LOOPZ
            | InstructionType::LOOPNZ
            | InstructionType::JCXZ
    )
}

/// Reports whether `instruction` is one of the immediate-carrying variants of
/// [`InstructionType`]: the register/memory form, the MOV-to-register form, or
/// one of the three accumulator forms (ADD, SUB, CMP).
pub fn instruction_is_immediate_to_register(instruction: InstructionType) -> bool {
    matches!(
        instruction,
        InstructionType::ImmediateToRegisterMemory
            | InstructionType::ImmediateToRegisterMOV
            | InstructionType::ImmediateToAccumulatorADD
            | InstructionType::ImmediateToAccumulatorSUB
            | InstructionType::ImmediateToAccumulatorCMP
    )
}

That’s a bit better, now I can just have a clean looking if statement for all these branches.

Let’s take it back a bit. I mentioned that we have this problem where we have these two fields in the Register struct:

/// State tracked for a single named register. The two value fields marked
/// below are still string slices rather than numbers.
pub struct Register {
   pub register: &'static str,
   pub updated_value: &'static str, // THIS
   pub original_value: &'static str, // THIS
}

The reason the fields are string slices instead of usize is because before I was just thinking about decoding and printing the decoded instructions, this is why it didn’t matter that I had one register that could be either a register or an immediate value.

The code that stored the values looked like this:

// Fills `reg_or_immediate` with either an immediate rendered as text or the REG register name,
// depending on the instruction type.
if instruction == ImmediateToRegisterMemory {
      if !is_word_size {
          // TODO: Do we have to handle 8 and 16-bit memory modes here in its own branch?
          let third_byte = binary_contents[i + 2];
          reg_or_immediate = (third_byte as usize).to_string();
      } else { // is_word_size
          // MOV doesn't care about the s_bit. CMP, SUB, ADD do.
          // if w=1 and s=0 and mnemonic is sub/add/cmp, it's a 16-bit immediate.
          match (mnemonic, is_s_bit_set) {
              ("mov", _) | ("cmp", false) | ("add", false) | ("sub", false) => {
                  if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
                      // The immediate is 16 bits wide here: either the mnemonic is mov (which ignores
                      // the s bit) or the s bit is 0. It is read from the fifth and sixth bytes.
                      // NOTE(review): the 8-bit displacement mode shares these offsets with the 16-bit
                      // modes; with a one-byte displacement the immediate would presumably start at
                      // i + 3 — confirm.
                      let fifth_byte = binary_contents[i + 4];
                      let sixth_byte = binary_contents[i + 5];
                      let combined = combine_bytes(sixth_byte, fifth_byte);
                      reg_or_immediate = (combined as usize).to_string();
                  } else {
                      // No displacement bytes: the 16-bit immediate follows the first two bytes directly.
                      let third_byte = binary_contents[i + 2];
                      let fourth_byte = binary_contents[i + 3];
                      let combined = combine_bytes(fourth_byte, third_byte);
                      reg_or_immediate = (combined as usize).to_string();
                  }
              },
              ("cmp", true) | ("add", true) | ("sub", true) => {
                  if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
                      // In this branch the s bit IS set (together with w = 1), so only a single
                      // immediate byte is read; here it is taken from the fifth byte.
                      let fifth_byte = binary_contents[i + 4];
                      reg_or_immediate = (fifth_byte as usize).to_string();
                  }
                  else {
                      let third_byte = binary_contents[i + 2];
                      reg_or_immediate = (third_byte as usize).to_string();
                  }
              }
              _ => panic!("Unknown (mnemonic, s_bit_is_set): ({}, {})", mnemonic, is_s_bit_set)
          }
      }
} else if instruction == ImmediateToAccumulatorADD || instruction == ImmediateToAccumulatorSUB || instruction == ImmediateToAccumulatorCMP{
    // Immediate-to-accumulator forms: the immediate starts at the second byte
    // (and continues into the third byte when word sized).
    if is_word_size {
        let third_byte = binary_contents[i + 2];
        let combined = combine_bytes(third_byte, second_byte);
        reg_or_immediate = (combined as usize).to_string();
    } else {
        reg_or_immediate = (second_byte as usize).to_string();
    }
}
else {
    // In this case it's actually not an immediate; the string gets populated with the REG register name.
    reg_or_immediate = get_register(true, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
}

So, depending on the InstructionType enum, we stored either an immediate or a register into the reg_or_immediate variable.

Ideally this variable would be spread into two different variables, for example: reg_register and reg_immediate or something like that. This could allow us to do arithmetics without casts in case the InstructionType was an immediate to register move.

However, this would require us to check for the InstructionType again when we want to figure out which variable we want to use. I think I’ll handle them as different variables.. after all we just made helper functions to check for the InstructionType so it will be pretty easy..

Ok so the previous code snippet became:

// The REG operand is now tracked in two variables: `reg_register` holds a register name,
// `reg_immediate` holds a numeric immediate. Only one of them is assigned per instruction;
// the other keeps its initial value (empty string / 0).
let mut reg_register = String::new();
let mut reg_immediate: usize = 0;
// An ImmediateToRegisterMemory instruction (the non-MOV-to-register form) has no REG register:
// the immediate value is always moved into the R/M operand, so only `reg_immediate` is set.

if instruction == ImmediateToRegisterMemory {
    if !is_word_size {
        // TODO: Do we have to handle 8 and 16-bit memory modes here in its own branch?
        let third_byte = binary_contents[i + 2];
        reg_immediate = third_byte as usize
    } else { // is_word_size
        // MOV doesn't care about the s_bit. CMP, SUB, ADD do.
        // if w=1 and s=0 and mnemonic is sub/add/cmp, it's a 16-bit immediate.
        match (mnemonic, is_s_bit_set) {
            ("mov", _) | ("cmp", false) | ("add", false) | ("sub", false) => {
                if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
                    // The immediate is 16 bits wide here: either the mnemonic is mov (which ignores
                    // the s bit) or the s bit is 0. It is read from the fifth and sixth bytes.
                    // NOTE(review): the 8-bit displacement mode shares these offsets with the 16-bit
                    // modes; with a one-byte displacement the immediate would presumably start at
                    // i + 3 — confirm.
                    let fifth_byte = binary_contents[i + 4];
                    let sixth_byte = binary_contents[i + 5];
                    let combined = combine_bytes(sixth_byte, fifth_byte);
                    reg_immediate = combined as usize
                } else {
                    // No displacement bytes: the 16-bit immediate follows the first two bytes directly.
                    let third_byte = binary_contents[i + 2];
                    let fourth_byte = binary_contents[i + 3];
                    let combined = combine_bytes(fourth_byte, third_byte);
                    reg_immediate = combined as usize
                }
            },
            ("cmp", true) | ("add", true) | ("sub", true) => {
                if memory_mode == MemoryMode16Bit || memory_mode == MemoryMode8Bit || memory_mode == DirectMemoryOperation {
                    // In this branch the s bit IS set (together with w = 1), so only a single
                    // immediate byte is read; here it is taken from the fifth byte.
                    // NOTE(review): on the 8086 an s=1/w=1 immediate is presumably sign-extended to
                    // 16 bits; this reads it as an unsigned byte — confirm before doing arithmetic with it.
                    let fifth_byte = binary_contents[i + 4];
                    reg_immediate = fifth_byte as usize;
                }
                else {
                    let third_byte = binary_contents[i + 2];
                    reg_immediate = third_byte as usize
                }
            }
            _ => panic!("Unknown (mnemonic, s_bit_is_set): ({}, {})", mnemonic, is_s_bit_set)
        }
    }
} else if instruction == ImmediateToAccumulatorADD || instruction == ImmediateToAccumulatorSUB || instruction == ImmediateToAccumulatorCMP{
    // Immediate-to-accumulator forms: the immediate starts at the second byte
    // (and continues into the third byte when word sized).
    if is_word_size {
        let third_byte = binary_contents[i + 2];
        let combined = combine_bytes(third_byte, second_byte);
        reg_immediate = combined as usize
    } else {
        reg_immediate = second_byte as usize
    }
}
else {
    // In this case there is no immediate; `reg_register` gets populated with the REG register name.
    reg_register = get_register(true, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
}

So, we just made another mutable variable that will contain the immediate as usize if present, the else branch is the only one actually using a register so it was pretty easy.

Now we actually need to do the same for the R/M register. It has the same problem, since it keeps the immediate and the register in the same variable and doesn’t separate the two. Let’s do that.

// Holds either the R/M register name or, for the immediate to register MOV, the immediate rendered as text.
let mut rm_or_immediate = String::new();

// This case is the complete opposite of the previous one.
// The immediate to register MOV instruction does not have the R/M register
// but has the REG register, which is where the immediate value is moved to.
if instruction == ImmediateToRegisterMOV {
    // The R/M register is not used at all with the MOV immediate instruction,
    // so this variable carries the immediate instead.

    // With the immediate to register mov instruction, the immediate is stored in the second (and third byte if word sized).
    if is_word_size {
        let third_byte = binary_contents[i + 2];
        let combined = combine_bytes(third_byte, second_byte);
        rm_or_immediate = (combined as usize).to_string();
    } else {
        rm_or_immediate = (second_byte as usize).to_string();
    }
} else {
    // In this case it's actually not an immediate; the string gets populated with the R/M register name.
    rm_or_immediate = get_register(false, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
}

Becomes:

// The R/M operand is now tracked in two variables: `rm_register` holds a register name,
// `rm_immediate` holds a numeric immediate. Only one of them is assigned per instruction;
// the other keeps its initial value (empty string / 0).
let mut rm_register = String::new();
let mut rm_immediate: usize = 0;

if instruction == ImmediateToRegisterMOV {
	// The R/M register is not used at all with the MOV immediate instruction,
	// so only `rm_immediate` is set here.
	
	// With the immediate to register mov instruction, the immediate is stored in the second (and third byte if word sized).
	if is_word_size {
	    let third_byte = binary_contents[i + 2];
	    let combined = combine_bytes(third_byte, second_byte);
	    rm_immediate = combined as usize
	} else {
	    rm_immediate = second_byte as usize
	}
} else {
	// In this case there is no immediate; `rm_register` gets populated with the R/M register name.
	rm_register = get_register(false, instruction, memory_mode, first_byte, second_byte, is_word_size).parse().unwrap();
}

Now, after this block of logic we have a function call that calls format_instruction. This formatting logic looks pretty messy, so it’s in its own function. This function thinks that the immediate is still in the same variable as the register, we need to change this.

/// Renders one decoded instruction as a line of assembly text.
///
/// * `binary_contents`, `i` - the instruction stream and the offset of the current
///   instruction; handed to the displacement helpers when the memory mode needs one.
/// * `first_byte`, `second_byte` - the first two bytes of the instruction. They are passed
///   to `get_register` for the accumulator forms; `second_byte` is also printed as the
///   operand of the jump/loop instructions.
/// * `instruction`, `mnemonic`, `is_word_size`, `memory_mode`, `reg_is_dest` - decoded
///   properties that select the output layout.
/// * `reg_or_immediate`, `rm_or_immediate` - operand texts prepared by the caller; each
///   holds either a register name or an immediate rendered as a decimal string.
///
/// # Panics
///
/// Panics on a memory mode or instruction type that has no formatting branch.
fn format_instruction(binary_contents: &Vec<u8>, i: usize, first_byte: u8, second_byte: u8, instruction: InstructionType, mnemonic: &str, is_word_size: bool, memory_mode: MemoryModeEnum, reg_is_dest: bool, reg_or_immediate: &String, rm_or_immediate: &String) -> String {
    if instruction == ImmediateToRegisterMemory {
        // Memory destinations are printed with an explicit `word`/`byte` size keyword.
        let size = if is_word_size { "word" } else { "byte" };

        if memory_mode == MemoryModeNoDisplacement {
            format!("{} {} [{}], {}", mnemonic, size, rm_or_immediate, reg_or_immediate)
        } else if memory_mode == MemoryMode8Bit {
            let displacement = get_8_bit_displacement(binary_contents, i);
            format!("{} {} [{} + {}], {}", mnemonic, size, rm_or_immediate, displacement, reg_or_immediate)
        } else if memory_mode == MemoryMode16Bit {
            let displacement = get_16_bit_displacement(binary_contents, i);
            format!("{} {} [{} + {}], {}", mnemonic, size, rm_or_immediate, displacement, reg_or_immediate)
        } else if memory_mode == DirectMemoryOperation {
            let displacement = get_16_bit_displacement(binary_contents, i);
            // NOTE: in this branch reg_or_immediate (the immediate) and reg_is_dest have no
            // connection to each other. This is an exception with the direct memory mode address.
            // The byte-sized output previously had the displacement and the immediate swapped
            // compared to the word-sized output; both sizes now share one layout.
            if reg_is_dest {
                format!("{} {} [{}], {}", mnemonic, size, displacement, reg_or_immediate)
            } else {
                format!("{} {} {}, [{}]", mnemonic, size, reg_or_immediate, displacement)
            }
        } else if memory_mode == RegisterMode {
            if reg_is_dest {
                format!("{} {}, {}", mnemonic, rm_or_immediate, reg_or_immediate)
            } else {
                format!("{} {}, {}", mnemonic, reg_or_immediate, rm_or_immediate)
            }
        } else {
            panic!("Invalid memory mode {:?}.", memory_mode)
        }
    } else if instruction == ImmediateToRegisterMOV {
        // Here reg_or_immediate is the destination register and rm_or_immediate the immediate.
        format!("{} {}, {}", mnemonic, reg_or_immediate, rm_or_immediate)
    } else if instruction == ImmediateToAccumulatorADD || instruction == ImmediateToAccumulatorSUB || instruction == ImmediateToAccumulatorCMP {
        // NOTE: with the ImmediateToAccumulator operations the register is not specified in the
        // bits; it is hard coded. If W = 1 the immediate goes to ax, otherwise to al.
        // reg_or_immediate is printed because that is where the caller stores the immediate.
        let ax_or_al = get_register(true, instruction, memory_mode, first_byte, second_byte, is_word_size);
        format!("{} {}, {}", mnemonic, ax_or_al, reg_or_immediate)
    } else if instruction == RegisterMemory {
        if memory_mode == MemoryModeNoDisplacement {
            if reg_is_dest {
                format!("{} {}, [{}]", mnemonic, reg_or_immediate, rm_or_immediate)
            } else {
                format!("{} [{}], {}", mnemonic, rm_or_immediate, reg_or_immediate)
            }
        } else if memory_mode == MemoryMode8Bit {
            let disp = get_8_bit_displacement(binary_contents, i);
            if reg_is_dest {
                format!("{} {}, [{} + {}]", mnemonic, reg_or_immediate, rm_or_immediate, disp)
            } else {
                format!("{} [{} + {}], {}", mnemonic, rm_or_immediate, disp, reg_or_immediate)
            }
        } else if memory_mode == MemoryMode16Bit {
            let displacement = get_16_bit_displacement(binary_contents, i);
            if reg_is_dest {
                format!("{} {}, [{} + {}]", mnemonic, reg_or_immediate, rm_or_immediate, displacement)
            } else {
                format!("{} [{} + {}], {}", mnemonic, rm_or_immediate, displacement, reg_or_immediate)
            }
        } else if memory_mode == RegisterMode {
            if reg_is_dest {
                format!("{} {}, {}", mnemonic, reg_or_immediate, rm_or_immediate)
            } else {
                format!("{} {}, {}", mnemonic, rm_or_immediate, reg_or_immediate)
            }
        } else if memory_mode == DirectMemoryOperation {
            let displacement = get_16_bit_displacement(binary_contents, i);
            // NOTE(review): the operand order here differs from the other RegisterMemory
            // branches (the displacement is printed bare and the R/M text in brackets when
            // reg_is_dest). Left unchanged because it depends on what get_register returns
            // for this memory mode — confirm against expected disassembly.
            if reg_is_dest {
                format!("{} {}, [{}]", mnemonic, displacement, rm_or_immediate)
            } else {
                format!("{} {}, [{}]", mnemonic, rm_or_immediate, displacement)
            }
        } else {
            panic!("Unknown memory mode: {:?}, did not expect to get here.", memory_mode)
        }
    } else if instruction == JE_JUMP
        || instruction == JL_JUMP
        || instruction == JLE_JUMP
        || instruction == JB_JUMP
        || instruction == JBE_JUMP
        || instruction == JP_JUMP
        || instruction == JO_JUMP
        || instruction == JS_JUMP
        || instruction == JNE_JUMP
        || instruction == JNL_JUMP
        || instruction == JNLE_JUMP
        || instruction == JNB_JUMP
        || instruction == JNBE_JUMP
        || instruction == JNP_JUMP
        || instruction == JNO_JUMP
        || instruction == JNS
        || instruction == LOOP
        || instruction == LOOPZ
        || instruction == LOOPNZ
        || instruction == JCXZ
    {
        // The jump operand is printed as the raw second byte, interpreted as unsigned.
        format!("{} {}", mnemonic, second_byte as usize)
    } else {
        panic!("Unknown instruction: {:?}, did not expect to get here.", instruction)
    }
}

I told you it’s pretty messy, but it gets the job done.

Before doing the next change I’m going to commit because stuff can get messy really fast.

Okay let’s get started.

So, the call to the function changes from this:

// "Clean code" advocates would go crazy.
// Old call site: each of the two trailing String arguments carries either a register name
// or an immediate rendered as text.
let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_or_immediate, &rm_or_immediate);

To this:

// New call site: register names (`reg_register`, `rm_register`) and numeric immediates
// (`reg_immediate`, `rm_immediate`) are passed as separate arguments.
let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_register, &rm_register, reg_immediate, rm_immediate);

The format_instruction function now takes into consideration that there are different variables for the immediate and the registers and the body changed to this:

/// Renders one decoded instruction as a line of assembly text.
///
/// The function dispatches on `instruction` first and, where relevant, on `memory_mode`:
/// - `ImmediateToRegisterMemory`: prints a `word`/`byte` size prefix (chosen by `is_word_size`) for the
///   memory forms, reading the displacement from `binary_contents` at `i` when the mode needs one.
/// - `ImmediateToRegisterMOV`: prints `reg_register` followed by `rm_immediate`.
/// - `ImmediateToAccumulatorADD/SUB/CMP`: prints the register returned by `get_register` followed by
///   `reg_immediate`.
/// - `RegisterMemory`: prints register/memory operands, with `reg_is_dest` selecting the operand order.
/// - Conditional jumps and loops: prints the mnemonic followed by `second_byte` as an unsigned number.
///
/// # Panics
/// Panics when `memory_mode` is not handled for the given instruction, or when `instruction` matches
/// none of the branches below.
fn format_instruction(binary_contents: &Vec<u8>, i: usize, first_byte: u8, second_byte: u8, instruction: InstructionType, mnemonic: &str, is_word_size: bool, memory_mode: MemoryModeEnum, reg_is_dest: bool, reg_register: &String, rm_register: &String, reg_immediate: usize, rm_immediate: usize) -> String {
    if instruction == ImmediateToRegisterMemory {
        if memory_mode == MemoryModeNoDisplacement {
            // No displacement: the memory operand is just the bracketed rm_register.
            if is_word_size {
                return format!("{} word [{}], {}", mnemonic, rm_register, reg_immediate);
            } else {
                return format!("{} byte [{}], {}", mnemonic, rm_register, reg_immediate);
            }
        } else if memory_mode == MemoryMode8Bit {
            let displacement = get_8_bit_displacement(binary_contents, i);
            if is_word_size {
                return format!("{} word [{} + {}], {}", mnemonic, rm_register, displacement, reg_immediate);
            } else {
                return format!("{} byte [{} + {}], {}", mnemonic, rm_register, displacement, reg_immediate);
            }
        } else if memory_mode == MemoryMode16Bit {
            let displacement = get_16_bit_displacement(binary_contents, i);
            if is_word_size {
                return format!("{} word [{} + {}], {}", mnemonic, rm_register, displacement, reg_immediate);
            } else {
                return format!("{} byte [{} + {}], {}", mnemonic, rm_register, displacement, reg_immediate);
            }
        } else if memory_mode == DirectMemoryOperation {
            // Direct addressing: the 16-bit displacement is used as the address itself (no rm_register).
            let displacement = get_16_bit_displacement(binary_contents, i);
            if is_word_size {
                // NOTE: in this branch reg_immediate and reg_is_dest have no connection to each other. This is an exception with the direct memory mode address.
                if reg_is_dest {
                    return format!("{} word [{}], {}", mnemonic, displacement, reg_immediate);
                } else {
                    return format!("{} word {}, [{}]", mnemonic, reg_immediate, displacement);
                }
            } else {
                // NOTE: in this branch reg_immediate and reg_is_dest have no connection to each other. This is an exception with the direct memory mode address.
                if reg_is_dest {
                    // NOTE(review): the two byte-sized formats below pass reg_immediate and displacement in the opposite order to the word-sized formats above — confirm this is intended.
                    return format!("{} byte [{}], {}", mnemonic, reg_immediate, displacement);
                } else {
                    return format!("{} byte {}, [{}]", mnemonic, displacement, reg_immediate);
                }
            }
        } else if memory_mode == RegisterMode {
            // Register operand: no brackets and no size prefix.
            if reg_is_dest {
                return format!("{} {}, {}", mnemonic, rm_register, reg_immediate);
            } else {
                return format!("{} {}, {}", mnemonic, reg_immediate, rm_register);
            }
        } else {
            panic!("Invalid memory mode {:?}.", memory_mode);
        }
    } else if instruction == ImmediateToRegisterMOV {
        return format!("{} {}, {}", mnemonic, reg_register, rm_immediate);
    } else if instruction == ImmediateToAccumulatorADD || instruction == ImmediateToAccumulatorSUB || instruction == ImmediateToAccumulatorCMP {

        // NOTE!!!!: with the ImmediateToAccumulator operations, the registers are not specified in the bits,
        // instead, they are hard coded. If W = 1 then the register the immediate is applied to is ax, else al.
        // The immediate value for these operations is carried in reg_immediate, which is why it is printed here
        // as the source operand.

        let ax_or_al = get_register(true, instruction, memory_mode, first_byte, second_byte, is_word_size);
        return format!("{} {}, {}", mnemonic, ax_or_al, reg_immediate);
    } else if instruction == RegisterMemory {
        if memory_mode == MemoryModeNoDisplacement {
            if reg_is_dest {
                return format!("{} {}, [{}]", mnemonic, reg_register, rm_register)
            } else {
                return format!("{} [{}], {}", mnemonic, rm_register, reg_register)
            }
        } else if memory_mode == MemoryMode8Bit {
            let disp = get_8_bit_displacement(binary_contents, i);
            if reg_is_dest {
                return format!("{} {}, [{} + {}]", mnemonic, reg_register, rm_register, disp)
            } else {
                return format!("{} [{} + {}], {}", mnemonic, rm_register, disp, reg_register)
            }
        } else if memory_mode == MemoryMode16Bit {
            let displacement = get_16_bit_displacement(binary_contents, i);
            if reg_is_dest {
                return format!("{} {}, [{} + {}]", mnemonic, reg_register, rm_register, displacement)
            } else {
                return format!("{} [{} + {}], {}", mnemonic, rm_register, displacement, reg_register)
            }
        } else if memory_mode == RegisterMode {
            if reg_is_dest {
                return format!("{} {}, {}", mnemonic, reg_register, rm_register)
            } else {
                return format!("{} {}, {}", mnemonic, rm_register, reg_register)
            }
        } else if memory_mode == DirectMemoryOperation {
            let displacement = get_16_bit_displacement(binary_contents, i);
            // NOTE(review): neither branch prints reg_register; both use rm_register and the displacement,
            // only swapping which one is bracketed — confirm these are the intended operands.
            if reg_is_dest {
                return format!("{} {}, [{}]", mnemonic, displacement, rm_register)
            } else {
                return format!("{} {}, [{}]", mnemonic, rm_register, displacement)
            }
        } else {
            panic!("Unknown memory mode: {:?}, did not expect to get here.", memory_mode);
        }
    } else if instruction == JE_JUMP
        || instruction == JL_JUMP
        || instruction == JLE_JUMP
        || instruction == JB_JUMP
        || instruction == JBE_JUMP
        || instruction == JP_JUMP
        || instruction == JO_JUMP
        || instruction == JS_JUMP
        || instruction == JNE_JUMP
        || instruction == JNL_JUMP
        || instruction == JNLE_JUMP
        || instruction == JNB_JUMP
        || instruction == JNBE_JUMP
        || instruction == JNP_JUMP
        || instruction == JNO_JUMP
        || instruction == JNS
        || instruction == LOOP
        || instruction == LOOPZ
        || instruction == LOOPNZ
        || instruction == JCXZ
    {
        // Jumps and loops print the raw second byte as an unsigned number.
        return format!("{} {}", mnemonic, second_byte as usize);
    } else {
        panic!("Unknown instruction: {:?}, did not expect to get here.", instruction);
    }
}

Good morning, I got my morning coffee, and I’m going to continue the project now. Yesterday I didn’t get that much done because I spent some time with my family. Let’s start.

Today I got like 2-3 hours to work on this thing.

So yesterday we got the logic to treat immediate as usize. Now we want to change the Register struct code to treat them as usize too.

/// State tracked for a single register (string-valued version, before the switch to `usize`).
pub struct Register {
   /// Register name; `update_register_value` compares this against the name it is asked to update.
   pub register: &'static str,
   /// Value written by `update_register_value`.
   pub updated_value: &'static str,
   /// Value printed on the left-hand side of the `original -> updated` output.
   pub original_value: &'static str,
}

Becomes:

/// State tracked for a single register, with values stored as `usize`.
pub struct Register {
   /// Register name; `update_register_value` compares this against the name it is asked to update.
   pub register: &'static str,
   /// Value written by `update_register_value`.
   pub updated_value: usize,
   /// Value printed on the left-hand side of the `original -> updated` output.
   pub original_value: usize,
}
/// Builds the register table: one `Register` per entry in `REGISTERS`, with both values set to `"0"`.
///
/// This is the string-valued version, shown before the switch to `usize` values.
pub fn construct_registers() -> Vec<Register>{
    // REGISTERS.len() is known up front, so the vector is sized once.
    let mut registers: Vec<Register> = Vec::with_capacity(REGISTERS.len());

    for register in REGISTERS.iter() {
        registers.push(Register {
            register,
            updated_value: "0",
            original_value: "0",
        });
    }
    return registers;
}

Becomes:

/// Builds the register table: one `Register` per entry in `REGISTERS`, with both values set to `0`.
pub fn construct_registers() -> Vec<Register>{
    // REGISTERS.len() is known up front, so the vector is sized once.
    let mut registers: Vec<Register> = Vec::with_capacity(REGISTERS.len());

    for register in REGISTERS.iter() {
        registers.push(Register {
            register,
            updated_value: 0,
            original_value: 0,
        });
    }
    return registers;
}

And finally update_register_value

/// Sets `updated_value` of the register named `register_to_update` to `value`.
///
/// # Panics
/// Panics if no entry in `registers` has a `register` name equal to `register_to_update`.
pub fn update_register_value(register_to_update: &'static str, value: &'static str, registers: &mut Vec<Register>) -> () {
    for reg in registers.iter_mut() {
        if reg.register == register_to_update {
            reg.updated_value = value;
            // Stop once the register has been updated; otherwise control falls through
            // to the "not found" panic below even though the update succeeded.
            return;
        }
    }
    panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update);
}

Becomes:

/// Sets `updated_value` of the register named `register_to_update` to `value`.
///
/// # Panics
/// Panics if no entry in `registers` has a `register` name equal to `register_to_update`.
pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>) -> () {
    for reg in registers.iter_mut() {
        if reg.register == register_to_update {
            reg.updated_value = value;
            // Stop once the register has been updated; otherwise control falls through
            // to the "not found" panic below even though the update succeeded.
            return;
        }
    }
    panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update);
}

Okay so I’m now going to start making the logic for register states.

First of all, I’m going to change the update_register_value function to take one more argument, InstructionType, because the registers get updated in different ways depending on the InstructionType.

For example:

mov ax, 2    ; overwrites the contents of ax with 2.
sub ax, 3    ; get the content of ax and minus 3 from it.
cmp ax, 5    ; does the same thing as sub but doesn't change the registers and just sets flags.
jnz 0x99232  ; changes instruction pointer to 0x99232 if ZF flag == 0.

So the function now becomes:

pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>, instruction: InstructionType) -> ()

The function call becomes:

update_register_value(reg_value.register, rm_immediate, &mut registers, instruction);

Ok… so we want to change the register values in different ways depending on the InstructionType. I’m not sure if this is the best way to go about this, but I’m thinking we do something like this:

/// Applies `value` to the register named `register_to_update`, in a way that depends on `instruction`.
///
/// ADD accumulates into `updated_value`, SUB subtracts from it, and the immediate MOV overwrites it.
/// Every other instruction type leaves the register unchanged, except `ImmediateToRegisterMemory`,
/// which is not implemented yet and hits `todo!`.
///
/// # Panics
/// Panics if no entry in `registers` matches `register_to_update`, or if `instruction` is
/// `ImmediateToRegisterMemory` (the `todo!` arm).
pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>, instruction: InstructionType) -> () {
    for reg in registers.iter_mut() {
        if reg.register == register_to_update {
            match instruction {
                ImmediateToAccumulatorADD => reg.updated_value += value,
                // NOTE(review): `usize` subtraction panics on underflow in debug builds, so subtracting a
                // value larger than the register's current contents would abort — confirm the desired wrap behaviour.
                ImmediateToAccumulatorSUB => reg.updated_value -= value,
                ImmediateToRegisterMemory => todo!("How do I implement this?"),
                ImmediateToRegisterMOV => reg.updated_value = value,
                _ => () // Conditional jumps, CMP instructions.
            }
            // The register was found and handled; skip the "not found" panic below.
            return
        }
    }
    panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update);
}

Now. I’m not sure how I should implement the ImmediateToRegisterMemory InstructionType because it can be either a stack operation or a register operation. Our program currently doesn’t know what a stack is. We need to implement logic for this to handle only the register moves for now and handle the stack stuff later.

I have this enum called MemoryModeEnum that determines if the operation is a stack operation or for example a register to register move.

/// Addressing mode of the decoded instruction.
///
/// `format_instruction` uses it to decide how the operands are printed and whether a displacement
/// has to be read from the binary.
#[derive(PartialEq, Clone, Copy, Debug)]
pub enum MemoryModeEnum {
    /// Memory operand printed as `[rm]`, with no displacement read.
    MemoryModeNoDisplacement,
    /// Memory operand with a displacement read via `get_8_bit_displacement`.
    MemoryMode8Bit,
    /// Memory operand with a displacement read via `get_16_bit_displacement`.
    MemoryMode16Bit,
    /// Operands printed as plain registers, without brackets.
    RegisterMode,
    /// Direct address: the value read via `get_16_bit_displacement` is printed as the address itself.
    DirectMemoryOperation,
}

Should I pass the MemoryModeEnum member into the function too? — I don’t know yet.

Sigh… I think I have to.

The function signature becomes

pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>, instruction: InstructionType, memory_mode: MemoryModeEnum) -> () {

The function call then becomes

update_register_value(reg_value.register, rm_immediate, &mut registers, instruction, memory_mode);

Now I started making the logic for RegisterMode

/// Applies `value` to the register named `register_to_update`, depending on `instruction` and,
/// for `ImmediateToRegisterMemory`, on `memory_mode`.
///
/// At this stage the `ImmediateToRegisterMemory` arm changes nothing in any memory mode: the
/// `RegisterMode` arm is still empty and the memory-addressing modes are deliberately ignored.
///
/// # Panics
/// Panics if no entry in `registers` matches `register_to_update`.
pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>, instruction: InstructionType, memory_mode: MemoryModeEnum) -> () {
    for reg in registers.iter_mut() {
        if reg.register == register_to_update {
            match instruction {
                ImmediateToAccumulatorADD => reg.updated_value += value,
                // NOTE(review): `usize` subtraction panics on underflow in debug builds — confirm the desired wrap behaviour.
                ImmediateToAccumulatorSUB => reg.updated_value -= value,
                ImmediateToRegisterMemory => {
                    match memory_mode {
                        MemoryModeEnum::RegisterMode => {
                            // WE NEED THE MNEMONIC.... SIGH!!!!!
                            // (This instruction type alone does not say which arithmetic operation to apply.)
                        }
                        // Memory destinations are not simulated yet, so these modes leave the registers untouched.
                        MemoryModeEnum::MemoryModeNoDisplacement | MemoryModeEnum::MemoryMode8Bit | MemoryModeEnum::MemoryMode16Bit | MemoryModeEnum::DirectMemoryOperation => (),
                    }
                },
                ImmediateToRegisterMOV => reg.updated_value = value,
                _ => () // Conditional jumps, CMP instructions.
            }
            // The register was found and handled; skip the "not found" panic below.
            return
        }
    }
    panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update);
}

And I realised that I didn’t include the mnemonic (add, sub, cmp, etc.) in the InstructionType enum member itself for this particular member, so now I have to pass the mnemonic into the function too to determine what arithmetic to do… Whatever, let’s do that, since I determine the mnemonic in the main function anyway.

Function signature now becomes:

pub fn update_register_value(register_to_update: &'static str, value: usize, registers: &mut Vec<Register>, instruction: InstructionType, memory_mode: MemoryModeEnum, mnemonic: &'static str) -> ()

We now call it with:

update_register_value(reg_value.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);

We now do this in the RegisterMode branch:

for reg in registers.iter_mut() {
        if reg.register == register_to_update {
            match instruction {
                ImmediateToAccumulatorADD => reg.updated_value += value,
                ImmediateToAccumulatorSUB => reg.updated_value -= value,
                ImmediateToRegisterMemory => {
                    match memory_mode {
                        MemoryModeEnum::RegisterMode => {
                            match mnemonic {
                                "mov" => reg.updated_value = value,
                                "add" => reg.updated_value += value,
                                "sub" => reg.updated_value -= value,
                                "cmp" => (),
                                _ => panic!("Unknown mnemonic {}", mnemonic),
                            }
                        }
                        MemoryModeEnum::MemoryModeNoDisplacement | MemoryModeEnum::MemoryMode8Bit | MemoryModeEnum::MemoryMode16Bit | MemoryModeEnum::DirectMemoryOperation => (),
                    }
										return
                },
                ImmediateToRegisterMOV => reg.updated_value = value,
                _ => () // Conditional jumps, CMP instructions.
            }
            return
        }
    }
    panic!("Register not found, this should never happen. Register that was not found was {}", register_to_update);
}

Okay so in theory the update_register_value function now does the correct stuff.

I’ll now finish up the calling code.

if reg_is_dest {
    let reg_value = get_register_state(&reg_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(reg_value.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let rm_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(reg_value.register, rm_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
} else {
    let rm = get_register_state(&rm_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(rm.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let reg_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(rm.register, reg_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
}

The IDE is telling me that there is some duplication. I might get rid of it once I know this logic is actually correct but for now this will do.

Ok, now. Somewhere at the end I have to update the original value. Maybe at the end of the main loop? I know that it will have to happen after I print out the original and updated value.

Meanwhile, while I think about the best place to do this, I’ll move the printing code after the register updating code:

So.. this:

let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_register, &rm_register, reg_immediate, rm_immediate);
println!("{}", formatted_instruction);

instruction_count += 1;
i += instruction_size;

// TODO: do the register value updating here and then extract function if necessary.
if reg_is_dest {
    let reg_value = get_register_state(&reg_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(reg_value.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let rm_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(reg_value.register, rm_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
} else {
    let rm = get_register_state(&rm_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(rm.register, reg_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let reg_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(rm.register, reg_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
}

Becomes this:

// TODO: do the register value updating here and then extract function if necessary.
if reg_is_dest {
    let reg_value = get_register_state(&reg_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(reg_value.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let rm_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(reg_value.register, rm_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
} else {
    let rm = get_register_state(&rm_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        // in this branch we can just update the value with the immediate.
        update_register_value(rm.register, reg_immediate, &mut registers, instruction, memory_mode, mnemonic);
    } else if instruction_is_conditional_jump(instruction) {
        // In the future we might update the instruction pointer in this branch if we want to.
    } else {
        let reg_value = get_register_state(&rm_register, &registers);
        // TODO: Do I pass in the original_value or updated_value here?
        update_register_value(rm.register, reg_value.original_value, &mut registers, instruction, memory_mode, mnemonic);
    }
}

let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_register, &rm_register, reg_immediate, rm_immediate);
println!("{}", formatted_instruction);
instruction_count += 1;
i += instruction_size;

I tried some hacky shit because the Rust compiler is so strict, and I erased it all. I’m now going to eat and take my fingers off the keyboard for a while in the hope that I’ll come up with a good solution. My brain is currently not working.

Ok, back. First of all I’m going to clean up the code block mess that is shown in the previous picture. It became this instead:

{
    let reg = get_register_state(&reg_register, &registers);
    let rm = get_register_state(&rm_register, &registers);
    if instruction_is_immediate_to_register(instruction) {
        if reg_is_dest {
            // in this branch we can just update the value with the immediate.
            // FIXME: do we want to pass the reg.register in here or can we somehow do it in a better way
            update_register_value(reg.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
        } else {
            // in this branch we can just update the value with the immediate.
            update_register_value(rm.register, reg_immediate, &mut registers, instruction, memory_mode, mnemonic);
        }
    } else if instruction_is_conditional_jump(instruction) {
        // Leaving this here in case we have to implement it later with instruction pointers.
    } else {
        if reg_is_dest {
            update_register_value(reg.register, rm.updated_value, &mut registers, instruction, memory_mode, mnemonic);
        } else {
            // in this branch we can just update the value with the immediate.
            update_register_value(rm.register, reg.updated_value, &mut registers, instruction, memory_mode, mnemonic);
        }
    }
}

Ahh, much better and smaller :). The FIXME is still relevant though…

Anyway, lets try to get the original_value updating code working now.

Okay my first try is this:

let reg = get_register_state(&reg_register, &registers);
let rm = get_register_state(&rm_register, &registers);

if reg_is_dest {
    update_original_register_value(reg.register, reg.updated_value, &mut registers);
} else {
    update_original_register_value(rm.register, rm.updated_value, &mut registers);
}

Right so unfortunately I’m calling get_register_state twice more, but it doesn’t matter since it only iterates around 15 times and computers are fast.

In the branch we just check which register was changed to know which one should be updated.

Let’s try to run it… Our code hits an assert. Expected.

thread 'main' panicked at 'Register not found, this should never happen. Register that was not found was ', src/registers.rs:44:5

Good thing we had the assertion in place. I have no idea why that assertion hit. Let’s set a breakpoint at the panic.

Right, looking at the call stack we can see that before we hit this breakpoint, it was the third and fourth call to the function that triggered this assertion.

main.rs:364 calls the get_register_state function that triggers the panic.

main.rs:364 calls the get_register_state function that triggers the panic.

Let’s walk back to the line 364 inside of main and see what’s happening in there.

Untitled

Why is rm_register blank…

Ah yes, because it should be.

Untitled

The instruction is an ImmediateToRegisterMOV instruction meaning that the rm_register is actually not even used because the R/M field is used to embed an immediate value, the reg field being the destination.

Untitled

So, let’s handle that.

if instruction_is_immediate_to_register(instruction) {
	  let reg = get_register_state(&reg_register, &registers);
	  if reg_is_dest || instruction == ImmediateToRegisterMOV {
	      // in this branch we can just update the value with the immediate.
	      // FIXME: do we want to pass the reg.register in here or can we somehow do it in a better way
	      update_register_value(reg.register, rm_immediate, &mut registers, instruction, memory_mode, mnemonic);
	  } else {
	      let rm = get_register_state(&rm_register, &registers);
	      // in this branch we can just update the value with the immediate.
	      update_register_value(rm.register, reg_immediate, &mut registers, instruction, memory_mode, mnemonic);
	  }
	} else if instruction_is_conditional_jump(instruction) {
	  // Leaving this here in case we have to implement it later with instruction pointers.
	} else {
	  let reg = get_register_state(&reg_register, &registers);
	  let rm = get_register_state(&rm_register, &registers);
	  if reg_is_dest {
	      update_register_value(reg.register, rm.updated_value, &mut registers, instruction, memory_mode, mnemonic);
	  } else {
	      let rm = get_register_state(&rm_register, &registers);
	      // in this branch we can just update the value with the immediate.
	      update_register_value(rm.register, reg.updated_value, &mut registers, instruction, memory_mode, mnemonic);
	  }
}

So now, with these changes, the code only calls get_register_state for a register in the places where we know that register is actually used.

Let’s try to run the code:

thread 'main' panicked at 'Register not found, this should never happen. Register that was not found was ', src/registers.rs:44:5

Ah right, forgot to update the code in the other code block. Let’s fix that:

This:

let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_register, &rm_register, reg_immediate, rm_immediate);
let reg = get_register_state(&reg_register, &registers);
let rm = get_register_state(&rm_register, &registers);

if reg_is_dest {
    println!("{} ; {} -> {}", formatted_instruction, reg.original_value, reg.updated_value);
} else {
    println!("{} ; {} -> {}", formatted_instruction, rm.original_value, rm.updated_value);
} 

// FIXME: this is kinda dumb, how do I do do this in a better way?
if reg_is_dest {
    update_original_register_value(reg.register, reg.updated_value, &mut registers);
} else {
    update_original_register_value(rm.register, rm.updated_value, &mut registers);
}

Became this:

let formatted_instruction = format_instruction(&binary_contents, i, first_byte, second_byte, instruction, mnemonic, is_word_size, memory_mode, reg_is_dest, &reg_register, &rm_register, reg_immediate, rm_immediate);

if reg_is_dest || instruction == ImmediateToRegisterMOV {
    let reg = get_register_state(&reg_register, &registers);
    println!("{} ; {} -> {}", formatted_instruction, reg.original_value, reg.updated_value);
} else {
    let rm = get_register_state(&rm_register, &registers);
    println!("{} ; {} -> {}", formatted_instruction, rm.original_value, rm.updated_value);
}
instruction_count += 1;
i += instruction_size;

// FIXME: this is kinda dumb, how do I do do this in a better way?
if reg_is_dest || instruction == ImmediateToRegisterMOV {
    let reg = get_register_state(&reg_register, &registers);
    update_original_register_value(reg.register, reg.updated_value, &mut registers);
} else {
    let rm = get_register_state(&rm_register, &registers);
    update_original_register_value(rm.register, rm.updated_value, &mut registers);
}

There we go.

Output:

mov ax, 1 ; 0 -> 1
mov bx, 2 ; 0 -> 2
mov cx, 3 ; 0 -> 3
mov dx, 4 ; 0 -> 4
mov sp, 5 ; 0 -> 5
mov bp, 6 ; 0 -> 6
mov si, 7 ; 0 -> 7
mov di, 8 ; 0 -> 8

The file we encoded looked like this:

; ========================================================================
; LISTING 43
; ========================================================================

bits 16

mov ax, 1
mov bx, 2
mov cx, 3
mov dx, 4

mov sp, 5
mov bp, 6
mov si, 7
mov di, 8

Looks good to me.

Listing_0043 done. See you in the next one.