Ready for in-game testing

more optimizations
Improve dead code elimination in optimizer
2025-12-31 03:08:41 -07:00 · 2025-12-31 02:39:57 -07:00 · 2025-12-31 02:37:26 -07:00
17 changed files with 207 additions and 246 deletions
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -80,10 +80,14 @@ cargo test --package compiler --lib -- test::tuple_literals::test::test_tuple_li
 ### Quick Compilation
 !IMPORTANT: make sure you use these commands instead of creating temporary files.
 ```bash
 cd rust_compiler
 # Compile Slang code to IC10 using current compiler changes
 echo 'let x = 5;' | cargo run --bin slang
 # Compile Slang code to IC10 with optimization
 echo 'let x = 5;' | cargo run --bin slang -- -z
 # Or from file
 cargo run --bin slang -- input.slang -o output.ic10
 # Optimize the output with -z flag
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestscomplex_arithmetic.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestscomplex_arithmetic.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 158
 expression: output
 ---
 ## Unoptimized Output
@@ -34,15 +35,11 @@ pop r9
 pop r10
 push sp
 push ra
-add r1 r10 r10
+add r11 r10 r10
-move r11 r1
+move r12 r9
-move r2 r9
+move r13 r8
 move r12 r2
 move r3 r8
 move r13 r3
 add r4 r11 r12
-add r5 r4 r13
+add r15 r4 r13
 move r15 r5
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsdead_code_elimination.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsdead_code_elimination.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 103
 expression: output
 ---
 ## Unoptimized Output
@@ -24,8 +25,8 @@ j main
 pop r8
 push sp
 push ra
-add r1 r8 1
+move r9 20
-move r15 r1
+add r15 r8 1
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsfunction_with_call.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsfunction_with_call.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 70
 expression: output
 ---
 ## Unoptimized Output
@@ -31,13 +32,12 @@ j ra
 ## Optimized Output
-j 10
+j 9
 pop r8
 pop r9
 push sp
 push ra
-add r1 r9 r8
+add r15 r9 r8
 move r15 r1
 pop ra
 pop sp
 j ra
@@ -46,6 +46,7 @@ push ra
 push 5
 push 10
 jal 1
 move r8 r15
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestslarre_script.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestslarre_script.snap
@@ -124,7 +124,7 @@ __internal_L12:
 ## Optimized Output
-j 71
+j 77
 push sp
 push ra
 yield
@@ -139,8 +139,10 @@ push sp
 push ra
 s d0 Setting 1
 jal 1
 move r1 r15
 s d0 Activate 1
 jal 1
 move r2 r15
 s d1 Open 0
 pop ra
 pop sp
@@ -152,53 +154,58 @@ sle r1 r8 1
 ls r15 d0 255 Seeding
 slt r2 r15 1
 or r3 r1 r2
-beqz r3 30
+beqz r3 32
-j 68
+j 74
 ls r15 d0 255 Mature
-beqz r15 35
+beqz r15 37
 yield
 s d0 Activate 1
-j 30
+j 32
-ls r15 d0 255 Occupied
+ls r9 d0 255 Occupied
 move r9 r15
 s d0 Setting 1
 push r8
 push r9
 jal 1
 pop r9
 pop r8
 move r4 r15
 push r8
 push r9
 jal 11
 pop r9
 pop r8
-beqz r9 54
+move r5 r15
 beqz r9 58
 push r8
 push r9
 jal 11
 pop r9
 pop r8
 move r6 r15
 s d0 Setting r8
 push r8
 push r9
 jal 1
 pop r9
 pop r8
 move r6 r15
 ls r15 d0 0 Occupied
-beqz r15 63
+beqz r15 68
 s d0 Activate 1
 push r8
 push r9
 jal 1
 pop r9
 pop r8
 move r7 r15
 pop ra
 pop sp
 j ra
 move r8 0
 yield
 l r1 d0 Idle
-bne r1 0 75
+bne r1 0 82
-j 71
+j 78
 add r3 r8 1
 sgt r4 r3 19
 add r5 r8 1
@@ -207,8 +214,10 @@ move r9 r6
 push r8
 push r9
 push r8
-jal 21
+jal 23
 pop r9
 pop r8
 move r7 r15
 s d0 Setting r9
-j 71
+move r8 r9
 j 78
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsleaf_function_no_stack_frame.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsleaf_function_no_stack_frame.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 144
 expression: output
 ---
 ## Unoptimized Output
@@ -20,4 +21,10 @@ j ra
 j main
 pop r8
 push sp
 push ra
 add r1 r8 1
 move r8 r1
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsnested_function_calls.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsnested_function_calls.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 173
 expression: output
 ---
 ## Unoptimized Output
@@ -66,8 +67,7 @@ pop r8
 pop r9
 push sp
 push ra
-add r1 r9 r8
+add r15 r9 r8
 move r15 r1
 pop ra
 pop sp
 j ra
@@ -75,8 +75,7 @@ pop r8
 pop r9
 push sp
 push ra
-add r1 r9 r9
+add r15 r9 r9
 move r15 r1
 pop ra
 pop sp
 j ra
@@ -97,7 +96,7 @@ push r9
 push r10
 push r10
 push 2
-jal 10
+jal 9
 pop r10
 pop r9
 pop r8
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestspeephole_comparison_fusion.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestspeephole_comparison_fusion.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 116
 expression: output
 ---
 ## Unoptimized Output
@@ -24,5 +25,10 @@ j ra
 j main
 pop r8
 pop r9
-ble r9 r8 4
+push sp
 push ra
 ble r9 r8 7
 move r10 1
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsselect_optimization.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsselect_optimization.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 133
 expression: output
 ---
 ## Unoptimized Output
@@ -29,8 +30,7 @@ j main
 pop r8
 push sp
 push ra
-select r9 r8 10 20
+select r15 r8 10 20
 move r15 r9
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestssimple_leaf_function.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestssimple_leaf_function.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 60
 expression: output
 ---
 ## Unoptimized Output
@@ -17,4 +18,9 @@ j ra
 ## Optimized Output
 j main
 push sp
 push ra
 move r8 10
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsstrength_reduction.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststestsstrength_reduction.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 91
 expression: output
 ---
 ## Unoptimized Output
@@ -23,8 +24,7 @@ j main
 pop r8
 push sp
 push ra
-add r1 r8 r8
+add r15 r8 r8
 move r15 r1
 pop ra
 pop sp
 j ra
--- a/rust_compiler/libs/integration_tests/src/snapshots/integration_teststeststuples.snap
+++ b/rust_compiler/libs/integration_tests/src/snapshots/integration_teststeststuples.snap
@@ -1,5 +1,6 @@
 ---
 source: libs/integration_tests/src/lib.rs
 assertion_line: 206
 expression: output
 ---
 ## Unoptimized Output
@@ -54,12 +55,11 @@ __internal_L4:
 ## Optimized Output
-j 25
+j 23
 pop r8
 push sp
 push ra
-add r1 r8 1
+add r15 r8 1
 move r15 r1
 pop ra
 pop sp
 j ra
@@ -74,21 +74,20 @@ jal 1
 move r3 r15
 push r3
 sub r0 sp 5
-get r0 db r0
+get r15 db r0
 move r15 r0
 sub r0 sp 4
 get ra db r0
 j ra
 yield
-jal 9
+jal 8
 pop r0
 pop r9
 pop r8
 move sp r15
-jal 9
+jal 8
 pop r0
 pop r0
 pop r9
 move sp r15
 s db Setting r9
-j 25
+j 23
--- a/rust_compiler/libs/optimizer/src/constant_propagation.rs
+++ b/rust_compiler/libs/optimizer/src/constant_propagation.rs
@@ -25,24 +25,12 @@ pub fn constant_propagation<'a>(
            Instruction::Add(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x + y),
            Instruction::Sub(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x - y),
            Instruction::Mul(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x * y),
-            Instruction::Div(dst, a, b) => {
+            Instruction::Div(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| {
-                try_fold_math(
+                if y.is_zero() { Decimal::ZERO } else { x / y }
-                    dst,
+            }),
-                    a,
+            Instruction::Mod(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| {
-                    b,
+                if y.is_zero() { Decimal::ZERO } else { x % y }
-                    &registers,
+            }),
                    |x, y| if y.is_zero() { x } else { x / y },
                )
            }
            Instruction::Mod(dst, a, b) => {
                try_fold_math(
                    dst,
                    a,
                    b,
                    &registers,
                    |x, y| if y.is_zero() { x } else { x % y },
                )
            }
            Instruction::BranchEq(a, b, l) => {
                try_resolve_branch(a, b, l, &registers, |x, y| x == y)
            }
--- a/rust_compiler/libs/optimizer/src/dead_store_elimination.rs
+++ b/rust_compiler/libs/optimizer/src/dead_store_elimination.rs
@@ -7,7 +7,20 @@ use std::collections::HashMap;
 pub fn dead_store_elimination<'a>(
    input: Vec<InstructionNode<'a>>,
 ) -> (Vec<InstructionNode<'a>>, bool) {
-    let mut changed = false;
+    // Forward pass: Remove writes that are immediately overwritten
    let (input, forward_changed) = eliminate_overwritten_stores(input);
    // Note: Backward pass disabled for now - it needs more work to handle all cases correctly
    // The forward pass is sufficient for most common patterns
    // (e.g., move r6 r15 immediately followed by move r6 r15 again)
    (input, forward_changed)
 }
 /// Forward pass: Remove stores that are overwritten before being read
 fn eliminate_overwritten_stores<'a>(
    input: Vec<InstructionNode<'a>>,
 ) -> (Vec<InstructionNode<'a>>, bool) {
    let mut last_write: HashMap<u8, usize> = HashMap::new();
    let mut to_remove = Vec::new();
@@ -31,7 +44,6 @@ pub fn dead_store_elimination<'a>(
                if !was_used {
                    // Previous write was dead
                    to_remove.push(prev_idx);
                    changed = true;
                }
            }
@@ -39,34 +51,31 @@ pub fn dead_store_elimination<'a>(
            last_write.insert(dest_reg, i);
        }
-        // Before clearing on labels/calls, check if current tracked writes are dead
+        // Handle control flow instructions
-        if matches!(
+        match &node.instruction {
-            node.instruction,
+            // JumpAndLink (function calls) only clobbers the return register (r15)
-            Instruction::LabelDef(_) | Instruction::JumpAndLink(_)
+            // We can continue tracking other registers across function calls
-        ) {
+            Instruction::JumpAndLink(_) => {
-            // Check all currently tracked writes to see if they're dead
+                last_write.remove(&15);
            for (&reg, &idx) in &last_write {
                // Don't remove writes to r15 (return register)
                if reg == 15 {
                    continue;
                }
                // Check if this write was used between write and now
                let was_used = input[idx + 1..i]
                    .iter()
                    .any(|n| reg_is_read_or_affects_control(&n.instruction, reg));
                if !was_used && !to_remove.contains(&idx) {
                    to_remove.push(idx);
                    changed = true;
                }
            }
-
+            // Other control flow instructions create complexity - clear all tracking
-            last_write.clear();
+            Instruction::Jump(_)
            | Instruction::LabelDef(_)
            | Instruction::BranchEq(_, _, _)
            | Instruction::BranchNe(_, _, _)
            | Instruction::BranchGt(_, _, _)
            | Instruction::BranchLt(_, _, _)
            | Instruction::BranchGe(_, _, _)
            | Instruction::BranchLe(_, _, _)
            | Instruction::BranchEqZero(_, _)
            | Instruction::BranchNeZero(_, _) => {
                last_write.clear();
            }
            _ => {}
        }
    }
-    if changed {
+    if !to_remove.is_empty() {
        let output = input
            .into_iter()
            .enumerate()
--- a/rust_compiler/libs/optimizer/src/leaf_function_optimization.rs
+++ b/rust_compiler/libs/optimizer/src/leaf_function_optimization.rs
@@ -1,150 +1,41 @@
 use crate::leaf_function::find_leaf_functions;
-use il::{Instruction, InstructionNode, Operand};
+use il::InstructionNode;
 use rust_decimal::Decimal;
 use std::collections::{HashMap, HashSet};
 /// Helper: Check if a function body contains unsafe stack manipulation.
 fn function_has_complex_stack_ops(
    instructions: &[InstructionNode],
    start_idx: usize,
    end_idx: usize,
 ) -> bool {
    for instruction in instructions.iter().take(end_idx).skip(start_idx) {
        match instruction.instruction {
            Instruction::Push(_) | Instruction::Pop(_) => return true,
            Instruction::Add(Operand::StackPointer, _, _)
            | Instruction::Sub(Operand::StackPointer, _, _)
            | Instruction::Mul(Operand::StackPointer, _, _)
            | Instruction::Div(Operand::StackPointer, _, _)
            | Instruction::Move(Operand::StackPointer, _) => return true,
            _ => {}
        }
    }
    false
 }
 /// Pass: Leaf Function Optimization
 /// If a function makes no calls (is a leaf), it doesn't need to save/restore `ra`.
 ///
 /// NOTE: This optimization is DISABLED due to correctness issues.
 /// The optimization was designed for a specific calling convention (GET/PUT for RA)
 /// but the compiler generates POP ra for return address restoration. Without proper
 /// tracking of both conventions and validation of balanced push/pop pairs, this
 /// optimization corrupts the stack frame by:
 ///
 /// 1. Removing `push ra` but not `pop ra`, leaving unbalanced push/pop pairs
 /// 2. Not accounting for parameter pops that occur before `push sp`
 /// 3. Assuming all RA restoration uses GET instruction, but code uses POP
 ///
 /// Example of broken optimization:
 /// ```text
 /// Unoptimized:          Optimized (BROKEN):
 /// compare:              pop r8
 /// pop r8                pop r9
 /// pop r9                ble r9 r8 5
 /// push sp               move r10 1
 /// push ra               j ra
 /// sgt r1 r9 r8          ^ Missing stack frame!
 /// ...
 /// pop ra
 /// pop sp
 /// j ra
 /// ```
 ///
 /// Future work: Fix by handling both POP and GET calling conventions, validating
 /// balanced push/pop pairs, and accounting for parameter pops.
 pub fn optimize_leaf_functions<'a>(
    input: Vec<InstructionNode<'a>>,
 ) -> (Vec<InstructionNode<'a>>, bool) {
-    let leaves = find_leaf_functions(&input);
+    // Optimization disabled - returns input unchanged
-    if leaves.is_empty() {
+    #[allow(unused)]
-        return (input, false);
+    let _leaves = find_leaf_functions(&input);
-    }
+    (input, false)
    let mut changed = false;
    let mut to_remove = HashSet::new();
    let mut func_restore_indices = HashMap::new();
    let mut func_ra_offsets = HashMap::new();
    let mut current_function: Option<String> = None;
    let mut function_start_indices = HashMap::new();
    // First scan: Identify instructions to remove and capture RA offsets
    for (i, node) in input.iter().enumerate() {
        match &node.instruction {
            Instruction::LabelDef(label) if !label.starts_with("__internal_L") => {
                current_function = Some(label.to_string());
                function_start_indices.insert(label.to_string(), i);
            }
            Instruction::Push(Operand::ReturnAddress) => {
                if let Some(func) = &current_function
                    && leaves.contains(func)
                {
                    to_remove.insert(i);
                }
            }
            Instruction::Get(Operand::ReturnAddress, _, Operand::Register(_)) => {
                if let Some(func) = &current_function
                    && leaves.contains(func)
                {
                    to_remove.insert(i);
                    func_restore_indices.insert(func.clone(), i);
                    // Look back for the address calc: `sub r0 sp OFFSET`
                    if i > 0
                        && let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
                            &input[i - 1].instruction
                    {
                        func_ra_offsets.insert(func.clone(), *n);
                        to_remove.insert(i - 1);
                    }
                }
            }
            _ => {}
        }
    }
    // Safety Check: Verify functions don't have complex stack ops
    let mut safe_functions = HashSet::new();
    for (func, start_idx) in &function_start_indices {
        if let Some(restore_idx) = func_restore_indices.get(func) {
            let check_start = if to_remove.contains(&(start_idx + 1)) {
                start_idx + 2
            } else {
                start_idx + 1
            };
            if !function_has_complex_stack_ops(&input, check_start, *restore_idx) {
                safe_functions.insert(func.clone());
                changed = true;
            }
        }
    }
    if !changed {
        return (input, false);
    }
    // Second scan: Rebuild with adjustments
    let mut output = Vec::with_capacity(input.len());
    let mut processing_function: Option<String> = None;
    for (i, mut node) in input.into_iter().enumerate() {
        if to_remove.contains(&i)
            && let Some(func) = &processing_function
            && safe_functions.contains(func)
        {
            continue;
        }
        if let Instruction::LabelDef(l) = &node.instruction
            && !l.starts_with("__internal_L")
        {
            processing_function = Some(l.to_string());
        }
        // Apply Stack Adjustments
        if let Some(func) = &processing_function
            && safe_functions.contains(func)
            && let Some(ra_offset) = func_ra_offsets.get(func)
        {
            // Stack Cleanup Adjustment
            if let Instruction::Sub(
                Operand::StackPointer,
                Operand::StackPointer,
                Operand::Number(n),
            ) = &mut node.instruction
            {
                let new_n = *n - Decimal::from(1);
                if new_n.is_zero() {
                    continue;
                }
                *n = new_n;
            }
            // Stack Variable Offset Adjustment
            if let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
                &mut node.instruction
                && *n > *ra_offset
            {
                *n -= Decimal::from(1);
            }
        }
        output.push(node);
    }
    (output, true)
 }
--- a/rust_compiler/libs/optimizer/src/peephole_optimization.rs
+++ b/rust_compiler/libs/optimizer/src/peephole_optimization.rs
@@ -358,11 +358,12 @@ fn find_matching_ra_pop<'a>(
            return Some((idx, &instructions[1..idx]));
        }
-        // Stop searching if we hit a jump (different control flow)
+        // Stop searching if we hit a jump (different control flow) or a function label
-        // Labels are OK - they're just markers
+        // Every label definition also ends the search: the check below matches any LabelDef,
         // so internal markers are treated like the function labels that mark a function boundary
        if matches!(
            node.instruction,
-            Instruction::Jump(_) | Instruction::JumpRelative(_)
+            Instruction::Jump(_) | Instruction::JumpRelative(_) | Instruction::LabelDef(_)
        ) {
            return None;
        }
--- a/rust_compiler/libs/optimizer/src/register_forwarding.rs
+++ b/rust_compiler/libs/optimizer/src/register_forwarding.rs
@@ -1,5 +1,6 @@
 use crate::helpers::{get_destination_reg, reg_is_read, set_destination_reg};
-use il::{Instruction, InstructionNode};
+use il::{Instruction, InstructionNode, Operand};
 use std::collections::HashMap;
 /// Pass: Register Forwarding
 /// Eliminates intermediate moves by writing directly to the final destination.
@@ -10,6 +11,20 @@ pub fn register_forwarding<'a>(
    let mut changed = false;
    let mut i = 0;
    // Build a map of label positions to detect backward jumps
    // Use String keys to avoid lifetime issues with references into input
    let label_positions: HashMap<String, usize> = input
        .iter()
        .enumerate()
        .filter_map(|(idx, node)| {
            if let Instruction::LabelDef(label) = &node.instruction {
                Some((label.to_string(), idx))
            } else {
                None
            }
        })
        .collect();
    while i < input.len().saturating_sub(1) {
        let next_idx = i + 1;
@@ -48,23 +63,51 @@ pub fn register_forwarding<'a>(
                    break;
                }
-                // Conservative: assume liveness might leak at labels/jumps
+                // Function calls (jal) clobber the return register (r15)
-                if matches!(
+                // So if we're tracking r15 and hit a function call, the old value is dead
-                    node.instruction,
+                if matches!(node.instruction, Instruction::JumpAndLink(_)) && temp_reg == 15 {
                    Instruction::LabelDef(_) | Instruction::Jump(_) | Instruction::JumpAndLink(_)
                ) {
                    temp_is_dead = false;
                    break;
                }
                // Labels are just markers - they don't affect register liveness
                // But backward jumps create loops we need to analyze carefully
                let jump_target = match &node.instruction {
                    Instruction::Jump(Operand::Label(target)) => Some(target.as_ref()),
                    Instruction::BranchEq(_, _, Operand::Label(target))
                    | Instruction::BranchNe(_, _, Operand::Label(target))
                    | Instruction::BranchGt(_, _, Operand::Label(target))
                    | Instruction::BranchLt(_, _, Operand::Label(target))
                    | Instruction::BranchGe(_, _, Operand::Label(target))
                    | Instruction::BranchLe(_, _, Operand::Label(target))
                    | Instruction::BranchEqZero(_, Operand::Label(target))
                    | Instruction::BranchNeZero(_, Operand::Label(target)) => Some(target.as_ref()),
                    _ => None,
                };
                if let Some(target) = jump_target {
                    // Check if this is a backward jump (target appears before current position)
                    if let Some(&target_pos) = label_positions.get(target) {
                        if target_pos < i {
                            // Backward jump - could loop back, register might be live
                            temp_is_dead = false;
                            break;
                        }
                        // Forward jump is OK - doesn't affect liveness before it
                    }
                }
            }
            if temp_is_dead {
-                // Rewrite to use final destination directly
+                // Safety check: ensure final_reg is not used as an operand in the current instruction.
-                if let Some(new_instr) = set_destination_reg(&input[i].instruction, final_reg) {
+                // This prevents generating invalid instructions like `sub r5 r0 r5` (read and write same register).
-                    input[i].instruction = new_instr;
+                if !reg_is_read(&input[i].instruction, final_reg) {
-                    input.remove(next_idx);
+                    // Rewrite to use final destination directly
-                    changed = true;
+                    if let Some(new_instr) = set_destination_reg(&input[i].instruction, final_reg) {
-                    continue;
+                        input[i].instruction = new_instr;
                        input.remove(next_idx);
                        changed = true;
                        continue;
                    }
                }
            }
        }