Fix source maps

2025-12-12 21:48:25 -07:00
parent 20f7cb9a4b
commit 9de59ee3b1
7 changed files with 333 additions and 200 deletions


@@ -1,4 +1,4 @@
use il::{Instruction, InstructionNode, Operand};
use il::{Instruction, InstructionNode, Instructions, Operand};
use rust_decimal::Decimal;
use std::collections::{HashMap, HashSet};
@@ -6,7 +6,8 @@ mod leaf_function;
use leaf_function::find_leaf_functions;
/// Entry point for the optimizer.
pub fn optimize<'a>(mut instructions: Vec<InstructionNode<'a>>) -> Vec<InstructionNode<'a>> {
pub fn optimize<'a>(instructions: Instructions<'a>) -> Instructions<'a> {
let mut instructions = instructions.into_inner();
let mut changed = true;
let mut pass_count = 0;
const MAX_PASSES: usize = 10;
@@ -49,13 +50,35 @@ pub fn optimize<'a>(mut instructions: Vec<InstructionNode<'a>>) -> Vec<Instructi
}
// Final Pass: Resolve Labels to Line Numbers
resolve_labels(instructions)
Instructions::new(resolve_labels(instructions))
}
/// Helper: Check if a function body contains unsafe stack manipulation.
/// Returns true if the function modifies SP in a way that makes static RA offset analysis unsafe.
fn function_has_complex_stack_ops(
instructions: &[InstructionNode],
start_idx: usize,
end_idx: usize,
) -> bool {
for instruction in instructions.iter().take(end_idx).skip(start_idx) {
match instruction.instruction {
Instruction::Push(_) | Instruction::Pop(_) => return true,
// Check for explicit SP modification
Instruction::Add(Operand::StackPointer, _, _)
| Instruction::Sub(Operand::StackPointer, _, _)
| Instruction::Mul(Operand::StackPointer, _, _)
| Instruction::Div(Operand::StackPointer, _, _)
| Instruction::Move(Operand::StackPointer, _) => return true,
_ => {}
}
}
false
}
/// Pass: Leaf Function Optimization
/// If a function makes no calls (is a leaf), it doesn't need to save/restore `ra`.
fn optimize_leaf_functions<'a>(
mut input: Vec<InstructionNode<'a>>,
input: Vec<InstructionNode<'a>>,
) -> (Vec<InstructionNode<'a>>, bool) {
let leaves = find_leaf_functions(&input);
if leaves.is_empty() {
@@ -64,40 +87,44 @@ fn optimize_leaf_functions<'a>(
let mut changed = false;
let mut to_remove = HashSet::new();
let mut current_function: Option<String> = None;
// Map of FunctionName -> The stack offset where RA was stored.
// We need this to adjust other stack accesses (arguments vs locals).
// We map function names to the INDEX of the instruction that restores RA.
// We use this to validate the function body later.
let mut func_restore_indices = HashMap::new();
let mut func_ra_offsets = HashMap::new();
let mut current_function: Option<String> = None;
let mut function_start_indices = HashMap::new();
// First scan: Identify instructions to remove and capture RA offsets
for (i, node) in input.iter().enumerate() {
match &node.instruction {
Instruction::LabelDef(label) => {
current_function = Some(label.to_string());
function_start_indices.insert(label.to_string(), i);
}
Instruction::Push(Operand::ReturnAddress) => {
if let Some(func) = &current_function {
if leaves.contains(func) {
to_remove.insert(i);
changed = true;
}
if let Some(func) = &current_function
&& leaves.contains(func)
{
to_remove.insert(i);
}
}
Instruction::Get(Operand::ReturnAddress, _, Operand::Register(_)) => {
// This is the restore instruction: `get ra db r0`
if let Some(func) = &current_function {
if leaves.contains(func) {
to_remove.insert(i);
// Look back for the address calc: `sub r0 sp OFFSET`
if i > 0 {
if let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
&input[i - 1].instruction
{
func_ra_offsets.insert(func.clone(), *n);
to_remove.insert(i - 1);
}
}
if let Some(func) = &current_function
&& leaves.contains(func)
{
to_remove.insert(i);
func_restore_indices.insert(func.clone(), i);
// Look back for the address calc: `sub r0 sp OFFSET`
if i > 0
&& let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
&input[i - 1].instruction
{
func_ra_offsets.insert(func.clone(), *n);
to_remove.insert(i - 1);
}
}
}
@@ -105,54 +132,80 @@ fn optimize_leaf_functions<'a>(
}
}
// Safety Check: Verify that functions marked for optimization don't have complex stack ops.
// If they do, unmark them.
let mut safe_functions = HashSet::new();
for (func, start_idx) in &function_start_indices {
if let Some(restore_idx) = func_restore_indices.get(func) {
// Check instructions between start and restore using the helper function.
// We need to skip the `push ra` we just marked for removal, otherwise the helper
// will flag it as a complex op (Push).
// `start_idx` is the LabelDef. `start_idx + 1` is typically `push ra`.
let check_start = if to_remove.contains(&(start_idx + 1)) {
start_idx + 2
} else {
start_idx + 1
};
// `restore_idx` points to the `get ra` instruction. The helper scans up to `end_idx` exclusive,
// so we don't need to worry about the restore instruction itself.
if !function_has_complex_stack_ops(&input, check_start, *restore_idx) {
safe_functions.insert(func.clone());
changed = true;
}
}
}
if !changed {
return (input, false);
}
// Second scan: Rebuild with adjustments
// Second scan: Rebuild with adjustments, but only for SAFE functions
let mut output = Vec::with_capacity(input.len());
let mut processing_function: Option<String> = None;
for (i, mut node) in input.into_iter().enumerate() {
if to_remove.contains(&i) {
continue;
if to_remove.contains(&i)
&& let Some(func) = &processing_function
&& safe_functions.contains(func)
{
continue; // SKIP (Remove)
}
if let Instruction::LabelDef(l) = &node.instruction {
processing_function = Some(l.to_string());
}
// Apply Stack Adjustments if we are inside a leaf function that we optimized
if let Some(func) = &processing_function {
if let Some(ra_offset) = func_ra_offsets.get(func) {
// If this is the stack cleanup `sub sp sp N`, decrement N by 1 (since we removed push ra)
if let Instruction::Sub(
Operand::StackPointer,
Operand::StackPointer,
Operand::Number(n),
) = &mut node.instruction
{
let new_n = *n - Decimal::from(1);
if new_n.is_zero() {
continue; // Remove instruction if 0
}
*n = new_n;
// Apply Stack Adjustments
if let Some(func) = &processing_function
&& safe_functions.contains(func)
&& let Some(ra_offset) = func_ra_offsets.get(func)
{
// 1. Stack Cleanup Adjustment
if let Instruction::Sub(
Operand::StackPointer,
Operand::StackPointer,
Operand::Number(n),
) = &mut node.instruction
{
// Decrease cleanup amount by 1 (for the removed RA)
let new_n = *n - Decimal::from(1);
if new_n.is_zero() {
continue;
}
*n = new_n;
}
// Adjust stack variable accesses relative to the removed RA.
// Compiler layout: [Args] [RA] [Locals/Temps]
// Stack grows up (increment sp on push).
// Access is `sp - offset`.
// Deeper items (Args) have LARGER offsets than RA.
// Shallower items (Locals) have SMALLER offsets than RA.
// Since RA is gone, items deeper than RA (Args) effectively shift "down" (index - 1).
if let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
&mut node.instruction
{
if *n > *ra_offset {
*n -= Decimal::from(1);
}
}
// 2. Stack Variable Offset Adjustment
// Since we verified the function is "Simple" (no nested stack mods),
// we can safely assume offsets > ra_offset need shifting.
if let Instruction::Sub(_, Operand::StackPointer, Operand::Number(n)) =
&mut node.instruction
&& *n > *ra_offset
{
*n -= Decimal::from(1);
}
}
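The offset-shift rule in the hunk above is easy to check in isolation. A minimal standalone sketch (not part of this commit; `adjust_offset` is a hypothetical helper, and only the `rust_decimal` crate already imported by this file is assumed):

use rust_decimal::Decimal;

/// Hypothetical helper mirroring the adjustment: once `push ra` is removed,
/// any `sp - offset` access deeper than the old RA slot shifts down by one.
fn adjust_offset(offset: Decimal, ra_offset: Decimal) -> Decimal {
    if offset > ra_offset {
        offset - Decimal::from(1)
    } else {
        offset
    }
}

fn main() {
    let ra = Decimal::from(3); // RA used to live at `sp - 3`
    assert_eq!(adjust_offset(Decimal::from(5), ra), Decimal::from(4)); // argument: shifts down
    assert_eq!(adjust_offset(Decimal::from(1), ra), Decimal::from(1)); // local: unchanged
}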
@@ -173,16 +226,11 @@ fn analyze_clobbers(instructions: &[InstructionNode]) -> HashMap<String, HashSet
clobbers.insert(label.to_string(), HashSet::new());
}
if let Some(label) = &current_label {
if let Some(reg) = get_destination_reg(&node.instruction) {
if let Some(set) = clobbers.get_mut(label) {
set.insert(reg);
}
}
// Note: If we call another function, we technically clobber whatever IT clobbers
// (unless we save/restore it, which counts as a write anyway).
// This simple pass relies on the fact that any register modification (including restore) is a 'write'.
if let Some(label) = &current_label
&& let Some(reg) = get_destination_reg(&node.instruction)
&& let Some(set) = clobbers.get_mut(label)
{
set.insert(reg);
}
}
clobbers
@@ -191,18 +239,18 @@ fn analyze_clobbers(instructions: &[InstructionNode]) -> HashMap<String, HashSet
/// Pass: Function Call Optimization
/// Removes Push/Restore pairs surrounding a JAL if the target function does not clobber that register.
fn optimize_function_calls<'a>(
mut input: Vec<InstructionNode<'a>>,
input: Vec<InstructionNode<'a>>,
) -> (Vec<InstructionNode<'a>>, bool) {
let clobbers = analyze_clobbers(&input);
let mut changed = false;
let mut to_remove = HashSet::new();
let mut stack_adjustments = HashMap::new(); // Index of `sub sp sp N` -> amount to subtract
let mut stack_adjustments = HashMap::new();
let mut i = 0;
while i < input.len() {
if let Instruction::JumpAndLink(Operand::Label(target)) = &input[i].instruction {
let target_key = target.to_string();
// If we don't have info on the function (e.g. extern or complex), skip
if let Some(func_clobbers) = clobbers.get(&target_key) {
// 1. Identify Pushes immediately preceding the JAL
let mut pushes = Vec::new(); // (index, register)
@@ -221,54 +269,67 @@ fn optimize_function_calls<'a>(
}
// 2. Identify Restores immediately following the JAL
// Compiler emits: sub r0 sp Offset, get Reg db r0.
let mut restores = Vec::new(); // (index_of_get, register, index_of_sub)
let mut scan_fwd = i + 1;
while scan_fwd < input.len() {
// Skip over the 'sub r0 sp X' address calculation lines
// Skip 'sub r0 sp X'
if let Instruction::Sub(Operand::Register(0), Operand::StackPointer, _) =
&input[scan_fwd].instruction
{
// Check next instruction for the Get
if scan_fwd + 1 < input.len() {
if let Instruction::Get(Operand::Register(r), _, Operand::Register(0)) =
if scan_fwd + 1 < input.len()
&& let Instruction::Get(Operand::Register(r), _, Operand::Register(0)) =
&input[scan_fwd + 1].instruction
{
restores.push((scan_fwd + 1, *r, scan_fwd));
scan_fwd += 2;
continue;
}
{
restores.push((scan_fwd + 1, *r, scan_fwd));
scan_fwd += 2;
continue;
}
}
break;
}
// 3. Check for Stack Cleanup `sub sp sp N`
// 3. Stack Cleanup
let cleanup_idx = scan_fwd;
let has_cleanup = if cleanup_idx < input.len() {
if let Instruction::Sub(
Operand::StackPointer,
Operand::StackPointer,
Operand::Number(_),
) = &input[cleanup_idx].instruction
{
true
} else {
false
}
matches!(
input[cleanup_idx].instruction,
Instruction::Sub(
Operand::StackPointer,
Operand::StackPointer,
Operand::Number(_)
)
)
} else {
false
};
// "All or Nothing" strategy for the safe subset:
// SAFEGUARD: Check Counts!
// If we pushed r8 twice but only restored it once, the extra push was an argument, not a save.
// We must ensure the number of pushes for each register MATCHES the number of restores.
let mut push_counts = HashMap::new();
for (_, r) in &pushes {
*push_counts.entry(*r).or_insert(0) += 1;
}
let mut restore_counts = HashMap::new();
for (_, r, _) in &restores {
*restore_counts.entry(*r).or_insert(0) += 1;
}
let counts_match = push_counts
.iter()
.all(|(reg, count)| restore_counts.get(reg).unwrap_or(&0) == count);
// Also check reverse to ensure we didn't restore something we didn't push (unlikely but possible)
let counts_match_reverse = restore_counts
.iter()
.all(|(reg, count)| push_counts.get(reg).unwrap_or(&0) == count);
// Clobber Check
let all_pushes_safe = pushes.iter().all(|(_, r)| !func_clobbers.contains(r));
let push_set: HashSet<u8> = pushes.iter().map(|(_, r)| *r).collect();
let restore_set: HashSet<u8> = restores.iter().map(|(_, r, _)| *r).collect();
if all_pushes_safe && has_cleanup && push_set == restore_set {
// We can remove ALL saves/restores for this call!
if all_pushes_safe && has_cleanup && counts_match && counts_match_reverse {
// We can remove ALL found pushes/restores safely
for (p_idx, _) in pushes {
to_remove.insert(p_idx);
}
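The push/restore count safeguard above boils down to comparing two multisets. A self-contained sketch of that check (illustrative only; `counts_match` is a hypothetical function, not part of the commit):

use std::collections::HashMap;

// Count how many times each register appears on the push side and on the
// restore side; the safeguard holds exactly when the two maps are equal,
// which covers both directions of the original check.
fn counts_match(pushes: &[u8], restores: &[u8]) -> bool {
    let count = |regs: &[u8]| {
        let mut m = HashMap::new();
        for &r in regs {
            *m.entry(r).or_insert(0u32) += 1;
        }
        m
    };
    count(pushes) == count(restores)
}

fn main() {
    assert!(counts_match(&[8, 9], &[9, 8]));     // balanced save/restore pairs
    assert!(!counts_match(&[8, 8, 9], &[8, 9])); // the extra push of r8 was an argument
}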
@@ -278,7 +339,7 @@ fn optimize_function_calls<'a>(
}
// Reduce stack cleanup amount
let num_removed = push_set.len() as i64;
let num_removed = push_counts.values().sum::<i32>() as i64;
stack_adjustments.insert(cleanup_idx, num_removed);
changed = true;
}
@@ -295,15 +356,14 @@ fn optimize_function_calls<'a>(
}
// Apply stack adjustment
if let Some(reduction) = stack_adjustments.get(&idx) {
if let Instruction::Sub(dst, a, Operand::Number(n)) = &node.instruction {
let new_n = n - Decimal::from(*reduction);
if new_n.is_zero() {
continue; // Remove the sub entirely if 0
}
node.instruction =
Instruction::Sub(dst.clone(), a.clone(), Operand::Number(new_n));
if let Some(reduction) = stack_adjustments.get(&idx)
&& let Instruction::Sub(dst, a, Operand::Number(n)) = &node.instruction
{
let new_n = n - Decimal::from(*reduction);
if new_n.is_zero() {
continue; // Remove the sub entirely if 0
}
node.instruction = Instruction::Sub(dst.clone(), a.clone(), Operand::Number(new_n));
}
clean.push(node);
@@ -357,10 +417,16 @@ fn register_forwarding<'a>(
break;
}
// If the temp is redefined, then the old value is dead, so we are safe.
if let Some(redef) = get_destination_reg(&node.instruction) {
if redef == temp_reg {
break;
}
if let Some(redef) = get_destination_reg(&node.instruction)
&& redef == temp_reg
{
break;
}
// Reg15 is a return register.
if temp_reg == 15 {
break;
}
// If we hit a label/jump, we assume liveness might leak (conservative safety)
if matches!(
@@ -436,17 +502,17 @@ fn resolve_labels<'a>(input: Vec<InstructionNode<'a>>) -> Vec<InstructionNode<'a
*op = num;
}
}
Instruction::BranchEq(a, b, op)
| Instruction::BranchNe(a, b, op)
| Instruction::BranchGt(a, b, op)
| Instruction::BranchLt(a, b, op)
| Instruction::BranchGe(a, b, op)
| Instruction::BranchLe(a, b, op) => {
Instruction::BranchEq(_, _, op)
| Instruction::BranchNe(_, _, op)
| Instruction::BranchGt(_, _, op)
| Instruction::BranchLt(_, _, op)
| Instruction::BranchGe(_, _, op)
| Instruction::BranchLe(_, _, op) => {
if let Some(num) = get_line(op) {
*op = num;
}
}
Instruction::BranchEqZero(a, op) | Instruction::BranchNeZero(a, op) => {
Instruction::BranchEqZero(_, op) | Instruction::BranchNeZero(_, op) => {
if let Some(num) = get_line(op) {
*op = num;
}
@@ -634,8 +700,7 @@ fn reg_is_read(instr: &Instruction, reg: u8) -> bool {
}
}
// --- Constant Propagation & Dead Code (Same as before) ---
/// --- Constant Propagation & Dead Code ---
fn constant_propagation<'a>(input: Vec<InstructionNode<'a>>) -> (Vec<InstructionNode<'a>>, bool) {
let mut output = Vec::with_capacity(input.len());
let mut changed = false;
@@ -648,13 +713,8 @@ fn constant_propagation<'a>(input: Vec<InstructionNode<'a>>) -> (Vec<Instruction
}
let simplified = match &node.instruction {
Instruction::Move(dst, src) => {
if let Some(val) = resolve_value(src, &registers) {
Some(Instruction::Move(dst.clone(), Operand::Number(val)))
} else {
None
}
}
Instruction::Move(dst, src) => resolve_value(src, &registers)
.map(|val| Instruction::Move(dst.clone(), Operand::Number(val))),
Instruction::Add(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x + y),
Instruction::Sub(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x - y),
Instruction::Mul(dst, a, b) => try_fold_math(dst, a, b, &registers, |x, y| x * y),
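The `try_fold_math` calls above fold an arithmetic instruction when both operands are already known constants. A rough standalone sketch of that idea (assumed behaviour only; the real `try_fold_math` and `resolve_value` are defined elsewhere in this file):

use rust_decimal::Decimal;
use std::collections::HashMap;

// Hypothetical fold: if both operands resolve to known constants, return the
// computed value so the caller can rewrite the op as a plain move.
fn fold(
    a: Option<Decimal>,
    b: Option<Decimal>,
    op: impl Fn(Decimal, Decimal) -> Decimal,
) -> Option<Decimal> {
    Some(op(a?, b?))
}

fn main() {
    let known: HashMap<u8, Decimal> =
        [(1, Decimal::from(2)), (2, Decimal::from(3))].into_iter().collect();
    let folded = fold(known.get(&1).copied(), known.get(&2).copied(), |x, y| x + y);
    assert_eq!(folded, Some(Decimal::from(5))); // e.g. `add r0 r1 r2` becomes `move r0 5`
}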
@@ -718,11 +778,11 @@ fn constant_propagation<'a>(input: Vec<InstructionNode<'a>>) -> (Vec<Instruction
}
// Filter out NOPs (Empty LabelDefs from branch resolution)
if let Instruction::LabelDef(l) = &node.instruction {
if l.is_empty() {
changed = true;
continue;
}
if let Instruction::LabelDef(l) = &node.instruction
&& l.is_empty()
{
changed = true;
continue;
}
output.push(node);
@@ -779,11 +839,11 @@ fn remove_redundant_moves<'a>(input: Vec<InstructionNode<'a>>) -> (Vec<Instructi
let mut output = Vec::with_capacity(input.len());
let mut changed = false;
for node in input {
if let Instruction::Move(dst, src) = &node.instruction {
if dst == src {
changed = true;
continue;
}
if let Instruction::Move(dst, src) = &node.instruction
&& dst == src
{
changed = true;
continue;
}
output.push(node);
}
@@ -804,9 +864,8 @@ fn remove_unreachable_code<'a>(
changed = true;
continue;
}
match node.instruction {
Instruction::Jump(_) | Instruction::Jump(Operand::ReturnAddress) => dead = true,
_ => {}
if let Instruction::Jump(_) = node.instruction {
dead = true
}
output.push(node);
}
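The unreachable-code pass above keeps a `dead` flag that is set by an unconditional jump; resetting it at the next label is outside this hunk, so the following is only a simplified model of the idea, with a hypothetical `Instr` enum rather than the commit's types:

// Simplified model: once an unconditional jump is seen, drop instructions
// until the next label definition makes code reachable again.
#[derive(Debug, PartialEq)]
enum Instr { Jump, Label, Op }

fn strip_unreachable(input: Vec<Instr>) -> Vec<Instr> {
    let mut dead = false;
    let mut out = Vec::new();
    for i in input {
        if matches!(i, Instr::Label) {
            dead = false; // labels are jump targets, so execution can resume here
        }
        if dead {
            continue;
        }
        if matches!(i, Instr::Jump) {
            dead = true; // the jump itself is kept; what follows it is not
        }
        out.push(i);
    }
    out
}

fn main() {
    let v = vec![Instr::Op, Instr::Jump, Instr::Op, Instr::Label, Instr::Op];
    assert_eq!(strip_unreachable(v), vec![Instr::Op, Instr::Jump, Instr::Label, Instr::Op]);
}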