More optimizations

This commit is contained in:
2025-12-30 22:24:47 -07:00
parent d19a53bbee
commit 63f55b66cb
11 changed files with 192 additions and 42 deletions

View File

@@ -1180,6 +1180,27 @@ impl<'a> Compiler<'a> {
Some(name.span), Some(name.span),
)?; )?;
// Pop the arguments off the stack (caller cleanup convention)
// BUT: If the function returns a tuple, it saves SP in r15 and the caller
// will restore SP with "move sp r15", which automatically cleans up everything.
// So we only pop arguments for non-tuple-returning functions.
let returns_tuple = self
.function_meta
.tuple_return_sizes
.get(&name.node)
.copied()
.unwrap_or(0)
> 0;
if !returns_tuple {
for _ in 0..arguments.len() {
self.write_instruction(
Instruction::Pop(Operand::Register(VariableScope::TEMP_STACK_REGISTER)),
Some(name.span),
)?;
}
}
// pop all registers back (if they were backed up) // pop all registers back (if they were backed up)
if backup_registers { if backup_registers {
for register in active_registers.iter().rev() { for register in active_registers.iter().rev() {

View File

@@ -24,10 +24,9 @@ j main
pop r8 pop r8
push sp push sp
push ra push ra
move r9 20
add r1 r8 1 add r1 r8 1
move r15 r1 move r15 r1
j 8 j 7
pop ra pop ra
pop sp pop sp
j ra j ra

View File

@@ -0,0 +1,33 @@
---
source: libs/integration_tests/src/lib.rs
assertion_line: 103
expression: output
---
## Unoptimized Output
j main
compute:
pop r8
push sp
push ra
move r9 20
add r1 r8 1
move r15 r1
j __internal_L1
__internal_L1:
pop ra
pop sp
j ra
## Optimized Output
j main
pop r8
push sp
push ra
add r1 r8 1
move r15 r1
j 7
pop ra
pop sp
j ra

View File

@@ -47,7 +47,6 @@ push ra
push 5 push 5
push 10 push 10
jal 1 jal 1
move r8 r15
pop ra pop ra
pop sp pop sp
j ra j ra

View File

@@ -19,9 +19,4 @@ j ra
## Optimized Output ## Optimized Output
j main j main
pop r8
j main
pop r8
add r1 r8 1
move r8 r1
j ra j ra

View File

@@ -72,13 +72,12 @@ j 8
pop ra pop ra
pop sp pop sp
j ra j ra
pop r8
pop r9 pop r9
push sp push sp
push ra push ra
add r1 r9 r9 add r1 r9 r9
move r15 r1 move r15 r1
j 18 j 17
pop ra pop ra
pop sp pop sp
j ra j ra
@@ -100,12 +99,9 @@ push r10
push r10 push r10
push 2 push 2
jal 11 jal 11
pop r10
pop r9
pop r8
move r11 r15 move r11 r15
move r15 r11 move r15 r11
j 45 j 41
pop ra pop ra
pop sp pop sp
j ra j ra

View File

@@ -0,0 +1,108 @@
---
source: libs/integration_tests/src/lib.rs
assertion_line: 173
expression: output
---
## Unoptimized Output
j main
add:
pop r8
pop r9
push sp
push ra
add r1 r9 r8
move r15 r1
j __internal_L1
__internal_L1:
pop ra
pop sp
j ra
multiply:
pop r8
pop r9
push sp
push ra
mul r1 r9 2
move r15 r1
j __internal_L2
__internal_L2:
pop ra
pop sp
j ra
complex:
pop r8
pop r9
push sp
push ra
push r8
push r9
push r9
push r8
jal add
pop r9
pop r8
move r10 r15
push r8
push r9
push r10
push r10
push 2
jal multiply
pop r10
pop r9
pop r8
move r11 r15
move r15 r11
j __internal_L3
__internal_L3:
pop ra
pop sp
j ra
## Optimized Output
j main
pop r8
pop r9
push sp
push ra
add r1 r9 r8
move r15 r1
j 8
pop ra
pop sp
j ra
pop r9
push sp
push ra
add r1 r9 r9
move r15 r1
j 17
pop ra
pop sp
j ra
pop r8
pop r9
push sp
push ra
push r8
push r9
push r9
push r8
jal 1
pop r9
pop r8
move r10 r15
push r8
push r9
push r10
push r10
push 2
jal 11
move r11 r15
move r15 r11
j 41
pop ra
pop sp
j ra

View File

@@ -24,9 +24,5 @@ j ra
j main j main
pop r8 pop r8
pop r9 pop r9
j main ble r9 r8 4
pop r8
pop r9
ble r9 r8 8
move r10 1
j ra j ra

View File

@@ -17,6 +17,4 @@ j ra
## Optimized Output ## Optimized Output
j main j main
j main
move r8 10
j ra j ra

View File

@@ -32,11 +32,29 @@ pub fn dead_store_elimination<'a>(
last_write.insert(dest_reg, i); last_write.insert(dest_reg, i);
} }
// On labels/jumps, conservatively clear tracking (value might be used elsewhere) // Before clearing on labels/calls, check if current tracked writes are dead
if matches!( if matches!(
node.instruction, node.instruction,
Instruction::LabelDef(_) | Instruction::Jump(_) | Instruction::JumpAndLink(_) Instruction::LabelDef(_) | Instruction::JumpAndLink(_)
) { ) {
// Check all currently tracked writes to see if they're dead
for (&reg, &idx) in &last_write {
// Don't remove writes to r15 (return register)
if reg == 15 {
continue;
}
// Check if this write was used between write and now
let was_used = input[idx + 1..i]
.iter()
.any(|n| reg_is_read_or_affects_control(&n.instruction, reg));
if !was_used && !to_remove.contains(&idx) {
to_remove.push(idx);
changed = true;
}
}
last_write.clear(); last_write.clear();
} }
} }
@@ -59,29 +77,12 @@ pub fn dead_store_elimination<'a>(
} }
} }
/// Simplified check: Does this instruction read the register or affect control flow? /// Simplified check: Does this instruction read the register?
fn reg_is_read_or_affects_control(instr: &Instruction, reg: u8) -> bool { fn reg_is_read_or_affects_control(instr: &Instruction, reg: u8) -> bool {
use crate::helpers::reg_is_read; use crate::helpers::reg_is_read;
// If it reads the register, it's used // If it reads the register, it's used
if reg_is_read(instr, reg) { reg_is_read(instr, reg)
return true;
}
// Conservatively assume register might be used if there's control flow
matches!(
instr,
Instruction::Jump(_)
| Instruction::JumpAndLink(_)
| Instruction::BranchEq(_, _, _)
| Instruction::BranchNe(_, _, _)
| Instruction::BranchGt(_, _, _)
| Instruction::BranchLt(_, _, _)
| Instruction::BranchGe(_, _, _)
| Instruction::BranchLe(_, _, _)
| Instruction::BranchEqZero(_, _)
| Instruction::BranchNeZero(_, _)
)
} }
#[cfg(test)] #[cfg(test)]

View File

@@ -38,6 +38,8 @@ pub fn peephole_optimization<'a>(
// Safe to remove all four: push sp, push ra, pop ra, pop sp // Safe to remove all four: push sp, push ra, pop ra, pop sp
// Also need to adjust stack pointer offsets in between by -2 // Also need to adjust stack pointer offsets in between by -2
let absolute_sp_pop = absolute_ra_pop + 1; let absolute_sp_pop = absolute_ra_pop + 1;
// Clear output since we're going to reprocess the entire input
output.clear();
for (idx, node) in input.iter().enumerate() { for (idx, node) in input.iter().enumerate() {
if idx == i if idx == i
|| idx == i + 1 || idx == i + 1
@@ -83,6 +85,8 @@ pub fn peephole_optimization<'a>(
// Safe to remove both push and pop // Safe to remove both push and pop
// Also need to adjust stack pointer offsets in between // Also need to adjust stack pointer offsets in between
let absolute_pop_idx = i + pop_idx; let absolute_pop_idx = i + pop_idx;
// Clear output since we're going to reprocess the entire input
output.clear();
for (idx, node) in input.iter().enumerate() { for (idx, node) in input.iter().enumerate() {
if idx == i || idx == absolute_pop_idx { if idx == i || idx == absolute_pop_idx {
// Skip the push and pop // Skip the push and pop