0.5.0 -- tuples and more optimizations #12

Merged
dbidwell merged 34 commits from 43-tuple-return into master 2025-12-31 17:03:51 -07:00
11 changed files with 192 additions and 42 deletions
Showing only changes of commit 63f55b66cb - Show all commits

View File

@@ -1180,6 +1180,27 @@ impl<'a> Compiler<'a> {
Some(name.span),
)?;
// Pop the arguments off the stack (caller cleanup convention)
// BUT: If the function returns a tuple, it saves SP in r15 and the caller
// will restore SP with "move sp r15", which automatically cleans up everything.
// So we only pop arguments for non-tuple-returning functions.
let returns_tuple = self
.function_meta
.tuple_return_sizes
.get(&name.node)
.copied()
.unwrap_or(0)
> 0;
if !returns_tuple {
for _ in 0..arguments.len() {
self.write_instruction(
Instruction::Pop(Operand::Register(VariableScope::TEMP_STACK_REGISTER)),
Some(name.span),
)?;
}
}
// pop all registers back (if they were backed up)
if backup_registers {
for register in active_registers.iter().rev() {

View File

@@ -24,10 +24,9 @@ j main
pop r8
push sp
push ra
move r9 20
add r1 r8 1
move r15 r1
j 8
j 7
pop ra
pop sp
j ra

View File

@@ -0,0 +1,33 @@
---
source: libs/integration_tests/src/lib.rs
assertion_line: 103
expression: output
---
## Unoptimized Output
j main
compute:
pop r8
push sp
push ra
move r9 20
add r1 r8 1
move r15 r1
j __internal_L1
__internal_L1:
pop ra
pop sp
j ra
## Optimized Output
j main
pop r8
push sp
push ra
add r1 r8 1
move r15 r1
j 7
pop ra
pop sp
j ra

View File

@@ -47,7 +47,6 @@ push ra
push 5
push 10
jal 1
move r8 r15
pop ra
pop sp
j ra

View File

@@ -19,9 +19,4 @@ j ra
## Optimized Output
j main
pop r8
j main
pop r8
add r1 r8 1
move r8 r1
j ra

View File

@@ -72,13 +72,12 @@ j 8
pop ra
pop sp
j ra
pop r8
pop r9
push sp
push ra
add r1 r9 r9
move r15 r1
j 18
j 17
pop ra
pop sp
j ra
@@ -100,12 +99,9 @@ push r10
push r10
push 2
jal 11
pop r10
pop r9
pop r8
move r11 r15
move r15 r11
j 45
j 41
pop ra
pop sp
j ra

View File

@@ -0,0 +1,108 @@
---
source: libs/integration_tests/src/lib.rs
assertion_line: 173
expression: output
---
## Unoptimized Output
j main
add:
pop r8
pop r9
push sp
push ra
add r1 r9 r8
move r15 r1
j __internal_L1
__internal_L1:
pop ra
pop sp
j ra
multiply:
pop r8
pop r9
push sp
push ra
mul r1 r9 2
move r15 r1
j __internal_L2
__internal_L2:
pop ra
pop sp
j ra
complex:
pop r8
pop r9
push sp
push ra
push r8
push r9
push r9
push r8
jal add
pop r9
pop r8
move r10 r15
push r8
push r9
push r10
push r10
push 2
jal multiply
pop r10
pop r9
pop r8
move r11 r15
move r15 r11
j __internal_L3
__internal_L3:
pop ra
pop sp
j ra
## Optimized Output
j main
pop r8
pop r9
push sp
push ra
add r1 r9 r8
move r15 r1
j 8
pop ra
pop sp
j ra
pop r9
push sp
push ra
add r1 r9 r9
move r15 r1
j 17
pop ra
pop sp
j ra
pop r8
pop r9
push sp
push ra
push r8
push r9
push r9
push r8
jal 1
pop r9
pop r8
move r10 r15
push r8
push r9
push r10
push r10
push 2
jal 11
move r11 r15
move r15 r11
j 41
pop ra
pop sp
j ra

View File

@@ -24,9 +24,5 @@ j ra
j main
pop r8
pop r9
j main
pop r8
pop r9
ble r9 r8 8
move r10 1
ble r9 r8 4
j ra

View File

@@ -17,6 +17,4 @@ j ra
## Optimized Output
j main
j main
move r8 10
j ra

View File

@@ -32,11 +32,29 @@ pub fn dead_store_elimination<'a>(
last_write.insert(dest_reg, i);
}
// On labels/jumps, conservatively clear tracking (value might be used elsewhere)
// Before clearing on labels/calls, check if current tracked writes are dead
if matches!(
node.instruction,
Instruction::LabelDef(_) | Instruction::Jump(_) | Instruction::JumpAndLink(_)
Instruction::LabelDef(_) | Instruction::JumpAndLink(_)
) {
// Check all currently tracked writes to see if they're dead
for (&reg, &idx) in &last_write {
// Don't remove writes to r15 (return register)
if reg == 15 {
continue;
}
// Check if this write was used between write and now
let was_used = input[idx + 1..i]
.iter()
.any(|n| reg_is_read_or_affects_control(&n.instruction, reg));
if !was_used && !to_remove.contains(&idx) {
to_remove.push(idx);
changed = true;
}
}
last_write.clear();
}
}
@@ -59,29 +77,12 @@ pub fn dead_store_elimination<'a>(
}
}
/// Simplified check: Does this instruction read the register or affect control flow?
/// Simplified check: Does this instruction read the register?
fn reg_is_read_or_affects_control(instr: &Instruction, reg: u8) -> bool {
use crate::helpers::reg_is_read;
// If it reads the register, it's used
if reg_is_read(instr, reg) {
return true;
}
// Conservatively assume register might be used if there's control flow
matches!(
instr,
Instruction::Jump(_)
| Instruction::JumpAndLink(_)
| Instruction::BranchEq(_, _, _)
| Instruction::BranchNe(_, _, _)
| Instruction::BranchGt(_, _, _)
| Instruction::BranchLt(_, _, _)
| Instruction::BranchGe(_, _, _)
| Instruction::BranchLe(_, _, _)
| Instruction::BranchEqZero(_, _)
| Instruction::BranchNeZero(_, _)
)
reg_is_read(instr, reg)
}
#[cfg(test)]

View File

@@ -38,6 +38,8 @@ pub fn peephole_optimization<'a>(
// Safe to remove all four: push sp, push ra, pop ra, pop sp
// Also need to adjust stack pointer offsets in between by -2
let absolute_sp_pop = absolute_ra_pop + 1;
// Clear output since we're going to reprocess the entire input
output.clear();
for (idx, node) in input.iter().enumerate() {
if idx == i
|| idx == i + 1
@@ -83,6 +85,8 @@ pub fn peephole_optimization<'a>(
// Safe to remove both push and pop
// Also need to adjust stack pointer offsets in between
let absolute_pop_idx = i + pop_idx;
// Clear output since we're going to reprocess the entire input
output.clear();
for (idx, node) in input.iter().enumerate() {
if idx == i || idx == absolute_pop_idx {
// Skip the push and pop