More optimizations
This commit is contained in:
@@ -1180,6 +1180,27 @@ impl<'a> Compiler<'a> {
|
||||
Some(name.span),
|
||||
)?;
|
||||
|
||||
// Pop the arguments off the stack (caller cleanup convention)
|
||||
// BUT: If the function returns a tuple, it saves SP in r15 and the caller
|
||||
// will restore SP with "move sp r15", which automatically cleans up everything.
|
||||
// So we only pop arguments for non-tuple-returning functions.
|
||||
let returns_tuple = self
|
||||
.function_meta
|
||||
.tuple_return_sizes
|
||||
.get(&name.node)
|
||||
.copied()
|
||||
.unwrap_or(0)
|
||||
> 0;
|
||||
|
||||
if !returns_tuple {
|
||||
for _ in 0..arguments.len() {
|
||||
self.write_instruction(
|
||||
Instruction::Pop(Operand::Register(VariableScope::TEMP_STACK_REGISTER)),
|
||||
Some(name.span),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
// pop all registers back (if they were backed up)
|
||||
if backup_registers {
|
||||
for register in active_registers.iter().rev() {
|
||||
|
||||
@@ -24,10 +24,9 @@ j main
|
||||
pop r8
|
||||
push sp
|
||||
push ra
|
||||
move r9 20
|
||||
add r1 r8 1
|
||||
move r15 r1
|
||||
j 8
|
||||
j 7
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
---
|
||||
source: libs/integration_tests/src/lib.rs
|
||||
assertion_line: 103
|
||||
expression: output
|
||||
---
|
||||
## Unoptimized Output
|
||||
|
||||
j main
|
||||
compute:
|
||||
pop r8
|
||||
push sp
|
||||
push ra
|
||||
move r9 20
|
||||
add r1 r8 1
|
||||
move r15 r1
|
||||
j __internal_L1
|
||||
__internal_L1:
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
|
||||
## Optimized Output
|
||||
|
||||
j main
|
||||
pop r8
|
||||
push sp
|
||||
push ra
|
||||
add r1 r8 1
|
||||
move r15 r1
|
||||
j 7
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
@@ -47,7 +47,6 @@ push ra
|
||||
push 5
|
||||
push 10
|
||||
jal 1
|
||||
move r8 r15
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
|
||||
@@ -19,9 +19,4 @@ j ra
|
||||
## Optimized Output
|
||||
|
||||
j main
|
||||
pop r8
|
||||
j main
|
||||
pop r8
|
||||
add r1 r8 1
|
||||
move r8 r1
|
||||
j ra
|
||||
|
||||
@@ -72,13 +72,12 @@ j 8
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
add r1 r9 r9
|
||||
move r15 r1
|
||||
j 18
|
||||
j 17
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
@@ -100,12 +99,9 @@ push r10
|
||||
push r10
|
||||
push 2
|
||||
jal 11
|
||||
pop r10
|
||||
pop r9
|
||||
pop r8
|
||||
move r11 r15
|
||||
move r15 r11
|
||||
j 45
|
||||
j 41
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
---
|
||||
source: libs/integration_tests/src/lib.rs
|
||||
assertion_line: 173
|
||||
expression: output
|
||||
---
|
||||
## Unoptimized Output
|
||||
|
||||
j main
|
||||
add:
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
add r1 r9 r8
|
||||
move r15 r1
|
||||
j __internal_L1
|
||||
__internal_L1:
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
multiply:
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
mul r1 r9 2
|
||||
move r15 r1
|
||||
j __internal_L2
|
||||
__internal_L2:
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
complex:
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
push r8
|
||||
push r9
|
||||
push r9
|
||||
push r8
|
||||
jal add
|
||||
pop r9
|
||||
pop r8
|
||||
move r10 r15
|
||||
push r8
|
||||
push r9
|
||||
push r10
|
||||
push r10
|
||||
push 2
|
||||
jal multiply
|
||||
pop r10
|
||||
pop r9
|
||||
pop r8
|
||||
move r11 r15
|
||||
move r15 r11
|
||||
j __internal_L3
|
||||
__internal_L3:
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
|
||||
## Optimized Output
|
||||
|
||||
j main
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
add r1 r9 r8
|
||||
move r15 r1
|
||||
j 8
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
add r1 r9 r9
|
||||
move r15 r1
|
||||
j 17
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
pop r8
|
||||
pop r9
|
||||
push sp
|
||||
push ra
|
||||
push r8
|
||||
push r9
|
||||
push r9
|
||||
push r8
|
||||
jal 1
|
||||
pop r9
|
||||
pop r8
|
||||
move r10 r15
|
||||
push r8
|
||||
push r9
|
||||
push r10
|
||||
push r10
|
||||
push 2
|
||||
jal 11
|
||||
move r11 r15
|
||||
move r15 r11
|
||||
j 41
|
||||
pop ra
|
||||
pop sp
|
||||
j ra
|
||||
@@ -24,9 +24,5 @@ j ra
|
||||
j main
|
||||
pop r8
|
||||
pop r9
|
||||
j main
|
||||
pop r8
|
||||
pop r9
|
||||
ble r9 r8 8
|
||||
move r10 1
|
||||
ble r9 r8 4
|
||||
j ra
|
||||
|
||||
@@ -17,6 +17,4 @@ j ra
|
||||
## Optimized Output
|
||||
|
||||
j main
|
||||
j main
|
||||
move r8 10
|
||||
j ra
|
||||
|
||||
@@ -32,11 +32,29 @@ pub fn dead_store_elimination<'a>(
|
||||
last_write.insert(dest_reg, i);
|
||||
}
|
||||
|
||||
// On labels/jumps, conservatively clear tracking (value might be used elsewhere)
|
||||
// Before clearing on labels/calls, check if current tracked writes are dead
|
||||
if matches!(
|
||||
node.instruction,
|
||||
Instruction::LabelDef(_) | Instruction::Jump(_) | Instruction::JumpAndLink(_)
|
||||
Instruction::LabelDef(_) | Instruction::JumpAndLink(_)
|
||||
) {
|
||||
// Check all currently tracked writes to see if they're dead
|
||||
for (&reg, &idx) in &last_write {
|
||||
// Don't remove writes to r15 (return register)
|
||||
if reg == 15 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if this write was used between write and now
|
||||
let was_used = input[idx + 1..i]
|
||||
.iter()
|
||||
.any(|n| reg_is_read_or_affects_control(&n.instruction, reg));
|
||||
|
||||
if !was_used && !to_remove.contains(&idx) {
|
||||
to_remove.push(idx);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
last_write.clear();
|
||||
}
|
||||
}
|
||||
@@ -59,29 +77,12 @@ pub fn dead_store_elimination<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
/// Simplified check: Does this instruction read the register or affect control flow?
|
||||
/// Simplified check: Does this instruction read the register?
|
||||
fn reg_is_read_or_affects_control(instr: &Instruction, reg: u8) -> bool {
|
||||
use crate::helpers::reg_is_read;
|
||||
|
||||
// If it reads the register, it's used
|
||||
if reg_is_read(instr, reg) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Conservatively assume register might be used if there's control flow
|
||||
matches!(
|
||||
instr,
|
||||
Instruction::Jump(_)
|
||||
| Instruction::JumpAndLink(_)
|
||||
| Instruction::BranchEq(_, _, _)
|
||||
| Instruction::BranchNe(_, _, _)
|
||||
| Instruction::BranchGt(_, _, _)
|
||||
| Instruction::BranchLt(_, _, _)
|
||||
| Instruction::BranchGe(_, _, _)
|
||||
| Instruction::BranchLe(_, _, _)
|
||||
| Instruction::BranchEqZero(_, _)
|
||||
| Instruction::BranchNeZero(_, _)
|
||||
)
|
||||
reg_is_read(instr, reg)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -38,6 +38,8 @@ pub fn peephole_optimization<'a>(
|
||||
// Safe to remove all four: push sp, push ra, pop ra, pop sp
|
||||
// Also need to adjust stack pointer offsets in between by -2
|
||||
let absolute_sp_pop = absolute_ra_pop + 1;
|
||||
// Clear output since we're going to reprocess the entire input
|
||||
output.clear();
|
||||
for (idx, node) in input.iter().enumerate() {
|
||||
if idx == i
|
||||
|| idx == i + 1
|
||||
@@ -83,6 +85,8 @@ pub fn peephole_optimization<'a>(
|
||||
// Safe to remove both push and pop
|
||||
// Also need to adjust stack pointer offsets in between
|
||||
let absolute_pop_idx = i + pop_idx;
|
||||
// Clear output since we're going to reprocess the entire input
|
||||
output.clear();
|
||||
for (idx, node) in input.iter().enumerate() {
|
||||
if idx == i || idx == absolute_pop_idx {
|
||||
// Skip the push and pop
|
||||
|
||||
Reference in New Issue
Block a user