Posts: 76
Joined: Sun Apr 15, 2012 2:23 pm

Small bootloader

Sat Nov 10, 2012 8:25 pm

I just created a tiny bootloader and I thought I'd share it with the class.
It is so small that it fits inside the first 0x100 bytes (252 bytes to be exact, + 48 bytes of code that handles the relocation to 0x0).
It preserves the atags setup and register values, which means that it can theoretically load a Linux kernel (not tested), though the transfer might take a while at 115.2 kbps.
It does not use any special protocol for the transfer which means that any terminal that can send a raw file will work. This is of course also the biggest weakness as there will be no error checking or recovery.
I use UART0, as the slightly larger FIFO can reduce the risks involved in not checking for errors. Kudos to dwelch67 for making the example for UART0 by the way.
The code is made as one single block of assembly, as that removes the need for stack management and frees up a couple of extra words. It does reserve space for a standard patchable vector table though. Apart from that, I think it probably breaks most, if not all, of the "good programming practice" rules, like hardcoding of values, no functions and writing to the NULL pointer.
You might notice that the bootloader itself does not use the "ldr reg, =value" pseudo-instruction to initialize registers. That avoids an issue where the assembler could place the literal value outside the region that gets relocated. Besides, a constant has to span more than two 8-bit blocks before a mov + orr sequence becomes bigger than an ldr plus its literal-pool word.


Here it is, to be loaded at default 0x8000:

Code: Select all

@ ---------------------------------------------------------------------------
@ Tiny raw-UART bootloader (32-bit ARM mode, GNU as syntax).
@
@ The image is meant to be loaded and entered at LOAD_ADDR (0x8000).
@   1. _start's first vector loads pc from reset_handler -> loader_loader.
@   2. loader_loader copies bootloader_start..bootloader_end to address 0x0,
@      rewrites the relocated reset_handler word so it points at "reset",
@      and branches to 0x0.
@   3. reset (now running from low memory) sets up UART0, sends 'B', stores
@      every received byte from LOAD_ADDR upward, and once at least one byte
@      has arrived and the line has been idle for more than TIMEOUT_TICKS it
@      sends 'S' and branches to LOAD_ADDR.
@
@ Registers: r0-r2 are never written on the normal path, so the values
@ present at entry are handed through to the loaded image. r3-r9 are
@ clobbered. No stack is used. The hang handler does overwrite r0.
@
@ There is no framing, checksum or error recovery on the transfer.
@ ---------------------------------------------------------------------------

.section .init
.globl _start

    @ Address the payload is stored at and finally entered at.
    .equ LOAD_ADDR,     0x8000
    @ Idle time (in ticks of the counter at TIMER_PHYS_BASE + TIMER_CLO)
    @ after which the transfer is considered finished.
    @ NOTE(review): presumably a 1 MHz counter, i.e. about 0.33 s - confirm.
    .equ TIMEOUT_TICKS, 0x50000

    @ UART0 register offsets (names follow the comments in the code below).
    .equ UART_DR,       0x00
    .equ UART_FR,       0x18
    .equ UART_IBRD,     0x24
    .equ UART_FBRD,     0x28
    .equ UART_LCRH,     0x2C
    .equ UART_CR,       0x30
    .equ UART_ICR,      0x44
    @ Flag-register bit that is set while there is nothing to read.
    @ NOTE(review): PL011 calls this RXFE - confirm against the TRM.
    .equ UART_FR_RXFE,  0x10

    @ GPIO function-select register covering pins 14 and 15.
    .equ GPIO_FSEL1,    0x04
    @ Offset of the timer counter word that is polled.
    .equ TIMER_CLO,     0x04

    @ Register roles. status/machtype/atags are only named to mark r0-r2
    @ as reserved; the code never touches them outside the hang handler.
    status          .req r0
    machtype        .req r1
    atags           .req r2
    val             .req r3
    UART_PHYS_BASE  .req r4
    GPIO_PHYS_BASE  .req r5
    TIMER_PHYS_BASE .req r6
    active_time     .req r7
    current_time    .req r8
    load_ptr        .req r9

@ ---------------------------------------------------------------------------
@ Patchable vector table. After relocation this sits at 0x00-0x3F: eight
@ "ldr pc" instructions followed by the eight words they load from.
@ ---------------------------------------------------------------------------
_start:
bootloader_start:
    ldr     pc, reset_handler
    ldr     pc, undefined_handler
    ldr     pc, swi_handler
    ldr     pc, prefetch_handler
    ldr     pc, data_handler
    ldr     pc, unused_handler
    ldr     pc, irq_handler
    ldr     pc, fiq_handler
@ At the load address this word sends the first instruction to the
@ relocation stub; the stub overwrites the relocated copy with "reset".
reset_handler:      .word loader_loader
@ Every other vector goes to the hang loop (relocated address 0x40).
undefined_handler:  .word hang - bootloader_start
swi_handler:        .word hang - bootloader_start
prefetch_handler:   .word hang - bootloader_start
data_handler:       .word hang - bootloader_start
unused_handler:     .word hang - bootloader_start
irq_handler:        .word hang - bootloader_start
fiq_handler:        .word hang - bootloader_start

@ Relocated address 0x40: spin forever by branching to our own relocated
@ address. The offset from bootloader_start equals the absolute address
@ because the block is copied to 0x0.
hang:
    mov     r0, #hang - bootloader_start
    bx      r0

@ Relocated address 0x48: real entry point after relocation.
reset:
@ --- UART initialization ---------------------------------------------------
    @ Peripheral base addresses are built with mov/orr instead of
    @ "ldr rX, =const" so that no literal can land outside the copied region.
    mov     UART_PHYS_BASE, #0x20000000
    orr     UART_PHYS_BASE, UART_PHYS_BASE, #0x00200000
    orr     UART_PHYS_BASE, UART_PHYS_BASE, #0x00001000     @ 0x20201000
    mov     GPIO_PHYS_BASE, #0x20000000
    orr     GPIO_PHYS_BASE, GPIO_PHYS_BASE, #0x00200000     @ 0x20200000
    mov     TIMER_PHYS_BASE, #0x20000000
    orr     TIMER_PHYS_BASE, TIMER_PHYS_BASE, #0x00003000   @ 0x20003000
    mov     load_ptr, #LOAD_ADDR

    @ UART0_CR = 0 (UART off while it is being configured)
    mov     val, #0
    str     val, [UART_PHYS_BASE, #UART_CR]

    @ GPIO function to UART0 for pins 14 and 15. The whole register is
    @ written, so every other field in it is set to 0.
    mov     val, #0x24000
    str     val, [GPIO_PHYS_BASE, #GPIO_FSEL1]

    @ UART0_ICR = 0x7F1
    mov     val, #0x7F0
    orr     val, val, #0x001
    str     val, [UART_PHYS_BASE, #UART_ICR]

    @ UART0_IBRD = 1
    mov     val, #1
    str     val, [UART_PHYS_BASE, #UART_IBRD]

    @ UART0_FBRD = 40
    mov     val, #40
    str     val, [UART_PHYS_BASE, #UART_FBRD]

    @ UART0_LCRH = 0x70
    mov     val, #0x70
    str     val, [UART_PHYS_BASE, #UART_LCRH]

    @ UART0_CR = 0x301
    mov     val, #0x300
    orr     val, val, #0x001
    str     val, [UART_PHYS_BASE, #UART_CR]

@ --- Announce readiness: transmit 'B' ---------------------------------------
    mov     val, #0x42
    str     val, [UART_PHYS_BASE, #UART_DR]

@ --- Receive loop ----------------------------------------------------------
    @ Invariant: active_time = timer value sampled just before the most
    @ recent byte was stored (or at start-up if none has been stored yet).
    ldr     current_time, [TIMER_PHYS_BASE, #TIMER_CLO]
    mov     active_time, current_time

loop$:
    @ Read UART0_FR; a non-zero masked result means nothing to read, so
    @ skip straight to the timeout check.
    ldr     val, [UART_PHYS_BASE, #UART_FR]
    ands    val, val, #UART_FR_RXFE
    bne     cont$

    @ Load the data register and keep only the low byte.
    ldr     val, [UART_PHYS_BASE, #UART_DR]
    and     val, val, #0xFF
    strb    val, [load_ptr], #1             @ *load_ptr++ = val

    @ Restart the idle timer from the last sampled time.
    mov     active_time, current_time

cont$:
    @ Keep looping while idle time <= TIMEOUT_TICKS (unsigned compare) ...
    ldr     current_time, [TIMER_PHYS_BASE, #TIMER_CLO]
    sub     val, current_time, active_time
    cmp     val, #TIMEOUT_TICKS
    bls     loop$
    @ ... and never time out before the first byte has been stored.
    cmp     load_ptr, #LOAD_ADDR
    beq     loop$

@ --- Transfer finished: transmit 'S' and enter the payload ------------------
    mov     val, #0x53
    str     val, [UART_PHYS_BASE, #UART_DR]

    mov     val, #LOAD_ADDR
    bx      val
bootloader_end:

@ ---------------------------------------------------------------------------
@ Relocation stub. Runs once from the load address and is not copied.
@ Clobbers r3-r7 only. The copied size must be a non-zero multiple of 4
@ (true while the region holds only ARM instructions and .word data) and
@ must remain encodable as an ARM immediate.
@ ---------------------------------------------------------------------------
loader_loader:
    mov     r3, #LOAD_ADDR                          @ source pointer
    mov     r4, #bootloader_end - bootloader_start  @ bytes left to copy
    mov     r5, #0                                  @ destination pointer
copy:
    ldr     r6, [r3], #4        @ load from source, post-increment by 4
    str     r6, [r5], #4        @ store to destination, post-increment by 4
    subs    r4, r4, #4          @ if ((r4 -= 4) != 0)
    bne     copy                @     goto copy

    @ Point the relocated reset vector word (address 0x20) at "reset"
    @ (address 0x48).
    mov     r3, #reset_handler - bootloader_start
    mov     r4, #reset - bootloader_start
    str     r4, [r3]

    @ Enter the relocated image through its reset vector at address 0x0.
    mov     r7, #0
    bx      r7

Return to “Bare metal, Assembly language”