@
@    Mandelboot - Booting RP2040 into printing Mandelbrot fractal
@    Copyright (C) 2021  Matthias Koch
@
@    This program is free software: you can redistribute it and/or modify
@    it under the terms of the GNU General Public License as published by
@    the Free Software Foundation, either version 3 of the License, or
@    (at your option) any later version.
@
@    This program is distributed in the hope that it will be useful,
@    but WITHOUT ANY WARRANTY; without even the implied warranty of
@    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@    GNU General Public License for more details.
@
@    You should have received a copy of the GNU General Public License
@    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@

@ Many thanks to Robert Clausecker for the challenge,
@   saved bytes and a smooth blink

@ Many thanks to Jan Bramkamp for hints on initialisation of the RP2040

.syntax unified                  @ Unified ARM/Thumb assembler syntax
.cpu cortex-m0                   @ Only accept Cortex-M0 instructions
.thumb                           @ Assemble Thumb encodings

@ -----------------------------------------------------------------------------
@  Resets
@ -----------------------------------------------------------------------------

.equ RESETS_BASE,       0x4000c000  @ Base address of the reset controller

                                 @ Register offsets relative to RESETS_BASE
.equ RESET,             0x0      @ Start-up code stores 0 here to activate all peripherals
.equ WDSEL,             0x4      @ Not used by the start-up code
.equ RESET_DONE,        0x8      @ Not used by the start-up code

@ -----------------------------------------------------------------------------
@ Crystal Oscillator
@ -----------------------------------------------------------------------------

.equ XOSC_BASE,         0x40024000  @ Base address of the crystal oscillator block

                                 @ Register offsets relative to XOSC_BASE
.equ XOSC_CTRL,         0x0      @ Crystal Oscillator Control
.equ XOSC_STATUS,       0x4      @ Crystal Oscillator Status
.equ XOSC_DORMANT,      0x8      @ Crystal Oscillator pause control
.equ XOSC_STARTUP,      0xc      @ Startup delay
.equ XOSC_COUNT,        0x1c     @ Down counter clocked at the XOSC frequency;
                                 @ it counts to zero and then stops.

                                 @ Values for the registers above
.equ XOSC_ENABLE_12MHZ, (0xfab << 12) | 0xaa0  @ = 0xfabaa0, stored to XOSC_CTRL
                                 @ NOTE(review): presumably an enable code in
                                 @ the upper bits and a frequency range in the
                                 @ lower 12 - confirm against the datasheet.
.equ XOSC_DELAY,        47       @ ceil((f_crystal * t_stable) / 256)

@ -----------------------------------------------------------------------------
@  Clock tree
@ -----------------------------------------------------------------------------

.equ CLOCKS_BASE,       0x40008000  @ Base address of the clock tree registers

                                 @ Register offsets relative to CLOCKS_BASE.
                                 @ DIV and SELECTED are given relative to the
                                 @ CTRL register of the same clock.

                                 @ clk_ref
.equ CLK_REF_CTRL,      0x30                 @ Clock control, can be changed on-the-fly
.equ CLK_REF_DIV,       CLK_REF_CTRL + 4     @ Clock divisor, can be changed on-the-fly
.equ CLK_REF_SELECTED,  CLK_REF_CTRL + 8     @ Currently selected src (one-hot)

                                 @ clk_sys
.equ CLK_SYS_CTRL,      0x3c                 @ Clock control, can be changed on-the-fly
.equ CLK_SYS_DIV,       CLK_SYS_CTRL + 4     @ Clock divisor, can be changed on-the-fly
.equ CLK_SYS_SELECTED,  CLK_SYS_CTRL + 8     @ Currently selected src (one-hot)

                                 @ clk_peri
.equ CLK_PERI_CTRL,     0x48                 @ Clock control, can be changed on-the-fly
.equ CLK_PERI_SELECTED, CLK_PERI_CTRL + 8    @ Currently selected src (one-hot)

@ All clocks can be changed on-the-fly, except for auxsrc

@ -----------------------------------------------------------------------------
@  Pins and their configuration
@ -----------------------------------------------------------------------------

.equ IO_BANK0_BASE, 0x40014000   @ Base address of the pin function registers

                                 @ Each pin owns 8 bytes in IO_BANK0:
                                 @ STATUS at 8 * pin, CTRL at 8 * pin + 4.
                                 @ Fixed: STATUS used 8 * 24, which is the
                                 @ status register of pin 24, not pin 25.
.equ GPIO_25_STATUS,    IO_BANK0_BASE + 8 * 25
.equ GPIO_25_CTRL,      IO_BANK0_BASE + 8 * 25 + 4

.equ PADS_BANK0_BASE, 0x4001c000 @ Base address of the pad registers

.equ GPIO_25_PAD,       PADS_BANK0_BASE + 0x68

@ -----------------------------------------------------------------------------
@  IO on pins
@ -----------------------------------------------------------------------------

.equ SIO_BASE,          0xd0000000        @ Base address of the SIO block (IO on pins)

                                          @ Register offsets relative to SIO_BASE
.equ CPUID,             0x00              @ Processor core identifier
.equ GPIO_IN,           0x04              @ Input value for GPIO pins

.equ GPIO_OUT,          0x10              @ GPIO output value
.equ GPIO_OUT_SET,      GPIO_OUT + 0x4    @ GPIO output value set
.equ GPIO_OUT_CLR,      GPIO_OUT + 0x8    @ GPIO output value clear
.equ GPIO_OUT_XOR,      GPIO_OUT + 0xc    @ GPIO output value XOR

.equ GPIO_OE,           0x20              @ GPIO output enable
.equ GPIO_OE_SET,       GPIO_OE + 0x4     @ GPIO output enable set
.equ GPIO_OE_CLR,       GPIO_OE + 0x8     @ GPIO output enable clear
.equ GPIO_OE_XOR,       GPIO_OE + 0xc     @ GPIO output enable XOR

@ -----------------------------------------------------------------------------
@  UART
@ -----------------------------------------------------------------------------

.equ UART0_BASE,        0x40034000  @ Base address of UART0

                                 @ Register offsets relative to UART0_BASE
.equ UARTDR,            0x000    @ Data register
.equ UARTRSR,           0x004    @ Receive status / error clear (UARTRSR/UARTECR)
.equ UARTFR,            0x018    @ Flag register
.equ UARTILPR,          0x020    @ IrDA low-power counter
.equ UARTIBRD,          0x024    @ Baud rate divisor, integer part
.equ UARTFBRD,          0x028    @ Baud rate divisor, fractional part
.equ UARTLCR_H,         0x02c    @ Line control
.equ UARTCR,            0x030    @ Control register
.equ UARTIFLS,          0x034    @ Interrupt FIFO level select
.equ UARTIMSC,          0x038    @ Interrupt mask set/clear
.equ UARTRIS,           0x03c    @ Raw interrupt status
.equ UARTMIS,           0x040    @ Masked interrupt status
.equ UARTICR,           0x044    @ Interrupt clear
.equ UARTDMACR,         0x048    @ DMA control

@ -----------------------------------------------------------------------------
Reset: @ Let it shine !
@ -----------------------------------------------------------------------------
@ Start-up code. In order: activates all peripherals, starts the crystal
@ oscillator and waits until it is stable, switches clk_ref to it, enables
@ clk_peri, sets up UART0 for 115200 baud 8N1 and selects the pin functions.
@ Execution then falls through into "mandelbrot".
@
@ On exit: r0 = -1 (consumed by mandelbrot), r2 = UART0_BASE.
@ No stack is used; r0-r7 are all overwritten.

    adr  r0, literal_pool        @ Load initialisation constants
    ldm  r0!, {r1-r5}            @ Five words, in the order stored in literal_pool

    movs r0, 0                   @ Activate all peripherals
    str  r0, [r1, #RESET]        @ r1 = 0x4000c000 (RESETS_BASE)

                                 @ Compute further constants:
    movs r1, 2                   @ r1 = 0x00000002 XOSC for everything
                                 @ r2 = 0x40034000 UART0_BASE
                                 @ r3 = 0x00fabaa0 XOSC_ENABLE_12MHZ
                                 @ r4 = 0x40008000 CLOCKS_BASE
                                 @ r5 = 0x00000301
    lsls r7, r1, 15              @ r7 = 0x00010000
    subs r6, r2, r7              @ r6 = 0x40024000 XOSC_BASE
    subs r7, r6, r7              @ r7 = 0x40014000 IO_BANK0_BASE


    str  r5, [r6, #XOSC_STARTUP] @ A value of 47 for XOSC_DELAY would suffice,
                                 @ but writing 0x301 to it saves one opcode
                                 @ (r5 is needed again below for UARTCR).
    str  r3, [r6, #XOSC_CTRL]    @ Activate XOSC. r3 = XOSC_ENABLE_12MHZ

1:  ldr  r0, [r6, #XOSC_STATUS]  @ Wait for stable flag (in MSB)
    asrs r0, r0, 31              @ Sign-fill: r0 = -1 and N set once the MSB is 1
    bpl  1b                      @ MSB still clear: poll again

    str  r1, [r4, #CLK_REF_CTRL] @ Select XOSC as source for clk_ref,
                                 @ which is the clock source of
                                 @ everything in reset configuration.
                                 @ r1 = 2, r4 = CLOCKS_BASE

    lsls r3, r1, 10              @ Enable clk_peri, which drives the UART
    str  r3, [r4,#CLK_PERI_CTRL] @ 2 << 10 = 0x800


                                 @ Configure the UART and set baud rate
    movs r3, 6                   @ 12e6/(16*115200) = 6.5104
    str  r3, [r2, #UARTIBRD]     @ r2 = UART0_BASE
    movs r3, 33                  @ (0.5104*64)+0.5 = 33.166
    str  r3, [r2, #UARTFBRD]
    movs r3, (3 << 5) | (1 << 4) @ 8N1, enable FIFOs
    str  r3, [r2, #UARTLCR_H]
    str  r5, [r2, #UARTCR]       @ Enable UART, r5 = 0x301



    str  r1, [r7, #4+0]          @ Set UART special function for RX and TX pins
    str  r1, [r7, #4+8]          @ r7 = IO_BANK0_BASE, r1 = 2;
                                 @ offsets 4 and 12 are the first two pins' CTRL

    movs r1, 5                   @ Set LED pin special function to SIO
    adds r7, 4+8*25              @ Advance r7 to the control register of this pin
    str  r1, [r7]                @ GPIO_25_CTRL

                                 @ r0 contains -1 at this point
                                 @ (left over from the XOSC wait loop)

@ -----------------------------------------------------------------------------
mandelbrot:  @ Print mandelbrot set with shining LED.
@ -----------------------------------------------------------------------------
@ Reached by falling through from Reset and by the branch in breathe_led
@ once a character is waiting in the receive FIFO.
@ Expects r0 = +1 or -1 and r2 = UART0_BASE. r0 is negated on every entry,
@ so successive pictures alternate between the two fractals.

                                 @ Initialisation and emit set r2 to UART0_BASE
    ldrb r3, [r2, #UARTDR]       @ Fetch character to clear UART flag
                                 @ (the byte read into r3 is not used)

    lsls r7, r2, 14              @ SIO_BASE = UART0_BASE << 14
    movs r5, 1                   @ Constant 1, reused at many places
    lsls r1, r5, 25              @ r1 = 1 << 25, the bit of pin 25
    str  r1, [r7, #GPIO_OE]      @ Set Pin 25 as output
    str  r1, [r7, #GPIO_OUT]     @ Set LED on


                                 @ Constants selecting the section for printing
                                 @ partially hardwired in code to save space:

.equ mandel_shift,      12       @ Use 12 fractional digits
.equ xmax,              3        @ x range for printing
.equ ymax,              2        @ y (documentation only: the code hardwires
                                 @    the value via mvns and 1 << 13)
.equ dx,                (2*xmax << mandel_shift) / 192  @ x step size
                                 @ = 128 with the values above
  @  dy,                (2*ymax << mandel_shift) / 64   @ y step size
  @  norm_max,          4        @ End iteration when reaching this

                                 @ r0: Mandelbrot/Tricorn switch, must be kept
                                 @ r1: Zr for iteration loop
                                 @ r2: Zi for iteration loop
                                 @ r3: Scratch
                                 @ r4: Iteration count
                                 @ r5: Constant 1
                                 @ r6: Loop X
                                 @ r7: Loop Y

    negs r0, r0                  @ Switch between 1: Mandelbrot or -1: Tricorn

    mvns r7, r5                  @ Start at -ymax = -2   (~1 = -2)
    lsls r7, r7, mandel_shift    @ Shift for fractional digits
y_loop:                          @ One pass per output line, r7 = Ci

      movs r6, xmax
      negs r6, r6                @ Start at -xmax = -3
      lsls r6, mandel_shift      @ Shift for fractional digits
x_loop:                          @ One pass per output character, r6 = Cr

      movs r1, r6                @ Zr = Cr  Prepare values for iteration
      movs r2, r7                @ Zi = Ci    in this point
      movs r4, 64                @ Maximum number of iterations

iteration_loop:                  @ Z' = Z^2 + C in fixed point; r4 counts down

        movs r3, r2
        muls r3, r2              @ (Zi * Zi)
        muls r2, r1              @ (Zr * Zi)
        muls r1, r1              @ (Zr * Zr)
        subs r1, r3              @ (Zr^2 - Zi^2)

        adds r3, r3              @ (Zr^2 - Zi^2 + 2*Zi^2) = (Zr^2 + Zi^2)
        adds r3, r1              @ Detour saves one register...

                                       @ The products above are not shifted yet,
                                       @ so they carry 2 * mandel_shift
                                       @ fractional bits and norm_max = 4 is
                                       @ 1 << (2 + 2 * mandel_shift) = 1 << 26.
                                       @ NOTE(review): 14 + mandel_shift equals
                                       @ 26 only while mandel_shift is 12.
        lsrs r3, r3, 14 + mandel_shift @ Finished if (Zr^2 + Zi^2) reaches
        bne.n iteration_finished       @ norm_max: any bit left after the shift

        asrs r1, r1, mandel_shift      @ (Zr^2 - Zi^2) >>> mandel_shift
        asrs r2, r2, mandel_shift-1    @ 2 * (Zr * Zi) >>> mandel_shift

        muls r2, r0              @ Complex conjugate of Zi to select fractal
                                 @ (r0 = 1 leaves it, r0 = -1 negates it)

        adds r1, r6              @ Zr' = Zr^2 - Zi^2 + Cr
        adds r2, r7              @ Zi' = 2 * Zr * Zi + Ci

        subs r4, 1               @ Next iteration ?
        bne.n iteration_loop

iteration_finished:              @ r4 = iterations left, 0 if none escaped

        movs r1, 15              @ Map iteration count
        ands r4, r1              @ by masking
        adr r3, colormap         @ into the ASCII
        ldrb r4, [r3, r4]        @ "colormap" and
        bl emit                  @ print a character.
                                 @ emit leaves r2 = UART0_BASE and changes r3

      adds r6, dx                @ Next step in x
      movs r1, xmax              @ Check if
      lsls r1, mandel_shift      @ maximum x value
      cmp r6, r1                 @ (shifted for fractional digits)
      ble.n x_loop               @ is already reached.

      movs r4, 10                @ Current line finished.
      bl emit                    @ Send line feed; r4 stays 10 afterwards

    lsls r1, r5, 8               @ dy = 0x100 = 1 << 8
    adds r7, r1                  @ Next step in y
    lsls r1, r5, 13              @ ymax = 2 << 12 = 1 << 13
    cmp r7, r1                   @ Reached ymax or more lines to print ?
    ble.n y_loop

@ -----------------------------------------------------------------------------
breathe_initialisation:
@ -----------------------------------------------------------------------------
@ Entered by falling through after the last line has been printed.
@ At that point r5 = 1, r4 = 10 (the line feed just sent) and
@ r2 = UART0_BASE (set by emit).

 @ movs r5, 1                    @ Set up initial x, y
   lsls r6, r5, 19               @ for Minsky circle algorithm
                                 @ r5 = 1, r6 = 1 << 19

@ -----------------------------------------------------------------------------
breathe_led: @ Generate breathing LED effect while waiting for keypress
@ -----------------------------------------------------------------------------
@ Endless loop. Each pass optionally advances the oscillator, converts its
@ value into a brightness and outputs one sigma-delta bit to the LED.
@ The loop is left towards "mandelbrot" only when the brightness value has
@ its top bit set and the UART receive FIFO is not empty.

                                 @ r0 : Mandelbrot/Tricorn switch, must be kept
                                 @ r1 : Scratch
                                 @ r2 : Initialised to UART0_BASE, must be kept
                                 @ r3 : Scratch
                                 @ r4 : Blink speed toggle switch, 10 on begin
                                 @ r5 : Minsky circle alg x = sin(t)
                                 @ r6 : Minsky circle alg y = cos(t)
                                 @ r7 : Phase for sigma-delta modulator

    eors r4, r0                  @ Blink speed depending on fractal type:
    bmi 1f                       @ Tricorns -1 skips every second Minsky step
                                 @ (r0 = 1 only toggles bit 0 of r4, so r4
                                 @  never turns negative; r0 = -1 inverts
                                 @  all bits of r4 on each pass)

                                 @ Minsky circle algorithm x,y = sin(t),cos(t)
                                 @ in terms of the register table above:
    asrs r1, r5, 17              @  r1 = sin >> 17
    subs r6, r1                  @  cos -= sin >> 17
    asrs r1, r6, 17              @  r1 = cos >> 17  (uses the updated cos)
    adds r5, r1                  @  sin += cos >> 17

1:  asrs r1, r6, 13              @ -49 <= r1 <= 64   --> scaled cos(t)
    adds r1, 183                 @ 134 <= r1 <= 247  --> scaled cos(t) + offset

    movs r3, 7                   @ Simplified bitexp function:
    ands r3, r1                  @ | Valid for inputs from 16 to 247.
    adds r3, 8                   @ | Gives 0 if below 16, and
    lsrs r1, r1, 3               @ |   too small values above 247.
    subs r1, 2                   @ | This opcode adjusts offset, can be removed
    lsls r3, r1                  @ Input in r1, output in r3
                                 @ The shift also sets N from bit 31 of r3

    bpl.n 1f                     @ Terminate on apex only for smooth transition
    ldr r1, [r2, #UARTFR]        @ Fetch status, r2 = UART0_BASE from emit
    lsls r1, r1, 31-4            @ Shift RX FIFO empty, bit 4, into MSB
    bpl.n mandelbrot             @ Not empty: a key was pressed, draw again
1:
    subs r7, r3                  @ Sigma-delta phase accumulator
    sbcs r3, r3                  @ Output through carry, inverted for subs
                                 @ r3 = 0 or -1

    lsls r1, r2, 14              @ SIO_BASE = UART0_BASE << 14.
    str r3, [r1, #GPIO_OUT]      @ Set LED to sigma-delta modulator output
                                 @ (the whole output register is written)
    b.n breathe_led

@ -----------------------------------------------------------------------------
emit: @ Emit character in r4
@ -----------------------------------------------------------------------------
@ In:       r4 = character to send (its low byte is stored to the UART)
@ Out:      r2 = UART0_BASE, which the callers keep using afterwards
@ Clobbers: r3, flags
@ Busy-waits while the transmit FIFO is full, then queues the byte.

        ldr     r2, uart0_base          @ Word inside literal_pool

.Lemit_tx_full:
        ldr     r3, [r2, #UARTFR]       @ Read the flag register
        lsls    r3, r3, #(31-5)         @ Move TX FIFO full, bit 5, into the MSB
        bmi     .Lemit_tx_full          @ Still full: poll again

        strb    r4, [r2, #UARTDR]       @ Output the character
        bx      lr

    .align                       @ The words below are read with ldm and ldr
literal_pool:                    @ Constants for IO initialisation
                                 @ Reset loads all five words at once with
                                 @ "ldm r0!, {r1-r5}", so their order fixes
                                 @ the register noted on each line.
    .word RESETS_BASE            @ r1
uart0_base:                      @ This word is also loaded on its own by emit
    .word UART0_BASE             @ r2
    .word XOSC_ENABLE_12MHZ      @ r3
    .word CLOCKS_BASE            @ r4
    .word 1<<9 | 1<<8 | 1<<0     @ r5 = 0x301, stored to XOSC_STARTUP and UARTCR
    .ltorg

                                 @ Output characters, indexed by the lowest
                                 @ four bits of the remaining iteration count.
                                 @ Iteration counter runs backwards:
                                 @  111111
                                 @  5432109876543210
colormap:                   .ascii " +@0O8%mnv*;:,. "
