//2499 from progen...THUMB-version here by Kuemmel running at 640x480

.syntax unified

// RISC OS SWI numbers used below, listed in ascending order
.equ OS_WriteS,            0x01
.equ OS_Exit,              0x11
.equ OS_ReadEscapeState,   0x2c
.equ OS_ReadVduVariables,  0x31
.equ OS_ReadMonotonicTime, 0x42

// screen dimensions the intro is built for (640x480)
.equ screen_x, 640
.equ screen_y, 480

start:
// Entry point (ARM state). Precalculates the mapping table with FPA
// instructions (FPEmulator): for every 4th X position of every screen row
// one 32-bit entry is written, starting at start+1024.
//   Z  = sqrt(1 - ((X/scale)^2 + (Y/scale)^2))
//   X' = X*Z, Y' = Y*Z
// Entry layout as stored below (little-endian assumed):
//   byte 0 = X' & 255, byte 1 = low byte of Y', bytes 2-3 = 0.
// Register use: r6 = table write pointer, r4 = Y, r1 = X,
//               f7 = scale, f2 = Y, f3 = X, f0 = (Y/scale)^2.
// On exit r6 points just past the table and r4 = screen_y/2; the Thumb
// code that follows relies on both values.
   adr r6,start+1024       //r6 = address of the mapping table
   mov r0, #512            //scale (original note: default 256)
   .word 0xee070110        //flts f7, r0 (hand-encoded): f7 = scale as float
   mvn r4, #(screen_y/2-1) //r4 = Y = -(screen_y/2) (original note: default 127)
   precalc_loop_y:
      flts f2, r4          // f2 = Y as float
      dvfs f0, f2, f7      // Y/scale
      fmls f0, f0, f0      //(Y/scale)^2, constant for the whole row
      mvn r1, #(screen_x/2) //r1 = -(screen_x/2) - 1
      add r1,r1,#1         //r1 = X = -(screen_x/2); presumably two instructions because
                           //screen_x/2-1 does not fit an ARM immediate
      precalc_loop_x:
         flts f3, r1        // f3 = X as float
         dvfs f1, f3, f7    //  X/scale
         fmls f1, f1, f1    // (X/scale)^2
         adfs f1, f0, f1    // (X/scale)^2 + (Y/scale)^2
         rsfs f1, f1, #1.0  //  1 - ((X/scale)^2 + (Y/scale)^2) = Z^2 (reverse subtract)
                            //  stays positive for 640x480 with scale 512
         sqts f1, f1        // Z
         fmls f4, f3, f1    // X' = X*Z
         fmls f5, f2, f1    // Y' = Y*Z
         fix  r2, f4        // r2 = X' as integer
         fix  r3, f5        // r3 = Y' as integer
         and r2, r2, #255   // keep only the low byte of X'
         str r2, [r6], #1   // store whole word (X' in byte 0, rest 0), then r6 += 1
         strb r3, [r6], #3  // overwrite byte 1 with low byte of Y', r6 += 3 => next entry
         add r1, r1, #4     // next X: one table entry per 4 X positions
         cmp r1, #(screen_x/2)
      blt precalc_loop_x    // until X reaches screen_x/2
      add r4, r4, #1        // next Y
      cmp r4, #(screen_y/2)
   blt precalc_loop_y       // until Y reaches screen_y/2 (r4 keeps this value)

   adr pc, thumb+1   //jump to thumb code and switch to thumb mode (bit 0 set)

.thumb
thumb:

//init texture and screen
// Thumb state from here on. Sets the screen mode and prints "2499" through
// the inline OS_WriteS string, then asks the OS for VDU variable 148.
// The request list is built at start+256; because r1 is advanced past it by
// the stmia, the result is returned at start+256+8.
// Expects r4 = screen_y/2 as left by the precalc loop.
// NOTE(review): OS_WriteS is documented (for ARM callers) as resuming at the
// next word boundary after the terminator. The string below is 8 bytes and,
// if start is word-aligned, does not begin on a word boundary - confirm the
// return address is correct when called from Thumb state.
   swi OS_WriteS
   .byte 22,28                //VDU 22,28: select mode 28 (640x480); default 22,13 => mode 13 320x256
   .ascii"2499"
   .byte 5,0                  //VDU 5, then the 0 terminator of the OS_WriteS string
   adr.n r0,start+256         //r0 = input block (list of VDU variable numbers)
   movs r1,r0                 //r1 = write pointer, becomes the output block address
   movs r2,#148               //VDU variable 148 (ScreenStart per the RISC OS PRM - confirm)
   subs r4,r4,#(screen_y/2+1) //to create -1 (list terminator) from r4 = screen_y/2
   stmia r1!,{r2,r4}          //store {148, -1}; r1 now = start+256+8
   swi   OS_ReadVduVariables  //value of variable 148 is written to [start+256+8]

//copy string output to texture
// Copies a 64-byte wide, 8-row area from the address returned for VDU
// variable 148 (used as the screen base) into the 512-byte texture that
// starts where the precalc table ended (r6). 32 chunks of 16 bytes are
// copied; after every 4th chunk (64 bytes) the source pointer skips the
// rest of the screen row. Uses r0-r3 as copy buffer, r5 as chunk counter.
   ldr.n  r7,start+256+8     //r7 = source = value read by OS_ReadVduVariables
   movs r5,#31               //r5 counts 31..0 => 32 chunks
   cpy_loop:
      ldmia r7!,{r0-r3}      //read 16 bytes from the screen
      stmia r6!,{r0-r3}      //use r6 as storage address from before (end of table)
      tst   r5,#0b11         //low two bits clear => 4th chunk of this row done
      it eq
      addeq r7,r7,#(screen_x-64) //advance source to the start of the next screen row
   subs  r5,r5,#1
   bpl cpy_loop              //repeat until r5 goes negative

//main intro loop
// Each pass redraws the whole screen from the precalculated table:
// one table word in, one screen word out, screen_x*screen_y bytes in total.
// Table word r2: bits 0-7 = X', bits 8-15 = Y' (see precalc).
// Texture lookup index = ((X' + time) & 255, optionally ^ 0x80 when Y' bit 5
// is set) >> 2 for the column (0..63), plus Y' bits 2-4 as the row (0..7)
// of the 64x8 byte texture. The texel is XORed with the Y' byte.
// Registers: r0 = time, r6 = table read pointer, r5 = screen write pointer,
//            r7 = texture base, r4 = bytes left to write.
// Loops while carry is clear after OS_ReadEscapeState, then exits
// (per the RISC OS PRM carry set means Escape was pressed - confirm).
   mainloop:
      swi OS_ReadMonotonicTime //r0 = monotonic time (centiseconds per the PRM), used as scroll offset
      adr.n  r6,start+1024     //r6 = start of the mapping table
      ldr.n  r5,start+256+8    //r5 = screen address read during init
      movs r4,#(screen_x*screen_y) //r4 = number of screen bytes to fill
      adds r7,r6,r4            //r7 = end of table = start of texture
      scrcpy_loop:
         ldmia r6!,{r2}        //r2 = next table entry
         ands  r1,r2,#0x1c00   //r1 = Y' bits 2-4 (texture row), still at bits 10-12
         adds  r3,r2,r0        //add time to the entry
         uxtb  r3,r3           //r3 = (X' + time) & 255
         tst   r2,#0x2000      //Y' bit 5 set?
         it ne
         eorne r3,r3,#0x80     //then shift the column by half the 256 range
         add   r3,r3,r1,lsr#2  //move row bits to bits 8-10 above the 8-bit column
         lsrs  r3,r3,#2       //lsr doesn't work with ldrb in thumb... => index 0..511
         ldrb  r3,[r7,r3]      //fetch texel
         eor   r3,r3,r2,lsr#8  //XOR with the Y' byte (upper table bytes are 0)
         orr   r3,r3,r3,lsl#16 //duplicate the byte into bits 16-23; bytes 1 and 3 stay 0
         stmia r5!,{r3}        //write one 32-bit word to the screen
         subs  r4,r4,#4        //4 bytes done
      bne scrcpy_loop
      swi OS_ReadEscapeState   //result is returned in the carry flag
   bcc mainloop                //carry clear => next frame
   swi OS_Exit
