diff --git a/Makefile b/Makefile
index 11721e4..be8ad4b 100644
--- a/Makefile
+++ b/Makefile
@@ -52,9 +52,12 @@ MISC_TOOLS = phoenix_info sunxi-nand-image-builder
# Note: To use this target, set/adjust CROSS_COMPILE and MKSUNXIBOOT if needed
BINFILES = jtag-loop.sunxi fel-sdboot.sunxi uart0-helloworld-sdboot.sunxi
-CROSS_COMPILE ?= arm-none-eabi-
-CROSS_CC ?= $(CROSS_COMPILE)gcc
MKSUNXIBOOT ?= mksunxiboot
+PATH_DIRS := $(shell echo $$PATH | sed -e 's/:/ /g')
+# Try to guess a suitable default ARM cross toolchain: the first 'arm*-gcc' located directly in a $PATH directory
+CROSS_DEFAULT := arm-none-eabi-
+CROSS_COMPILE ?= $(or $(shell find $(PATH_DIRS) -maxdepth 1 -executable -name 'arm*-gcc' -printf '%f\t' 2>/dev/null | cut -f 1 | sed -e 's/-gcc$$/-/'),$(CROSS_DEFAULT))
+CROSS_CC ?= $(CROSS_COMPILE)gcc
DESTDIR ?=
PREFIX ?= /usr/local
@@ -128,7 +131,7 @@ PROGRESS := progress.c progress.h
SOC_INFO := soc_info.c soc_info.h
FEL_LIB := fel_lib.c fel_lib.h
-sunxi-fel: fel.c fel-to-spl-thunk.h $(PROGRESS) $(SOC_INFO) $(FEL_LIB)
+sunxi-fel: fel.c thunks/fel-to-spl-thunk.h $(PROGRESS) $(SOC_INFO) $(FEL_LIB)
$(CC) $(HOST_CFLAGS) $(LIBUSB_CFLAGS) $(LDFLAGS) -o $@ $(filter %.c,$^) $(LIBS) $(LIBUSB_LIBS)
sunxi-nand-part: nand-part-main.c nand-part.c nand-part-a10.h nand-part-a20.h
@@ -173,6 +176,11 @@ boot_head_sun5i.elf: boot_head.S boot_head.lds
sunxi-bootinfo: bootinfo.c
+# "preprocessed" .h files for inclusion of ARM thunk code
+.PHONY: headers
+headers:
+	$(MAKE) -C thunks/ CROSS_COMPILE=$(CROSS_COMPILE)
+
# target tools
TARGET_CFLAGS = $(DEFAULT_CFLAGS) -static $(CFLAGS)
sunxi-meminfo: meminfo.c
diff --git a/README.md b/README.md
index ef43d90..e9d7803 100644
--- a/README.md
+++ b/README.md
@@ -112,8 +112,9 @@ and our default target (when simply using `make`).
* `make target-tools`
builds tools that are intended for the target (Allwinner SoC), using a
-cross-compiler. The toolchain prefix *CROSS_COMPILE* defaults to `arm-none-eabi-`,
-adjust it if needed.
+cross-compiler. The Makefile tries to auto-detect a suitable toolchain
+prefix, and falls back to `arm-none-eabi-` if none is found.
+If needed, you may override this by explicitly setting *CROSS_COMPILE*.
_Hint:_ When compiling 'natively' on the target platform you may
simply use an empty toolchain prefix here (`make target-tools CROSS_COMPILE=`
or `make all CROSS_COMPILE=`).
diff --git a/fel.c b/fel.c
index efd74d9..d71631e 100644
--- a/fel.c
+++ b/fel.c
@@ -216,7 +216,7 @@ void aw_fel_fill(feldev_handle *dev, uint32_t offset, size_t size, unsigned char
}
static uint32_t fel_to_spl_thunk[] = {
- #include "fel-to-spl-thunk.h"
+ #include "thunks/fel-to-spl-thunk.h"
};
#define DRAM_BASE 0x40000000
diff --git a/fel_lib.c b/fel_lib.c
index 4e16adb..d482ce8 100644
--- a/fel_lib.c
+++ b/fel_lib.c
@@ -484,6 +484,31 @@ void fel_memmove(feldev_handle *dev,
fel_memcpy_up(dev, dst_addr, src_addr, size);
}
+/*
+ * Read-modify-write of the 32-bit word at 'addr', driven by two bit masks:
+ * bits given in 'clrbits' are cleared first, then those in 'setbits' are set.
+ */
+void fel_clrsetbits_le32(feldev_handle *dev,
+			 uint32_t addr, uint32_t clrbits, uint32_t setbits)
+{
+	uint32_t thunk[] = {
+		htole32(0xe59f0018), /*  0: ldr r0, [pc, #24]  (= addr)    */
+		htole32(0xe5901000), /*  4: ldr r1, [r0]                   */
+		htole32(0xe59f2014), /*  8: ldr r2, [pc, #20]  (= clrbits) */
+		htole32(0xe1c11002), /*  c: bic r1, r1, r2                 */
+		htole32(0xe59f2010), /* 10: ldr r2, [pc, #16]  (= setbits) */
+		htole32(0xe1811002), /* 14: orr r1, r1, r2                 */
+		htole32(0xe5801000), /* 18: str r1, [r0]                   */
+		htole32(0xe12fff1e), /* 1c: bx  lr                         */
+
+		htole32(addr),       /* 20: word address to modify */
+		htole32(clrbits),    /* 24: mask of bits to clear  */
+		htole32(setbits),    /* 28: mask of bits to set    */
+	};
+	aw_fel_write(dev, thunk, dev->soc_info->scratch_addr, sizeof(thunk));
+	aw_fel_execute(dev, dev->soc_info->scratch_addr);
+}
+
/*
* Memory access to the SID (root) keys proved to be unreliable for certain
* SoCs. This function uses an alternative, register-based approach to retrieve
diff --git a/fel_lib.h b/fel_lib.h
index 4de0ef9..62ab8dd 100644
--- a/fel_lib.h
+++ b/fel_lib.h
@@ -70,6 +70,13 @@ void fel_writel_n(feldev_handle *dev, uint32_t addr, uint32_t *src, size_t count
void fel_memmove(feldev_handle *dev,
uint32_t dst_addr, uint32_t src_addr, size_t size);
+void fel_clrsetbits_le32(feldev_handle *dev, uint32_t addr,
+			 uint32_t clrbits, uint32_t setbits);
+#define fel_clrbits_le32(dev, addr, mask) /* clear-only shorthand */ \
+	fel_clrsetbits_le32((dev), (addr), (mask), 0)
+#define fel_setbits_le32(dev, addr, mask) /* set-only shorthand */ \
+	fel_clrsetbits_le32((dev), (addr), 0, (mask))
+
/* retrieve SID root key */
bool fel_get_sid_root_key(feldev_handle *dev, uint32_t *result,
bool force_workaround);
diff --git a/thunks/Makefile b/thunks/Makefile
new file mode 100644
index 0000000..8d2547b
--- /dev/null
+++ b/thunks/Makefile
@@ -0,0 +1,36 @@
+#
+# build "preprocessed" .h files for inclusion of ARM scratch code
+#
+
+SPL_THUNK := fel-to-spl-thunk.h
+THUNKS := clrsetbits.h memcpy.h readl_writel.h
+THUNKS += rmr-thunk.h sid_read_root.h
+
+# Neither of these names a real file; stray files must not shadow them
+.PHONY: all FORCE
+
+all: $(SPL_THUNK) $(THUNKS)
+# clean up object files afterwards
+	$(RM) *.o
+
+# This empty prerequisite enforces a rebuild of all the headers on every run
+FORCE:
+
+# If not specified explicitly: try to guess a suitable ARM toolchain prefix
+PATH_DIRS := $(shell echo $$PATH | sed -e 's/:/ /g')
+CROSS_COMPILE ?= $(shell find $(PATH_DIRS) -maxdepth 1 -executable -name 'arm*-gcc' -printf '%f\t' 2>/dev/null | cut -f 1 | sed -e 's/-gcc$$/-/')
+
+AS := $(CROSS_COMPILE)as
+OBJDUMP := $(CROSS_COMPILE)objdump
+
+AWK_O_TO_H := awk -f objdump_to_h.awk
+
+# The SPL thunk requires a different output format. The "style" variable for
+# awk controls this, and causes the htole32() conversion to be omitted.
+fel-to-spl-thunk.h: fel-to-spl-thunk.S FORCE
+	$(AS) -o $(<:.S=.o) $<
+	$(OBJDUMP) -d $(<:.S=.o) | $(AWK_O_TO_H) -v style=old > $@
+
+$(THUNKS): %.h: %.S FORCE
+	$(AS) -o $*.o $<
+	$(OBJDUMP) -d $*.o | $(AWK_O_TO_H) > $@
diff --git a/thunks/README.md b/thunks/README.md
new file mode 100644
index 0000000..e9ca949
--- /dev/null
+++ b/thunks/README.md
@@ -0,0 +1,20 @@
+
+# thunks/README.md
+
+This directory contains assembly sources for ARM [thunk] code, and
+a corresponding _Makefile_. The idea is that the resulting binary routines
+can be transferred to a suitable target device and then executed 'remotely',
+usually via `sunxi-fel`.
+
+Normally you don't need to change or (re)build anything within this folder.
+Currently our main build process (via the parent directory's _Makefile_)
+only includes `fel-to-spl-thunk.h` directly. Other _.h_ files are provided
+**just for reference**. The main purpose of this folder is simply keeping
+track of _.S_ sources, to help with possible future maintenance of the
+various code snippets.
+
+Please note that any files lacking explicit license information are intended
+to be covered by the project's [overall license](../LICENSE.md) (GPLv2).
+
+
+[thunk]: https://en.wikipedia.org/wiki/Thunk#Interoperability
diff --git a/thunks/clrsetbits.S b/thunks/clrsetbits.S
new file mode 100644
index 0000000..d148d3c
--- /dev/null
+++ b/thunks/clrsetbits.S
@@ -0,0 +1,17 @@
+/* Thunk code to assist with bitwise operations (set/clear) via FEL:
+ * loads the word at addr, clears clrbits, then sets setbits and stores it back.
+ */
+
+fel_clrsetbits_le32:
+ ldr r0, 1f /* address */
+ ldr r1, [r0] /* load value */
+ ldr r2, 2f /* clrbits mask */
+ bic r1, r2 /* clear bits: r1 = r1 & ~r2 */
+ ldr r2, 3f /* setbits mask */
+ orr r1, r2 /* set bits (logical "or") */
+ str r1, [r0] /* store result */
+ bx lr /* return to caller */
+
+1: .word 0 /* addr (placeholder, zero as assembled) */
+2: .word 0 /* clrbits (= bits to clear) */
+3: .word 0 /* setbits (= bits to set) */
diff --git a/thunks/clrsetbits.h b/thunks/clrsetbits.h
new file mode 100644
index 0000000..4f76ae1
--- /dev/null
+++ b/thunks/clrsetbits.h
@@ -0,0 +1,9 @@
+ /* <fel_clrsetbits_le32>: */
+ htole32(0xe59f0018), /* 0: ldr r0, [pc, #24] */
+ htole32(0xe5901000), /* 4: ldr r1, [r0] */
+ htole32(0xe59f2014), /* 8: ldr r2, [pc, #20] */
+ htole32(0xe1c11002), /* c: bic r1, r1, r2 */
+ htole32(0xe59f2010), /* 10: ldr r2, [pc, #16] */
+ htole32(0xe1811002), /* 14: orr r1, r1, r2 */
+ htole32(0xe5801000), /* 18: str r1, [r0] */
+ htole32(0xe12fff1e), /* 1c: bx lr */
diff --git a/fel-to-spl-thunk.S b/thunks/fel-to-spl-thunk.S
similarity index 100%
rename from fel-to-spl-thunk.S
rename to thunks/fel-to-spl-thunk.S
diff --git a/fel-to-spl-thunk.h b/thunks/fel-to-spl-thunk.h
similarity index 93%
rename from fel-to-spl-thunk.h
rename to thunks/fel-to-spl-thunk.h
index 636bb81..286ba53 100644
--- a/fel-to-spl-thunk.h
+++ b/thunks/fel-to-spl-thunk.h
@@ -1,4 +1,6 @@
+ /* : */
0xea000015, /* 0: b 5c */
+ /* : */
0xe1a00000, /* 4: nop */
0xe1a00000, /* 8: nop */
0xe1a00000, /* c: nop */
@@ -7,13 +9,17 @@
0xe1a00000, /* 18: nop */
0xe1a00000, /* 1c: nop */
0xe1a00000, /* 20: nop */
+ /* : */
0xe1a00000, /* 24: nop */
+ /* : */
0xe28f40dc, /* 28: add r4, pc, #220 */
+ /* : */
0xe4940004, /* 2c: ldr r0, [r4], #4 */
0xe4941004, /* 30: ldr r1, [r4], #4 */
0xe4946004, /* 34: ldr r6, [r4], #4 */
0xe3560000, /* 38: cmp r6, #0 */
0x012fff1e, /* 3c: bxeq lr */
+ /* : */
0xe5902000, /* 40: ldr r2, [r0] */
0xe5913000, /* 44: ldr r3, [r1] */
0xe2566004, /* 48: subs r6, r6, #4 */
@@ -21,6 +27,7 @@
0xe4803004, /* 50: str r3, [r0], #4 */
0x1afffff9, /* 54: bne 40 */
0xeafffff3, /* 58: b 2c */
+ /* : */
0xe59f80a4, /* 5c: ldr r8, [pc, #164] */
0xe24f0044, /* 60: sub r0, pc, #68 */
0xe520d004, /* 64: str sp, [r0, #-4]! */
@@ -34,10 +41,12 @@
0xe1120003, /* 84: tst r2, r3 */
0x1a000012, /* 88: bne d8 */
0xebffffe5, /* 8c: bl 28 */
+ /* : */
0xe3067c39, /* 90: movw r7, #27705 */
0xe3457f0a, /* 94: movt r7, #24330 */
0xe1a00008, /* 98: mov r0, r8 */
0xe5905010, /* 9c: ldr r5, [r0, #16] */
+ /* : */
0xe4902004, /* a0: ldr r2, [r0], #4 */
0xe2555004, /* a4: subs r5, r5, #4 */
0xe0877002, /* a8: add r7, r7, r2 */
@@ -52,14 +61,18 @@
0xf57ff06f, /* cc: isb sy */
0xe12fff38, /* d0: blx r8 */
0xea000006, /* d4: b f4 */
+ /* : */
0xe3032f2e, /* d8: movw r2, #16174 */
0xe3432f3f, /* dc: movt r2, #16191 */
0xe5882008, /* e0: str r2, [r8, #8] */
0xea000003, /* e4: b f8 */
+ /* : */
0xe304222e, /* e8: movw r2, #16942 */
0xe3442441, /* ec: movt r2, #17473 */
0xe5882008, /* f0: str r2, [r8, #8] */
+ /* : */
0xebffffcb, /* f4: bl 28 */
+ /* : */
0xe8bd4004, /* f8: pop {r2, lr} */
0xe121f002, /* fc: msr CPSR_c, r2 */
0xe59dd000, /* 100: ldr sp, [sp] */
diff --git a/thunks/memcpy.S b/thunks/memcpy.S
new file mode 100644
index 0000000..e280f41
--- /dev/null
+++ b/thunks/memcpy.S
@@ -0,0 +1,70 @@
+/*
+ * copy "upwards", increasing destination and source addresses
+ */
+fel_memcpy_up:
+ ldr r0, 1f /* dst_addr */
+ ldr r1, 2f /* src_addr */
+ ldr r2, 3f /* bytes */
+ sub r3, r1, r0
+ tst r3, #3 /* do src and dst differ in their two low address bits? */
+ bne copyup_tail /* different word alignment, copy byte-wise */
+copyup_head:
+ tst r1, #3 /* word boundary? */
+ beq copyup_loop
+ ldrb r3, [r1], #1 /* load and post-inc */
+ strb r3, [r0], #1 /* store and post-inc */
+ subs r2, #1 /* r2 -= 1 */
+ bpl copyup_head /* NOTE(review): also taken when r2 == 0; if r1 is still unaligned one extra byte is copied - confirm */
+ bx lr /* early return on small byte count (r2 < 0) */
+copyup_loop:
+ subs r2, #4 /* r2 -= 4 */
+ ldrpl r3, [r1], #4 /* load and post-inc */
+ strpl r3, [r0], #4 /* store and post-inc */
+ bpl copyup_loop /* while (r2 >= 0) */
+ add r2, #4 /* r2 = remaining byte count */
+copyup_tail:
+ subs r2, #1 /* r2 -= 1 */
+ bxmi lr /* return on (r2 < 0) */
+ ldrb r3, [r1], #1 /* load and post-inc */
+ strb r3, [r0], #1 /* store and post-inc */
+ b copyup_tail
+
+1: .word 0 /* dst_addr */
+2: .word 0 /* src_addr */
+3: .word 0 /* bytes */
+
+/*
+ * copy "downwards", using base-relative indexing
+ */
+fel_memcpy_down:
+ ldr r0, 1f /* dst_addr */
+ ldr r1, 2f /* src_addr */
+ ldr r2, 3f /* bytes */
+ sub r3, r0, r1
+ tst r3, #3 /* do dst and src differ in their two low address bits? */
+ bne copydn_tail /* different word alignment, copy byte-wise */
+copydn_head:
+ add r3, r1, r2 /* r3 = r1 + r2, for alignment check */
+ tst r3, #3 /* word boundary? */
+ beq copydn_loop
+ subs r2, #1 /* r2 -= 1 */
+ bxmi lr /* early return on small byte count (r2 < 0) */
+ ldrb r3, [r1, r2] /* load byte */
+ strb r3, [r0, r2] /* store byte */
+ b copydn_head
+copydn_loop:
+ subs r2, #4 /* r2 -= 4 */
+ ldrpl r3, [r1, r2] /* load word */
+ strpl r3, [r0, r2] /* store word */
+ bpl copydn_loop /* while (r2 >= 0) */
+ add r2, #4 /* r2 = remaining byte count */
+copydn_tail:
+ subs r2, #1 /* r2 -= 1 */
+ bxmi lr /* return on (r2 < 0) */
+ ldrb r3, [r1, r2] /* load byte */
+ strb r3, [r0, r2] /* store byte */
+ b copydn_tail
+
+1: .word 0 /* dst_addr */
+2: .word 0 /* src_addr */
+3: .word 0 /* bytes */
diff --git a/thunks/memcpy.h b/thunks/memcpy.h
new file mode 100644
index 0000000..ddfeaaf
--- /dev/null
+++ b/thunks/memcpy.h
@@ -0,0 +1,55 @@
+ /* <fel_memcpy_up>: */
+ htole32(0xe59f0054), /* 0: ldr r0, [pc, #84] */
+ htole32(0xe59f1054), /* 4: ldr r1, [pc, #84] */
+ htole32(0xe59f2054), /* 8: ldr r2, [pc, #84] */
+ htole32(0xe0413000), /* c: sub r3, r1, r0 */
+ htole32(0xe3130003), /* 10: tst r3, #3 */
+ htole32(0x1a00000b), /* 14: bne 48 */
+ /* <copyup_head>: */
+ htole32(0xe3110003), /* 18: tst r1, #3 */
+ htole32(0x0a000004), /* 1c: beq 34 */
+ htole32(0xe4d13001), /* 20: ldrb r3, [r1], #1 */
+ htole32(0xe4c03001), /* 24: strb r3, [r0], #1 */
+ htole32(0xe2522001), /* 28: subs r2, r2, #1 */
+ htole32(0x5afffff9), /* 2c: bpl 18 */
+ htole32(0xe12fff1e), /* 30: bx lr */
+ /* <copyup_loop>: */
+ htole32(0xe2522004), /* 34: subs r2, r2, #4 */
+ htole32(0x54913004), /* 38: ldrpl r3, [r1], #4 */
+ htole32(0x54803004), /* 3c: strpl r3, [r0], #4 */
+ htole32(0x5afffffb), /* 40: bpl 34 */
+ htole32(0xe2822004), /* 44: add r2, r2, #4 */
+ /* <copyup_tail>: */
+ htole32(0xe2522001), /* 48: subs r2, r2, #1 */
+ htole32(0x412fff1e), /* 4c: bxmi lr */
+ htole32(0xe4d13001), /* 50: ldrb r3, [r1], #1 */
+ htole32(0xe4c03001), /* 54: strb r3, [r0], #1 */
+ htole32(0xeafffffa), /* 58: b 48 */
+ /* <fel_memcpy_down>: */
+ htole32(0xe59f0058), /* 68: ldr r0, [pc, #88] */
+ htole32(0xe59f1058), /* 6c: ldr r1, [pc, #88] */
+ htole32(0xe59f2058), /* 70: ldr r2, [pc, #88] */
+ htole32(0xe0403001), /* 74: sub r3, r0, r1 */
+ htole32(0xe3130003), /* 78: tst r3, #3 */
+ htole32(0x1a00000c), /* 7c: bne b4 */
+ /* <copydn_head>: */
+ htole32(0xe0813002), /* 80: add r3, r1, r2 */
+ htole32(0xe3130003), /* 84: tst r3, #3 */
+ htole32(0x0a000004), /* 88: beq a0 */
+ htole32(0xe2522001), /* 8c: subs r2, r2, #1 */
+ htole32(0x412fff1e), /* 90: bxmi lr */
+ htole32(0xe7d13002), /* 94: ldrb r3, [r1, r2] */
+ htole32(0xe7c03002), /* 98: strb r3, [r0, r2] */
+ htole32(0xeafffff7), /* 9c: b 80 */
+ /* <copydn_loop>: */
+ htole32(0xe2522004), /* a0: subs r2, r2, #4 */
+ htole32(0x57913002), /* a4: ldrpl r3, [r1, r2] */
+ htole32(0x57803002), /* a8: strpl r3, [r0, r2] */
+ htole32(0x5afffffb), /* ac: bpl a0 */
+ htole32(0xe2822004), /* b0: add r2, r2, #4 */
+ /* <copydn_tail>: */
+ htole32(0xe2522001), /* b4: subs r2, r2, #1 */
+ htole32(0x412fff1e), /* b8: bxmi lr */
+ htole32(0xe7d13002), /* bc: ldrb r3, [r1, r2] */
+ htole32(0xe7c03002), /* c0: strb r3, [r0, r2] */
+ htole32(0xeafffffa), /* c4: b b4 */
diff --git a/thunks/objdump_to_h.awk b/thunks/objdump_to_h.awk
new file mode 100644
index 0000000..bab0a35
--- /dev/null
+++ b/thunks/objdump_to_h.awk
@@ -0,0 +1,33 @@
+# label lines ("ADDR <name>:") - POSIX bracket classes only, no gawk-specific \w escape
+/[[:xdigit:]]+ <[[:alnum:]_]+>:/ {
+	# (Note: using $0 instead of $2 would also include the address)
+	if (style=="old")
+		printf "\t/* %s */\n", $2
+	else
+		printf "\t\t/* %s */\n", $2
+}
+
+# disassembly lines: $1 = "ADDR:", $2 = opcode word (hex), $3 = mnemonic, rest = operands
+/[[:xdigit:]]+:/ {
+	if (style=="old")
+		printf "\t0x%s, /* %9s %-10s", $2, $1, $3
+	else
+		printf "\t\thtole32(0x%s), /* %5s %-5s", $2, $1, $3
+
+	for (i = 4; i <= NF; i++)
+		if ($i == ";") {
+			# strip comment (anything after and including ';')
+			NF = i - 1
+			break
+		}
+	# clear $1 to $3, which re-calculates $0 (= remainder of line)
+	$3 = ""
+	$2 = ""
+	$1 = ""
+	gsub(/^[[:space:]]+/, "") # strip leading whitespace (portable form of \s)
+
+	if (style=="old")
+		printf " %-28s */\n", $0
+	else
+		printf " %-23s */\n", $0
+}
diff --git a/thunks/readl_writel.S b/thunks/readl_writel.S
new file mode 100644
index 0000000..ebe7d0c
--- /dev/null
+++ b/thunks/readl_writel.S
@@ -0,0 +1,41 @@
+/*
+ * Thunk code for buffered 'long' (i.e. 32-bit) read and write operations
+ */
+
+.equ MAX_WORDS, 0x100 - 12 /* = 244, upper bound for the word count */
+
+fel_readl_n:
+ ldr r0, 1f /* read_addr */
+ adr r1, 3f /* address of the read_data buffer */
+ ldr r2, 2f /* read_count */
+ /* limit word count to a maximum value */
+ cmp r2, #MAX_WORDS
+ movgt r2, #MAX_WORDS /* (signed comparison) */
+read_loop:
+ subs r2, #1 /* r2 -= 1 */
+ bxmi lr /* return on (r2 < 0) */
+ ldr r3, [r0], #4 /* load word from read_addr, post-inc */
+ str r3, [r1], #4 /* store word into buffer, post-inc */
+ b read_loop
+
+1: .word 0 /* read_addr */
+2: .word 0 /* read_count */
+3: .word 0 /* read_data (buffer starts here) */
+
+fel_writel_n:
+ ldr r0, 1f /* write_addr */
+ adr r1, 3f /* address of the write_data buffer */
+ ldr r2, 2f /* write_count */
+ /* limit word count to a maximum value */
+ cmp r2, #MAX_WORDS
+ movgt r2, #MAX_WORDS /* (signed comparison) */
+write_loop:
+ subs r2, #1 /* r2 -= 1 */
+ bxmi lr /* return on (r2 < 0) */
+ ldr r3, [r1], #4 /* load word from buffer, post-inc */
+ str r3, [r0], #4 /* store word to write_addr, post-inc */
+ b write_loop
+
+1: .word 0 /* write_addr */
+2: .word 0 /* write_count */
+3: .word 0 /* write_data (buffer starts here) */
diff --git a/thunks/readl_writel.h b/thunks/readl_writel.h
new file mode 100644
index 0000000..d50b5ac
--- /dev/null
+++ b/thunks/readl_writel.h
@@ -0,0 +1,24 @@
+ /* <fel_readl_n>: */
+ htole32(0xe59f0020), /* 0: ldr r0, [pc, #32] */
+ htole32(0xe28f1024), /* 4: add r1, pc, #36 */
+ htole32(0xe59f201c), /* 8: ldr r2, [pc, #28] */
+ htole32(0xe35200f4), /* c: cmp r2, #244 */
+ htole32(0xc3a020f4), /* 10: movgt r2, #244 */
+ /* <read_loop>: */
+ htole32(0xe2522001), /* 14: subs r2, r2, #1 */
+ htole32(0x412fff1e), /* 18: bxmi lr */
+ htole32(0xe4903004), /* 1c: ldr r3, [r0], #4 */
+ htole32(0xe4813004), /* 20: str r3, [r1], #4 */
+ htole32(0xeafffffa), /* 24: b 14 */
+ /* <fel_writel_n>: */
+ htole32(0xe59f0020), /* 34: ldr r0, [pc, #32] */
+ htole32(0xe28f1024), /* 38: add r1, pc, #36 */
+ htole32(0xe59f201c), /* 3c: ldr r2, [pc, #28] */
+ htole32(0xe35200f4), /* 40: cmp r2, #244 */
+ htole32(0xc3a020f4), /* 44: movgt r2, #244 */
+ /* <write_loop>: */
+ htole32(0xe2522001), /* 48: subs r2, r2, #1 */
+ htole32(0x412fff1e), /* 4c: bxmi lr */
+ htole32(0xe4913004), /* 50: ldr r3, [r1], #4 */
+ htole32(0xe4803004), /* 54: str r3, [r0], #4 */
+ htole32(0xeafffffa), /* 58: b 48 */
diff --git a/thunks/rmr-thunk.S b/thunks/rmr-thunk.S
new file mode 100644
index 0000000..10429b5
--- /dev/null
+++ b/thunks/rmr-thunk.S
@@ -0,0 +1,24 @@
+/*
+ * Request AArch32/AArch64 warm reset, using RVBAR and Reset Management Register
+ */
+
+rmr_request:
+ ldr r0, 1f /* RVBAR register address */
+ ldr r1, 2f /* desired entry point (reset vector) */
+ str r1, [r0] /* store entry point at the RVBAR address */
+ dsb
+ isb /* make sure we write the address */
+
+ ldr r1, 3f /* RMR mode: bit 1 = RR, bit 0 = AA64 */
+ mrc p15, 0, r0, c12, c0, 2 /* read RMR */
+ orr r0, r0, r1 /* request warm reset (according to rmr_mode) */
+ mcr p15, 0, r0, c12, c0, 2 /* write RMR, trigger reset */
+
+ isb
+0:
+ wfi /* wait for interrupt */
+ b 0b /* loop */
+
+1: .word 0 /* rvbar_reg */
+2: .word 0 /* entry_point */
+3: .word 0 /* rmr_mode (2 = AArch32, 3 = AArch64) */
diff --git a/thunks/rmr-thunk.h b/thunks/rmr-thunk.h
new file mode 100644
index 0000000..1e6fcd9
--- /dev/null
+++ b/thunks/rmr-thunk.h
@@ -0,0 +1,13 @@
+ /* <rmr_request>: */
+ htole32(0xe59f0028), /* 0: ldr r0, [pc, #40] */
+ htole32(0xe59f1028), /* 4: ldr r1, [pc, #40] */
+ htole32(0xe5801000), /* 8: str r1, [r0] */
+ htole32(0xf57ff04f), /* c: dsb sy */
+ htole32(0xf57ff06f), /* 10: isb sy */
+ htole32(0xe59f101c), /* 14: ldr r1, [pc, #28] */
+ htole32(0xee1c0f50), /* 18: mrc 15, 0, r0, cr12, cr0, {2} */
+ htole32(0xe1800001), /* 1c: orr r0, r0, r1 */
+ htole32(0xee0c0f50), /* 20: mcr 15, 0, r0, cr12, cr0, {2} */
+ htole32(0xf57ff06f), /* 24: isb sy */
+ htole32(0xe320f003), /* 28: wfi */
+ htole32(0xeafffffd), /* 2c: b 28 */
diff --git a/sid_read_root.S b/thunks/sid_read_root.S
similarity index 100%
rename from sid_read_root.S
rename to thunks/sid_read_root.S
diff --git a/sid_read_root.h b/thunks/sid_read_root.h
similarity index 100%
rename from sid_read_root.h
rename to thunks/sid_read_root.h