/* Load 64-bit GDT and jump to 64-bit code segment */ ur_mov gdt64_ptr, %eax, 4 lgdt (%eax)
/* Again, we use the same strategy, only this time we generate an actual * uk_reloc entry to be automatically resolved by the early relocator, * instead of relying on the code that relocates the start16 section * before starting the Application Processors, since execution of * lcpu_start32 comes before that. */ ur_mov jump_to64, %eax, 4 movl %eax, -6(%eax) ljmp $(gdt64_cs - gdt64), $START32_PLACEHOLDER .code64 jump_to64: /* Set up remaining segment registers */ movl $(gdt64_ds - gdt64), %eax movl %eax, %es movl %eax, %ss movl %eax, %ds xorl %eax, %eax movl %eax, %fs movl %eax, %gs leaq lcpu_start64(%rip), %rcx jmp *%rcx END(lcpu_start32)
/* Request basic CPU features and APIC ID * TODO: This APIC ID is limited to 256. Better get from leaf 0x1f */ movl $1, %eax cpuid shrl $24, %ebx
/* Use APIC_ID * LCPU_SIZE for indexing the cpu structure */ movl $LCPU_SIZE, %eax imul %ebx, %eax
/* Compute pointer into CPU struct array and store it in RBP * We do not use the frame pointer, yet *//* lcpus is an array that stores the metadata of every logical CPU. */ leaq lcpus(%rip), %rbp addq %rax, %rbp
/* Put CPU into init state */ /* set the lcpu state to init */ movl $LCPU_STATE_INIT, LCPU_STATE_OFFSET(%rbp)
/* Enable FPU, SSE, (XSAVE feature), (AVX), FS and GS base, (memory protection keys (PKU)). Request extended CPU features. Parenthesized features require the corresponding boot configuration options to be enabled. */ ......
/* Check if we have startup arguments supplied */ test %r8, %r8 jz no_args
/* Initialize the CPU configuration with the supplied startup args */ movq LCPU_SARGS_ENTRY_OFFSET(%r8), %rax /* the jump target is set to the address of the multiboot_entry function */ movq LCPU_SARGS_STACKP_OFFSET(%r8), %rsp /* the stack pointer is set to an address in the bss section */
jmp jump_to_entry
no_args: /* Load the stack pointer and the entry address from the CPU struct */ movq LCPU_ENTRY_OFFSET(%rbp), %rax movq LCPU_STACKP_OFFSET(%rbp), %rsp
jump_to_entry: /* According to System V AMD64 the stack pointer must be aligned to * 16-bytes. In other words, the value (RSP+8) must be a multiple of * 16 when control is transferred to the function entry point (i.e., * the compiler expects a misalignment due to the return address having * been pushed onto the stack). */ andq $~0xf, %rsp subq $0x8, %rsp
/* Memory region flags */
#define UKPLAT_MEMRF_ALL	0xffff

/* Permission bits (mask of the three flags below) */
#define UKPLAT_MEMRF_PERMS	0x0007
#define UKPLAT_MEMRF_READ	0x0001	/* Region is readable */
#define UKPLAT_MEMRF_WRITE	0x0002	/* Region is writable */
#define UKPLAT_MEMRF_EXECUTE	0x0004	/* Region is executable */

/* Boot-time mapping requirements */
#define UKPLAT_MEMRF_UNMAP	0x0010	/* Must be unmapped at boot */
#define UKPLAT_MEMRF_MAP	0x0020	/* Must be mapped at boot */
/* We have to call this here as the very early do_uk_reloc32 relocator * does not also relocate the UKPLAT_MEMRT_KERNEL mrd's like its C * equivalent, do_uk_reloc, does. */ do_uk_reloc_kmrds(0, 0);
/* Ensure that the memory map contains the legacy high mem area *//* bi->mrds 中插入 HI_MEM 和 BIOS_ROM。*/ rc = ukplat_memregion_list_insert_legacy_hi_mem(&bi->mrds); /* (gdb) p/x bi->mrds $3 = {capacity = 0x80, count = 0x6, mrds = 0x12f2e8 <bi_bootinfo_sec+80>} (gdb) p/x *(bi->mrds.mrds)@6 $4 = {{pbase = 0xa0000, vbase = 0xa0000, len = 0x40000, type = 0x2, flags = 0x23}, {pbase = 0xe0000, vbase = 0xe0000, len = 0x20000, type = 0x2, flags = 0x21}, {pbase = 0x100000, vbase = 0x100000, len = 0x27000, type = 0x4, flags = 0x5}, { pbase = 0x127000, vbase = 0x127000, len = 0x8000, type = 0x4, flags = 0x1}, {pbase = 0x12f000, vbase = 0x12f000, len = 0x56000, type = 0x4, flags = 0x3}, {pbase = 0x12f000, vbase = 0x12f000, len = 0x0, type = 0x4, flags = 0x3}} */ if (unlikely(rc)) multiboot_crash("Could not insert legacy memory region", rc);
/* Add modules from the multiboot info to the memory region list *//* load the module information into bi->mrds */ if (mi->flags & MULTIBOOT_INFO_MODS) { mods = (multiboot_module_t *)(__uptr)mi->mods_addr; for (i = 0; i < mi->mods_count; i++) { mrd.pbase = mods[i].mod_start; mrd.vbase = mods[i].mod_start; /* 1:1 mapping */ mrd.len = mods[i].mod_end - mods[i].mod_start; mrd.type = UKPLAT_MEMRT_INITRD; mrd.flags = UKPLAT_MEMRF_READ | UKPLAT_MEMRF_MAP;
/* Add map ranges from the multiboot info to the memory region list * CAUTION: These could generally overlap with regions already in the * list. We thus split new free regions accordingly to remove allocated * ranges. For all other ranges, we assume RESERVED type and that they * do NOT overlap with other allocated ranges (e.g., modules). *//*将 multiboot info 映射加入到 bi->mrds。multiboot info 信息从第二页开始。*/ /* (gdb) p/x *(multiboot_memory_map_t*)(mi->mmap_addr)@6 $20 = {{size = 0x14, addr = 0x0, len = 0x9fc00, type = 0x1}, {size = 0x14, addr = 0x9fc00, len = 0x400, type = 0x2}, {size = 0x14, addr = 0xf0000, len = 0x10000, type = 0x2}, {size = 0x14, addr = 0x100000, len = 0x7ee0000, type = 0x1}, {size = 0x14, addr = 0x7fe0000, len = 0x20000, type = 0x2}, {size = 0x14, addr = 0xfffc0000, len = 0x40000, type = 0x2}} */ if (mi->flags & MULTIBOOT_INFO_MEM_MAP) { for (offset = 0; offset < mi->mmap_length; offset += m->size + sizeof(m->size)) { m = (void *)(__uptr)(mi->mmap_addr + offset);
start = MAX(m->addr, __PAGE_SIZE); end = m->addr + m->len; if (unlikely(end <= start || end - start < PAGE_SIZE)) continue;
/* Switch away from the bootstrap stack */ uk_pr_info("Switch from bootstrap stack to stack @%p\n", bstack); /* switch the stack to bstack and execute _ukplat_entry2 */ lcpu_arch_jump_to(bstack, _ukplat_entry2); }
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Second-stage platform entry point, entered via lcpu_arch_jump_to() on the
 * final stack. Never returns.
 *
 * Defect fixed: "staticvoid" was missing the space between the storage class
 * and the return type, which does not compile.
 */
static void __noreturn _ukplat_entry2(void)
{
	/* It's not possible to unwind past this function, because the stack
	 * pointer was overwritten in lcpu_arch_jump_to. Therefore, mark the
	 * previous instruction pointer as undefined, so that debuggers or
	 * profilers stop unwinding here.
	 */
	ukarch_cfi_unwind_end();

	/* Hand over to the generic boot path; cmdline/cmdline_len are
	 * file-scope variables — NOTE(review): assumed to be filled by the
	 * multiboot parsing code earlier in this file, confirm in full file.
	 */
	ukplat_entry_argp(NULL, cmdline, cmdline_len);

	/* ukplat_entry_argp() is not expected to return; halt if it does */
	ukplat_lcpu_halt();
}
_ukplat_entry2 hands control over to ukplat_entry_argp.
ukplat_entry_argp and ukplat_entry
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* defined in <uk/plat.h> */ void ukplat_entry_argp(char *arg0, char *argb, __sz argb_len) { static char *argv[CONFIG_LIBUKBOOT_MAXNBARGS]; int argc = 0;
#if !CONFIG_LIBUKBOOT_NOALLOC uk_pr_info("Initialize memory allocator...\n"); /* Initialize the memory allocator and hand the memory regions in bi->mrds over to it for management. */ a = heap_init(); if (unlikely(!a)) UK_CRASH("Failed to initialize memory allocator\n"); else { /* set a as the default memory allocator */ rc = ukplat_memallocator_set(a); if (unlikely(rc != 0)) UK_CRASH("Could not set the platform memory allocator\n"); }
/* Allocate a TLS for this execution context */ tls = uk_memalign(a, ukarch_tls_area_align(), ukarch_tls_area_size()); if (!tls) UK_CRASH("Failed to allocate and initialize TLS\n");
/* * Application * * We are calling the application constructors right before calling * the application's main(). All of our Unikraft systems, VFS, * networking stack is initialized at this point. This way we closely * mimic what a regular user application (e.g., BSD, Linux) would expect * from its OS being initialized. */ /* preinit and init are used in C++ applications. * Therefore they are empty for sqlite (which is C) */ uk_pr_info("Pre-init table at %p - %p\n", &__preinit_array_start[0], &__preinit_array_end); uk_ctortab_foreach(ctorfn, __preinit_array_start, __preinit_array_end) { if (!*ctorfn) continue;
/* 1. uk_inittab related definitions */
/* unikraft/include/uk/init.h */
/**
 * Register a Unikraft init function that is
 * called during bootstrap (uk_inittab)
 *
 * @param fn
 *   Initialization function to be called
 * @param class
 *   Initialization class (1 (earliest) to 6 (latest))
 * @param prio
 *   Priority level (0 (earliest) to 9 (latest)), must be a constant.
 *   Use the UK_PRIO_AFTER() helper macro for computing priority dependencies.
 *   Note: Any other value for level will be ignored
 */
#define __UK_INITTAB(fn, base, prio)				\
	static const uk_init_func_t				\
	__used __section(".uk_inittab" #base #prio)		\
	__uk_inittab ## base ## prio ## _ ## fn = (fn)
/**
 * Define an early library initialization. At this point in time some platform
 * components may not be initialized yet, so it is wise to treat such
 * components as uninitialized.
 */
#define UK_INIT_CLASS_EARLY 1
#define uk_early_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_EARLY, prio)
/**
 * Define a stage for platform initialization. By this point the platform has
 * enumerated all devices and the devices are initialized.
 */
#define UK_INIT_CLASS_PLAT 2
#define uk_plat_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_PLAT, prio)
/**
 * Define a stage for performing library initialization. This library
 * initialization is performed after the platform is completely initialized.
 */
#define UK_INIT_CLASS_LIB 3
#define uk_lib_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_LIB, prio)
/**
 * Define a stage for filesystem initialization.
 */
#define UK_INIT_CLASS_ROOTFS 4
#define uk_rootfs_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_ROOTFS, prio)
/**
 * Define a stage for system initialization
 */
#define UK_INIT_CLASS_SYS 5
#define uk_sys_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_SYS, prio)
/**
 * Define a stage for application pre-initialization
 */
#define UK_INIT_CLASS_LATE 6
#define uk_late_initcall_prio(fn, prio) \
	uk_initcall_class_prio(fn, UK_INIT_CLASS_LATE, prio)
min = round_pgup((uintptr_t)base); max = round_pgdown((uintptr_t)base + (uintptr_t)len); UK_ASSERT(max > min);
/* Allocate space for allocator descriptor */ /* from this allocation we can see that the uk_alloc and uk_bbpalloc structures are adjacent in memory */ metalen = round_pgup(sizeof(*a) + sizeof(*b));
/* enough space for allocator available? */ if (min + metalen > max) { uk_pr_err("Not enough space for allocator: %"__PRIsz" B required but only %"__PRIuptr" B usable\n", metalen, (max - min)); return NULL; } /* note the addresses of a and b */ a = (struct uk_alloc *)min; uk_pr_info("Initialize binary buddy allocator %"__PRIuptr"\n", (uintptr_t)a); min += metalen; memset(a, 0, metalen); b = (struct uk_bbpalloc *)&a->priv; for (i = 0; i < FREELIST_SIZE; i++) { b->free_head[i] = &b->free_tail[i]; b->free_tail[i].pprev = &b->free_head[i]; b->free_tail[i].next = NULL; } b->memr_head = NULL;
__sz ukarch_tls_area_size(void) { /* NOTE: X86_64 ABI requires that fs:%0 contains the address of itself, * to allow certain optimizations. Hence, the overall size of an * TLS allocation is the aligned up TLS area plus 8 bytes for this * self-pointer. */ /* _tls_start 位于 tls 起始位置,_tls_end 位于 .tbss 末尾 */ __sz static_tls_len = ALIGN_UP((__uptr) _tls_end - (__uptr) _tls_start, sizeof(void *)); return static_tls_len + TCB_SIZE; } #define TLS_SECTIONS \ . = ALIGN(__PAGE_SIZE); \ _tls_start = .; \ .tdata : \ { \ *(.tdata) \ *(.tdata.*) \ *(.gnu.linkonce.td.*) \ } UK_SEGMENT_TLS UK_SEGMENT_TLS_LOAD \ _etdata = .; \ .tbss : \ { \ *(.tbss) \ *(.tbss.*) \ *(.gnu.linkonce.tb.*) \ *(.tcommon) \ } \ /* \ * NOTE: Because the .tbss section is zero-sized in the final \ * ELF image, just setting _tls_end to the end of it \ * does not give us the the size of the memory area once \ * loaded, so we use SIZEOF to have it point to the end. \ * _tls_end is only used to compute the .tbss size. \ */ \ _tls_end = . + SIZEOF(.tbss);
/* Add idle thread to the scheduler's thread list */ UK_TAILQ_INSERT_TAIL(&c->sched.thread_list, &c->idle, thread_list);
return &c->sched;
err_free_c: uk_free(a, c); err_out: return NULL; }
int uk_sched_start(struct uk_sched *s) { struct uk_thread *main_thread; uintptr_t tlsp; int ret;
UK_ASSERT(s); UK_ASSERT(s->sched_start); UK_ASSERT(!s->is_started); UK_ASSERT(!uk_thread_current()); /* No other thread runs */
/* Allocate an `uk_thread` instance for current context * NOTE: We assume that if we have a TLS pointer, it points to * an TLS that is derived from the Unikraft TLS template. */ tlsp = ukplat_tlsp_get(); main_thread = uk_thread_create_bare(s->a, 0x0, 0x0, tlsp, !(!tlsp), false, "init", NULL, NULL); if (!main_thread) { ret = -ENOMEM; goto err_out; } main_thread->sched = s;
/* Because `main_thread` acts as container for storing the current * context, it does not have IP and SP set. We have to manually mark * the thread as RUNNABLE. */ uk_thread_set_runnable(main_thread);
/* Set main_thread as current scheduled thread */ ukplat_per_lcpu_current(__uk_sched_thread_current) = main_thread;
/* Add main to the scheduler's thread list */ UK_TAILQ_INSERT_TAIL(&s->thread_list, main_thread, thread_list);
/* Enable scheduler, like time slicing, etc. and notify that `s` * has an (already) scheduled thread */ ret = s->sched_start(s, main_thread); if (ret < 0) goto err_unset_thread_current; s->is_started = true; return 0;