From 51c12ca1f2bf79176a0c1c352e897b4c44cf6f4a Mon Sep 17 00:00:00 2001 From: Ying-Shiuan Pan Date: Wed, 10 Jul 2013 23:24:51 +0800 Subject: [PATCH] add: original codes of Definitive Guide to the Xen Hypervisor --- COPYING | 23 ++ chapter11/Makefile | 2 + chapter11/enumerate_vms.c | 93 ++++++++ chapter12/sched_trivial.c | 90 ++++++++ chapter13/isXen.c | 30 +++ chapter2/Makefile | 15 ++ chapter2/bootstrap.x86_32.S | 38 ++++ chapter2/debug.h | 25 +++ chapter2/domain_config | 18 ++ chapter2/example.lds | 27 +++ chapter2/kernel.c | 13 ++ chapter3/gettimeofday.c | 63 ++++++ chapter4/mapping.c | 27 +++ chapter4/offering.c | 23 ++ chapter6/.exrc | 16 ++ chapter6/Makefile | 17 ++ chapter6/Makefile~ | 17 ++ chapter6/bootstrap.x86_32.S | 276 +++++++++++++++++++++++ chapter6/console.c | 106 +++++++++ chapter6/console.h | 8 + chapter6/domain_config | 18 ++ chapter6/event.c | 102 +++++++++ chapter6/event.h | 27 +++ chapter6/include/barrier.h | 24 ++ chapter6/include/hypercall-x86_32.h | 331 ++++++++++++++++++++++++++++ chapter6/include/util.h | 27 +++ chapter6/include/x86_mm.h | 215 ++++++++++++++++++ chapter6/kernel.c | 33 +++ chapter6/loader.lds | 27 +++ chapter6/traps.c | 79 +++++++ chapter7/.exrc | 16 ++ chapter7/Makefile | 17 ++ chapter7/Makefile~ | 17 ++ chapter7/bootstrap.x86_32.S | 276 +++++++++++++++++++++++ chapter7/console.c | 140 ++++++++++++ chapter7/console.h | 8 + chapter7/domain_config | 18 ++ chapter7/event.c | 102 +++++++++ chapter7/event.h | 27 +++ chapter7/include/barrier.h | 24 ++ chapter7/include/hypercall-x86_32.h | 331 ++++++++++++++++++++++++++++ chapter7/include/util.h | 27 +++ chapter7/include/x86_mm.h | 215 ++++++++++++++++++ chapter7/kernel.c | 33 +++ chapter7/loader.lds | 27 +++ chapter7/traps.c | 79 +++++++ chapter8/Makefile | 17 ++ chapter8/bootstrap.x86_32.S | 277 +++++++++++++++++++++++ chapter8/console.c | 123 +++++++++++ chapter8/console.h | 8 + chapter8/domain_config | 18 ++ chapter8/include/barrier.h | 24 ++ 
chapter8/include/hypercall-x86_32.h | 331 ++++++++++++++++++++++++++++ chapter8/include/util.h | 27 +++ chapter8/include/x86_mm.h | 215 ++++++++++++++++++ chapter8/kernel.c | 39 ++++ chapter8/loader.lds | 27 +++ chapter8/traps.c | 83 +++++++ chapter8/xenstore.c | 221 +++++++++++++++++++ chapter8/xenstore.h | 9 + 60 files changed, 4586 insertions(+) create mode 100644 COPYING create mode 100644 chapter11/Makefile create mode 100644 chapter11/enumerate_vms.c create mode 100644 chapter12/sched_trivial.c create mode 100644 chapter13/isXen.c create mode 100644 chapter2/Makefile create mode 100644 chapter2/bootstrap.x86_32.S create mode 100644 chapter2/debug.h create mode 100644 chapter2/domain_config create mode 100644 chapter2/example.lds create mode 100644 chapter2/kernel.c create mode 100644 chapter3/gettimeofday.c create mode 100644 chapter4/mapping.c create mode 100644 chapter4/offering.c create mode 100644 chapter6/.exrc create mode 100644 chapter6/Makefile create mode 100644 chapter6/Makefile~ create mode 100644 chapter6/bootstrap.x86_32.S create mode 100644 chapter6/console.c create mode 100644 chapter6/console.h create mode 100644 chapter6/domain_config create mode 100644 chapter6/event.c create mode 100644 chapter6/event.h create mode 100644 chapter6/include/barrier.h create mode 100644 chapter6/include/hypercall-x86_32.h create mode 100644 chapter6/include/util.h create mode 100644 chapter6/include/x86_mm.h create mode 100644 chapter6/kernel.c create mode 100644 chapter6/loader.lds create mode 100644 chapter6/traps.c create mode 100644 chapter7/.exrc create mode 100644 chapter7/Makefile create mode 100644 chapter7/Makefile~ create mode 100644 chapter7/bootstrap.x86_32.S create mode 100644 chapter7/console.c create mode 100644 chapter7/console.h create mode 100644 chapter7/domain_config create mode 100644 chapter7/event.c create mode 100644 chapter7/event.h create mode 100644 chapter7/include/barrier.h create mode 100644 chapter7/include/hypercall-x86_32.h 
create mode 100644 chapter7/include/util.h create mode 100644 chapter7/include/x86_mm.h create mode 100644 chapter7/kernel.c create mode 100644 chapter7/loader.lds create mode 100644 chapter7/traps.c create mode 100644 chapter8/Makefile create mode 100644 chapter8/bootstrap.x86_32.S create mode 100644 chapter8/console.c create mode 100644 chapter8/console.h create mode 100644 chapter8/domain_config create mode 100644 chapter8/include/barrier.h create mode 100644 chapter8/include/hypercall-x86_32.h create mode 100644 chapter8/include/util.h create mode 100644 chapter8/include/x86_mm.h create mode 100644 chapter8/kernel.c create mode 100644 chapter8/loader.lds create mode 100644 chapter8/traps.c create mode 100644 chapter8/xenstore.c create mode 100644 chapter8/xenstore.h diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..21b32ae --- /dev/null +++ b/COPYING @@ -0,0 +1,23 @@ +Source files taken from the Xen distribution are governed by their original license. + +Original source files in this distribution are distributed under the following license: + +Copyright (c) 2007 David Chisnall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/chapter11/Makefile b/chapter11/Makefile new file mode 100644 index 0000000..2d915b3 --- /dev/null +++ b/chapter11/Makefile @@ -0,0 +1,2 @@ +enumerate_vms: enumerate_vms.c + c99 $^ -lxenapi -lcurl -o $@ diff --git a/chapter11/enumerate_vms.c b/chapter11/enumerate_vms.c new file mode 100644 index 0000000..4a90ca3 --- /dev/null +++ b/chapter11/enumerate_vms.c @@ -0,0 +1,93 @@ +#include + +#include +#include + +typedef struct +{ + xen_result_func func; + void *handle; +} xen_comms; + +static char *url; + +static size_t +write_func(void *ptr, size_t size, size_t nmemb, xen_comms *comms) +{ + size_t n = size * nmemb; + return comms->func(ptr, n, comms->handle) ? n : 0; +} + + +static int +call_func(const void *data, size_t len, void *user_handle, + void *result_handle, xen_result_func result_func) +{ + (void)user_handle; + + CURL *curl = curl_easy_init(); + if (!curl) { + return -1; + } + + xen_comms comms = { + .func = result_func, + .handle = result_handle + }; + + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_func); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &comms); + curl_easy_setopt(curl, CURLOPT_POST, 1); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data); + curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, len); + + CURLcode result = curl_easy_perform(curl); + + curl_easy_cleanup(curl); + + return result; +} + +int main(int argc, char **argv) +{ + if (argc != 4) + { + fprintf(stderr, "Usage:\n\n%s \n", argv[0]); + } + + url = argv[1]; + + /* General setup */ + xen_init(); + curl_global_init(CURL_GLOBAL_ALL); + + xen_session *session = + xen_session_login_with_password(call_func, NULL, argv[2], 
argv[3]); + + if(session->ok) + { + /* Get the host */ + xen_host host; + xen_session_get_this_host(session, &host, session); + /* Get the set of VMs */ + struct xen_vm_set * VMs; + xen_host_get_resident_vms(session, &VMs, host); + /* Print the names */ + for(unsigned int i=0 ; isize ; i++) + { + char * name; + xen_host_get_name_label(session, &name, host); + printf("VM %d: %s\n", i, name); + } + } + else + { + printf(stderr, "Connection failed\n"); + } + xen_session_logout(session); + curl_global_cleanup(); + xen_fini(); + return 0; +} diff --git a/chapter12/sched_trivial.c b/chapter12/sched_trivial.c new file mode 100644 index 0000000..8daf2cf --- /dev/null +++ b/chapter12/sched_trivial.c @@ -0,0 +1,90 @@ +#include +#include +#include + + +/* CPU Run Queue */ +static struct vcpu * vcpu_list_head = NULL; +static struct vcpu * vcpu_list_tail = NULL; +unsigned int vcpus = 0; +#define VCPU_NEXT(_vcpu) ((struct vcpu*)_vcpu->sched_priv) + +/* Add a VCPU */ +int trivial_init_vcpu(struct vcpu * v) +{ + if (vcpu_list_head == NULL) + { + vcpu_list_head = vcpu_list_tail = v; + } + else + { + vcpu_list_tail->sched_priv = vcpu_list_tail = v; + } + v->sched_priv = NULL; + return 0; +} + +/* Remove a VCPU */ +void trivial_destroy_vcpu(struct vcpu * v) +{ + if(v == vcpu_list_head) + { + vcpu_list_head = VCPU_NEXT(v); + } + else + { + struct vcpu * last = NULL; + struct vcpu * current = vcpu_list_head; + while(current != v && current != NULL) + { + last = current; + current = VCPU_NEXT(current); + } + if(current != NULL) + { + last->sched_priv = VCPU_NEXT(current); + } + } +} + +/* Move the front VCPU to the back */ +static inline void increment_run_queue(void) +{ + vcpu_list_tail->sched_priv = vcpu_list_head; + vcpu_list_tail = vcpu_list_head; + vcpu_list_head = VCPU_NEXT(vcpu_list_tail); + vcpu_list_tail->sched_priv = NULL; +} + +/* Pick a VCPU to run */ +struct task_slice trivial_do_schedule(s_time_t) +{ + struct task_slice ret; + /* Fixed-size quantum */ + ret.time = 
MILLISECS(10); + struct * vcpu head = vcpu_list_head; + do + { + /* Find a runnable VCPU */ + increment_run_queue(); + if(vcpu_runnable(vcpu_list_head)) + { + ret.task = vcpu_list_head; + } + } while(head != vcpu_list_head); + /* Return the idle task if there isn't one */ + ret.task = ((struct vcpu*)__get_per_cpu(schedule_data)).idle); + return ret; +} + +struct scheduler sched_trivial_def = { + .name = "Trivial Round Robin Scheduler", + .opt_name = "trivial", + .sched_id = XEN_SCHEDULER_SEDF, + + .init_vcpu = trivial_init_vcpu, + .destroy_vcpu = trivial_destroy_vcpu, + + .do_schedule = trivial_do_schedule, +}; + diff --git a/chapter13/isXen.c b/chapter13/isXen.c new file mode 100644 index 0000000..daa337d --- /dev/null +++ b/chapter13/isXen.c @@ -0,0 +1,30 @@ +#include +#include +#include + +typedef union +{ + uint32_t r[3]; + char string[12]; +} cpuid_t; + +#define CPUID(command, result) \ + __asm __volatile(\ + "CPUID"\ + : "=b" (result.r[0]), "=c" (cpu.r[1]), "=d" (cpu.r[2])\ + : "a" (command)); + +int main(void) +{ + cpuid_t cpu; + CPUID(0,cpu); + if(strncmp(cpu.string, "XenVMMXenVMM", 12) == 0) + { + printf("Running as a Xen HVM guest\n"); + } + else + { + printf("Running on native hardware or a non-Xen hypervisor.\n"); + } + return 0; +} diff --git a/chapter2/Makefile b/chapter2/Makefile new file mode 100644 index 0000000..7f6f9e8 --- /dev/null +++ b/chapter2/Makefile @@ -0,0 +1,15 @@ +CPPFLAGS += -I../xen/xen/include/public +LDFLAGS += -nostdlib -T example.lds +CFLAGS += -std=c99 +ASFLAGS = -D__ASSEMBLY__ + +.PHONY: all + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter2/bootstrap.x86_32.S b/chapter2/bootstrap.x86_32.S new file mode 100644 index 0000000..710479c --- /dev/null +++ b/chapter2/bootstrap.x86_32.S @@ -0,0 +1,38 @@ +#include + + +.section __xen_guest + .ascii "GUEST_OS=Hacking_Xen_Example" + .ascii ",XEN_VER=xen-3.0" + .ascii 
",VIRT_BASE=0x0" + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" + .ascii ",PAE=no" + .ascii ",LOADER=generic" + .byte 0 +.text + +.globl _start, shared_info, hypercall_page + +_start: + cld + lss stack_start,%esp + push %esi + call start_kernel + +stack_start: + .long stack+8192, FLAT_KERNEL_SS + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C diff --git a/chapter2/debug.h b/chapter2/debug.h new file mode 100644 index 0000000..5f14fc8 --- /dev/null +++ b/chapter2/debug.h @@ -0,0 +1,25 @@ +#include +#include + +#define __STR(x) #x +#define STR(x) __STR(x) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} diff --git a/chapter2/domain_config b/chapter2/domain_config new file mode 100644 index 0000000..226388b --- /dev/null +++ b/chapter2/domain_config @@ -0,0 +1,18 @@ +# -*- mode: python; -*- +#==================================================== +#Python configuration setup for 'xm create'. This +#script sets the parameters used when a domain is +#created using 'xm create'. You use a separate script +#for each domain you want to create, or you can set the +#parameters for the domain on the xm command line. +#==================================================== +#Kernel image file. +kernel = "testkernel" +# Initial memory allocation (in megabytes) for the new +# domain. +memory = 32 +# A name for your domain. 
All domains must have +# different names. +name = "Simplest_Kernel" + +on_crash = 'destroy' diff --git a/chapter2/example.lds b/chapter2/example.lds new file mode 100644 index 0000000..63bf480 --- /dev/null +++ b/chapter2/example.lds @@ -0,0 +1,27 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . = 0x0; /* Start of the output file */ + + _text = .; /* Text and read-only data */ + + .text : { + *(.text) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { /* Read only data section */ + *(.rodata) + *(.rodata.*) + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + +} diff --git a/chapter2/kernel.c b/chapter2/kernel.c new file mode 100644 index 0000000..2f52c01 --- /dev/null +++ b/chapter2/kernel.c @@ -0,0 +1,13 @@ +#include +#include +#include "debug.h" + +/* Some static space for the stack */ +char stack[8192]; + +/* Main kernel entry point, called by trampoline */ +void start_kernel(start_info_t * start_info) +{ + HYPERVISOR_console_io(CONSOLEIO_write,12,"Hello World\n"); + while(1); +} diff --git a/chapter3/gettimeofday.c b/chapter3/gettimeofday.c new file mode 100644 index 0000000..591afe6 --- /dev/null +++ b/chapter3/gettimeofday.c @@ -0,0 +1,63 @@ +/* + * gettimeofday() example implementation. 
+ * Requires the shared info page to be mapped and stored in a global variable + * + */ + + +#include +#include + +extern shared_info_t * shared_info; + + +#define NANOSECONDS(tsc) (tsc << shared_info->cpu_info[0].time.tsc_shift)\ + * shared_info->cpu_info[0].time.tsc_to_system_mul + +#define RDTSC(x) asm volatile ("RDTSC":"=A"(tsc)) + +int gettimeofday(struct timeval *tp, struct timezone *tzp) +{ + uint64_t tsc; + /* Get the time values from the shared info page */ + uint32_t version, wc_version; + uint32_t seconds, nanoseconds, system_time; + uint64_t old_tsc; + /* Loop until we can read all required values from the same update */ + do + { + /* Spin if the time value is being updated */ + do + { + wc_version = shared_info->wc_version; + version = shared_info->cpu_info[0].time.version; + } while( + version & 1 == 1 + || + wc_version & 1 == 1); + /* Read the values */ + seconds = shared_info->wc_sec; + nanoseconds = shared_info->wc_nsec; + system_time = shared_info->cpu_info[0].time.system_time; + old_tsc = shared_info->cpu_info[0].time.tsc_timestamp; + } while( + version != shared_info->cpu_info[0].time.version + || + wc_version != shared_info->wc_version + ); + /* Get the current TSC value */ + RDTSC(tsc); + /* Get the number of elapsed cycles */ + tsc -= old_tsc; + /* Update the system time */ + system_time += NANOSECONDS(tsc); + /* Update the nanosecond time */ + nanoseconds += system_time; + /* Move complete seconds to the second counter */ + seconds += nanoseconds / 1000000000; + nanoseconds = nanoseconds % 1000000000; + /* Return second and millisecond values */ + tp->tv_sec = seconds; + tp->tv_usec = nanoseconds * 1000; + return 0; +} diff --git a/chapter4/mapping.c b/chapter4/mapping.c new file mode 100644 index 0000000..e607c9f --- /dev/null +++ b/chapter4/mapping.c @@ -0,0 +1,27 @@ +#include + +grant_handle_t map(domid_t friend, + unsigned int entry, + void * shared_page, + grant_handle_t * handle) +{ + /* Set up the mapping operation */ + 
gnttab_map_grant_ref_t map_op; + map_op.host_addr = shared_page; + map_op.flags = GNTMAP_host_map; + map_op.ref = entry; + map_op.dom = friend; + /* Perform the map */ + HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op,1); + /* Check if it worked */ + if(map_op.status != GNTST_okay) + { + return -1; + } + else + { + /* Return the handle */ + *handle = map_op.handle; + return 0; + } +} diff --git a/chapter4/offering.c b/chapter4/offering.c new file mode 100644 index 0000000..0b18b5f --- /dev/null +++ b/chapter4/offering.c @@ -0,0 +1,23 @@ +#include + +extern void * shared_page; +extern grant_entry_t * grant_table; + +void offer_page() +{ + uint16_t flags; + /* Create the grant table */ + gnttab_setup_table_t setup_op; + + setup_op.dom = DOMID_SELF; + setup_op.nr_frames = 1; + setup_op.frame_list = grant_table; + + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup_op, 1); + + /* Offer the grant */ + grant_table[0].domid = DOMID_FRIEND; + grant_table[0].frame = shared_page >> 12; + flags = GTF_permit_access & GTF_reading & GTF_writing; + grant_table[0].flags = flags; +} diff --git a/chapter6/.exrc b/chapter6/.exrc new file mode 100644 index 0000000..276de1e --- /dev/null +++ b/chapter6/.exrc @@ -0,0 +1,16 @@ +if &cp | set nocp | endif +let s:cpo_save=&cpo +set cpo&vim +nmap gx NetrwBrowseX +nnoremap NetrwBrowseX :call netrw#NetBrowseX(expand(""),0) +let &cpo=s:cpo_save +unlet s:cpo_save +set autoindent +set backspace=indent,eol,start +set fileencodings=utf-8,latin1 +set helplang=en +set history=50 +set hlsearch +set ruler +set viminfo='20,\"50 +" vim: set ft=vim : diff --git a/chapter6/Makefile b/chapter6/Makefile new file mode 100644 index 0000000..70cb45e --- /dev/null +++ b/chapter6/Makefile @@ -0,0 +1,17 @@ +#CPPFLAGS += -I../xen/xen/include/ -I../xen/xen/include/asm +CPPFLAGS += -Iinclude -Iinclude/x86 -DCONFIG_X86_PAE -D__XEN_INTERFACE_VERSION__=0x00030203 +LDFLAGS += -nostdlib -T loader.lds -g +CFLAGS += -std=c99 -g +ASFLAGS = -D__ASSEMBLY__ + 
+.PHONY: all + + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o console.o event.o traps.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter6/Makefile~ b/chapter6/Makefile~ new file mode 100644 index 0000000..60d4a5c --- /dev/null +++ b/chapter6/Makefile~ @@ -0,0 +1,17 @@ +#CPPFLAGS += -I../xen/xen/include/ -I../xen/xen/include/asm +CPPFLAGS += -Iinclude -Iinclude/x86 -DCONFIG_X86_PAE -D__XEN_INTERFACE_VERSION__=0x00030203 +LDFLAGS += -nostdlib -T loader.lds -g +CFLAGS += -std=c99 -g +ASFLAGS = -D__ASSEMBLY__ + +.PHONY: all + + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o console.o event.o traps.o xenstore.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter6/bootstrap.x86_32.S b/chapter6/bootstrap.x86_32.S new file mode 100644 index 0000000..f5875b5 --- /dev/null +++ b/chapter6/bootstrap.x86_32.S @@ -0,0 +1,276 @@ +#include + +.section __xen_guest + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */ + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif + .ascii ",LOADER=generic" + .byte 0 +.text + +.globl _start, shared_info, hypercall_page + +_start: + cld + lss stack_start,%esp + push %esi + call start_kernel + +stack_start: + .long stack+8192, FLAT_KERNEL_SS + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! 
:-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C + +#define ENTRY(X) .globl X ; X : + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(FLAT_KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ + popl %ds; \ + popl %es; \ + addl $4,%esp; \ + iret; \ + +ENTRY(divide_error) + pushl $0 # no error code + pushl $do_divide_error +do_exception: + pushl %ds + pushl %eax + xorl %eax, %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es, %ecx + movl ES(%esp), %edi # get the function address + movl ORIG_EAX(%esp), %edx # get the error code + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl $(FLAT_KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + movl %esp,%eax # pt_regs pointer + pushl %edx + pushl %eax + call *%edi + jmp ret_from_exception + +ret_from_exception: + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti + RESTORE_ALL + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. 
+# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl HYPERVISOR_shared_info,%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti +safesti:movb $0,1(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + testb $0xFF,(%esi) + jnz 14f # process more events if necessary... + RESTORE_ALL +14: movb $1,1(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. 
+critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00,0x00 # testb $0xff,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. 
+ENTRY(failsafe_callback) + pop %ds + pop %es + pop %fs + pop %gs + iret + +ENTRY(coprocessor_error) + pushl $0 + pushl $do_coprocessor_error + jmp do_exception + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $do_simd_coprocessor_error + jmp do_exception + +ENTRY(device_not_available) + iret + +ENTRY(debug) + pushl $0 + pushl $do_debug + jmp do_exception + +ENTRY(int3) + pushl $0 + pushl $do_int3 + jmp do_exception + +ENTRY(overflow) + pushl $0 + pushl $do_overflow + jmp do_exception + +ENTRY(bounds) + pushl $0 + pushl $do_bounds + jmp do_exception + +ENTRY(invalid_op) + pushl $0 + pushl $do_invalid_op + jmp do_exception + + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $do_coprocessor_segment_overrun + jmp do_exception + + +ENTRY(invalid_TSS) + pushl $do_invalid_TSS + jmp do_exception + + +ENTRY(segment_not_present) + pushl $do_segment_not_present + jmp do_exception + + +ENTRY(stack_segment) + pushl $do_stack_segment + jmp do_exception + + +ENTRY(general_protection) + pushl $do_general_protection + jmp do_exception + + +ENTRY(alignment_check) + pushl $do_alignment_check + jmp do_exception + + +ENTRY(page_fault) + pushl $do_page_fault + jmp do_exception + +ENTRY(machine_check) + pushl $0 + pushl $do_machine_check + jmp do_exception + + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $do_spurious_interrupt_bug + jmp do_exception diff --git a/chapter6/console.c b/chapter6/console.c new file mode 100644 index 0000000..431c4b3 --- /dev/null +++ b/chapter6/console.c @@ -0,0 +1,106 @@ +#include "console.h" +#include +#include +#include "event.h" + +static evtchn_port_t console_evt; +extern char _text; +struct xencons_interface * console; + +/* Initialise the console */ +int console_init(start_info_t * start) +{ + console = (struct xencons_interface*) + ((machine_to_phys_mapping[start->console.domU.mfn] << 12) + + + ((unsigned long)&_text)); + console_evt = start->console.domU.evtchn; + /* TODO: Set up the event channel */ + return 0; +} + +/* Write a 
NULL-terminated string */ +int console_write(char * message) +{ + struct evtchn_send event; + event.port = console_evt; + int length = 0; + while(*message != '\0') + { + /* Wait for the back end to clear enough space in the buffer */ + XENCONS_RING_IDX data; + do + { + data = console->out_prod - console->out_cons; + HYPERVISOR_event_channel_op(EVTCHNOP_send, &event); + mb(); + } while (data >= sizeof(console->out)); + /* Copy the byte */ + int ring_index = MASK_XENCONS_IDX(console->out_prod, console->out); + console->out[ring_index] = *message; + /* Ensure that the data really is in the ring before continuing */ + wmb(); + /* Increment input and output pointers */ + console->out_prod++; + length++; + message++; + } + HYPERVISOR_event_channel_op(EVTCHNOP_send, &event); + return length; +} + +/* Block while data is in the out buffer */ +void console_flush(void) +{ + /* While there is data in the out channel */ + while(console->out_cons < console->out_prod) + { + /* Let other processes run */ + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + mb(); + } +} + +char atoc(int n) +{ + switch(n) + { + case 0: return '0'; + case 1: return '1'; + case 2: return '2'; + case 3: return '3'; + case 4: return '4'; + case 5: return '5'; + case 6: return '6'; + case 7: return '7'; + case 8: return '8'; + case 9: return '9'; + default: return 'x'; + } +} + +void console_write_int(unsigned int n) +{ + char buffer[11]; + int digits = 0; + unsigned int digit = 1000000000; + int mid = 0; + for(int i=0 ; i<10 ; i++) + { + if(n >= digit || mid != 0) + { + char digit_value = n / digit; + buffer[digits] = atoc(digit_value); + n -= digit_value * digit; + digits++; + mid = 1; + } + digit /= 10; + } + if(digits == 0) + { + buffer[digits++] = '0'; + } + buffer[digits] = '\0'; + console_write(buffer); +} diff --git a/chapter6/console.h b/chapter6/console.h new file mode 100644 index 0000000..37b5a64 --- /dev/null +++ b/chapter6/console.h @@ -0,0 +1,8 @@ +#include +#include +#include +#include + +int 
console_init(start_info_t * start); +int console_write(char * message); +void console_flush(void); diff --git a/chapter6/domain_config b/chapter6/domain_config new file mode 100644 index 0000000..226388b --- /dev/null +++ b/chapter6/domain_config @@ -0,0 +1,18 @@ +# -*- mode: python; -*- +#==================================================== +#Python configuration setup for 'xm create'. This +#script sets the parameters used when a domain is +#created using 'xm create'. You use a separate script +#for each domain you want to create, or you can set the +#parameters for the domain on the xm command line. +#==================================================== +#Kernel image file. +kernel = "testkernel" +# Initial memory allocation (in megabytes) for the new +# domain. +memory = 32 +# A name for your domain. All domains must have +# different names. +name = "Simplest_Kernel" + +on_crash = 'destroy' diff --git a/chapter6/event.c b/chapter6/event.c new file mode 100644 index 0000000..ef353c2 --- /dev/null +++ b/chapter6/event.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include "event.h" +#include "console.h" +#include + +#define NUM_CHANNELS (1024) + +//x86 only +//Set bit 'bit' in bitfield 'field' +#define SET_BIT(bit,field) __asm__ __volatile__ ("lock btsl %1,%0":"=m"(field):"Ir"(bit):"memory" ); +#define CLEAR_BIT(field, bit) __asm__ __volatile__ ("lock btrl %1,%0":"=m" ((field)):"Ir"((bit)):"memory") + +/* Locations in the bootstrapping code */ +extern volatile shared_info_t shared_info; +void hypervisor_callback(void); +void failsafe_callback(void); + + +static evtchn_handler_t handlers[NUM_CHANNELS]; + +void EVT_IGN(evtchn_port_t port, struct pt_regs * regs) {}; + +/* Initialise the event handlers */ +void init_events(void) +{ + /* Set the event delivery callbacks */ + HYPERVISOR_set_callbacks( + FLAT_KERNEL_CS, (unsigned long)hypervisor_callback, + FLAT_KERNEL_CS, (unsigned long)failsafe_callback); + /* Set all handlers to ignore, and mask them */ + 
for(unsigned int i=0 ; ievtchn_upcall_pending = 0; + /* Set the pending selector to 0 and get the old value atomically */ + pending_selector = xchg(&vcpu->evtchn_pending_sel, 0); + while(pending_selector != 0) + { + /* Get the first bit of the selector and clear it */ + next_event_offset = first_bit(pending_selector); + pending_selector &= ~(1 << next_event_offset); + unsigned int event; + + /* While there are events pending on unmasked channels */ + while(( event = + (shared_info.evtchn_pending[pending_selector] + & + ~shared_info.evtchn_mask[pending_selector])) + != 0) + { + /* Find the first waiting event */ + unsigned int event_offset = first_bit(event); + + /* Combine the two offsets to get the port */ + evtchn_port_t port = (pending_selector << 5) + event_offset; + /* Handler the event */ + handlers[port](port, regs); + /* Clear the pending flag */ + CLEAR_BIT(shared_info.evtchn_pending[0], event_offset); + } + } +} diff --git a/chapter6/event.h b/chapter6/event.h new file mode 100644 index 0000000..07ac804 --- /dev/null +++ b/chapter6/event.h @@ -0,0 +1,27 @@ +#include +#include +#include + + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +typedef void (*evtchn_handler_t)(evtchn_port_t, struct pt_regs *); + +void init_events(void); +void register_event(evtchn_port_t port, evtchn_handler_t handler); diff --git a/chapter6/include/barrier.h b/chapter6/include/barrier.h new file mode 100644 index 0000000..fa6b756 --- /dev/null +++ b/chapter6/include/barrier.h @@ -0,0 +1,24 @@ +/* + * Definitions of memory barriers used by Xen. 
+ */ + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ( "mfence" : : : "memory") +#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory") +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__ia64__) +#define mb() __asm__ __volatile__ ("mf" ::: "memory") +#define rmb() __asm__ __volatile__ ("mf" ::: "memory") +#define wmb() __asm__ __volatile__ ("mf" ::: "memory") +#elif defined(__powerpc__) +/* XXX loosen these up later */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") /* lwsync? */ +#define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* eieio? */ +#else +#error "Define barriers" +#endif diff --git a/chapter6/include/hypercall-x86_32.h b/chapter6/include/hypercall-x86_32.h new file mode 100644 index 0000000..417b648 --- /dev/null +++ b/chapter6/include/hypercall-x86_32.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include +#include +#include +//#include + +#define __STR(x) #x +#define STR(x) __STR(x) + +typedef uint64_t u64; +#if !defined(CONFIG_X86_PAE) +typedef struct { unsigned long pte_low; } pte_t; +#else +typedef struct { unsigned long pte_low, pte_high; } pte_t; +#endif /* CONFIG_X86_PAE */ + +#if !defined(CONFIG_X86_PAE) +#define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif +extern char hypercall_page[4096]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" 
((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned 
long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + 
return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff --git a/chapter6/include/util.h b/chapter6/include/util.h new file mode 100644 index 0000000..a76707d --- /dev/null +++ b/chapter6/include/util.h @@ -0,0 +1,27 @@ + +#define WORD_BOUNDARY (sizeof(long) - 1) + +inline void zero(char * addr, unsigned int length) +{ + /* Fill to word boundary */ + while(length > 0 && (((long)addr) & WORD_BOUNDARY)) + { + *addr = 0; + length--; + } + long * addrl = (long*)addr; + /* Zero a word at a time */ + while(length > sizeof(long)) + { + *addrl = 0; + addrl++; + length -= 4; + } + addr = (char*)addrl; + /* Zero anything else */ + while(length > 0) + { + *addr = 0; + length--; + } +} diff --git a/chapter6/include/x86_mm.h b/chapter6/include/x86_mm.h new file mode 100644 index 0000000..c4c03d3 --- /dev/null +++ b/chapter6/include/x86_mm.h @@ -0,0 +1,215 @@ +/* + * Page table manipulation macros and constants. 
+ * + * From MiniOS + */ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#else +#error "Unsupported architecture" +#endif + +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 + +#define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#if !defined(CONFIG_X86_PAE) + +#define L2_PAGETABLE_SHIFT 22 + +#define L1_PAGETABLE_ENTRIES 1024 +#define L2_PAGETABLE_ENTRIES 1024 + +#define PADDR_BITS 32 +#define PADDR_MASK (~0UL) + +#define NOT_L1_FRAMES 1 +#define PRIpte "08lx" +typedef unsigned long pgentry_t; + +#else /* defined(CONFIG_X86_PAE) */ + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +/* + * If starting from virtual address greater than 0xc0000000, + * this value will be 2 to account for final mid-level page + * directory which is always mapped in at this location. + */ +#define NOT_L1_FRAMES 3 +#define PRIpte "016llx" +typedef uint64_t pgentry_t; + +#endif /* !defined(CONFIG_X86_PAE) */ + +#elif defined(__x86_64__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +/* These are page-table limitations. Current CPUs support only 40-bit phys. 
*/ +#define PADDR_BITS 52 +#define VADDR_BITS 48 +#define PADDR_MASK ((1UL << PADDR_BITS)-1) +#define VADDR_MASK ((1UL << VADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) + +#define NOT_L1_FRAMES 3 +#define PRIpte "016lx" +typedef unsigned long pgentry_t; + +#endif + +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#endif +#if defined(__x86_64__) +#define l4_table_offset(_a) \ + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif + +#define _PAGE_PRESENT 0x001UL +#define _PAGE_RW 0x002UL +#define _PAGE_USER 0x004UL +#define _PAGE_PWT 0x008UL +#define _PAGE_PCD 0x010UL +#define _PAGE_ACCESSED 0x020UL +#define _PAGE_DIRTY 0x040UL +#define _PAGE_PAT 0x080UL +#define _PAGE_PSE 0x080UL +#define _PAGE_GLOBAL 0x100UL + +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#if defined(CONFIG_X86_PAE) +#define L3_PROT (_PAGE_PRESENT) +#endif /* CONFIG_X86_PAE */ +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif /* __i386__ || __x86_64__ */ + +#ifndef CONFIG_X86_PAE +#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) +#else +#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT) +#endif +#define PAGE_SHIFT 
L1_PAGETABLE_SHIFT +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) ((x) >> L1_PAGETABLE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* Definitions for machine and pseudophysical addresses. */ +#ifdef CONFIG_X86_PAE +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + +extern unsigned long *phys_to_machine_mapping; +extern char _text, _etext, _edata, _end; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +#define VIRT_START ((unsigned long)&_text) + +#define to_phys(x) ((unsigned long)(x)-VIRT_START) +#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) + +/* Pagetable walking. 
*/ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) + + +#endif /* _ARCH_MM_H_ */ diff --git a/chapter6/kernel.c b/chapter6/kernel.c new file mode 100644 index 0000000..252500f --- /dev/null +++ b/chapter6/kernel.c @@ -0,0 +1,33 @@ +#include "event.h" +#include "console.h" + +/* Some static space for the stack */ +char stack[8192]; + +extern shared_info_t shared_info; +extern void handle_input(evtchn_port_t port, struct pt_regs * regs); + +shared_info_t *HYPERVISOR_shared_info; + +/* Main kernel entry point, called by trampoline */ +void start_kernel(start_info_t * start_info) +{ + /* Map the shared info page */ + HYPERVISOR_update_va_mapping((unsigned long) &shared_info, + __pte(start_info->shared_info | 7), + UVMF_INVLPG); + /* Set the pointer used in the bootstrap for reenabling + * event delivery after an upcall */ + HYPERVISOR_shared_info = &shared_info; + /* Set up and unmask events */ + init_events(); + /* Initialise the console */ + console_init(start_info); + /* Write a message to check that it worked */ + console_write("Hello world!\r\n"); + /* Loop, handling events */ + while(1) + { + HYPERVISOR_sched_op(SCHEDOP_block,0); + } +} diff --git a/chapter6/loader.lds b/chapter6/loader.lds new file mode 100644 index 0000000..63bf480 --- /dev/null +++ b/chapter6/loader.lds @@ -0,0 +1,27 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . 
= 0x0; /* Start of the output file */ + + _text = .; /* Text and read-only data */ + + .text : { + *(.text) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { /* Read only data section */ + *(.rodata) + *(.rodata.*) + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + +} diff --git a/chapter6/traps.c b/chapter6/traps.c new file mode 100644 index 0000000..a4050c0 --- /dev/null +++ b/chapter6/traps.c @@ -0,0 +1,79 @@ +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. + */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + +/* Dummy implementation. Should actually do something */ +void do_divide_error(void) {} +void do_debug(void) {} +void do_int3(void) {} +void do_overflow(void) {} +void do_bounds(void) {} +void do_invalid_op(void) {} +void do_device_not_available(void) {} +void do_coprocessor_segment_overrun(void) {} +void do_invalid_TSS(void) {} +void do_segment_not_present(void) {} +void do_stack_segment(void) {} +void do_general_protection(void) {} +void do_page_fault(void) {} +void do_coprocessor_error(void) {} +void do_simd_coprocessor_error(void) {} +void do_alignment_check(void) {} +void do_spurious_interrupt_bug(void) {} +void do_machine_check(void) {} + +/* + * Submit a virtual IDT to teh hypervisor. This consists of tuples + * (interrupt vector, privilege ring, CS:EIP of handler). 
+ * The 'privilege ring' field specifies the least-privileged ring that + * can trap to that vector using a software-interrupt instruction (INT). + */ +static trap_info_t trap_table[] = { + { 0, 0, FLAT_KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, FLAT_KERNEL_CS, (unsigned long)debug }, + { 3, 3, FLAT_KERNEL_CS, (unsigned long)int3 }, + { 4, 3, FLAT_KERNEL_CS, (unsigned long)overflow }, + { 5, 3, FLAT_KERNEL_CS, (unsigned long)bounds }, + { 6, 0, FLAT_KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, FLAT_KERNEL_CS, (unsigned long)device_not_available }, + { 9, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, FLAT_KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, FLAT_KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, FLAT_KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, FLAT_KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, FLAT_KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, FLAT_KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, FLAT_KERNEL_CS, (unsigned long)alignment_check }, + { 19, 0, FLAT_KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { 0, 0, 0, 0 } +}; + +void trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); +} + diff --git a/chapter7/.exrc b/chapter7/.exrc new file mode 100644 index 0000000..276de1e --- /dev/null +++ b/chapter7/.exrc @@ -0,0 +1,16 @@ +if &cp | set nocp | endif +let s:cpo_save=&cpo +set cpo&vim +nmap gx NetrwBrowseX +nnoremap NetrwBrowseX :call netrw#NetBrowseX(expand(""),0) +let &cpo=s:cpo_save +unlet s:cpo_save +set autoindent +set backspace=indent,eol,start +set fileencodings=utf-8,latin1 +set helplang=en +set history=50 +set hlsearch +set ruler +set viminfo='20,\"50 +" vim: set ft=vim : diff --git a/chapter7/Makefile b/chapter7/Makefile new file mode 100644 index 0000000..70cb45e --- /dev/null +++ b/chapter7/Makefile @@ -0,0 +1,17 @@ +#CPPFLAGS += -I../xen/xen/include/ 
-I../xen/xen/include/asm +CPPFLAGS += -Iinclude -Iinclude/x86 -DCONFIG_X86_PAE -D__XEN_INTERFACE_VERSION__=0x00030203 +LDFLAGS += -nostdlib -T loader.lds -g +CFLAGS += -std=c99 -g +ASFLAGS = -D__ASSEMBLY__ + +.PHONY: all + + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o console.o event.o traps.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter7/Makefile~ b/chapter7/Makefile~ new file mode 100644 index 0000000..60d4a5c --- /dev/null +++ b/chapter7/Makefile~ @@ -0,0 +1,17 @@ +#CPPFLAGS += -I../xen/xen/include/ -I../xen/xen/include/asm +CPPFLAGS += -Iinclude -Iinclude/x86 -DCONFIG_X86_PAE -D__XEN_INTERFACE_VERSION__=0x00030203 +LDFLAGS += -nostdlib -T loader.lds -g +CFLAGS += -std=c99 -g +ASFLAGS = -D__ASSEMBLY__ + +.PHONY: all + + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o console.o event.o traps.o xenstore.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter7/bootstrap.x86_32.S b/chapter7/bootstrap.x86_32.S new file mode 100644 index 0000000..f5875b5 --- /dev/null +++ b/chapter7/bootstrap.x86_32.S @@ -0,0 +1,276 @@ +#include + +.section __xen_guest + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */ + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif + .ascii ",LOADER=generic" + .byte 0 +.text + +.globl _start, shared_info, hypercall_page + +_start: + cld + lss stack_start,%esp + push %esi + call start_kernel + +stack_start: + .long stack+8192, FLAT_KERNEL_SS + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! 
:-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C + +#define ENTRY(X) .globl X ; X : + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(FLAT_KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ + popl %ds; \ + popl %es; \ + addl $4,%esp; \ + iret; \ + +ENTRY(divide_error) + pushl $0 # no error code + pushl $do_divide_error +do_exception: + pushl %ds + pushl %eax + xorl %eax, %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es, %ecx + movl ES(%esp), %edi # get the function address + movl ORIG_EAX(%esp), %edx # get the error code + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl $(FLAT_KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + movl %esp,%eax # pt_regs pointer + pushl %edx + pushl %eax + call *%edi + jmp ret_from_exception + +ret_from_exception: + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti + RESTORE_ALL + +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. 
+# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl HYPERVISOR_shared_info,%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti +safesti:movb $0,1(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + testb $0xFF,(%esi) + jnz 14f # process more events if necessary... + RESTORE_ALL +14: movb $1,1(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. 
+critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00,0x00 # testb $0xff,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. 
+ENTRY(failsafe_callback) + pop %ds + pop %es + pop %fs + pop %gs + iret + +ENTRY(coprocessor_error) + pushl $0 + pushl $do_coprocessor_error + jmp do_exception + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $do_simd_coprocessor_error + jmp do_exception + +ENTRY(device_not_available) + iret + +ENTRY(debug) + pushl $0 + pushl $do_debug + jmp do_exception + +ENTRY(int3) + pushl $0 + pushl $do_int3 + jmp do_exception + +ENTRY(overflow) + pushl $0 + pushl $do_overflow + jmp do_exception + +ENTRY(bounds) + pushl $0 + pushl $do_bounds + jmp do_exception + +ENTRY(invalid_op) + pushl $0 + pushl $do_invalid_op + jmp do_exception + + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $do_coprocessor_segment_overrun + jmp do_exception + + +ENTRY(invalid_TSS) + pushl $do_invalid_TSS + jmp do_exception + + +ENTRY(segment_not_present) + pushl $do_segment_not_present + jmp do_exception + + +ENTRY(stack_segment) + pushl $do_stack_segment + jmp do_exception + + +ENTRY(general_protection) + pushl $do_general_protection + jmp do_exception + + +ENTRY(alignment_check) + pushl $do_alignment_check + jmp do_exception + + +ENTRY(page_fault) + pushl $do_page_fault + jmp do_exception + +ENTRY(machine_check) + pushl $0 + pushl $do_machine_check + jmp do_exception + + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $do_spurious_interrupt_bug + jmp do_exception diff --git a/chapter7/console.c b/chapter7/console.c new file mode 100644 index 0000000..769f4c0 --- /dev/null +++ b/chapter7/console.c @@ -0,0 +1,140 @@ +#include "console.h" +#include +#include +#include "event.h" + +static evtchn_port_t console_evt; +extern char _text; +struct xencons_interface * console; + +/* Event received on console event channel */ +void handle_input(evtchn_port_t port, struct pt_regs * regs) +{ + XENCONS_RING_IDX cons = console->in_cons; + XENCONS_RING_IDX prod = console->in_prod; + int length = prod - cons; + if(length > 0) + { + char buffer[10]; + console_read(buffer, ++length); + 
console_write(buffer); + } +} + +/* Initialise the console */ +int console_init(start_info_t * start) +{ + console = (struct xencons_interface*) + ((machine_to_phys_mapping[start->console.domU.mfn] << 12) + + + ((unsigned long)&_text)); + console_evt = start->console.domU.evtchn; + /* Set up the event channel */ + register_event(console_evt, handle_input); + return 0; +} + +/* Write a NULL-terminated string */ +int console_write(char * message) +{ + struct evtchn_send event; + event.port = console_evt; + int length = 0; + while(*message != '\0') + { + /* Wait for the back end to clear enough space in the buffer */ + XENCONS_RING_IDX data; + do + { + data = console->out_prod - console->out_cons; + HYPERVISOR_event_channel_op(EVTCHNOP_send, &event); + mb(); + } while (data >= sizeof(console->out)); + /* Copy the byte */ + int ring_index = MASK_XENCONS_IDX(console->out_prod, console->out); + console->out[ring_index] = *message; + /* Ensure that the data really is in the ring before continuing */ + wmb(); + /* Increment input and output pointers */ + console->out_prod++; + length++; + message++; + } + HYPERVISOR_event_channel_op(EVTCHNOP_send, &event); + return length; +} + +/* Read up to length characters from the console into buffer */ +int console_read(char * buffer, int n) +{ + int length = 0; + while(n > length && console->in_cons < console->in_prod) + { + /* Copy the character */ + int ring_index = MASK_XENCONS_IDX(console->in_cons, console->in); + *buffer = console->in[ring_index]; + buffer++; + length++; + console->in_cons++; + wmb(); + } + /* NULL-terminate the string */ + *buffer = '\0'; + return length; +} + +/* Block while data is in the out buffer */ +void console_flush(void) +{ + /* While there is data in the out channel */ + while(console->out_cons < console->out_prod) + { + /* Let other processes run */ + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + mb(); + } +} + +char atoc(int n) +{ + switch(n) + { + case 0: return '0'; + case 1: return '1'; + case 2: 
return '2'; + case 3: return '3'; + case 4: return '4'; + case 5: return '5'; + case 6: return '6'; + case 7: return '7'; + case 8: return '8'; + case 9: return '9'; + default: return 'x'; + } +} + +void console_write_int(unsigned int n) +{ + char buffer[11]; + int digits = 0; + unsigned int digit = 1000000000; + int mid = 0; + for(int i=0 ; i<10 ; i++) + { + if(n >= digit || mid != 0) + { + char digit_value = n / digit; + buffer[digits] = atoc(digit_value); + n -= digit_value * digit; + digits++; + mid = 1; + } + digit /= 10; + } + if(digits == 0) + { + buffer[digits++] = '0'; + } + buffer[digits] = '\0'; + console_write(buffer); +} diff --git a/chapter7/console.h b/chapter7/console.h new file mode 100644 index 0000000..37b5a64 --- /dev/null +++ b/chapter7/console.h @@ -0,0 +1,8 @@ +#include +#include +#include +#include + +int console_init(start_info_t * start); +int console_write(char * message); +void console_flush(void); diff --git a/chapter7/domain_config b/chapter7/domain_config new file mode 100644 index 0000000..226388b --- /dev/null +++ b/chapter7/domain_config @@ -0,0 +1,18 @@ +# -*- mode: python; -*- +#==================================================== +#Python configuration setup for 'xm create'. This +#script sets the parameters used when a domain is +#created using 'xm create'. You use a separate script +#for each domain you want to create, or you can set the +#parameters for the domain on the xm command line. +#==================================================== +#Kernel image file. +kernel = "testkernel" +# Initial memory allocation (in megabytes) for the new +# domain. +memory = 32 +# A name for your domain. All domains must have +# different names. 
+name = "Simplest_Kernel" + +on_crash = 'destroy' diff --git a/chapter7/event.c b/chapter7/event.c new file mode 100644 index 0000000..4f85e89 --- /dev/null +++ b/chapter7/event.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include "event.h" +#include "console.h" +#include + +#define NUM_CHANNELS (1024) + +//x86 only +//Set bit 'bit' in bitfield 'field' +#define SET_BIT(bit,field) __asm__ __volatile__ ("lock btsl %1,%0":"=m"(field):"Ir"(bit):"memory" ); +#define CLEAR_BIT(field, bit) __asm__ __volatile__ ("lock btrl %1,%0":"=m" ((field)):"Ir"((bit)):"memory") + +/* Locations in the bootstrapping code */ +extern volatile shared_info_t shared_info; +void hypervisor_callback(void); +void failsafe_callback(void); + + +static evtchn_handler_t handlers[NUM_CHANNELS]; + +void EVT_IGN(evtchn_port_t port, struct pt_regs * regs) {}; + +/* Initialise the event handlers */ +void init_events(void) +{ + /* Set the event delivery callbacks */ + HYPERVISOR_set_callbacks( + FLAT_KERNEL_CS, (unsigned long)hypervisor_callback, + FLAT_KERNEL_CS, (unsigned long)failsafe_callback); + /* Set all handlers to ignore, and mask them */ + for(unsigned int i=0 ; ievtchn_upcall_pending = 0; + /* Set the pending selector to 0 and get the old value atomically */ + pending_selector = xchg(&vcpu->evtchn_pending_sel, 0); + while(pending_selector != 0) + { + /* Get the first bit of the selector and clear it */ + next_event_offset = first_bit(pending_selector); + pending_selector &= ~(1 << next_event_offset); + unsigned int event; + + /* While there are events pending on unmasked channels */ + while(( event = + (shared_info.evtchn_pending[pending_selector] + & + ~shared_info.evtchn_mask[pending_selector])) + != 0) + { + /* Find the first waiting event */ + unsigned int event_offset = first_bit(event); + + /* Combine the two offsets to get the port */ + evtchn_port_t port = (pending_selector << 5) + event_offset; + /* Handle the event */ + handlers[port](port, regs); + /* Clear the pending flag 
*/ + CLEAR_BIT(shared_info.evtchn_pending[0], event_offset); + } + } +} diff --git a/chapter7/event.h b/chapter7/event.h new file mode 100644 index 0000000..07ac804 --- /dev/null +++ b/chapter7/event.h @@ -0,0 +1,27 @@ +#include +#include +#include + + +struct pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +typedef void (*evtchn_handler_t)(evtchn_port_t, struct pt_regs *); + +void init_events(void); +void register_event(evtchn_port_t port, evtchn_handler_t handler); diff --git a/chapter7/include/barrier.h b/chapter7/include/barrier.h new file mode 100644 index 0000000..fa6b756 --- /dev/null +++ b/chapter7/include/barrier.h @@ -0,0 +1,24 @@ +/* + * Definitions of memory barriers used by Xen. + */ + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ( "mfence" : : : "memory") +#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory") +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__ia64__) +#define mb() __asm__ __volatile__ ("mf" ::: "memory") +#define rmb() __asm__ __volatile__ ("mf" ::: "memory") +#define wmb() __asm__ __volatile__ ("mf" ::: "memory") +#elif defined(__powerpc__) +/* XXX loosen these up later */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") /* lwsync? */ +#define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* eieio? 
*/ +#else +#error "Define barriers" +#endif diff --git a/chapter7/include/hypercall-x86_32.h b/chapter7/include/hypercall-x86_32.h new file mode 100644 index 0000000..417b648 --- /dev/null +++ b/chapter7/include/hypercall-x86_32.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include +#include +#include +//#include + +#define __STR(x) #x +#define STR(x) __STR(x) + +typedef uint64_t u64; +#if !defined(CONFIG_X86_PAE) +typedef struct { unsigned long pte_low; } pte_t; +#else +typedef struct { unsigned long pte_low, pte_high; } pte_t; +#endif /* CONFIG_X86_PAE */ + +#if !defined(CONFIG_X86_PAE) +#define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif +extern char hypercall_page[4096]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" 
((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned 
long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + 
return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff --git a/chapter7/include/util.h b/chapter7/include/util.h new file mode 100644 index 0000000..a76707d --- /dev/null +++ b/chapter7/include/util.h @@ -0,0 +1,27 @@ + +#define WORD_BOUNDARY (sizeof(long) - 1) + +inline void zero(char * addr, unsigned int length) +{ + /* Fill to word boundary */ + while(length > 0 && (((long)addr) & WORD_BOUNDARY)) + { + *addr = 0; + length--; + } + long * addrl = (long*)addr; + /* Zero a word at a time */ + while(length > sizeof(long)) + { + *addrl = 0; + addrl++; + length -= 4; + } + addr = (char*)addrl; + /* Zero anything else */ + while(length > 0) + { + *addr = 0; + length--; + } +} diff --git a/chapter7/include/x86_mm.h b/chapter7/include/x86_mm.h new file mode 100644 index 0000000..c4c03d3 --- /dev/null +++ b/chapter7/include/x86_mm.h @@ -0,0 +1,215 @@ +/* + * Page table manipulation macros and constants. 
+ * + * From MiniOS + */ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#else +#error "Unsupported architecture" +#endif + +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 + +#define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#if !defined(CONFIG_X86_PAE) + +#define L2_PAGETABLE_SHIFT 22 + +#define L1_PAGETABLE_ENTRIES 1024 +#define L2_PAGETABLE_ENTRIES 1024 + +#define PADDR_BITS 32 +#define PADDR_MASK (~0UL) + +#define NOT_L1_FRAMES 1 +#define PRIpte "08lx" +typedef unsigned long pgentry_t; + +#else /* defined(CONFIG_X86_PAE) */ + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +/* + * If starting from virtual address greater than 0xc0000000, + * this value will be 2 to account for final mid-level page + * directory which is always mapped in at this location. + */ +#define NOT_L1_FRAMES 3 +#define PRIpte "016llx" +typedef uint64_t pgentry_t; + +#endif /* !defined(CONFIG_X86_PAE) */ + +#elif defined(__x86_64__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +/* These are page-table limitations. Current CPUs support only 40-bit phys. 
*/ +#define PADDR_BITS 52 +#define VADDR_BITS 48 +#define PADDR_MASK ((1UL << PADDR_BITS)-1) +#define VADDR_MASK ((1UL << VADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) + +#define NOT_L1_FRAMES 3 +#define PRIpte "016lx" +typedef unsigned long pgentry_t; + +#endif + +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#endif +#if defined(__x86_64__) +#define l4_table_offset(_a) \ + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif + +#define _PAGE_PRESENT 0x001UL +#define _PAGE_RW 0x002UL +#define _PAGE_USER 0x004UL +#define _PAGE_PWT 0x008UL +#define _PAGE_PCD 0x010UL +#define _PAGE_ACCESSED 0x020UL +#define _PAGE_DIRTY 0x040UL +#define _PAGE_PAT 0x080UL +#define _PAGE_PSE 0x080UL +#define _PAGE_GLOBAL 0x100UL + +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#if defined(CONFIG_X86_PAE) +#define L3_PROT (_PAGE_PRESENT) +#endif /* CONFIG_X86_PAE */ +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif /* __i386__ || __x86_64__ */ + +#ifndef CONFIG_X86_PAE +#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) +#else +#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT) +#endif +#define PAGE_SHIFT 
L1_PAGETABLE_SHIFT +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) ((x) >> L1_PAGETABLE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* Definitions for machine and pseudophysical addresses. */ +#ifdef CONFIG_X86_PAE +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + +extern unsigned long *phys_to_machine_mapping; +extern char _text, _etext, _edata, _end; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +#define VIRT_START ((unsigned long)&_text) + +#define to_phys(x) ((unsigned long)(x)-VIRT_START) +#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) + +/* Pagetable walking. 
*/ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) + + +#endif /* _ARCH_MM_H_ */ diff --git a/chapter7/kernel.c b/chapter7/kernel.c new file mode 100644 index 0000000..252500f --- /dev/null +++ b/chapter7/kernel.c @@ -0,0 +1,33 @@ +#include "event.h" +#include "console.h" + +/* Some static space for the stack */ +char stack[8192]; + +extern shared_info_t shared_info; +extern void handle_input(evtchn_port_t port, struct pt_regs * regs); + +shared_info_t *HYPERVISOR_shared_info; + +/* Main kernel entry point, called by trampoline */ +void start_kernel(start_info_t * start_info) +{ + /* Map the shared info page */ + HYPERVISOR_update_va_mapping((unsigned long) &shared_info, + __pte(start_info->shared_info | 7), + UVMF_INVLPG); + /* Set the pointer used in the bootstrap for reenabling + * event delivery after an upcall */ + HYPERVISOR_shared_info = &shared_info; + /* Set up and unmask events */ + init_events(); + /* Initialise the console */ + console_init(start_info); + /* Write a message to check that it worked */ + console_write("Hello world!\r\n"); + /* Loop, handling events */ + while(1) + { + HYPERVISOR_sched_op(SCHEDOP_block,0); + } +} diff --git a/chapter7/loader.lds b/chapter7/loader.lds new file mode 100644 index 0000000..63bf480 --- /dev/null +++ b/chapter7/loader.lds @@ -0,0 +1,27 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . 
= 0x0; /* Start of the output file */ + + _text = .; /* Text and read-only data */ + + .text : { + *(.text) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { /* Read only data section */ + *(.rodata) + *(.rodata.*) + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + +} diff --git a/chapter7/traps.c b/chapter7/traps.c new file mode 100644 index 0000000..a4050c0 --- /dev/null +++ b/chapter7/traps.c @@ -0,0 +1,79 @@ +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. + */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + +/* Dummy implementation. Should actually do something */ +void do_divide_error(void) {} +void do_debug(void) {} +void do_int3(void) {} +void do_overflow(void) {} +void do_bounds(void) {} +void do_invalid_op(void) {} +void do_device_not_available(void) {} +void do_coprocessor_segment_overrun(void) {} +void do_invalid_TSS(void) {} +void do_segment_not_present(void) {} +void do_stack_segment(void) {} +void do_general_protection(void) {} +void do_page_fault(void) {} +void do_coprocessor_error(void) {} +void do_simd_coprocessor_error(void) {} +void do_alignment_check(void) {} +void do_spurious_interrupt_bug(void) {} +void do_machine_check(void) {} + +/* + * Submit a virtual IDT to teh hypervisor. This consists of tuples + * (interrupt vector, privilege ring, CS:EIP of handler). 
+ * The 'privilege ring' field specifies the least-privileged ring that + * can trap to that vector using a software-interrupt instruction (INT). + */ +static trap_info_t trap_table[] = { + { 0, 0, FLAT_KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, FLAT_KERNEL_CS, (unsigned long)debug }, + { 3, 3, FLAT_KERNEL_CS, (unsigned long)int3 }, + { 4, 3, FLAT_KERNEL_CS, (unsigned long)overflow }, + { 5, 3, FLAT_KERNEL_CS, (unsigned long)bounds }, + { 6, 0, FLAT_KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, FLAT_KERNEL_CS, (unsigned long)device_not_available }, + { 9, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, FLAT_KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, FLAT_KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, FLAT_KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, FLAT_KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, FLAT_KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, FLAT_KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, FLAT_KERNEL_CS, (unsigned long)alignment_check }, + { 19, 0, FLAT_KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { 0, 0, 0, 0 } +}; + +void trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); +} + diff --git a/chapter8/Makefile b/chapter8/Makefile new file mode 100644 index 0000000..04546f7 --- /dev/null +++ b/chapter8/Makefile @@ -0,0 +1,17 @@ +#CPPFLAGS += -I../xen/xen/include/ -I../xen/xen/include/asm +CPPFLAGS += -Iinclude -Iinclude/x86 -DCONFIG_X86_PAE -D__XEN_INTERFACE_VERSION__=0x00030203 +LDFLAGS += -nostdlib -T loader.lds -g +CFLAGS += -std=c99 -g +ASFLAGS = -D__ASSEMBLY__ + +.PHONY: all + + +all: testkernel + +testkernel: bootstrap.x86_32.o kernel.o console.o traps.o xenstore.o + $(CC) $(LDFLAGS) $^ -o testkernel + +clean: + rm -f *.o + rm -f testkernel diff --git a/chapter8/bootstrap.x86_32.S b/chapter8/bootstrap.x86_32.S new file mode 100644 index 0000000..de57716 --- 
/dev/null +++ b/chapter8/bootstrap.x86_32.S @@ -0,0 +1,277 @@ +#include + +.section __xen_guest + .ascii "GUEST_OS=Mini-OS" + .ascii ",XEN_VER=xen-3.0" + .ascii ",VIRT_BASE=0x0" /* &_text from minios_x86_32.lds */ + .ascii ",ELF_PADDR_OFFSET=0x0" + .ascii ",HYPERCALL_PAGE=0x2" +#ifdef CONFIG_X86_PAE + .ascii ",PAE=yes" +#else + .ascii ",PAE=no" +#endif + .ascii ",LOADER=generic" + .byte 0 +.text + +.globl _start, shared_info, hypercall_page + +_start: + cld + lss stack_start,%esp + push %esi + call start_kernel + +stack_start: + .long stack+8192, FLAT_KERNEL_SS + + /* Unpleasant -- the PTE that maps this page is actually overwritten */ + /* to map the real shared-info page! :-) */ + .org 0x1000 +shared_info: + .org 0x2000 + +hypercall_page: + .org 0x3000 + +ES = 0x20 +ORIG_EAX = 0x24 +EIP = 0x28 +CS = 0x2C + +#define ENTRY(X) .globl X ; X : + +#define SAVE_ALL \ + cld; \ + pushl %es; \ + pushl %ds; \ + pushl %eax; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + movl $(FLAT_KERNEL_DS),%edx; \ + movl %edx,%ds; \ + movl %edx,%es; + +#define RESTORE_ALL \ + popl %ebx; \ + popl %ecx; \ + popl %edx; \ + popl %esi; \ + popl %edi; \ + popl %ebp; \ + popl %eax; \ + popl %ds; \ + popl %es; \ + addl $4,%esp; \ + iret; \ + +ENTRY(divide_error) + pushl $0 # no error code + pushl $do_divide_error +do_exception: + pushl %ds + pushl %eax + xorl %eax, %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + decl %eax # eax = -1 + pushl %ecx + pushl %ebx + cld + movl %es, %ecx + movl ES(%esp), %edi # get the function address + movl ORIG_EAX(%esp), %edx # get the error code + movl %eax, ORIG_EAX(%esp) + movl %ecx, ES(%esp) + movl $(FLAT_KERNEL_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + movl %esp,%eax # pt_regs pointer + pushl %edx + pushl %eax + call *%edi + jmp ret_from_exception + +ret_from_exception: + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti + RESTORE_ALL + +# A note on the "critical 
region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until weve done all processing. HOWEVER, we must enable events before +# popping the stack frame (cant be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so wed +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. +ENTRY(hypervisor_callback) + pushl %eax + SAVE_ALL + movl EIP(%esp),%eax + cmpl $scrit,%eax + jb 11f + cmpl $ecrit,%eax + jb critical_region_fixup +11: push %esp + call do_hypervisor_callback + add $4,%esp + movl shared_info,%esi + xorl %eax,%eax + movb CS(%esp),%cl + test $2,%cl # slow return to ring 2 or 3 + jne safesti +safesti:movb $0,1(%esi) # reenable event callbacks +scrit: /**** START OF CRITICAL REGION ****/ + testb $0xFF,(%esi) + jnz 14f # process more events if necessary... + RESTORE_ALL +14: movb $1,1(%esi) + jmp 11b +ecrit: /**** END OF CRITICAL REGION ****/ +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. 
+critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + mov %esp,%esi + add %eax,%esi # %esi points at end of src region + mov %esp,%edi + add $0x34,%edi # %edi points at end of dst region + mov %eax,%ecx + shr $2,%ecx # convert words to bytes + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp 11b + +critical_fixup_table: + .byte 0x00,0x00,0x00 # testb $0xff,(%esi) + .byte 0x00,0x00 # jne 14f + .byte 0x00 # pop %ebx + .byte 0x04 # pop %ecx + .byte 0x08 # pop %edx + .byte 0x0c # pop %esi + .byte 0x10 # pop %edi + .byte 0x14 # pop %ebp + .byte 0x18 # pop %eax + .byte 0x1c # pop %ds + .byte 0x20 # pop %es + .byte 0x24,0x24,0x24 # add $4,%esp + .byte 0x28 # iret + .byte 0x00,0x00,0x00,0x00 # movb $1,1(%esi) + .byte 0x00,0x00 # jmp 11b + +# Hypervisor uses this for application faults while it executes. 
+ENTRY(failsafe_callback) + pop %ds + pop %es + pop %fs + pop %gs + iret + +ENTRY(coprocessor_error) + pushl $0 + pushl $do_coprocessor_error + jmp do_exception + +ENTRY(simd_coprocessor_error) + pushl $0 + pushl $do_simd_coprocessor_error + jmp do_exception + +ENTRY(device_not_available) + iret + +ENTRY(debug) + pushl $0 + pushl $do_debug + jmp do_exception + +ENTRY(int3) + pushl $0 + pushl $do_int3 + jmp do_exception + +ENTRY(overflow) + pushl $0 + pushl $do_overflow + jmp do_exception + +ENTRY(bounds) + pushl $0 + pushl $do_bounds + jmp do_exception + +ENTRY(invalid_op) + pushl $0 + pushl $do_invalid_op + jmp do_exception + + +ENTRY(coprocessor_segment_overrun) + pushl $0 + pushl $do_coprocessor_segment_overrun + jmp do_exception + + +ENTRY(invalid_TSS) + pushl $do_invalid_TSS + jmp do_exception + + +ENTRY(segment_not_present) + pushl $do_segment_not_present + jmp do_exception + + +ENTRY(stack_segment) + pushl $do_stack_segment + jmp do_exception + + +ENTRY(general_protection) + pushl $do_general_protection + jmp do_exception + + +ENTRY(alignment_check) + pushl $do_alignment_check + jmp do_exception + + +ENTRY(page_fault) + pushl $do_page_fault + jmp do_exception + +ENTRY(machine_check) + pushl $0 + pushl $do_machine_check + jmp do_exception + + +ENTRY(spurious_interrupt_bug) + pushl $0 + pushl $do_spurious_interrupt_bug + jmp do_exception + diff --git a/chapter8/console.c b/chapter8/console.c new file mode 100644 index 0000000..1a4cead --- /dev/null +++ b/chapter8/console.c @@ -0,0 +1,123 @@ +#include "console.h" +//#include +#include +#include + +static evtchn_port_t console_evt; +extern char _text; +struct xencons_interface * console; + +/* Initialise the console */ +int console_init(start_info_t * start) +{ + console = (struct xencons_interface*) + ((machine_to_phys_mapping[start->console.domU.mfn] << 12) + + + ((unsigned long)&_text)); + console_evt = start->console.domU.evtchn; + /* TODO: Set up the event channel */ + return 0; +} + +/* Write a 
NULL-terminated string */
+int console_write(char * message)
+{
+ struct evtchn_send event;
+ event.port = console_evt;
+ int length = 0;
+ while(*message != '\0')
+ {
+ /* Wait for the back end to clear enough space in the buffer */
+ XENCONS_RING_IDX data;
+ do
+ {
+ data = console->out_prod - console->out_cons;
+ HYPERVISOR_event_channel_op(EVTCHNOP_send, &event);
+ mb();
+ } while (data >= sizeof(console->out));
+ /* Copy the byte */
+ int ring_index = MASK_XENCONS_IDX(console->out_prod, console->out);
+ console->out[ring_index] = *message;
+ /* Ensure that the data really is in the ring before continuing */
+ wmb();
+ /* Increment the producer index */
+ console->out_prod++;
+ length++;
+ message++;
+ }
+ HYPERVISOR_event_channel_op(EVTCHNOP_send, &event);
+ return length;
+}
+
+/* Read up to n characters from the console into buffer.
+ * buffer must have space for n + 1 bytes (NULL terminator). */
+int console_read(char * buffer, int n)
+{
+ int length = 0;
+ /* BUG FIX: was (n < length), which is false on entry so nothing was ever read */
+ while(length < n && console->in_cons < console->in_prod)
+ {
+ /* Copy the character */
+ int ring_index = MASK_XENCONS_IDX(console->in_cons, console->in);
+ *buffer = console->in[ring_index];
+ buffer++;
+ length++;
+ console->in_cons++;
+ mb();
+ }
+ /* NULL-terminate the string */
+ *buffer = '\0';
+ return length;
+}
+
+/* Block while data is in the out buffer */
+void console_flush()
+{
+ /* While there is data in the out channel */
+ while(console->out_cons < console->out_prod)
+ {
+ /* Let other processes run */
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ mb();
+ }
+}
+
+/* Convert a single decimal digit (0-9) to its ASCII character */
+char atoc(int n)
+{
+ switch(n)
+ {
+ case 0: return '0';
+ case 1: return '1';
+ case 2: return '2';
+ case 3: return '3';
+ case 4: return '4';
+ case 5: return '5';
+ case 6: return '6';
+ case 7: return '7';
+ case 8: return '8';
+ case 9: return '9';
+ default: return 'x';
+ }
+}
+
+/* Write a non-negative int to the console in decimal */
+void console_write_int(int n)
+{
+ char buffer[11];
+ int digits = 0;
+ int digit = 1000000000;
+ for(int i=0 ; i<10 ; i++)
+ {
+ if(n >= digit)
+ {
+ char digit_value = n / digit;
+ buffer[digits]
= atoc(digit_value); + n -= digit_value * digit; + digits++; + } + digit /= 10; + } + if(digits == 0) + { + buffer[digits++] = '0'; + } + buffer[digits] = '\0'; + console_write(buffer); +} diff --git a/chapter8/console.h b/chapter8/console.h new file mode 100644 index 0000000..37b5a64 --- /dev/null +++ b/chapter8/console.h @@ -0,0 +1,8 @@ +#include +#include +#include +#include + +int console_init(start_info_t * start); +int console_write(char * message); +void console_flush(void); diff --git a/chapter8/domain_config b/chapter8/domain_config new file mode 100644 index 0000000..226388b --- /dev/null +++ b/chapter8/domain_config @@ -0,0 +1,18 @@ +# -*- mode: python; -*- +#==================================================== +#Python configuration setup for 'xm create'. This +#script sets the parameters used when a domain is +#created using 'xm create'. You use a separate script +#for each domain you want to create, or you can set the +#parameters for the domain on the xm command line. +#==================================================== +#Kernel image file. +kernel = "testkernel" +# Initial memory allocation (in megabytes) for the new +# domain. +memory = 32 +# A name for your domain. All domains must have +# different names. +name = "Simplest_Kernel" + +on_crash = 'destroy' diff --git a/chapter8/include/barrier.h b/chapter8/include/barrier.h new file mode 100644 index 0000000..fa6b756 --- /dev/null +++ b/chapter8/include/barrier.h @@ -0,0 +1,24 @@ +/* + * Definitions of memory barriers used by Xen. 
+ */ + +#if defined(__i386__) +#define mb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__x86_64__) +#define mb() __asm__ __volatile__ ( "mfence" : : : "memory") +#define rmb() __asm__ __volatile__ ( "lfence" : : : "memory") +#define wmb() __asm__ __volatile__ ( "" : : : "memory") +#elif defined(__ia64__) +#define mb() __asm__ __volatile__ ("mf" ::: "memory") +#define rmb() __asm__ __volatile__ ("mf" ::: "memory") +#define wmb() __asm__ __volatile__ ("mf" ::: "memory") +#elif defined(__powerpc__) +/* XXX loosen these up later */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") /* lwsync? */ +#define wmb() __asm__ __volatile__ ("sync" : : : "memory") /* eieio? */ +#else +#error "Define barriers" +#endif diff --git a/chapter8/include/hypercall-x86_32.h b/chapter8/include/hypercall-x86_32.h new file mode 100644 index 0000000..417b648 --- /dev/null +++ b/chapter8/include/hypercall-x86_32.h @@ -0,0 +1,331 @@ +/****************************************************************************** + * hypercall-x86_32.h + * + * Copied from XenLinux. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_X86_32_H__ +#define __HYPERCALL_X86_32_H__ + +#include +#include +#include +//#include + +#define __STR(x) #x +#define STR(x) __STR(x) + +typedef uint64_t u64; +#if !defined(CONFIG_X86_PAE) +typedef struct { unsigned long pte_low; } pte_t; +#else +typedef struct { unsigned long pte_low, pte_high; } pte_t; +#endif /* CONFIG_X86_PAE */ + +#if !defined(CONFIG_X86_PAE) +#define __pte(x) ((pte_t) { (x) } ) +#else +#define __pte(x) ({ unsigned long long _x = (x); \ + ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); }) +#endif +extern char hypercall_page[4096]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" 
((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + __asm volatile ( \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned 
long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *op) +{ + return _hypercall2(int, event_channel_op, cmd, op); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + 
return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op( + unsigned long op, + unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +#endif /* __HYPERCALL_X86_32_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff --git a/chapter8/include/util.h b/chapter8/include/util.h new file mode 100644 index 0000000..a76707d --- /dev/null +++ b/chapter8/include/util.h @@ -0,0 +1,27 @@ + +#define WORD_BOUNDARY (sizeof(long) - 1) + +inline void zero(char * addr, unsigned int length) +{ + /* Fill to word boundary */ + while(length > 0 && (((long)addr) & WORD_BOUNDARY)) + { + *addr = 0; + length--; + } + long * addrl = (long*)addr; + /* Zero a word at a time */ + while(length > sizeof(long)) + { + *addrl = 0; + addrl++; + length -= 4; + } + addr = (char*)addrl; + /* Zero anything else */ + while(length > 0) + { + *addr = 0; + length--; + } +} diff --git a/chapter8/include/x86_mm.h b/chapter8/include/x86_mm.h new file mode 100644 index 0000000..c4c03d3 --- /dev/null +++ b/chapter8/include/x86_mm.h @@ -0,0 +1,215 @@ +/* + * Page table manipulation macros and constants. 
+ * + * From MiniOS + */ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + * + * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge + * Copyright (c) 2005, Keir A Fraser + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _ARCH_MM_H_ +#define _ARCH_MM_H_ + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#else +#error "Unsupported architecture" +#endif + +#define L1_FRAME 1 +#define L2_FRAME 2 +#define L3_FRAME 3 + +#define L1_PAGETABLE_SHIFT 12 + +#if defined(__i386__) + +#if !defined(CONFIG_X86_PAE) + +#define L2_PAGETABLE_SHIFT 22 + +#define L1_PAGETABLE_ENTRIES 1024 +#define L2_PAGETABLE_ENTRIES 1024 + +#define PADDR_BITS 32 +#define PADDR_MASK (~0UL) + +#define NOT_L1_FRAMES 1 +#define PRIpte "08lx" +typedef unsigned long pgentry_t; + +#else /* defined(CONFIG_X86_PAE) */ + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 4 + +#define PADDR_BITS 44 +#define PADDR_MASK ((1ULL << PADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) + +/* + * If starting from virtual address greater than 0xc0000000, + * this value will be 2 to account for final mid-level page + * directory which is always mapped in at this location. + */ +#define NOT_L1_FRAMES 3 +#define PRIpte "016llx" +typedef uint64_t pgentry_t; + +#endif /* !defined(CONFIG_X86_PAE) */ + +#elif defined(__x86_64__) + +#define L2_PAGETABLE_SHIFT 21 +#define L3_PAGETABLE_SHIFT 30 +#define L4_PAGETABLE_SHIFT 39 + +#define L1_PAGETABLE_ENTRIES 512 +#define L2_PAGETABLE_ENTRIES 512 +#define L3_PAGETABLE_ENTRIES 512 +#define L4_PAGETABLE_ENTRIES 512 + +/* These are page-table limitations. Current CPUs support only 40-bit phys. 
*/ +#define PADDR_BITS 52 +#define VADDR_BITS 48 +#define PADDR_MASK ((1UL << PADDR_BITS)-1) +#define VADDR_MASK ((1UL << VADDR_BITS)-1) + +#define L2_MASK ((1UL << L3_PAGETABLE_SHIFT) - 1) +#define L3_MASK ((1UL << L4_PAGETABLE_SHIFT) - 1) + +#define NOT_L1_FRAMES 3 +#define PRIpte "016lx" +typedef unsigned long pgentry_t; + +#endif + +#define L1_MASK ((1UL << L2_PAGETABLE_SHIFT) - 1) + +/* Given a virtual address, get an entry offset into a page table. */ +#define l1_table_offset(_a) \ + (((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1)) +#define l2_table_offset(_a) \ + (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)) +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) +#define l3_table_offset(_a) \ + (((_a) >> L3_PAGETABLE_SHIFT) & (L3_PAGETABLE_ENTRIES - 1)) +#endif +#if defined(__x86_64__) +#define l4_table_offset(_a) \ + (((_a) >> L4_PAGETABLE_SHIFT) & (L4_PAGETABLE_ENTRIES - 1)) +#endif + +#define _PAGE_PRESENT 0x001UL +#define _PAGE_RW 0x002UL +#define _PAGE_USER 0x004UL +#define _PAGE_PWT 0x008UL +#define _PAGE_PCD 0x010UL +#define _PAGE_ACCESSED 0x020UL +#define _PAGE_DIRTY 0x040UL +#define _PAGE_PAT 0x080UL +#define _PAGE_PSE 0x080UL +#define _PAGE_GLOBAL 0x100UL + +#if defined(__i386__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER) +#if defined(CONFIG_X86_PAE) +#define L3_PROT (_PAGE_PRESENT) +#endif /* CONFIG_X86_PAE */ +#elif defined(__x86_64__) +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#endif /* __i386__ || __x86_64__ */ + +#ifndef CONFIG_X86_PAE +#define PAGE_SIZE (1UL << L1_PAGETABLE_SHIFT) +#else +#define PAGE_SIZE (1ULL << L1_PAGETABLE_SHIFT) +#endif +#define PAGE_SHIFT 
L1_PAGETABLE_SHIFT +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> L1_PAGETABLE_SHIFT) +#define PFN_DOWN(x) ((x) >> L1_PAGETABLE_SHIFT) +#define PFN_PHYS(x) ((x) << L1_PAGETABLE_SHIFT) +#define PHYS_PFN(x) ((x) >> L1_PAGETABLE_SHIFT) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* Definitions for machine and pseudophysical addresses. */ +#ifdef CONFIG_X86_PAE +typedef unsigned long long paddr_t; +typedef unsigned long long maddr_t; +#else +typedef unsigned long paddr_t; +typedef unsigned long maddr_t; +#endif + +extern unsigned long *phys_to_machine_mapping; +extern char _text, _etext, _edata, _end; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +static __inline__ maddr_t phys_to_machine(paddr_t phys) +{ + maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} + +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static __inline__ paddr_t machine_to_phys(maddr_t machine) +{ + paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +#define VIRT_START ((unsigned long)&_text) + +#define to_phys(x) ((unsigned long)(x)-VIRT_START) +#define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) + +#define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) +#define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) +#define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) + +/* Pagetable walking. 
*/ +#define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) +#define pte_to_virt(_pte) to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT) + + +#endif /* _ARCH_MM_H_ */ diff --git a/chapter8/kernel.c b/chapter8/kernel.c new file mode 100644 index 0000000..4e5acd1 --- /dev/null +++ b/chapter8/kernel.c @@ -0,0 +1,39 @@ +#include "console.h" +#include "xenstore.h" +#include + +/* Some static space for the stack */ +char stack[8192]; + +/* Locations in the bootstrapping code */ +extern shared_info_t * shared_info; +void hypervisor_callback(void); +void failsafe_callback(void); + +/* Main kernel entry point, called by trampoline */ +void start_kernel(start_info_t * start_info) +{ + /* Define hypervisor upcall entry points */ + HYPERVISOR_set_callbacks( + FLAT_KERNEL_CS, (unsigned long)hypervisor_callback, + FLAT_KERNEL_CS, (unsigned long)failsafe_callback); + /* Map the shared info page */ + HYPERVISOR_update_va_mapping((unsigned long) shared_info, + __pte(start_info->shared_info), + UVMF_INVLPG); + /* Initialise the console */ + console_init(start_info); + /* Write a message to check that it worked */ + console_write("Hello world!\n\r"); + console_write("Xen magic string: "); + console_write(start_info->magic); + console_write("\n\r"); + + /* Set up the XenStore driver */ + xenstore_init(start_info); + /* Test the store */ + xenstore_test(); + /* Flush the console buffer */ + console_flush(); + /* Exit, since we don't know how to do anything else */ +} diff --git a/chapter8/loader.lds b/chapter8/loader.lds new file mode 100644 index 0000000..63bf480 --- /dev/null +++ b/chapter8/loader.lds @@ -0,0 +1,27 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) +SECTIONS +{ + . 
= 0x0; /* Start of the output file */ + + _text = .; /* Text and read-only data */ + + .text : { + *(.text) + } = 0x9090 + + _etext = .; /* End of text section */ + + .rodata : { /* Read only data section */ + *(.rodata) + *(.rodata.*) + } + + .data : { /* Data */ + *(.data) + } + + _edata = .; /* End of data section */ + +} diff --git a/chapter8/traps.c b/chapter8/traps.c new file mode 100644 index 0000000..b943157 --- /dev/null +++ b/chapter8/traps.c @@ -0,0 +1,83 @@ +#include +#include +#include + +/* + * These are assembler stubs in entry.S. + * They are the actual entry points for virtual exceptions. + */ +void divide_error(void); +void debug(void); +void int3(void); +void overflow(void); +void bounds(void); +void invalid_op(void); +void device_not_available(void); +void coprocessor_segment_overrun(void); +void invalid_TSS(void); +void segment_not_present(void); +void stack_segment(void); +void general_protection(void); +void page_fault(void); +void coprocessor_error(void); +void simd_coprocessor_error(void); +void alignment_check(void); +void spurious_interrupt_bug(void); +void machine_check(void); + +/* Dummy implementation. Should actually do something */ +void do_divide_error(void) {} +void do_debug(void) {} +void do_int3(void) {} +void do_overflow(void) {} +void do_bounds(void) {} +void do_invalid_op(void) {} +void do_device_not_available(void) {} +void do_coprocessor_segment_overrun(void) {} +void do_invalid_TSS(void) {} +void do_segment_not_present(void) {} +void do_stack_segment(void) {} +void do_general_protection(void) {} +void do_page_fault(void) {} +void do_coprocessor_error(void) {} +void do_simd_coprocessor_error(void) {} +void do_alignment_check(void) {} +void do_spurious_interrupt_bug(void) {} +void do_machine_check(void) {} +void do_hypervisor_callback(void) +{ + +} + +/* + * Submit a virtual IDT to teh hypervisor. This consists of tuples + * (interrupt vector, privilege ring, CS:EIP of handler). 
+ * The 'privilege ring' field specifies the least-privileged ring that + * can trap to that vector using a software-interrupt instruction (INT). + */ +static trap_info_t trap_table[] = { + { 0, 0, FLAT_KERNEL_CS, (unsigned long)divide_error }, + { 1, 0, FLAT_KERNEL_CS, (unsigned long)debug }, + { 3, 3, FLAT_KERNEL_CS, (unsigned long)int3 }, + { 4, 3, FLAT_KERNEL_CS, (unsigned long)overflow }, + { 5, 3, FLAT_KERNEL_CS, (unsigned long)bounds }, + { 6, 0, FLAT_KERNEL_CS, (unsigned long)invalid_op }, + { 7, 0, FLAT_KERNEL_CS, (unsigned long)device_not_available }, + { 9, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, + { 10, 0, FLAT_KERNEL_CS, (unsigned long)invalid_TSS }, + { 11, 0, FLAT_KERNEL_CS, (unsigned long)segment_not_present }, + { 12, 0, FLAT_KERNEL_CS, (unsigned long)stack_segment }, + { 13, 0, FLAT_KERNEL_CS, (unsigned long)general_protection }, + { 14, 0, FLAT_KERNEL_CS, (unsigned long)page_fault }, + { 15, 0, FLAT_KERNEL_CS, (unsigned long)spurious_interrupt_bug }, + { 16, 0, FLAT_KERNEL_CS, (unsigned long)coprocessor_error }, + { 17, 0, FLAT_KERNEL_CS, (unsigned long)alignment_check }, + { 19, 0, FLAT_KERNEL_CS, (unsigned long)simd_coprocessor_error }, + { 0, 0, 0, 0 } +}; + +void trap_init(void) +{ + HYPERVISOR_set_trap_table(trap_table); +} + diff --git a/chapter8/xenstore.c b/chapter8/xenstore.c new file mode 100644 index 0000000..a1be101 --- /dev/null +++ b/chapter8/xenstore.c @@ -0,0 +1,221 @@ +#include "xenstore.h" +#include +#include +#include + +static evtchn_port_t xenstore_evt; +extern char _text; +struct xenstore_domain_interface * xenstore; + +unsigned int strlen(char* str) +{ + unsigned int i=0; + while(*str != '\0') + { + str++; + i++; + } + return i; +} + +/* Initialise the XenStore */ +int xenstore_init(start_info_t * start) +{ + xenstore = (struct xenstore_domain_interface*) + ((machine_to_phys_mapping[start->store_mfn] << 12) + + + ((unsigned long)&_text)); + xenstore_evt = start->store_evtchn; + /* TODO: Set up the 
event channel */ + + return 0; +} + +/* Write a request to the back end */ +int xenstore_write_request(char * message, int length) +{ + /* Check that the message will fit */ + if(length > XENSTORE_RING_SIZE) + { + return -1; + } + + int i; + for(i=xenstore->req_prod ; length > 0 ; i++,length--) + { + /* Wait for the back end to clear enough space in the buffer */ + XENSTORE_RING_IDX data; + do + { + data = i - xenstore->req_cons; + mb(); + } while (data >= sizeof(xenstore->req)); + /* Copy the byte */ + int ring_index = MASK_XENSTORE_IDX(i); + xenstore->req[ring_index] = *message; + message++; + } + /* Ensure that the data really is in the ring before continuing */ + wmb(); + xenstore->req_prod = i; + return 0; +} + +/* Read a response from the response ring */ +int xenstore_read_response(char * message, int length) +{ + int i; + for(i=xenstore->rsp_cons ; length > 0 ; i++,length--) + { + /* Wait for the back end put data in the buffer */ + XENSTORE_RING_IDX data; + do + { + + data = xenstore->rsp_prod - i; + mb(); + } while (data == 0); + /* Copy the byte */ + int ring_index = MASK_XENSTORE_IDX(i); + *message = xenstore->rsp[ring_index]; + message++; + } + xenstore->rsp_cons = i; + return 0; +} + +/* Current request ID */ +static int req_id = 0; + +#define NOTIFY() \ + do {\ + struct evtchn_send event;\ + event.port = xenstore_evt;\ + HYPERVISOR_event_channel_op(EVTCHNOP_send, &event);\ + } while(0) + +#define IGNORE(n) \ + do {\ + char buffer[XENSTORE_RING_SIZE];\ + xenstore_read_response(buffer, n);\ + } while(0) + +/* Write a key/value pair to the XenStore */ +int xenstore_write(char * key, char * value) +{ + int key_length = strlen(key); + int value_length = strlen(value); + struct xsd_sockmsg msg; + msg.type = XS_WRITE; + msg.req_id = req_id; + msg.tx_id = 0; + msg.len = 2 + key_length + value_length; + /* Write the message */ + xenstore_write_request((char*)&msg, sizeof(msg)); + xenstore_write_request(key, key_length + 1); + xenstore_write_request(value, 
value_length + 1); + /* Notify the back end */ + NOTIFY(); + xenstore_read_response((char*)&msg, sizeof(msg)); + IGNORE(msg.len); + if(msg.req_id != req_id++) + { + return -1; + } + return 0; +} + +/* Read a value from the store */ +int xenstore_read(char * key, char * value, int value_length) +{ + int key_length = strlen(key); + struct xsd_sockmsg msg; + msg.type = XS_READ; + msg.req_id = req_id; + msg.tx_id = 0; + msg.len = 1 + key_length; + /* Write the message */ + xenstore_write_request((char*)&msg, sizeof(msg)); + xenstore_write_request(key, key_length + 1); + /* Notify the back end */ + NOTIFY(); + xenstore_read_response((char*)&msg, sizeof(msg)); + if(msg.req_id != req_id++) + { + IGNORE(msg.len); + return -1; + } + /* If we have enough space in the buffer */ + if(value_length >= msg.len) + { + xenstore_read_response(value, msg.len); + return 0; + } + /* Truncate */ + xenstore_read_response(value, value_length); + IGNORE(msg.len - value_length); + return -2; +} + +int xenstore_ls(char * key, char * values, int value_length) +{ + int key_length = strlen(key); + struct xsd_sockmsg msg; + msg.type = XS_DIRECTORY; + msg.req_id = req_id; + msg.tx_id = 0; + msg.len = 1 + key_length; + /* Write the message */ + xenstore_write_request((char*)&msg, sizeof(msg)); + xenstore_write_request(key, key_length + 1); + /* Notify the back end */ + NOTIFY(); + xenstore_read_response((char*)&msg, sizeof(msg)); + if(msg.req_id != req_id++) + { + IGNORE(msg.len); + return -1; + } + /* If we have enough space in the buffer */ + if(value_length >= msg.len) + { + xenstore_read_response(values, msg.len); + return msg.len; + } + /* Truncate */ + xenstore_read_response(values, value_length); + IGNORE(msg.len - value_length); + return -2; +} + +/* Test the XenStore driver */ +void xenstore_test() +{ + char buffer[1024]; + buffer[1023] = '\0'; + console_write("\n\r"); + /* Get the name of the running VM */ + xenstore_read("name", buffer, 1023); + console_write("VM name: "); + 
console_write(buffer); + console_write("\n\r"); + /* Set the key "example" to "foo" */ + xenstore_write("example", "foo"); + xenstore_read("example", buffer, 1023); + console_write("example = "); + console_write(buffer); + console_write("\n\r"); + /* Get info about the console */ + int length = xenstore_ls("console",buffer,1023); + console_write("console contains:\r\n "); + char * out = buffer; + while(length > 0) + { + char value[16]; + value[15] = '\0'; + int len = console_write(out); + console_write("\n\r "); + length -= len + 1; + out += len + 1; + } +} diff --git a/chapter8/xenstore.h b/chapter8/xenstore.h new file mode 100644 index 0000000..8dd9511 --- /dev/null +++ b/chapter8/xenstore.h @@ -0,0 +1,9 @@ +#include +#include +#include +#include +#include + +int xenstore_init(start_info_t * start); +int xenstore_write(char * key, char * value); +int xenstore_read(char * key, char * value, int value_length);