From afb4d0cbb2204d226f01113ff22e83258d94cb05 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Thu, 5 Mar 2020 11:59:29 -0800
Subject: [PATCH 01/16] paging: Put tables in `static mut`

We don't actually need an AtomicRefCell here. It's fine for
paging::setup() to run multiple times; it is idempotent and we only do
writes.

We also place the page tables in #[no_mangle] static mut variables so
they can be linked from assembly code.

Signed-off-by: Joe Richey
---
 src/main.rs   |  2 +-
 src/paging.rs | 73 ++++++++++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index a9bcb9b1..0c6a3a99 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -156,7 +156,7 @@ fn boot_from_device(device: &mut block::VirtioBlockDevice) -> bool {
 pub extern "C" fn rust64_start() -> ! {
     log!("\nStarting..");
     enable_sse();
-    paging::MANAGER.borrow_mut().setup();
+    paging::setup();
 
     pci::print_bus();
 
diff --git a/src/paging.rs b/src/paging.rs
index 3b3124fa..448ede42 100644
--- a/src/paging.rs
+++ b/src/paging.rs
@@ -1,56 +1,51 @@
-use atomic_refcell::AtomicRefCell;
 use x86_64::{
     registers::control::Cr3,
     structures::paging::{PageSize, PageTable, PageTableFlags, PhysFrame, Size2MiB},
     PhysAddr,
 };
 
-// This is the number of GiB we will identity map.
+// Amount of memory we identity map in setup(), max 512 GiB.
 const ADDRESS_SPACE_GIB: usize = 4;
 
-pub static MANAGER: AtomicRefCell<Manager> = AtomicRefCell::new(Manager::new());
-
-pub struct Manager {
-    l4: PageTable,
-    l3: PageTable,
-    l2s: [PageTable; ADDRESS_SPACE_GIB],
-}
-
-impl Manager {
-    const fn new() -> Self {
-        Manager {
-            l4: PageTable::new(),
-            l3: PageTable::new(),
-            l2s: [PageTable::new(); ADDRESS_SPACE_GIB],
+// Put the Page Tables in static muts to make linking easier
+#[no_mangle]
+static mut L4_TABLE: PageTable = PageTable::new();
+#[no_mangle]
+static mut L3_TABLE: PageTable = PageTable::new();
+#[no_mangle]
+static mut L2_TABLES: [PageTable; ADDRESS_SPACE_GIB] = [PageTable::new(); ADDRESS_SPACE_GIB];
+
+pub fn setup() {
+    // SAFETY: This function is idempotent and only writes to static memory and
+    // CR3. Thus, it is safe to run multiple times or on multiple threads.
+ let (l4, l3, l2s) = unsafe { (&mut L4_TABLE, &mut L3_TABLE, &mut L2_TABLES) }; + log!("Setting up {} GiB identity mapping", ADDRESS_SPACE_GIB); + let pt_flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; + + // Setup Identity map using L2 huge pages + let mut next_addr = PhysAddr::new(0); + for l2 in l2s.iter_mut() { + for l2e in l2.iter_mut() { + l2e.set_addr(next_addr, pt_flags | PageTableFlags::HUGE_PAGE); + next_addr += Size2MiB::SIZE; } } - pub fn setup(&mut self) { - log!("Setting up {} GiB identity mapping", ADDRESS_SPACE_GIB); - - let pt_flags = PageTableFlags::PRESENT | PageTableFlags::WRITABLE; - // Setup Identity map using L2 huge pages - let mut next_addr = PhysAddr::new(0); - for l2 in self.l2s.iter_mut() { - for l2e in l2.iter_mut() { - l2e.set_addr(next_addr, pt_flags | PageTableFlags::HUGE_PAGE); - next_addr += Size2MiB::SIZE; - } - } - - // Point L3 at L2s - for (i, l2) in self.l2s.iter().enumerate() { - self.l3[i].set_addr(phys_addr(l2), pt_flags); - } + // Point L3 at L2s + for (i, l2) in l2s.iter().enumerate() { + l3[i].set_addr(phys_addr(l2), pt_flags); + } - // Point L4 at L3 - self.l4[0].set_addr(phys_addr(&self.l3), pt_flags); + // Point L4 at L3 + l4[0].set_addr(phys_addr(l3), pt_flags); - // Point Cr3 at PML4 - let cr3_flags = Cr3::read().1; - let pml4t_frame = PhysFrame::from_start_address(phys_addr(&self.l4)).unwrap(); - unsafe { Cr3::write(pml4t_frame, cr3_flags) }; - log!("Page tables setup"); + // Point Cr3 at L4 + let (cr3_frame, cr3_flags) = Cr3::read(); + let l4_frame = PhysFrame::from_start_address(phys_addr(l4)).unwrap(); + if cr3_frame != l4_frame { + unsafe { Cr3::write(l4_frame, cr3_flags) }; } + log!("Page tables setup"); } // Map a virtual address to a PhysAddr (assumes identity mapping) From 0fb8e30a9377e30e2e0fe109b2f84355f5b67c23 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 02:56:10 -0700 Subject: [PATCH 02/16] mem: Simplify MemoryRegion methods We really only need from_bytes and as_bytes. There's no need for a generic from_slice method. 
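
For illustration, a typical round trip with the reduced API looks like
this (sketch only, not part of the patch):

    let mut buf = [0u8; 512];
    // Wrap a byte buffer in a MemoryRegion...
    let mut region = MemoryRegion::from_bytes(&mut buf);
    // ...and view the whole region as a byte slice again.
    assert_eq!(region.as_bytes().len(), 512);
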
Signed-off-by: Joe Richey --- src/bzimage.rs | 2 +- src/mem.rs | 11 ++++++++--- src/pe.rs | 9 +++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/bzimage.rs b/src/bzimage.rs index 330f13e9..d9af9bea 100644 --- a/src/bzimage.rs +++ b/src/bzimage.rs @@ -143,7 +143,7 @@ pub fn load_kernel(f: &mut dyn Read) -> Result { f.read(&mut buf[0..512])?; f.read(&mut buf[512..])?; - let setup = crate::mem::MemoryRegion::from_slice(&buf[..]); + let setup = crate::mem::MemoryRegion::from_bytes(&mut buf[..]); if setup.read_u16(0x1fe) != 0xAA55 { return Err(Error::MagicMissing); diff --git a/src/mem.rs b/src/mem.rs index a52c2cf6..f8cfa1e8 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -22,18 +22,23 @@ pub struct MemoryRegion { } impl MemoryRegion { - pub fn new(base: u64, length: u64) -> MemoryRegion { + pub const fn new(base: u64, length: u64) -> MemoryRegion { MemoryRegion { base, length } } /// Take a slice and turn it into a region of memory - pub fn from_slice(data: &[T]) -> MemoryRegion { + pub fn from_bytes(data: &mut [u8]) -> MemoryRegion { MemoryRegion { base: data.as_ptr() as u64, - length: (data.len() * core::mem::size_of::()) as u64, + length: data.len() as u64, } } + // Expose the entire region as a byte slice + pub fn as_bytes(&mut self) -> &mut [u8] { + self.as_mut_slice(0, self.length) + } + /// Expose a section of the memory region as a slice pub fn as_mut_slice(&mut self, offset: u64, length: u64) -> &mut [T] { assert!((offset + (length * core::mem::size_of::() as u64)) <= self.length); diff --git a/src/pe.rs b/src/pe.rs index 3dc1d526..2748fe78 100644 --- a/src/pe.rs +++ b/src/pe.rs @@ -60,7 +60,7 @@ impl<'a> Loader<'a> { Err(_) => return Err(Error::FileError), } - let dos_region = MemoryRegion::from_slice(&data); + let dos_region = MemoryRegion::from_bytes(&mut data); // 'MZ' magic if dos_region.read_u16(0) != 0x5a4d { @@ -74,7 +74,7 @@ impl<'a> Loader<'a> { return Err(Error::InvalidExecutable); } - let pe_region = MemoryRegion::from_slice(&data[pe_header_offset as usize..]); + let pe_region = MemoryRegion::from_bytes(&mut data[pe_header_offset as usize..]); // The Microsoft specification uses offsets relative to the COFF area // which is 4 after the signature (so all offsets are +4 relative to the spec) @@ -91,7 +91,8 @@ impl<'a> Loader<'a> { self.num_sections = pe_region.read_u16(6); let optional_header_size = pe_region.read_u16(20); - let optional_region = MemoryRegion::from_slice(&data[(24 + pe_header_offset) as usize..]); + let optional_region = + MemoryRegion::from_bytes(&mut data[(24 + pe_header_offset) as usize..]); // Only support x86-64 EFI if optional_region.read_u16(0) != 0x20b { @@ -177,7 +178,7 @@ impl<'a> Loader<'a> { let l: &mut [u8] = loaded_region .as_mut_slice(u64::from(section.virt_address), u64::from(section_size)); - let reloc_region = MemoryRegion::from_slice(l); + let reloc_region = MemoryRegion::from_bytes(l); let mut section_bytes_remaining = section_size; let mut offset = 0; From fce1a43d06359855c71d0a811db067ed16a852e5 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 03:02:07 -0700 Subject: [PATCH 03/16] errors: Improve error types and implement Debug As our error types are enums, we can include the "source" error in our later error types. This allows for more information when debugging failures. To this end, we also implement Debug. 
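
For example (illustrative only, not part of the patch), a failure that
used to surface as a bare `FileError` can now be logged with its source
attached:

    if let Err(err) = loader::load_default_entry(&f) {
        log!("Error loading default entry: {:?}", err); // e.g. FileError(EndOfFile)
    }
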
Signed-off-by: Joe Richey --- src/bzimage.rs | 13 +++++++------ src/loader.rs | 13 +++++++------ src/virtio.rs | 1 + 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/bzimage.rs b/src/bzimage.rs index d9af9bea..1a7c78df 100644 --- a/src/bzimage.rs +++ b/src/bzimage.rs @@ -14,16 +14,17 @@ use crate::fat::{self, Read}; +#[derive(Debug)] pub enum Error { - FileError, + FileError(fat::Error), KernelOld, MagicMissing, NotRelocatable, } impl From for Error { - fn from(_: fat::Error) -> Error { - Error::FileError + fn from(e: fat::Error) -> Error { + Error::FileError(e) } } @@ -85,7 +86,7 @@ pub fn load_initrd(f: &mut dyn Read) -> Result<(), Error> { let mut data: [u8; 512] = [0; 512]; match f.read(&mut data) { Err(crate::fat::Error::EndOfFile) => break, - Err(_) => return Err(Error::FileError), + Err(e) => return Err(Error::FileError(e)), Ok(_) => {} } let dst = initrd_region.as_mut_slice(u64::from(offset), u64::from(bytes_remaining)); @@ -96,7 +97,7 @@ pub fn load_initrd(f: &mut dyn Read) -> Result<(), Error> { let dst = initrd_region.as_mut_slice(u64::from(offset), 512); match f.read(dst) { Err(crate::fat::Error::EndOfFile) => break, - Err(_) => return Err(Error::FileError), + Err(e) => return Err(Error::FileError(e)), Ok(_) => {} } @@ -202,7 +203,7 @@ pub fn load_kernel(f: &mut dyn Read) -> Result { // 0x200 is the startup_64 offset return Ok(u64::from(KERNEL_LOCATION) + 0x200); } - Err(_) => return Err(Error::FileError), + Err(e) => return Err(Error::FileError(e)), Ok(_) => {} }; diff --git a/src/loader.rs b/src/loader.rs index 92b45bcb..229851ac 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -23,20 +23,21 @@ pub struct LoaderConfig { pub cmdline: [u8; 4096], } +#[derive(Debug)] pub enum Error { - FileError, - BzImageError, + FileError(fat::Error), + BzImageError(bzimage::Error), } impl From for Error { - fn from(_: fat::Error) -> Error { - Error::FileError + fn from(e: fat::Error) -> Error { + Error::FileError(e) } } impl From for Error { - fn from(_: bzimage::Error) -> Error { - Error::BzImageError + fn from(e: bzimage::Error) -> Error { + Error::BzImageError(e) } } diff --git a/src/virtio.rs b/src/virtio.rs index 778e2305..df2a4faf 100644 --- a/src/virtio.rs +++ b/src/virtio.rs @@ -13,6 +13,7 @@ // limitations under the License. /// Virtio related errors +#[derive(Debug)] pub enum Error { VirtioUnsupportedDevice, VirtioLegacyOnly, From e40fb02df4acb4c6befc7ab1e5546e2ac3a0e83b Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 16:45:21 -0700 Subject: [PATCH 04/16] common: Move string manipulation functions to common.rs We can have `ascii_strip` use entirely safe code by `unwrap()`ing the result of `from_utf8` instead of using `from_utf8_unchecked`. We also add a helper function to convert a C-string pointer into a byte slice. Signed-off-by: Joe Richey --- src/common.rs | 18 ++++++++++++++++++ src/loader.rs | 5 +---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/common.rs b/src/common.rs index ea6ce683..4a7c4300 100644 --- a/src/common.rs +++ b/src/common.rs @@ -33,6 +33,24 @@ macro_rules! container_of_mut { }}; } +// SAFETY: Requires that addr point to a static, null-terminated C-string. +// The returned slice does not include the null-terminator. 
+pub unsafe fn from_cstring(addr: u64) -> &'static [u8] { + if addr == 0 { + return &[]; + } + let start = addr as *const u8; + let mut size: usize = 0; + while start.add(size).read() != 0 { + size += 1; + } + core::slice::from_raw_parts(start, size) +} + +pub fn ascii_strip(s: &[u8]) -> &str { + core::str::from_utf8(s).unwrap().trim_matches(char::from(0)) +} + pub fn ucs2_as_ascii_length(input: *const u16) -> usize { let mut len = 0; loop { diff --git a/src/loader.rs b/src/loader.rs index 229851ac..5ba30e4d 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -14,6 +14,7 @@ use crate::{ bzimage, + common::ascii_strip, fat::{self, Read}, }; @@ -104,10 +105,6 @@ fn parse_entry(f: &mut fat::File) -> Result { Ok(loader_config) } -fn ascii_strip(s: &[u8]) -> &str { - unsafe { core::str::from_utf8_unchecked(&s) }.trim_matches(char::from(0)) -} - const ENTRY_DIRECTORY: &str = "/loader/entries/"; fn default_entry_path(fs: &fat::Filesystem) -> Result<[u8; 260], fat::Error> { From 11a882125490d5dec6c9f11462b7536f0927e3c2 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 16:54:42 -0700 Subject: [PATCH 05/16] boot: Add structures for Info, E820, and Params To support multiple boot protocols, we need a common abstraction for the information given in a boot protocol. This is the Info trait. This also requires adding a common E820Entry structure, so we can get the memory map. We also add a boot::Params structure (i.e. the Linux Zeropage) to make reading/writing the structure easier. This will let us avoid needing to hardcode struct offsets. The layout for these structures is taken from the Kernel's arch/x86/include/uapi/asm/bootparam.h Signed-off-by: Joe Richey --- src/boot.rs | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + 2 files changed, 188 insertions(+) create mode 100644 src/boot.rs diff --git a/src/boot.rs b/src/boot.rs new file mode 100644 index 00000000..4cec53b0 --- /dev/null +++ b/src/boot.rs @@ -0,0 +1,187 @@ +use core::mem; + +use crate::common; + +// Common data needed for all boot paths +pub trait Info { + // Starting address of the Root System Descriptor Pointer + fn rsdp_addr(&self) -> u64; + // The kernel command line (not including null terminator) + fn cmdline(&self) -> &[u8]; + // Methods to access the E820 Memory map + fn num_entries(&self) -> u8; + fn entry(&self, idx: u8) -> E820Entry; +} + +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct E820Entry { + pub addr: u64, + pub size: u64, + pub entry_type: u32, +} + +impl E820Entry { + pub const RAM_TYPE: u32 = 1; +} + +// The so-called "zeropage" +#[derive(Clone, Copy)] +#[repr(C, packed)] +pub struct Params { + screen_info: ScreenInfo, // 0x000 + apm_bios_info: ApmBiosInfo, // 0x040 + _pad2: [u8; 4], // 0x054 + tboot_addr: u64, // 0x058 + ist_info: IstInfo, // 0x060 + pub acpi_rsdp_addr: u64, // 0x070 + _pad3: [u8; 8], // 0x078 + hd0_info: HdInfo, // 0x080 - obsolete + hd1_info: HdInfo, // 0x090 - obsolete + sys_desc_table: SysDescTable, // 0x0a0 - obsolete + olpc_ofw_header: OlpcOfwHeader, // 0x0b0 + ext_ramdisk_image: u32, // 0x0c0 + ext_ramdisk_size: u32, // 0x0c4 + ext_cmd_line_ptr: u32, // 0x0c8 + _pad4: [u8; 0x74], // 0x0cc + edd_info: EdidInfo, // 0x140 + efi_info: EfiInfo, // 0x1c0 + alt_mem_k: u32, // 0x1e0 + scratch: u32, // 0x1e4 + e820_entries: u8, // 0x1e8 + eddbuf_entries: u8, // 0x1e9 + edd_mbr_sig_buf_entries: u8, // 0x1ea + kbd_status: u8, // 0x1eb + secure_boot: u8, // 0x1ec + _pad5: [u8; 2], // 0x1ed + sentinel: u8, // 0x1ef + _pad6: [u8; 1], // 0x1f0 + pub 
hdr: Header, // 0x1f1 + _pad7: [u8; 0x290 - HEADER_END], + edd_mbr_sig_buffer: [u32; 16], // 0x290 + e820_table: [E820Entry; 128], // 0x2d0 + _pad8: [u8; 0x30], // 0xcd0 + eddbuf: [EddInfo; 6], // 0xd00 + _pad9: [u8; 0x114], // 0xeec +} + +impl Default for Params { + fn default() -> Self { + // SAFETY: Struct consists entirely of primitive integral types. + unsafe { mem::zeroed() } + } +} + +impl Params { + pub fn set_entries(&mut self, info: &dyn Info) { + self.e820_entries = info.num_entries(); + for i in 0..self.e820_entries { + self.e820_table[i as usize] = info.entry(i); + } + } +} + +impl Info for Params { + fn rsdp_addr(&self) -> u64 { + self.acpi_rsdp_addr + } + fn cmdline(&self) -> &[u8] { + unsafe { common::from_cstring(self.hdr.cmd_line_ptr as u64) } + } + fn num_entries(&self) -> u8 { + self.e820_entries + } + fn entry(&self, idx: u8) -> E820Entry { + assert!(idx < self.num_entries()); + self.e820_table[idx as usize] + } +} + +const HEADER_START: usize = 0x1f1; +const HEADER_END: usize = HEADER_START + mem::size_of::
(); + +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct Header { + pub setup_sects: u8, + pub root_flags: u16, + pub syssize: u32, + pub ram_size: u16, + pub vid_mode: u16, + pub root_dev: u16, + pub boot_flag: u16, + pub jump: u16, + pub header: [u8; 4], + pub version: u16, + pub realmode_swtch: u32, + pub start_sys_seg: u16, + pub kernel_version: u16, + pub type_of_loader: u8, + pub loadflags: u8, + pub setup_move_size: u16, + pub code32_start: u32, + pub ramdisk_image: u32, + pub ramdisk_size: u32, + pub bootsect_kludge: u32, + pub heap_end_ptr: u16, + pub ext_loader_ver: u8, + pub ext_loader_type: u8, + pub cmd_line_ptr: u32, + pub initrd_addr_max: u32, + pub kernel_alignment: u32, + pub relocatable_kernel: u8, + pub min_alignment: u8, + pub xloadflags: u16, + pub cmdline_size: u32, + pub hardware_subarch: u32, + pub hardware_subarch_data: u64, + pub payload_offset: u32, + pub payload_length: u32, + pub setup_data: u64, + pub pref_address: u64, + pub init_size: u32, + pub handover_offset: u32, +} + +// Right now the stucts below are unused, so we only need them to be the correct +// size. Update test_size_and_offset if a struct's real definition is added. +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct ScreenInfo([u8; 0x40]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct ApmBiosInfo([u8; 0x14]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct IstInfo([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct HdInfo([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct SysDescTable([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct OlpcOfwHeader([u8; 0x10]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EdidInfo([u8; 0x80]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EfiInfo([u8; 0x20]); +#[derive(Clone, Copy)] +#[repr(C, packed)] +struct EddInfo([u8; 0x52]); + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_size_and_offset() { + assert_eq!(mem::size_of::
(), 119); + assert_eq!(mem::size_of::(), 20); + assert_eq!(mem::size_of::(), 4096); + + assert_eq!(offset_of!(Params, hdr), HEADER_START); + } +} diff --git a/src/main.rs b/src/main.rs index 0c6a3a99..cb1690f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,6 +34,7 @@ mod common; #[cfg(not(test))] mod asm; mod block; +mod boot; mod bzimage; mod efi; mod fat; From 0f16ac3cf6f04b282071eda4ec7101a6b0bae7fc Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 17:10:50 -0700 Subject: [PATCH 06/16] fat: Add utility functions for loading data from files This allows much of the existing code to be simplified, by letting us load the remainer of a file into a specific memory region. This also makes it much easier to write the code to load the Linux Kernel header from the bzimage file. Signed-off-by: Joe Richey --- src/boot.rs | 26 +++++++++++++++++++++++++- src/fat.rs | 20 +++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/boot.rs b/src/boot.rs index 4cec53b0..ff172893 100644 --- a/src/boot.rs +++ b/src/boot.rs @@ -1,6 +1,10 @@ use core::mem; -use crate::common; +use crate::{ + common, + fat::{Error, Read}, + mem::MemoryRegion, +}; // Common data needed for all boot paths pub trait Info { @@ -143,6 +147,26 @@ pub struct Header { pub handover_offset: u32, } +impl Header { + // Read a kernel header from the first two sectors of a file + pub fn from_file(f: &mut dyn Read) -> Result { + let mut data: [u8; 1024] = [0; 1024]; + let mut region = MemoryRegion::from_bytes(&mut data); + + f.seek(0)?; + f.load_file(&mut region)?; + + #[repr(C)] + struct HeaderData { + before: [u8; HEADER_START], + hdr: Header, + after: [u8; 1024 - HEADER_END], + } + // SAFETY: Struct consists entirely of primitive integral types. + Ok(unsafe { mem::transmute::<_, HeaderData>(data) }.hdr) + } +} + // Right now the stucts below are unused, so we only need them to be the correct // size. Update test_size_and_offset if a struct's real definition is added. #[derive(Clone, Copy)] diff --git a/src/fat.rs b/src/fat.rs index abc6c707..1bdf1d38 100644 --- a/src/fat.rs +++ b/src/fat.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::block::SectorRead; +use crate::{block::SectorRead, mem::MemoryRegion}; #[repr(packed)] struct Header { @@ -244,6 +244,24 @@ pub trait Read { fn read(&mut self, data: &mut [u8]) -> Result; fn seek(&mut self, offset: u32) -> Result<(), Error>; fn get_size(&self) -> u32; + + // Loads the remainder of the file into the specified memory region + fn load_file(&mut self, mem: &mut MemoryRegion) -> Result<(), Error> { + let mut chunks = mem.as_bytes().chunks_exact_mut(512); + for chunk in chunks.by_ref() { + self.read(chunk)?; + } + let last = chunks.into_remainder(); + if last.is_empty() { + return Ok(()); + } + // Use tmp buffer for last, partial sector + let mut dst = [0; 512]; + let bytes = self.read(&mut dst)? as usize; + assert_eq!(bytes, last.len()); + last.copy_from_slice(&dst[..bytes]); + Ok(()) + } } impl<'a> Read for File<'a> { From df561f8ce1138d5c7ce241450e2da5ce50b4a055 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 17:24:36 -0700 Subject: [PATCH 07/16] rust64_start: Read kernel boot params from %rsi This works because Option<&T> has the same FFI layout as *const T except that a null pointer is None. 
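
This null-pointer optimization is guaranteed for Option<&T>, which is
what makes the extern "C" signature sound. A quick way to convince
yourself (sketch only, not part of the patch):

    use core::mem::size_of;

    // None is represented by the null pointer, so no extra discriminant
    // is needed and the type stays pointer-sized.
    assert_eq!(size_of::<Option<&boot::Params>>(), size_of::<*const boot::Params>());
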
Signed-off-by: Joe Richey --- src/asm/ram64.s | 1 + src/main.rs | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/asm/ram64.s b/src/asm/ram64.s index 09bbb942..0751fc5f 100644 --- a/src/asm/ram64.s +++ b/src/asm/ram64.s @@ -11,4 +11,5 @@ ram64_start: # Setup the stack (at the end of our RAM region) movq $ram_max, %rsp + # BootParams are in %rsi, the second paramter of the System V ABI. jmp rust64_start diff --git a/src/main.rs b/src/main.rs index cb1690f4..a47d55b3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -153,9 +153,16 @@ fn boot_from_device(device: &mut block::VirtioBlockDevice) -> bool { true } -#[cfg_attr(not(test), no_mangle)] -pub extern "C" fn rust64_start() -> ! { - log!("\nStarting.."); +#[no_mangle] +pub extern "C" fn rust64_start(_rdi: *const (), rsi: Option<&boot::Params>) -> ! { + if let Some(boot_params) = rsi { + log!("\nBooting via Linux Boot Protocol"); + run(boot_params) + } + panic!("Unable to determine boot protocol") +} + +fn run(info: &dyn boot::Info) -> ! { enable_sse(); paging::setup(); From bd2f43bee81aaaf274b8fecbe2c66364f62e3976 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 17:53:35 -0700 Subject: [PATCH 08/16] efi: Use Info to setup allocator and EFI tables This allows efi_exec to work with multiple boot protocols. Signed-off-by: Joe Richey --- src/efi/mod.rs | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/src/efi/mod.rs b/src/efi/mod.rs index b1dedd82..61dfc0de 100644 --- a/src/efi/mod.rs +++ b/src/efi/mod.rs @@ -27,6 +27,8 @@ use r_efi::{ }, }; +use crate::boot; + mod alloc; mod block; mod console; @@ -574,29 +576,13 @@ extern "win64" fn image_unload(_: Handle) -> Status { efi::Status::UNSUPPORTED } -/// The 'zero page', a.k.a linux kernel bootparams. -pub const ZERO_PAGE_START: usize = 0x7000; - -const E820_RAM: u32 = 1; - -#[repr(C, packed)] -struct E820Entry { - addr: u64, - size: u64, - entry_type: u32, -} - const PAGE_SIZE: u64 = 4096; // Populate allocator from E820, fixed ranges for the firmware and the loaded binary. 
-fn populate_allocator(image_address: u64, image_size: u64) { - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let e820_count = zero_page.read_u8(0x1e8); - let e820_table = zero_page.as_mut_slice::(0x2d0, u64::from(e820_count)); - - for entry in e820_table { - if entry.entry_type == E820_RAM { +fn populate_allocator(info: &dyn boot::Info, image_address: u64, image_size: u64) { + for i in 0..info.num_entries() { + let entry = info.entry(i); + if entry.entry_type == boot::E820Entry::RAM_TYPE { ALLOCATOR.borrow_mut().add_initial_allocation( MemoryType::ConventionalMemory, entry.size / PAGE_SIZE, @@ -633,6 +619,7 @@ pub fn efi_exec( address: u64, loaded_address: u64, loaded_size: u64, + info: &dyn boot::Info, fs: &crate::fat::Filesystem, block: *const crate::block::VirtioBlockDevice, ) { @@ -715,7 +702,7 @@ pub fn efi_exec( }; let vendor_data = 0u32; - let acpi_rsdp_ptr = unsafe { *((ZERO_PAGE_START + 0x70) as u64 as *const u64) }; + let acpi_rsdp_ptr = info.rsdp_addr(); let mut ct = if acpi_rsdp_ptr != 0 { efi::ConfigurationTable { @@ -765,7 +752,7 @@ pub fn efi_exec( configuration_table: &mut ct, }; - populate_allocator(loaded_address, loaded_size); + populate_allocator(info, loaded_address, loaded_size); let efi_part_id = unsafe { block::populate_block_wrappers(&mut BLOCK_WRAPPERS, block) }; From b0d9f3e2fb7af6d068f099d21c988a59b10483ed Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 18:54:55 -0700 Subject: [PATCH 09/16] bzimage: Rewrite Linux Kernel Loading code This allows Linux to be booted with any boot protocol. The old code took in the Zeropage passed in via the Linux Kernel Boot Protocol, modified it, and passed it into the Linux Kernel. This is not the correct way to boot Linux per the documentation: https://www.kernel.org/doc/Documentation/x86/boot.txt This code now correctly: - Uses a brand-new Zeropage inside the `Kernel` struct - Adds in the E820 map and RSDP pointer from the boot::Info - Reads the header from the file and copies it into the Zeropage - Loads the kernel and initrd into avalible memory - Properly manages the command-line at a fixed memory location - Jumps to the appropriate starting address Signed-off-by: Joe Richey --- src/bzimage.rs | 273 +++++++++++++++++++++---------------------------- src/loader.rs | 18 ++-- 2 files changed, 129 insertions(+), 162 deletions(-) diff --git a/src/bzimage.rs b/src/bzimage.rs index 1a7c78df..176723a3 100644 --- a/src/bzimage.rs +++ b/src/bzimage.rs @@ -11,13 +11,18 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +use atomic_refcell::AtomicRefCell; -use crate::fat::{self, Read}; +use crate::{ + boot::{E820Entry, Header, Info, Params}, + fat::{self, Read}, + mem::MemoryRegion, +}; #[derive(Debug)] pub enum Error { FileError(fat::Error), - KernelOld, + NoInitrdMemory, MagicMissing, NotRelocatable, } @@ -28,185 +33,145 @@ impl From for Error { } } -// From firecracker -/// Kernel command line start address. -const CMDLINE_START: usize = 0x4b000; -/// Kernel command line start address maximum size. -const CMDLINE_MAX_SIZE: usize = 0x10000; -/// The 'zero page', a.k.a linux kernel bootparams. 
-pub const ZERO_PAGE_START: usize = 0x7000; +const KERNEL_LOCATION: u64 = 0x20_0000; -const KERNEL_LOCATION: u32 = 0x20_0000; +#[repr(transparent)] +pub struct Kernel(Params); -const E820_RAM: u32 = 1; - -#[repr(C, packed)] -struct E820Entry { - addr: u64, - size: u64, - entry_type: u32, -} - -pub fn load_initrd(f: &mut dyn Read) -> Result<(), Error> { - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let mut max_load_address = u64::from(zero_page.read_u32(0x22c)); - if max_load_address == 0 { - max_load_address = 0x37ff_ffff; +impl Kernel { + pub fn new(info: &dyn Info) -> Self { + let mut kernel = Self(Params::default()); + kernel.0.acpi_rsdp_addr = info.rsdp_addr(); + kernel.0.set_entries(info); + kernel } - let e820_count = zero_page.read_u8(0x1e8); - let e820_table = zero_page.as_mut_slice::(0x2d0, u64::from(e820_count)); + pub fn load_kernel(&mut self, f: &mut dyn Read) -> Result<(), Error> { + self.0.hdr = Header::from_file(f)?; - // Search E820 table for highest usable ram location that is below the limit. - let mut top_of_usable_ram = 0; - for entry in e820_table { - if entry.entry_type == E820_RAM { - let m = entry.addr + entry.size - 1; - if m > top_of_usable_ram && m < max_load_address { - top_of_usable_ram = m; - } + if self.0.hdr.boot_flag != 0xAA55 || self.0.hdr.header != *b"HdrS" { + return Err(Error::MagicMissing); + } + // Check relocatable + if self.0.hdr.version < 0x205 || self.0.hdr.relocatable_kernel == 0 { + return Err(Error::NotRelocatable); } - } - if top_of_usable_ram > max_load_address { - top_of_usable_ram = max_load_address; + // Skip over the setup sectors + let setup_sects = match self.0.hdr.setup_sects { + 0 => 4, + n => n as u32, + }; + let setup_bytes = (setup_sects + 1) * 512; + let remaining_bytes = f.get_size() - setup_bytes; + + let mut region = MemoryRegion::new(KERNEL_LOCATION, remaining_bytes as u64); + f.seek(setup_bytes)?; + f.load_file(&mut region)?; + + // Fill out "write/modify" fields + self.0.hdr.type_of_loader = 0xff; // Unknown Loader + self.0.hdr.code32_start = KERNEL_LOCATION as u32; // Where we load the kernel + self.0.hdr.cmd_line_ptr = CMDLINE_START as u32; // Where we load the cmdline + Ok(()) } - // Align address to 2MiB boundary as we use 2 MiB pages - let initrd_address = (top_of_usable_ram - u64::from(f.get_size())) & !((2 << 20) - 1); - let mut initrd_region = crate::mem::MemoryRegion::new(initrd_address, u64::from(f.get_size())); - - let mut offset = 0; - while offset < f.get_size() { - let bytes_remaining = f.get_size() - offset; - - // Use intermediata buffer for last, partial sector - if bytes_remaining < 512 { - let mut data: [u8; 512] = [0; 512]; - match f.read(&mut data) { - Err(crate::fat::Error::EndOfFile) => break, - Err(e) => return Err(Error::FileError(e)), - Ok(_) => {} - } - let dst = initrd_region.as_mut_slice(u64::from(offset), u64::from(bytes_remaining)); - dst.copy_from_slice(&data[0..bytes_remaining as usize]); - break; - } + // Compute the load address for the initial ramdisk + fn initrd_addr(&self, size: u64) -> Option { + let initrd_addr_max = match self.0.hdr.initrd_addr_max { + 0 => 0x37FF_FFFF, + a => a as u64, + }; + let max_start = (initrd_addr_max + 1) - size; - let dst = initrd_region.as_mut_slice(u64::from(offset), 512); - match f.read(dst) { - Err(crate::fat::Error::EndOfFile) => break, - Err(e) => return Err(Error::FileError(e)), - Ok(_) => {} + let mut option_addr = None; + for i in 0..self.0.num_entries() { + let entry = self.0.entry(i); + if entry.entry_type 
!= E820Entry::RAM_TYPE { + continue; + } + let addr = entry.addr + entry.size - size; + // Align address to 2MiB boundary as we use 2 MiB pages + let addr = addr & !((2 << 20) - 1); + // The ramdisk must fit in the region completely + if addr > max_start || addr < entry.addr { + continue; + } + // Use the largest address we can find + if let Some(load_addr) = option_addr { + if load_addr >= addr { + continue; + } + } + option_addr = Some(addr) } - - offset += 512; + option_addr } - // initrd pointer/size - zero_page.write_u32(0x218, initrd_address as u32); - zero_page.write_u32(0x21c, f.get_size()); - Ok(()) -} - -pub fn append_commandline(addition: &str) -> Result<(), Error> { - let mut cmdline_region = - crate::mem::MemoryRegion::new(CMDLINE_START as u64, CMDLINE_MAX_SIZE as u64); - let zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let cmdline = cmdline_region.as_mut_slice::(0, CMDLINE_MAX_SIZE as u64); - - // Use the actual string length but limit to the orgiginal incoming size - let orig_len = zero_page.read_u32(0x238) as usize; - - let orig_cmdline = unsafe { - core::str::from_utf8_unchecked(&cmdline[0..orig_len]).trim_matches(char::from(0)) - }; - let orig_len = orig_cmdline.len(); - - cmdline[orig_len] = b' '; - cmdline[orig_len + 1..orig_len + 1 + addition.len()].copy_from_slice(addition.as_bytes()); - cmdline[orig_len + 1 + addition.len()] = 0; - - // Commandline pointer/size - zero_page.write_u32(0x228, CMDLINE_START as u32); - zero_page.write_u32(0x238, (orig_len + addition.len() + 1) as u32); - - Ok(()) -} - -pub fn load_kernel(f: &mut dyn Read) -> Result { - f.seek(0)?; - - let mut buf: [u8; 1024] = [0; 1024]; - - f.read(&mut buf[0..512])?; - f.read(&mut buf[512..])?; - - let setup = crate::mem::MemoryRegion::from_bytes(&mut buf[..]); + pub fn load_initrd(&mut self, f: &mut dyn Read) -> Result<(), Error> { + let size = f.get_size() as u64; + let addr = match self.initrd_addr(size) { + Some(addr) => addr, + None => return Err(Error::NoInitrdMemory), + }; - if setup.read_u16(0x1fe) != 0xAA55 { - return Err(Error::MagicMissing); - } + let mut region = MemoryRegion::new(addr, size); + f.seek(0)?; + f.load_file(&mut region)?; - if setup.read_u32(0x202) != 0x5372_6448 { - return Err(Error::MagicMissing); + // initrd pointer/size + self.0.hdr.ramdisk_image = addr as u32; + self.0.hdr.ramdisk_size = size as u32; + Ok(()) } - // Need for relocation - if setup.read_u16(0x206) < 0x205 { - return Err(Error::KernelOld); + pub fn append_cmdline(&mut self, addition: &[u8]) { + if !addition.is_empty() { + CMDLINE.borrow_mut().append(addition); + assert!(CMDLINE.borrow().len() < self.0.hdr.cmdline_size); + } } - // Check relocatable - if setup.read_u8(0x234) == 0 { - return Err(Error::NotRelocatable); + pub fn boot(&mut self) { + // 0x200 is the startup_64 offset + let jump_address = self.0.hdr.code32_start as u64 + 0x200; + // Rely on x86 C calling convention where second argument is put into %rsi register + let ptr = jump_address as *const (); + let code: extern "C" fn(usize, usize) = unsafe { core::mem::transmute(ptr) }; + (code)(0 /* dummy value */, &mut self.0 as *mut _ as usize); } +} - let header_start = 0x1f1 as usize; - let header_end = 0x202 + buf[0x0201] as usize; - - // Reuse the zero page that we were originally given - // TODO: Zero and fill it ourself but will need to save E820 details - let mut zero_page = crate::mem::MemoryRegion::new(ZERO_PAGE_START as u64, 4096); - - let dst = zero_page.as_mut_slice(header_start as u64, (header_end - 
header_start) as u64); - dst.copy_from_slice(&buf[header_start..header_end]); - - // Unknown loader - zero_page.write_u8(0x210, 0xff); +// This is the highest region at which we can load the kernel command line. +const CMDLINE_START: u64 = 0x4b000; +const CMDLINE_MAX_LEN: u64 = 0x10000; - // Where we will load the kernel into - zero_page.write_u32(0x214, KERNEL_LOCATION); +static CMDLINE: AtomicRefCell = AtomicRefCell::new(CmdLine::new()); - let mut setup_sects = buf[header_start] as usize; +struct CmdLine { + region: MemoryRegion, + length: usize, // Does not include null pointer +} - if setup_sects == 0 { - setup_sects = 4; +impl CmdLine { + const fn new() -> Self { + Self { + region: MemoryRegion::new(CMDLINE_START, CMDLINE_MAX_LEN), + length: 0, + } } - setup_sects += 1; // Include the boot sector - - let setup_bytes = setup_sects * 512; // Use to start reading the main image - - let mut load_offset = u64::from(KERNEL_LOCATION); - - f.seek(setup_bytes as u32)?; - - loop { - let mut dst = crate::mem::MemoryRegion::new(load_offset, 512); - let dst = dst.as_mut_slice(0, 512); + const fn len(&self) -> u32 { + self.length as u32 + } - match f.read(dst) { - Err(crate::fat::Error::EndOfFile) => { - // 0x200 is the startup_64 offset - return Ok(u64::from(KERNEL_LOCATION) + 0x200); - } - Err(e) => return Err(Error::FileError(e)), - Ok(_) => {} - }; + fn append(&mut self, args: &[u8]) { + let bytes = self.region.as_bytes(); + bytes[self.length] = b' '; + self.length += 1; - load_offset += 512; + bytes[self.length..self.length + args.len()].copy_from_slice(args); + self.length += args.len(); + bytes[self.length] = 0; } } diff --git a/src/loader.rs b/src/loader.rs index 5ba30e4d..bdd8aee9 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -13,7 +13,8 @@ // limitations under the License. use crate::{ - bzimage, + boot, + bzimage::{self, Kernel}, common::ascii_strip, fat::{self, Read}, }; @@ -121,7 +122,7 @@ fn default_entry_path(fs: &fat::Filesystem) -> Result<[u8; 260], fat::Error> { Ok(entry_path) } -pub fn load_default_entry(fs: &fat::Filesystem) -> Result { +pub fn load_default_entry(fs: &fat::Filesystem, info: &dyn boot::Info) -> Result { let default_entry_path = default_entry_path(&fs)?; let default_entry_path = ascii_strip(&default_entry_path); @@ -132,19 +133,20 @@ pub fn load_default_entry(fs: &fat::Filesystem) -> Result { let initrd_path = ascii_strip(&entry.initrd_path); let cmdline = ascii_strip(&entry.cmdline); + let mut kernel = Kernel::new(info); + let mut bzimage_file = fs.open(bzimage_path)?; - let jump_address = bzimage::load_kernel(&mut bzimage_file)?; + kernel.load_kernel(&mut bzimage_file)?; if !initrd_path.is_empty() { let mut initrd_file = fs.open(initrd_path)?; - bzimage::load_initrd(&mut initrd_file)?; + kernel.load_initrd(&mut initrd_file)?; } - if !cmdline.is_empty() { - bzimage::append_commandline(cmdline)? - } + kernel.append_cmdline(info.cmdline()); + kernel.append_cmdline(cmdline.as_bytes()); - Ok(jump_address) + Ok(kernel) } #[cfg(test)] From ae9c8a935df10522997ab9392cf771f01050727b Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 19:30:22 -0700 Subject: [PATCH 10/16] main: Rewrite boot_from_device We now pass the boot::Info to `loader::load_default_entry` and `efi::efi_exec`. 
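
For reference, the signatures involved (as introduced by the previous
patches) now take the boot info explicitly:

    pub fn load_default_entry(fs: &fat::Filesystem, info: &dyn boot::Info) -> Result<Kernel, Error>
    pub fn efi_exec(address: u64, loaded_address: u64, loaded_size: u64,
                    info: &dyn boot::Info, fs: &crate::fat::Filesystem,
                    block: *const crate::block::VirtioBlockDevice)
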
We also reorganize the code in this function to: - Avoid unnecessary nesting - log errors when they occur The code is now much more readable Signed-off-by: Joe Richey --- src/main.rs | 111 +++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 63 deletions(-) diff --git a/src/main.rs b/src/main.rs index a47d55b3..f5bb8877 100644 --- a/src/main.rs +++ b/src/main.rs @@ -76,80 +76,65 @@ fn enable_sse() { const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4; const VIRTIO_PCI_BLOCK_DEVICE_ID: u16 = 0x1042; -fn boot_from_device(device: &mut block::VirtioBlockDevice) -> bool { - match device.init() { - Err(_) => { - log!("Error configuring block device"); - return false; - } - Ok(_) => log!( - "Virtio block device configured. Capacity: {} sectors", - device.get_capacity() - ), +fn boot_from_device(device: &mut block::VirtioBlockDevice, info: &dyn boot::Info) -> bool { + if let Err(err) = device.init() { + log!("Error configuring block device: {:?}", err); + return false; } + log!( + "Virtio block device configured. Capacity: {} sectors", + device.get_capacity() + ); - let mut f; - - match part::find_efi_partition(device) { - Ok((start, end)) => { - log!("Found EFI partition"); - f = fat::Filesystem::new(device, start, end); - if f.init().is_err() { - log!("Failed to create filesystem"); - return false; - } - } - Err(_) => { - log!("Failed to find EFI partition"); + let (start, end) = match part::find_efi_partition(device) { + Ok(p) => p, + Err(err) => { + log!("Failed to find EFI partition: {:?}", err); return false; } - } + }; + log!("Found EFI partition"); + let mut f = fat::Filesystem::new(device, start, end); + if let Err(err) = f.init() { + log!("Failed to create filesystem: {:?}", err); + return false; + } log!("Filesystem ready"); - let jump_address; + match loader::load_default_entry(&f, info) { + Ok(mut kernel) => { + device.reset(); + log!("Jumping to kernel"); + kernel.boot(); + return true; + } + Err(err) => log!("Error loading default entry: {:?}", err), + } - match loader::load_default_entry(&f) { - Ok(addr) => { - jump_address = addr; + log!("Using EFI boot."); + let mut file = match f.open("/EFI/BOOT/BOOTX64 EFI") { + Ok(file) => file, + Err(err) => { + log!("Failed to load default EFI binary: {:?}", err); + return false; } - Err(_) => { - log!("Error loading default entry. 
Using EFI boot."); - match f.open("/EFI/BOOT/BOOTX64 EFI") { - Ok(mut file) => { - log!("Found bootloader (BOOTX64.EFI)"); - let mut l = pe::Loader::new(&mut file); - match l.load(0x20_0000) { - Ok((a, size)) => { - log!("Executable loaded"); - efi::efi_exec(a, 0x20_0000, size, &f, device); - return true; - } - Err(e) => { - match e { - pe::Error::FileError => log!("File error"), - pe::Error::InvalidExecutable => log!("Invalid executable"), - } - return false; - } - } - } - Err(_) => { - log!("Failed to find bootloader"); - return false; - } - } + }; + log!("Found bootloader (BOOTX64.EFI)"); + + let mut l = pe::Loader::new(&mut file); + let load_addr = 0x20_0000; + let (entry_addr, size) = match l.load(load_addr) { + Ok(load_info) => load_info, + Err(err) => { + log!("Error loading executable: {:?}", err); + return false; } - } + }; device.reset(); - - log!("Jumping to kernel"); - - // Rely on x86 C calling convention where second argument is put into %rsi register - let ptr = jump_address as *const (); - let code: extern "C" fn(u64, u64) = unsafe { core::mem::transmute(ptr) }; - (code)(0 /* dummy value */, bzimage::ZERO_PAGE_START as u64); + log!("Executable loaded"); + efi::efi_exec(entry_addr, 0x20_0000, size, info, &f, device); true } @@ -175,7 +160,7 @@ fn run(info: &dyn boot::Info) -> ! { let mut pci_transport = pci::VirtioPciTransport::new(pci_device); block::VirtioBlockDevice::new(&mut pci_transport); let mut device = block::VirtioBlockDevice::new(&mut pci_transport); - boot_from_device(&mut device) + boot_from_device(&mut device, info) }, ); From 2664ab4d9defd8020ff9ba3b76c9cff275b1d2df Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 20:26:59 -0700 Subject: [PATCH 11/16] pvh: Add Structures for PVH Boot Protocol These are simply translated from the C structs in: xen/include/public/arch-x86/hvm/start_info.h Note that unlike the Linux Boot Protocol structures, these structures don't need to be `#[repr(packed)]` as they are garunteed to have the proper alignment. 
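
In other words, with plain #[repr(C)] every u64 field already sits on
its natural 8-byte boundary. A sanity check one could add (illustrative
only; the numbers assume the field layout below):

    assert_eq!(core::mem::align_of::<StartInfo>(), 8);
    assert_eq!(core::mem::size_of::<StartInfo>(), 56);
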
Signed-off-by: Joe Richey --- src/main.rs | 1 + src/pvh.rs | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 src/pvh.rs diff --git a/src/main.rs b/src/main.rs index f5bb8877..5cff6b2e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -44,6 +44,7 @@ mod paging; mod part; mod pci; mod pe; +mod pvh; mod virtio; #[cfg(all(not(test), feature = "log-panic"))] diff --git a/src/pvh.rs b/src/pvh.rs new file mode 100644 index 00000000..c5a49f4f --- /dev/null +++ b/src/pvh.rs @@ -0,0 +1,55 @@ +use crate::{ + boot::{E820Entry, Info}, + common, +}; + +// Structures from xen/include/public/arch-x86/hvm/start_info.h +#[derive(Debug)] +#[repr(C)] +pub struct StartInfo { + magic: [u8; 4], + version: u32, + flags: u32, + nr_modules: u32, + modlist_paddr: u64, + cmdline_paddr: u64, + rsdp_paddr: u64, + memmap_paddr: u64, + memmap_entries: u32, + _pad: u32, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +struct MemMapEntry { + addr: u64, + size: u64, + entry_type: u32, + _pad: u32, +} + +impl Info for StartInfo { + fn rsdp_addr(&self) -> u64 { + self.rsdp_paddr + } + fn cmdline(&self) -> &[u8] { + unsafe { common::from_cstring(self.cmdline_paddr) } + } + fn num_entries(&self) -> u8 { + // memmap_paddr and memmap_entries only exist in version 1 or later + if self.version < 1 || self.memmap_paddr == 0 { + return 0; + } + self.memmap_entries as u8 + } + fn entry(&self, idx: u8) -> E820Entry { + assert!(idx < self.num_entries()); + let ptr = self.memmap_paddr as *const MemMapEntry; + let entry = unsafe { *ptr.offset(idx as isize) }; + E820Entry { + addr: entry.addr, + size: entry.size, + entry_type: entry.entry_type, + } + } +} From 8cbba8c70384962d376c90eb04a96812a9d631de Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 30 Mar 2020 02:01:19 -0700 Subject: [PATCH 12/16] asm: Remove Serial debug statements This approch doesn't scale to our other ASM code and it clutters the output. Signed-off-by: Joe Richey --- src/asm/ram64.s | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/asm/ram64.s b/src/asm/ram64.s index 0751fc5f..e8183115 100644 --- a/src/asm/ram64.s +++ b/src/asm/ram64.s @@ -3,11 +3,6 @@ .code64 ram64_start: - # Indicate (via serial) that we are in long/64-bit mode - movw $0x3f8, %dx - movb $'L', %al - outb %al, %dx - # Setup the stack (at the end of our RAM region) movq $ram_max, %rsp From 52701c531762887d546922104b32169ef2976d19 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 30 Mar 2020 02:36:03 -0700 Subject: [PATCH 13/16] layout: Cleanup and comment linker script Signed-off-by: Joe Richey --- layout.ld | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/layout.ld b/layout.ld index 5403d7d1..517e5b8a 100644 --- a/layout.ld +++ b/layout.ld @@ -2,7 +2,7 @@ ENTRY(ram64_start) PHDRS { - program PT_LOAD FILEHDR PHDRS ; + ram PT_LOAD FILEHDR PHDRS ; } /* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ @@ -13,16 +13,26 @@ stack_size = 64K; SECTIONS { - /* Mapping in the program headers makes it easier to mmap the whole file. */ + /* Mapping the program headers into RAM makes the file smaller. */ . = ram_min; . += SIZEOF_HEADERS; - .rodata : { *(.rodata .rodata.*) } :program - .text : { *(.text .text.*) } :program - .data : { *(.data .data.*) } :program - .bss : { *(.bss .bss.*) } :program + /* These sections are mapped into RAM from the file. Omitting :ram from + later sections avoids emitting empty sections in the final binary. 
*/
+    data_start = .;
+    .rodata : { *(.rodata .rodata.*) } :ram
+    .text : { *(.text .text.*) }
+    .data : { *(.data .data.*) }
+    data_size = . - data_start;
 
-    firmware_ram_size = . - ram_min;
+    /* The BSS section isn't mapped from any file data. It is simply zeroed
+       in RAM. So our file size should be computed from here. */
+    file_size = . - ram_min;
+    .bss : {
+        bss_start = .;
+        *(.bss .bss.*)
+        bss_size = . - bss_start;
+    }
 
     ASSERT((. <= ram_max - stack_size), "firmware size too big for RAM region")

From ac0db6531bf7226866855ee227b192df2ca6163d Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Sat, 28 Mar 2020 20:30:43 -0700
Subject: [PATCH 14/16] pvh: Add code to read PVH Boot Protocol structs

Note that this also requires zeroing out %rdi in the Linux Boot
Protocol path, so that the rdi parameter has a valid representation
(a null pointer maps to None).

Signed-off-by: Joe Richey
---
 layout.ld       | 2 +-
 src/asm/ram64.s | 7 ++++++-
 src/main.rs     | 6 +++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/layout.ld b/layout.ld
index 517e5b8a..064bbd12 100644
--- a/layout.ld
+++ b/layout.ld
@@ -1,4 +1,4 @@
-ENTRY(ram64_start)
+ENTRY(linux64_start)
 
 PHDRS
 {
diff --git a/src/asm/ram64.s b/src/asm/ram64.s
index e8183115..6999c048 100644
--- a/src/asm/ram64.s
+++ b/src/asm/ram64.s
@@ -1,10 +1,15 @@
 .section .text, "ax"
-.global ram64_start
+.global linux64_start
 .code64
 
+linux64_start:
+    # Zero out %rdi, its value is unspecified in the Linux Boot Protocol.
+    xorq %rdi, %rdi
+
 ram64_start:
     # Setup the stack (at the end of our RAM region)
     movq $ram_max, %rsp
 
+    # PVH start_info is in %rdi, the first parameter of the System V ABI.
     # BootParams are in %rsi, the second parameter of the System V ABI.
     jmp rust64_start
diff --git a/src/main.rs b/src/main.rs
index 5cff6b2e..0da96ba7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -140,7 +140,11 @@ fn boot_from_device(device: &mut block::VirtioBlockDevice, info: &dyn boot::Info
 }
 
 #[no_mangle]
-pub extern "C" fn rust64_start(_rdi: *const (), rsi: Option<&boot::Params>) -> ! {
+pub extern "C" fn rust64_start(rdi: Option<&pvh::StartInfo>, rsi: Option<&boot::Params>) -> ! {
+    if let Some(start_info) = rdi {
+        log!("\nBooting via PVH Boot Protocol");
+        run(start_info)
+    }
     if let Some(boot_params) = rsi {
         log!("\nBooting via Linux Boot Protocol");
         run(boot_params)

From 3c722424d5f48589bd7c8dc53585180450bf2112 Mon Sep 17 00:00:00 2001
From: Joe Richey
Date: Sat, 28 Mar 2020 20:36:59 -0700
Subject: [PATCH 15/16] pvh: Add code to transition from 32-bit to 64-bit

PVH starts in 32-bit mode, so we have to transition to 64-bit mode
before we can start running Rust code. As we have not yet initialized
the stack, we can only use registers and static memory. This transition
does the following:
  - Sets up page tables to identity map 2 MiB
  - Loads page tables into CR3
  - Sets CR4.PAE, EFER.LME, and CR0.PG
  - Sets up a 64-bit GDT
  - Long jumps to 64-bit code

We put the GDT in the .rodata section, and we put the 32-bit code in
its own section. This makes it easier to debug and disassemble the
binary.
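
As a cross-check on the descriptor that gdt64.s builds byte-by-byte
below, the access byte 0b10011011 and flags byte 0b00100000 combine
into a single u64 descriptor value (illustrative only, not part of the
patch):

    // Base and limit bytes are all zero; only the access byte (bits 40-47)
    // and the flags byte (bits 48-55, with L = 1) are non-zero.
    let code64_desc: u64 = (0b1001_1011u64 << 40) | (0b0010_0000u64 << 48);
    assert_eq!(code64_desc, 0x0020_9b00_0000_0000);
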
Signed-off-by: Joe Richey
---
 layout.ld       |  1 +
 src/asm/gdt64.s | 32 ++++++++++++++++++++++++++++++++
 src/asm/mod.rs  |  2 ++
 src/asm/ram32.s | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+)
 create mode 100644 src/asm/gdt64.s
 create mode 100644 src/asm/ram32.s

diff --git a/layout.ld b/layout.ld
index 064bbd12..108a5dbc 100644
--- a/layout.ld
+++ b/layout.ld
@@ -22,6 +22,7 @@ SECTIONS
     data_start = .;
     .rodata : { *(.rodata .rodata.*) } :ram
     .text : { *(.text .text.*) }
+    .text32 : { *(.text32) }
     .data : { *(.data .data.*) }
     data_size = . - data_start;
 
diff --git a/src/asm/gdt64.s b/src/asm/gdt64.s
new file mode 100644
index 00000000..9737c43f
--- /dev/null
+++ b/src/asm/gdt64.s
@@ -0,0 +1,32 @@
+.section .rodata, "a"
+
+gdt64_ptr:
+    .short gdt64_end - gdt64_start - 1 # GDT length is actually (length - 1)
+    .quad gdt64_start
+
+gdt64_start: # First descriptor is always null
+    .quad 0
+code64_desc: # 64-bit Code-Segments always have: Base = 0, Limit = 4G
+    # CS.Limit[15:00] = 0 - Ignored
+    .short 0x0000
+    # CS.Base[15:00] = 0 - Ignored
+    .short 0x0000
+    # CS.Base[23:16] = 0 (bits 0-7) - Ignored
+    .byte 0x00
+    # CS.Accessed = 1 (bit 8) - Don't write to segment on first use
+    # CS.ReadEnable = 1 (bit 9) - Read/Execute Code-Segment
+    # CS.Conforming = 0 (bit 10) - Nonconforming, no lower-priv access
+    # CS.Executable = 1 (bit 11) - Code-Segment
+    # CS.S = 1 (bit 12) - Not a System-Segment
+    # CS.DPL = 0 (bits 13-14) - We only use this segment in Ring 0
+    # CS.P = 1 (bit 15) - Segment is present
+    .byte 0b10011011
+    # CS.Limit[19:16] = 0 (bits 16-19) - Ignored
+    # CS.AVL = 0 (bit 20) - Our software doesn't use this bit
+    # CS.L = 1 (bit 21) - This is a 64-bit segment
+    # CS.D = 0 (bit 22) - Not a 32-bit segment (D must be 0 when L = 1)
+    # CS.G = 0 (bit 23) - Ignored
+    .byte 0b00100000
+    # CS.Base[31:24] = 0 (bits 24-31) - Ignored
+    .byte 0x00
+gdt64_end:
diff --git a/src/asm/mod.rs b/src/asm/mod.rs
index e1b44166..8183e317 100644
--- a/src/asm/mod.rs
+++ b/src/asm/mod.rs
@@ -1 +1,3 @@
+global_asm!(include_str!("ram32.s"));
 global_asm!(include_str!("ram64.s"));
+global_asm!(include_str!("gdt64.s"));
diff --git a/src/asm/ram32.s b/src/asm/ram32.s
new file mode 100644
index 00000000..e989fd7e
--- /dev/null
+++ b/src/asm/ram32.s
@@ -0,0 +1,46 @@
+.section .text32, "ax"
+.code32
+
+ram32_start:
+    # Stash the PVH start_info struct in %rdi.
+    movl %ebx, %edi
+    # Zero out %rsi, its value is unspecified in the PVH Boot Protocol.
+    xorl %esi, %esi
+
+setup_page_tables:
+    # First L2 entry identity maps [0, 2 MiB)
+    movl $0b10000011, (L2_TABLES) # huge (bit 7), writable (bit 1), present (bit 0)
+    # First L3 entry points to L2 table
+    movl $L2_TABLES, %eax
+    orb $0b00000011, %al # writable (bit 1), present (bit 0)
+    movl %eax, (L3_TABLE)
+    # First L4 entry points to L3 table
+    movl $L3_TABLE, %eax
+    orb $0b00000011, %al # writable (bit 1), present (bit 0)
+    movl %eax, (L4_TABLE)
+
+enable_paging:
+    # Load page table root into CR3
+    movl $L4_TABLE, %eax
+    movl %eax, %cr3
+
+    # Set CR4.PAE (Physical Address Extension)
+    movl %cr4, %eax
+    orb $0b00100000, %al # Set bit 5
+    movl %eax, %cr4
+    # Set EFER.LME (Long Mode Enable)
+    movl $0xC0000080, %ecx
+    rdmsr
+    orb $0b00000001, %ah # Set bit 8
+    wrmsr
+    # Set CR0.PG (Paging)
+    movl %cr0, %eax
+    orl $(1 << 31), %eax
+    movl %eax, %cr0
+
+jump_to_64bit:
+    # We are now in 32-bit compatibility mode. To enter 64-bit mode, we need to
+    # load a 64-bit code segment into our GDT.
+    lgdtl gdt64_ptr
+    # Set CS to a 64-bit segment and jump to 64-bit code.
+ ljmpl $(code64_desc - gdt64_start), $ram64_start From bea1bd382d0934311717adf4cb64bd4243e9d3e4 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sat, 28 Mar 2020 20:42:51 -0700 Subject: [PATCH 16/16] pvh: Add PVH ELFNOTE This adds information to the ELF binary so that a loader will know where to start the executable. This now allows the firmware to be booted via the PVH Boot Protocol. Signed-off-by: Joe Richey --- layout.ld | 4 +++- src/asm/mod.rs | 1 + src/asm/note.s | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 src/asm/note.s diff --git a/layout.ld b/layout.ld index 108a5dbc..5aff83e1 100644 --- a/layout.ld +++ b/layout.ld @@ -3,6 +3,7 @@ ENTRY(linux64_start) PHDRS { ram PT_LOAD FILEHDR PHDRS ; + note PT_NOTE ; } /* Loaders like to put stuff in low memory (< 1M), so we don't use it. */ @@ -13,9 +14,10 @@ stack_size = 64K; SECTIONS { - /* Mapping the program headers into RAM makes the file smaller. */ + /* Mapping the program headers and note into RAM makes the file smaller. */ . = ram_min; . += SIZEOF_HEADERS; + .note : { *(.note) } :note :ram /* These sections are mapped into RAM from the file. Omitting :ram from later sections avoids emitting empty sections in the final binary. */ diff --git a/src/asm/mod.rs b/src/asm/mod.rs index 8183e317..95dd9360 100644 --- a/src/asm/mod.rs +++ b/src/asm/mod.rs @@ -1,3 +1,4 @@ +global_asm!(include_str!("note.s")); global_asm!(include_str!("ram32.s")); global_asm!(include_str!("ram64.s")); global_asm!(include_str!("gdt64.s")); diff --git a/src/asm/note.s b/src/asm/note.s new file mode 100644 index 00000000..674cf70e --- /dev/null +++ b/src/asm/note.s @@ -0,0 +1,20 @@ +.section .note, "a" + +# From xen/include/public/elfnote.h, "Physical entry point into the kernel." +XEN_ELFNOTE_PHYS32_ENTRY = 18 + +# We don't bother defining an ELFNOTE macro, as we only have one note. +# This is equialent to the kernel's: +# ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long pvh_start) +.align 4 + .long name_end - name_start # namesz + .long desc_end - desc_start # descsz + .long XEN_ELFNOTE_PHYS32_ENTRY # type +name_start: + .asciz "Xen" +name_end: +.align 4 +desc_start: + .long ram32_start +desc_end: +.align 4