Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Add support for UFFD backed memory when restoring Firecracker VM from snapshot #1

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 247 additions & 18 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions src/api_server/src/request/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ mod tests {

let mut expected_cfg = LoadSnapshotParams {
snapshot_path: PathBuf::from("foo"),
mem_file_path: PathBuf::from("bar"),
mem_backend_type: Default::default(),
mem_backend_path: PathBuf::from("bar"),
enable_diff_snapshots: false,
resume_vm: false,
};
Expand All @@ -125,7 +126,8 @@ mod tests {

expected_cfg = LoadSnapshotParams {
snapshot_path: PathBuf::from("foo"),
mem_file_path: PathBuf::from("bar"),
mem_backend_type: Default::default(),
mem_backend_path: PathBuf::from("bar"),
enable_diff_snapshots: true,
resume_vm: false,
};
Expand All @@ -144,7 +146,8 @@ mod tests {

expected_cfg = LoadSnapshotParams {
snapshot_path: PathBuf::from("foo"),
mem_file_path: PathBuf::from("bar"),
mem_backend_type: Default::default(),
mem_backend_path: PathBuf::from("bar"),
enable_diff_snapshots: false,
resume_vm: true,
};
Expand Down
14 changes: 11 additions & 3 deletions src/api_server/swagger/firecracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1045,16 +1045,24 @@ definitions:
SnapshotLoadParams:
type: object
required:
- mem_file_path
- mem_backend_path
- snapshot_path
properties:
enable_diff_snapshots:
type: boolean
description:
Enable support for incremental (diff) snapshots by tracking dirty guest pages.
mem_file_path:
mem_backend_type:
type: string
enum:
- File
- UffdOverUDS
mem_backend_path:
type: string
description: Path to the file that contains the guest memory to be loaded.
description: Based on 'mem_backend_type' it is either
1) Path to the file that contains the guest memory to be loaded,
2) Path to the UDS where a custom page-fault handler process is listening
for the UFFD set up by Firecracker to handle its guest memory page faults.
snapshot_path:
type: string
description: Path to the file that contains the microVM state to be loaded.
Expand Down
8 changes: 8 additions & 0 deletions src/firecracker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,11 @@ seccompiler = { path = "../seccompiler" }
snapshot = { path = "../snapshot"}
utils = { path = "../utils" }
vmm = { path = "../vmm" }

[dev-dependencies]
nix = "0.23.0"
regex = ">=1.0.0"
serde = { version = ">=1.0.27", features = ["derive"] }
serde_json = ">=1.0.9"
userfaultfd = ">=0.4.0"
micro_http = { git = "https://github.com/firecracker-microvm/micro-http", rev = "36e59a0" }
71 changes: 71 additions & 0 deletions src/firecracker/examples/uffd_handler/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
mod uffd_handler;

use std::io::{Read, Write};
use std::os::unix::net::UnixStream;
use std::sync::{Arc, Barrier};

/// Sends one HTTP/1.1 request over the Firecracker API socket and prints the reply.
///
/// `method_and_uri` is the start of the request line (for example
/// `"PUT /snapshot/load"`); `body` is sent verbatim as the payload, with
/// `Content-Length` set to its length in bytes.
///
/// The reply is taken from a single `read` of at most 1024 bytes, so a longer
/// response is shown truncated.
///
/// # Panics
///
/// Panics if the request cannot be written or the response cannot be read.
fn firecracker_api_call(stream: &mut UnixStream, method_and_uri: &str, body: &str) {
    let request = format!(
        "{} HTTP/1.1\r\n\
         Content-Length: {}\r\n\
         Content-Type: application/json\r\n\r\n\
         {}",
        method_and_uri,
        body.len(),
        body
    );
    println!("API request:\n{}", request);
    stream
        .write_all(request.as_bytes())
        .expect("cannot send API request");

    let mut response = [0u8; 1024];
    let bytes_read = stream
        .read(&mut response)
        .expect("cannot read API response");
    // The reply is only echoed for diagnostics, so decode it lossily: a strict
    // conversion would panic on non-UTF-8 bytes or on a multi-byte character cut
    // in half at the end of the buffer.
    println!(
        "API response of {} bytes:\n{}",
        bytes_read,
        String::from_utf8_lossy(&response[..bytes_read])
    );
}

/// Example driver: loads a snapshot whose guest memory is served through UFFD.
///
/// Connects to the Firecracker API socket, starts `uffd_handler::run` on a
/// second thread, and waits on a barrier until that thread signals it is ready.
/// It then issues `PUT /snapshot/load` with the `UffdOverUDS` backend pointing at
/// the handler's socket, followed by `PATCH /vm` to resume. Finally it joins
/// the handler thread.
fn main() {
    println!("Connecting to Firecracker API.");

    // TODO: make paths configurable through command-line parameters.
    let snapshot_path = "./foo.image";
    let mem_file_path = "/tmp/foo.mem";
    let path_to_api_socket = "/tmp/firecracker-sb0.sock";
    let path_to_uffd_socket = "/tmp/firecracker-sb0-uffd.sock";

    let mut api_stream = UnixStream::connect(path_to_api_socket).expect("cannot connect");

    let ready = Arc::new(Barrier::new(2));
    let uffd_thread = {
        // Owned copies for the handler thread, which outlives these borrows.
        let sock_path = String::from(path_to_uffd_socket);
        let mem_path = String::from(mem_file_path);
        let thread_ready = Arc::clone(&ready);
        std::thread::spawn(move || uffd_handler::run(sock_path, mem_path, thread_ready))
    };
    // Do not talk to Firecracker until the handler has reached its barrier.
    ready.wait();
    println!("Sending Load Snap API call.");

    let load_request = format!(
        r#"{{"snapshot_path":"{}","mem_backend_type":"UffdOverUDS","mem_backend_path":"{}"}}"#,
        snapshot_path, path_to_uffd_socket
    );
    firecracker_api_call(&mut api_stream, "PUT /snapshot/load", &load_request);

    println!("Sending Resume-VM API call.");
    firecracker_api_call(&mut api_stream, "PATCH /vm", "{\"state\":\"Resumed\"}");

    uffd_thread.join().expect("uffd thread crashed");
}
166 changes: 166 additions & 0 deletions src/firecracker/examples/uffd_handler/uffd_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use libc::c_void;
use nix::poll::{poll, PollFd, PollFlags};
use nix::sys::mman::{mmap, MapFlags, ProtFlags};
use nix::unistd::{sysconf, SysconfVar};
use serde::Deserialize;
use std::fs::File;
use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd};
use std::os::unix::net::{UnixListener, UnixStream};
use std::ptr;
use std::sync::{Arc, Barrier};
use userfaultfd::Uffd;
use utils::sock_ctrl_msg::ScmSocket;

// Copied from the Firecracker vmm sources so this example can deserialize the
// region description without depending on that definition directly.
mod firecracker_imports {
use super::Deserialize;
/// This describes the mapping between Firecracker base virtual address and offset in the
/// buffer or file backend for a guest memory region. It is used to tell an external
/// process/thread where to populate the guest memory data for this range.
///
/// E.g. Guest memory contents for a region of `size` bytes can be found in the backend
/// at `offset` bytes from the beginning, and should be copied/populated into `base_h_va`.
#[derive(Clone, Debug, Default, Deserialize)]
pub struct RegionBackendMapping {
/// Base host virtual address where the guest memory contents for this region
/// should be copied/populated.
pub base_h_va: u64,
/// Region size, in bytes.
pub size: usize,
/// Offset, in bytes, in the backend file/buffer where the region contents are.
pub offset: u64,
}
}
use firecracker_imports::RegionBackendMapping;

/// State needed to serve page faults reported on one userfaultfd.
struct UffdPfHandler {
/// Region descriptions used to translate a faulting address into a backing-buffer offset.
mappings: Vec<RegionBackendMapping>,
/// Start of the buffer holding the memory contents; each mapping's `offset` is added to it.
backing_buffer: *const u8,
/// Userfaultfd object that events are read from and copies are issued on.
uffd: Uffd,
}

impl UffdPfHandler {
    /// Builds a handler from an accepted Unix-domain-socket connection.
    ///
    /// Receives one message of at most 1024 bytes together with a file
    /// descriptor. The payload is parsed as a JSON array of
    /// [`RegionBackendMapping`], and the descriptor is taken over as the
    /// userfaultfd. `data` is the start of the backing buffer and `size` its
    /// length in bytes.
    ///
    /// # Panics
    ///
    /// Panics if the message cannot be received, carries no file descriptor, is
    /// not valid UTF-8 or JSON, or if the region sizes do not add up to `size`.
    pub fn from_unix_stream(stream: UnixStream, data: *const u8, size: usize) -> Self {
        let mut message_buf = vec![0u8; 1024];
        let (bytes_read, file) = stream
            .recv_with_fd(&mut message_buf[..])
            .expect("cannot recv_with_fd");
        message_buf.truncate(bytes_read);

        let body = String::from_utf8(message_buf).expect("mappings message is not valid UTF-8");
        println!("API response of {} bytes:\n{:?}", bytes_read, body);
        let file = file.expect("Uffd not passed through UDS!");

        let mappings =
            serde_json::from_str::<Vec<RegionBackendMapping>>(&body).expect("deser failed");
        let memsize: usize = mappings.iter().map(|r| r.size).sum();
        // Make sure memory size matches backing data size.
        assert_eq!(memsize, size);

        // SAFETY: `into_raw_fd` hands over sole ownership of the descriptor that
        // was received alongside the message, so the new `Uffd` is its only owner.
        // NOTE(review): that the descriptor really is a userfaultfd is taken on
        // trust from the peer.
        let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) };
        Self {
            mappings,
            backing_buffer: data,
            uffd,
        }
    }

    /// Serves one page fault by populating the whole region that contains `addr`.
    ///
    /// `addr` is rounded down to a multiple of `page_size` (assumed to be a power
    /// of two) before the owning region is looked up.
    ///
    /// # Panics
    ///
    /// Panics if no mapping contains the faulting page or if the uffd copy fails.
    fn serve_pf(&self, addr: *mut u8, page_size: usize) {
        let dst = (addr as usize & !(page_size - 1)) as *mut c_void;
        println!(
            " looking for file offset corresponding to hVA dst: {:?}",
            dst
        );

        let fault_page_addr = dst as u64;
        // `rfind` keeps the previous "last matching mapping wins" order. The range
        // test subtracts instead of adding so it cannot overflow.
        let r = self
            .mappings
            .iter()
            .rfind(|r| {
                r.base_h_va <= fault_page_addr && fault_page_addr - r.base_h_va < r.size as u64
            })
            .expect("could not find addr within region mappings");
        println!(" found it in: {:?}", r);

        let src = self.backing_buffer as u64 + r.offset;
        // Populate whole region from backing mem-file.
        // NOTE(review): because the entire region is filled on its first fault, a
        // later fault event for the same region would issue this copy again;
        // confirm how the userfaultfd crate reports that case.
        // SAFETY: `from_unix_stream` asserted that the region sizes sum to the
        // backing buffer length.
        // NOTE(review): nothing here checks that `offset + size` stays inside the
        // buffer, or that the destination range is registered with this uffd;
        // both are trusted from the peer.
        let copy = unsafe {
            self.uffd
                .copy(src as *const _, r.base_h_va as *mut _, r.size, true)
        }
        .expect("uffd copy");
        println!(" (uffdio_copy.copy returned {})", copy);
    }

    /// Polls the userfaultfd forever, serving every page-fault event.
    ///
    /// The loop has no exit condition; it ends only by panicking.
    ///
    /// # Panics
    ///
    /// Panics if `poll` or reading an event fails, if no event is available
    /// after `poll` returns, or if an event other than a page fault arrives.
    fn run_loop(&self) {
        let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize;
        // `poll` writes the returned events into the slice it is given, so the
        // `PollFd` must live in storage we keep. Polling a temporary `[pollfd]`
        // would update a copy and leave the reported flags permanently empty.
        let mut pollfds = [PollFd::new(self.uffd.as_raw_fd(), PollFlags::POLLIN)];
        println!("\nfault_handler_thread():");

        // Loop, handling incoming events on the userfaultfd file descriptor
        loop {
            println!(" waiting for PFs...");
            // See what poll() tells us about the userfaultfd
            let nready = poll(&mut pollfds, -1).expect("poll");

            // Diagnostics only: fall back to "no flags" rather than panicking.
            let revents = pollfds[0].revents().unwrap_or_else(PollFlags::empty);
            println!(
                " poll() returns: nready = {}; POLLIN = {}; POLLERR = {}",
                nready,
                revents.contains(PollFlags::POLLIN),
                revents.contains(PollFlags::POLLERR),
            );

            // Read an event from the userfaultfd
            let event = self
                .uffd
                .read_event()
                .expect("read uffd_msg")
                .expect("uffd_msg ready");

            // We expect only one kind of event; verify that assumption
            if let userfaultfd::Event::Pagefault { addr, .. } = event {
                // Display info about the page-fault event
                println!(" UFFD_EVENT_PAGEFAULT event: {:?}", event);
                self.serve_pf(addr as *mut u8, page_size);
            } else {
                panic!("Unexpected event on userfaultfd");
            }
        }
    }
}

/// Entry point of the page-fault handler thread.
///
/// Maps the file at `mem_file_path` read-only, binds a Unix listener at
/// `uffd_sock_path`, and waits on `barrier` to signal readiness. It then accepts
/// one connection and hands it to [`UffdPfHandler`] to serve page faults.
///
/// # Panics
///
/// Panics if the memory file cannot be opened, inspected or mapped, or if the
/// socket cannot be bound or accepted on.
pub fn run(uffd_sock_path: String, mem_file_path: String, barrier: Arc<Barrier>) {
    let mem_file = File::open(mem_file_path).expect("cannot open memfile");
    let mem_len = mem_file.metadata().unwrap().len() as usize;

    // Map the entire memory file; faulting regions are filled from this mapping.
    let mapped = unsafe {
        mmap(
            ptr::null_mut(),
            mem_len,
            ProtFlags::PROT_READ,
            MapFlags::MAP_PRIVATE,
            mem_file.as_raw_fd(),
            0,
        )
    };
    let memfile_buffer = mapped.expect("mmap") as *const u8;

    // The uffd used to handle Firecracker's page faults arrives over this socket.
    let uds = UnixListener::bind(&uffd_sock_path).expect("cannot bind");
    println!("Bound UDS at: {:?}", uffd_sock_path);

    // Tell the main thread the socket is bound and ready for a connection.
    barrier.wait();

    let (connection, _) = uds.accept().expect("cannot listen");
    let handler = UffdPfHandler::from_unix_stream(connection, memfile_buffer, mem_len);
    handler.run_loop();

    println!("Uffd thread done!");
}
2 changes: 1 addition & 1 deletion src/utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// More specifically, we are re-exporting modules from `vmm_sys_util` as part
// of the `utils` crate.
pub use vmm_sys_util::{
epoll, errno, eventfd, fam, ioctl, rand, syscall, tempdir, tempfile, terminal,
epoll, errno, eventfd, fam, ioctl, rand, sock_ctrl_msg, syscall, tempdir, tempfile, terminal,
};
pub use vmm_sys_util::{ioctl_expr, ioctl_ioc_nr, ioctl_iow_nr};

Expand Down
1 change: 1 addition & 0 deletions src/vmm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ vm-superio = ">=0.4.0"
linux-loader = ">=0.4.0"
serde = { version = ">=1.0.27", features = ["derive"] }
serde_json = ">=1.0.9"
userfaultfd = ">=0.4.0"
versionize = ">=0.1.6"
versionize_derive = ">=0.1.3"

Expand Down
Loading