generated from rust-vmm/crate-template
-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds the virtio-specific parts that use the previously formed interfaces and SCSI emulation in order to build a daemon that offers files from the host system as drives to the guest.

The vast majority of this work was done by Gaelan Steele as part of a GSoC project [1][2].

[1] #4
[2] https://gist.github.com/Gaelan/febec4e4606e1320026a0924c3bf74d0

Co-developed-by: Erik Schilling <[email protected]>
Signed-off-by: Erik Schilling <[email protected]>
Signed-off-by: Gaelan Steele <[email protected]>
- Loading branch information
Showing 3 changed files with 696 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
pub mod scsi; | ||
pub mod virtio; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,390 @@ | ||
use std::{ | ||
convert::TryFrom, | ||
fs::File, | ||
io::{self, ErrorKind}, | ||
path::PathBuf, | ||
process::exit, | ||
sync::{Arc, RwLock}, | ||
}; | ||
|
||
use clap::{arg, Parser}; | ||
use log::{debug, error, info, warn}; | ||
use vhost::{ | ||
vhost_user, | ||
vhost_user::{ | ||
message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures}, | ||
Listener, | ||
}, | ||
}; | ||
use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock, VringT}; | ||
use vhost_user_scsi::{ | ||
scsi::{ | ||
self, | ||
emulation::{ | ||
block_device::{BlockDevice, FileBackend, MediumRotationRate}, | ||
target::EmulatedTarget, | ||
}, | ||
CmdError, TaskAttr, | ||
}, | ||
virtio::{self, Request, RequestParseError, Response, ResponseCode, VirtioScsiLun, SENSE_SIZE}, | ||
}; | ||
use virtio_bindings::{ | ||
virtio_config::VIRTIO_F_VERSION_1, | ||
virtio_ring::{VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC}, | ||
virtio_scsi::VIRTIO_SCSI_F_HOTPLUG, | ||
}; | ||
use virtio_queue::QueueOwnedT; | ||
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap}; | ||
use vmm_sys_util::{ | ||
epoll::EventSet, | ||
eventfd::{EventFd, EFD_NONBLOCK}, | ||
}; | ||
|
||
// Index of the request queue among the device's vrings. `handle_event` only
// processes events whose `device_event` equals this value; `num_queues`
// describes the queue set as control + event + request queues.
const REQUEST_QUEUE: u16 = 2; | ||
|
||
// Descriptor chain writer specialised for mmap-backed guest memory; used as the
// data-in (device-to-guest) sink of a SCSI request.
type DescriptorChainWriter = virtio::DescriptorChainWriter<GuestMemoryLoadGuard<GuestMemoryMmap>>; | ||
// Descriptor chain reader specialised for mmap-backed guest memory; used as the
// data-out (guest-to-device) source of a SCSI request.
type DescriptorChainReader = virtio::DescriptorChainReader<GuestMemoryLoadGuard<GuestMemoryMmap>>; | ||
// Trait-object type of a SCSI target that exchanges data through the
// descriptor chain writer/reader above.
type Target = dyn scsi::Target<DescriptorChainWriter, DescriptorChainReader>; | ||
|
||
/// State of the vhost-user SCSI device backend.
struct VhostUserScsiBackend { | ||
/// Whether EVENT_IDX handling is enabled; set through `set_event_idx`.
event_idx: bool, | ||
/// Guest memory; `None` until `update_memory` has been called.
mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>, | ||
/// SCSI targets, indexed by the target number of the addressed LUN.
targets: Vec<Box<Target>>, | ||
/// Eventfd handed out by `exit_event`; `main` writes to it to stop the worker thread.
exit_event: EventFd, | ||
} | ||
|
||
impl VhostUserScsiBackend { | ||
fn new() -> Self { | ||
Self { | ||
event_idx: false, | ||
mem: None, | ||
targets: Vec::new(), | ||
exit_event: EventFd::new(EFD_NONBLOCK).expect("Creating exit eventfd"), | ||
} | ||
} | ||
|
||
fn parse_target(&mut self, lun: VirtioScsiLun) -> Option<(&mut Target, u16)> { | ||
match lun { | ||
VirtioScsiLun::TargetLun(target, lun) => self | ||
.targets | ||
.get_mut(usize::from(target)) | ||
.map(|tgt| (tgt.as_mut(), lun)), | ||
VirtioScsiLun::ReportLuns => { | ||
// TODO: do we need to handle the REPORT LUNS well-known LUN? | ||
// In practice, everyone seems to just use LUN 0 | ||
warn!("Guest is trying to use the REPORT LUNS well-known LUN, which we don't support."); | ||
None | ||
} | ||
} | ||
} | ||
|
||
fn process_request_queue(&mut self, vring: &VringRwLock) -> Result<(), io::Error> { | ||
let chains: Vec<_> = vring | ||
.get_mut() | ||
.get_queue_mut() | ||
.iter(self.mem.as_ref().unwrap().memory()) | ||
.map_err(|e| io::Error::new(ErrorKind::Other, e))? | ||
.collect(); | ||
for dc in chains { | ||
let mut writer = DescriptorChainWriter::new(dc.clone()); | ||
let mut reader = DescriptorChainReader::new(dc.clone()); | ||
|
||
self.handle_request_queue(&mut reader, &mut writer); | ||
|
||
vring | ||
.add_used(dc.head_index(), writer.max_written()) | ||
.map_err(|e| io::Error::new(ErrorKind::Other, e))?; | ||
} | ||
vring | ||
.signal_used_queue() | ||
.map_err(|e| io::Error::new(ErrorKind::Other, e))?; | ||
Ok(()) | ||
} | ||
|
||
fn handle_request_queue( | ||
&mut self, | ||
reader: &mut DescriptorChainReader, | ||
writer: &mut DescriptorChainWriter, | ||
) { | ||
// TODO: make error handling responsibility of caller -> deduplicate writing of the error | ||
|
||
let mut body_writer = writer.clone(); | ||
const RESPONSE_HEADER_SIZE: u32 = 12; | ||
body_writer.skip( | ||
RESPONSE_HEADER_SIZE + u32::try_from(SENSE_SIZE).expect("SENSE_SIZE should fit 32bit"), | ||
); | ||
|
||
let response = match Request::parse(reader) { | ||
Ok(r) => { | ||
if let Some((target, lun)) = self.parse_target(r.lun) { | ||
let output = target.execute_command( | ||
lun, | ||
scsi::Request { | ||
id: r.id, | ||
cdb: &r.cdb, | ||
task_attr: match r.task_attr { | ||
0 => TaskAttr::Simple, | ||
1 => TaskAttr::Ordered, | ||
2 => TaskAttr::HeadOfQueue, | ||
3 => TaskAttr::Aca, | ||
_ => { | ||
// virtio-scsi spec allows us to map any task attr to simple, presumably | ||
// including future ones | ||
warn!("Unknown task attr: {}", r.task_attr); | ||
TaskAttr::Simple | ||
} | ||
}, | ||
data_in: &mut body_writer, | ||
data_out: reader, | ||
crn: r.crn, | ||
prio: r.prio, | ||
}, | ||
); | ||
|
||
match output { | ||
Ok(output) => { | ||
assert!(output.sense.len() < SENSE_SIZE); | ||
|
||
Response { | ||
response: ResponseCode::Ok, | ||
status: output.status, | ||
status_qualifier: output.status_qualifier, | ||
sense: output.sense, | ||
// TODO: handle residual for data in | ||
residual: body_writer.residual(), | ||
} | ||
} | ||
Err(CmdError::CdbTooShort) => { | ||
// the CDB buffer is, by default, sized larger than any CDB we support; we don't | ||
// handle writes to config space (because QEMU doesn't let us), so there's no | ||
// way the guest can set it too small | ||
unreachable!(); | ||
} | ||
Err(CmdError::DataIn(e)) => { | ||
if e.kind() == ErrorKind::WriteZero { | ||
Response::error(ResponseCode::Overrun, 0) | ||
} else { | ||
error!("Error writing response to guest memory: {}", e); | ||
|
||
// There's some chance the header and data in are on different descriptors, | ||
// and only the data in descriptor is bad, so let's at least try to write an | ||
// error to the header | ||
Response::error(ResponseCode::Failure, body_writer.residual()) | ||
} | ||
} | ||
} | ||
} else { | ||
debug!("Rejecting command to LUN with bad target {:?}", r.lun); | ||
Response::error(ResponseCode::BadTarget, body_writer.residual()) | ||
} | ||
} | ||
Err(RequestParseError::CouldNotReadGuestMemory(e)) => { | ||
// See comment later about errors while writing to guest mem; maybe we at least | ||
// got functional write desciptors, so we can report an error | ||
error!("Error reading request from guest memory: {:?}", e); | ||
Response::error(ResponseCode::Failure, body_writer.residual()) | ||
} | ||
Err(RequestParseError::FailedParsingLun(lun)) => { | ||
error!("Unable to parse LUN: {:?}", lun); | ||
Response::error(ResponseCode::Failure, body_writer.residual()) | ||
} | ||
}; | ||
|
||
if let Err(e) = response.write(writer) { | ||
// Alright, so something went wrong writing our response header to guest memory. | ||
// The only reason this should ever happen, I think, is if the guest gave us a | ||
// virtio descriptor with an invalid address. | ||
|
||
// There's not a great way to recover from this - we just discovered that | ||
// our only way of communicating with the guest doesn't work - so we either | ||
// silently fail or crash. There isn't too much sense in crashing, IMO, as | ||
// the guest could still recover by, say, installing a fixed kernel and | ||
// rebooting. So let's just log an error and do nothing. | ||
error!("Error writing response to guest memory: {:?}", e); | ||
} | ||
} | ||
|
||
fn add_target(&mut self, target: Box<Target>) { | ||
self.targets.push(target); | ||
} | ||
} | ||
|
||
impl VhostUserBackendMut<VringRwLock> for VhostUserScsiBackend { | ||
fn num_queues(&self) -> usize { | ||
// control + event + request queues | ||
let num_request_queues = 1; | ||
2 + num_request_queues | ||
} | ||
|
||
fn max_queue_size(&self) -> usize { | ||
128 // qemu assumes this by default | ||
} | ||
|
||
fn features(&self) -> u64 { | ||
1 << VIRTIO_F_VERSION_1 | ||
| 1 << VIRTIO_SCSI_F_HOTPLUG | ||
| 1 << VIRTIO_RING_F_INDIRECT_DESC | ||
| 1 << VIRTIO_RING_F_EVENT_IDX | ||
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() | ||
} | ||
|
||
fn protocol_features(&self) -> VhostUserProtocolFeatures { | ||
VhostUserProtocolFeatures::MQ | ||
} | ||
|
||
fn set_event_idx(&mut self, enabled: bool) { | ||
self.event_idx = enabled; | ||
} | ||
|
||
fn update_memory( | ||
&mut self, | ||
atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>, | ||
) -> std::result::Result<(), std::io::Error> { | ||
info!("Memory updated - guest probably booting"); | ||
self.mem = Some(atomic_mem); | ||
Ok(()) | ||
} | ||
|
||
fn handle_event( | ||
&mut self, | ||
device_event: u16, | ||
evset: EventSet, | ||
vrings: &[VringRwLock], | ||
thread_id: usize, | ||
) -> io::Result<bool> { | ||
assert!(evset == EventSet::IN); | ||
assert!(vrings.len() == 3); | ||
assert!((device_event as usize) < vrings.len()); | ||
assert!(thread_id == 0); | ||
|
||
let vring = &vrings[device_event as usize]; | ||
match device_event { | ||
REQUEST_QUEUE => { | ||
if self.event_idx { | ||
// vm-virtio's Queue implementation only checks avail_index | ||
// once, so to properly support EVENT_IDX we need to keep | ||
// calling process_request_queue() until it stops finding | ||
// new requests on the queue. | ||
loop { | ||
vring.disable_notification().unwrap(); | ||
self.process_request_queue(vring)?; | ||
if !vring.enable_notification().unwrap() { | ||
break; | ||
} | ||
} | ||
} else { | ||
// Without EVENT_IDX, a single call is enough. | ||
self.process_request_queue(vring)?; | ||
} | ||
} | ||
_ => { | ||
error!("Ignoring descriptor on queue {}", device_event); | ||
} | ||
} | ||
|
||
Ok(false) | ||
} | ||
|
||
fn get_config(&self, _offset: u32, _size: u32) -> Vec<u8> { | ||
// QEMU handles config space itself | ||
panic!("Access to configuration space is not supported."); | ||
} | ||
|
||
fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> std::result::Result<(), std::io::Error> { | ||
// QEMU handles config space itself | ||
panic!("Access to configuration space is not supported."); | ||
} | ||
|
||
fn exit_event(&self, _thread_index: usize) -> Option<EventFd> { | ||
Some(self.exit_event.try_clone().expect("Cloning exit eventfd")) | ||
} | ||
} | ||
|
||
// Command-line options of the daemon; parsed in `main` through `Opt::parse()`.
// NOTE(review): the `///` comments on the fields are presumably picked up by
// clap's derive macro as `--help` text, so treat them as user-facing strings.
#[derive(Parser)] | ||
struct Opt { | ||
/// Make the images read-only. | ||
/// | ||
/// Currently, we don't actually support writes, but sometimes we want to | ||
/// pretend the disk is writable to work around issues with some tools that | ||
/// use the Linux SCSI generic API. | ||
#[arg(long = "read-only", short = 'r')] | ||
read_only: bool, | ||
/// Tell the guest this disk is non-rotational. | ||
/// | ||
/// Affects some heuristics in Linux around, for example, scheduling. | ||
#[arg(long = "solid-state", short = 's')] | ||
solid_state: bool, | ||
/// Location of vhost-user socket. | ||
sock: PathBuf, | ||
/// Images against which the SCSI actions are emulated. | ||
// Each image becomes one LUN of a single emulated target (see `main`).
images: Vec<PathBuf>, | ||
} | ||
|
||
fn main() { | ||
println!("Hello world"); | ||
env_logger::init(); | ||
|
||
let opt = Opt::parse(); | ||
|
||
let mut backend = VhostUserScsiBackend::new(); | ||
let mut target = EmulatedTarget::new(); | ||
|
||
if opt.images.len() > 256 { | ||
error!("More than 256 LUNs aren't currently supported."); | ||
// This is fairly simple to add; it's just a matter of supporting the right LUN | ||
// encoding formats. | ||
exit(1); | ||
} | ||
|
||
if !opt.read_only { | ||
warn!("Currently, only read-only images are supported. Unless you know what you're doing, you want to pass -r"); | ||
} | ||
|
||
for image in opt.images { | ||
let mut dev = BlockDevice::new(FileBackend::new(File::open(image).expect("Opening image"))); | ||
dev.set_write_protected(opt.read_only); | ||
dev.set_solid_state(if opt.solid_state { | ||
MediumRotationRate::NonRotating | ||
} else { | ||
MediumRotationRate::Unreported | ||
}); | ||
target.add_lun(Box::new(dev)); | ||
} | ||
|
||
backend.add_target(Box::new(target)); | ||
|
||
let backend = Arc::new(RwLock::new(backend)); | ||
|
||
let mut daemon = VhostUserDaemon::new( | ||
"vhost-user-scsi".into(), | ||
Arc::clone(&backend), | ||
GuestMemoryAtomic::new(GuestMemoryMmap::new()), | ||
) | ||
.expect("Creating daemon"); | ||
|
||
daemon | ||
.start(Listener::new(opt.sock, true).expect("Creating listener")) | ||
.expect("Starting daemon"); | ||
|
||
let run_result = daemon.wait(); | ||
|
||
match run_result { | ||
Ok(()) => { | ||
info!("Stopping cleanly."); | ||
} | ||
Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => { | ||
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug."); | ||
} | ||
Err(e) => { | ||
error!("Error running daemon: {:?}", e); | ||
} | ||
} | ||
|
||
// No matter the result, we need to shut down the worker thread. | ||
// unwrap will only panic if we already panicked somewhere else | ||
backend | ||
.read() | ||
.unwrap() | ||
.exit_event | ||
.write(1) | ||
.expect("Shutting down worker thread"); | ||
} |
Oops, something went wrong.