scsi: Add virtio daemon
This adds the virtio-specific parts that use the previously introduced
interfaces and SCSI emulation in order to build a daemon that offers files
from the host system as drives to the guest.
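
A rough usage sketch (the daemon binary name, socket path, and QEMU flags
below are illustrative assumptions, not verified by this commit):

  host$ vhost-user-scsi --read-only /tmp/vhost-user-scsi.sock disk.img
  host$ qemu-system-x86_64 ... \
          -object memory-backend-memfd,id=mem,size=1G,share=on \
          -numa node,memdev=mem \
          -chardev socket,id=vus0,path=/tmp/vhost-user-scsi.sock \
          -device vhost-user-scsi-pci,chardev=vus0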

The vast majority of this work was done by Gaelan Steele as part of a
GSoC project [1][2].

[1] #4
[2] https://gist.github.com/Gaelan/febec4e4606e1320026a0924c3bf74d0

Co-developed-by: Erik Schilling <[email protected]>
Signed-off-by: Erik Schilling <[email protected]>
Signed-off-by: Gaelan Steele <[email protected]>
Gaelan authored and Ablu committed Mar 13, 2023
1 parent 47fb818 commit e9a0965
Showing 3 changed files with 696 additions and 1 deletion.
1 change: 1 addition & 0 deletions crates/scsi/src/lib.rs
@@ -1 +1,2 @@
pub mod scsi;
pub mod virtio;
389 changes: 388 additions & 1 deletion crates/scsi/src/main.rs
@@ -1,3 +1,390 @@
use std::{
convert::TryFrom,
fs::File,
io::{self, ErrorKind},
path::PathBuf,
process::exit,
sync::{Arc, RwLock},
};

use clap::{arg, Parser};
use log::{debug, error, info, warn};
use vhost::{
vhost_user,
vhost_user::{
message::{VhostUserProtocolFeatures, VhostUserVirtioFeatures},
Listener,
},
};
use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringRwLock, VringT};
use vhost_user_scsi::{
scsi::{
self,
emulation::{
block_device::{BlockDevice, FileBackend, MediumRotationRate},
target::EmulatedTarget,
},
CmdError, TaskAttr,
},
virtio::{self, Request, RequestParseError, Response, ResponseCode, VirtioScsiLun, SENSE_SIZE},
};
use virtio_bindings::{
virtio_config::VIRTIO_F_VERSION_1,
virtio_ring::{VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC},
virtio_scsi::VIRTIO_SCSI_F_HOTPLUG,
};
use virtio_queue::QueueOwnedT;
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryLoadGuard, GuestMemoryMmap};
use vmm_sys_util::{
epoll::EventSet,
eventfd::{EventFd, EFD_NONBLOCK},
};

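// virtio-scsi queue layout: queue 0 is the control queue, queue 1 is the event queue,
// and request queues start at index 2.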
const REQUEST_QUEUE: u16 = 2;

type DescriptorChainWriter = virtio::DescriptorChainWriter<GuestMemoryLoadGuard<GuestMemoryMmap>>;
type DescriptorChainReader = virtio::DescriptorChainReader<GuestMemoryLoadGuard<GuestMemoryMmap>>;
type Target = dyn scsi::Target<DescriptorChainWriter, DescriptorChainReader>;

struct VhostUserScsiBackend {
event_idx: bool,
mem: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
targets: Vec<Box<Target>>,
exit_event: EventFd,
}

impl VhostUserScsiBackend {
fn new() -> Self {
Self {
event_idx: false,
mem: None,
targets: Vec::new(),
exit_event: EventFd::new(EFD_NONBLOCK).expect("Creating exit eventfd"),
}
}

fn parse_target(&mut self, lun: VirtioScsiLun) -> Option<(&mut Target, u16)> {
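// The virtio layer has already decoded the request's 8-byte LUN field into a
// VirtioScsiLun; TargetLun addresses one of our emulated targets by index.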
match lun {
VirtioScsiLun::TargetLun(target, lun) => self
.targets
.get_mut(usize::from(target))
.map(|tgt| (tgt.as_mut(), lun)),
VirtioScsiLun::ReportLuns => {
// TODO: do we need to handle the REPORT LUNS well-known LUN?
// In practice, everyone seems to just use LUN 0
warn!("Guest is trying to use the REPORT LUNS well-known LUN, which we don't support.");
None
}
}
}

fn process_request_queue(&mut self, vring: &VringRwLock) -> Result<(), io::Error> {
let chains: Vec<_> = vring
.get_mut()
.get_queue_mut()
.iter(self.mem.as_ref().unwrap().memory())
.map_err(|e| io::Error::new(ErrorKind::Other, e))?
.collect();
for dc in chains {
let mut writer = DescriptorChainWriter::new(dc.clone());
let mut reader = DescriptorChainReader::new(dc.clone());

self.handle_request_queue(&mut reader, &mut writer);

vring
.add_used(dc.head_index(), writer.max_written())
.map_err(|e| io::Error::new(ErrorKind::Other, e))?;
}
vring
.signal_used_queue()
.map_err(|e| io::Error::new(ErrorKind::Other, e))?;
Ok(())
}

fn handle_request_queue(
&mut self,
reader: &mut DescriptorChainReader,
writer: &mut DescriptorChainWriter,
) {
// TODO: make error handling responsibility of caller -> deduplicate writing of the error

let mut body_writer = writer.clone();
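// The virtio-scsi response starts with a 12-byte fixed header (sense_len, residual,
// status qualifier, status, response) followed by the sense buffer; any data-in
// payload is written after both, so skip past them here.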
const RESPONSE_HEADER_SIZE: u32 = 12;
body_writer.skip(
RESPONSE_HEADER_SIZE + u32::try_from(SENSE_SIZE).expect("SENSE_SIZE should fit 32bit"),
);

let response = match Request::parse(reader) {
Ok(r) => {
if let Some((target, lun)) = self.parse_target(r.lun) {
let output = target.execute_command(
lun,
scsi::Request {
id: r.id,
cdb: &r.cdb,
task_attr: match r.task_attr {
0 => TaskAttr::Simple,
1 => TaskAttr::Ordered,
2 => TaskAttr::HeadOfQueue,
3 => TaskAttr::Aca,
_ => {
// virtio-scsi spec allows us to map any task attr to simple, presumably
// including future ones
warn!("Unknown task attr: {}", r.task_attr);
TaskAttr::Simple
}
},
data_in: &mut body_writer,
data_out: reader,
crn: r.crn,
prio: r.prio,
},
);

match output {
Ok(output) => {
assert!(output.sense.len() < SENSE_SIZE);

Response {
response: ResponseCode::Ok,
status: output.status,
status_qualifier: output.status_qualifier,
sense: output.sense,
// TODO: handle residual for data in
residual: body_writer.residual(),
}
}
Err(CmdError::CdbTooShort) => {
// the CDB buffer is, by default, sized larger than any CDB we support; we don't
// handle writes to config space (because QEMU doesn't let us), so there's no
// way the guest can set it too small
unreachable!();
}
Err(CmdError::DataIn(e)) => {
if e.kind() == ErrorKind::WriteZero {
Response::error(ResponseCode::Overrun, 0)
} else {
error!("Error writing response to guest memory: {}", e);

// There's some chance the header and the data-in buffer are on different descriptors,
// and only the data-in descriptor is bad, so let's at least try to write an
// error to the header
Response::error(ResponseCode::Failure, body_writer.residual())
}
}
}
} else {
debug!("Rejecting command to LUN with bad target {:?}", r.lun);
Response::error(ResponseCode::BadTarget, body_writer.residual())
}
}
Err(RequestParseError::CouldNotReadGuestMemory(e)) => {
// See comment later about errors while writing to guest mem; maybe we at least
// got functional write descriptors, so we can report an error
error!("Error reading request from guest memory: {:?}", e);
Response::error(ResponseCode::Failure, body_writer.residual())
}
Err(RequestParseError::FailedParsingLun(lun)) => {
error!("Unable to parse LUN: {:?}", lun);
Response::error(ResponseCode::Failure, body_writer.residual())
}
};

if let Err(e) = response.write(writer) {
// Alright, so something went wrong writing our response header to guest memory.
// The only reason this should ever happen, I think, is if the guest gave us a
// virtio descriptor with an invalid address.

// There's not a great way to recover from this - we just discovered that
// our only way of communicating with the guest doesn't work - so we either
// silently fail or crash. There isn't too much sense in crashing, IMO, as
// the guest could still recover by, say, installing a fixed kernel and
// rebooting. So let's just log an error and do nothing.
error!("Error writing response to guest memory: {:?}", e);
}
}

fn add_target(&mut self, target: Box<Target>) {
self.targets.push(target);
}
}

impl VhostUserBackendMut<VringRwLock> for VhostUserScsiBackend {
fn num_queues(&self) -> usize {
// control + event + request queues
let num_request_queues = 1;
2 + num_request_queues
}

fn max_queue_size(&self) -> usize {
128 // QEMU assumes this by default
}

fn features(&self) -> u64 {
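// Advertise virtio 1.0, SCSI hotplug, indirect descriptors and event index,
// plus the vhost-user protocol-features bit.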
1 << VIRTIO_F_VERSION_1
| 1 << VIRTIO_SCSI_F_HOTPLUG
| 1 << VIRTIO_RING_F_INDIRECT_DESC
| 1 << VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
}

fn protocol_features(&self) -> VhostUserProtocolFeatures {
VhostUserProtocolFeatures::MQ
}

fn set_event_idx(&mut self, enabled: bool) {
self.event_idx = enabled;
}

fn update_memory(
&mut self,
atomic_mem: GuestMemoryAtomic<GuestMemoryMmap>,
) -> std::result::Result<(), std::io::Error> {
info!("Memory updated - guest probably booting");
self.mem = Some(atomic_mem);
Ok(())
}

fn handle_event(
&mut self,
device_event: u16,
evset: EventSet,
vrings: &[VringRwLock],
thread_id: usize,
) -> io::Result<bool> {
assert!(evset == EventSet::IN);
assert!(vrings.len() == 3);
assert!((device_event as usize) < vrings.len());
assert!(thread_id == 0);

let vring = &vrings[device_event as usize];
match device_event {
REQUEST_QUEUE => {
if self.event_idx {
// vm-virtio's Queue implementation only checks avail_index
// once, so to properly support EVENT_IDX we need to keep
// calling process_request_queue() until it stops finding
// new requests on the queue.
loop {
vring.disable_notification().unwrap();
self.process_request_queue(vring)?;
if !vring.enable_notification().unwrap() {
break;
}
}
} else {
// Without EVENT_IDX, a single call is enough.
self.process_request_queue(vring)?;
}
}
_ => {
error!("Ignoring descriptor on queue {}", device_event);
}
}

Ok(false)
}

fn get_config(&self, _offset: u32, _size: u32) -> Vec<u8> {
// QEMU handles config space itself
panic!("Access to configuration space is not supported.");
}

fn set_config(&mut self, _offset: u32, _buf: &[u8]) -> std::result::Result<(), std::io::Error> {
// QEMU handles config space itself
panic!("Access to configuration space is not supported.");
}

fn exit_event(&self, _thread_index: usize) -> Option<EventFd> {
Some(self.exit_event.try_clone().expect("Cloning exit eventfd"))
}
}

#[derive(Parser)]
struct Opt {
/// Make the images read-only.
///
/// Currently, we don't actually support writes, but sometimes we want to
/// pretend the disk is writable to work around issues with some tools that
/// use the Linux SCSI generic API.
#[arg(long = "read-only", short = 'r')]
read_only: bool,
/// Tell the guest this disk is non-rotational.
///
/// Affects some heuristics in Linux around, for example, scheduling.
#[arg(long = "solid-state", short = 's')]
solid_state: bool,
/// Location of vhost-user socket.
sock: PathBuf,
/// Images against which the SCSI actions are emulated.
images: Vec<PathBuf>,
}

fn main() {
println!("Hello world");
env_logger::init();

let opt = Opt::parse();

let mut backend = VhostUserScsiBackend::new();
let mut target = EmulatedTarget::new();

if opt.images.len() > 256 {
error!("More than 256 LUNs aren't currently supported.");
// This is fairly simple to add; it's just a matter of supporting the right LUN
// encoding formats.
exit(1);
}

if !opt.read_only {
warn!("Currently, only read-only images are supported. Unless you know what you're doing, you want to pass -r");
}

for image in opt.images {
let mut dev = BlockDevice::new(FileBackend::new(File::open(image).expect("Opening image")));
dev.set_write_protected(opt.read_only);
dev.set_solid_state(if opt.solid_state {
MediumRotationRate::NonRotating
} else {
MediumRotationRate::Unreported
});
target.add_lun(Box::new(dev));
}

backend.add_target(Box::new(target));

let backend = Arc::new(RwLock::new(backend));

let mut daemon = VhostUserDaemon::new(
"vhost-user-scsi".into(),
Arc::clone(&backend),
GuestMemoryAtomic::new(GuestMemoryMmap::new()),
)
.expect("Creating daemon");

daemon
.start(Listener::new(opt.sock, true).expect("Creating listener"))
.expect("Starting daemon");

let run_result = daemon.wait();

match run_result {
Ok(()) => {
info!("Stopping cleanly.");
}
Err(vhost_user_backend::Error::HandleRequest(vhost_user::Error::PartialMessage)) => {
info!("vhost-user connection closed with partial message. If the VM is shutting down, this is expected behavior; otherwise, it might be a bug.");
}
Err(e) => {
error!("Error running daemon: {:?}", e);
}
}

// No matter the result, we need to shut down the worker thread.
// unwrap will only panic if we already panicked somewhere else
backend
.read()
.unwrap()
.exit_event
.write(1)
.expect("Shutting down worker thread");
}