-
Notifications
You must be signed in to change notification settings - Fork 98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support DDA on VA-backed OHCL VM: Configurable Bounce Buffer for DMA #275
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1207,12 +1207,89 @@ async fn new_underhill_vm( | |
|
||
let boot_info = runtime_params.parsed_openhcl_boot(); | ||
|
||
// Determine if x2apic is supported so that the topology matches | ||
// reality. | ||
// | ||
// We don't know if x2apic is forced on, but currently it doesn't really | ||
// matter because the topology's initial x2apic state is not currently | ||
// used in Underhill. | ||
// | ||
// FUTURE: consider having Underhill decide whether x2apic is enabled at | ||
// boot rather than allowing the host to make that decision. This would | ||
// just require Underhill setting the apicbase register on the VPs | ||
// before start. | ||
// | ||
// TODO: centralize cpuid querying logic. | ||
#[cfg(guest_arch = "x86_64")] | ||
let x2apic = if isolation.is_hardware_isolated() { | ||
// For hardware CVMs, always enable x2apic support at boot. | ||
vm_topology::processor::x86::X2ApicState::Enabled | ||
} else if safe_x86_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VersionAndFeatures.0, 0).ecx | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI, this will conflict with Eric's recent change that refactors this crate. |
||
& (1 << 21) | ||
!= 0 | ||
{ | ||
vm_topology::processor::x86::X2ApicState::Supported | ||
} else { | ||
vm_topology::processor::x86::X2ApicState::Unsupported | ||
}; | ||
|
||
#[cfg(guest_arch = "x86_64")] | ||
let processor_topology = new_x86_topology(&boot_info.cpus, x2apic) | ||
.context("failed to construct the processor topology")?; | ||
|
||
#[cfg(guest_arch = "aarch64")] | ||
let processor_topology = new_aarch64_topology( | ||
boot_info | ||
.gic | ||
.context("did not get gic state from bootloader")?, | ||
&boot_info.cpus, | ||
) | ||
.context("failed to construct the processor topology")?; | ||
|
||
// The amount of memory required by the GET igvm_attest request | ||
let attestation = get_protocol::IGVM_ATTEST_MSG_SHARED_GPA as u64 * hvdef::HV_PAGE_SIZE; | ||
|
||
// TODO: determine actual memory usage by NVME/MANA. hardcode as 10MB | ||
let device_dma = 10 * 1024 * 1024; | ||
const MIN_PER_QUEUE_PAGES: u64 = (128 * 1024 + hvdef::HV_PAGE_SIZE) / hvdef::HV_PAGE_SIZE; | ||
const DEFAULT_DMA_BOUNCE_BUFFER_PAGES_PER_QUEUE: u64 = 128; | ||
#[allow(clippy::assertions_on_constants)] | ||
const _: () = assert!( | ||
DEFAULT_DMA_BOUNCE_BUFFER_PAGES_PER_QUEUE >= MIN_PER_QUEUE_PAGES, | ||
"not enough room for an ATAPI IO plus a PRP list" | ||
); | ||
|
||
const DEFAULT_NVME_DRIVERS: u32 = 8; | ||
let (max_nvme_drivers, dma_bounce_buffer_pages_per_queue, dma_bounce_buffer_pages_per_io_threshold) = dps.general.vtl2_settings.as_ref().map_or( | ||
(DEFAULT_NVME_DRIVERS, DEFAULT_DMA_BOUNCE_BUFFER_PAGES_PER_QUEUE, None), | ||
|vtl2_settings| { | ||
let original_dma_bounce_buffer_pages_per_queue = vtl2_settings | ||
.fixed | ||
.dma_bounce_buffer_pages_per_queue | ||
.unwrap_or(DEFAULT_DMA_BOUNCE_BUFFER_PAGES_PER_QUEUE); | ||
|
||
let dma_bounce_buffer_pages_per_queue = if original_dma_bounce_buffer_pages_per_queue < MIN_PER_QUEUE_PAGES { | ||
tracing::warn!( | ||
"the value of dma_bounce_buffer_pages_per_queue ({}) is less than MIN_PER_QUEUE_PAGES ({})", | ||
original_dma_bounce_buffer_pages_per_queue, MIN_PER_QUEUE_PAGES | ||
); | ||
MIN_PER_QUEUE_PAGES | ||
} else { | ||
original_dma_bounce_buffer_pages_per_queue | ||
}; | ||
|
||
( | ||
vtl2_settings.fixed.max_nvme_drivers.unwrap_or(DEFAULT_NVME_DRIVERS), | ||
dma_bounce_buffer_pages_per_queue, | ||
vtl2_settings.fixed.dma_bounce_buffer_pages_per_io_threshold, | ||
) | ||
}, | ||
); | ||
|
||
// TODO: determine actual memory usage by NVME/MANA. hardcode as 10MB | ||
let device_dma = 10 * 1024 * 1024 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you give a rough estimate of how much memory this will use? I suspect that John's comments obviate this discussion, but it does seem like this is inefficient: we're allocating (what I assume is) a lot of memory for devices that may not be using it. |
||
+ max_nvme_drivers as u64 | ||
* processor_topology.vp_count() as u64 | ||
* dma_bounce_buffer_pages_per_queue | ||
* hvdef::HV_PAGE_SIZE; | ||
// Determine the amount of shared memory to reserve from VTL0. | ||
let shared_pool_size = match isolation { | ||
#[cfg(guest_arch = "x86_64")] | ||
|
@@ -1275,45 +1352,6 @@ async fn new_underhill_vm( | |
physical_address_size, | ||
)?; | ||
|
||
// Determine if x2apic is supported so that the topology matches | ||
// reality. | ||
// | ||
// We don't know if x2apic is forced on, but currently it doesn't really | ||
// matter because the topology's initial x2apic state is not currently | ||
// used in Underhill. | ||
// | ||
// FUTURE: consider having Underhill decide whether x2apic is enabled at | ||
// boot rather than allowing the host to make that decision. This would | ||
// just require Underhill setting the apicbase register on the VPs | ||
// before start. | ||
// | ||
// TODO: centralize cpuid querying logic. | ||
#[cfg(guest_arch = "x86_64")] | ||
let x2apic = if isolation.is_hardware_isolated() { | ||
// For hardware CVMs, always enable x2apic support at boot. | ||
vm_topology::processor::x86::X2ApicState::Enabled | ||
} else if safe_x86_intrinsics::cpuid(x86defs::cpuid::CpuidFunction::VersionAndFeatures.0, 0).ecx | ||
& (1 << 21) | ||
!= 0 | ||
{ | ||
vm_topology::processor::x86::X2ApicState::Supported | ||
} else { | ||
vm_topology::processor::x86::X2ApicState::Unsupported | ||
}; | ||
|
||
#[cfg(guest_arch = "x86_64")] | ||
let processor_topology = new_x86_topology(&boot_info.cpus, x2apic) | ||
.context("failed to construct the processor topology")?; | ||
|
||
#[cfg(guest_arch = "aarch64")] | ||
let processor_topology = new_aarch64_topology( | ||
boot_info | ||
.gic | ||
.context("did not get gic state from bootloader")?, | ||
&boot_info.cpus, | ||
) | ||
.context("failed to construct the processor topology")?; | ||
|
||
let mut with_vmbus: bool = false; | ||
let mut with_vmbus_relay = false; | ||
if dps.general.vmbus_redirection_enabled { | ||
|
@@ -1737,6 +1775,9 @@ async fn new_underhill_vm( | |
&driver_source, | ||
processor_topology.vp_count(), | ||
vfio_dma_buffer(&shared_vis_pages_pool), | ||
dma_bounce_buffer_pages_per_queue, | ||
dma_bounce_buffer_pages_per_io_threshold, | ||
Some(partition.clone()), | ||
); | ||
|
||
resolver.add_async_resolver::<DiskHandleKind, _, NvmeDiskConfig, _>(NvmeDiskResolver::new( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,9 @@ message Vtl2SettingsFixed { | |
optional uint32 io_ring_size = 2; | ||
// Specify the maximum number of bounce buffer pages allowed per cpu | ||
optional uint32 max_bounce_buffer_pages = 3; | ||
optional uint64 dma_bounce_buffer_pages_per_queue = 4; | ||
optional uint32 dma_bounce_buffer_pages_per_io_threshold = 5; | ||
optional uint32 max_nvme_drivers = 6; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again, a moot point, but I want to get in the habit of asking: we need a test that ensures that not passing in these values results in acceptable behavior. I see the code handling it, but since this is our API surface, we need to be certain we aren't regressing it. Both JSON (until we delete ...) and PB. |
||
} | ||
|
||
message Vtl2SettingsDynamic { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ pub mod sync_wrapper; | |
pub mod zerodisk; | ||
|
||
use guestmem::AccessError; | ||
use hvdef::HvError; | ||
use inspect::Inspect; | ||
use scsi_buffers::RequestBuffers; | ||
use stackfuture::StackFuture; | ||
|
@@ -50,6 +51,8 @@ pub enum DiskError { | |
ReservationConflict, | ||
#[error("unsupported eject")] | ||
UnsupportedEject, | ||
#[error("failed to pin/unpin guest memory {0}")] | ||
Hv(HvError), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why not just "PinFailure" instead of "Hv"? |
||
} | ||
|
||
/// Io error details | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these changes indeed part of your commit? Or stale from a fork?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to move this code up so that we can calculate device_dma using processor_topology.vp_count() below.