blob: b8e9b7b1120fa7dcc4f36c0bc1b375fb063555e5 [file] [log] [blame]
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: BSD-3-Clause
//! Safe wrappers over the
//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.
use std::io;
use std::ops::{Deref, Drop};
use std::os::unix::io::{AsRawFd, RawFd};
#[cfg(any(target_os = "linux", target_os = "android"))]
use bitflags::bitflags;
use libc::{
epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
};
use crate::syscall::SyscallReturnCode;
/// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
///
/// Each variant's discriminant is the `libc::EPOLL_CTL_*` constant of the matching
/// name, so a value can be converted with `as i32` and passed directly to `epoll_ctl`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(i32)]
pub enum ControlOperation {
    /// Add a file descriptor to the interest list.
    Add = EPOLL_CTL_ADD,
    /// Change the settings associated with a file descriptor that is
    /// already in the interest list.
    Modify = EPOLL_CTL_MOD,
    /// Remove a file descriptor from the interest list.
    Delete = EPOLL_CTL_DEL,
}
bitflags! {
/// The type of events we can monitor a file descriptor for.
///
/// Every flag below is the `libc` constant of the matching `EPOLL*` name cast to
/// `u32`, which is the type of the `events` field of `epoll_event`. Flags can be
/// combined with `|` to build the mask handed to `EpollEvent::new`.
pub struct EventSet: u32 {
/// The associated file descriptor is available for read operations (`EPOLLIN`).
const IN = EPOLLIN as u32;
/// The associated file descriptor is available for write operations (`EPOLLOUT`).
const OUT = EPOLLOUT as u32;
/// An error condition happened on the associated file descriptor (`EPOLLERR`).
const ERROR = EPOLLERR as u32;
/// This can be used to detect peer shutdown when using Edge Triggered monitoring
/// (`EPOLLRDHUP`).
const READ_HANG_UP = EPOLLRDHUP as u32;
/// Sets the Edge Triggered behavior for the associated file descriptor (`EPOLLET`).
/// The default behavior is Level Triggered.
const EDGE_TRIGGERED = EPOLLET as u32;
/// A hang up happened on the associated file descriptor (`EPOLLHUP`). Note that
/// `epoll_wait` will always wait for this event, so it is not necessary to set it
/// in the requested events.
const HANG_UP = EPOLLHUP as u32;
/// There is an exceptional condition on the file descriptor (`EPOLLPRI`). It is
/// mostly used to signal high-priority data.
const PRIORITY = EPOLLPRI as u32;
/// Wraps `EPOLLWAKEUP`; see `epoll_ctl(2)` for the full semantics of this flag.
///
/// The event is considered as being "processed" from the time when it is returned
/// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
/// epoll file descriptor, the closure of that file descriptor, the removal of the
/// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
/// for the event file descriptor via EPOLL_CTL_MOD.
const WAKE_UP = EPOLLWAKEUP as u32;
/// Sets the one-shot behavior for the associated file descriptor (`EPOLLONESHOT`).
const ONE_SHOT = EPOLLONESHOT as u32;
/// Sets an exclusive wake up mode for the epoll file descriptor that is being
/// attached to the associated file descriptor (`EPOLLEXCLUSIVE`).
///
/// When a wake up event occurs and multiple epoll file descriptors are attached to
/// the same target file using this mode, one or more of the epoll file descriptors
/// will receive an event with `epoll_wait`. Without this flag, all of those epoll
/// file descriptors receive an event.
const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
}
}
/// Wrapper over `libc::epoll_event`.
///
/// An `EpollEvent` pairs an event mask (the `events` field) with a caller-chosen
/// 64-bit value (the `u64` field, exposed here as `data`).
// `repr(transparent)` is used so that this struct has the same layout and alignment as
// the `epoll_event` struct from C. The `epoll_ctl` and `epoll_wait` wrappers in this
// file rely on that when they cast a pointer to `EpollEvent` into a pointer to
// `epoll_event`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct EpollEvent(epoll_event);
impl std::fmt::Debug for EpollEvent {
    /// Renders the event as `{ events: <mask>, data: <value> }`, with both fields
    /// printed as plain decimal integers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Read both fields by value through the accessors before formatting.
        let mask = self.events();
        let payload = self.data();
        write!(f, "{{ events: {}, data: {} }}", mask, payload)
    }
}
impl Deref for EpollEvent {
type Target = epoll_event;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Default for EpollEvent {
fn default() -> Self {
EpollEvent(epoll_event {
events: 0u32,
u64: 0u64,
})
}
}
impl EpollEvent {
/// Create a new epoll_event instance.
///
/// # Arguments
///
/// `events` - contains an event mask.
/// `data` - a user data variable. `data` field can be a fd on which
/// we want to monitor the events specified by `events`.
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::epoll::{EpollEvent, EventSet};
///
/// let event = EpollEvent::new(EventSet::IN, 2);
/// ```
pub fn new(events: EventSet, data: u64) -> Self {
EpollEvent(epoll_event {
events: events.bits(),
u64: data,
})
}
/// Returns the `events` from
/// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::epoll::{EpollEvent, EventSet};
///
/// let event = EpollEvent::new(EventSet::IN, 2);
/// assert_eq!(event.events(), 1);
/// ```
pub fn events(&self) -> u32 {
self.events
}
/// Returns the `EventSet` corresponding to `epoll_event.events`.
///
/// # Panics
///
/// Panics if `libc::epoll_event` contains invalid events.
///
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::epoll::{EpollEvent, EventSet};
///
/// let event = EpollEvent::new(EventSet::IN, 2);
/// assert_eq!(event.event_set(), EventSet::IN);
/// ```
pub fn event_set(&self) -> EventSet {
// This unwrap is safe because `epoll_events` can only be user created or
// initialized by the kernel. We trust the kernel to only send us valid
// events. The user can only initialize `epoll_events` using valid events.
EventSet::from_bits(self.events()).unwrap()
}
/// Returns the `data` from the `libc::epoll_event`.
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::epoll::{EpollEvent, EventSet};
///
/// let event = EpollEvent::new(EventSet::IN, 2);
/// assert_eq!(event.data(), 2);
/// ```
pub fn data(&self) -> u64 {
self.u64
}
/// Converts the `libc::epoll_event` data to a RawFd.
///
/// This conversion is lossy when the data does not correspond to a RawFd
/// (data does not fit in a i32).
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::epoll::{EpollEvent, EventSet};
///
/// let event = EpollEvent::new(EventSet::IN, 2);
/// assert_eq!(event.fd(), 2);
/// ```
pub fn fd(&self) -> RawFd {
self.u64 as i32
}
}
/// Wrapper over epoll functionality.
///
/// An `Epoll` owns one epoll file descriptor: `Epoll::new` creates it and dropping
/// the value closes it.
#[derive(Debug)]
pub struct Epoll {
/// Raw file descriptor of the epoll instance, as returned by `epoll_create1`.
epoll_fd: RawFd,
}
impl Epoll {
    /// Create a new epoll file descriptor.
    ///
    /// The descriptor is created through `epoll_create1` with the `EPOLL_CLOEXEC` flag.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `SyscallReturnCode::into_result` when
    /// `epoll_create1` fails.
    pub fn new() -> io::Result<Self> {
        let epoll_fd = SyscallReturnCode(
            // SAFETY: Safe because the call takes no pointers and the return code is
            // transformed by `into_result` in a `Result`.
            unsafe { epoll_create1(EPOLL_CLOEXEC) },
        )
        .into_result()?;
        Ok(Epoll { epoll_fd })
    }

    /// Wrapper for `libc::epoll_ctl`.
    ///
    /// This can be used for adding, modifying or removing a file descriptor in the
    /// interest list of the epoll instance.
    ///
    /// # Arguments
    ///
    /// * `operation` - refers to the action to be performed on the file descriptor.
    /// * `fd` - the file descriptor on which we want to perform `operation`.
    /// * `event` - refers to the `epoll_event` instance that is linked to `fd`.
    ///   Per `epoll_ctl(2)` the kernel ignores it for `ControlOperation::Delete`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `SyscallReturnCode::into_empty_result` when
    /// `epoll_ctl` fails, for example when adding a descriptor that is already in the
    /// interest list or modifying/deleting one that is not.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    ///
    /// use std::os::unix::io::AsRawFd;
    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
    /// use vmm_sys_util::eventfd::EventFd;
    ///
    /// let epoll = Epoll::new().unwrap();
    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Add,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
    ///     )
    ///     .unwrap();
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Modify,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::IN, 4),
    ///     )
    ///     .unwrap();
    /// ```
    pub fn ctl(
        &self,
        operation: ControlOperation,
        fd: RawFd,
        mut event: EpollEvent,
    ) -> io::Result<()> {
        // `epoll_ctl` takes a mutable pointer. Derive it from a mutable binding of our
        // own copy of the event instead of casting away the constness of a shared
        // reference. `EpollEvent` is `repr(transparent)` over `epoll_event`, so the
        // pointer cast is layout-compatible.
        let event_ptr = &mut event as *mut EpollEvent as *mut epoll_event;
        SyscallReturnCode(
            // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
            // to watch, as well as a valid epoll_event structure that outlives the call.
            // We also check the return value.
            unsafe { epoll_ctl(self.epoll_fd, operation as i32, fd, event_ptr) },
        )
        .into_empty_result()
    }

    /// Wrapper for `libc::epoll_wait`.
    /// Returns the number of file descriptors in the interest list that became ready
    /// for I/O or `errno` if an error occurred.
    ///
    /// The ready events are written to the front of `events`; entries past the
    /// returned count are left untouched.
    ///
    /// # Arguments
    ///
    /// * `timeout` - specifies for how long the `epoll_wait` system call will block
    ///   (measured in milliseconds).
    /// * `events` - points to a memory area that will be used for storing the events
    ///   returned by `epoll_wait()` call. Its length is passed to the kernel as the
    ///   maximum number of events, saturated at `i32::MAX`. Per `epoll_wait(2)` a
    ///   maximum of zero is rejected with `EINVAL`, so the slice should not be empty.
    ///
    /// # Examples
    ///
    /// ```
    /// extern crate vmm_sys_util;
    ///
    /// use std::os::unix::io::AsRawFd;
    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
    /// use vmm_sys_util::eventfd::EventFd;
    ///
    /// let epoll = Epoll::new().unwrap();
    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    ///
    /// let mut ready_events = vec![EpollEvent::default(); 10];
    /// epoll
    ///     .ctl(
    ///         ControlOperation::Add,
    ///         event_fd.as_raw_fd() as i32,
    ///         EpollEvent::new(EventSet::OUT, 4),
    ///     )
    ///     .unwrap();
    /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
    /// assert_eq!(ev_count, 1);
    /// ```
    pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
        // `epoll_wait` takes the buffer capacity as an `i32`. Saturate instead of letting
        // an `as` cast wrap, so the value handed to the kernel never exceeds the slice
        // length and is never an unrelated wrapped-around number.
        let max_events = events.len().min(i32::MAX as usize) as i32;
        let events_count = SyscallReturnCode(
            // SAFETY: Safe because we give a valid epoll file descriptor and an array of
            // epoll_event structures that will be modified by the kernel to indicate information
            // about the subset of file descriptors in the interest list. `max_events` is never
            // larger than the length of that array. We also check the return value.
            unsafe {
                epoll_wait(
                    self.epoll_fd,
                    events.as_mut_ptr() as *mut epoll_event,
                    max_events,
                    timeout,
                )
            },
        )
        .into_result()? as usize;
        Ok(events_count)
    }
}
impl AsRawFd for Epoll {
fn as_raw_fd(&self) -> RawFd {
self.epoll_fd
}
}
impl Drop for Epoll {
    /// Closes the epoll file descriptor. The return value of `close` is ignored.
    fn drop(&mut self) {
        let fd = self.epoll_fd;
        // SAFETY: Safe because `fd` was obtained from the kernel when this epoll
        // instance was created, and we trust the kernel to have given us a valid fd.
        let _ = unsafe { libc::close(fd) };
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eventfd::EventFd;

    // Checks the accessors of `EpollEvent` on a default and on an explicitly built event.
    #[test]
    fn test_event_ops() {
        let blank = EpollEvent::default();
        assert_eq!(blank.events(), 0);
        assert_eq!(blank.data(), 0);

        let readable = EpollEvent::new(EventSet::IN, 2);
        assert_eq!(readable.events(), 1);
        assert_eq!(readable.event_set(), EventSet::IN);
        assert_eq!(readable.data(), 2);
        assert_eq!(readable.fd(), 2);
    }

    // Checks the exact text produced by the `Debug` implementation.
    #[test]
    fn test_events_debug() {
        let rendered = format!("{:?}", EpollEvent::new(EventSet::IN, 42));
        assert_eq!(rendered, "{ events: 1, data: 42 }");
    }

    // Walks through add / modify / delete on the interest list and checks what
    // `wait` reports after each step.
    #[test]
    fn test_epoll() {
        const WAIT_TIMEOUT_MS: i32 = 250;
        const READY_CAPACITY: usize = 128;

        let epoll = Epoll::new().unwrap();
        assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());

        // First eventfd: its counter is bumped to 1. Being able to write at least 1
        // without blocking is enough for EPOLLOUT, and a counter greater than 0 makes
        // the fd ready for EPOLLIN as well.
        let first = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        first.write(1).unwrap();
        let first_fd = first.as_raw_fd();
        let first_event = EpollEvent::new(EventSet::IN | EventSet::OUT, first_fd as u64);

        // EPOLL_CTL_ADD: register fds with different event masks.
        let added = epoll.ctl(ControlOperation::Add, first_fd, first_event);
        assert!(added.is_ok());

        // The same fd cannot be added twice to the interest list.
        let added_again = epoll.ctl(ControlOperation::Add, first_fd, first_event);
        assert!(added_again.is_err());

        // Second eventfd: `data` deliberately differs from the fd value and the mask
        // does not include EPOLLIN.
        let second = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        second.write(1).unwrap();
        let second_fd = second.as_raw_fd();
        let second_added = epoll.ctl(
            ControlOperation::Add,
            second_fd,
            EpollEvent::new(EventSet::OUT, 10),
        );
        assert!(second_added.is_ok());

        // Third eventfd: nothing is written to its counter, so EPOLLIN must not be
        // reported even though it is requested in the mask.
        let third = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let third_fd = third.as_raw_fd();
        let third_event = EpollEvent::new(EventSet::OUT | EventSet::IN, third_fd as u64);
        let third_added = epoll.ctl(ControlOperation::Add, third_fd, third_event);
        assert!(third_added.is_ok());

        // `epoll_wait()` must report all three fds as ready.
        let mut ready = vec![EpollEvent::default(); READY_CAPACITY];
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();
        assert_eq!(ready_count, 3);

        // Check the `data` values returned in the ready list; the second entry carries
        // the arbitrary value instead of an fd.
        assert_eq!(ready[0].data(), first_fd as u64);
        assert_eq!(ready[1].data(), 10);
        assert_eq!(ready[2].data(), third_fd as u64);

        // First fd: both EPOLLIN and EPOLLOUT.
        assert_eq!(ready[0].events(), (EventSet::IN | EventSet::OUT).bits());
        // Second fd: only EPOLLOUT, since EPOLLIN was not requested.
        assert_eq!(ready[1].events(), EventSet::OUT.bits());
        // Third fd: only EPOLLOUT, since its counter is still 0.
        assert_eq!(ready[2].events(), EventSet::OUT.bits());

        // EPOLL_CTL_MOD: replace the mask and data registered for the first fd.
        let first_modified = epoll.ctl(
            ControlOperation::Modify,
            first_fd,
            EpollEvent::new(EventSet::OUT, 20),
        );
        assert!(first_modified.is_ok());

        // A fd that was never added cannot be modified.
        let fourth = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let fourth_fd = fourth.as_raw_fd();
        let fourth_modified = epoll.ctl(ControlOperation::Modify, fourth_fd, EpollEvent::default());
        assert!(fourth_modified.is_err());

        let _ = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();
        // The first fd now reports the new data value and only EPOLLOUT.
        assert_eq!(ready[0].data(), 20);
        assert_eq!(ready[0].events(), EventSet::OUT.bits());

        // Stop monitoring any events on the first fd.
        let first_cleared = epoll.ctl(ControlOperation::Modify, first_fd, EpollEvent::default());
        assert!(first_cleared.is_ok());

        // Only two fds are expected in the ready list now.
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();
        assert_eq!(ready_count, 2);

        // EPOLL_CTL_DEL: remove the second fd from the interest list.
        let second_deleted = epoll.ctl(ControlOperation::Delete, second_fd, EpollEvent::default());
        assert!(second_deleted.is_ok());

        // Only the third fd is left in the ready list.
        let ready_count = epoll.wait(WAIT_TIMEOUT_MS, &mut ready[..]).unwrap();
        assert_eq!(ready_count, 1);
        assert_eq!(ready[0].data(), third_fd as u64);
        assert_eq!(ready[0].events(), EventSet::OUT.bits());

        // Removing a fd that was never added fails.
        let fourth_deleted = epoll.ctl(ControlOperation::Delete, fourth_fd, EpollEvent::default());
        assert!(fourth_deleted.is_err());
    }
}