mirror of
https://chromium.googlesource.com/crosvm/crosvm
synced 2025-01-27 02:28:22 +00:00
fec86cc3ae
Calculate the number of bits necessary to represent the enum variant using the next_power_of_two() and trailing_zeros() functions from the primitive usize type. Also add a test to ensure that the returned value is correct when there is only one variant in the enum. BUG=none TEST=unit tests Change-Id: Ibd15efd4f06e17a74489fee04ff19aca0dde68b2 Signed-off-by: Chirantan Ekbote <chirantan@chromium.org> Reviewed-on: https://chromium-review.googlesource.com/959624 Reviewed-by: Zach Reizner <zachr@chromium.org>
462 lines
17 KiB
Rust
462 lines
17 KiB
Rust
// Copyright 2018 The Chromium OS Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
extern crate proc_macro;
|
|
|
|
use proc_macro::TokenStream;
|
|
use std::fmt::Write;
|
|
use std::mem;
|
|
use std::str::FromStr;
|
|
|
|
#[cfg(test)]
|
|
mod tests;
|
|
|
|
// This file is meant to be read from top to bottom to reflect how this code processes and generates
|
|
// Rust enum definitions and implementations. The algorithm overview:
|
|
// 1) Split the rust source by whitespace (`str::split_whitespace`).
|
|
// 2a) Attempt to tokenize each piece (see: `Tokenized`, all functions starting with `matches`).
|
|
// 2b) Feed the token to the `ParseState` (see `ParseState::handle_token`).
|
|
// 3) After the source is fully processed, the `ParseState` has an `EnumModel` representing the
|
|
// input enum.
|
|
// 4) Glue together an implementation of PollToken using a template.
|
|
|
|
// A single token after splitting input source by white space and simple stateless matching.
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
enum Tokenized {
|
|
// `enum`
|
|
Enum,
|
|
|
|
// `pub' | `pub(crate)`
|
|
Visiblity,
|
|
|
|
// `Hello`, `index`, `data,`
|
|
Ident(String),
|
|
|
|
// `index:`, 'first:`
|
|
FieldIdent(String),
|
|
|
|
// `Socket(u32)`, `Client(usize),`,
|
|
IdentAndType(String, String),
|
|
|
|
// `{`
|
|
OpenBrace,
|
|
|
|
// `}`, `},`
|
|
CloseBrace,
|
|
}
|
|
|
|
// Attempts to match strings of the form "identifier" with optional trailing comma.
|
|
fn matches_ident(s: &str) -> Option<String> {
|
|
let ident = s.trim_right_matches(',');
|
|
if !ident.is_empty() && ident.chars().all(char::is_alphanumeric) {
|
|
Some(ident.to_owned())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
// Attempts to match strings of the form "Identifier(Type)" with optional trailing comma. If the
|
|
// given string matches, the identifier and type are returned as a 2-tuple receptively.
|
|
fn matches_ident_and_type(s: &str) -> Option<(String, String)> {
|
|
let mut buffer = String::new();
|
|
let mut ident = String::new();
|
|
let mut type_ = String::new();
|
|
let mut brace_depth = 0;
|
|
for c in s.chars() {
|
|
match c {
|
|
'(' if brace_depth == 0 && !buffer.is_empty() && ident.is_empty() => {
|
|
mem::swap(&mut ident, &mut buffer);
|
|
brace_depth += 1;
|
|
}
|
|
')' if brace_depth == 1 && !buffer.is_empty() && type_.is_empty() => {
|
|
mem::swap(&mut type_, &mut buffer);
|
|
brace_depth -= 1;
|
|
}
|
|
',' => {}
|
|
c if c.is_alphanumeric() => buffer.push(c),
|
|
_ => return None,
|
|
}
|
|
}
|
|
if !ident.is_empty() && !type_.is_empty() {
|
|
Some((ident, type_))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
// Attempts to match strings of the form "identifier:".
|
|
fn matches_field_ident(s: &str) -> Option<String> {
|
|
let field_ident = s.trim_right_matches(':');
|
|
if s.ends_with(':') && field_ident.chars().all(char::is_alphanumeric) {
|
|
Some(field_ident.to_owned())
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
impl Tokenized {
|
|
fn from_str(s: &str) -> Tokenized {
|
|
if s.starts_with("pub(") {
|
|
return Tokenized::Visiblity;
|
|
}
|
|
match s {
|
|
"enum" => Tokenized::Enum,
|
|
"pub" => Tokenized::Visiblity,
|
|
"{" => Tokenized::OpenBrace,
|
|
"}" | "}," => Tokenized::CloseBrace,
|
|
_ => {
|
|
// Try to match from most specific to least specific.
|
|
if let Some(ident) = matches_field_ident(s) {
|
|
Tokenized::FieldIdent(ident)
|
|
} else if let Some((ident, type_)) = matches_ident_and_type(s) {
|
|
Tokenized::IdentAndType(ident, type_)
|
|
} else if let Some(ident) = matches_ident(s) {
|
|
Tokenized::Ident(ident)
|
|
} else {
|
|
panic!("unable to parse token: {}", s)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Data field for an enum, with possible field name.
|
|
#[derive(Debug, PartialEq)]
|
|
struct EnumVariantData {
|
|
type_: String,
|
|
name: Option<String>,
|
|
}
|
|
|
|
// Data for one variant of an enum, with optional single data field.
|
|
#[derive(Debug, PartialEq)]
|
|
struct EnumVariant {
|
|
name: String,
|
|
data: Option<EnumVariantData>,
|
|
}
|
|
|
|
// Data for an entire enum type.
|
|
#[derive(Debug, Default, PartialEq)]
|
|
struct EnumModel {
|
|
name: String,
|
|
variants: Vec<EnumVariant>,
|
|
}
|
|
// Note: impl for EnumModel is below the parsing code and definitions because all of the methods are
|
|
// for generating the PollToken impl.
|
|
|
|
// Labels for each of the states in the parsing state machine. The '->` symbol means that the given
|
|
// state may transition to the state pointed to.
|
|
#[derive(PartialEq, Debug)]
|
|
enum States {
|
|
// Initial state, expecting to see visibility rules (e.g. `pub`) or `enum` keyword.
|
|
Start, // -> Ident
|
|
|
|
// Expect to see the name of the enum field.
|
|
Ident, // -> Brace
|
|
|
|
// Expect to see an opening brace.
|
|
Brace, // -> VariantIdent, -> End
|
|
|
|
// Expect to see a variant's name.
|
|
VariantIdent, // -> VariantIdent, -> VariantData, -> End
|
|
|
|
// Expect to see the field name of a variant's data.
|
|
VariantData, // -> VariantIdent, -> VariantDataType
|
|
|
|
// Expect to see the tye name of a variant's data.
|
|
VariantDataType, // -> VariantData
|
|
|
|
// Expect to see no more tokens.
|
|
End,
|
|
}
|
|
|
|
// The state machine for parsing a stream of `Tokenized`. After the States::End state is reached, a
|
|
// complete `EnumModel` is ready to be used for generating an implementation.
|
|
struct ParseState {
|
|
current_state: States,
|
|
current_variant: Option<EnumVariant>,
|
|
model: EnumModel,
|
|
}
|
|
|
|
impl ParseState {
|
|
fn new() -> ParseState {
|
|
ParseState {
|
|
current_state: States::Start,
|
|
current_variant: Default::default(),
|
|
model: Default::default(),
|
|
}
|
|
}
|
|
|
|
// Handles the next token in the stream of tokens.
|
|
fn handle_token(&mut self, tok: Tokenized) {
|
|
match self.current_state {
|
|
States::Start => self.handle_start(tok),
|
|
States::Ident => self.handle_ident(tok),
|
|
States::Brace => self.handle_brace(tok),
|
|
States::VariantIdent => self.handle_variant_ident(tok),
|
|
States::VariantData => self.handle_variant_data(tok),
|
|
States::VariantDataType => self.handle_variant_data_type(tok),
|
|
States::End => self.handle_end(tok),
|
|
}
|
|
}
|
|
|
|
// All the following are handlers name after the current state that handle the next token.
|
|
|
|
fn handle_start(&mut self, tok: Tokenized) {
|
|
self.current_state = match tok {
|
|
Tokenized::Enum => States::Ident,
|
|
Tokenized::Visiblity => States::Start,
|
|
_ => panic!("derives for enum types only"),
|
|
|
|
};
|
|
}
|
|
|
|
fn handle_ident(&mut self, tok: Tokenized) {
|
|
self.current_state = match tok {
|
|
Tokenized::Ident(ident) => {
|
|
self.model.name = ident;
|
|
States::Brace
|
|
}
|
|
_ => panic!("unexpected token: {:?}", tok),
|
|
};
|
|
}
|
|
|
|
fn handle_brace(&mut self, tok: Tokenized) {
|
|
self.current_state = match tok {
|
|
Tokenized::OpenBrace => States::VariantIdent,
|
|
Tokenized::CloseBrace => States::End,
|
|
_ => panic!("unexpected token: {:?}", tok),
|
|
};
|
|
}
|
|
|
|
fn handle_variant_ident(&mut self, tok: Tokenized) {
|
|
// This handler is the most complex because it has the most branches for the new
|
|
// `current_state`. Adding to that complexity is that many branches indicate a new variant
|
|
// is being handled, which means the old `current_variant` needs to be added to `variants`
|
|
// and a fresh one needs to be started with the fresh data embedded in the token.
|
|
self.current_state = match tok {
|
|
Tokenized::Ident(ident) => {
|
|
let mut variant = Some(EnumVariant {
|
|
name: ident,
|
|
data: None,
|
|
});
|
|
mem::swap(&mut variant, &mut self.current_variant);
|
|
if let Some(variant) = variant {
|
|
self.model.variants.push(variant);
|
|
}
|
|
States::VariantIdent
|
|
}
|
|
Tokenized::IdentAndType(ident, type_) => {
|
|
let variant_data = EnumVariantData {
|
|
type_: type_,
|
|
name: None,
|
|
};
|
|
let mut variant = Some(EnumVariant {
|
|
name: ident,
|
|
data: Some(variant_data),
|
|
});
|
|
mem::swap(&mut variant, &mut self.current_variant);
|
|
if let Some(variant) = variant {
|
|
self.model.variants.push(variant);
|
|
}
|
|
States::VariantIdent
|
|
}
|
|
Tokenized::OpenBrace => States::VariantData,
|
|
Tokenized::CloseBrace => {
|
|
let mut variant = Default::default();
|
|
mem::swap(&mut variant, &mut self.current_variant);
|
|
if let Some(variant) = variant {
|
|
self.model.variants.push(variant);
|
|
}
|
|
States::End
|
|
}
|
|
_ => panic!("unexpected token: {:?}", tok),
|
|
};
|
|
}
|
|
|
|
fn handle_variant_data(&mut self, tok: Tokenized) {
|
|
let variant = self.current_variant.as_mut().unwrap();
|
|
self.current_state = match tok {
|
|
Tokenized::FieldIdent(ident) => {
|
|
assert!(variant.data.is_none(),
|
|
"enum variant can only have one field");
|
|
variant.data = Some(EnumVariantData {
|
|
type_: "".to_owned(),
|
|
name: Some(ident),
|
|
});
|
|
States::VariantDataType
|
|
}
|
|
Tokenized::CloseBrace => States::VariantIdent,
|
|
_ => panic!("unexpected token: {:?}", tok),
|
|
};
|
|
}
|
|
|
|
fn handle_variant_data_type(&mut self, tok: Tokenized) {
|
|
let variant = self.current_variant.as_mut().unwrap();
|
|
let variant_data = variant.data.as_mut().unwrap();
|
|
self.current_state = match tok {
|
|
Tokenized::Ident(ident) => {
|
|
variant_data.type_ = ident;
|
|
States::VariantData
|
|
}
|
|
_ => panic!("unexpected token: {:?}", tok),
|
|
};
|
|
}
|
|
|
|
fn handle_end(&mut self, tok: Tokenized) {
|
|
panic!("unexpected tokens past ending brace: {:?}", tok);
|
|
}
|
|
}
|
|
|
|
// Continued from the above `EnumModel` definition. All methods are used for generating PollToken
|
|
// implementation. The method for packing an enum into a u64 is as follows:
|
|
// 1) Reserve the lowest "ceil(log_2(x))" bits where x is the number of enum variants.
|
|
// 2) Store the enum variant's index (0-based index based on order in the enum definition) in
|
|
// reserved bits.
|
|
// 3) If there is data in the enum variant, store the data in remaining bits.
|
|
// The method for unpacking is as follows
|
|
// 1) Mask the raw token to just the reserved bits
|
|
// 2) Match the reserved bits to the enum variant token.
|
|
// 3) If the indicated enum variant had data, extract it from the unreserved bits.
|
|
impl EnumModel {
|
|
// Calculates the number of bits needed to store the variant index. Essentially the log base 2
|
|
// of the number of variants, rounded up.
|
|
fn variant_bits(&self) -> u32 {
|
|
// The degenerate case of no variants.
|
|
if self.variants.is_empty() {
|
|
return 0;
|
|
}
|
|
self.variants.len().next_power_of_two().trailing_zeros()
|
|
}
|
|
|
|
// Generates the function body for `as_raw_token`.
|
|
fn generate_as_raw_token(&self) -> String {
|
|
let variant_bits = self.variant_bits();
|
|
let mut match_statement = "match *self {\n".to_owned();
|
|
|
|
// Each iteration corresponds to one variant's match arm.
|
|
for (index, variant) in self.variants.iter().enumerate() {
|
|
// The capture string is for everything between the variant identifier and the `=>` in
|
|
// the match arm: the variant's data capture.
|
|
let capture = match variant.data.as_ref() {
|
|
Some(&EnumVariantData { name: Some(ref name), .. }) => {
|
|
format!("{{ {}: data }}", name)
|
|
}
|
|
Some(&EnumVariantData { .. }) => "(data)".to_owned(),
|
|
None => "".to_owned(),
|
|
};
|
|
|
|
// The modifier string ORs the variant index with extra bits from the variant data
|
|
// field.
|
|
let modifer = if variant.data.is_some() {
|
|
format!(" | ((data as u64) << {})", variant_bits)
|
|
} else {
|
|
"".to_owned()
|
|
};
|
|
|
|
// Assembly of the match arm.
|
|
write!(match_statement,
|
|
"{}::{}{} => {}{},\n",
|
|
self.name,
|
|
variant.name,
|
|
capture,
|
|
index,
|
|
modifer)
|
|
.unwrap();
|
|
}
|
|
match_statement.push_str("}");
|
|
match_statement
|
|
}
|
|
|
|
// Generates the function body for `from_raw_token`.
|
|
fn generate_from_raw_token(&self) -> String {
|
|
let variant_bits = self.variant_bits();
|
|
let variant_mask = (1 << variant_bits) - 1;
|
|
|
|
// The match expression only matches the bits for the variant index.
|
|
let mut match_statement = format!("match data & 0x{:02x} {{\n", variant_mask);
|
|
|
|
// Each iteration corresponds to one variant's match arm.
|
|
for (index, variant) in self.variants.iter().enumerate() {
|
|
// The data string is for extracting the enum variant's data bits out of the raw token
|
|
// data, which includes both variant index and data bits.
|
|
let data = match variant.data.as_ref() {
|
|
Some(&EnumVariantData {
|
|
name: Some(ref name),
|
|
ref type_,
|
|
}) => format!("{{ {}: (data >> {}) as {} }}", name, variant_bits, type_),
|
|
Some(&EnumVariantData {
|
|
name: None,
|
|
ref type_,
|
|
}) => format!("((data >> {}) as {})", variant_bits, type_),
|
|
None => "".to_owned(),
|
|
};
|
|
|
|
// Assembly of the match arm.
|
|
write!(match_statement,
|
|
"{} => {}::{}{},\n",
|
|
index,
|
|
self.name,
|
|
variant.name,
|
|
data)
|
|
.unwrap();
|
|
}
|
|
match_statement.push_str("_ => unreachable!()\n}");
|
|
match_statement
|
|
}
|
|
}
|
|
|
|
// Because unit tests cannot create `TokenStream`s (apparently), we have an inner implementation
|
|
// that deals in strings.
|
|
fn poll_token_inner(src: &str) -> String {
|
|
let src_tokens = src.split_whitespace();
|
|
|
|
// Parsing is done in two interleaved stages, tokenizing without context, followed by parsing
|
|
// via state machine.
|
|
let mut state = ParseState::new();
|
|
for src_tok in src_tokens {
|
|
let tok = Tokenized::from_str(src_tok);
|
|
state.handle_token(tok);
|
|
}
|
|
|
|
assert_eq!(state.current_state,
|
|
States::End,
|
|
"unexpected end after parsing source enum");
|
|
|
|
// Given our basic model of a user given enum that is suitable as a token, we generate the
|
|
// implementation. The implementation is NOT always well formed, such as when a variant's data
|
|
// type is not bit shiftable or castable to u64, but we let Rust generate such errors as it
|
|
// would be difficult to detect every kind of error. Importantly, every implementation that we
|
|
// generate here and goes on to compile succesfully is sound.
|
|
let model = state.model;
|
|
format!("impl PollToken for {} {{
|
|
fn as_raw_token(&self) -> u64 {{
|
|
{}
|
|
}}
|
|
|
|
fn from_raw_token(data: u64) -> Self {{
|
|
{}
|
|
}}
|
|
}}",
|
|
model.name,
|
|
model.generate_as_raw_token(),
|
|
model.generate_from_raw_token())
|
|
}
|
|
|
|
/// Implements the PollToken trait for a given `enum`.
|
|
///
|
|
/// There are limitations on what `enum`s this custom derive will work on:
|
|
///
|
|
/// * Each variant must be a unit variant (no data), or have a single (un)named data field.
|
|
/// * If a variant has data, it must be a primitive type castable to and from a `u64`.
|
|
/// * If a variant data has size greater than or equal to a `u64`, its most significant bits must be
|
|
/// zero. The number of bits truncated is equal to the number of bits used to store the variant
|
|
/// index plus the number of bits above 64.
|
|
#[proc_macro_derive(PollToken)]
|
|
pub fn poll_token(input: TokenStream) -> TokenStream {
|
|
// The token stream gets converted to a string in a rather regular way, which makes parsing
|
|
// simpler. In particular, whitespace from the source enum is not carried over, instead replaced
|
|
// with whatever the token stream's to_string function outputs. The rust parser has already
|
|
// validated the syntax, so we can make lots of assumptions about the source being well formed.
|
|
TokenStream::from_str(&poll_token_inner(&input.to_string())).unwrap()
|
|
}
|