Skip to content

Codify and Resolve modifiers #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
12 changes: 8 additions & 4 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use self::shared::ModifierSet;
use std::fmt::Write;
use std::iter::Peekable;
use std::path::Path;

type StrResult<T> = Result<T, String>;

#[path = "src/shared.rs"]
mod shared;

/// A module of definitions.
///
/// Wraps a list of `(&str, Binding)` pairs. Presumably each pair associates
/// a definition's name with its binding -- TODO confirm against the parser,
/// which is not visible here.
struct Module<'a>(Vec<(&'a str, Binding<'a>)>);

Expand All @@ -29,7 +33,7 @@ enum Def<'a> {
/// A symbol, either a leaf or with modifiers.
enum Symbol<'a> {
Single(char),
Multi(Vec<(&'a str, char)>),
Multi(Vec<(ModifierSet<&'a str>, char)>),
}

/// A single line during parsing.
Expand All @@ -40,7 +44,7 @@ enum Line<'a> {
ModuleStart(&'a str),
ModuleEnd,
Symbol(&'a str, Option<char>),
Variant(&'a str, char),
Variant(ModifierSet<&'a str>, char),
}

fn main() {
Expand Down Expand Up @@ -110,7 +114,7 @@ fn tokenize(line: &str) -> StrResult<Line> {
validate_ident(part)?;
}
let c = decode_char(tail.ok_or("missing char")?)?;
Line::Variant(rest, c)
Line::Variant(ModifierSet::new_unchecked(rest), c)
} else {
validate_ident(head)?;
let c = tail.map(decode_char).transpose()?;
Expand Down Expand Up @@ -167,7 +171,7 @@ fn parse<'a>(

let symbol = if variants.len() > 0 {
if let Some(c) = c {
variants.insert(0, ("", c));
variants.insert(0, (ModifierSet::default(), c));
}
Symbol::Multi(variants)
} else {
Expand Down
52 changes: 48 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
/*!
Human-friendly notation for Unicode symbols.
*/
//! Human-friendly notation for Unicode symbols.
//!
//! ## Model
//! A [`Symbol`] is a collection of one or more _variants_.
//! Each variant is identified by a set of [_modifiers_](ModifierSet)
//! and has a single character as its value.
//! In principle, a modifier can be any non-empty string that does not
//! contain the character `.`, but codex only defines modifiers that
//! consist entirely of ASCII alphabetic characters.

pub use self::shared::ModifierSet;

mod shared;

/// A module of definitions.
#[derive(Debug, Copy, Clone)]
Expand Down Expand Up @@ -52,7 +62,41 @@ pub enum Symbol {
/// A symbol without modifiers.
Single(char),
/// A symbol with named modifiers. The symbol defaults to its first variant.
Multi(&'static [(&'static str, char)]),
Multi(&'static [(ModifierSet<&'static str>, char)]),
}

impl Symbol {
    /// Look up the character this symbol takes for a given set of modifiers.
    ///
    /// A symbol without modifiers only matches the empty modifier set.
    /// For a symbol with variants, the lookup is delegated to
    /// [`ModifierSet::best_match_in`] over the variant list.
    pub fn get(&self, modifs: ModifierSet<&str>) -> Option<char> {
        match *self {
            Self::Single(c) if modifs.is_empty() => Some(c),
            Self::Single(_) => None,
            Self::Multi(list) => modifs.best_match_in(list.iter().copied()),
        }
    }

    /// Iterate over all variants of this symbol as `(modifiers, character)` pairs.
    ///
    /// A symbol without modifiers yields exactly one pair whose modifier set
    /// is the default (empty) one.
    pub fn variants(&self) -> impl Iterator<Item = (ModifierSet<&str>, char)> {
        // At most one of the two sources below is non-empty: `lone` for a
        // single-character symbol, `table` for a symbol with variants.
        let mut lone = match *self {
            Self::Single(c) => Some(c),
            Self::Multi(_) => None,
        };
        let table: &'static [(ModifierSet<&'static str>, char)] = match *self {
            Self::Single(_) => &[],
            Self::Multi(list) => list,
        };
        let mut rest = table.iter();
        std::iter::from_fn(move || match lone.take() {
            Some(c) => Some((ModifierSet::default(), c)),
            None => rest.next().copied(),
        })
    }

    /// Iterate over every modifier that occurs in at least one variant.
    ///
    /// Each modifier is yielded once; the modifiers are gathered in a
    /// `BTreeSet` first, so they come out in sorted order.
    pub fn modifiers(&self) -> impl Iterator<Item = &str> + '_ {
        let mut names: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
        for (set, _) in self.variants() {
            names.extend(set);
        }
        names.into_iter()
    }
}

/// A module that contains the other top-level modules.
Expand Down
214 changes: 214 additions & 0 deletions src/shared.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
use std::ops::Deref;

/// A set of modifiers.
///
/// Beware: The [`Eq`] and [`Hash`] implementations depend on the order
/// of the modifiers, contrary to what one would expect of a set.
/// To test for set-wise equality, use [`iter`](Self::iter) and collect into a
/// true set type like [`HashSet`](std::collections::HashSet).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
// note: the visibility needs to be `pub(crate)`,
// since build.rs outputs `ModifierSet(...)`
Comment on lines +10 to +11
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is outdated since it uses new_unchecked

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the point where it gets output is

codex/build.rs

Line 217 in 34edb09

Symbol::Multi(list) => write!(buf, "Multi(&{list:?})").unwrap(),

which uses the debug-formatting, which outputs ModifierSet(<string>).
Changing that would of course be possible, but it'd require constructing the slice manually instead of just using {:?} there.

pub struct ModifierSet<S>(pub(crate) S);

impl<S: Default> Default for ModifierSet<S> {
/// Construct the default modifier set.
///
/// This is typically the empty set,
/// though the remark from [`Self::new_unchecked`] applies
/// since `S::default()` could technically be anything.
fn default() -> Self {
Self(S::default())
}
}

impl<S: Deref<Target = str>> ModifierSet<S> {
    /// Borrow the set as a `ModifierSet<&str>` over the same underlying string.
    pub fn as_deref(&self) -> ModifierSet<&str> {
        ModifierSet(&*self.0)
    }

    /// The underlying string, in which modifiers are separated by `.`.
    pub fn as_str(&self) -> &str {
        &*self.0
    }

    /// Wrap a string in which modifiers are separated by the character `.`,
    /// without validating it.
    ///
    /// Misuse is not unsafe, but it can lead to unexpected results later on.
    /// Callers should make sure that `s` contains no empty modifiers
    /// (i.e. the sequence `..`) and that no modifier occurs twice.
    pub fn new_unchecked(s: S) -> Self {
        ModifierSet(s)
    }

    /// Whether the set contains no modifiers at all.
    pub fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Append the modifier `m` to the set, without validating it.
    ///
    /// Misuse is not unsafe, but it can lead to unexpected results later on.
    /// Callers should make sure that `m` is not empty and does not contain
    /// the character `.`.
    pub fn add_unchecked(&mut self, m: &str)
    where
        S: for<'a> std::ops::AddAssign<&'a str>,
    {
        // A separator is only needed between modifiers, never in front of
        // the first one.
        let needs_separator = !self.is_empty();
        if needs_separator {
            self.0 += ".";
        }
        self.0 += m;
    }

    /// Iterate over the modifiers of the set in an arbitrary order.
    pub fn iter(&self) -> impl Iterator<Item = &str> {
        IntoIterator::into_iter(self)
    }

    /// Whether `m` is one of the modifiers in the set.
    pub fn contains(&self, m: &str) -> bool {
        for present in self.iter() {
            if present == m {
                return true;
            }
        }
        false
    }

    /// Whether every modifier of `self` also occurs in `other`.
    pub fn is_subset(&self, other: ModifierSet<&str>) -> bool {
        for modifier in self.iter() {
            if !other.contains(modifier) {
                return false;
            }
        }
        true
    }

    /// Pick the best matching entry out of `variants`.
    ///
    /// An entry is a match if its modifier set is a superset of
    /// (or equal to) `self`. Matches are ranked by two criteria, in order:
    /// 1. Number of modifiers shared with `self` (more is better).
    /// 2. Total number of modifiers (fewer is better).
    ///
    /// If several matches rank equally, the first one is returned.
    /// Returns `None` if nothing matches.
    pub fn best_match_in<'a, T>(
        &self,
        variants: impl Iterator<Item = (ModifierSet<&'a str>, T)>,
    ) -> Option<T> {
        // The best entry seen so far, together with its score.
        let mut winner: Option<((usize, std::cmp::Reverse<usize>), T)> = None;

        for (set, value) in variants {
            // Only supersets of `self` qualify as matches.
            if !self.is_subset(set) {
                continue;
            }

            let shared = set.iter().filter(|m| self.contains(m)).count();
            let size = set.iter().count();
            // `Reverse` makes a smaller total rank higher.
            let score = (shared, std::cmp::Reverse(size));

            // Strict comparison: on a tie the earlier entry is kept.
            let improves = match &winner {
                Some((top, _)) => score > *top,
                None => true,
            };
            if improves {
                winner = Some((score, value));
            }
        }

        winner.map(|(_, value)| value)
    }
}

impl<'a, S: Deref<Target = str>> IntoIterator for &'a ModifierSet<S> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the modifiers of the set in an arbitrary order.
    fn into_iter(self) -> Self::IntoIter {
        let text: &'a str = &self.0;
        let mut pieces = text.split('.');
        if text.is_empty() {
            // Splitting an empty string still yields one (empty) piece.
            // Consume it so that the empty set has no modifiers.
            pieces.next();
        }
        pieces
    }
}

impl<'a> IntoIterator for ModifierSet<&'a str> {
    type Item = &'a str;
    type IntoIter = std::str::Split<'a, char>;

    /// Iterate over the modifiers of the set in an arbitrary order.
    fn into_iter(self) -> Self::IntoIter {
        let text: &'a str = self.0;
        let mut pieces = text.split('.');
        if text.is_empty() {
            // Splitting an empty string still yields one (empty) piece.
            // Consume it so that the empty set has no modifiers.
            pieces.next();
        }
        pieces
    }
}

#[cfg(test)]
mod tests {
    // All tests work on borrowed static strings.
    type ModifierSet = super::ModifierSet<&'static str>;

    #[test]
    fn default_is_empty() {
        assert!(ModifierSet::default().is_empty());
    }

    #[test]
    fn iter_count() {
        assert_eq!(ModifierSet::default().iter().count(), 0);
        assert_eq!(ModifierSet::new_unchecked("a").iter().count(), 1);
        assert_eq!(ModifierSet::new_unchecked("a.b").iter().count(), 2);
        assert_eq!(ModifierSet::new_unchecked("a.b.c").iter().count(), 3);
    }

    #[test]
    fn contains() {
        let set = ModifierSet::new_unchecked("a.b");
        assert!(set.contains("a"));
        assert!(set.contains("b"));
        assert!(!set.contains("c"));
        // The empty set contains nothing, not even the empty string.
        assert!(!ModifierSet::default().contains(""));
    }

    #[test]
    fn subset() {
        assert!(
            ModifierSet::new_unchecked("a").is_subset(ModifierSet::new_unchecked("a.b"))
        );
        assert!(
            ModifierSet::new_unchecked("a").is_subset(ModifierSet::new_unchecked("b.a"))
        );
        assert!(ModifierSet::new_unchecked("a.b")
            .is_subset(ModifierSet::new_unchecked("b.c.a")));
    }

    #[test]
    fn not_subset() {
        assert!(
            !ModifierSet::new_unchecked("a.b").is_subset(ModifierSet::new_unchecked("a"))
        );
        assert!(
            !ModifierSet::new_unchecked("c").is_subset(ModifierSet::new_unchecked("a.b"))
        );
    }

    #[test]
    fn empty_is_subset_of_everything() {
        assert!(ModifierSet::default().is_subset(ModifierSet::default()));
        assert!(ModifierSet::default().is_subset(ModifierSet::new_unchecked("a.b")));
    }

    #[test]
    fn best_match() {
        // 1. more modifiers in common with self
        assert_eq!(
            ModifierSet::new_unchecked("a.b").best_match_in(
                [
                    (ModifierSet::new_unchecked("a.c"), 1),
                    (ModifierSet::new_unchecked("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(2)
        );
        // 2. fewer modifiers in general
        assert_eq!(
            ModifierSet::new_unchecked("a").best_match_in(
                [
                    (ModifierSet::new_unchecked("a"), 1),
                    (ModifierSet::new_unchecked("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(1)
        );
        // the first rule takes priority over the second
        assert_eq!(
            ModifierSet::new_unchecked("a.b").best_match_in(
                [
                    (ModifierSet::new_unchecked("a"), 1),
                    (ModifierSet::new_unchecked("a.b"), 2),
                ]
                .into_iter()
            ),
            Some(2)
        );
        // among multiple best matches, the first one is returned
        assert_eq!(
            ModifierSet::default().best_match_in(
                [
                    (ModifierSet::new_unchecked("a"), 1),
                    (ModifierSet::new_unchecked("b"), 2),
                ]
                .into_iter()
            ),
            Some(1)
        );
    }

    #[test]
    fn best_match_none() {
        // no candidate is a superset of self
        assert_eq!(
            ModifierSet::new_unchecked("c").best_match_in(
                [
                    (ModifierSet::new_unchecked("a"), 1),
                    (ModifierSet::new_unchecked("a.b"), 2),
                ]
                .into_iter()
            ),
            None
        );
        // no candidates at all
        assert_eq!(
            ModifierSet::default()
                .best_match_in(std::iter::empty::<(ModifierSet, i32)>()),
            None
        );
    }
}