From 1de53730bce0bd3f9de89db1da7c19b7889b9a75 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Fri, 27 Jan 2023 12:04:23 +0100 Subject: Symbol values and modules --- src/model/library.rs | 2 - src/model/mod.rs | 2 + src/model/scope.rs | 9 +++ src/model/symbol.rs | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/model/value.rs | 36 +++++++++++- src/syntax/ast.rs | 14 ----- src/syntax/kind.rs | 4 -- src/syntax/lexer.rs | 57 ++----------------- 8 files changed, 202 insertions(+), 74 deletions(-) create mode 100644 src/model/symbol.rs (limited to 'src') diff --git a/src/model/library.rs b/src/model/library.rs index cd9db10f..773342b3 100644 --- a/src/model/library.rs +++ b/src/model/library.rs @@ -43,8 +43,6 @@ pub struct LangItems { pub text_id: NodeId, /// Get the string if this is a text node. pub text_str: fn(&Content) -> Option<&str>, - /// Symbol notation: `:arrow:l:`. - pub symbol: fn(notation: EcoString) -> Content, /// A smart quote: `'` or `"`. pub smart_quote: fn(double: bool) -> Content, /// A paragraph break. diff --git a/src/model/mod.rs b/src/model/mod.rs index d84fe464..d96a314c 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -23,6 +23,7 @@ mod module; mod ops; mod realize; mod scope; +mod symbol; mod typeset; #[doc(hidden)] @@ -42,5 +43,6 @@ pub use self::realize::*; pub use self::scope::*; pub use self::str::*; pub use self::styles::*; +pub use self::symbol::*; pub use self::typeset::*; pub use self::value::*; diff --git a/src/model/scope.rs b/src/model/scope.rs index bb0d4684..40307cba 100644 --- a/src/model/scope.rs +++ b/src/model/scope.rs @@ -108,6 +108,15 @@ impl Scope { self.0.insert(var.into(), Slot::new(value.into(), Kind::Captured)); } + /// Copy definitions from another scope that aren't yet defined in this one. + pub fn copy_from(&mut self, other: &Self) { + for (name, value) in other.iter() { + self.0 + .entry(name.clone()) + .or_insert_with(|| Slot::new(value.clone(), Kind::Normal)); + } + } + /// Try to access a variable immutably. pub fn get(&self, var: &str) -> Option<&Value> { self.0.get(var).map(Slot::read) diff --git a/src/model/symbol.rs b/src/model/symbol.rs new file mode 100644 index 00000000..ac1d2b10 --- /dev/null +++ b/src/model/symbol.rs @@ -0,0 +1,152 @@ +use std::cmp::Reverse; +use std::collections::BTreeSet; +use std::fmt::{self, Debug, Formatter, Write}; + +use crate::diag::StrResult; +use crate::util::EcoString; + +/// Define a list of symbols. +#[macro_export] +#[doc(hidden)] +macro_rules! __symbols { + ($func:ident, $($name:ident: $value:tt),* $(,)?) => { + pub(super) fn $func(scope: &mut $crate::model::Scope) { + $(scope.define(stringify!($name), $crate::model::symbols!(@one $value));)* + } + }; + (@one $c:literal) => { $crate::model::Symbol::new($c) }; + (@one [$($first:literal $(: $second:literal)?),* $(,)?]) => { + $crate::model::Symbol::list(&[ + $($crate::model::symbols!(@pair $first $(: $second)?)),* + ]) + }; + (@pair $first:literal) => { ("", $first) }; + (@pair $first:literal: $second:literal) => { ($first, $second) }; +} + +#[doc(inline)] +pub use crate::__symbols as symbols; + +/// A symbol. +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct Symbol { + repr: Repr, + modifiers: EcoString, +} + +/// A collection of symbols. +#[derive(Clone, Eq, PartialEq, Hash)] +enum Repr { + Single(char), + List(&'static [(&'static str, char)]), +} + +impl Symbol { + /// Create a new symbol from a single character. + pub fn new(c: char) -> Self { + Self { repr: Repr::Single(c), modifiers: EcoString::new() } + } + + /// Create a symbol with variants. + #[track_caller] + pub fn list(list: &'static [(&'static str, char)]) -> Self { + debug_assert!(!list.is_empty()); + Self { + repr: Repr::List(list), + modifiers: EcoString::new(), + } + } + + /// Get the symbol's text. + pub fn get(&self) -> char { + match self.repr { + Repr::Single(c) => c, + Repr::List(list) => find(list, &self.modifiers).unwrap(), + } + } + + /// Apply a modifier to the symbol. + pub fn modified(mut self, modifier: &str) -> StrResult { + if !self.modifiers.is_empty() { + self.modifiers.push('.'); + } + self.modifiers.push_str(modifier); + if match self.repr { + Repr::Single(_) => true, + Repr::List(list) => find(list, &self.modifiers).is_none(), + } { + Err("unknown modifier")? + } + Ok(self) + } + + /// The characters that are covered by this symbol. + pub fn chars(&self) -> impl Iterator { + let (first, slice) = match self.repr { + Repr::Single(c) => (Some(c), [].as_slice()), + Repr::List(list) => (None, list), + }; + first.into_iter().chain(slice.iter().map(|&(_, c)| c)) + } + + /// Possible modifiers. + pub fn modifiers(&self) -> impl Iterator + '_ { + let mut set = BTreeSet::new(); + if let Repr::List(list) = self.repr { + for modifier in list.iter().flat_map(|(name, _)| name.split('.')) { + if !modifier.is_empty() && !contained(&self.modifiers, modifier) { + set.insert(modifier); + } + } + } + set.into_iter() + } +} + +impl Debug for Symbol { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_char(self.get()) + } +} + +/// Find the best symbol from the list. +fn find(list: &[(&str, char)], modifiers: &str) -> Option { + let mut best = None; + let mut best_score = None; + + // Find the best table entry with this name. + 'outer: for candidate in list { + for modifier in parts(modifiers) { + if !contained(candidate.0, modifier) { + continue 'outer; + } + } + + let mut matching = 0; + let mut total = 0; + for modifier in parts(candidate.0) { + if contained(modifiers, modifier) { + matching += 1; + } + total += 1; + } + + let score = (matching, Reverse(total)); + if best_score.map_or(true, |b| score > b) { + best = Some(candidate.1); + best_score = Some(score); + } + } + + best +} + +/// Split a modifier list into its parts. +fn parts(modifiers: &str) -> impl Iterator { + modifiers.split('.').filter(|s| !s.is_empty()) +} + +/// Whether the modifier string contains the modifier `m`. +fn contained(modifiers: &str, m: &str) -> bool { + parts(modifiers).any(|part| part == m) +} diff --git a/src/model/value.rs b/src/model/value.rs index 8103b211..15656c42 100644 --- a/src/model/value.rs +++ b/src/model/value.rs @@ -7,10 +7,12 @@ use std::sync::Arc; use siphasher::sip128::{Hasher128, SipHasher}; use super::{ - format_str, ops, Args, Array, Cast, CastInfo, Content, Dict, Func, Label, Module, Str, + format_str, ops, Args, Array, Cast, CastInfo, Content, Dict, Func, Label, Module, + Str, Symbol, }; use crate::diag::StrResult; use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel, RgbaColor}; +use crate::syntax::Span; use crate::util::{format_eco, EcoString}; /// A computational value. @@ -38,6 +40,8 @@ pub enum Value { Fraction(Fr), /// A color value: `#f79143ff`. Color(Color), + /// A symbol: `arrow.l`. + Symbol(Symbol), /// A string: `"string"`. Str(Str), /// A label: ``. @@ -81,6 +85,7 @@ impl Value { Self::Relative(_) => Rel::::TYPE_NAME, Self::Fraction(_) => Fr::TYPE_NAME, Self::Color(_) => Color::TYPE_NAME, + Self::Symbol(_) => Symbol::TYPE_NAME, Self::Str(_) => Str::TYPE_NAME, Self::Label(_) => Label::TYPE_NAME, Self::Content(_) => Content::TYPE_NAME, @@ -98,11 +103,33 @@ impl Value { T::cast(self) } + /// Try to access a field on the value. + pub fn field(&self, field: &str) -> StrResult { + match self { + Self::Symbol(symbol) => symbol.clone().modified(&field).map(Self::Symbol), + Self::Dict(dict) => dict.at(&field).cloned(), + Self::Content(content) => content + .field(&field) + .ok_or_else(|| format_eco!("unknown field `{field}`")), + Self::Module(module) => module.get(&field).cloned(), + v => Err(format_eco!("cannot access fields on type {}", v.type_name())), + } + } + /// Return the debug representation of the value. pub fn repr(&self) -> Str { format_str!("{:?}", self) } + /// Attach a span to the value, if possibly. + pub fn spanned(self, span: Span) -> Self { + match self { + Value::Content(v) => Value::Content(v.spanned(span)), + Value::Func(v) => Value::Func(v.spanned(span)), + v => v, + } + } + /// Return the display representation of the value. pub fn display(self) -> Content { match self { @@ -110,6 +137,7 @@ impl Value { Self::Int(v) => item!(text)(format_eco!("{}", v)), Self::Float(v) => item!(text)(format_eco!("{}", v)), Self::Str(v) => item!(text)(v.into()), + Self::Symbol(v) => item!(text)(v.get().into()), Self::Content(v) => v, Self::Func(_) => Content::empty(), Self::Module(module) => module.content(), @@ -122,6 +150,8 @@ impl Value { match self { Self::Int(v) => item!(math_atom)(format_eco!("{}", v)), Self::Float(v) => item!(math_atom)(format_eco!("{}", v)), + Self::Symbol(v) => item!(math_atom)(v.get().into()), + Self::Str(v) => item!(math_atom)(v.into()), _ => self.display(), } } @@ -147,6 +177,7 @@ impl Debug for Value { Self::Relative(v) => Debug::fmt(v, f), Self::Fraction(v) => Debug::fmt(v, f), Self::Color(v) => Debug::fmt(v, f), + Self::Symbol(v) => Debug::fmt(v, f), Self::Str(v) => Debug::fmt(v, f), Self::Label(v) => Debug::fmt(v, f), Self::Content(_) => f.pad("[...]"), @@ -187,6 +218,7 @@ impl Hash for Value { Self::Relative(v) => v.hash(state), Self::Fraction(v) => v.hash(state), Self::Color(v) => v.hash(state), + Self::Symbol(v) => v.hash(state), Self::Str(v) => v.hash(state), Self::Label(v) => v.hash(state), Self::Content(v) => v.hash(state), @@ -398,11 +430,13 @@ primitive! { Rel: "relative length", } primitive! { Fr: "fraction", Fraction } primitive! { Color: "color", Color } +primitive! { Symbol: "symbol", Symbol } primitive! { Str: "string", Str } primitive! { Label: "label", Label } primitive! { Content: "content", Content, None => Content::empty(), + Symbol(symbol) => item!(text)(symbol.get().into()), Str(text) => item!(text)(text.into()) } primitive! { Array: "array", Array } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index b9186787..3b573f7d 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -89,8 +89,6 @@ pub enum Expr { /// A shorthand for a unicode codepoint. For example, `~` for non-breaking /// space or `-?` for a soft hyphen. Shorthand(Shorthand), - /// Symbol notation: `:arrow:l:`. - Symbol(Symbol), /// A smart quote: `'` or `"`. SmartQuote(SmartQuote), /// Strong content: `*Strong*`. @@ -413,18 +411,6 @@ impl Shorthand { } } -node! { - /// Symbol notation: `:arrow:l:`. - Symbol -} - -impl Symbol { - /// Get the symbol's notation. - pub fn get(&self) -> &str { - self.0.text().trim_matches(':') - } -} - node! { /// A smart quote: `'` or `"`. SmartQuote diff --git a/src/syntax/kind.rs b/src/syntax/kind.rs index 34e2fce7..f0a0bc5a 100644 --- a/src/syntax/kind.rs +++ b/src/syntax/kind.rs @@ -24,9 +24,6 @@ pub enum SyntaxKind { /// A shorthand for a unicode codepoint. For example, `~` for non-breaking /// space or `-?` for a soft hyphen. Shorthand, - /// Symbol notation: `:arrow:l:`. The string only contains the inner part - /// without leading and trailing dot. - Symbol, /// A smart quote: `'` or `"`. SmartQuote, /// Strong content: `*Strong*`. @@ -332,7 +329,6 @@ impl SyntaxKind { Self::Parbreak => "paragraph break", Self::Escape => "escape sequence", Self::Shorthand => "shorthand", - Self::Symbol => "symbol notation", Self::Strong => "strong content", Self::Emph => "emphasized content", Self::Raw => "raw block", diff --git a/src/syntax/lexer.rs b/src/syntax/lexer.rs index 0735270b..d4548b8b 100644 --- a/src/syntax/lexer.rs +++ b/src/syntax/lexer.rs @@ -167,21 +167,12 @@ impl Lexer<'_> { fn markup(&mut self, start: usize, c: char) -> SyntaxKind { match c { '\\' => self.backslash(), - ':' if self.s.at(is_id_start) => self.maybe_symbol(), '`' => self.raw(), 'h' if self.s.eat_if("ttp://") => self.link(), 'h' if self.s.eat_if("ttps://") => self.link(), '0'..='9' => self.numbering(start), '<' if self.s.at(is_id_continue) => self.label(), '@' if self.s.at(is_id_continue) => self.reference(), - '#' if self.s.eat_if('{') => SyntaxKind::LeftBrace, - '#' if self.s.eat_if('[') => SyntaxKind::LeftBracket, - '#' if self.s.at(is_id_start) => { - match keyword(self.s.eat_while(is_id_continue)) { - Some(keyword) => keyword, - None => SyntaxKind::Ident, - } - } '.' if self.s.eat_if("..") => SyntaxKind::Shorthand, '-' if self.s.eat_if("--") => SyntaxKind::Shorthand, @@ -190,8 +181,7 @@ impl Lexer<'_> { '*' if !self.in_word() => SyntaxKind::Star, '_' if !self.in_word() => SyntaxKind::Underscore, - '{' => SyntaxKind::LeftBrace, - '}' => SyntaxKind::RightBrace, + '#' if !self.s.at(char::is_whitespace) => SyntaxKind::Hashtag, '[' => SyntaxKind::LeftBracket, ']' => SyntaxKind::RightBracket, '\'' => SyntaxKind::SmartQuote, @@ -241,26 +231,6 @@ impl Lexer<'_> { } } - fn maybe_symbol(&mut self) -> SyntaxKind { - let start = self.s.cursor(); - let mut end = start; - while !self.s.eat_while(is_id_continue).is_empty() && self.s.at(':') { - end = self.s.cursor(); - self.s.eat(); - } - - self.s.jump(end); - - if start < end { - self.s.expect(':'); - SyntaxKind::Symbol - } else if self.mode == LexMode::Markup { - SyntaxKind::Colon - } else { - SyntaxKind::Atom - } - } - fn raw(&mut self) -> SyntaxKind { let mut backticks = 1; while self.s.eat_if('`') { @@ -408,7 +378,6 @@ impl Lexer<'_> { fn math(&mut self, start: usize, c: char) -> SyntaxKind { match c { '\\' => self.backslash(), - ':' if self.s.at(is_id_start) => self.maybe_symbol(), '"' => self.string(), '.' if self.s.eat_if("..") => SyntaxKind::Shorthand, @@ -434,9 +403,10 @@ impl Lexer<'_> { '^' => SyntaxKind::Hat, '&' => SyntaxKind::MathAlignPoint, - // Identifiers and symbol notation. + // Identifiers. c if is_math_id_start(c) && self.s.at(is_math_id_continue) => { - self.math_ident() + self.s.eat_while(is_math_id_continue); + SyntaxKind::MathIdent } // Other math atoms. @@ -444,25 +414,6 @@ impl Lexer<'_> { } } - fn math_ident(&mut self) -> SyntaxKind { - self.s.eat_while(is_math_id_continue); - - let mut symbol = false; - while self.s.eat_if(':') && !self.s.eat_while(char::is_alphanumeric).is_empty() { - symbol = true; - } - - if symbol { - return SyntaxKind::Symbol; - } - - if self.s.scout(-1) == Some(':') { - self.s.uneat(); - } - - SyntaxKind::Ident - } - fn atom(&mut self, start: usize, c: char) -> SyntaxKind { // Keep numbers and grapheme clusters together. if c.is_numeric() { -- cgit v1.2.3