From c18321a4c24b1bae9b935e3434aa114f930ca5f5 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Wed, 15 Sep 2021 12:29:42 +0200 Subject: Bugfix and tidying --- src/diag.rs | 1 + src/export/pdf.rs | 6 +- src/layout/tree.rs | 11 +- src/library/text.rs | 25 ++-- src/library/utility.rs | 3 +- src/parse/mod.rs | 26 ++-- src/parse/tokens.rs | 1 - src/source.rs | 5 +- src/syntax/ident.rs | 13 ++ src/syntax/token.rs | 3 - src/util/eco.rs | 398 ------------------------------------------------- src/util/eco_string.rs | 398 +++++++++++++++++++++++++++++++++++++++++++++++++ src/util/mac.rs | 25 ---- src/util/mac_roman.rs | 25 ++++ src/util/mod.rs | 8 +- 15 files changed, 477 insertions(+), 471 deletions(-) delete mode 100644 src/util/eco.rs create mode 100644 src/util/eco_string.rs delete mode 100644 src/util/mac.rs create mode 100644 src/util/mac_roman.rs (limited to 'src') diff --git a/src/diag.rs b/src/diag.rs index 61432e93..6249a7cd 100644 --- a/src/diag.rs +++ b/src/diag.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::syntax::{Span, Spanned}; /// Early-return with a vec-boxed [`Error`]. +#[macro_export] macro_rules! bail { ($span:expr, $message:expr $(,)?) 
=> { return Err($crate::diag::Error::boxed($span, $message,)) diff --git a/src/export/pdf.rs b/src/export/pdf.rs index b762060e..3abda916 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -309,10 +309,10 @@ impl<'a> PdfExporter<'a> { let face = self.fonts.get(face_id); let ttf = face.ttf(); - let postcript_name = find_name(ttf.names(), name_id::POST_SCRIPT_NAME) + let postscript_name = find_name(ttf.names(), name_id::POST_SCRIPT_NAME) .unwrap_or_else(|| "unknown".to_string()); - let base_font = format!("ABCDEF+{}", postcript_name); + let base_font = format!("ABCDEF+{}", postscript_name); let base_font = Name(base_font.as_bytes()); let cmap_name = Name(b"Custom"); let system_info = SystemInfo { @@ -356,7 +356,7 @@ impl<'a> PdfExporter<'a> { }); let mut flags = FontFlags::empty(); - flags.set(FontFlags::SERIF, postcript_name.contains("Serif")); + flags.set(FontFlags::SERIF, postscript_name.contains("Serif")); flags.set(FontFlags::FIXED_PITCH, ttf.is_monospaced()); flags.set(FontFlags::ITALIC, ttf.is_italic()); flags.insert(FontFlags::SYMBOLIC); diff --git a/src/layout/tree.rs b/src/layout/tree.rs index 28cbbb51..36d0ac25 100644 --- a/src/layout/tree.rs +++ b/src/layout/tree.rs @@ -2,13 +2,12 @@ use std::fmt::{self, Debug, Formatter}; use super::*; -use std::any::Any; - -#[cfg(feature = "layout-cache")] -use std::hash::{Hash, Hasher}; - #[cfg(feature = "layout-cache")] -use fxhash::FxHasher64; +use { + fxhash::FxHasher64, + std::any::Any, + std::hash::{Hash, Hasher}, +}; /// A tree of layout nodes. #[derive(Debug)] diff --git a/src/library/text.rs b/src/library/text.rs index 863586f6..b8b3afcd 100644 --- a/src/library/text.rs +++ b/src/library/text.rs @@ -1,6 +1,5 @@ -use crate::layout::{Decoration, LineDecoration, LineKind, Paint}; - use super::*; +use crate::layout::{Decoration, LineDecoration, LineKind, Paint}; /// `font`: Configure the font. 
pub fn font(ctx: &mut EvalContext, args: &mut Args) -> TypResult { @@ -163,22 +162,22 @@ fn lang_dir(iso: &str) -> Dir { } } -/// `strike`: Set striken-through text. -pub fn strike(ctx: &mut EvalContext, args: &mut Args) -> TypResult { - line_impl(ctx, args, LineKind::Strikethrough) +/// `strike`: Typeset struck-through text. +pub fn strike(_: &mut EvalContext, args: &mut Args) -> TypResult { + line_impl(args, LineKind::Strikethrough) } -/// `underline`: Set underlined text. -pub fn underline(ctx: &mut EvalContext, args: &mut Args) -> TypResult { - line_impl(ctx, args, LineKind::Underline) +/// `underline`: Typeset underlined text. +pub fn underline(_: &mut EvalContext, args: &mut Args) -> TypResult { + line_impl(args, LineKind::Underline) } -/// `overline`: Set text with an overline. -pub fn overline(ctx: &mut EvalContext, args: &mut Args) -> TypResult { - line_impl(ctx, args, LineKind::Overline) +/// `overline`: Typeset text with an overline. +pub fn overline(_: &mut EvalContext, args: &mut Args) -> TypResult { + line_impl(args, LineKind::Overline) } -fn line_impl(_: &mut EvalContext, args: &mut Args, kind: LineKind) -> TypResult { +fn line_impl(args: &mut Args, kind: LineKind) -> TypResult { let stroke = args.named("stroke")?.or_else(|| args.eat()); let thickness = args.named::("thickness")?.or_else(|| args.eat()); let offset = args.named("offset")?; @@ -196,7 +195,7 @@ fn line_impl(_: &mut EvalContext, args: &mut Args, kind: LineKind) -> TypResult< Ok(Value::Template(body)) } -/// `link`: Set a link. +/// `link`: Typeset text as a link. 
pub fn link(_: &mut EvalContext, args: &mut Args) -> TypResult { let url = args.expect::("url")?; diff --git a/src/library/utility.rs b/src/library/utility.rs index f5fab6ee..e6d5476f 100644 --- a/src/library/utility.rs +++ b/src/library/utility.rs @@ -1,9 +1,8 @@ use std::cmp::Ordering; use std::str::FromStr; -use crate::color::{Color, RgbaColor}; - use super::*; +use crate::color::{Color, RgbaColor}; /// `type`: The name of a value's type. pub fn type_(_: &mut EvalContext, args: &mut Args) -> TypResult { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 649b4eb8..498badca 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -17,7 +17,7 @@ use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; -/// Parse a string of source code. +/// Parse a source file. pub fn parse(source: &SourceFile) -> TypResult { let mut p = Parser::new(source); let tree = tree(&mut p); @@ -48,13 +48,14 @@ fn tree_indented(p: &mut Parser, column: usize) -> SyntaxTree { }) } -/// Parse a syntax tree. +/// Parse a syntax tree while the peeked token satisfies a condition. +/// +/// If `at_start` is true, things like headings that may only appear at the +/// beginning of a line or template are allowed. fn tree_while(p: &mut Parser, mut at_start: bool, f: &mut F) -> SyntaxTree where F: FnMut(&mut Parser) -> bool, { - // We use `at_start` to keep track of whether we are at the start of a line - // or template to know whether things like headings are allowed. 
let mut tree = vec![]; while !p.eof() && f(p) { if let Some(node) = node(p, &mut at_start) { @@ -94,8 +95,8 @@ fn node(p: &mut Parser, at_start: &mut bool) -> Option { Token::Underscore => SyntaxNode::Emph(span), Token::Raw(t) => raw(p, t), Token::Eq if *at_start => return Some(heading(p)), - Token::Hyph if *at_start => return Some(list_item(p)), - Token::Numbering(number) if *at_start => return Some(enum_item(p, number)), + Token::Hyph if *at_start => return Some(list_node(p)), + Token::Numbering(number) if *at_start => return Some(enum_node(p, number)), // Line-based markup that is not currently at the start of the line. Token::Eq | Token::Hyph | Token::Numbering(_) => { @@ -196,7 +197,7 @@ fn heading(p: &mut Parser) -> SyntaxNode { } /// Parse a single list item. -fn list_item(p: &mut Parser) -> SyntaxNode { +fn list_node(p: &mut Parser) -> SyntaxNode { let start = p.next_start(); let column = p.column(start); p.eat_assert(Token::Hyph); @@ -205,7 +206,7 @@ fn list_item(p: &mut Parser) -> SyntaxNode { } /// Parse a single enum item. -fn enum_item(p: &mut Parser, number: Option) -> SyntaxNode { +fn enum_node(p: &mut Parser, number: Option) -> SyntaxNode { let start = p.next_start(); let column = p.column(start); p.eat_assert(Token::Numbering(number)); @@ -243,10 +244,7 @@ fn expr_with(p: &mut Parser, atomic: bool, min_prec: usize) -> Option { loop { // Exclamation mark, parenthesis or bracket means this is a function // call. - if matches!( - p.peek_direct(), - Some(Token::Excl | Token::LeftParen | Token::LeftBracket), - ) { + if matches!(p.peek_direct(), Some(Token::LeftParen | Token::LeftBracket)) { lhs = call(p, lhs)?; continue; } @@ -520,7 +518,7 @@ fn idents(p: &mut Parser, items: Vec) -> Vec { iter.collect() } -// Parse a template value: `[...]`. +// Parse a template block: `[...]`. 
fn template(p: &mut Parser) -> Expr { p.start_group(Group::Bracket, TokenMode::Markup); let tree = tree(p); @@ -528,7 +526,7 @@ fn template(p: &mut Parser) -> Expr { Expr::Template(Box::new(TemplateExpr { span, tree })) } -/// Parse a block expression: `{...}`. +/// Parse a code block: `{...}`. fn block(p: &mut Parser, scoping: bool) -> Expr { p.start_group(Group::Brace, TokenMode::Code); let mut exprs = vec![]; diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 91d3ab7f..c6bf3a90 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -142,7 +142,6 @@ impl<'s> Tokens<'s> { '-' => Token::Hyph, '*' => Token::Star, '/' => Token::Slash, - '!' => Token::Excl, '=' => Token::Eq, '<' => Token::Lt, '>' => Token::Gt, diff --git a/src/source.rs b/src/source.rs index b02af1b6..ac56f7ce 100644 --- a/src/source.rs +++ b/src/source.rs @@ -6,14 +6,15 @@ use std::ops::Range; use std::path::{Path, PathBuf}; use std::rc::Rc; -#[cfg(feature = "codespan-reporting")] -use codespan_reporting::files::{self, Files}; use serde::{Deserialize, Serialize}; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, Scanner}; use crate::util::PathExt; +#[cfg(feature = "codespan-reporting")] +use codespan_reporting::files::{self, Files}; + /// A unique identifier for a loaded source file. #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct SourceId(u32); diff --git a/src/syntax/ident.rs b/src/syntax/ident.rs index c47e6fb1..398e2ff9 100644 --- a/src/syntax/ident.rs +++ b/src/syntax/ident.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::ops::Deref; use unicode_xid::UnicodeXID; @@ -53,6 +54,18 @@ impl AsRef for Ident { } } +impl Borrow for Ident { + fn borrow(&self) -> &str { + self + } +} + +impl From<&Ident> for EcoString { + fn from(ident: &Ident) -> Self { + ident.string.clone() + } +} + /// Whether a string is a valid identifier. 
pub fn is_ident(string: &str) -> bool { let mut chars = string.chars(); diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 219395cf..22dd104b 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -39,8 +39,6 @@ pub enum Token<'s> { Hyph, /// A slash: `/`. Slash, - /// An exlamation mark. - Excl, /// A single equals sign: `=`. Eq, /// Two equals signs: `==`. @@ -223,7 +221,6 @@ impl<'s> Token<'s> { Self::Plus => "plus", Self::Hyph => "minus", Self::Slash => "slash", - Self::Excl => "exclamation mark", Self::Eq => "assignment operator", Self::EqEq => "equality operator", Self::ExclEq => "inequality operator", diff --git a/src/util/eco.rs b/src/util/eco.rs deleted file mode 100644 index f1dfdfaf..00000000 --- a/src/util/eco.rs +++ /dev/null @@ -1,398 +0,0 @@ -use std::borrow::Borrow; -use std::cmp::Ordering; -use std::fmt::{self, Debug, Display, Formatter, Write}; -use std::hash::{Hash, Hasher}; -use std::ops::Deref; -use std::rc::Rc; - -/// An economical string with inline storage and clone-on-write semantics. -#[derive(Clone)] -pub struct EcoString(Repr); - -/// The internal representation. Either: -/// - inline when below a certain number of bytes, -/// - or reference-counted on the heap with COW semantics. -#[derive(Clone)] -enum Repr { - Small { buf: [u8; LIMIT], len: u8 }, - Large(Rc), -} - -/// The maximum number of bytes that can be stored inline. -/// -/// The value is chosen such that an `EcoString` fits exactly into 16 bytes -/// (which are needed anyway due to the `Rc`s alignment, at least on 64-bit -/// platforms). -/// -/// Must be at least 4 to hold any char. -const LIMIT: usize = 14; - -impl EcoString { - /// Create a new, empty string. - pub fn new() -> Self { - Self(Repr::Small { buf: [0; LIMIT], len: 0 }) - } - - /// Create a new, empty string with the given `capacity`. 
- pub fn with_capacity(capacity: usize) -> Self { - if capacity <= LIMIT { - Self::new() - } else { - Self(Repr::Large(Rc::new(String::with_capacity(capacity)))) - } - } - - /// Create an instance from an existing string-like type. - pub fn from_str(s: S) -> Self - where - S: AsRef + Into, - { - let slice = s.as_ref(); - let len = slice.len(); - Self(if len <= LIMIT { - let mut buf = [0; LIMIT]; - buf[.. len].copy_from_slice(slice.as_bytes()); - Repr::Small { buf, len: len as u8 } - } else { - Repr::Large(Rc::new(s.into())) - }) - } - - /// Whether the string is empty. - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// The length of the string in bytes. - pub fn len(&self) -> usize { - match &self.0 { - Repr::Small { len, .. } => usize::from(*len), - Repr::Large(string) => string.len(), - } - } - - /// A string slice containing the entire string. - pub fn as_str(&self) -> &str { - self - } - - /// Append the given character at the end. - pub fn push(&mut self, c: char) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - if c.len_utf8() == 1 && prev < LIMIT { - buf[prev] = c as u8; - *len += 1; - } else { - self.push_str(c.encode_utf8(&mut [0; 4])); - } - } - Repr::Large(rc) => Rc::make_mut(rc).push(c), - } - } - - /// Append the given string slice at the end. - pub fn push_str(&mut self, string: &str) { - match &mut self.0 { - Repr::Small { buf, len } => { - let prev = usize::from(*len); - let new = prev + string.len(); - if new <= LIMIT { - buf[prev .. new].copy_from_slice(string.as_bytes()); - *len = new as u8; - } else { - let mut spilled = String::with_capacity(new); - spilled.push_str(self); - spilled.push_str(string); - self.0 = Repr::Large(Rc::new(spilled)); - } - } - Repr::Large(rc) => Rc::make_mut(rc).push_str(string), - } - } - - /// Remove the last character from the string. - pub fn pop(&mut self) -> Option { - let c = self.as_str().chars().rev().next()?; - match &mut self.0 { - Repr::Small { len, .. 
} => { - *len -= c.len_utf8() as u8; - } - Repr::Large(rc) => { - Rc::make_mut(rc).pop(); - } - } - Some(c) - } - - /// Clear the string. - pub fn clear(&mut self) { - match &mut self.0 { - Repr::Small { len, .. } => *len = 0, - Repr::Large(rc) => { - if Rc::strong_count(rc) == 1 { - Rc::make_mut(rc).clear(); - } else { - *self = Self::new(); - } - } - } - } - - /// Repeat this string `n` times. - pub fn repeat(&self, n: usize) -> Self { - if n == 0 { - return Self::new(); - } - - if let Repr::Small { buf, len } = &self.0 { - let prev = usize::from(*len); - let new = prev.saturating_mul(n); - if new <= LIMIT { - let src = &buf[.. prev]; - let mut buf = [0; LIMIT]; - for i in 0 .. n { - buf[prev * i .. prev * (i + 1)].copy_from_slice(src); - } - return Self(Repr::Small { buf, len: new as u8 }); - } - } - - self.as_str().repeat(n).into() - } -} - -impl Deref for EcoString { - type Target = str; - - fn deref(&self) -> &str { - match &self.0 { - // Safety: - // The buffer contents stem from correct UTF-8 sources: - // - Valid ASCII characters - // - Other string slices - // - Chars that were encoded with char::encode_utf8 - // Furthermore, we still do the bounds-check on the len in case - // it gets corrupted somehow. - Repr::Small { buf, len } => unsafe { - std::str::from_utf8_unchecked(&buf[.. 
usize::from(*len)]) - }, - Repr::Large(string) => string.as_str(), - } - } -} - -impl Default for EcoString { - fn default() -> Self { - Self::new() - } -} - -impl Debug for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Debug::fmt(self.as_str(), f) - } -} - -impl Display for EcoString { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - Display::fmt(self.as_str(), f) - } -} - -impl Eq for EcoString {} - -impl PartialEq for EcoString { - fn eq(&self, other: &Self) -> bool { - self.as_str().eq(other.as_str()) - } -} - -impl PartialEq for EcoString { - fn eq(&self, other: &str) -> bool { - self.as_str().eq(other) - } -} - -impl PartialEq<&str> for EcoString { - fn eq(&self, other: &&str) -> bool { - self.as_str().eq(*other) - } -} - -impl Ord for EcoString { - fn cmp(&self, other: &Self) -> Ordering { - self.as_str().cmp(other.as_str()) - } -} - -impl PartialOrd for EcoString { - fn partial_cmp(&self, other: &Self) -> Option { - self.as_str().partial_cmp(other.as_str()) - } -} - -impl Hash for EcoString { - fn hash(&self, state: &mut H) { - self.as_str().hash(state); - } -} - -impl Write for EcoString { - fn write_str(&mut self, s: &str) -> fmt::Result { - self.push_str(s); - Ok(()) - } - - fn write_char(&mut self, c: char) -> fmt::Result { - self.push(c); - Ok(()) - } -} - -impl AsRef for EcoString { - fn as_ref(&self) -> &str { - self - } -} - -impl Borrow for EcoString { - fn borrow(&self) -> &str { - self - } -} - -impl From<&Self> for EcoString { - fn from(s: &Self) -> Self { - s.clone() - } -} - -impl From for EcoString { - fn from(c: char) -> Self { - let mut buf = [0; LIMIT]; - let len = c.encode_utf8(&mut buf).len(); - Self(Repr::Small { buf, len: len as u8 }) - } -} - -impl From<&str> for EcoString { - fn from(s: &str) -> Self { - Self::from_str(s) - } -} - -impl From for EcoString { - fn from(s: String) -> Self { - Self::from_str(s) - } -} - -impl From for String { - fn from(s: EcoString) -> Self { - match s.0 { - Repr::Small { .. 
} => s.as_str().to_owned(), - Repr::Large(rc) => match Rc::try_unwrap(rc) { - Ok(string) => string, - Err(rc) => (*rc).clone(), - }, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - const ALPH: &str = "abcdefghijklmnopqrstuvwxyz"; - - #[test] - fn test_str_new() { - // Test inline strings. - assert_eq!(EcoString::new(), ""); - assert_eq!(EcoString::from('a'), "a"); - assert_eq!(EcoString::from('😀'), "😀"); - assert_eq!(EcoString::from("abc"), "abc"); - - // Test around the inline limit. - assert_eq!(EcoString::from(&ALPH[.. LIMIT - 1]), ALPH[.. LIMIT - 1]); - assert_eq!(EcoString::from(&ALPH[.. LIMIT]), ALPH[.. LIMIT]); - assert_eq!(EcoString::from(&ALPH[.. LIMIT + 1]), ALPH[.. LIMIT + 1]); - - // Test heap string. - assert_eq!(EcoString::from(ALPH), ALPH); - } - - #[test] - fn test_str_push() { - let mut v = EcoString::new(); - v.push('a'); - v.push('b'); - v.push_str("cd😀"); - assert_eq!(v, "abcd😀"); - assert_eq!(v.len(), 8); - - // Test fully filling the inline storage. - v.push_str("efghij"); - assert_eq!(v.len(), LIMIT); - - // Test spilling with `push`. - let mut a = v.clone(); - a.push('k'); - assert_eq!(a, "abcd😀efghijk"); - assert_eq!(a.len(), 15); - - // Test spilling with `push_str`. - let mut b = v.clone(); - b.push_str("klmn"); - assert_eq!(b, "abcd😀efghijklmn"); - assert_eq!(b.len(), 18); - - // v should be unchanged. - assert_eq!(v.len(), LIMIT); - } - - #[test] - fn test_str_pop() { - // Test with inline string. - let mut v = EcoString::from("Hello World!"); - assert_eq!(v.pop(), Some('!')); - assert_eq!(v, "Hello World"); - - // Remove one-by-one. - for _ in 0 .. 10 { - v.pop(); - } - - assert_eq!(v, "H"); - assert_eq!(v.pop(), Some('H')); - assert_eq!(v, ""); - assert!(v.is_empty()); - - // Test with large string. - let mut v = EcoString::from(ALPH); - assert_eq!(v.pop(), Some('z')); - assert_eq!(v.len(), 25); - } - - #[test] - fn test_str_index() { - // Test that we can use the index syntax. 
- let v = EcoString::from("abc"); - assert_eq!(&v[.. 2], "ab"); - } - - #[test] - fn test_str_repeat() { - // Test with empty string. - assert_eq!(EcoString::new().repeat(0), ""); - assert_eq!(EcoString::new().repeat(100), ""); - - // Test non-spilling and spilling case. - let v = EcoString::from("abc"); - assert_eq!(v.repeat(0), ""); - assert_eq!(v.repeat(3), "abcabcabc"); - assert_eq!(v.repeat(5), "abcabcabcabcabc"); - } -} diff --git a/src/util/eco_string.rs b/src/util/eco_string.rs new file mode 100644 index 00000000..f1dfdfaf --- /dev/null +++ b/src/util/eco_string.rs @@ -0,0 +1,398 @@ +use std::borrow::Borrow; +use std::cmp::Ordering; +use std::fmt::{self, Debug, Display, Formatter, Write}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::rc::Rc; + +/// An economical string with inline storage and clone-on-write semantics. +#[derive(Clone)] +pub struct EcoString(Repr); + +/// The internal representation. Either: +/// - inline when below a certain number of bytes, +/// - or reference-counted on the heap with COW semantics. +#[derive(Clone)] +enum Repr { + Small { buf: [u8; LIMIT], len: u8 }, + Large(Rc), +} + +/// The maximum number of bytes that can be stored inline. +/// +/// The value is chosen such that an `EcoString` fits exactly into 16 bytes +/// (which are needed anyway due to the `Rc`s alignment, at least on 64-bit +/// platforms). +/// +/// Must be at least 4 to hold any char. +const LIMIT: usize = 14; + +impl EcoString { + /// Create a new, empty string. + pub fn new() -> Self { + Self(Repr::Small { buf: [0; LIMIT], len: 0 }) + } + + /// Create a new, empty string with the given `capacity`. + pub fn with_capacity(capacity: usize) -> Self { + if capacity <= LIMIT { + Self::new() + } else { + Self(Repr::Large(Rc::new(String::with_capacity(capacity)))) + } + } + + /// Create an instance from an existing string-like type. 
+ pub fn from_str(s: S) -> Self + where + S: AsRef + Into, + { + let slice = s.as_ref(); + let len = slice.len(); + Self(if len <= LIMIT { + let mut buf = [0; LIMIT]; + buf[.. len].copy_from_slice(slice.as_bytes()); + Repr::Small { buf, len: len as u8 } + } else { + Repr::Large(Rc::new(s.into())) + }) + } + + /// Whether the string is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// The length of the string in bytes. + pub fn len(&self) -> usize { + match &self.0 { + Repr::Small { len, .. } => usize::from(*len), + Repr::Large(string) => string.len(), + } + } + + /// A string slice containing the entire string. + pub fn as_str(&self) -> &str { + self + } + + /// Append the given character at the end. + pub fn push(&mut self, c: char) { + match &mut self.0 { + Repr::Small { buf, len } => { + let prev = usize::from(*len); + if c.len_utf8() == 1 && prev < LIMIT { + buf[prev] = c as u8; + *len += 1; + } else { + self.push_str(c.encode_utf8(&mut [0; 4])); + } + } + Repr::Large(rc) => Rc::make_mut(rc).push(c), + } + } + + /// Append the given string slice at the end. + pub fn push_str(&mut self, string: &str) { + match &mut self.0 { + Repr::Small { buf, len } => { + let prev = usize::from(*len); + let new = prev + string.len(); + if new <= LIMIT { + buf[prev .. new].copy_from_slice(string.as_bytes()); + *len = new as u8; + } else { + let mut spilled = String::with_capacity(new); + spilled.push_str(self); + spilled.push_str(string); + self.0 = Repr::Large(Rc::new(spilled)); + } + } + Repr::Large(rc) => Rc::make_mut(rc).push_str(string), + } + } + + /// Remove the last character from the string. + pub fn pop(&mut self) -> Option { + let c = self.as_str().chars().rev().next()?; + match &mut self.0 { + Repr::Small { len, .. } => { + *len -= c.len_utf8() as u8; + } + Repr::Large(rc) => { + Rc::make_mut(rc).pop(); + } + } + Some(c) + } + + /// Clear the string. + pub fn clear(&mut self) { + match &mut self.0 { + Repr::Small { len, .. 
} => *len = 0, + Repr::Large(rc) => { + if Rc::strong_count(rc) == 1 { + Rc::make_mut(rc).clear(); + } else { + *self = Self::new(); + } + } + } + } + + /// Repeat this string `n` times. + pub fn repeat(&self, n: usize) -> Self { + if n == 0 { + return Self::new(); + } + + if let Repr::Small { buf, len } = &self.0 { + let prev = usize::from(*len); + let new = prev.saturating_mul(n); + if new <= LIMIT { + let src = &buf[.. prev]; + let mut buf = [0; LIMIT]; + for i in 0 .. n { + buf[prev * i .. prev * (i + 1)].copy_from_slice(src); + } + return Self(Repr::Small { buf, len: new as u8 }); + } + } + + self.as_str().repeat(n).into() + } +} + +impl Deref for EcoString { + type Target = str; + + fn deref(&self) -> &str { + match &self.0 { + // Safety: + // The buffer contents stem from correct UTF-8 sources: + // - Valid ASCII characters + // - Other string slices + // - Chars that were encoded with char::encode_utf8 + // Furthermore, we still do the bounds-check on the len in case + // it gets corrupted somehow. + Repr::Small { buf, len } => unsafe { + std::str::from_utf8_unchecked(&buf[.. 
usize::from(*len)]) + }, + Repr::Large(string) => string.as_str(), + } + } +} + +impl Default for EcoString { + fn default() -> Self { + Self::new() + } +} + +impl Debug for EcoString { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Debug::fmt(self.as_str(), f) + } +} + +impl Display for EcoString { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + Display::fmt(self.as_str(), f) + } +} + +impl Eq for EcoString {} + +impl PartialEq for EcoString { + fn eq(&self, other: &Self) -> bool { + self.as_str().eq(other.as_str()) + } +} + +impl PartialEq for EcoString { + fn eq(&self, other: &str) -> bool { + self.as_str().eq(other) + } +} + +impl PartialEq<&str> for EcoString { + fn eq(&self, other: &&str) -> bool { + self.as_str().eq(*other) + } +} + +impl Ord for EcoString { + fn cmp(&self, other: &Self) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for EcoString { + fn partial_cmp(&self, other: &Self) -> Option { + self.as_str().partial_cmp(other.as_str()) + } +} + +impl Hash for EcoString { + fn hash(&self, state: &mut H) { + self.as_str().hash(state); + } +} + +impl Write for EcoString { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } + + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c); + Ok(()) + } +} + +impl AsRef for EcoString { + fn as_ref(&self) -> &str { + self + } +} + +impl Borrow for EcoString { + fn borrow(&self) -> &str { + self + } +} + +impl From<&Self> for EcoString { + fn from(s: &Self) -> Self { + s.clone() + } +} + +impl From for EcoString { + fn from(c: char) -> Self { + let mut buf = [0; LIMIT]; + let len = c.encode_utf8(&mut buf).len(); + Self(Repr::Small { buf, len: len as u8 }) + } +} + +impl From<&str> for EcoString { + fn from(s: &str) -> Self { + Self::from_str(s) + } +} + +impl From for EcoString { + fn from(s: String) -> Self { + Self::from_str(s) + } +} + +impl From for String { + fn from(s: EcoString) -> Self { + match s.0 { + Repr::Small { .. 
} => s.as_str().to_owned(), + Repr::Large(rc) => match Rc::try_unwrap(rc) { + Ok(string) => string, + Err(rc) => (*rc).clone(), + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const ALPH: &str = "abcdefghijklmnopqrstuvwxyz"; + + #[test] + fn test_str_new() { + // Test inline strings. + assert_eq!(EcoString::new(), ""); + assert_eq!(EcoString::from('a'), "a"); + assert_eq!(EcoString::from('😀'), "😀"); + assert_eq!(EcoString::from("abc"), "abc"); + + // Test around the inline limit. + assert_eq!(EcoString::from(&ALPH[.. LIMIT - 1]), ALPH[.. LIMIT - 1]); + assert_eq!(EcoString::from(&ALPH[.. LIMIT]), ALPH[.. LIMIT]); + assert_eq!(EcoString::from(&ALPH[.. LIMIT + 1]), ALPH[.. LIMIT + 1]); + + // Test heap string. + assert_eq!(EcoString::from(ALPH), ALPH); + } + + #[test] + fn test_str_push() { + let mut v = EcoString::new(); + v.push('a'); + v.push('b'); + v.push_str("cd😀"); + assert_eq!(v, "abcd😀"); + assert_eq!(v.len(), 8); + + // Test fully filling the inline storage. + v.push_str("efghij"); + assert_eq!(v.len(), LIMIT); + + // Test spilling with `push`. + let mut a = v.clone(); + a.push('k'); + assert_eq!(a, "abcd😀efghijk"); + assert_eq!(a.len(), 15); + + // Test spilling with `push_str`. + let mut b = v.clone(); + b.push_str("klmn"); + assert_eq!(b, "abcd😀efghijklmn"); + assert_eq!(b.len(), 18); + + // v should be unchanged. + assert_eq!(v.len(), LIMIT); + } + + #[test] + fn test_str_pop() { + // Test with inline string. + let mut v = EcoString::from("Hello World!"); + assert_eq!(v.pop(), Some('!')); + assert_eq!(v, "Hello World"); + + // Remove one-by-one. + for _ in 0 .. 10 { + v.pop(); + } + + assert_eq!(v, "H"); + assert_eq!(v.pop(), Some('H')); + assert_eq!(v, ""); + assert!(v.is_empty()); + + // Test with large string. + let mut v = EcoString::from(ALPH); + assert_eq!(v.pop(), Some('z')); + assert_eq!(v.len(), 25); + } + + #[test] + fn test_str_index() { + // Test that we can use the index syntax. 
+ let v = EcoString::from("abc"); + assert_eq!(&v[.. 2], "ab"); + } + + #[test] + fn test_str_repeat() { + // Test with empty string. + assert_eq!(EcoString::new().repeat(0), ""); + assert_eq!(EcoString::new().repeat(100), ""); + + // Test non-spilling and spilling case. + let v = EcoString::from("abc"); + assert_eq!(v.repeat(0), ""); + assert_eq!(v.repeat(3), "abcabcabc"); + assert_eq!(v.repeat(5), "abcabcabcabcabc"); + } +} diff --git a/src/util/mac.rs b/src/util/mac.rs deleted file mode 100644 index 95e8fcd6..00000000 --- a/src/util/mac.rs +++ /dev/null @@ -1,25 +0,0 @@ -/// Decode mac roman encoded bytes into a string. -pub fn decode_mac_roman(coded: &[u8]) -> String { - coded.iter().copied().map(char_from_mac_roman).collect() -} - -/// Convert a mac roman coded character to a unicode char. -fn char_from_mac_roman(code: u8) -> char { - #[rustfmt::skip] - const TABLE: [char; 128] = [ - 'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è', - 'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü', - '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø', - '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø', - '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ', - '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', 'fi', 'fl', - '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô', - '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ', - ]; - - if code < 128 { - code as char - } else { - TABLE[(code - 128) as usize] - } -} diff --git a/src/util/mac_roman.rs b/src/util/mac_roman.rs new file mode 100644 index 00000000..95e8fcd6 --- /dev/null +++ b/src/util/mac_roman.rs @@ -0,0 +1,25 @@ +/// Decode mac roman encoded bytes into a string. 
+pub fn decode_mac_roman(coded: &[u8]) -> String { + coded.iter().copied().map(char_from_mac_roman).collect() +} + +/// Convert a mac roman coded character to a unicode char. +fn char_from_mac_roman(code: u8) -> char { + #[rustfmt::skip] + const TABLE: [char; 128] = [ + 'Ä', 'Å', 'Ç', 'É', 'Ñ', 'Ö', 'Ü', 'á', 'à', 'â', 'ä', 'ã', 'å', 'ç', 'é', 'è', + 'ê', 'ë', 'í', 'ì', 'î', 'ï', 'ñ', 'ó', 'ò', 'ô', 'ö', 'õ', 'ú', 'ù', 'û', 'ü', + '†', '°', '¢', '£', '§', '•', '¶', 'ß', '®', '©', '™', '´', '¨', '≠', 'Æ', 'Ø', + '∞', '±', '≤', '≥', '¥', 'µ', '∂', '∑', '∏', 'π', '∫', 'ª', 'º', 'Ω', 'æ', 'ø', + '¿', '¡', '¬', '√', 'ƒ', '≈', '∆', '«', '»', '…', '\u{a0}', 'À', 'Ã', 'Õ', 'Œ', 'œ', + '–', '—', '“', '”', '‘', '’', '÷', '◊', 'ÿ', 'Ÿ', '⁄', '€', '‹', '›', 'fi', 'fl', + '‡', '·', '‚', '„', '‰', 'Â', 'Ê', 'Á', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'Ó', 'Ô', + '\u{f8ff}', 'Ò', 'Ú', 'Û', 'Ù', 'ı', 'ˆ', '˜', '¯', '˘', '˙', '˚', '¸', '˝', '˛', 'ˇ', + ]; + + if code < 128 { + code as char + } else { + TABLE[(code - 128) as usize] + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 1abdcbc9..05dc9025 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,10 +1,10 @@ //! Utilities. -mod eco; -mod mac; +mod eco_string; +mod mac_roman; -pub use eco::EcoString; -pub use mac::decode_mac_roman; +pub use eco_string::EcoString; +pub use mac_roman::decode_mac_roman; use std::cell::RefMut; use std::cmp::Ordering; -- cgit v1.2.3