summary | refs | log | tree | commit | diff
path: root/src/parse
diff options
context:
space:
mode:
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/incremental.rs 8
-rw-r--r-- src/parse/mod.rs 39
-rw-r--r-- src/parse/parser.rs 84
-rw-r--r-- src/parse/tokens.rs 17
4 files changed, 100 insertions, 48 deletions
diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs
index 9dd5bec1..fb927c24 100644
--- a/src/parse/incremental.rs
+++ b/src/parse/incremental.rs
@@ -435,10 +435,12 @@ impl NodeKind {
| Self::LeftParen
| Self::RightParen => SuccessionRule::Unsafe,
+ // These work similarly to parentheses.
+ Self::Star | Self::Underscore => SuccessionRule::Unsafe,
+
// Replacing an operator can change whether the parent is an
- // operation which makes it unsafe. The star can appear in markup.
- Self::Star
- | Self::Comma
+ // operation which makes it unsafe.
+ Self::Comma
| Self::Semicolon
| Self::Colon
| Self::Plus
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index a9839ed6..b8ef3066 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -21,7 +21,7 @@ use crate::util::EcoString;
/// Parse a source file.
pub fn parse(src: &str) -> Rc<GreenNode> {
let mut p = Parser::new(src, TokenMode::Markup);
- markup(&mut p);
+ markup(&mut p, true);
match p.finish().into_iter().next() {
Some(Green::Node(node)) => node,
_ => unreachable!(),
@@ -61,7 +61,7 @@ pub fn parse_markup(
) -> Option<(Vec<Green>, bool)> {
let mut p = Parser::with_prefix(prefix, src, TokenMode::Markup);
if min_column == 0 {
- markup(&mut p);
+ markup(&mut p, true);
} else {
markup_indented(&mut p, min_column);
}
@@ -128,8 +128,8 @@ pub fn parse_comment(
}
/// Parse markup.
-fn markup(p: &mut Parser) {
- markup_while(p, true, 0, &mut |_| true)
+fn markup(p: &mut Parser, at_start: bool) {
+ markup_while(p, at_start, 0, &mut |_| true)
}
/// Parse markup that stays right of the given column.
@@ -191,8 +191,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::EnDash
| NodeKind::EmDash
| NodeKind::NonBreakingSpace
- | NodeKind::Emph
- | NodeKind::Strong
| NodeKind::Linebreak
| NodeKind::Raw(_)
| NodeKind::Math(_)
@@ -200,6 +198,9 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
p.eat();
}
+ // Grouping markup.
+ NodeKind::Star => strong(p),
+ NodeKind::Underscore => emph(p),
NodeKind::Eq => heading(p, *at_start),
NodeKind::Minus => list_node(p, *at_start),
NodeKind::EnumNumbering(_) => enum_node(p, *at_start),
@@ -227,6 +228,24 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
*at_start = false;
}
+/// Parse strong content.
+fn strong(p: &mut Parser) {
+ p.perform(NodeKind::Strong, |p| {
+ p.start_group(Group::Strong);
+ markup(p, false);
+ p.end_group();
+ })
+}
+
+/// Parse emphasized content.
+fn emph(p: &mut Parser) {
+ p.perform(NodeKind::Emph, |p| {
+ p.start_group(Group::Emph);
+ markup(p, false);
+ p.end_group();
+ })
+}
+
/// Parse a heading.
fn heading(p: &mut Parser, at_start: bool) {
let marker = p.marker();
@@ -234,7 +253,7 @@ fn heading(p: &mut Parser, at_start: bool) {
p.eat_assert(&NodeKind::Eq);
while p.eat_if(&NodeKind::Eq) {}
- if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
+ if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
let column = p.column(p.prev_end());
markup_indented(p, column);
marker.end(p, NodeKind::Heading);
@@ -250,7 +269,7 @@ fn list_node(p: &mut Parser, at_start: bool) {
let text: EcoString = p.peek_src().into();
p.eat_assert(&NodeKind::Minus);
- if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
+ if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
let column = p.column(p.prev_end());
markup_indented(p, column);
marker.end(p, NodeKind::List);
@@ -265,7 +284,7 @@ fn enum_node(p: &mut Parser, at_start: bool) {
let text: EcoString = p.peek_src().into();
p.eat();
- if at_start && p.peek().map_or(true, |kind| kind.is_whitespace()) {
+ if at_start && p.peek().map_or(true, |kind| kind.is_space()) {
let column = p.column(p.prev_end());
markup_indented(p, column);
marker.end(p, NodeKind::Enum);
@@ -620,7 +639,7 @@ fn params(p: &mut Parser, marker: Marker) {
fn template(p: &mut Parser) {
p.perform(NodeKind::Template, |p| {
p.start_group(Group::Bracket);
- markup(p);
+ markup(p, true);
p.end_group();
});
}
diff --git a/src/parse/parser.rs b/src/parse/parser.rs
index 0184c198..db003e72 100644
--- a/src/parse/parser.rs
+++ b/src/parse/parser.rs
@@ -239,17 +239,18 @@ impl<'s> Parser<'s> {
pub fn start_group(&mut self, kind: Group) {
self.groups.push(GroupEntry { kind, prev_mode: self.tokens.mode() });
self.tokens.set_mode(match kind {
- Group::Bracket => TokenMode::Markup,
- _ => TokenMode::Code,
+ Group::Bracket | Group::Strong | Group::Emph => TokenMode::Markup,
+ Group::Paren | Group::Brace | Group::Expr | Group::Imports => TokenMode::Code,
});
- self.repeek();
match kind {
Group::Paren => self.eat_assert(&NodeKind::LeftParen),
Group::Bracket => self.eat_assert(&NodeKind::LeftBracket),
Group::Brace => self.eat_assert(&NodeKind::LeftBrace),
- Group::Expr => {}
- Group::Imports => {}
+ Group::Strong => self.eat_assert(&NodeKind::Star),
+ Group::Emph => self.eat_assert(&NodeKind::Underscore),
+ Group::Expr => self.repeek(),
+ Group::Imports => self.repeek(),
}
}
@@ -273,6 +274,8 @@ impl<'s> Parser<'s> {
Group::Paren => Some((NodeKind::RightParen, true)),
Group::Bracket => Some((NodeKind::RightBracket, true)),
Group::Brace => Some((NodeKind::RightBrace, true)),
+ Group::Strong => Some((NodeKind::Star, true)),
+ Group::Emph => Some((NodeKind::Underscore, true)),
Group::Expr => Some((NodeKind::Semicolon, false)),
Group::Imports => None,
} {
@@ -322,9 +325,11 @@ impl<'s> Parser<'s> {
Some(NodeKind::RightParen) => self.inside(Group::Paren),
Some(NodeKind::RightBracket) => self.inside(Group::Bracket),
Some(NodeKind::RightBrace) => self.inside(Group::Brace),
+ Some(NodeKind::Star) => self.inside(Group::Strong),
+ Some(NodeKind::Underscore) => self.inside(Group::Emph),
Some(NodeKind::Semicolon) => self.inside(Group::Expr),
Some(NodeKind::From) => self.inside(Group::Imports),
- Some(NodeKind::Space(n)) => *n >= 1 && self.stop_at_newline(),
+ Some(NodeKind::Space(n)) => self.space_ends_group(*n),
Some(_) => false,
None => true,
};
@@ -332,31 +337,34 @@ impl<'s> Parser<'s> {
/// Returns whether the given type can be skipped over.
fn is_trivia(&self, token: &NodeKind) -> bool {
- Self::is_trivia_ext(token, self.stop_at_newline())
- }
-
- /// Returns whether the given type can be skipped over given the current
- /// newline mode.
- fn is_trivia_ext(token: &NodeKind, stop_at_newline: bool) -> bool {
match token {
- NodeKind::Space(n) => *n == 0 || !stop_at_newline,
+ NodeKind::Space(n) => !self.space_ends_group(*n),
NodeKind::LineComment => true,
NodeKind::BlockComment => true,
_ => false,
}
}
- /// Whether the active group must end at a newline.
- fn stop_at_newline(&self) -> bool {
- matches!(
- self.groups.last().map(|group| group.kind),
- Some(Group::Expr | Group::Imports)
- )
+ /// Whether a space with the given number of newlines ends the current group.
+ fn space_ends_group(&self, n: usize) -> bool {
+ if n == 0 {
+ return false;
+ }
+
+ match self.groups.last().map(|group| group.kind) {
+ Some(Group::Strong | Group::Emph) => n >= 2,
+ Some(Group::Expr | Group::Imports) => n >= 1,
+ _ => false,
+ }
}
- /// Whether we are inside the given group.
+ /// Whether we are inside the given group (can be nested).
fn inside(&self, kind: Group) -> bool {
- self.groups.iter().any(|g| g.kind == kind)
+ self.groups
+ .iter()
+ .rev()
+ .take_while(|g| !kind.is_weak() || g.kind.is_weak())
+ .any(|g| g.kind == kind)
}
}
@@ -431,15 +439,20 @@ impl Marker {
F: Fn(&Green) -> Result<(), &'static str>,
{
for child in &mut p.children[self.0 ..] {
- if (p.tokens.mode() == TokenMode::Markup
- || !Parser::is_trivia_ext(child.kind(), false))
- && !child.kind().is_error()
- {
- if let Err(msg) = f(child) {
- let error = NodeKind::Error(ErrorPos::Full, msg.into());
- let inner = mem::take(child);
- *child = GreenNode::with_child(error, inner).into();
- }
+ // Don't expose errors.
+ if child.kind().is_error() {
+ continue;
+ }
+
+ // Don't expose trivia in code.
+ if p.tokens.mode() == TokenMode::Code && child.kind().is_trivia() {
+ continue;
+ }
+
+ if let Err(msg) = f(child) {
+ let error = NodeKind::Error(ErrorPos::Full, msg.into());
+ let inner = mem::take(child);
+ *child = GreenNode::with_child(error, inner).into();
}
}
}
@@ -485,12 +498,23 @@ pub enum Group {
Brace,
/// A parenthesized group: `(...)`.
Paren,
+ /// A group surrounded with stars: `*...*`.
+ Strong,
+ /// A group surrounded with underscores: `_..._`.
+ Emph,
/// A group ended by a semicolon or a line break: `;`, `\n`.
Expr,
/// A group for import items, ended by a semicolon, line break or `from`.
Imports,
}
+impl Group {
+ /// Whether the group can only force other weak groups to end.
+ fn is_weak(self) -> bool {
+ matches!(self, Group::Strong | Group::Emph)
+ }
+}
+
/// Allows parser methods to use the try operator. Never returned top-level
/// because the parser recovers from all errors.
pub type ParseResult<T = ()> = Result<T, ParseError>;
diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs
index eef7a72d..d741dea1 100644
--- a/src/parse/tokens.rs
+++ b/src/parse/tokens.rs
@@ -123,8 +123,8 @@ impl<'s> Tokens<'s> {
// Markup.
'~' => NodeKind::NonBreakingSpace,
- '*' => NodeKind::Strong,
- '_' => NodeKind::Emph,
+ '*' if !self.in_word() => NodeKind::Star,
+ '_' if !self.in_word() => NodeKind::Underscore,
'`' => self.raw(),
'$' => self.math(),
'-' => self.hyph(),
@@ -527,6 +527,13 @@ impl<'s> Tokens<'s> {
NodeKind::BlockComment
}
+ fn in_word(&self) -> bool {
+ let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
+ let prev = self.s.get(.. self.s.last_index()).chars().next_back();
+ let next = self.s.peek();
+ alphanumeric(prev) && alphanumeric(next)
+ }
+
fn maybe_in_url(&self) -> bool {
self.mode == TokenMode::Markup && self.s.eaten().ends_with(":/")
}
@@ -651,7 +658,7 @@ mod tests {
('/', None, "[", LeftBracket),
('/', None, "//", LineComment),
('/', None, "/**/", BlockComment),
- ('/', Some(Markup), "*", Strong),
+ ('/', Some(Markup), "*", Star),
('/', Some(Markup), "$ $", Math(" ", false)),
('/', Some(Markup), r"\\", Escape('\\')),
('/', Some(Markup), "#let", Let),
@@ -790,8 +797,8 @@ mod tests {
#[test]
fn test_tokenize_markup_symbols() {
// Test markup tokens.
- t!(Markup[" a1"]: "*" => Strong);
- t!(Markup: "_" => Emph);
+ t!(Markup[" a1"]: "*" => Star);
+ t!(Markup: "_" => Underscore);
t!(Markup[""]: "===" => Eq, Eq, Eq);
t!(Markup["a1/"]: "= " => Eq, Space(0));
t!(Markup: "~" => NonBreakingSpace);