Skip to content

Commit 4f62442

Browse files
committed
Implement RFC 3503: frontmatters
Supersedes #137193
1 parent d2eadb7 commit 4f62442

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+948
-22
lines changed

compiler/rustc_ast_passes/src/feature_gate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
514514
gate_all!(contracts_internals, "contract internal machinery is for internal use only");
515515
gate_all!(where_clause_attrs, "attributes in `where` clause are unstable");
516516
gate_all!(super_let, "`super let` is experimental");
517+
gate_all!(frontmatter, "frontmatters are experimental");
517518

518519
if !visitor.features.never_patterns() {
519520
if let Some(spans) = spans.get(&sym::never_patterns) {

compiler/rustc_feature/src/unstable.rs

+2
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,8 @@ declare_features! (
506506
(incomplete, fn_delegation, "1.76.0", Some(118212)),
507507
/// Allows impls for the Freeze trait.
508508
(internal, freeze_impls, "1.78.0", Some(121675)),
509+
/// Frontmatter `---` blocks for use by external tools.
510+
(unstable, frontmatter, "CURRENT_RUSTC_VERSION", Some(136889)),
509511
/// Allows defining gen blocks and `gen fn`.
510512
(unstable, gen_blocks, "1.75.0", Some(117078)),
511513
/// Infer generic args for both consts and types.

compiler/rustc_lexer/src/cursor.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
use std::str::Chars;
22

3+
/// Whether a [`Cursor`] is still permitted to lex a frontmatter block.
///
/// The cursor only attempts frontmatter lexing while this is `Yes`, and it
/// flips itself to `No` after producing the first non-whitespace token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// A frontmatter opening may still be recognized.
    Yes,
    /// Frontmatter lexing is disabled; `---` is lexed as ordinary tokens.
    No,
}
7+
38
/// Peekable iterator over a char sequence.
49
///
510
/// Next characters can be peeked via `first` method,
@@ -8,17 +13,19 @@ pub struct Cursor<'a> {
813
len_remaining: usize,
914
/// Iterator over chars. Slightly faster than a &str.
1015
chars: Chars<'a>,
16+
pub(crate) frontmatter_allowed: FrontmatterAllowed,
1117
#[cfg(debug_assertions)]
1218
prev: char,
1319
}
1420

1521
pub(crate) const EOF_CHAR: char = '\0';
1622

1723
impl<'a> Cursor<'a> {
18-
pub fn new(input: &'a str) -> Cursor<'a> {
24+
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
1925
Cursor {
2026
len_remaining: input.len(),
2127
chars: input.chars(),
28+
frontmatter_allowed,
2229
#[cfg(debug_assertions)]
2330
prev: EOF_CHAR,
2431
}
@@ -95,6 +102,11 @@ impl<'a> Cursor<'a> {
95102
Some(c)
96103
}
97104

105+
/// Moves to a substring by a number of bytes.
106+
pub(crate) fn bump_bytes(&mut self, n: usize) {
107+
self.chars = self.as_str()[n..].chars();
108+
}
109+
98110
/// Eats symbols while predicate returns true or until the end of file is reached.
99111
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
100112
// It was tried making optimized version of this for eg. line comments, but

compiler/rustc_lexer/src/lib.rs

+139-7
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
3535

3636
use self::LiteralKind::*;
3737
use self::TokenKind::*;
38-
pub use crate::cursor::Cursor;
3938
use crate::cursor::EOF_CHAR;
39+
pub use crate::cursor::{Cursor, FrontmatterAllowed};
4040

4141
/// Parsed token.
4242
/// It doesn't contain information about data that has been parsed,
@@ -57,17 +57,27 @@ impl Token {
5757
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
5858
pub enum TokenKind {
5959
/// A line comment, e.g. `// comment`.
60-
LineComment { doc_style: Option<DocStyle> },
60+
LineComment {
61+
doc_style: Option<DocStyle>,
62+
},
6163

6264
/// A block comment, e.g. `/* block comment */`.
6365
///
6466
/// Block comments can be recursive, so a sequence like `/* /* */`
6567
/// will not be considered terminated and will result in a parsing error.
66-
BlockComment { doc_style: Option<DocStyle>, terminated: bool },
68+
BlockComment {
69+
doc_style: Option<DocStyle>,
70+
terminated: bool,
71+
},
6772

6873
/// Any whitespace character sequence.
6974
Whitespace,
7075

76+
Frontmatter {
77+
has_invalid_preceding_whitespace: bool,
78+
invalid_infostring: bool,
79+
},
80+
7181
/// An identifier or keyword, e.g. `ident` or `continue`.
7282
Ident,
7383

@@ -109,10 +119,15 @@ pub enum TokenKind {
109119
/// this type will need to check for and reject that case.
110120
///
111121
/// See [LiteralKind] for more details.
112-
Literal { kind: LiteralKind, suffix_start: u32 },
122+
Literal {
123+
kind: LiteralKind,
124+
suffix_start: u32,
125+
},
113126

114127
/// A lifetime, e.g. `'a`.
115-
Lifetime { starts_with_number: bool },
128+
Lifetime {
129+
starts_with_number: bool,
130+
},
116131

117132
/// `;`
118133
Semi,
@@ -280,7 +295,7 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
280295
#[inline]
281296
pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
282297
debug_assert!(!input.is_empty());
283-
let mut cursor = Cursor::new(input);
298+
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
284299
// Move past the leading `r` or `br`.
285300
for _ in 0..prefix_len {
286301
cursor.bump().unwrap();
@@ -290,7 +305,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
290305

291306
/// Creates an iterator that produces tokens from the input string.
292307
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
293-
let mut cursor = Cursor::new(input);
308+
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
294309
std::iter::from_fn(move || {
295310
let token = cursor.advance_token();
296311
if token.kind != TokenKind::Eof { Some(token) } else { None }
@@ -361,7 +376,34 @@ impl Cursor<'_> {
361376
Some(c) => c,
362377
None => return Token::new(TokenKind::Eof, 0),
363378
};
379+
364380
let token_kind = match first_char {
381+
c if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
382+
&& is_whitespace(c) =>
383+
{
384+
let mut last = first_char;
385+
while is_whitespace(self.first()) {
386+
let Some(c) = self.bump() else {
387+
break;
388+
};
389+
last = c;
390+
}
391+
// invalid frontmatter opening as whitespace preceding it isn't newline.
392+
// combine the whitespace and the frontmatter to a single token as we shall
393+
// error later.
394+
if last != '\n' && self.as_str().starts_with("---") {
395+
self.bump();
396+
self.frontmatter(true)
397+
} else {
398+
Whitespace
399+
}
400+
}
401+
'-' if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
402+
&& self.as_str().starts_with("--") =>
403+
{
404+
// happy path
405+
self.frontmatter(false)
406+
}
365407
// Slash, comment or block comment.
366408
'/' => match self.first() {
367409
'/' => self.line_comment(),
@@ -464,11 +506,101 @@ impl Cursor<'_> {
464506
c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
465507
_ => Unknown,
466508
};
509+
if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
510+
&& !matches!(token_kind, Whitespace)
511+
{
512+
// stop allowing frontmatters after first non-whitespace token
513+
self.frontmatter_allowed = FrontmatterAllowed::No;
514+
}
467515
let res = Token::new(token_kind, self.pos_within_token());
468516
self.reset_pos_within_token();
469517
res
470518
}
471519

520+
/// Given that one `-` was eaten, eat the rest of the frontmatter.
521+
fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
522+
debug_assert_eq!('-', self.prev());
523+
524+
let pos = self.pos_within_token();
525+
self.eat_while(|c| c == '-');
526+
527+
// one `-` is eaten by the caller.
528+
let length_opening = self.pos_within_token() - pos + 1;
529+
530+
// must be ensured by the caller
531+
debug_assert!(length_opening >= 3);
532+
533+
self.eat_identifier();
534+
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
535+
let invalid_infostring = self.first() != '\n';
536+
537+
let mut s = self.as_str();
538+
let mut found = false;
539+
while let Some(closing) = s.find(&"-".repeat(length_opening as usize)) {
540+
let preceding_chars_start = s[..closing].rfind("\n").map_or(0, |i| i + 1);
541+
if s[preceding_chars_start..closing].chars().all(is_whitespace) {
542+
// candidate found
543+
self.bump_bytes(closing);
544+
// in case like
545+
// ---cargo
546+
// --- blahblah
547+
// or
548+
// ---cargo
549+
// ----
550+
// combine those stuff into this frontmatter token such that it gets detected later.
551+
self.eat_until(b'\n');
552+
found = true;
553+
break;
554+
} else {
555+
s = &s[closing + length_opening as usize..];
556+
}
557+
}
558+
559+
if !found {
560+
// recovery strategy: a closing statement might have precending whitespace/newline
561+
// but not have enough dashes to properly close. In this case, we eat until there,
562+
// and report a mismatch in the parser.
563+
let mut rest = self.as_str();
564+
// We can look for a shorter closing (starting with four dashes but closing with three)
565+
// and other indications that Rust has started and the infostring has ended.
566+
let mut potential_closing = rest
567+
.find("\n---")
568+
// n.b. only in the case where there are dashes, we move the index to the line where
569+
// the dashes start as we eat to include that line. For other cases those are Rust code
570+
// and not included in the frontmatter.
571+
.map(|x| x + 1)
572+
.or_else(|| rest.find("\nuse"))
573+
.or_else(|| rest.find("\n//!"))
574+
.or_else(|| rest.find("\n#!["));
575+
576+
if potential_closing.is_none() {
577+
// a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
578+
// on a standalone line. Might be wrong.
579+
while let Some(closing) = rest.find("---") {
580+
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
581+
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
582+
// candidate found
583+
potential_closing = Some(closing);
584+
break;
585+
} else {
586+
rest = &rest[closing + 3..];
587+
}
588+
}
589+
}
590+
591+
if let Some(potential_closing) = potential_closing {
592+
// bump to the potential closing, and eat everything on that line.
593+
self.bump_bytes(potential_closing);
594+
self.eat_until(b'\n');
595+
} else {
596+
// eat everything. this will get reported as an unclosed frontmatter.
597+
self.eat_while(|_| true);
598+
}
599+
}
600+
601+
Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
602+
}
603+
472604
fn line_comment(&mut self) -> TokenKind {
473605
debug_assert!(self.prev() == '/' && self.first() == '/');
474606
self.bump();

compiler/rustc_lexer/src/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::*;
44

55
fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
66
let s = &format!("r{}", s);
7-
let mut cursor = Cursor::new(s);
7+
let mut cursor = Cursor::new(s, FrontmatterAllowed::No);
88
cursor.bump();
99
let res = cursor.raw_double_quoted_string(0);
1010
assert_eq!(res, expected);

compiler/rustc_parse/messages.ftl

+13
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,19 @@ parse_forgot_paren = perhaps you forgot parentheses?
297297
parse_found_expr_would_be_stmt = expected expression, found `{$token}`
298298
.label = expected expression
299299
300+
parse_frontmatter_extra_characters_after_close = extra characters after frontmatter close are not allowed
301+
parse_frontmatter_invalid_close_preceding_whitespace = invalid preceding whitespace for frontmatter close
302+
.note = frontmatter close should not be preceded by whitespace
303+
parse_frontmatter_invalid_infostring = invalid infostring for frontmatter
304+
.note = frontmatter infostrings must be a single identifier immediately following the opening
305+
parse_frontmatter_invalid_opening_preceding_whitespace = invalid preceding whitespace for frontmatter opening
306+
.note = frontmatter opening should not be preceded by whitespace
307+
parse_frontmatter_length_mismatch = frontmatter close does not match the opening
308+
.label_opening = the opening here has {$len_opening} dashes...
309+
.label_close = ...while the close has {$len_close} dashes
310+
parse_frontmatter_unclosed = unclosed frontmatter
311+
.note = frontmatter opening here was not closed
312+
300313
parse_function_body_equals_expr = function body cannot be `= expression;`
301314
.suggestion = surround the expression with `{"{"}` and `{"}"}` instead of `=` and `;`
302315

compiler/rustc_parse/src/errors.rs

+55
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,61 @@ pub(crate) struct FoundExprWouldBeStmt {
735735
pub suggestion: ExprParenthesesNeeded,
736736
}
737737

738+
/// Diagnostic for `parse_frontmatter_extra_characters_after_close`:
/// "extra characters after frontmatter close are not allowed".
#[derive(Diagnostic)]
739+
#[diag(parse_frontmatter_extra_characters_after_close)]
740+
pub(crate) struct FrontmatterExtraCharactersAfterClose {
741+
/// Primary span the error is reported at.
#[primary_span]
742+
pub span: Span,
743+
}
744+
745+
/// Diagnostic for `parse_frontmatter_invalid_infostring`:
/// "invalid infostring for frontmatter".
///
/// The struct-level `#[note]` adds the message's `.note` ("frontmatter
/// infostrings must be a single identifier immediately following the
/// opening").
#[derive(Diagnostic)]
746+
#[diag(parse_frontmatter_invalid_infostring)]
747+
#[note]
748+
pub(crate) struct FrontmatterInvalidInfostring {
749+
/// Primary span the error is reported at.
#[primary_span]
750+
pub span: Span,
751+
}
752+
753+
/// Diagnostic for `parse_frontmatter_invalid_opening_preceding_whitespace`:
/// "invalid preceding whitespace for frontmatter opening".
#[derive(Diagnostic)]
754+
#[diag(parse_frontmatter_invalid_opening_preceding_whitespace)]
755+
pub(crate) struct FrontmatterInvalidOpeningPrecedingWhitespace {
756+
/// Primary span the error is reported at.
#[primary_span]
757+
pub span: Span,
758+
/// Span the `.note` ("frontmatter opening should not be preceded by
/// whitespace") is attached to.
#[note]
759+
pub note_span: Span,
760+
}
761+
762+
/// Diagnostic for `parse_frontmatter_unclosed`: "unclosed frontmatter".
#[derive(Diagnostic)]
763+
#[diag(parse_frontmatter_unclosed)]
764+
pub(crate) struct FrontmatterUnclosed {
765+
/// Primary span the error is reported at.
#[primary_span]
766+
pub span: Span,
767+
/// Span the `.note` ("frontmatter opening here was not closed") is
/// attached to.
#[note]
768+
pub note_span: Span,
769+
}
770+
771+
/// Diagnostic for `parse_frontmatter_invalid_close_preceding_whitespace`:
/// "invalid preceding whitespace for frontmatter close".
#[derive(Diagnostic)]
772+
#[diag(parse_frontmatter_invalid_close_preceding_whitespace)]
773+
pub(crate) struct FrontmatterInvalidClosingPrecedingWhitespace {
774+
/// Primary span the error is reported at.
#[primary_span]
775+
pub span: Span,
776+
/// Span the `.note` ("frontmatter close should not be preceded by
/// whitespace") is attached to.
#[note]
777+
pub note_span: Span,
778+
}
779+
780+
/// Diagnostic for `parse_frontmatter_length_mismatch`:
/// "frontmatter close does not match the opening".
#[derive(Diagnostic)]
781+
#[diag(parse_frontmatter_length_mismatch)]
782+
pub(crate) struct FrontmatterLengthMismatch {
783+
/// Primary span the error is reported at.
#[primary_span]
784+
pub span: Span,
785+
/// Span labelled with `.label_opening`
/// ("the opening here has {$len_opening} dashes...").
#[label(parse_label_opening)]
786+
pub opening: Span,
787+
/// Span labelled with `.label_close`
/// ("...while the close has {$len_close} dashes").
#[label(parse_label_close)]
788+
pub close: Span,
789+
/// Dash count interpolated as `{$len_opening}` in the opening label.
pub len_opening: usize,
790+
/// Dash count interpolated as `{$len_close}` in the close label.
pub len_close: usize,
791+
}
792+
738793
#[derive(Diagnostic)]
739794
#[diag(parse_leading_plus_not_supported)]
740795
pub(crate) struct LeadingPlusNotSupported {

0 commit comments

Comments
 (0)