Skip to content

Streamline StringReader::bump #50566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/libsyntax/parse/lexer/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,19 @@ fn read_block_comment(rdr: &mut StringReader,
debug!(">>> block comment");
let p = rdr.pos;
let mut lines: Vec<String> = Vec::new();
let col = rdr.col;

// Count the number of chars since the start of the line by rescanning.
let mut src_index = rdr.src_index(rdr.filemap.line_begin_pos());
let end_src_index = rdr.src_index(rdr.pos);
assert!(src_index <= end_src_index);
let mut n = 0;
while src_index < end_src_index {
let c = char_at(&rdr.src, src_index);
src_index += c.len_utf8();
n += 1;
}
let col = CharPos(n);

rdr.bump();
rdr.bump();

Expand Down
106 changes: 42 additions & 64 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,11 @@ pub struct StringReader<'a> {
pub next_pos: BytePos,
/// The absolute offset within the codemap of the current character
pub pos: BytePos,
/// The column of the next character to read
pub col: CharPos,
/// The current character (which has been read from self.pos)
pub ch: Option<char>,
pub filemap: Lrc<syntax_pos::FileMap>,
/// If Some, stop reading the source at this position (inclusive).
pub terminator: Option<BytePos>,
/// Stop reading src at this index.
pub end_src_index: usize,
/// Whether to record new-lines and multibyte chars in filemap.
/// This is only necessary the first time a filemap is lexed.
/// If part of a filemap is being re-lexed, this should be set to false.
Expand All @@ -61,7 +59,7 @@ pub struct StringReader<'a> {
pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
// cache a direct reference to the source text, so that we don't have to
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
source_text: Lrc<String>,
src: Lrc<String>,
/// Stack of open delimiters and their spans. Used for error message.
token: token::Token,
span: Span,
Expand Down Expand Up @@ -113,14 +111,7 @@ impl<'a> StringReader<'a> {
self.unwrap_or_abort(res)
}
fn is_eof(&self) -> bool {
if self.ch.is_none() {
return true;
}

match self.terminator {
Some(t) => self.next_pos > t,
None => false,
}
self.ch.is_none()
}
/// Return the next token. EFFECT: advances the string_reader.
pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
Expand Down Expand Up @@ -176,21 +167,20 @@ impl<'a> StringReader<'a> {
filemap.name));
}

let source_text = (*filemap.src.as_ref().unwrap()).clone();
let src = (*filemap.src.as_ref().unwrap()).clone();

StringReader {
sess,
next_pos: filemap.start_pos,
pos: filemap.start_pos,
col: CharPos(0),
ch: Some('\n'),
filemap,
terminator: None,
end_src_index: src.len(),
save_new_lines_and_multibyte: true,
// dummy values; not read
peek_tok: token::Eof,
peek_span: syntax_pos::DUMMY_SP,
source_text,
src,
fatal_errs: Vec::new(),
token: token::Eof,
span: syntax_pos::DUMMY_SP,
Expand Down Expand Up @@ -222,7 +212,7 @@ impl<'a> StringReader<'a> {
// Seek the lexer to the right byte range.
sr.save_new_lines_and_multibyte = false;
sr.next_pos = span.lo();
sr.terminator = Some(span.hi());
sr.end_src_index = sr.src_index(span.hi());

sr.bump();

Expand Down Expand Up @@ -326,9 +316,7 @@ impl<'a> StringReader<'a> {
/// offending string to the error message
fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError {
m.push_str(": ");
let from = self.byte_offset(from_pos).to_usize();
let to = self.byte_offset(to_pos).to_usize();
m.push_str(&self.source_text[from..to]);
m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]);
self.fatal_span_(from_pos, to_pos, &m[..])
}

Expand All @@ -354,8 +342,9 @@ impl<'a> StringReader<'a> {
Ok(())
}

fn byte_offset(&self, pos: BytePos) -> BytePos {
(pos - self.filemap.start_pos)
#[inline]
fn src_index(&self, pos: BytePos) -> usize {
(pos - self.filemap.start_pos).to_usize()
}

/// Calls `f` with a string slice of the source text spanning from `start`
Expand Down Expand Up @@ -386,7 +375,7 @@ impl<'a> StringReader<'a> {
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
{
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
f(&self.src[self.src_index(start)..self.src_index(end)])
}

/// Converts CRLF to LF in the given string, raising an error on bare CR.
Expand Down Expand Up @@ -438,47 +427,39 @@ impl<'a> StringReader<'a> {
}
}


/// Advance the StringReader by one character. If a newline is
/// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(&mut self) {
let new_pos = self.next_pos;
let new_byte_offset = self.byte_offset(new_pos).to_usize();
let end = self.terminator.map_or(self.source_text.len(), |t| {
self.byte_offset(t).to_usize()
});
if new_byte_offset < end {
let old_ch_is_newline = self.ch.unwrap() == '\n';
let new_ch = char_at(&self.source_text, new_byte_offset);
let new_ch_len = new_ch.len_utf8();

self.ch = Some(new_ch);
self.pos = new_pos;
self.next_pos = new_pos + Pos::from_usize(new_ch_len);
if old_ch_is_newline {
let next_src_index = self.src_index(self.next_pos);
if next_src_index < self.end_src_index {
let next_ch = char_at(&self.src, next_src_index);
let next_ch_len = next_ch.len_utf8();

if self.ch.unwrap() == '\n' {
if self.save_new_lines_and_multibyte {
self.filemap.next_line(self.pos);
self.filemap.next_line(self.next_pos);
}
self.col = CharPos(0);
} else {
self.col = self.col + CharPos(1);
}
if new_ch_len > 1 {
if next_ch_len > 1 {
if self.save_new_lines_and_multibyte {
self.filemap.record_multibyte_char(self.pos, new_ch_len);
self.filemap.record_multibyte_char(self.next_pos, next_ch_len);
}
}
self.filemap.record_width(self.pos, new_ch);
self.filemap.record_width(self.next_pos, next_ch);

self.ch = Some(next_ch);
self.pos = self.next_pos;
self.next_pos = self.next_pos + Pos::from_usize(next_ch_len);
} else {
self.ch = None;
self.pos = new_pos;
self.pos = self.next_pos;
}
}

pub fn nextch(&self) -> Option<char> {
let offset = self.byte_offset(self.next_pos).to_usize();
if offset < self.source_text.len() {
Some(char_at(&self.source_text, offset))
let next_src_index = self.src_index(self.next_pos);
if next_src_index < self.end_src_index {
Some(char_at(&self.src, next_src_index))
} else {
None
}
Expand All @@ -489,17 +470,15 @@ impl<'a> StringReader<'a> {
}

pub fn nextnextch(&self) -> Option<char> {
let offset = self.byte_offset(self.next_pos).to_usize();
let s = &self.source_text[..];
if offset >= s.len() {
return None;
}
let next = offset + char_at(s, offset).len_utf8();
if next < s.len() {
Some(char_at(s, next))
} else {
None
let next_src_index = self.src_index(self.next_pos);
if next_src_index < self.end_src_index {
let next_next_src_index =
next_src_index + char_at(&self.src, next_src_index).len_utf8();
if next_next_src_index < self.end_src_index {
return Some(char_at(&self.src, next_next_src_index));
}
}
None
}

pub fn nextnextch_is(&self, c: char) -> bool {
Expand Down Expand Up @@ -1359,8 +1338,8 @@ impl<'a> StringReader<'a> {
loop {
self.bump();
if self.ch_is('\'') {
let start = self.byte_offset(start).to_usize();
let end = self.byte_offset(self.pos).to_usize();
let start = self.src_index(start);
let end = self.src_index(self.pos);
self.bump();
let span = self.mk_sp(start_with_quote, self.pos);
self.sess.span_diagnostic
Expand All @@ -1369,8 +1348,7 @@ impl<'a> StringReader<'a> {
.span_suggestion(span,
"if you meant to write a `str` literal, \
use double quotes",
format!("\"{}\"",
&self.source_text[start..end]))
format!("\"{}\"", &self.src[start..end]))
.emit();
return Ok(token::Literal(token::Str_(Symbol::intern("??")), None))
}
Expand Down
1 change: 1 addition & 0 deletions src/libsyntax/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/// Returns the `char` that starts at byte offset `byte` within `s`.
///
/// # Panics
///
/// Panics if `byte` is past the end of `s` or does not fall on a UTF-8
/// character boundary (the slice operation fails), or if `byte == s.len()`
/// so that no character remains to be read.
#[inline]
pub fn char_at(s: &str, byte: usize) -> char {
    // Slice off everything before `byte`, then decode the first char of the tail.
    let tail = &s[byte..];
    let mut remaining = tail.chars();
    remaining.next().unwrap()
}
10 changes: 10 additions & 0 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,15 @@ impl FileMap {
lines.push(pos);
}

/// Return the BytePos of the beginning of the current line.
///
/// This is the most recently recorded entry in `self.lines`; when no line
/// start has been recorded yet, `self.start_pos` is returned instead.
pub fn line_begin_pos(&self) -> BytePos {
    let line_starts = self.lines.borrow();
    line_starts.last().map_or(self.start_pos, |&begin| begin)
}

/// Add externally loaded source.
/// If the hash of the input doesn't match or no input is supplied via None,
/// it is interpreted as an error and the corresponding enum variant is set.
Expand Down Expand Up @@ -1047,6 +1056,7 @@ impl FileMap {
self.multibyte_chars.borrow_mut().push(mbc);
}

#[inline]
pub fn record_width(&self, pos: BytePos, ch: char) {
let width = match ch {
'\t' =>
Expand Down