Skip to content

ref: Sort tokens in SourceMap #91

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Changelog

## Unreleased

### Various fixes & improvements

- ref: Tokens within a sourcemap are now always sorted by their position in the
minified file (#91) by @loewenheim.
Consequently:
- the type `IndexIter` and the functions `get_index_size`, `index_iter`,
and `idx_from_token` have been deleted;
- the function `sourcemap_from_token` has been turned into the method
`sourcemap` on `Token`;
- the `idx` parameter of `SourceMap::get_token` now has the type `usize`.


## 8.0.1

### Various fixes & improvements
Expand Down
2 changes: 0 additions & 2 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,6 @@ fn serialize_mappings(sm: &SourceMap) -> String {
let mut prev_src_id = 0;

for (idx, token) in sm.tokens().enumerate() {
let idx = idx as u32;

if token.get_dst_line() != prev_dst_line {
prev_dst_col = 0;
while token.get_dst_line() != prev_dst_line {
Expand Down
7 changes: 4 additions & 3 deletions src/hermes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,10 @@ impl SourceMapHermes {

// Find the closest mapping, just like here:
// https://github.com/facebook/metro/blob/63b523eb20e7bdf62018aeaf195bb5a3a1a67f36/packages/metro-symbolicate/src/SourceMetadataMapConsumer.js#L204-L231
let mapping = greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
(o.line, o.column)
})?;
let (_mapping_idx, mapping) =
greatest_lower_bound(&function_map.mappings, &token.get_src(), |o| {
(o.line, o.column)
})?;
function_map
.names
.get(mapping.name_index as usize)
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ pub use crate::errors::{Error, Result};
pub use crate::hermes::SourceMapHermes;
pub use crate::sourceview::SourceView;
pub use crate::types::{
DecodedMap, IndexIter, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter,
SourceMap, SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
DecodedMap, NameIter, RawToken, RewriteOptions, SourceContentsIter, SourceIter, SourceMap,
SourceMapIndex, SourceMapSection, SourceMapSectionIter, Token, TokenIter,
};
pub use crate::utils::make_relative_path;

Expand Down
14 changes: 4 additions & 10 deletions src/sourceview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use if_chain::if_chain;
use crate::detector::{locate_sourcemap_reference_slice, SourceMapRef};
use crate::errors::Result;
use crate::js_identifiers::{get_javascript_token, is_valid_javascript_identifier};
use crate::types::{idx_from_token, sourcemap_from_token, Token};
use crate::types::Token;

/// An iterator that iterates over tokens in reverse.
pub struct RevTokenIter<'view, 'map> {
Expand All @@ -24,17 +24,11 @@ impl<'view, 'map> Iterator for RevTokenIter<'view, 'map> {
type Item = (Token<'map>, Option<&'view str>);

fn next(&mut self) -> Option<(Token<'map>, Option<&'view str>)> {
let token = match self.token.take() {
None => {
return None;
}
Some(token) => token,
};
let token = self.token.take()?;
let idx = token.idx;

let idx = idx_from_token(&token);
if idx > 0 {
let sm = sourcemap_from_token(&token);
self.token = sm.get_token(idx - 1);
self.token = token.sm.get_token(idx - 1);
}

// if we are going to the same line as we did last iteration, we don't have to scan
Expand Down
97 changes: 29 additions & 68 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,16 @@ pub struct RawToken {
#[derive(Copy, Clone)]
pub struct Token<'a> {
raw: &'a RawToken,
i: &'a SourceMap,
pub(crate) sm: &'a SourceMap,
pub(crate) idx: usize,
offset: u32,
idx: u32,
}

impl<'a> Token<'a> {
    /// Returns the [`SourceMap`] this token belongs to.
    ///
    /// The returned reference lives as long as the map itself (`'a`),
    /// not just as long as this `Token` value.
    pub fn sourcemap(&self) -> &'a SourceMap {
        self.sm
    }
}

impl<'a> PartialEq for Token<'a> {
Expand Down Expand Up @@ -241,7 +248,7 @@ impl<'a> Token<'a> {
if self.raw.src_id == !0 {
None
} else {
self.i.get_source(self.raw.src_id)
self.sm.get_source(self.raw.src_id)
}
}

Expand All @@ -255,7 +262,7 @@ impl<'a> Token<'a> {
if self.raw.name_id == !0 {
None
} else {
self.i.get_name(self.raw.name_id)
self.sm.get_name(self.raw.name_id)
}
}

Expand Down Expand Up @@ -287,7 +294,7 @@ impl<'a> Token<'a> {

/// Returns the referenced source view.
pub fn get_source_view(&self) -> Option<&SourceView> {
self.i.get_source_view(self.get_src_id())
self.sm.get_source_view(self.get_src_id())
}

/// If true, this token is a range token.
Expand All @@ -298,18 +305,10 @@ impl<'a> Token<'a> {
}
}

pub fn idx_from_token(token: &Token<'_>) -> u32 {
token.idx
}

pub fn sourcemap_from_token<'a>(token: &Token<'a>) -> &'a SourceMap {
token.i
}

/// Iterates over all tokens in a sourcemap
pub struct TokenIter<'a> {
i: &'a SourceMap,
next_idx: u32,
next_idx: usize,
}

impl<'a> TokenIter<'a> {
Expand Down Expand Up @@ -390,23 +389,6 @@ impl<'a> Iterator for NameIter<'a> {
}
}

/// Iterator over the `(dst_line, dst_col, token_idx)` entries of a
/// sourcemap's internal index, in stored order.
pub struct IndexIter<'a> {
    i: &'a SourceMap,
    next_idx: usize,
}

impl<'a> Iterator for IndexIter<'a> {
    type Item = (u32, u32, u32);

    fn next(&mut self) -> Option<(u32, u32, u32)> {
        // Yield the entry under the cursor, then advance; `None` once
        // the cursor runs past the end of the index.
        let entry = *self.i.index.get(self.next_idx)?;
        self.next_idx += 1;
        Some(entry)
    }
}

impl<'a> fmt::Debug for Token<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "<Token {self:#}>")
Expand Down Expand Up @@ -481,7 +463,6 @@ pub struct SourceMapIndex {
pub struct SourceMap {
pub(crate) file: Option<Arc<str>>,
pub(crate) tokens: Vec<RawToken>,
pub(crate) index: Vec<(u32, u32, u32)>,
pub(crate) names: Vec<Arc<str>>,
pub(crate) source_root: Option<Arc<str>>,
pub(crate) sources: Vec<Arc<str>>,
Expand Down Expand Up @@ -596,21 +577,15 @@ impl SourceMap {
/// - `sources_content` optional source contents
pub fn new(
file: Option<Arc<str>>,
tokens: Vec<RawToken>,
mut tokens: Vec<RawToken>,
names: Vec<Arc<str>>,
sources: Vec<Arc<str>>,
sources_content: Option<Vec<Option<Arc<str>>>>,
) -> SourceMap {
let mut index: Vec<_> = tokens
.iter()
.enumerate()
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32))
.collect();
index.sort_unstable();
tokens.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
SourceMap {
file,
tokens,
index,
names,
source_root: None,
sources,
Expand Down Expand Up @@ -681,10 +656,10 @@ impl SourceMap {
}

/// Looks up a token by its index.
pub fn get_token(&self, idx: u32) -> Option<Token<'_>> {
self.tokens.get(idx as usize).map(|raw| Token {
pub fn get_token(&self, idx: usize) -> Option<Token<'_>> {
self.tokens.get(idx).map(|raw| Token {
raw,
i: self,
sm: self,
idx,
offset: 0,
})
Expand All @@ -705,9 +680,15 @@ impl SourceMap {

/// Looks up the closest token to a given 0-indexed line and column.
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
let ii = greatest_lower_bound(&self.index, &(line, col), |ii| (ii.0, ii.1))?;
let (idx, raw) =
greatest_lower_bound(&self.tokens, &(line, col), |t| (t.dst_line, t.dst_col))?;

let mut token = self.get_token(ii.2)?;
let mut token = Token {
raw,
sm: self,
idx,
offset: 0,
};

if token.is_range() {
token.offset = col - token.get_dst_col();
Expand Down Expand Up @@ -827,19 +808,6 @@ impl SourceMap {
self.names.clear();
}

/// Returns the number of items in the index
pub fn get_index_size(&self) -> usize {
    self.index.len()
}

/// Returns an iterator over the items in the index
pub fn index_iter(&self) -> IndexIter<'_> {
    IndexIter {
        i: self,
        next_idx: 0,
    }
}

/// This rewrites the sourcemap according to the provided rewrite
/// options.
///
Expand Down Expand Up @@ -998,7 +966,6 @@ impl SourceMap {
// the `self` tokens and `src_line/col` for the `adjustment` tokens.
let self_tokens = std::mem::take(&mut self.tokens);
let original_ranges = create_ranges(self_tokens, |t| (t.dst_line, t.dst_col));
self.index.clear();
let adjustment_ranges =
create_ranges(adjustment.tokens.clone(), |t| (t.src_line, t.src_col));

Expand Down Expand Up @@ -1059,14 +1026,8 @@ impl SourceMap {
}
}

// Regenerate the index
self.index.extend(
self.tokens
.iter()
.enumerate()
.map(|(idx, token)| (token.dst_line, token.dst_col, idx as u32)),
);
self.index.sort_unstable();
self.tokens
.sort_unstable_by_key(|t| (t.dst_line, t.dst_col));
}
}

Expand Down Expand Up @@ -1190,7 +1151,7 @@ impl SourceMapIndex {
/// If a sourcemap is encountered that is not embedded but just
/// externally referenced it is silently skipped.
pub fn lookup_token(&self, line: u32, col: u32) -> Option<Token<'_>> {
let section =
let (_section_idx, section) =
greatest_lower_bound(&self.sections, &(line, col), SourceMapSection::get_offset)?;
let map = section.get_sourcemap()?;
let (off_line, off_col) = section.get_offset();
Expand Down
26 changes: 13 additions & 13 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,13 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
slice: &'a [T],
key: &K,
map: F,
) -> Option<&'a T> {
) -> Option<(usize, &'a T)> {
let mut idx = match slice.binary_search_by_key(key, &map) {
Ok(index) => index,
Err(index) => {
// If there is no match, then we know for certain that the index is where we should
// insert a new token, and that the token directly before is the greatest lower bound.
return slice.get(index.checked_sub(1)?);
return slice.get(index.checked_sub(1)?).map(|res| (index, res));
}
};

Expand All @@ -138,7 +138,7 @@ pub fn greatest_lower_bound<'a, T, K: Ord, F: Fn(&'a T) -> K>(
break;
}
}
slice.get(idx)
slice.get(idx).map(|res| (idx, res))
}

#[test]
Expand Down Expand Up @@ -201,27 +201,27 @@ fn test_greatest_lower_bound() {
let cmp = |&(i, _id)| i;

let haystack = vec![(1, 1)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 2)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 2));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 3)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 3));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 4)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 4));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);

let haystack = vec![(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)];
assert_eq!(greatest_lower_bound(&haystack, &1, cmp), Some(&(1, 1)));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp), Some(&(1, 5)));
assert_eq!(greatest_lower_bound(&haystack, &1, cmp).unwrap().1, &(1, 1));
assert_eq!(greatest_lower_bound(&haystack, &2, cmp).unwrap().1, &(1, 5));
assert_eq!(greatest_lower_bound(&haystack, &0, cmp), None);
}