@@ -214,7 +214,8 @@ pub struct Tokenizer<'a> {
214
214
/// ensure that computing the column will give the result in units
215
215
/// of UTF-16 characters.
216
216
current_line_start_position : usize ,
217
- current_position : usize ,
217
+ position_difference : u16 ,
218
+ current_line_start_difference : u16 ,
218
219
current_line_number : u32 ,
219
220
var_or_env_functions : SeenStatus ,
220
221
source_map_url : Option < & ' a str > ,
@@ -235,8 +236,9 @@ impl<'a> Tokenizer<'a> {
235
236
input,
236
237
position : 0 ,
237
238
current_line_start_position : 0 ,
238
- current_position : 0 ,
239
+ current_line_start_difference : 0 ,
239
240
current_line_number : 0 ,
241
+ position_difference : 0 ,
240
242
var_or_env_functions : SeenStatus :: DontCare ,
241
243
source_map_url : None ,
242
244
source_url : None ,
@@ -279,7 +281,12 @@ impl<'a> Tokenizer<'a> {
279
281
pub fn current_source_location ( & self ) -> SourceLocation {
280
282
SourceLocation {
281
283
line : self . current_line_number ,
282
- column : ( self . position - self . current_line_start_position + 1 ) as u32 ,
284
+ column : (
285
+ self . position -
286
+ self . current_line_start_position -
287
+ ( self . position_difference - self . current_line_start_difference ) as usize
288
+ + 1
289
+ ) as u32 ,
283
290
}
284
291
}
285
292
@@ -298,7 +305,8 @@ impl<'a> Tokenizer<'a> {
298
305
ParserState {
299
306
position : self . position ,
300
307
current_line_start_position : self . current_line_start_position ,
301
- current_position : self . current_position ,
308
+ current_line_start_difference : self . current_line_start_difference ,
309
+ position_difference : self . position_difference ,
302
310
current_line_number : self . current_line_number ,
303
311
at_start_of : None ,
304
312
}
@@ -308,7 +316,8 @@ impl<'a> Tokenizer<'a> {
308
316
pub fn reset ( & mut self , state : & ParserState ) {
309
317
self . position = state. position ;
310
318
self . current_line_start_position = state. current_line_start_position ;
311
- self . current_position = state. current_position ;
319
+ self . current_line_start_difference = state. current_line_start_difference ;
320
+ self . position_difference = state. position_difference ;
312
321
self . current_line_number = state. current_line_number ;
313
322
}
314
323
@@ -374,7 +383,6 @@ impl<'a> Tokenizer<'a> {
374
383
debug_assert ! ( b != b'\r' && b != b'\n' && b != b'\x0C' ) ;
375
384
}
376
385
}
377
- self . current_position = self . current_position . wrapping_add ( n) ;
378
386
self . position += n
379
387
}
380
388
@@ -396,8 +404,7 @@ impl<'a> Tokenizer<'a> {
396
404
debug_assert ! ( self . next_byte_unchecked( ) & 0xF0 == 0xF0 ) ;
397
405
// This takes two UTF-16 characters to represent, so we
398
406
// actually have an undercount.
399
- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
400
- self . current_position = self . current_position . wrapping_add ( 2 ) ;
407
+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
401
408
self . position += 1 ;
402
409
}
403
410
@@ -409,7 +416,7 @@ impl<'a> Tokenizer<'a> {
409
416
// Continuation bytes contribute to column overcount. Note
410
417
// that due to the special case for the 4-byte sequence intro,
411
418
// we must use wrapping add here.
412
- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
419
+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
413
420
self . position += 1 ;
414
421
}
415
422
@@ -422,14 +429,11 @@ impl<'a> Tokenizer<'a> {
422
429
if byte & 0xF0 == 0xF0 {
423
430
// This takes two UTF-16 characters to represent, so we
424
431
// actually have an undercount.
425
- self . current_line_start_position = self . current_line_start_position . wrapping_sub ( 1 ) ;
426
- self . current_position = self . current_position . wrapping_add ( 2 ) ;
432
+ self . position_difference = self . position_difference . wrapping_sub ( 1 ) ;
427
433
} else if byte & 0xC0 == 0x80 {
428
434
// Note that due to the special case for the 4-byte
429
435
// sequence intro, we must use wrapping add here.
430
- self . current_line_start_position = self . current_line_start_position . wrapping_add ( 1 ) ;
431
- } else {
432
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
436
+ self . position_difference = self . position_difference . wrapping_add ( 1 ) ;
433
437
}
434
438
}
435
439
@@ -448,12 +452,11 @@ impl<'a> Tokenizer<'a> {
448
452
let byte = self . next_byte_unchecked ( ) ;
449
453
debug_assert ! ( byte == b'\r' || byte == b'\n' || byte == b'\x0C' ) ;
450
454
self . position += 1 ;
451
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
452
455
if byte == b'\r' && self . next_byte ( ) == Some ( b'\n' ) {
453
456
self . position += 1 ;
454
- self . current_position = self . current_position . wrapping_add ( 1 ) ;
455
457
}
456
458
self . current_line_start_position = self . position ;
459
+ self . current_line_start_difference = self . position_difference ;
457
460
self . current_line_number += 1 ;
458
461
}
459
462
@@ -467,14 +470,13 @@ impl<'a> Tokenizer<'a> {
467
470
fn consume_char ( & mut self ) -> char {
468
471
let c = self . next_char ( ) ;
469
472
let len_utf8 = c. len_utf8 ( ) ;
473
+ let len_utf16 = c. len_utf16 ( ) ;
470
474
self . position += len_utf8;
471
475
// Note that due to the special case for the 4-byte sequence
472
476
// intro, we must use wrapping add here.
473
- let len_utf16 = c. len_utf16 ( ) ;
474
- self . current_line_start_position = self
475
- . current_line_start_position
476
- . wrapping_add ( len_utf8 - len_utf16) ;
477
- self . current_position = self . current_position . wrapping_add ( len_utf16) ;
477
+ self . position_difference = self
478
+ . position_difference
479
+ . wrapping_add ( ( len_utf8 - len_utf16) as u16 ) ;
478
480
c
479
481
}
480
482
@@ -1164,16 +1166,12 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
1164
1166
}
1165
1167
} ;
1166
1168
match_byte ! { b,
1167
- b' ' | b'\t' => {
1168
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1169
- } ,
1169
+ b' ' | b'\t' => { } ,
1170
1170
b'\n' | b'\x0C' => {
1171
1171
newlines += 1 ;
1172
1172
last_newline = offset;
1173
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1174
1173
}
1175
1174
b'\r' => {
1176
- tokenizer. current_position = tokenizer. current_position. wrapping_add( 1 ) ;
1177
1175
if from_start. as_bytes( ) . get( offset + 1 ) != Some ( & b'\n' ) {
1178
1176
newlines += 1 ;
1179
1177
last_newline = offset;
0 commit comments