@@ -82,11 +82,7 @@ function Base.read!(io::IO, model::Model{T}) where {T}
82
82
" No file contents are allowed after `end`." ,
83
83
)
84
84
else
85
- _throw_parse_error (
86
- state,
87
- token,
88
- " Parsing this section is not supported by the current reader." ,
89
- )
85
+ _expect (state, token, _TOKEN_KEYWORD)
90
86
end
91
87
end
92
88
# if keyword != :END
@@ -190,8 +186,8 @@ This dictionary makes `_TokenKind` to a string that is used when printing error
190
186
messages. The string must complete the sentence "We expected this token to be ".
191
187
"""
192
188
const _KIND_TO_MSG = Dict {_TokenKind,String} (
193
- _TOKEN_KEYWORD => " a keyword" ,
194
- _TOKEN_IDENTIFIER => " a variable name " ,
189
+ _TOKEN_KEYWORD => " a keyword defining a new section " ,
190
+ _TOKEN_IDENTIFIER => " an identifier " ,
195
191
_TOKEN_NUMBER => " a number" ,
196
192
_TOKEN_ADDITION => " the symbol `+`" ,
197
193
_TOKEN_SUBTRACTION => " the symbol `-`" ,
@@ -206,7 +202,7 @@ const _KIND_TO_MSG = Dict{_TokenKind,String}(
206
202
_TOKEN_COLON => " the symbol `:`" ,
207
203
_TOKEN_IMPLIES => " the symbol `->`" ,
208
204
_TOKEN_NEWLINE => " a new line" ,
209
- _TOKEN_UNKNOWN => " some unknown symbol " ,
205
+ _TOKEN_UNKNOWN => " a token " ,
210
206
)
211
207
212
208
"""
@@ -296,27 +292,66 @@ struct ParseError <: Exception
296
292
msg:: String
297
293
end
298
294
295
# Return `true` if `b` can be the first byte of an encoded character: either a
# single-byte value (`b < 0x80`) or a multi-byte lead byte (`0xC0` to `0xF7`).
# Continuation bytes (`0x80` to `0xBF`) and bytes above `0xF7` return `false`.
_is_utf8_start(b::UInt8) = b < 0x80 || (0xC0 <= b <= 0xF7)

"""
    _get_line_about_pos(io::IO, pos::Int, width::Int)

Return a tuple `(line, mark)` describing the text surrounding byte position
`pos` of `io`.

The stream is repositioned to `max(0, pos - width)` and characters are read
until the stream position exceeds `pos + width`, the end of the stream is
reached, or a newline is read at or after `pos`. A newline read before `pos`
discards everything collected so far, so `line` never spans more than one line.
Carriage returns are dropped.

`mark` is the number of characters collected in `line` at the moment the stream
position was exactly `pos`. If that never happened, or if no characters had been
collected at that moment, `mark` is `length(line) + 1`.

This function changes the position of `io`.
"""
function _get_line_about_pos(io::IO, pos::Int, width::Int)
    seek(io, max(0, pos - width))
    # This byte might be an invalid or continuation byte. We need to seek
    # forward until we reach a new valid byte. The `eof` check matters because
    # `peek` throws an `EOFError` at the end of the stream, which happens if the
    # stream is empty or if only continuation bytes remain.
    while !eof(io) && !_is_utf8_start(peek(io, UInt8))
        read(io, UInt8)
    end
    chars = Char[]
    mark = 0
    while !eof(io) && position(io) <= pos + width
        c = read(io, Char)
        if c == '\n'
            if position(io) < pos
                # The newline is before `pos`: start collecting a new line.
                empty!(chars)
            else
                # The newline ends the line that contains `pos`.
                break
            end
        elseif c != '\r'
            push!(chars, c)
        end
        if position(io) == pos
            mark = length(chars)
        end
    end
    if mark == 0
        # We never recorded a position; point just past the collected text.
        mark = length(chars) + 1
    end
    return String(chars), mark
end
326
+
299
327
# Throw a `ParseError` for `token`.
#
# NOTE: this span is a diff hunk. Lines starting with `-` are the removed body
# and lines starting with `+` are the added body. The comments below describe
# the added (`+`) body.
#
# The help text is built from, in order: the line of input around `token.pos`
# (from `_get_line_about_pos` with a width of 40), a second line that places a
# `^` after `mark - 1` copies of a padding string, the `_KIND_TO_MSG` entry for
# `token.kind`, the result of `_with_value(token.value)`, and finally the
# caller-supplied `msg`.
#
# The thrown error is `ParseError(state.line, help)`.
function _throw_parse_error (state:: _LexerState , token:: _Token , msg:: String )
300
- offset = min (40 , token. pos)
301
- seek (state. io, token. pos - offset)
302
- line = String (read (state. io, 2 * offset))
303
- i = something (findprev (' \n ' , line, offset- 1 ), 0 )
304
- j = something (findnext (' \n ' , line, offset), length (line) + 1 )
305
- extract = replace (line[(i+ 1 ): (j- 1 )], " \r " => " " )
306
- help = string (extract, " \n " , " " ^ (offset - i + - 1 ), " ^\n " , msg)
328
+ line, mark = _get_line_about_pos (state. io, token. pos, 40 )
329
+ help = string (
330
+ line,
331
+ " \n " ,
332
+ " " ^ (mark - 1 ),
333
+ " ^\n Got " ,
334
+ _KIND_TO_MSG[token. kind],
335
+ _with_value (token. value),
336
+ " . " ,
337
+ msg,
338
+ )
307
339
return throw (ParseError (state. line, help))
308
340
end
309
341
310
342
# Print a `ParseError` to `io`: a header that interpolates `err.line`, followed
# by `err.msg`.
function Base. showerror (io:: IO , err:: ParseError )
311
343
return print (io, " Error parsing LP file on line $(err. line) :\n " , err. msg)
312
344
end
313
345
346
# Format a token's value for inclusion in an error message. The `Nothing`
# method returns a constant string that contains no value text. The `String`
# method wraps `x` in backticks after the words "with value".
+ _with_value (:: Nothing ) = " "
347
+ _with_value (x:: String ) = string (" with value `" , x, " `" )
348
+
314
349
function _expect (state:: _LexerState , token:: _Token , kind:: _TokenKind )
315
350
if token. kind != kind
316
351
_throw_parse_error (
317
352
state,
318
353
token,
319
- string ( " We expected this token to be " , _KIND_TO_MSG[kind]),
354
+ " We expected this token to be $( _KIND_TO_MSG[kind]). " ,
320
355
)
321
356
end
322
357
return token
@@ -346,6 +381,9 @@ function Base.read(state::_LexerState, ::Type{_Token})
346
381
end
347
382
popfirst! (state. peek_tokens)
348
383
state. current_token = token
384
+ if token. kind == _TOKEN_NEWLINE
385
+ state. line += 1
386
+ end
349
387
return token
350
388
end
351
389
@@ -445,7 +483,6 @@ function _peek_inner(state::_LexerState)
445
483
while (c = peek (state, Char)) != = nothing
446
484
pos = position (state. io)
447
485
if c == ' \n '
448
- state. line += 1
449
486
_ = read (state, Char)
450
487
return _Token (_TOKEN_NEWLINE, nothing , pos)
451
488
elseif isspace (c) # Whitespace
@@ -480,11 +517,17 @@ function _peek_inner(state::_LexerState)
480
517
_ = read (state, Char) # Allow <=, >=, and ==
481
518
end
482
519
return _Token (op, nothing , pos)
520
+ elseif _is_identifier (c) && ! _is_starting_identifier (c)
521
+ _throw_parse_error (
522
+ state,
523
+ _Token (_TOKEN_UNKNOWN, " $c " , pos),
524
+ " This character is not supported at the start of an identifier." ,
525
+ )
483
526
else
484
527
_throw_parse_error (
485
528
state,
486
529
_Token (_TOKEN_UNKNOWN, " $c " , pos),
487
- " This character is not supported an LP file." ,
530
+ " This character is not supported in an LP file." ,
488
531
)
489
532
end
490
533
end
@@ -574,7 +617,11 @@ function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
574
617
_expect (state, token, _TOKEN_NUMBER)
575
618
ret = tryparse (T, token. value)
576
619
if ret === nothing
577
- _throw_parse_error (state, token, " We expected this to be a number." )
620
+ _throw_parse_error (
621
+ state,
622
+ _Token (_TOKEN_IDENTIFIER, token. value, token. pos),
623
+ " We were unable to parse this as a number." ,
624
+ )
578
625
end
579
626
return ret
580
627
end
@@ -751,7 +798,7 @@ function _parse_term(
751
798
return _throw_parse_error (
752
799
state,
753
800
token,
754
- " Got $(_KIND_TO_MSG[token . kind]) , but we expected this to be a new term in the expression." ,
801
+ " We expected this to be a new term in the expression." ,
755
802
)
756
803
end
757
804
@@ -815,9 +862,7 @@ function _parse_set_suffix(state, cache)
815
862
p = read (state, _Token)
816
863
if _compare_case_insenstive (p, " free" )
817
864
return nothing
818
- end
819
- _skip_newlines (state)
820
- if p. kind == _TOKEN_GREATER_THAN
865
+ elseif p. kind == _TOKEN_GREATER_THAN
821
866
rhs = _parse_number (state, cache)
822
867
return MOI. GreaterThan (rhs)
823
868
elseif p. kind == _TOKEN_LESS_THAN
@@ -830,7 +875,7 @@ function _parse_set_suffix(state, cache)
830
875
_throw_parse_error (
831
876
state,
832
877
p,
833
- " We expected this to be an inequality like `>=`, `<=` , or `==`." ,
878
+ " We expected this to be an inequality like `>=`, `<=`, or `==`." ,
834
879
)
835
880
end
836
881
end
@@ -856,7 +901,7 @@ function _parse_set_prefix(state, cache)
856
901
_throw_parse_error (
857
902
state,
858
903
p,
859
- " We expected this to be an inequality like `>=`, `<=` , or `==`." ,
904
+ " We expected this to be an inequality like `>=`, `<=`, or `==`." ,
860
905
)
861
906
end
862
907
end
0 commit comments