Skip to content

Commit 4225805

Browse files
authored
[FileFormats.LP] add explicit tests for the ParseError messages (#2852)
1 parent c691dc4 commit 4225805

File tree

5 files changed

+352
-168
lines changed

5 files changed

+352
-168
lines changed

src/FileFormats/LP/read.jl

Lines changed: 70 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,7 @@ function Base.read!(io::IO, model::Model{T}) where {T}
8282
"No file contents are allowed after `end`.",
8383
)
8484
else
85-
_throw_parse_error(
86-
state,
87-
token,
88-
"Parsing this section is not supported by the current reader.",
89-
)
85+
_expect(state, token, _TOKEN_KEYWORD)
9086
end
9187
end
9288
# if keyword != :END
@@ -190,8 +186,8 @@ This dictionary makes `_TokenKind` to a string that is used when printing error
190186
messages. The string must complete the sentence "We expected this token to be ".
191187
"""
192188
const _KIND_TO_MSG = Dict{_TokenKind,String}(
193-
_TOKEN_KEYWORD => "a keyword",
194-
_TOKEN_IDENTIFIER => "a variable name",
189+
_TOKEN_KEYWORD => "a keyword defining a new section",
190+
_TOKEN_IDENTIFIER => "an identifier",
195191
_TOKEN_NUMBER => "a number",
196192
_TOKEN_ADDITION => "the symbol `+`",
197193
_TOKEN_SUBTRACTION => "the symbol `-`",
@@ -206,7 +202,7 @@ const _KIND_TO_MSG = Dict{_TokenKind,String}(
206202
_TOKEN_COLON => "the symbol `:`",
207203
_TOKEN_IMPLIES => "the symbol `->`",
208204
_TOKEN_NEWLINE => "a new line",
209-
_TOKEN_UNKNOWN => "some unknown symbol",
205+
_TOKEN_UNKNOWN => "a token",
210206
)
211207

212208
"""
@@ -296,27 +292,66 @@ struct ParseError <: Exception
296292
msg::String
297293
end
298294

295+
_is_utf8_start(b::UInt8) = b < 0x80 || (0xC0 <= b <= 0xF7)
296+
297+
function _get_line_about_pos(io::IO, pos::Int, width::Int)
298+
seek(io, max(0, pos - width))
299+
# This byte might be an invalid or continuation byte. We need to seek
300+
# forward until we reach a new valid byte.
301+
while !_is_utf8_start(peek(io, UInt8))
302+
read(io, UInt8)
303+
end
304+
char = Char[]
305+
mark = 0
306+
while !eof(io) && position(io) <= pos + width
307+
c = read(io, Char)
308+
if c == '\n'
309+
if position(io) < pos
310+
empty!(char)
311+
else
312+
break
313+
end
314+
elseif c != '\r'
315+
push!(char, c)
316+
end
317+
if position(io) == pos
318+
mark = length(char)
319+
end
320+
end
321+
if mark == 0
322+
mark = length(char) + 1
323+
end
324+
return String(char), mark
325+
end
326+
299327
function _throw_parse_error(state::_LexerState, token::_Token, msg::String)
300-
offset = min(40, token.pos)
301-
seek(state.io, token.pos - offset)
302-
line = String(read(state.io, 2 * offset))
303-
i = something(findprev('\n', line, offset-1), 0)
304-
j = something(findnext('\n', line, offset), length(line) + 1)
305-
extract = replace(line[(i+1):(j-1)], "\r" => "")
306-
help = string(extract, "\n", " "^(offset - i + - 1), "^\n", msg)
328+
line, mark = _get_line_about_pos(state.io, token.pos, 40)
329+
help = string(
330+
line,
331+
"\n",
332+
" "^(mark - 1),
333+
"^\nGot ",
334+
_KIND_TO_MSG[token.kind],
335+
_with_value(token.value),
336+
". ",
337+
msg,
338+
)
307339
return throw(ParseError(state.line, help))
308340
end
309341

310342
function Base.showerror(io::IO, err::ParseError)
311343
return print(io, "Error parsing LP file on line $(err.line):\n", err.msg)
312344
end
313345

346+
_with_value(::Nothing) = ""
347+
_with_value(x::String) = string(" with value `", x, "`")
348+
314349
function _expect(state::_LexerState, token::_Token, kind::_TokenKind)
315350
if token.kind != kind
316351
_throw_parse_error(
317352
state,
318353
token,
319-
string("We expected this token to be ", _KIND_TO_MSG[kind]),
354+
"We expected this token to be $(_KIND_TO_MSG[kind]).",
320355
)
321356
end
322357
return token
@@ -346,6 +381,9 @@ function Base.read(state::_LexerState, ::Type{_Token})
346381
end
347382
popfirst!(state.peek_tokens)
348383
state.current_token = token
384+
if token.kind == _TOKEN_NEWLINE
385+
state.line += 1
386+
end
349387
return token
350388
end
351389

@@ -445,7 +483,6 @@ function _peek_inner(state::_LexerState)
445483
while (c = peek(state, Char)) !== nothing
446484
pos = position(state.io)
447485
if c == '\n'
448-
state.line += 1
449486
_ = read(state, Char)
450487
return _Token(_TOKEN_NEWLINE, nothing, pos)
451488
elseif isspace(c) # Whitespace
@@ -480,11 +517,17 @@ function _peek_inner(state::_LexerState)
480517
_ = read(state, Char) # Allow <=, >=, and ==
481518
end
482519
return _Token(op, nothing, pos)
520+
elseif _is_identifier(c) && !_is_starting_identifier(c)
521+
_throw_parse_error(
522+
state,
523+
_Token(_TOKEN_UNKNOWN, "$c", pos),
524+
"This character is not supported at the start of an identifier.",
525+
)
483526
else
484527
_throw_parse_error(
485528
state,
486529
_Token(_TOKEN_UNKNOWN, "$c", pos),
487-
"This character is not supported an LP file.",
530+
"This character is not supported in an LP file.",
488531
)
489532
end
490533
end
@@ -574,7 +617,11 @@ function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
574617
_expect(state, token, _TOKEN_NUMBER)
575618
ret = tryparse(T, token.value)
576619
if ret === nothing
577-
_throw_parse_error(state, token, "We expected this to be a number.")
620+
_throw_parse_error(
621+
state,
622+
_Token(_TOKEN_IDENTIFIER, token.value, token.pos),
623+
"We were unable to parse this as a number.",
624+
)
578625
end
579626
return ret
580627
end
@@ -751,7 +798,7 @@ function _parse_term(
751798
return _throw_parse_error(
752799
state,
753800
token,
754-
"Got $(_KIND_TO_MSG[token.kind]), but we expected this to be a new term in the expression.",
801+
"We expected this to be a new term in the expression.",
755802
)
756803
end
757804

@@ -815,9 +862,7 @@ function _parse_set_suffix(state, cache)
815862
p = read(state, _Token)
816863
if _compare_case_insenstive(p, "free")
817864
return nothing
818-
end
819-
_skip_newlines(state)
820-
if p.kind == _TOKEN_GREATER_THAN
865+
elseif p.kind == _TOKEN_GREATER_THAN
821866
rhs = _parse_number(state, cache)
822867
return MOI.GreaterThan(rhs)
823868
elseif p.kind == _TOKEN_LESS_THAN
@@ -830,7 +875,7 @@ function _parse_set_suffix(state, cache)
830875
_throw_parse_error(
831876
state,
832877
p,
833-
"We expected this to be an inequality like `>=`, `<=` ,or `==`.",
878+
"We expected this to be an inequality like `>=`, `<=`, or `==`.",
834879
)
835880
end
836881
end
@@ -856,7 +901,7 @@ function _parse_set_prefix(state, cache)
856901
_throw_parse_error(
857902
state,
858903
p,
859-
"We expected this to be an inequality like `>=`, `<=` ,or `==`.",
904+
"We expected this to be an inequality like `>=`, `<=`, or `==`.",
860905
)
861906
end
862907
end

0 commit comments

Comments
 (0)