From f771fb30433bc042b1d2b520475d8f26fe9a540b Mon Sep 17 00:00:00 2001 From: Nesmeyanov Kirill Date: Wed, 1 Nov 2017 21:28:15 +0300 Subject: [PATCH 1/2] Improve exceptions --- Exception/ExceptionHelper.php | 161 ++++++++++++++++++++++++++++++++++ Llk/Lexer.php | 22 ++--- Llk/Parser.php | 36 +++----- 3 files changed, 183 insertions(+), 36 deletions(-) create mode 100644 Exception/ExceptionHelper.php diff --git a/Exception/ExceptionHelper.php b/Exception/ExceptionHelper.php new file mode 100644 index 00000000..d4989476 --- /dev/null +++ b/Exception/ExceptionHelper.php @@ -0,0 +1,161 @@ +getErrorInfo($text, $bytesOffset); + $code = $this->getAffectedCodeAsString($result['trace']); + + $column = $this->getMbColumnPosition($code, $result['column']); + + return [ + 'line' => $result['line'], + 'code' => $code, + 'column' => $column, + 'highlight' => $this->getStringHighligher($column), + ]; + } + + /** + * Returns the last line with an error. If the error occurred on + * the line where there is no visible part, before complements + * it with the previous ones. + * + * @param array|string[] $textLines List of code lines + * @return string + */ + private function getAffectedCodeAsString(array $textLines) + { + $result = ''; + $i = 0; + + while (\count($textLines) && ++$i) { + $textLine = \array_pop($textLines); + $result = $textLine . ($i > 1 ? "\n" . $result : ''); + + if (\trim($textLine)) { + break; + } + } + + return $result; + } + + /** + * The method draws the highlight of the error place. + * + * @param int $charsOffset Error offset in symbols + * @return string + */ + private function getStringHighligher($charsOffset) + { + $prefix = ''; + + if ($charsOffset > 0) { + $prefix = \str_repeat(' ', $charsOffset); + } + + return $prefix . '↑'; + } + + /** + * Returns the error location in UTF characters by the offset in bytes. + * + * @param string $line The code line from which we get a offset in the characters + * @param int $bytesOffset Length of offset in bytes + * @return int + */ + private function getMbColumnPosition($line, $bytesOffset) + { + $slice = \substr($line, 0, $bytesOffset); + + return \mb_strlen($slice, 'UTF-8'); + } + + /** + * Returns information about the error location: line, column and affected text lines. + * + * @param string $text The source code in which we search for a line and a column + * @param int $bytesOffset Offset in bytes relative to the beginning of the source code + * @return array + */ + private function getErrorInfo($text, $bytesOffset) + { + $result = [ + 'line' => 1, + 'column' => 0, + 'trace' => [], + ]; + + $current = 0; + + foreach (\explode("\n", $text) as $line => $code) { + $previous = $current; + $current += \strlen($code) + 1; + $result['trace'][] = $code; + + if ($current > $bytesOffset) { + return [ + 'line' => $line + 1, + 'column' => $bytesOffset - $previous, + 'trace' => $result['trace'] + ]; + } + } + + return $result; + } +} diff --git a/Llk/Lexer.php b/Llk/Lexer.php index 68513678..cd873c7e 100644 --- a/Llk/Lexer.php +++ b/Llk/Lexer.php @@ -48,6 +48,8 @@ */ class Lexer { + use Compiler\Exception\ExceptionHelper; + /** * Lexer state. * @@ -83,8 +85,6 @@ class Lexer */ protected $_pcreOptions = null; - - /** * Constructor. * @@ -147,18 +147,20 @@ public function lexMe($text, array $tokens) $nextToken = $this->nextToken($offset); if (null === $nextToken) { + $info = $this->getErrorPositionByOffset($text, $offset); + throw new Compiler\Exception\UnrecognizedToken( - 'Unrecognized token "%s" at line 1 and column %d:' . - "\n" . '%s' . "\n" . - str_repeat(' ', mb_strlen(substr($text, 0, $offset))) . '↑', + 'Unrecognized token "%s" at line %d and column %d: ' . "\n%s\n%s", 0, [ - mb_substr(substr($text, $offset), 0, 1), - $offset + 1, - $text + \mb_substr(\substr($text, $offset), 0, 1), + $info['line'], + $info['column'], + $info['code'], + $info['highlight'] ], - 1, - $offset + $info['line'], + $info['column'] ); } diff --git a/Llk/Parser.php b/Llk/Parser.php index ec35d1d2..b82333f1 100644 --- a/Llk/Parser.php +++ b/Llk/Parser.php @@ -49,6 +49,8 @@ */ class Parser { + use Compiler\Exception\ExceptionHelper; + /** * List of pragmas. * @@ -192,39 +194,21 @@ public function parse($text, $rule = null, $tree = true) $token = $this->_tokenSequence->current(); } - $offset = $token['offset']; - $line = 1; - $column = 1; - - if (!empty($text)) { - if (0 === $offset) { - $leftnl = 0; - } else { - $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; - } - - $rightnl = strpos($text, "\n", $offset); - $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; - $column = $offset - $leftnl + (0 === $leftnl); - - if (false !== $rightnl) { - $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); - } - } + $info = $this->getErrorPositionByOffset($text, $token['offset']); throw new Compiler\Exception\UnexpectedToken( - 'Unexpected token "%s" (%s) at line %d and column %d:' . - "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑', + 'Unexpected token "%s" (%s) at line %d and column %d: ' . "\n%s\n%s", 0, [ $token['value'], $token['token'], - $line, - $column, - $text + $info['line'], + $info['column'], + $info['code'], + $info['highlight'] ], - $line, - $column + $info['line'], + $info['column'] ); } } while (true); From c64f7506663793d8c8d5c40b9b795bcfa6342cee Mon Sep 17 00:00:00 2001 From: Serafim Date: Fri, 10 Nov 2017 02:25:46 +0300 Subject: [PATCH 2/2] Improve exceptions --- Exception/ExceptionHelper.php | 18 +++++++++--------- Exception/UnrecognizedToken.php | 23 +++++++++++++++++++++-- Llk/Lexer.php | 20 +++----------------- Llk/Parser.php | 26 +++++++------------------- 4 files changed, 40 insertions(+), 47 deletions(-) diff --git a/Exception/ExceptionHelper.php b/Exception/ExceptionHelper.php index d4989476..57110109 100644 --- a/Exception/ExceptionHelper.php +++ b/Exception/ExceptionHelper.php @@ -54,18 +54,18 @@ trait ExceptionHelper * @param int $bytesOffset Offset in bytes * @return array */ - protected function getErrorPositionByOffset($text, $bytesOffset) + protected static function getErrorPositionByOffset($text, $bytesOffset) { - $result = $this->getErrorInfo($text, $bytesOffset); - $code = $this->getAffectedCodeAsString($result['trace']); + $result = self::getErrorInfo($text, $bytesOffset); + $code = self::getAffectedCodeAsString($result['trace']); - $column = $this->getMbColumnPosition($code, $result['column']); + $column = self::getMbColumnPosition($code, $result['column']); return [ 'line' => $result['line'], 'code' => $code, 'column' => $column, - 'highlight' => $this->getStringHighligher($column), + 'highlight' => self::getStringHighligher($column), ]; } @@ -77,7 +77,7 @@ protected function getErrorPositionByOffset($text, $bytesOffset) * @param array|string[] $textLines List of code lines * @return string */ - private function getAffectedCodeAsString(array $textLines) + private static function getAffectedCodeAsString(array $textLines) { $result = ''; $i = 0; @@ -100,7 +100,7 @@ private function getAffectedCodeAsString(array $textLines) * @param int $charsOffset Error offset in symbols * @return string */ - private function getStringHighligher($charsOffset) + private static function getStringHighligher($charsOffset) { $prefix = ''; @@ -118,7 +118,7 @@ private function getStringHighligher($charsOffset) * @param int $bytesOffset Length of offset in bytes * @return int */ - private function getMbColumnPosition($line, $bytesOffset) + private static function getMbColumnPosition($line, $bytesOffset) { $slice = \substr($line, 0, $bytesOffset); @@ -132,7 +132,7 @@ private function getMbColumnPosition($line, $bytesOffset) * @param int $bytesOffset Offset in bytes relative to the beginning of the source code * @return array */ - private function getErrorInfo($text, $bytesOffset) + private static function getErrorInfo($text, $bytesOffset) { $result = [ 'line' => 1, diff --git a/Exception/UnrecognizedToken.php b/Exception/UnrecognizedToken.php index 59963c07..214fd945 100644 --- a/Exception/UnrecognizedToken.php +++ b/Exception/UnrecognizedToken.php @@ -46,6 +46,8 @@ */ class UnrecognizedToken extends Exception { + use ExceptionHelper; + /** * Column. * @@ -53,8 +55,6 @@ class UnrecognizedToken extends Exception */ protected $column = 0; - - /** * Override line and add column support. * @@ -74,6 +74,25 @@ public function __construct($message, $code, $arg, $line, $column) return; } + /** + * @param string $message Formatted message. + * @param string $text Source code + * @param int $offsetInBytes Error offset in bytes + * @param int $code Code (the ID). + * @return static + */ + public static function fromOffset($message, $text, $offsetInBytes, $code = 0) + { + $info = self::getErrorPositionByOffset($text, $offsetInBytes); + + // Formatted message + $message .= ' at line %s and column %s' . \PHP_EOL . + $info['code'] . \PHP_EOL . + $info['highlight']; + + return new static($message, $code, [$info['line'], $info['column']], $info['line'], $info['column']); + } + /** * Get column. * diff --git a/Llk/Lexer.php b/Llk/Lexer.php index cd873c7e..cb69102f 100644 --- a/Llk/Lexer.php +++ b/Llk/Lexer.php @@ -48,8 +48,6 @@ */ class Lexer { - use Compiler\Exception\ExceptionHelper; - /** * Lexer state. * @@ -147,21 +145,9 @@ public function lexMe($text, array $tokens) $nextToken = $this->nextToken($offset); if (null === $nextToken) { - $info = $this->getErrorPositionByOffset($text, $offset); - - throw new Compiler\Exception\UnrecognizedToken( - 'Unrecognized token "%s" at line %d and column %d: ' . "\n%s\n%s", - 0, - [ - \mb_substr(\substr($text, $offset), 0, 1), - $info['line'], - $info['column'], - $info['code'], - $info['highlight'] - ], - $info['line'], - $info['column'] - ); + $error = \sprintf('Unrecognized token "%s"', \mb_substr(\substr($text, $offset), 0, 1)); + + throw Compiler\Exception\UnrecognizedToken::fromOffset($error, $text, $offset); } if (true === $nextToken['keep']) { diff --git a/Llk/Parser.php b/Llk/Parser.php index b82333f1..40f5a8bc 100644 --- a/Llk/Parser.php +++ b/Llk/Parser.php @@ -49,8 +49,6 @@ */ class Parser { - use Compiler\Exception\ExceptionHelper; - /** * List of pragmas. * @@ -188,28 +186,18 @@ public function parse($text, $rule = null, $tree = true) } if (false === $this->backtrack()) { - $token = $this->_errorToken; + $token = $this->_errorToken; if (null === $this->_errorToken) { $token = $this->_tokenSequence->current(); } - $info = $this->getErrorPositionByOffset($text, $token['offset']); - - throw new Compiler\Exception\UnexpectedToken( - 'Unexpected token "%s" (%s) at line %d and column %d: ' . "\n%s\n%s", - 0, - [ - $token['value'], - $token['token'], - $info['line'], - $info['column'], - $info['code'], - $info['highlight'] - ], - $info['line'], - $info['column'] - ); + $error = \vsprintf('Unexpected token "%s" (%s)', [ + $token['value'], + $token['token'], + ]); + + throw Compiler\Exception\UnexpectedToken::fromOffset($error, $text, $token['offset']); } } while (true);