22-- |
33-- | #### unicode dependency
44-- |
5- -- | Some of the parsers in this module depend on the __unicode__ package.
5+ -- | Some of the parsers in this module depend on the
6+ -- | [__unicode__](https://pursuit.purescript.org/packages/purescript-unicode)
7+ -- | package.
68-- | The __unicode__ package is large; about half a megabyte unminified.
79-- | If code which depends on __parsing__ is “tree-shaken” or
810-- | “dead-code-eliminated,” then
@@ -24,6 +26,8 @@ module Parsing.String.Basic
2426 , alphaNum
2527 , intDecimal
2628 , number
29+ , takeWhile
30+ , takeWhile1
2731 , whiteSpace
2832 , skipSpaces
2933 , oneOf
@@ -41,13 +45,13 @@ import Data.Int as Data.Int
4145import Data.Maybe (Maybe (..))
4246import Data.Number (infinity , nan )
4347import Data.Number as Data.Number
44- import Data.String (CodePoint , singleton , takeWhile )
48+ import Data.String (CodePoint , singleton )
49+ import Data.String as String
4550import Data.String.CodePoints (codePointFromChar )
4651import Data.String.CodeUnits as SCU
47- import Data.Tuple (fst )
4852import Parsing (ParserT , fail )
4953import Parsing.Combinators (choice , tryRethrow , (<?>), (<|>), (<~?>))
50- import Parsing.String (consumeWith , match , regex , satisfy , satisfyCodePoint , string )
54+ import Parsing.String (consumeWith , regex , satisfy , satisfyCodePoint , string )
5155import Partial.Unsafe (unsafeCrashWith )
5256
5357-- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`.
@@ -112,7 +116,7 @@ number =
112116 section <- numberRegex
113117 -- https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseFloat
114118 case Data.Number .fromString section of
115- Nothing -> fail $ " Number.fromString failed "
119+ Nothing -> fail " Expected Number "
116120 Just x -> pure x
117121 ] <|> fail " Expected Number"
118122
@@ -134,7 +138,7 @@ intDecimal :: forall m. ParserT String m Int
134138intDecimal = tryRethrow do
-- NOTE(review): `tryRethrow` comes from Parsing.Combinators; presumably it
-- backtracks on failure — confirm against that module.
-- First failure path: the text does not match `intDecimalRegex`, which is
-- reported as "Expected Int".
135139 section <- intDecimalRegex <|> fail " Expected Int"
-- Second failure path: the matched text cannot be converted by
-- `Data.Int.fromString` (presumably when the value does not fit in an `Int`
-- — TODO confirm).
136140 case Data.Int .fromString section of
-- The removed line reported this as "Int.fromString failed"; the added line
-- uses "Expected Int" so both failure paths share one message.
137- Nothing -> fail $ " Int.fromString failed "
141+ Nothing -> fail " Expected Int "
138142 Just x -> pure x
139143
140144-- Non-exported regex is compiled at startup time.
@@ -153,17 +157,14 @@ satisfyCP p = satisfy (p <<< codePointFromChar)
153157-- | Always succeeds. Will consume only when matched whitespace string
154158-- | is non-empty.
155159whiteSpace :: forall m . ParserT String m String
-- The removed line captured the text consumed by `skipSpaces` via `match`.
-- The added line collects the run of `isSpace` characters directly with
-- `takeWhile`, so `whiteSpace` no longer depends on `skipSpaces`.
156- whiteSpace = fst <$> match skipSpaces
160+ whiteSpace = takeWhile isSpace
157161
158162-- | Skip whitespace characters satisfying `Data.CodePoint.Unicode.isSpace`
159163-- | and throw them away.
160164-- |
161165-- | Always succeeds. Will only consume when some characters are skipped.
162166skipSpaces :: forall m . ParserT String m Unit
-- The removed lines implemented the skip inline with `consumeWith`, taking the
-- `isSpace` prefix and dropping the same number of code units from the input.
-- The added line reuses `whiteSpace` and discards its `String` result with
-- `void`.
163- skipSpaces = consumeWith \input -> do
164- let consumed = takeWhile isSpace input
165- let remainder = SCU .drop (SCU .length consumed) input
166- Right { value: unit, consumed, remainder }
167+ skipSpaces = void whiteSpace
167168
168169-- | Match one of the BMP `Char`s in the array.
169170oneOf :: forall m . Array Char -> ParserT String m Char
@@ -180,3 +181,66 @@ oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> sho
180181-- | Match any Unicode character not in the array.
181182noneOfCodePoints :: forall m . Array CodePoint -> ParserT String m CodePoint
-- `flip notElem ss` accepts a code point only when it is absent from `ss`.
-- The label text is "none of " followed by `show` of the excluded code points,
-- each converted to a `String` with `singleton`.
-- NOTE(review): `<~?>` presumably attaches that lazily built text as the
-- failure message — confirm against Parsing.Combinators.
182183noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> " none of " <> show (singleton <$> ss)
184+
185+ -- | Take the longest `String` for which the characters satisfy the
186+ -- | predicate.
187+ -- |
-- | The predicate is applied to `CodePoint`s. This parser does not fail:
-- | when the first character does not satisfy the predicate the result is
-- | the empty `String`. Use `takeWhile1` to require at least one character.
-- |
188+ -- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
189+ -- | for useful predicates.
190+ -- |
191+ -- | Example:
192+ -- |
193+ -- | ```
194+ -- | runParser "Tackling the Awkward Squad" do
195+ -- | takeWhile Data.CodePoint.Unicode.isLetter
196+ -- | ```
197+ -- | ---
198+ -- | ```
199+ -- | Right "Tackling"
200+ -- | ```
201+ -- |
202+ -- | You should prefer `takeWhile isLetter` to
203+ -- | `fromCharArray <$> Data.Array.many letter`.
204+ takeWhile :: forall m . (CodePoint -> Boolean ) -> ParserT String m String
205+ takeWhile predicate =
206+ consumeWith \s ->
207+ let
-- Longest prefix of the remaining input `s` whose code points all satisfy
-- `predicate`.
208+ value = String .takeWhile predicate s
209+ in
-- The callback only ever returns `Right`; there is no `Left` branch.
210+ Right
211+ { consumed: value
-- `SCU` is `Data.String.CodeUnits`: the remainder is `s` with exactly as
-- many code units dropped as the matched prefix occupies.
212+ , remainder: SCU .drop (SCU .length value) s
213+ , value
214+ }
215+
216+ -- | Take the longest `String` for which the characters satisfy the
217+ -- | predicate. Require at least 1 character. You should supply an
218+ -- | expectation description for the error
219+ -- | message for when the predicate fails on the first character.
220+ -- |
-- | When no character matches, the parser fails with the built-in message
-- | "Expected character satisfying predicate".
-- |
221+ -- | See [__`Data.CodePoint.Unicode`__](https://pursuit.purescript.org/packages/purescript-unicode/docs/Data.CodePoint.Unicode)
222+ -- | for useful predicates.
223+ -- |
224+ -- | Example:
225+ -- |
226+ -- | ```
227+ -- | runParser "Tackling the Awkward Squad" do
228+ -- | takeWhile1 Data.CodePoint.Unicode.isLetter <?> "a letter"
229+ -- | ```
230+ -- | ---
231+ -- | ```
232+ -- | Right "Tackling"
233+ -- | ```
234+ takeWhile1 :: forall m . (CodePoint -> Boolean ) -> ParserT String m String
235+ takeWhile1 predicate =
236+ consumeWith \s ->
237+ let
-- Longest prefix of the remaining input `s` whose code points all satisfy
-- `predicate`.
238+ value = String .takeWhile predicate s
-- Length of that prefix in code units (`SCU` is `Data.String.CodeUnits`).
239+ len = SCU .length value
240+ in
-- A zero-length prefix means the first character did not satisfy the
-- predicate, which is the failure case.
241+ if len > 0 then Right
242+ { consumed: value
-- NOTE(review): `SCU .length value` is recomputed here although `len`
-- already holds the same value; `SCU .drop len s` would be equivalent.
243+ , remainder: SCU .drop (SCU .length value) s
244+ , value
245+ }
246+ else Left " Expected character satisfying predicate"
0 commit comments