Skip to content

Primitives are the building blocks of all parsers. These operations match input directly and form the foundation of more complex parsers.

Parseff.consume matches an exact literal string. Returns the matched string.

(** [consume s] matches the exact literal string [s] and returns the
    matched string. *)
val consume : string -> string
(* Consumes the literal "hello" immediately followed by "world".
   The first match is discarded; the second is the result. *)
let parser () =
  ignore (Parseff.consume "hello" : string);
  Parseff.consume "world"
(* Matches "helloworld" *)

Parseff.char matches an exact character. Returns the matched character.

(** [char c] matches exactly the character [c] and returns it. *)
val char : char -> char
(* Single-character punctuation parsers built on Parseff.char; each one
   returns the character it matched. *)
let comma () = Parseff.char ','
let left_paren () = Parseff.char '('
let right_paren () = Parseff.char ')'
(* Parse a comma-separated pair of digits wrapped in parentheses.
   The punctuation is matched and dropped; only the two digits are kept. *)
let pair () =
  ignore (left_paren () : char);
  let first = Parseff.digit () in
  ignore (comma () : char);
  let second = Parseff.digit () in
  ignore (right_paren () : char);
  (first, second)
(* Matches "(1,2)" -> (1, 2) *)

Parseff.satisfy matches a character satisfying the given predicate. The ~label parameter is used in error messages.

(** [satisfy pred ~label] matches one character for which [pred] holds and
    returns it. [label] is used in error messages. *)
val satisfy : (char -> bool) -> label:string -> char
(* Match any vowel, lower- or uppercase *)
let vowel () =
  (* Partially applied: the remaining argument is the candidate character. *)
  let is_vowel = String.contains "aeiouAEIOU" in
  Parseff.satisfy is_vowel ~label:"vowel"
(* Match any ASCII digit and return it as a character *)
let digit_char () =
  let is_digit c = '0' <= c && c <= '9' in
  Parseff.satisfy is_digit ~label:"digit"
(* Match any uppercase ASCII letter *)
let uppercase () =
  let is_upper = function 'A' .. 'Z' -> true | _ -> false in
  Parseff.satisfy is_upper ~label:"uppercase letter"

These operations scan multiple characters efficiently.

Parseff.take_while consumes characters while the predicate holds. Returns the matched string (may be empty). Always succeeds.

(** [take_while pred] consumes characters while [pred] holds and returns
    the matched string, which may be empty. Always succeeds. *)
val take_while : (char -> bool) -> string
(* Parse a (possibly empty) run of ASCII digits *)
let digits () =
  Parseff.take_while (function '0' .. '9' -> true | _ -> false)
(* Parse identifier: the first character must be a lowercase ASCII letter
   or '_'; the following characters may additionally be digits.
   Returns the whole identifier as one string. *)
let identifier () =
  let is_head = function '_' | 'a' .. 'z' -> true | _ -> false in
  let is_tail = function
    | '_' | 'a' .. 'z' | '0' .. '9' -> true
    | _ -> false
  in
  let head = Parseff.satisfy is_head ~label:"letter" in
  let tail = Parseff.take_while is_tail in
  Printf.sprintf "%c%s" head tail
(* "foo_bar123" -> "foo_bar123" *)
(* "123abc" -> identifier fails (first char must be a lowercase letter or '_') *)

Parseff.take_while1 is like Parseff.take_while but requires at least one character. Fails if no characters match.

(** [take_while1 pred ~label] is like [take_while] but requires at least
    one matching character; it fails if no characters match. *)
val take_while1 : (char -> bool) -> label:string -> string
(* Parse non-empty digits: at least one ASCII digit is required *)
let digits1 () =
  let is_digit = function '0' .. '9' -> true | _ -> false in
  Parseff.take_while1 is_digit ~label:"digit"
(* Parse identifier (simpler than take_while approach).
   A single predicate covers every position, so a digit is accepted as the
   first character here as well. *)
let identifier1 () =
  let is_ident_char = function
    | '_' | 'a' .. 'z' | '0' .. '9' -> true
    | _ -> false
  in
  Parseff.take_while1 is_ident_char ~label:"identifier character"
(* Matches "foo123" -> "foo123" *)
(* Fails on "" -> Error: expected identifier character *)

Parseff.skip_while skips characters while the predicate holds (returns unit). Always succeeds. More efficient than Parseff.take_while when you don’t need the matched string.

(** [skip_while pred] skips characters while [pred] holds and returns unit.
    Always succeeds. *)
val skip_while : (char -> bool) -> unit
(* Skip any run of space characters (' ' only) *)
let skip_spaces () = Parseff.skip_while (Char.equal ' ')
(* Parse comma-separated values.
   Reads one field: skips leading spaces, takes everything up to (but not
   including) the next ',' or newline, and returns it without trailing
   spaces. *)
let csv_value () =
  skip_spaces ();
  let raw = Parseff.take_while (fun c -> c <> ',' && c <> '\n') in
  (* [take_while] has already consumed any spaces in front of the
     delimiter, so they must be stripped from the captured string; skipping
     spaces afterwards would find nothing left to skip. *)
  let rec content_length i =
    if i > 0 && raw.[i - 1] = ' ' then content_length (i - 1) else i
  in
  String.sub raw 0 (content_length (String.length raw))
(* " hello " -> "hello" *)

Parseff.match_regex matches a compiled regular expression. The regex must be compiled with Re.compile.

(** [match_regex re] matches the compiled regular expression [re] and
    returns a string. [re] must be compiled with [Re.compile]. *)
val match_regex : Re.re -> string
(* Pre-compile at module level. Never inside a parser function *)
let number_re = "[0-9]+" |> Re.Posix.re |> Re.compile

(* Matches the pre-compiled pattern above: one or more digits. *)
let number_from_re () = Parseff.match_regex number_re
(* Regex. Use for complex patterns (alternation, grouping) *)
let number_re = "[0-9]+" |> Re.Posix.re |> Re.compile

let number () = Parseff.match_regex number_re
(* take_while. No regex overhead for simple predicates *)
let number () =
  let is_digit = function '0' .. '9' -> true | _ -> false in
  Parseff.take_while1 is_digit ~label:"digit"

Parseff.fail aborts parsing with an error message.

(** [fail msg] aborts parsing with the error message [msg]. *)
val fail : string -> 'a
(* Validate range.
   Parses a non-empty run of decimal digits and returns it as an int in
   0..255. Any larger number, including a digit run too long to fit in an
   [int], is reported as a parse failure. *)
let byte () =
  let digits =
    Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit"
  in
  (* [int_of_string] raises [Failure] when the digits overflow [int];
     [int_of_string_opt] lets that case share the ordinary range error.
     A digits-only string is never negative, so only the upper bound needs
     checking. *)
  match int_of_string_opt digits with
  | Some n when n <= 255 -> n
  | Some _ | None -> Parseff.fail "number must be between 0 and 255"
(* Parse: "128" -> 128 *)
(* Parse: "300" -> Error: number must be between 0 and 255 *)

Parseff.error aborts parsing with a user-defined error value. Custom errors are caught by Parseff.parse and returned in the result.

(** [error e] aborts parsing with the user-defined error value [e]. Custom
    errors are caught by [Parseff.parse] and returned in the result. *)
val error : 'e -> 'a
(* Parses a non-empty run of decimal digits and returns it when it lies in
   0..255; otherwise aborts with a custom error value carrying the number. *)
let validated_number () =
  let digits =
    Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit"
  in
  (* [int_of_string] would raise [Failure] on a digit run that overflows
     [int]. Such a number is certainly above 255, so saturate to [max_int]
     and let it be reported as out of range. *)
  let n = Option.value (int_of_string_opt digits) ~default:max_int in
  if n < 0 then Parseff.error (`Negative n)
  else if n > 255 then Parseff.error (`Out_of_range n)
  else n
(* Run [validated_number] on "300" and print one line describing the
   outcome; custom errors are matched by their variant. *)
let () =
  let outcome = Parseff.parse "300" validated_number in
  match outcome with
  | Error { error = `Negative n; _ } -> Printf.printf "%d is negative\n" n
  | Error { error = `Out_of_range n; _ } ->
      Printf.printf "%d is too large (max 255)\n" n
  | Error _ -> Printf.printf "Parse error\n"
  | Ok n -> Printf.printf "Got %d\n" n
(* Parses a non-empty run of decimal digits and returns it when it lies in
   0..255; otherwise aborts with a payload-free custom error. *)
let number_checked () =
  let digits =
    Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit"
  in
  (* [int_of_string] raises [Failure] on overflow; a digit run that does
     not fit in [int] is by definition too large. *)
  match int_of_string_opt digits with
  | None -> Parseff.error `Too_large
  | Some n ->
      if n > 255 then Parseff.error `Too_large
      (* Cannot trigger for digits-only input; kept so the set of error
         variants this parser can produce stays the same. *)
      else if n < 0 then Parseff.error `Negative
      else n

Parseff.end_of_input succeeds only if no input remains. Use this to ensure the entire input has been consumed.

(** [end_of_input ()] succeeds only if no input remains. Use it to ensure
    the entire input has been consumed. *)
val end_of_input : unit -> unit
(* Parse complete input: exactly one digit, with nothing after it *)
let complete_number () =
  let digit = Parseff.digit () in
  let () = Parseff.end_of_input () in
  digit
(* Matches: "5" -> 5 *)
(* Fails: "52" -> Error: expected end of input *)
(* Without end_of_input, "52" would succeed and return 5,
   leaving "2" unparsed *)

Parseff.position returns the current parser offset in bytes from the start of the input.

(** [position ()] returns the current parser offset in bytes from the start
    of the input. *)
val position : unit -> int