Primitives
Section titled “Primitives”Primitives are the building blocks of all parsers. These operations match input directly and form the foundation of more complex parsers.
String and character matching
Section titled “String and character matching”consume
Section titled “consume”Parseff.consume matches an exact literal string. Returns the matched string.
val consume : string -> string
let parser () = let _ = Parseff.consume "hello" in Parseff.consume "world"
(* Matches "helloworld" *)Parseff.char matches an exact character. Returns the matched character.
val char : char -> char
let comma () = Parseff.char ','
let left_paren () = Parseff.char '('
let right_paren () = Parseff.char ')'
(* Parse a comma-separated pair *)let pair () = let _ = left_paren () in let a = Parseff.digit () in let _ = comma () in let b = Parseff.digit () in let _ = right_paren () in (a, b)
(* Matches "(1,2)" -> (1, 2) *)satisfy
Section titled “satisfy”Parseff.satisfy matches a character satisfying the given predicate. The ~label parameter is used in error messages.
val satisfy : (char -> bool) -> label:string -> char(* Match any vowel *)let vowel () = Parseff.satisfy (fun c -> String.contains "aeiouAEIOU" c) ~label:"vowel"
(* Match any digit *)let digit_char () = Parseff.satisfy (fun c -> c >= '0' && c <= '9') ~label:"digit"
(* Match any uppercase letter *)let uppercase () = Parseff.satisfy (fun c -> c >= 'A' && c <= 'Z') ~label:"uppercase letter"Character scanning
Section titled “Character scanning”These operations scan multiple characters efficiently.
take_while
Section titled “take_while”Parseff.take_while consumes characters while the predicate holds. Returns the matched string (may be empty). Always succeeds.
val take_while : (char -> bool) -> string(* Parse digits *)let digits () = Parseff.take_while (fun c -> c >= '0' && c <= '9')
(* Parse identifier *)let identifier () = let first = Parseff.satisfy (fun c -> c = '_' || (c >= 'a' && c <= 'z')) ~label:"letter" in let rest = Parseff.take_while (fun c -> c = '_' || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) in String.make 1 first ^ rest
(* "foo_bar123" -> "foo_bar123" *)(* "123abc" -> identifier fails (first char must be a lowercase letter or underscore) *)take_while1
Section titled “take_while1”Parseff.take_while1 is like Parseff.take_while but requires at least one character. Fails if no characters match.
val take_while1 : (char -> bool) -> label:string -> string(* Parse non-empty digits *)let digits1 () = Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit"
(* Parse identifier characters in one step (simpler than the take_while approach, but unlike identifier it also accepts a leading digit) *)let identifier1 () = Parseff.take_while1 (fun c -> c = '_' || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) ~label:"identifier character"
(* Matches "foo123" -> "foo123" *)(* Fails on "" -> Error: expected identifier character *)skip_while
Section titled “skip_while”Parseff.skip_while skips characters while the predicate holds (returns unit). Always succeeds. More efficient than Parseff.take_while when you don’t need the matched string.
val skip_while : (char -> bool) -> unit(* Skip spaces *)let skip_spaces () = Parseff.skip_while (fun c -> c = ' ')
(* Parse comma-separated values *)let csv_value () = skip_spaces (); let value = Parseff.take_while (fun c -> c <> ',' && c <> '\n') in skip_spaces (); value
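(* " hello " -> "hello " (the trailing space is kept: take_while stops only at ',' or '\n', so the final skip_spaces has nothing left to skip) *)Regular expressions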
Section titled “Regular expressions”match_regex
Section titled “match_regex”Parseff.match_regex matches a compiled regular expression. The regex must be compiled with Re.compile.
val match_regex : Re.re -> string(* Pre-compile at module level. Never inside a parser function *)let number_re = Re.compile (Re.Posix.re "[0-9]+")let number_from_re () = Parseff.match_regex number_re(* Regex. Use for complex patterns (alternation, grouping) *)let number_re = Re.compile (Re.Posix.re "[0-9]+")let number () = Parseff.match_regex number_re
(* take_while. No regex overhead for simple predicates *)let number () = Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit"Control flow
Section titled “Control flow”Parseff.fail aborts parsing with an error message.
val fail : string -> 'a(* Validate range *)let byte () = let n = int_of_string (Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit") in if n >= 0 && n <= 255 then n else Parseff.fail "number must be between 0 and 255"
(* Parse: "128" -> 128 *)(* Parse: "300" -> Error: number must be between 0 and 255 *)Parseff.error aborts parsing with a user-defined error value. Custom errors are caught by Parseff.parse and returned in the result.
val error : 'e -> 'a
let validated_number () = let s = Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit" in let n = int_of_string s in if n < 0 then Parseff.error (`Negative n) else if n > 255 then Parseff.error (`Out_of_range n) else n
let () = match Parseff.parse "300" validated_number with | Ok n -> Printf.printf "Got %d\n" n | Error { error = `Out_of_range n; _ } -> Printf.printf "%d is too large (max 255)\n" n | Error { error = `Negative n; _ } -> Printf.printf "%d is negative\n" n | Error _ -> Printf.printf "Parse error\n"
let number_checked () = let s = Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit" in let n = int_of_string s in if n > 255 then Parseff.error `Too_large else if n < 0 then Parseff.error `Negative else n
end_of_input
Section titled “end_of_input”Parseff.end_of_input succeeds only if no input remains. Use this to ensure the entire input has been consumed.
val end_of_input : unit -> unit(* Parse complete input *)let complete_number () = let n = Parseff.digit () in Parseff.end_of_input (); n
(* Matches: "5" -> 5 *)(* Fails: "52" -> Error: expected end of input *)
(* Without end_of_input, "52" would succeed and return 5, leaving "2" unparsed *)position
Section titled “position”Parseff.position returns the current parser offset in bytes from the start of the input.
val position : unit -> int