Repetition and separation
Section titled “Repetition and separation”These combinators handle patterns that repeat: lists, separated values, delimited blocks, and operator chains.
Basic repetition
Section titled “Basic repetition”Parseff.many applies a parser zero or more times. Returns a list of results. Always succeeds (returns [] if the parser fails immediately).
val many : (unit -> 'a) -> unit -> 'a list

let digits () = Parseff.many Parseff.digit ()
(* "123" -> [1; 2; 3] *)
(* "" -> [] *)
(* "abc" -> [] *)

Parseff.many1 is like Parseff.many but requires at least one match. Fails if the parser doesn’t succeed at least once.
val many1 : (unit -> 'a) -> unit -> 'a list

let digits1 () = Parseff.many1 Parseff.digit ()
(* "123" -> [1; 2; 3] *)
(* "" -> Error *)
(* "abc" -> Error *)

Parseff.count applies a parser exactly n times. Fails if the parser doesn’t match n times.
val count : int -> (unit -> 'a) -> unit -> 'a list

let three_digits () = Parseff.count 3 Parseff.digit ()
(* "123" -> [1; 2; 3] *)
(* "12" -> Error *)

Useful for fixed-width formats:
(** Parse one hexadecimal digit ([0-9], [a-f] or [A-F]) and return the
    matched character. Failures are reported under the label "hex digit". *)
let hex_digit () =
  let is_hex = function
    | '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> true
    | _ -> false
  in
  Parseff.satisfy is_hex ~label:"hex digit"
(* Parse #RRGGBB color *)
let hex_color () =
  let _ = Parseff.char '#' in
  let r = Parseff.count 2 hex_digit () in
  let g = Parseff.count 2 hex_digit () in
  let b = Parseff.count 2 hex_digit () in
  (r, g, b)
(* "#ff00aa" -> (['f';'f'], ['0';'0'], ['a';'a']) *)

Separated lists
Section titled “Separated lists”sep_by
Section titled “sep_by”Parseff.sep_by parses zero or more elements separated by a separator. The separator’s return value is discarded. Always succeeds.
val sep_by : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list

let csv_line () =
  Parseff.sep_by
    (fun () -> Parseff.take_while (fun c -> c <> ',' && c <> '\n'))
    (fun () -> Parseff.char ',')
    ()
(* "a,b,c" -> ["a"; "b"; "c"] *)
(* "" -> [""] *)

sep_by1
Section titled “sep_by1”Parseff.sep_by1 is like Parseff.sep_by but requires at least one element.
val sep_by1 : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list

let csv_line1 () =
  Parseff.sep_by1
    (fun () -> Parseff.take_while1 (fun c -> c <> ',' && c <> '\n') ~label:"value")
    (fun () -> Parseff.char ',')
    ()
(* "a,b,c" -> ["a"; "b"; "c"] *)
(* "a" -> ["a"] *)
(* "" -> Error *)

Delimiters and terminators
Section titled “Delimiters and terminators”between
Section titled “between”Parseff.between parses an opening delimiter, then the body, then a closing delimiter. Returns the body’s value.
val between : (unit -> 'a) -> (unit -> 'b) -> (unit -> 'c) -> unit -> 'c

let parens p = Parseff.between (fun () -> Parseff.char '(') (fun () -> Parseff.char ')') p
let parenthesized_digit () =
  parens
    (fun () ->
      Parseff.skip_whitespace ();
      let n = Parseff.digit () in
      Parseff.skip_whitespace ();
      n)
    ()
(* "(4)" -> 4 *)
(* "( 4 )" -> 4 *)
(* "(42)" -> Error (only parses one digit) *)

Works well for bracketed structures:
(** [braces p] runs [p] between a literal ['{'] and a literal ['}'] and
    yields the value produced by [p]. *)
let braces p =
  let lbrace () = Parseff.char '{' in
  let rbrace () = Parseff.char '}' in
  Parseff.between lbrace rbrace p
let brackets p = Parseff.between (fun () -> Parseff.char '[') (fun () -> Parseff.char ']') p

end_by
Section titled “end_by”Parseff.end_by parses zero or more elements, each followed by a separator. Unlike Parseff.sep_by, the separator comes after each element (including the last).
val end_by : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list

(* Parse semicolon-terminated statements *)
let statements () =
  Parseff.end_by
    (fun () -> Parseff.take_while1 (fun c -> c <> ';' && c <> '\n') ~label:"statement")
    (fun () -> Parseff.char ';')
    ()
(* "a;b;c;" -> ["a"; "b"; "c"] *)
(* "" -> [] *)

end_by1
Section titled “end_by1”Parseff.end_by1 is like Parseff.end_by but requires at least one element.
val end_by1 : (unit -> 'a) -> (unit -> 'b) -> unit -> 'a list

Operator chains
Section titled “Operator chains”These combinators parse sequences of values joined by operators, handling associativity. They’re the standard tool for expression parsing with operator precedence.
chainl1
Section titled “chainl1”Parseff.chainl1 parses one or more values separated by an operator, combining them left-associatively. The operator parser returns a function that combines two values.
val chainl1 : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> unit -> 'a

(* Parse "1-2-3" as ((1-2)-3) = -4 *)
let subtraction () =
  Parseff.chainl1
    (fun () -> Parseff.digit ())
    (fun () -> let _ = Parseff.char '-' in fun a b -> a - b)
    ()
(* "1-2-3" -> -4 (left-associative: (1-2)-3) *)

chainr1
Section titled “chainr1”Parseff.chainr1 is like Parseff.chainl1 but combines right-associatively.
val chainr1 : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> unit -> 'a

(* Parse "2^3^2" as 2^(3^2) = 512 *)
let power () =
  Parseff.chainr1
    (fun () -> Parseff.digit ())
    (fun () -> let _ = Parseff.char '^' in fun a b -> int_of_float (float_of_int a ** float_of_int b))
    ()
(* "2^3^2" -> 512 (right-associative: 2^(3^2)) *)

chainl
Section titled “chainl”Parseff.chainl is like Parseff.chainl1 but takes a default value. Returns the default if zero elements match.
val chainl : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> 'a -> unit -> 'a

let maybe_subtract () =
  Parseff.chainl
    (fun () -> Parseff.digit ())
    (fun () -> let _ = Parseff.char '-' in fun a b -> a - b)
    0
    ()
(* "1-2" -> -1 *)
(* "" -> 0 *)

chainr
Section titled “chainr”Parseff.chainr is like Parseff.chainr1 but with a default value for zero matches.
val chainr : (unit -> 'a) -> (unit -> 'a -> 'a -> 'a) -> 'a -> unit -> 'a

Complete example: JSON array
Section titled “Complete example: JSON array”

let integer () =
  let sign = Parseff.optional (fun () -> Parseff.char '-') () in
  let digits = Parseff.take_while1 (fun c -> c >= '0' && c <= '9') ~label:"digit" in
  let n = int_of_string digits in
  match sign with Some _ -> -n | None -> n
(** Parse a bracketed, comma-separated list of integers such as
    "[1, -2, 3]", allowing whitespace around every element, and require
    that the input ends right after the closing bracket. Returns the parsed
    integers as a list. *)
let json_array () =
  let comma () = Parseff.char ',' in
  (* Each element consumes the whitespace on both of its sides, so the
     separator parser only has to match the bare comma. *)
  let element () =
    Parseff.skip_whitespace ();
    let value = integer () in
    Parseff.skip_whitespace ();
    value
  in
  ignore (Parseff.char '[');
  Parseff.skip_whitespace ();
  let items = Parseff.sep_by element comma () in
  Parseff.skip_whitespace ();
  ignore (Parseff.char ']');
  Parseff.end_of_input ();
  items
(* Demo entry point: parse a sample array and print the sum of its
   elements, or report the position and message of an `Expected failure.
   Any other error is reported generically. *)
let () =
  match Parseff.parse "[1, -2, 3]" json_array with
  | Ok nums ->
      nums
      |> List.fold_left ( + ) 0
      |> Printf.printf "Sum: %d\n"
  | Error { pos; error = `Expected msg } ->
      Printf.printf "Error at %d: %s\n" pos msg
  | Error _ -> print_endline "Parse error"