You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Trac3r-rust/src/parser/parser.rs

255 lines
9.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, take, take_till, take_until, take_while};
use nom::bytes::complete::{tag, take_while1, take_while_m_n};
use nom::character::complete::{anychar, char, line_ending, newline, not_line_ending, one_of, multispace1};
use nom::character::complete::alphanumeric1 as alphanumeric;
use nom::character::is_alphabetic;
use nom::combinator::{cut, map, map_res, opt, value, verify, map_opt};
use nom::error::{ParseError, FromExternalError};
use nom::IResult;
use nom::multi::{many0, fold_many0};
use nom::number::complete::be_u16;
use nom::sequence::{delimited, preceded, terminated, tuple};
/// Reads a length-prefixed byte slice.
///
/// The first two bytes of `input` are decoded as a big-endian `u16`; that many
/// bytes following the prefix are then taken and returned as the output. Errors
/// from either step are propagated to the caller.
pub fn length_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let (payload, byte_count) = be_u16(input)?;
    take(byte_count)(payload)
}
/// An RGB colour with one byte per channel.
///
/// The type is three plain `u8` fields, so it is cheap to copy and supports
/// total equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Color {
    /// Red channel.
    pub red: u8,
    /// Green channel.
    pub green: u8,
    /// Blue channel.
    pub blue: u8,
}
/// One item recognised while parsing a script.
///
/// Each variant carries the text associated with the item as an owned `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScriptMeta {
    /// Text of a comment.
    Comment(String),
    /// Name of an `elem` declaration.
    Element(String),
    /// Text of a meta entry.
    Meta(String),
}
/// Converts a hexadecimal string (no `0x` prefix) into a `u8`.
///
/// # Errors
///
/// Returns the `ParseIntError` produced by `u8::from_str_radix` when `input`
/// is empty, contains a non-hex character, or does not fit in a `u8`.
pub fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
    const HEX_RADIX: u32 = 16;
    u8::from_str_radix(input, HEX_RADIX)
}
/// Returns `true` when `c` is one of `0-9`, `a-f` or `A-F`.
pub fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Parses one colour channel: exactly two hexadecimal digits converted to a `u8`.
pub fn hex_primary(input: &str) -> IResult<&str, u8> {
    // Minimum and maximum are both 2, so exactly two hex digits are consumed.
    let two_hex_digits = take_while_m_n(2, 2, is_hex_digit);
    map_res(two_hex_digits, from_hex)(input)
}
pub fn hex_color(input: &str) -> IResult<&str, Color> {
let (input, _) = tag("#")(input)?;
let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?;
Ok((input, Color { red, green, blue }))
}
/// Parses a brace-delimited scope, `{ ... }`, with optional whitespace on
/// either side.
///
/// On success the output is the text found between the braces and the
/// remaining input starts after the closing brace and any trailing whitespace.
/// The body is matched with `is_not("}")`, so it ends at the first `}`; nested
/// braces are not balanced.
///
/// The type parameter `E` is not used by the return type; callers must name it
/// explicitly.
// NOTE(review): `is_not` appears to reject an empty match, so `{}` would fail
// to parse -- confirm against the nom documentation.
pub fn scope<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str> {
    let braces = delimited(char('{'), is_not("}"), char('}'));
    // Yield the captured body rather than repeating the remaining input.
    delimited(opt(sp), braces, opt(sp))(input)
}
/// Parses an element declaration: the keyword `elem`, a name token, then a
/// brace-delimited scope.
///
/// Leading whitespace before the keyword is optional. On success the output is
/// the element name and the remaining input is whatever follows the scope. A
/// debug line with the name and the remaining input is printed to stdout.
pub fn elem<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str> {
    let (after_keyword, _) = delimited(opt(sp), tag("elem"), sp)(input)?;
    let (after_name, elem_name) = parse_token(after_keyword)?;
    let (rest, _) = scope::<'a, E>(after_name)?;
    println!("elem , name : {:?} || scope : {:?}", elem_name, rest);
    Ok((rest, elem_name))
}
/// Parses a single alphanumeric token from the start of the input.
///
/// The parser is built with `escaped`, using `\` as the control character, but
/// the set of escapable characters is empty.
// NOTE(review): with an empty `one_of("")` set no escape sequence can match, so
// a backslash inside a token presumably makes the parse fail -- confirm this is
// intended.
fn parse_token<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
    escaped(alphanumeric, '\\', one_of(""))(i)
}
/// One piece of a quoted string, as produced by `parse_fragment` and consumed
/// by `parse_string`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
    /// A run of ordinary characters borrowed from the input.
    Literal(&'a str),
    /// A single character decoded from a backslash escape sequence.
    EscapedChar(char),
    /// A backslash followed by whitespace; contributes nothing to the output string.
    EscapedWS,
}
/// Parses a unicode escape body of the form `u{XXXX}` (the leading backslash
/// has already been consumed by the caller) and yields the resulting `char`.
///
/// `XXXX` is between 1 and 6 hexadecimal digits. The parse fails when the
/// digits are not a valid `u32` or the value is not a valid unicode scalar.
fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    map_opt(
        map_res(
            // `u`, then `{`, 1 to 6 hex digits, `}`; only the digits are kept.
            preceded(
                char('u'),
                delimited(
                    char('{'),
                    take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
                    char('}'),
                ),
            ),
            // Fallible conversion of the digit text into a number.
            move |digits| u32::from_str_radix(digits, 16),
        ),
        // Not every u32 is a valid code point, so this step may reject the value.
        |code_point| std::char::from_u32(code_point),
    )(input)
}
/// Parses one backslash escape sequence and yields the character it denotes.
///
/// After the backslash the accepted forms are `u{XXXX}` (handled by
/// `parse_unicode`) and the single-character escapes `n`, `r`, `t`, `b`, `f`,
/// `\`, `/` and `"`.
fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    // Everything that may follow the backslash. Alternatives are tried in
    // order; each `value` maps a marker character to the character it stands for.
    let escape_body = alt((
        parse_unicode,
        value('\n', char('n')),
        value('\r', char('r')),
        value('\t', char('t')),
        value('\u{08}', char('b')),
        value('\u{0C}', char('f')),
        value('\\', char('\\')),
        value('/', char('/')),
        value('"', char('"')),
    ));
    preceded(char('\\'), escape_body)(input)
}
/// Parses a backslash followed by one or more whitespace characters and
/// yields the whitespace. `parse_fragment` uses this to recognise escaped
/// whitespace, which is later dropped from the string.
fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, &'a str, E> {
    let backslash = char('\\');
    preceded(backslash, multispace1)(input)
}
/// Parses a non-empty run of characters containing neither `"` nor `\`.
fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    // `verify` accepts the matched run only when the predicate holds, which
    // guarantees the literal is never empty.
    verify(is_not("\"\\"), |run: &str| !run.is_empty())(input)
}
/// Parses the next piece of a quoted string as a [`StringFragment`].
///
/// Tried in order: a literal run, an escaped character, escaped whitespace.
fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let literal = map(parse_literal, StringFragment::Literal);
    let escaped_char = map(parse_escaped_char, StringFragment::EscapedChar);
    // The matched whitespace itself is discarded; only the marker variant is kept.
    let escaped_ws = value(StringFragment::EscapedWS, parse_escaped_whitespace);
    alt((literal, escaped_char, escaped_ws))(input)
}
/// Parse a string. Use a loop of parse_fragment and push all of the fragments
/// into an output string.
fn parse_string<'a, E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>>(input: &'a str) -> IResult<&'a str, String, E> {
// fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop,
// and for each output value, calls a folding function on each output value.
let build_string = fold_many0(
// Our parser function parses a single string fragment
parse_fragment,
// Our init value, an empty string
String::new(),
// Our folding function. For each fragment, append the fragment to the
// string.
|mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
StringFragment::EscapedWS => {}
}
string
},
);
delimited(char('"'), build_string, char('"'))(input)
}
/// Parses a line comment: a `#`, the text up to the end of the line, and the
/// terminating newline.
///
/// On success the output is the comment text (without the `#` and without the
/// newline) and the remaining input starts after the newline. The text is also
/// printed to stdout as a debug line.
///
/// The part after `#` is wrapped in `cut`, so once a `#` has been seen a
/// malformed comment is reported through `cut` rather than as an ordinary
/// alternative mismatch.
pub fn comment<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    let (rest, text) = preceded(
        char('#'),
        cut(terminated(is_not("\n"), newline)),
    )(input)?;
    println!("comment : # {:?}", text);
    // Yield the comment text, not the remaining input, as the parsed value.
    Ok((rest, text))
}
/// Consumes zero or more whitespace characters (space, tab, carriage return,
/// newline) and yields the consumed slice. Matching nothing is still a success.
fn sp<'a>(i: &'a str) -> IResult<&'a str, &'a str> {
    take_while(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))(i)
}
/// Parses a whole script as a sequence of comments and `elem` declarations,
/// each optionally surrounded by whitespace.
///
/// Progress is printed to stdout on every iteration. The parsed items are
/// currently discarded: on success the remaining input is empty and the output
/// is a placeholder `ScriptMeta::Comment` holding an empty string.
///
/// # Errors
///
/// Returns the underlying parse error, rebuilt as `E` from its input position
/// and error kind, as soon as an item fails to parse. Previously this case
/// panicked.
pub fn parse_script<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, ScriptMeta, E> {
    println!("Full input string : {:?}\n", input);
    let mut remaining_str = input;
    while !remaining_str.is_empty() {
        println!("Remaining Length : {:?}", remaining_str.len());
        println!("Remaining String: {:?}", remaining_str);
        // `sp` and `elem` use nom's default error type, so the combined parser
        // does too; the annotation pins that down for the conversion below.
        let parsed: IResult<&'a str, ScriptMeta> = delimited(
            sp,
            alt((
                map(comment, |s| ScriptMeta::Comment(String::from(s))),
                map(elem::<'a, E>, |s| ScriptMeta::Element(String::from(s))),
            )),
            opt(sp),
        )(remaining_str);
        match parsed {
            Ok((rest, _item)) => remaining_str = rest,
            Err(err) => {
                // Re-express the default error in the caller's error type
                // instead of panicking on malformed input.
                return Err(err.map(|inner| E::from_error_kind(inner.input, inner.code)));
            }
        }
    }
    Ok((remaining_str, ScriptMeta::Comment(String::default())))
}
/*
// ( and any amount of bytes ). Returns the bytes between the ()
fn parens(input: &str) -> IResult<&str, &str> {
delimited(char('('), is_not(")"), char(')'))(input)
}
// `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match
// a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals.
let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit());
*/