add or in regex
This commit is contained in:
parent
f4883d3bd2
commit
81d8a58363
255
src/main.rs
255
src/main.rs
@ -1,4 +1,5 @@
|
||||
use std::env;
|
||||
use std::f32::consts::E;
|
||||
use std::io::{self, BufRead};
|
||||
use std::str::Chars;
|
||||
|
||||
@ -37,7 +38,9 @@ fn get_lines() -> Vec<String> {
|
||||
struct Snapshots<'a> {
|
||||
position: Chars<'a>,
|
||||
rex_position: Option<Chars<'a>>,
|
||||
min_match: u8,
|
||||
rex_or_position: Option<Chars<'a>>,
|
||||
|
||||
min_match: i8,
|
||||
done_matches: u8,
|
||||
matched: Vec<char>,
|
||||
}
|
||||
@ -47,6 +50,8 @@ impl<'a> Snapshots<'a> {
|
||||
Self {
|
||||
position: position,
|
||||
rex_position: None,
|
||||
rex_or_position: None,
|
||||
|
||||
min_match: 1,
|
||||
done_matches: 1,
|
||||
matched: vec![],
|
||||
@ -83,12 +88,15 @@ fn check_for_or(mut regex: Chars, last_match: Vec<char>) -> bool {
|
||||
}
|
||||
}
|
||||
fn check_if_tail_empty(mut regex: Chars, last_match: Vec<char>) -> bool {
|
||||
println!("Lets see if {:?} is null -> last: {:?}", regex, last_match);
|
||||
// lets disallow *? for now - havent considered in other parts anyway - and also not via brackets and stuff
|
||||
let mut enforce_char = last_match
|
||||
let enforce_char = last_match
|
||||
.iter()
|
||||
.all(|x| *x != '*' && *x != '?' && *x != '+');
|
||||
.filter(|&x| *x != '*' && *x != '?' && *x != '+')
|
||||
.count()
|
||||
== 0;
|
||||
|
||||
let mut disallow_chars = false;
|
||||
let disallow_chars = false;
|
||||
let c: char = match regex.next() {
|
||||
None => return true,
|
||||
Some(c) => c,
|
||||
@ -103,6 +111,10 @@ fn check_if_tail_empty(mut regex: Chars, last_match: Vec<char>) -> bool {
|
||||
'?' | '*' => return check_if_tail_empty(regex, vec!['*']),
|
||||
'|' => return check_for_or(regex, vec!['|']),
|
||||
'$' => return regex.next().is_none(),
|
||||
')' => {
|
||||
regex.next();
|
||||
return check_if_tail_empty(regex, vec![')']);
|
||||
}
|
||||
// dont wanna [...] rn -> dont care
|
||||
_ => {
|
||||
if disallow_chars {
|
||||
@ -131,6 +143,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
let mut last_match: Vec<char> = Vec::new();
|
||||
let mut free_match = false;
|
||||
let mut hist: Vec<Snapshots> = vec![];
|
||||
let mut brackets: i8 = 0;
|
||||
//for line_char in line_iter {
|
||||
loop {
|
||||
if !hist.is_empty() {
|
||||
@ -140,6 +153,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
let line_char = match line_iter.next() {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
println!("THERE IS NOTHING MORE :-(");
|
||||
if check_if_tail_empty(rex_iter, last_match) {
|
||||
return Ok(line.clone());
|
||||
} else {
|
||||
@ -149,12 +163,13 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
};
|
||||
|
||||
println!(
|
||||
"\t\tvor rematch: {:?} - {:?} - {:?} - {:?} -- {:?}",
|
||||
"\t\tvor rematch: {:?} - {:?} - {:?} - {:?} -- {:?}-- {:?}",
|
||||
line_char,
|
||||
last_match,
|
||||
line_iter,
|
||||
free_match,
|
||||
hist.last().unwrap().matched
|
||||
hist.last().unwrap().matched,
|
||||
rex_iter
|
||||
);
|
||||
// handle quantifier:
|
||||
if free_match {
|
||||
@ -190,11 +205,18 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
rex_iter, line_char, rex_char
|
||||
);
|
||||
let success_match: bool;
|
||||
(success_match, free_match) = handle_rex_match(rex_iter, rex_char, line_char, &mut hist);
|
||||
(success_match, free_match, brackets) =
|
||||
handle_rex_match(rex_iter, rex_char, line_char, &mut hist, brackets);
|
||||
|
||||
// since i do this more a less unexact, i should probably save me such validation - but whatever...
|
||||
if brackets < 0 {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
rex_iter = hist.last().unwrap().rex_position.as_ref().unwrap().clone();
|
||||
println!(
|
||||
"\nREGEX POS NACH MATCH - {:?} - {:?} - {:?}\n",
|
||||
rex_iter, line_char, rex_char
|
||||
"\nREGEX POS NACH MATCH - {:?} - {:?} - {:?}- {:?}\n",
|
||||
rex_iter, line_char, rex_char, success_match
|
||||
);
|
||||
println!("Result: {:?} - {:?}", success_match, hist);
|
||||
if !success_match {
|
||||
@ -207,6 +229,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
|
||||
// kind of crappy when i cannot match the actual char bc at that point we think that we need to match - might be better to
|
||||
// rewrite this whole stuff a little more structured i guess...
|
||||
// TODO: brackets / or with quantifier .... buuuuut tbh - most of the nested stuff wouldnt work rn
|
||||
if let Some(tmpchar) = rex_iter.clone().next() {
|
||||
if tmpchar == '?' || tmpchar == '*' {
|
||||
snap.min_match = 0;
|
||||
@ -215,24 +238,52 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
}
|
||||
}
|
||||
|
||||
while snap.done_matches <= snap.min_match {
|
||||
let mut helper = false;
|
||||
// yeah i know kinda hacky, but rn and mostly just wanna finish (like above with the bracket var)
|
||||
while snap.done_matches as i8 <= snap.min_match && snap.min_match != -1 {
|
||||
if hist.is_empty() {
|
||||
return Err(());
|
||||
match get_pipe(rex_iter) {
|
||||
None => {
|
||||
return Err(());
|
||||
}
|
||||
Some(pip_pos) => {
|
||||
rex_iter = pip_pos;
|
||||
line_iter = line.chars();
|
||||
hist = vec![];
|
||||
helper = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
snap = hist.pop().unwrap();
|
||||
}
|
||||
snap = hist.pop().unwrap();
|
||||
}
|
||||
|
||||
if snap.done_matches > snap.min_match {
|
||||
if helper {
|
||||
continue;
|
||||
}
|
||||
|
||||
if snap.done_matches as i8 > snap.min_match {
|
||||
line_iter = snap.position;
|
||||
rex_iter = snap.rex_position.unwrap();
|
||||
snap.done_matches -= 1;
|
||||
for _ in 0..snap.done_matches {
|
||||
line_iter.next();
|
||||
|
||||
// TODO: actually need the other or branch for additional backtracking (if we wanna backtracking before the or clause...)
|
||||
if snap.min_match == -1 {
|
||||
println!("looks like a or {:?}", snap.rex_or_position);
|
||||
snap.min_match = 0;
|
||||
snap.done_matches = 0;
|
||||
rex_iter = snap.rex_or_position.unwrap();
|
||||
} else {
|
||||
// for now just ignore the other or arm
|
||||
snap.done_matches -= 1;
|
||||
rex_iter = snap.rex_position.unwrap();
|
||||
for _ in 0..snap.done_matches {
|
||||
line_iter.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
println!(
|
||||
"BACKTRACKING DONE should now be one less than before, trying now with {:?}",
|
||||
rex_iter,
|
||||
"BACKTRACKING DONE should now be one less than before, trying now with {:?} - full hist is now {:?}",
|
||||
rex_iter, hist
|
||||
)
|
||||
//let mut snap = rex_iter = rex.chars();
|
||||
}
|
||||
@ -246,14 +297,81 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
|
||||
Ok(line)
|
||||
}
|
||||
|
||||
/// Scans forward for the `)` that closes the group `rex_iter` is currently inside.
///
/// The scan starts at nesting depth 1, i.e. it assumes the opening `(` of the
/// surrounding group has already been consumed. Every further `(` raises the
/// depth and every `)` lowers it again.
///
/// Returns `Some(iter)` with the iterator positioned directly after the
/// matching `)`. Returns `None` when the input ends while the depth is still 1,
/// which the callers treat as "there was no surrounding group at all".
///
/// If the input ends inside a nested group (depth other than 1) the regex is
/// considered malformed: an error is printed to stderr and the process exits
/// with status 1.
///
/// Escaped brackets are not recognised; `\(` and `\)` count like plain ones.
fn get_closing<'a>(mut rex_iter: Chars<'a>) -> Option<Chars<'a>> {
    let mut local_bracket_count = 1;

    loop {
        match rex_iter.next() {
            Some('(') => local_bracket_count += 1,
            Some(')') => {
                local_bracket_count -= 1;
                if local_bracket_count == 0 {
                    // Everything left in the iterator comes after the group.
                    return Some(rex_iter);
                }
            }
            Some(_) => {}
            None => {
                if local_bracket_count == 1 {
                    // Probably a bug, but one could argue this is the
                    // bracket-less case: we matched up to `|` and are done.
                    return None;
                }
                eprintln!("Could not find closing bracket - one could argue the regex mathes in that case - but probably there is just an error in the code ¯\\_(ツ)_/¯");
                std::process::exit(1);
            }
        }
    }
}
|
||||
|
||||
/// Looks for the next `|` that belongs to the nesting level the scan starts on.
///
/// The scan begins at depth 1. A `(` goes one level deeper, a `)` comes back
/// up. A `|` only counts while the depth is 1; pipes inside nested groups are
/// skipped.
///
/// Returns `Some(iter)` positioned right after that `|`. Returns `None` when
/// the current level is closed by a `)` first, or when the input runs out.
///
/// Escaped brackets are not supported here, so do not escape them in the regex.
fn get_pipe<'a>(mut rex_iter: Chars<'a>) -> Option<Chars<'a>> {
    let mut depth = 1;

    while let Some(symbol) = rex_iter.next() {
        if symbol == '(' {
            depth += 1;
        } else if symbol == ')' {
            depth -= 1;
            if depth == 0 {
                // The level we started on ended without an alternative.
                return None;
            }
        } else if symbol == '|' && depth == 1 {
            return Some(rex_iter);
        }
    }

    None
}
|
||||
|
||||
fn handle_rex_match<'a>(
|
||||
mut rex_iter: Chars<'a>,
|
||||
rex_char: char,
|
||||
line_char: char,
|
||||
hist: &mut Vec<Snapshots<'a>>,
|
||||
) -> (bool, bool) {
|
||||
mut bracket_counter: i8,
|
||||
) -> (bool, bool, i8) {
|
||||
let mut last_match: Vec<char> = Vec::new();
|
||||
let mut free_match = false;
|
||||
println!("{:?} could be anything ...... ", rex_char);
|
||||
|
||||
let success_match = match rex_char {
|
||||
'\\' => {
|
||||
let tmpbool: bool;
|
||||
@ -288,6 +406,101 @@ fn handle_rex_match<'a>(
|
||||
hist.last_mut().unwrap().min_match = 0;
|
||||
true
|
||||
}
|
||||
')' => {
|
||||
bracket_counter -= 1;
|
||||
let foo = rex_iter.next().unwrap();
|
||||
return handle_rex_match(rex_iter, foo, line_char, hist, bracket_counter);
|
||||
}
|
||||
'|' => {
|
||||
if bracket_counter == 0 {
|
||||
// no brackets then first or arm has matched.... lets just encode some stuff in this helper variable
|
||||
let mut foo = rex_iter.clone();
|
||||
foo.next();
|
||||
while foo.next().is_some() {
|
||||
rex_iter.next();
|
||||
hist.last_mut().unwrap().setpos(rex_iter);
|
||||
return (true, true, bracket_counter);
|
||||
}
|
||||
}
|
||||
// TODO: get the first "or" part
|
||||
//let or_branch: Vec<char>;
|
||||
//let rex_it_cp = rex_iter.clone();
|
||||
|
||||
match get_closing(rex_iter.clone()) {
|
||||
None => {
|
||||
let mut foo = rex_iter.clone();
|
||||
foo.next();
|
||||
while foo.next().is_some() {
|
||||
rex_iter.next();
|
||||
hist.last_mut().unwrap().setpos(rex_iter);
|
||||
return (true, true, bracket_counter);
|
||||
}
|
||||
}
|
||||
Some(it) => rex_iter = it,
|
||||
};
|
||||
/* {
|
||||
hist.iter_mut()
|
||||
.filter(|x| {
|
||||
*((*x)
|
||||
.matched
|
||||
.first()
|
||||
.unwrap_or_else(|| std::process::exit(1)))
|
||||
== ')'
|
||||
&& ((*x).min_match == -10)
|
||||
})
|
||||
.last()
|
||||
.map(|x| {
|
||||
(*x).min_match = -5;
|
||||
(*x).setvec(or_branch);
|
||||
(*x).setpos(rex_it_cp)
|
||||
});
|
||||
}*/
|
||||
let foo = rex_iter.next().unwrap();
|
||||
return handle_rex_match(rex_iter, foo, line_char, hist, bracket_counter);
|
||||
}
|
||||
'(' => {
|
||||
println!("No iam in the open btack part");
|
||||
bracket_counter += 1;
|
||||
let mut rex_it_start_bracket = rex_iter.clone(); //hist.last_mut().unwrap().rex_position.clone().unwrap();
|
||||
|
||||
/*hist.last_mut().unwrap().setpos(rex_iter);
|
||||
hist.last_mut().unwrap().setvec(vec![]);
|
||||
hist.last_mut().unwrap().min_match = -10;
|
||||
let string_pos = hist.last_mut().unwrap().position.clone();*/
|
||||
|
||||
match get_pipe(rex_iter.clone()) {
|
||||
None => {
|
||||
let foo = rex_it_start_bracket.next().unwrap();
|
||||
return handle_rex_match(
|
||||
rex_it_start_bracket,
|
||||
foo,
|
||||
line_char,
|
||||
hist,
|
||||
bracket_counter,
|
||||
);
|
||||
}
|
||||
Some(pip) => {
|
||||
//TODO: do i need to call here next as well?
|
||||
let string_pos = hist.last_mut().unwrap().position.clone();
|
||||
let mut shot = Snapshots::init(string_pos);
|
||||
shot.setpos(rex_iter.clone());
|
||||
shot.rex_or_position = Some(pip);
|
||||
shot.min_match = -1;
|
||||
println!("der snap sollte so aussehen: {:?}", shot);
|
||||
hist.push(shot);
|
||||
let foo = rex_it_start_bracket.next().unwrap();
|
||||
|
||||
return handle_rex_match(
|
||||
rex_it_start_bracket,
|
||||
foo,
|
||||
line_char,
|
||||
hist,
|
||||
bracket_counter,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_ => {
|
||||
last_match = vec![rex_char];
|
||||
line_char == rex_char
|
||||
@ -309,7 +522,7 @@ fn handle_rex_match<'a>(
|
||||
last_match
|
||||
);
|
||||
|
||||
(success_match, free_match)
|
||||
(success_match, free_match, bracket_counter)
|
||||
}
|
||||
|
||||
fn rematch(input: char, custom_class: &Vec<char>) -> bool {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user