Add alternation ("or", `|`) support to the regex matcher

This commit is contained in:
hendrik 2024-05-17 16:29:03 +02:00
parent f4883d3bd2
commit 81d8a58363

View File

@ -1,4 +1,5 @@
use std::env;
use std::f32::consts::E;
use std::io::{self, BufRead};
use std::str::Chars;
@ -37,7 +38,9 @@ fn get_lines() -> Vec<String> {
struct Snapshots<'a> {
position: Chars<'a>,
rex_position: Option<Chars<'a>>,
min_match: u8,
rex_or_position: Option<Chars<'a>>,
min_match: i8,
done_matches: u8,
matched: Vec<char>,
}
@ -47,6 +50,8 @@ impl<'a> Snapshots<'a> {
Self {
position: position,
rex_position: None,
rex_or_position: None,
min_match: 1,
done_matches: 1,
matched: vec![],
@ -83,12 +88,15 @@ fn check_for_or(mut regex: Chars, last_match: Vec<char>) -> bool {
}
}
fn check_if_tail_empty(mut regex: Chars, last_match: Vec<char>) -> bool {
println!("Lets see if {:?} is null -> last: {:?}", regex, last_match);
// lets disallow *? for now - havent considered in other parts anyway - and also not via brackets and stuff
let mut enforce_char = last_match
let enforce_char = last_match
.iter()
.all(|x| *x != '*' && *x != '?' && *x != '+');
.filter(|&x| *x != '*' && *x != '?' && *x != '+')
.count()
== 0;
let mut disallow_chars = false;
let disallow_chars = false;
let c: char = match regex.next() {
None => return true,
Some(c) => c,
@ -103,6 +111,10 @@ fn check_if_tail_empty(mut regex: Chars, last_match: Vec<char>) -> bool {
'?' | '*' => return check_if_tail_empty(regex, vec!['*']),
'|' => return check_for_or(regex, vec!['|']),
'$' => return regex.next().is_none(),
')' => {
regex.next();
return check_if_tail_empty(regex, vec![')']);
}
// dont wanna [...] rn -> dont care
_ => {
if disallow_chars {
@ -131,6 +143,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
let mut last_match: Vec<char> = Vec::new();
let mut free_match = false;
let mut hist: Vec<Snapshots> = vec![];
let mut brackets: i8 = 0;
//for line_char in line_iter {
loop {
if !hist.is_empty() {
@ -140,6 +153,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
let line_char = match line_iter.next() {
Some(c) => c,
None => {
println!("THERE IS NOTHING MORE :-(");
if check_if_tail_empty(rex_iter, last_match) {
return Ok(line.clone());
} else {
@ -149,12 +163,13 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
};
println!(
"\t\tvor rematch: {:?} - {:?} - {:?} - {:?} -- {:?}",
"\t\tvor rematch: {:?} - {:?} - {:?} - {:?} -- {:?}-- {:?}",
line_char,
last_match,
line_iter,
free_match,
hist.last().unwrap().matched
hist.last().unwrap().matched,
rex_iter
);
// handle quantifier:
if free_match {
@ -190,11 +205,18 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
rex_iter, line_char, rex_char
);
let success_match: bool;
(success_match, free_match) = handle_rex_match(rex_iter, rex_char, line_char, &mut hist);
(success_match, free_match, brackets) =
handle_rex_match(rex_iter, rex_char, line_char, &mut hist, brackets);
// since i do this more a less unexact, i should probably save me such validation - but whatever...
if brackets < 0 {
return Err(());
}
rex_iter = hist.last().unwrap().rex_position.as_ref().unwrap().clone();
println!(
"\nREGEX POS NACH MATCH - {:?} - {:?} - {:?}\n",
rex_iter, line_char, rex_char
"\nREGEX POS NACH MATCH - {:?} - {:?} - {:?}- {:?}\n",
rex_iter, line_char, rex_char, success_match
);
println!("Result: {:?} - {:?}", success_match, hist);
if !success_match {
@ -207,6 +229,7 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
// kind of crappy when i cannot match the actual char bc at that point we think that we need to match - might be better to
// rewrite this whole stuff a little more structured i guess...
// TODO: brackets / or with quantifier .... buuuuut tbh - most of the nested stuff wouldnt work rn
if let Some(tmpchar) = rex_iter.clone().next() {
if tmpchar == '?' || tmpchar == '*' {
snap.min_match = 0;
@ -215,24 +238,52 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
}
}
while snap.done_matches <= snap.min_match {
let mut helper = false;
// yeah i know kinda hacky, but rn and mostly just wanna finish (like above with the bracket var)
while snap.done_matches as i8 <= snap.min_match && snap.min_match != -1 {
if hist.is_empty() {
return Err(());
match get_pipe(rex_iter) {
None => {
return Err(());
}
Some(pip_pos) => {
rex_iter = pip_pos;
line_iter = line.chars();
hist = vec![];
helper = true;
}
}
break;
} else {
snap = hist.pop().unwrap();
}
snap = hist.pop().unwrap();
}
if snap.done_matches > snap.min_match {
if helper {
continue;
}
if snap.done_matches as i8 > snap.min_match {
line_iter = snap.position;
rex_iter = snap.rex_position.unwrap();
snap.done_matches -= 1;
for _ in 0..snap.done_matches {
line_iter.next();
// TODO: actually need the other or branch for additional backtracking (if we wanna backtracking before the or clause...)
if snap.min_match == -1 {
println!("looks like a or {:?}", snap.rex_or_position);
snap.min_match = 0;
snap.done_matches = 0;
rex_iter = snap.rex_or_position.unwrap();
} else {
// for now just ignore the other or arm
snap.done_matches -= 1;
rex_iter = snap.rex_position.unwrap();
for _ in 0..snap.done_matches {
line_iter.next();
}
}
}
println!(
"BACKTRACKING DONE should now be one less than before, trying now with {:?}",
rex_iter,
"BACKTRACKING DONE should now be one less than before, trying now with {:?} - full hist is now {:?}",
rex_iter, hist
)
//let mut snap = rex_iter = rex.chars();
}
@ -246,14 +297,81 @@ fn line_match(line: String, rex: &str) -> Result<String, ()> {
Ok(line)
}
/// Scans forward for the `)` that closes the bracket the caller is already inside.
///
/// The scan starts at nesting depth 1, i.e. it assumes one `(` has already been
/// consumed before `rex_iter`. Every further `(` increases the depth and every
/// `)` decreases it.
///
/// Returns `Some(iterator)` positioned directly after the closing `)` once the
/// depth drops to 0. Returns `None` when the input ends while the depth is still
/// 1 (no nested bracket left open). If the input ends at any other depth, an
/// error is printed to stderr and the process exits with status 1.
///
/// Brackets are counted literally; escaped brackets are not recognised.
fn get_closing<'a>(mut rex_iter: Chars<'a>) -> Option<Chars<'a>> {
    // Depth 1 == "inside the bracket whose closing partner we are looking for".
    let mut local_bracket_count = 1;
    loop {
        match rex_iter.next() {
            Some('(') => local_bracket_count += 1,
            Some(')') => {
                local_bracket_count -= 1;
                if local_bracket_count == 0 {
                    // The iterator now points at the first char after the ')'.
                    return Some(rex_iter);
                }
            }
            // Any other character is irrelevant for bracket matching.
            Some(_) => {}
            // Input exhausted without nested brackets pending: treated as
            // "there were no brackets at all" by the callers.
            None if local_bracket_count == 1 => return None,
            None => {
                eprintln!("Could not find closing bracket - one could argue the regex mathes in that case - but probably there is just an error in the code ¯\\_(ツ)_/¯");
                std::process::exit(1);
            }
        }
    }
}
/// Looks for the next `|` that belongs to the current bracket level.
///
/// Scanning starts at nesting depth 1. A `(` raises the depth, a `)` lowers it.
/// Returns `Some(iterator)` positioned right after the first `|` seen while the
/// depth is exactly 1. Returns `None` if the depth reaches 0 first (the current
/// group was closed) or if the input ends without such a `|`.
///
/// Brackets are counted literally, so escaped brackets are NOT supported.
fn get_pipe<'a>(mut rex_iter: Chars<'a>) -> Option<Chars<'a>> {
    let mut depth = 1;
    while let Some(ch) = rex_iter.next() {
        if ch == '(' {
            depth += 1;
        } else if ch == ')' {
            depth -= 1;
            if depth == 0 {
                // Left the group we started in: no alternative on this level.
                return None;
            }
        } else if ch == '|' && depth == 1 {
            // Pipes inside nested groups (depth > 1) are skipped.
            return Some(rex_iter);
        }
    }
    None
}
fn handle_rex_match<'a>(
mut rex_iter: Chars<'a>,
rex_char: char,
line_char: char,
hist: &mut Vec<Snapshots<'a>>,
) -> (bool, bool) {
mut bracket_counter: i8,
) -> (bool, bool, i8) {
let mut last_match: Vec<char> = Vec::new();
let mut free_match = false;
println!("{:?} could be anything ...... ", rex_char);
let success_match = match rex_char {
'\\' => {
let tmpbool: bool;
@ -288,6 +406,101 @@ fn handle_rex_match<'a>(
hist.last_mut().unwrap().min_match = 0;
true
}
')' => {
bracket_counter -= 1;
let foo = rex_iter.next().unwrap();
return handle_rex_match(rex_iter, foo, line_char, hist, bracket_counter);
}
'|' => {
if bracket_counter == 0 {
// no brackets then first or arm has matched.... lets just encode some stuff in this helper variable
let mut foo = rex_iter.clone();
foo.next();
while foo.next().is_some() {
rex_iter.next();
hist.last_mut().unwrap().setpos(rex_iter);
return (true, true, bracket_counter);
}
}
// TODO: get the first "or" part
//let or_branch: Vec<char>;
//let rex_it_cp = rex_iter.clone();
match get_closing(rex_iter.clone()) {
None => {
let mut foo = rex_iter.clone();
foo.next();
while foo.next().is_some() {
rex_iter.next();
hist.last_mut().unwrap().setpos(rex_iter);
return (true, true, bracket_counter);
}
}
Some(it) => rex_iter = it,
};
/* {
hist.iter_mut()
.filter(|x| {
*((*x)
.matched
.first()
.unwrap_or_else(|| std::process::exit(1)))
== ')'
&& ((*x).min_match == -10)
})
.last()
.map(|x| {
(*x).min_match = -5;
(*x).setvec(or_branch);
(*x).setpos(rex_it_cp)
});
}*/
let foo = rex_iter.next().unwrap();
return handle_rex_match(rex_iter, foo, line_char, hist, bracket_counter);
}
'(' => {
println!("No iam in the open btack part");
bracket_counter += 1;
let mut rex_it_start_bracket = rex_iter.clone(); //hist.last_mut().unwrap().rex_position.clone().unwrap();
/*hist.last_mut().unwrap().setpos(rex_iter);
hist.last_mut().unwrap().setvec(vec![]);
hist.last_mut().unwrap().min_match = -10;
let string_pos = hist.last_mut().unwrap().position.clone();*/
match get_pipe(rex_iter.clone()) {
None => {
let foo = rex_it_start_bracket.next().unwrap();
return handle_rex_match(
rex_it_start_bracket,
foo,
line_char,
hist,
bracket_counter,
);
}
Some(pip) => {
//TODO: do i need to call here next as well?
let string_pos = hist.last_mut().unwrap().position.clone();
let mut shot = Snapshots::init(string_pos);
shot.setpos(rex_iter.clone());
shot.rex_or_position = Some(pip);
shot.min_match = -1;
println!("der snap sollte so aussehen: {:?}", shot);
hist.push(shot);
let foo = rex_it_start_bracket.next().unwrap();
return handle_rex_match(
rex_it_start_bracket,
foo,
line_char,
hist,
bracket_counter,
);
}
}
}
_ => {
last_match = vec![rex_char];
line_char == rex_char
@ -309,7 +522,7 @@ fn handle_rex_match<'a>(
last_match
);
(success_match, free_match)
(success_match, free_match, bracket_counter)
}
fn rematch(input: char, custom_class: &Vec<char>) -> bool {