From 64ea5c5092e6fe166f446a31a46b93a474a1350c Mon Sep 17 00:00:00 2001 From: Alexander Delman Date: Sun, 23 Jun 2024 03:42:35 +0300 Subject: [PATCH] (#318) implemented Symbol parser --- CMakeLists.txt | 2 +- .../src/MetamorphicTests.cpp | 254 +++++++++--------- apps/UnitTestsApp/src/UnitTests.cpp | 36 +-- config/automata_parser/grammar.txt | 23 -- .../CMakeLists.txt | 2 +- .../include/AutomatonParser}/Lexer.h | 0 .../include/AutomatonParser}/Parser.h | 4 +- .../src/Lexer.cpp | 4 +- .../src/Parser.cpp | 34 +-- libs/InputGenerator/CMakeLists.txt | 2 +- .../InputGenerator/AutomatonGenerator.h | 4 +- .../InputGenerator/src/AutomatonGenerator.cpp | 5 +- libs/Interpreter/CMakeLists.txt | 8 +- .../include/Interpreter/Interpreter.h | 4 +- libs/Interpreter/src/Interpreter.Lexer.cpp | 7 +- libs/Objects/include/Objects/Symbol.h | 20 +- libs/Objects/src/AlgExpression.cpp | 55 ++-- libs/Objects/src/BackRefRegex.cpp | 2 +- libs/Objects/src/Grammar.cpp | 12 +- libs/Objects/src/Symbol.cpp | 127 +++++++-- test_data/MetamorphicTest/test1.txt | 15 +- 21 files changed, 340 insertions(+), 280 deletions(-) delete mode 100644 config/automata_parser/grammar.txt rename libs/{AutomataParser => AutomatonParser}/CMakeLists.txt (95%) rename libs/{AutomataParser/include/AutomataParser => AutomatonParser/include/AutomatonParser}/Lexer.h (100%) rename libs/{AutomataParser/include/AutomataParser => AutomatonParser/include/AutomatonParser}/Parser.h (98%) rename libs/{AutomataParser => AutomatonParser}/src/Lexer.cpp (70%) rename libs/{AutomataParser => AutomatonParser}/src/Parser.cpp (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 24570dd1..aa2c4f0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project(chipollino) # Add sub directories add_subdirectory(libs/Fraction) add_subdirectory(libs/AutomatonToImage) -add_subdirectory(libs/AutomataParser) +add_subdirectory(libs/AutomatonParser) add_subdirectory(libs/Objects) add_subdirectory(libs/Tester) 
add_subdirectory(libs/Logger) diff --git a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp index b4958354..33bce0a0 100644 --- a/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp +++ b/apps/MetamorphicTestsApp/src/MetamorphicTests.cpp @@ -3,10 +3,10 @@ #include #include -#include "MetamorphicTestsApp/MetamorphicTests.h" -#include "AutomataParser/Parser.h" +#include "AutomatonParser/Parser.h" #include "InputGenerator/AutomatonGenerator.h" #include "InputGenerator/RegexGenerator.h" +#include "MetamorphicTestsApp/MetamorphicTests.h" #include "Objects/BackRefRegex.h" #include "Objects/FiniteAutomaton.h" #include "Objects/MemoryFiniteAutomaton.h" @@ -178,6 +178,56 @@ TEST(TestMFA, ToTxt) { } } +TEST(TestNFA, ToMFA) { + RegexGenerator rg(5, 3, 3, 2); + for (int i = 0; i < RegexNumber; i++) { + string rgx_str = MetamorphicTests::generate_bregex(rg, 2); + SCOPED_TRACE("Regex: " + rgx_str); + MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa(); + ASSERT_TRUE(MemoryFiniteAutomaton::equal(mfa1, mfa1.to_symbolic_fa().to_mfa())); + MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional(); + ASSERT_TRUE(MemoryFiniteAutomaton::equal(mfa2, mfa2.to_symbolic_fa().to_mfa())); + } +} + +TEST(TestMFA, ToFA) { + RegexGenerator rg(5, 3, 3, 2); + for (int i = 0; i < RegexNumber; i++) { + string rgx_str = rg.generate_regex(); + SCOPED_TRACE("Regex: " + rgx_str); + Regex r = Regex(rgx_str); + FiniteAutomaton fa = r.to_glushkov(); + ASSERT_TRUE(FiniteAutomaton::equal(fa, fa.to_mfa().to_action_fa())); + ASSERT_TRUE(FiniteAutomaton::equal(fa, fa.to_mfa().to_symbolic_fa())); + } +} + +TEST(TestMFA, Bisimilar) { + RegexGenerator rg(5, 3, 3, 2); + for (int i = 0; i < RegexNumber; i++) { + string rgx_str = MetamorphicTests::generate_bregex(rg, 1); + SCOPED_TRACE("Regex: " + rgx_str); + BackRefRegex r = BackRefRegex(rgx_str); + MemoryFiniteAutomaton mfa = r.to_mfa_additional(); + + 
ASSERT_TRUE(MemoryFiniteAutomaton::action_bisimilar(mfa, mfa)); + ASSERT_TRUE(MemoryFiniteAutomaton::symbolic_bisimilar(mfa, mfa)); + ASSERT_TRUE(MemoryFiniteAutomaton::bisimilar(mfa, mfa).value()); + } +} + +TEST(TestMFA, MergeBisimilar) { + RegexGenerator rg(6, 3, 3, 2); + for (int i = 0; i < RegexNumber; i++) { + string rgx_str = MetamorphicTests::generate_bregex(rg, 2); + SCOPED_TRACE("Regex: " + rgx_str); + BackRefRegex r = BackRefRegex(rgx_str); + MemoryFiniteAutomaton mfa = r.to_mfa_additional(); + + MetamorphicTests::cmp_automatons(mfa.merge_bisimilar(), mfa); + } +} + TEST(IsDeterministic, Test_is_deterministic) { string test_path = "./test_data/MetamorphicTest/test1.txt"; for (int i = 0; i < RegexNumber; i++) { @@ -216,53 +266,53 @@ TEST(AutomatonGenerator, Test_MergeBisim_equivalent) { /* TEST(Statistics, Test_statistics) { - string test_path = "./TestData/MetamorphicTest/test1.txt"; - std::vector OX; - std::vector OY; - AutomatonGenerator::set_initial_state_not_terminal(true); - for (int term = 5; term <= 100; term = term + 5) { - AutomatonGenerator::set_final_probability(term); - int count = 0; - int ALL = 10000; - for (int i = 0; i < ALL; i++) { - AutomatonGenerator a(FA_type::NFA); - a.write_to_file(test_path); - Parser parser; - FiniteAutomaton FA; - try { - FA = parser.parse_NFA(test_path); - } catch (const std::runtime_error& re) { - std::ifstream t(test_path); - stringstream buffer; - buffer << t.rdbuf(); - string file = buffer.str(); - throw(std::runtime_error(file)); - } - if (FA.is_finite()) { - count++; - } - } - std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) << - "%" << std::endl; OX.push_back(term); OY.push_back(float(count) / float(ALL)); - } - std::cout << "OX = ["; - for (int i = 0; i < OX.size() - 1; i++) { - std::cout << OX[i] << ","; - } - std::cout << OX[OX.size() - 1] << "]\n"; - - std::cout << "OY = ["; - for (int i = 0; i < OY.size() - 1; i++) { - std::cout << OY[i] << ","; - } - std::cout << 
OY[OY.size() - 1] << "]\n"; + string test_path = "./TestData/MetamorphicTest/test.txt"; + std::vector OX; + std::vector OY; + AutomatonGenerator::set_initial_state_not_terminal(true); + for (int term = 5; term <= 100; term = term + 5) { + AutomatonGenerator::set_final_probability(term); + int count = 0; + int ALL = 10000; + for (int i = 0; i < ALL; i++) { + AutomatonGenerator a(FA_type::NFA); + a.write_to_file(test_path); + Parser parser; + FiniteAutomaton FA; + try { + FA = parser.parse_NFA(test_path); + } catch (const std::runtime_error& re) { + std::ifstream t(test_path); + stringstream buffer; + buffer << t.rdbuf(); + string file = buffer.str(); + throw(std::runtime_error(file)); + } + if (FA.is_finite()) { + count++; + } + } + std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) << + "%" << std::endl; OX.push_back(term); OY.push_back(float(count) / float(ALL)); + } + std::cout << "OX = ["; + for (int i = 0; i < OX.size() - 1; i++) { + std::cout << OX[i] << ","; + } + std::cout << OX[OX.size() - 1] << "]\n"; + + std::cout << "OY = ["; + for (int i = 0; i < OY.size() - 1; i++) { + std::cout << OY[i] << ","; + } + std::cout << OY[OY.size() - 1] << "]\n"; } TEST(AutomatonGenerator, Test_Arden_Glushkov_equivalent) { int ALL = 50; for (int i = 0; i < ALL; i++) { - string test_path = "./TestData/MetamorphicTest/test1.txt"; + string test_path = "./TestData/MetamorphicTest/test.txt"; AutomatonGenerator a(FA_type::NFA, 5); a.write_to_file(test_path); Parser parser; @@ -286,7 +336,7 @@ ard.minimize().to_txt() << "\n" << FA.to_regex().to_txt(); TEST(AutomatonGenerator, Test_Arden_Glushkov_Ambiguity_equivalent) { int ALL = 50; for (int i = 0; i < ALL; i++) { - string test_path = "./TestData/MetamorphicTest/test1.txt"; + string test_path = "./TestData/MetamorphicTest/test.txt"; AutomatonGenerator a(FA_type::NFA, 5); a.write_to_file(test_path); Parser parser; @@ -307,92 +357,42 @@ ard.minimize().to_txt() << "\n" << FA.to_regex().to_txt(); } 
TEST(Statistics, Test_dfa) { - for (int term = 5; term <= 50; term = term + 5) { - AutomatonGenerator::set_final_probability(20); - int count = 0; - int ALL = 10000; - for (int i = 0; i < ALL; i++) { - AutomatonGenerator a(FA_type::DFA); - a.write_to_file("./TestData/tmp/test.txt"); - auto FA = Parser::parse_DFA("./TestData/tmp/test.txt"); - if (FA.is_deterministic() && FA.is_finite()) { - count++; - } - } - std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) * 100<< + for (int term = 5; term <= 50; term = term + 5) { + AutomatonGenerator::set_final_probability(20); + int count = 0; + int ALL = 10000; + for (int i = 0; i < ALL; i++) { + AutomatonGenerator a(FA_type::DFA); + a.write_to_file("./TestData/tmp/test.txt"); + auto FA = Parser::parse_DFA("./TestData/tmp/test.txt"); + if (FA.is_deterministic() && FA.is_finite()) { + count++; + } + } + std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) * 100<< "%" << std::endl; - } + } } TEST(Statistics, Test_fa) { - std::cout << "TEST\n"; - for (int term = 5; term <= 50; term = term + 5) { - AutomatonGenerator::set_final_probability(20); - int count = 0; - int ALL = 10000; - for (int i = 0; i < ALL; i++) { - AutomatonGenerator a(FA_type::FA); - std::cout << "write_to_file START\n"; - a.write_to_file("./TestData/tmp/test.txt"); - std::cout << "write_to_file DONE\n"; - auto FA = Parser::parse_FA("./TestData/tmp/test.txt"); - std::cout << i << " " << std::endl; - if (FA.is_deterministic() && FA.is_finite()) { - count++; - } - std::cout << i << " " << std::endl; - } - std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) * 100 + std::cout << "TEST\n"; + for (int term = 5; term <= 50; term = term + 5) { + AutomatonGenerator::set_final_probability(20); + int count = 0; + int ALL = 10000; + for (int i = 0; i < ALL; i++) { + AutomatonGenerator a(FA_type::FA); + std::cout << "write_to_file START\n"; + a.write_to_file("./TestData/tmp/test.txt"); 
+ std::cout << "write_to_file DONE\n"; + auto FA = Parser::parse_FA("./TestData/tmp/test.txt"); + std::cout << i << " " << std::endl; + if (FA.is_deterministic() && FA.is_finite()) { + count++; + } + std::cout << i << " " << std::endl; + } + std::cout << "final_probability = " << term << " : " << float(count) / float(ALL) * 100 << "%" << std::endl; - } - }*/ - -TEST(TestNFA, ToMFA) { - RegexGenerator rg(5, 3, 3, 2); - for (int i = 0; i < RegexNumber; i++) { - string rgx_str = MetamorphicTests::generate_bregex(rg, 2); - SCOPED_TRACE("Regex: " + rgx_str); - MemoryFiniteAutomaton mfa1 = BackRefRegex(rgx_str).to_mfa(); - ASSERT_TRUE(MemoryFiniteAutomaton::equal(mfa1, mfa1.to_symbolic_fa().to_mfa())); - MemoryFiniteAutomaton mfa2 = BackRefRegex(rgx_str).to_mfa_additional(); - ASSERT_TRUE(MemoryFiniteAutomaton::equal(mfa2, mfa2.to_symbolic_fa().to_mfa())); - } -} - -TEST(TestMFA, ToFA) { - RegexGenerator rg(5, 3, 3, 2); - for (int i = 0; i < RegexNumber; i++) { - string rgx_str = rg.generate_regex(); - SCOPED_TRACE("Regex: " + rgx_str); - Regex r = Regex(rgx_str); - FiniteAutomaton fa = r.to_glushkov(); - ASSERT_TRUE(FiniteAutomaton::equal(fa, fa.to_mfa().to_action_fa())); - ASSERT_TRUE(FiniteAutomaton::equal(fa, fa.to_mfa().to_symbolic_fa())); - } -} - -TEST(TestMFA, Bisimilar) { - RegexGenerator rg(5, 3, 3, 2); - for (int i = 0; i < RegexNumber; i++) { - string rgx_str = MetamorphicTests::generate_bregex(rg, 1); - SCOPED_TRACE("Regex: " + rgx_str); - BackRefRegex r = BackRefRegex(rgx_str); - MemoryFiniteAutomaton mfa = r.to_mfa_additional(); - - ASSERT_TRUE(MemoryFiniteAutomaton::action_bisimilar(mfa, mfa)); - ASSERT_TRUE(MemoryFiniteAutomaton::symbolic_bisimilar(mfa, mfa)); - ASSERT_TRUE(MemoryFiniteAutomaton::bisimilar(mfa, mfa).value()); - } -} - -TEST(TestMFA, MergeBisimilar) { - RegexGenerator rg(6, 3, 3, 2); - for (int i = 0; i < RegexNumber; i++) { - string rgx_str = MetamorphicTests::generate_bregex(rg, 2); - SCOPED_TRACE("Regex: " + rgx_str); - BackRefRegex r 
= BackRefRegex(rgx_str); - MemoryFiniteAutomaton mfa = r.to_mfa_additional(); - - MetamorphicTests::cmp_automatons(mfa.merge_bisimilar(), mfa); } -} + }*/ diff --git a/apps/UnitTestsApp/src/UnitTests.cpp b/apps/UnitTestsApp/src/UnitTests.cpp index 38d66918..b57a48ea 100644 --- a/apps/UnitTestsApp/src/UnitTests.cpp +++ b/apps/UnitTestsApp/src/UnitTests.cpp @@ -1,7 +1,7 @@ #include "UnitTestsApp/UnitTests.h" +#include "AutomatonParser/Parser.h" #include "AutomatonToImage/AutomatonToImage.h" #include "Interpreter/Interpreter.h" -#include "AutomataParser/Parser.h" #include "Objects/AlgExpression.h" #include "Objects/BackRefRegex.h" #include "Objects/FiniteAutomaton.h" @@ -439,11 +439,11 @@ TEST(TestGetOneUnambigous, GetOneUnambigousWorks) { TEST(TestInterpreter, RunLineTest) { Interpreter interpreter; interpreter.set_log_mode(Interpreter::LogMode::nothing); - ASSERT_TRUE(!interpreter.run_line("A = Annote (Glushkova {a})")); + ASSERT_FALSE(interpreter.run_line("A = Annote (Glushkova {a})")); ASSERT_TRUE(interpreter.run_line(" N1 = ( ( Glushkov ({ab|a}) )) ")); ASSERT_TRUE(interpreter.run_line(" N2 = (Annote N1)")); - ASSERT_TRUE(!interpreter.run_line("N2 = (Glushkov N1)")); - ASSERT_TRUE(!interpreter.run_line("Equiv N1 N3")); + ASSERT_FALSE(interpreter.run_line("N2 = (Glushkov N1)")); + ASSERT_FALSE(interpreter.run_line("Equiv N1 N3")); ASSERT_TRUE(interpreter.run_line(" Equiv (( N1)) ( (Reverse .Reverse (N2) !! 
))")); ASSERT_TRUE(interpreter.run_line("Test (Glushkov {a*}) {a*} 1")); @@ -461,14 +461,14 @@ TEST(TestInterpreter, RunLineTest) { ASSERT_TRUE(interpreter.run_line("A = [[] []]")); ASSERT_TRUE(interpreter.run_line("A = [{a} {b}]")); ASSERT_TRUE(interpreter.run_line("A = [[(([{a}]))] [{a} []]]")); - ASSERT_TRUE(!interpreter.run_line("A = [[(([{a}])] [{a} []]]")); - ASSERT_TRUE(!interpreter.run_line("A = [[([{a}]))] [{a} []]]")); - ASSERT_TRUE(!interpreter.run_line("A = [[(([{a}]))] [{a} []]")); - ASSERT_TRUE(!interpreter.run_line("A = [[(([a}]))] [{a} (Glushkov(DeAnnote {a} !!) !!) []]]")); + ASSERT_FALSE(interpreter.run_line("A = [[(([{a}])] [{a} []]]")); + ASSERT_FALSE(interpreter.run_line("A = [[([{a}]))] [{a} []]]")); + ASSERT_FALSE(interpreter.run_line("A = [[(([{a}]))] [{a} []]")); + ASSERT_FALSE(interpreter.run_line("A = [[(([a}]))] [{a} (Glushkov(DeAnnote {a} !!) !!) []]]")); // Normalize ASSERT_TRUE(interpreter.run_line("A = Normalize {abc} [[{a} {b}]]")); - ASSERT_TRUE(!interpreter.run_line("A = Normalize {abc} [[{a} []]]")); + ASSERT_FALSE(interpreter.run_line("A = Normalize {abc} [[{a} []]]")); } TEST(TestTransformationMonoid, IsMinimal) { @@ -960,9 +960,9 @@ TEST(TestAutomatonParser, MFA_correctness_failure) { try { Parser parser; parser.parse_MFA(cycle_with_cell_reopen); - } catch (const std::runtime_error& re) { - ASSERT_EQ(string(re.what()), string("Parser: incorrect memory usage in MFA")); - } + } catch (const std::logic_error& re) { + ASSERT_EQ(string(re.what()), "AutomatonParser: incorrect memory usage in MFA"); + } } TEST(TestAutomatonParser, MFA_correctness) { @@ -974,11 +974,13 @@ TEST(TestAutomatonParser, MFA_correctness) { // TODO: FAILED: /*TEST(AutomatonGenerator, Test_Arden_Glushkov_Ambiguity_equivalent) { - Regex r("((e|k)he*cg)*(|(e|k)he*|((e|k)(b|i)|(e|k)he*(e|ck)))"); - auto ard = r.to_glushkov(); - auto first = ard.ambiguity(); - auto second = ard.to_regex().to_glushkov().ambiguity(); + Regex 
r("((e|k)he*cg)*(|(e|k)he*|((e|k)(b|i)|(e|k)he*(e|ck)))"); + auto ard = r.to_glushkov(); + auto first = ard.ambiguity(); + auto second = ard.to_regex().to_glushkov().ambiguity(); - ASSERT_EQ(first,second) << "\n" << ard.minimize().to_txt() << "\n" << ard.to_regex().to_glushkov().minimize().to_txt() << "\n" << ard.to_regex().to_txt() << "\n" << ard.to_regex().to_glushkov().to_regex().to_txt(); + ASSERT_EQ(first,second) << "\n" << ard.minimize().to_txt() << "\n" << +ard.to_regex().to_glushkov().minimize().to_txt() << "\n" << ard.to_regex().to_txt() << "\n" << +ard.to_regex().to_glushkov().to_regex().to_txt(); }*/ diff --git a/config/automata_parser/grammar.txt b/config/automata_parser/grammar.txt deleted file mode 100644 index a444e55d..00000000 --- a/config/automata_parser/grammar.txt +++ /dev/null @@ -1,23 +0,0 @@ -production --> atribute automaton ; -atribute --> 'MFA' !MFA | 'NFA' !NFA | 'PDA' !PDA | 'DFA' !DFA ; -automaton --> states transitions; -states --> state_descriptions '...' ; -state_descriptions --> state_description state_descriptions | EPS ; -state_description --> node_id variants ';' ; -variants --> final (initial_set ? EPS : initial_state) label ; -label --> 'label' '=' node_id | EPS ; -final --> 'final' | EPS ; -initial_state --> 'initial_state' !initial_set | EPS ; -transitions --> transition transitions | EPS ; -transition --> stmt (MFA? memory_lists : EPS) (PDA? stack_actions : EPS) ';' ; -stmt --> node_id node_id symbol ; -memory_lists --> memory_cell memory_lists | EPS ; -memory_cell --> cell_id memory_state ; -memory_state --> 'o' | 'c' | 'r' ; -stack_actions --> stack_symbol '/' stack_pushes ; -stack_symbol --> '$' | node_id ; -stack_pushes --> stack_symbol stack_pushes | EPS ; -node_id --> STRING ; -cell_id --> NUMBER ; -symbol --> (MFA ? '&' cell_id | eps_symbol : eps_symbol) ; -eps_symbol --> (DFA ? 
LETTER | DIGIT : 'eps' | LETTER | DIGIT) \ No newline at end of file diff --git a/libs/AutomataParser/CMakeLists.txt b/libs/AutomatonParser/CMakeLists.txt similarity index 95% rename from libs/AutomataParser/CMakeLists.txt rename to libs/AutomatonParser/CMakeLists.txt index 7de00e71..abd20bca 100644 --- a/libs/AutomataParser/CMakeLists.txt +++ b/libs/AutomatonParser/CMakeLists.txt @@ -1,5 +1,5 @@ # Set the project name -project(AutomataParser) +project(AutomatonParser) # Lexy implementing include(FetchContent) diff --git a/libs/AutomataParser/include/AutomataParser/Lexer.h b/libs/AutomatonParser/include/AutomatonParser/Lexer.h similarity index 100% rename from libs/AutomataParser/include/AutomataParser/Lexer.h rename to libs/AutomatonParser/include/AutomatonParser/Lexer.h diff --git a/libs/AutomataParser/include/AutomataParser/Parser.h b/libs/AutomatonParser/include/AutomatonParser/Parser.h similarity index 98% rename from libs/AutomataParser/include/AutomataParser/Parser.h rename to libs/AutomatonParser/include/AutomatonParser/Parser.h index 03e266c0..c770aa09 100644 --- a/libs/AutomataParser/include/AutomataParser/Parser.h +++ b/libs/AutomatonParser/include/AutomatonParser/Parser.h @@ -17,11 +17,11 @@ #define lexy_ascii_child lexy::_pt_node #include "Lexer.h" -#include "Objects/Symbol.h" #include "Objects/FiniteAutomaton.h" #include "Objects/MemoryFiniteAutomaton.h" +#include "Objects/Symbol.h" -const char GrammarPath[] = "./config/automata_parser/grammar.txt"; +const char GrammarPath[] = "./config/automaton_parser/grammar.txt"; class Parser { private: diff --git a/libs/AutomataParser/src/Lexer.cpp b/libs/AutomatonParser/src/Lexer.cpp similarity index 70% rename from libs/AutomataParser/src/Lexer.cpp rename to libs/AutomatonParser/src/Lexer.cpp index ff0ba396..3a2a66f1 100644 --- a/libs/AutomataParser/src/Lexer.cpp +++ b/libs/AutomatonParser/src/Lexer.cpp @@ -1,4 +1,4 @@ -#include +#include void Lexer::parse_buffer(lexy_ascii_tree& tree, lexy::buffer& buffer) { 
auto result = lexy::parse_as_tree(tree, buffer, lexy_ext::report_error); @@ -6,6 +6,6 @@ void Lexer::parse_buffer(lexy_ascii_tree& tree, lexy::buffer +#include +using std::logic_error; using std::map; -using std::runtime_error; using std::set; using std::string; using std::unordered_set; @@ -62,20 +62,12 @@ bool Parser::parse_reserved(const std::string& res_case) { if (cur_pos == file.size()) return false; - // if (res_case == "LETTER") { - // if ((file[cur_pos] >= 'a' && file[cur_pos] <= 'z') || (file[cur_pos] >= 'A' && file[cur_pos] <= 'Z')) { - // LETTER = file[cur_pos]; - // read_symbols(1); - // return true; - // } - // } int beg_pos = cur_pos; if (res_case == "LETTER") { - while (cur_pos < file.size() && - ((file[cur_pos] >= 'a' && file[cur_pos] <= 'z') || - (file[cur_pos] >= 'A' && file[cur_pos] <= 'Z') || - (file[cur_pos] >= '0' && file[cur_pos] <= '9') || - file[cur_pos] == '.' || file[cur_pos] == ',')) { + while (cur_pos < file.size() && ((file[cur_pos] >= 'a' && file[cur_pos] <= 'z') || + (file[cur_pos] >= 'A' && file[cur_pos] <= 'Z') || + (file[cur_pos] >= '0' && file[cur_pos] <= '9') || + file[cur_pos] == '.' 
|| file[cur_pos] == ',')) { cur_pos++; } if (beg_pos != cur_pos) @@ -242,11 +234,11 @@ std::variant Parser::parse(lexy_ascii_tr read_symbols(0); if (!parse_transition("production")) { - throw(std::runtime_error("Parser: error occurred while parsing FA")); + throw logic_error("AutomatonParser: error occurred while parsing FA"); } if (!attributes.count("initial_set")) { - throw(std::runtime_error("Parser: initial state is not set")); + throw logic_error("AutomatonParser: initial state is not set"); } for (const auto& transition : FAtransitions) { @@ -282,7 +274,7 @@ std::variant Parser::parse(lexy_ascii_tr auto mfa = MemoryFiniteAutomaton(name_to_ind[initial], MFAstates, alphabet); if (!mfa.check_memory_correctness()) { - throw(std::runtime_error("Parser: incorrect memory usage in MFA")); + throw logic_error("AutomatonParser: incorrect memory usage in MFA"); } return mfa; @@ -312,7 +304,7 @@ std::variant Parser::parse(lexy_ascii_tr auto fa = FiniteAutomaton(name_to_ind[initial], FAstates, alphabet); if (attributes.count("DFA") && !fa.is_deterministic()) { - throw(std::runtime_error("Parser: FA expected to be deterministic")); + throw logic_error("AutomatonParser: FA expected to be deterministic"); } return fa; @@ -330,7 +322,7 @@ FiniteAutomaton Parser::parse_NFA(const std::string& automaton_file, if (attributes.count("DFA") || attributes.count("NFA")) return std::get(res); - throw(std::runtime_error("Parse: parsed automaton is not NFA")); + throw logic_error("Parse: parsed automaton is not NFA"); } FiniteAutomaton Parser::parse_DFA(const std::string& automaton_file, @@ -345,7 +337,7 @@ FiniteAutomaton Parser::parse_DFA(const std::string& automaton_file, if (attributes.count("DFA")) return std::get(res); - throw(std::runtime_error("Parse: parsed automaton is not DFA")); + throw logic_error("Parse: parsed automaton is not DFA"); } MemoryFiniteAutomaton Parser::parse_MFA(const std::string& automaton_file, @@ -360,5 +352,5 @@ MemoryFiniteAutomaton 
Parser::parse_MFA(const std::string& automaton_file, if (attributes.count("MFA")) return std::get(res); - throw(std::runtime_error("Parse: parsed automaton is not MFA")); + throw logic_error("Parse: parsed automaton is not MFA"); } \ No newline at end of file diff --git a/libs/InputGenerator/CMakeLists.txt b/libs/InputGenerator/CMakeLists.txt index 8f6a392b..5a9197fe 100644 --- a/libs/InputGenerator/CMakeLists.txt +++ b/libs/InputGenerator/CMakeLists.txt @@ -16,6 +16,6 @@ target_include_directories(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME} Objects - AutomataParser + AutomatonParser FuncLib ) diff --git a/libs/InputGenerator/include/InputGenerator/AutomatonGenerator.h b/libs/InputGenerator/include/InputGenerator/AutomatonGenerator.h index ae4f8436..253ae315 100644 --- a/libs/InputGenerator/include/InputGenerator/AutomatonGenerator.h +++ b/libs/InputGenerator/include/InputGenerator/AutomatonGenerator.h @@ -13,8 +13,8 @@ #include #include -#include "AutomataParser/Lexer.h" -#include "AutomataParser/Parser.h" +#include "AutomatonParser/Lexer.h" +#include "AutomatonParser/Parser.h" enum class FA_type { MFA, diff --git a/libs/InputGenerator/src/AutomatonGenerator.cpp b/libs/InputGenerator/src/AutomatonGenerator.cpp index 1a927b9f..6f7f6daf 100644 --- a/libs/InputGenerator/src/AutomatonGenerator.cpp +++ b/libs/InputGenerator/src/AutomatonGenerator.cpp @@ -131,7 +131,7 @@ void AutomatonGenerator::generate_graph() { included_states.push_back(excluded_states[ind]); excluded_states.erase(excluded_states.begin() + ind); } - cur.pop = "$"; + // cur.pop = "$"; graph[beg].push_back(cur); } @@ -358,7 +358,8 @@ void AutomatonGenerator::setup_and_generate(FA_type type, const std::string& gra generate_graph(); if (!parse_transition("production")) - throw(std::runtime_error("Generator: can not apply grammar for generated automaton")); + throw std::runtime_error( + "AutomatonGenerator: can not apply grammar for generated automaton"); } 
AutomatonGenerator::AutomatonGenerator(FA_type type, int n, const std::string& grammar_file) diff --git a/libs/Interpreter/CMakeLists.txt b/libs/Interpreter/CMakeLists.txt index 2ebf5d07..ed7719d0 100644 --- a/libs/Interpreter/CMakeLists.txt +++ b/libs/Interpreter/CMakeLists.txt @@ -5,20 +5,20 @@ project(Interpreter) set(SOURCES src/Interpreter.cpp src/Interpreter.Lexer.cpp - ) +) # Add a library with the above sources add_library(${PROJECT_NAME} ${SOURCES}) target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include - ) +) target_link_libraries(${PROJECT_NAME} Tester InputGenerator Objects Logger - AutomataParser + AutomatonParser FuncLib - ) \ No newline at end of file +) \ No newline at end of file diff --git a/libs/Interpreter/include/Interpreter/Interpreter.h b/libs/Interpreter/include/Interpreter/Interpreter.h index d4f067fc..0b155862 100644 --- a/libs/Interpreter/include/Interpreter/Interpreter.h +++ b/libs/Interpreter/include/Interpreter/Interpreter.h @@ -10,7 +10,7 @@ #include #include -#include "AutomataParser/Parser.h" +#include "AutomatonParser/Parser.h" #include "FuncLib/Functions.h" #include "FuncLib/Typization.h" #include "InputGenerator/AutomatonGenerator.h" @@ -36,7 +36,7 @@ class Interpreter { // Интерпретация строчки, возвращает true в случае успеха bool run_line(const std::string& line); // Интерпретация файла построчно - bool run_file(const std::string& path, const std::string& user_name=""); + bool run_file(const std::string& path, const std::string& user_name = ""); // Установит режим логгирования в консоль void set_log_mode(LogMode mode); // Выгружает лог в файл diff --git a/libs/Interpreter/src/Interpreter.Lexer.cpp b/libs/Interpreter/src/Interpreter.Lexer.cpp index a22ead68..3f54cacd 100644 --- a/libs/Interpreter/src/Interpreter.Lexer.cpp +++ b/libs/Interpreter/src/Interpreter.Lexer.cpp @@ -109,13 +109,12 @@ Interpreter::Lexem Interpreter::Lexer::scan_dot() { Interpreter::Lexem Interpreter::Lexer::scan_number() { int 
pos_prev = input.pos; - auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; - string acc = ""; - while (!eof() && is_digit(current_symbol())) { + string acc; + while (!eof() && isdigit(current_symbol())) { acc += current_symbol(); next_symbol(); } - if (acc == "") { + if (acc.empty()) { input.pos = pos_prev; return Lexem(Lexem::error); } diff --git a/libs/Objects/include/Objects/Symbol.h b/libs/Objects/include/Objects/Symbol.h index 3a798ae4..3055c0b1 100644 --- a/libs/Objects/include/Objects/Symbol.h +++ b/libs/Objects/include/Objects/Symbol.h @@ -5,6 +5,15 @@ #include #include +namespace SymbolErrors { +static constexpr const char* EmptyString = "Invalid Symbol format: string cannot be empty"; +static constexpr const char* NullString = "Invalid Symbol format: null string"; +static constexpr const char* InvalidFormat = + "Invalid Symbol format: must start with a single letter, 'eps', or '-empty-'"; +static constexpr const char* ExpectedNumber = "Invalid Symbol format: number expected after marker"; +static constexpr const char* UnexpectedCharacters = "Invalid Symbol format: unexpected characters"; +} // namespace SymbolErrors + // Символ, по которому осуществляются переходы в автомате. 
// Может быть символом-буквой (и входить ТОЛЬКО в алфавит FA) или ссылкой (&i) class Symbol { @@ -16,13 +25,17 @@ class Symbol { // symbol + разметка std::string value; + void initialize(const std::string& s); + void parse_markup(const std::string& s, size_t pos); void update_value(); public: static const char linearize_marker = '.'; static const char annote_marker = ','; - inline static const std::string Epsilon = "eps"; - inline static const std::string EmptySet = "-empty-"; + + // нужно добавлять в prefixes в Symbol::initialize + static constexpr const char* Epsilon = "eps"; + static constexpr const char* EmptySet = "-empty-"; Symbol() = default; Symbol(const std::string& s); // NOLINT(runtime/explicit) @@ -46,6 +59,7 @@ class Symbol { bool operator!=(const Symbol& other) const; bool operator<(const Symbol& other) const; + bool empty() const; bool is_epsilon() const; // преобразовывает вектор символов в одну строку static std::string vector_to_str(const std::vector&); @@ -83,8 +97,8 @@ class MemorySymbols { static Symbol Reset(int number); static Symbol Open(int number); + static std::optional is_memory_string(const std::string& s); static bool is_memory_symbol(const Symbol& s); - static bool is_memory_char(char c); static bool is_close(const Symbol& s); static bool is_reset(const Symbol& s); static bool is_open(const Symbol& s); diff --git a/libs/Objects/src/AlgExpression.cpp b/libs/Objects/src/AlgExpression.cpp index c6f45a34..86c3f985 100644 --- a/libs/Objects/src/AlgExpression.cpp +++ b/libs/Objects/src/AlgExpression.cpp @@ -172,7 +172,7 @@ void AlgExpression::print_subtree(AlgExpression* expr, int level) const { for (int i = 0; i < level; i++) cout << " "; Symbol r_v; - if (expr->symbol != "") + if (!expr->symbol.empty()) r_v = expr->symbol; else r_v = to_string(expr->type); @@ -186,7 +186,7 @@ void AlgExpression::print_tree() const { for (int i = 0; i < 0; i++) cout << " "; Symbol r_v; - if (symbol != "") + if (!symbol.empty()) r_v = symbol; else r_v = 
to_string(type); @@ -195,7 +195,7 @@ void AlgExpression::print_tree() const { } string AlgExpression::type_to_str() const { - if (symbol != "") + if (!symbol.empty()) return symbol; switch (type) { case Type::eps: @@ -362,46 +362,23 @@ vector AlgExpression::parse_string(string str, bool allow lexeme.type = Lexeme::Type::star; break; default: - if (isalpha(c)) { - lexeme.type = Lexeme::Type::symb; - lexeme.symbol = c; - for (size_t j = index + 1; j < str.size(); j++) { - bool lin = false; - bool annote = false; - - if (str[j] == Symbol::linearize_marker) { - lin = true; - j++; - } else if (str[j] == Symbol::annote_marker) { - annote = true; - j++; - } else if (!MemorySymbols::is_memory_char(c) || !isdigit(str[j])) { + try { + string s(1, c); + for (index++; index < str.size();) { + if (str[index] == Symbol::linearize_marker || + str[index] == Symbol::annote_marker || isdigit(static_cast<unsigned char>(str[index]))) { /* cast: isdigit() on a negative char value is undefined behaviour */ + s += str[index]; + index++; + } else { + index--; break; } - - int number; - if (!read_number(str, j, number)) - return {Lexeme::Type::error}; - index = j; - - if (lin) { - lexeme.symbol.linearize(number); - } else if (annote) { - lexeme.symbol.annote(number); - } else { // memory - if (c == MemorySymbols::CloseChar) { - lexeme.symbol = MemorySymbols::Close(number); - } else if (c == MemorySymbols::ResetChar) { - lexeme.symbol = MemorySymbols::Reset(number); - } else { - lexeme.symbol = MemorySymbols::Open(number); - } - } } - + lexeme.type = Lexeme::Type::symb; + lexeme.symbol = Symbol(s); regex_is_eps = false; brackets_are_empty = false; - } else { + } catch (...) 
{ return {Lexeme::Type::error}; } break; @@ -676,7 +653,7 @@ string AlgExpression::get_iterated_word(int n) const { if (term_r && type != Type::alt) { str += term_r->get_iterated_word(n); } - if (symbol != "") { + if (!symbol.empty()) { str += symbol; } return str; diff --git a/libs/Objects/src/BackRefRegex.cpp b/libs/Objects/src/BackRefRegex.cpp index 4d43df99..8f255135 100644 --- a/libs/Objects/src/BackRefRegex.cpp +++ b/libs/Objects/src/BackRefRegex.cpp @@ -16,7 +16,7 @@ BackRefRegex::BackRefRegex(const string& str) : BackRefRegex() { try { bool res = from_string(str, true, false); if (!res) { - throw std::runtime_error("BackRefRegex::from_string() ERROR"); + throw std::logic_error("BackRefRegex::from_string() ERROR"); } } catch (const std::runtime_error& re) { cerr << re.what() << "\n"; diff --git a/libs/Objects/src/Grammar.cpp b/libs/Objects/src/Grammar.cpp index 9256370c..636ade31 100644 --- a/libs/Objects/src/Grammar.cpp +++ b/libs/Objects/src/Grammar.cpp @@ -419,14 +419,16 @@ string PrefixGrammar::rules_to_txt() const { string PrefixGrammar::bw_to_txt() const { stringstream ss; - ss << "Базисные слова: " << "\\\\"; + ss << "Базисные слова: " + << "\\\\"; for (const auto& item : prefix_grammar) { if (item.is_terminal) { const Item& g = item; for (const auto& w : g.equivalence_class) { if (w == "") { - ss << "eps" << "; "; + ss << "eps" + << "; "; } else { ss << w << "; "; } @@ -438,12 +440,12 @@ string PrefixGrammar::bw_to_txt() const { } string PrefixGrammar::pg_to_txt() const { - return rules_to_txt() + bw_to_txt(); + return rules_to_txt() + bw_to_txt(); } string PrefixGrammar::pg_log() const { // немного хардкода для рефала - return rules_to_txt() + "%template_bw\n\n\t" + bw_to_txt(); + return rules_to_txt() + "%template_bw\n\n\t" + bw_to_txt(); } FiniteAutomaton PrefixGrammar::prefix_grammar_to_automaton(iLogTemplate* log) const { @@ -478,7 +480,7 @@ FiniteAutomaton PrefixGrammar::prefix_grammar_to_automaton(iLogTemplate* log) co for (const auto& trans : 
elem.second) { states[trans].transitions[alpha].insert(i); } - if (alpha == "") { + if (alpha.empty()) { alpha = Symbol::Epsilon; } else { symbols.insert(alpha); diff --git a/libs/Objects/src/Symbol.cpp b/libs/Objects/src/Symbol.cpp index 04c7e291..95a56955 100644 --- a/libs/Objects/src/Symbol.cpp +++ b/libs/Objects/src/Symbol.cpp @@ -1,15 +1,84 @@ +#include #include #include "Objects/Symbol.h" using std::cout; +using std::optional; using std::string; using std::to_string; using std::vector; -Symbol::Symbol(const string& s) : symbol(s), value(s) {} -Symbol::Symbol(const char* c) : symbol(c), value(c) {} -Symbol::Symbol(char c) : symbol(string(1, c)), value(string(1, c)) {} +void Symbol::initialize(const string& s) { + if (s.empty()) + throw std::invalid_argument(SymbolErrors::EmptyString); + + bool prefix_found = false; + std::size_t pos = std::string::npos; + + vector prefixes = {Epsilon, EmptySet}; + for (const auto& prefix : prefixes) { + if (s.substr(0, std::strlen(prefix)) == prefix) { + symbol = prefix; + pos = std::strlen(prefix); + prefix_found = true; + break; + } + } + + if (!prefix_found) { + if (auto memory_string = MemorySymbols::is_memory_string(s); memory_string.has_value()) { + symbol = memory_string.value(); + pos = memory_string->size(); + } else if (std::isalpha(static_cast<unsigned char>(s[0]))) { /* cast: <cctype> calls on a negative char value are undefined behaviour */ + symbol = s[0]; + pos = 1; + } else { + throw std::invalid_argument(SymbolErrors::InvalidFormat); + } + } + + parse_markup(s, pos); + + value = s; +} + +void Symbol::parse_markup(const string& s, size_t pos) { + auto parse_numbers = [&](char marker, std::vector& numbers) { + while (pos < s.size() && s[pos] == marker) { + ++pos; + if (pos >= s.size() || !std::isdigit(static_cast<unsigned char>(s[pos]))) { + throw std::invalid_argument(SymbolErrors::ExpectedNumber); + } + + int number = 0; + while (pos < s.size() && std::isdigit(static_cast<unsigned char>(s[pos]))) { + number = number * 10 + (s[pos] - '0'); + ++pos; + } + numbers.push_back(number); + } + }; + + parse_numbers(linearize_marker, linearize_numbers); + 
parse_numbers(annote_marker, annote_numbers); + + if (pos < s.size()) + throw std::invalid_argument(SymbolErrors::UnexpectedCharacters); +} + +Symbol::Symbol(const string& s) { + initialize(s); +} + +Symbol::Symbol(const char* c) { + if (c == nullptr) { + throw std::invalid_argument(SymbolErrors::NullString); + } + initialize(c); +} + +Symbol::Symbol(char c) : symbol(1, c), value(1, c) {} Symbol Symbol::Ref(int number) { Symbol s; @@ -35,14 +104,15 @@ void Symbol::update_value() { } Symbol& Symbol::operator=(const string& s) { - symbol = s; - value = symbol; + *this = Symbol(s); /* parse into a temporary: initialize() appends to the markup vectors and never resets reference, so reusing it here would keep the old state */ return *this; } Symbol& Symbol::operator=(const char* c) { - symbol = c; - value = symbol; + if (c == nullptr) { + throw std::invalid_argument(SymbolErrors::NullString); + } + *this = Symbol(c); /* replace the whole state via a freshly parsed temporary; on a parse error *this stays unchanged */ return *this; } @@ -52,13 +122,14 @@ Symbol& Symbol::operator=(char c) { return *this; } -bool Symbol::is_epsilon() const { - return *this == Symbol::Epsilon; +bool Symbol::empty() const { + return symbol.empty() && annote_numbers.empty() && linearize_numbers.empty() && + !reference.has_value(); } bool Symbol::operator==(const Symbol& other) const { return symbol == other.symbol && annote_numbers == other.annote_numbers && - linearize_numbers == other.linearize_numbers; + linearize_numbers == other.linearize_numbers && reference == other.reference; } bool Symbol::operator==(char c) const { @@ -88,6 +159,10 @@ std::ostream& operator<<(std::ostream& os, const Symbol& as) { return os << (string)as; } +bool Symbol::is_epsilon() const { + return *this == Symbol::Epsilon; +} + void Symbol::annote(int num) { annote_numbers.push_back(num); update_value(); @@ -153,13 +228,25 @@ Symbol MemorySymbols::Open(int number) { return {OpenChar + std::to_string(number)}; } -bool MemorySymbols::is_memory_symbol(const Symbol& s) { - return s.symbol.size() > 1 && - (s.symbol[0] == CloseChar || s.symbol[0] == ResetChar || s.symbol[0] == OpenChar); +optional MemorySymbols::is_memory_string(const string& s) { + if (!(s.size() > 1 && 
(s[0] == CloseChar || s[0] == ResetChar || s[0] == OpenChar))) + return std::nullopt; + + size_t pos = 1; + if (pos >= s.size() || !std::isdigit(static_cast<unsigned char>(s[pos]))) /* cast: isdigit() on a negative char value is undefined behaviour */ + return std::nullopt; + + string memory_string(1, s[0]); + while (pos < s.length() && std::isdigit(static_cast<unsigned char>(s[pos]))) { + memory_string += s[pos]; + ++pos; + } + + return memory_string; } -bool MemorySymbols::is_memory_char(char c) { - return c == CloseChar || c == ResetChar || c == OpenChar; +bool MemorySymbols::is_memory_symbol(const Symbol& s) { + return MemorySymbols::is_memory_string(s.symbol).has_value(); } bool MemorySymbols::is_close(const Symbol& s) { @@ -175,12 +262,16 @@ bool MemorySymbols::is_open(const Symbol& s) { } int MemorySymbols::get_cell_number(const Symbol& s) { - string number_str = s.symbol.substr(1); - if (number_str.empty()) { + size_t pos = 1; + if (pos >= s.symbol.size() || !std::isdigit(static_cast<unsigned char>(s.symbol[pos]))) return 0; + + int number = 0; + while (pos < s.symbol.size() && std::isdigit(static_cast<unsigned char>(s.symbol[pos]))) { + number = number * 10 + (s.symbol[pos] - '0'); + ++pos; } - int number = stoi(number_str); return number; } diff --git a/test_data/MetamorphicTest/test1.txt b/test_data/MetamorphicTest/test1.txt index 211b9077..b8b37eb6 100644 --- a/test_data/MetamorphicTest/test1.txt +++ b/test_data/MetamorphicTest/test1.txt @@ -2,9 +2,14 @@ 0 initial_state ; 1 final ; 2 final ; - 4 final ; + 3 final ; ... - 0 4 eps ; - 3 2 j ; - 3 1 c ; - 4 3 i ; + 0 4 d ; + 0 0 eps ; + 1 3 d ; + 1 2 b ; + 2 4 a ; + 2 3 c ; + 4 1 c ; + 4 2 c ; + 4 1 c ;