diff --git a/src/problems/MultipleSource/algo/matrix_bfs/intersection.py b/src/problems/MultipleSource/algo/matrix_bfs/intersection.py
new file mode 100644
index 0000000..85dfb20
--- /dev/null
+++ b/src/problems/MultipleSource/algo/matrix_bfs/intersection.py
@@ -0,0 +1,209 @@
+from itertools import product
+from typing import Dict
+
+from pyformlang.finite_automaton.state import State
+from pyformlang.finite_automaton import EpsilonNFA
+from pyformlang.finite_automaton.symbol import Symbol
+
+from pygraphblas.types import BOOL
+from pygraphblas.matrix import Matrix
+from pygraphblas.vector import Vector
+from pygraphblas import descriptor
+from pygraphblas import Accum, binaryop
+
+from src.graph.graph import Graph
+from src.problems.MultipleSource.algo.matrix_bfs.reg_automaton import RegAutomaton
+
+
+class Intersection:
+    """
+    Implementation of the graph and regular grammar intersection algorithm
+    """
+
+    def __init__(self, graph: Graph, regular_automaton: RegAutomaton):
+        self.graph = graph
+        self.graph.load_bool_graph()
+        self.regular_automaton = regular_automaton
+        self.intersection_matrices = dict()
+        self.__create_intersection_matrices__()
+
+    def __create_intersection_matrices__(self):
+        num_vert_graph = self.graph.get_number_of_vertices()
+        num_vert_regex = self.regular_automaton.num_states
+        num_verts_inter = num_vert_graph * num_vert_regex
+
+        for symbol in self.regular_automaton.matrices:
+            if symbol in self.graph:
+                self.intersection_matrices[symbol] = Matrix.sparse(
+                    BOOL, num_verts_inter, num_verts_inter
+                )
+
+    def __to_automaton__(self) -> EpsilonNFA:
+        """
+        Build an automaton from the intersection matrices
+        """
+        enfa = EpsilonNFA()
+        graph_vertices_num = self.graph.get_number_of_vertices()
+
+        start_states = [
+            self.to_inter_coord(x, y)
+            for x, y in product(
+                range(graph_vertices_num), self.regular_automaton.start_states
+            )
+        ]
+
+        final_states = [
+            self.to_inter_coord(x, y)
+            for x, y in product(
+                range(graph_vertices_num), self.regular_automaton.final_states
+            )
+        ]
+
+        for start_state in start_states:
+            enfa.add_start_state(State(start_state))
+
+        for final_state in final_states:
+            enfa.add_final_state(State(final_state))
+
+        for symbol in self.intersection_matrices:
+            matrix = self.intersection_matrices[symbol]
+
+            for row, col in zip(matrix.rows, matrix.cols):
+                enfa.add_transition(State(row), Symbol(symbol), State(col))
+
+        return enfa
+
+    def to_inter_coord(self, graph_vert, reg_vert) -> int:
+        """
+        Converts the coordinates of a graph vertex and a regex vertex
+        to the coordinate of the corresponding intersection vertex
+        """
+        return reg_vert * self.graph.get_number_of_vertices() + graph_vert
+
+    def create_diag_matrices(self) -> Dict[str, Matrix]:
+        """
+        Create block diagonal matrices from graph and regex matrices for each symbol
+        """
+        num_vert_graph = self.graph.get_number_of_vertices()
+        num_vert_regex = self.regular_automaton.num_states
+        diag_num_verts = num_vert_graph + num_vert_regex
+
+        diag_matrices = dict()
+        for symbol in self.regular_automaton.matrices:
+            if symbol in self.graph:
+                diag_matrix = Matrix.sparse(BOOL, diag_num_verts, diag_num_verts)
+                diag_matrix.assign_matrix(
+                    self.regular_automaton.matrices[symbol],
+                    slice(0, num_vert_regex - 1),
+                    slice(0, num_vert_regex - 1),
+                )
+                diag_matrix.assign_matrix(
+                    self.graph[symbol],
+                    slice(num_vert_regex, diag_num_verts - 1),
+                    slice(num_vert_regex, diag_num_verts - 1),
+                )
+
+                diag_matrices[symbol] = diag_matrix
+
+        return diag_matrices
+
+    def create_masks_matrix(self) -> Matrix:
+        """
+        Create the masks matrix: an identity matrix over the regex states
+        resized to the width of the block diagonal matrices
+        """
+        num_vert_graph = self.graph.get_number_of_vertices()
+        num_vert_regex = self.regular_automaton.num_states
+        num_verts_diag = num_vert_graph + num_vert_regex
+
+        mask_matrix = Matrix.identity(BOOL, num_vert_regex, value=True)
+        mask_matrix.resize(num_vert_regex, num_verts_diag)
+
+        return mask_matrix
+
+    def intersect_bfs(self, src_verts) -> Matrix:
+        """
+        Intersection implementation with synchronous breadth first traversal
+        of a graph and regular grammar represented in automata
+
+        :param src_verts: indices of the source graph vertices
+        :return: boolean matrix over graph vertices; every source vertex row
+                 accumulates the vertices found by the traversal, written through
+                 the complement of the source vertices mask
+        """
+        num_vert_graph = self.graph.get_number_of_vertices()
+        num_vert_regex = self.regular_automaton.num_states
+
+        num_verts_inter = num_vert_graph * num_vert_regex
+        num_verts_diag = num_vert_graph + num_vert_regex
+
+        graph = self.graph
+        regex = self.regular_automaton.matrices
+
+        regex_start_states = self.regular_automaton.start_states
+
+        diag_matrices = self.create_diag_matrices()
+
+        result = Matrix.sparse(BOOL, num_vert_graph, num_vert_graph)
+
+        # create a mask of source vertices vector
+        m_src_v = Vector.from_lists(src_verts, [True] * len(src_verts), size=num_vert_graph)
+
+        # initialize matrices for multiple source bfs
+        ident = self.create_masks_matrix()
+        vect = ident.dup()
+        found = ident.dup()
+
+        # fill start states
+        for reg_start_state in regex_start_states:
+            for gr_start_state in src_verts:
+                found[reg_start_state, num_vert_regex + gr_start_state] = True
+
+        # matrix which contains newly found nodes on each iteration
+        found_on_iter = found.dup()
+
+        # Algo's body
+        not_empty = True
+        level = 0
+        while not_empty and level < num_verts_inter:
+            # for each symbol we are going to store if any new nodes were found during traversal.
+            # if none are found, then 'not_empty' flag turns False, which means that no matrices change anymore
+            # and we can stop the traversal
+            not_empty_for_at_least_one_symbol = False
+
+            vect.assign_matrix(found_on_iter, mask=vect, desc=descriptor.RC)
+            vect.assign_scalar(True, mask=ident)
+
+            # stores found nodes for each symbol
+            found_on_iter.assign_matrix(ident)
+
+            for symbol in regex:
+                if symbol in graph:
+                    with BOOL.ANY_PAIR:
+                        found = vect.mxm(diag_matrices[symbol])
+
+                    with Accum(binaryop.MAX_BOOL):
+                        # extract left (grammar) part of the masks matrix and rearrange rows
+                        i_x, i_y, _ = found.extract_matrix(col_index=slice(0, num_vert_regex - 1)).to_lists()
+                        for row, col in zip(i_x, i_y):
+                            found_on_iter.assign_row(col, found.extract_row(row))
+
+                    # check if new nodes were found. if positive, switch the flag
+                    if not found_on_iter.iseq(vect):
+                        not_empty_for_at_least_one_symbol = True
+
+            # extract right (graph) part of the masks matrix and get a row of reachable nodes in a graph
+            reachable = found_on_iter.extract_matrix(
+                col_index=slice(num_vert_regex, num_verts_diag - 1)
+            ).T.reduce_vector(BOOL.ANY_MONOID)  # reduce by columns
+
+            # update graph boolean matrix for every source vertex
+            # result matrix contains reachability for every symbol combined
+            with Accum(binaryop.MAX_BOOL):
+                for st_v in src_verts:
+                    result.assign_row(st_v, reachable, mask=m_src_v, desc=descriptor.C)
+
+            not_empty = not_empty_for_at_least_one_symbol
+            level += 1
+
+        return result
diff --git a/src/problems/MultipleSource/algo/matrix_bfs/matrix_bfs.py b/src/problems/MultipleSource/algo/matrix_bfs/matrix_bfs.py
new file mode 100644
index 0000000..13f626e
--- /dev/null
+++ b/src/problems/MultipleSource/algo/matrix_bfs/matrix_bfs.py
@@ -0,0 +1,31 @@
+from pyformlang.cfg import CFG
+
+from src.grammar.rsa import RecursiveAutomaton
+
+from src.graph.graph import Graph
+from src.graph.label_graph import LabelGraph
+
+from src.problems.AllPaths.AllPaths import AllPathsProblem
+from src.problems.utils import ResultAlgo
+
+
+class ProblemAlgo(AllPathsProblem):
+    """
+    For now we have regular grammar only in this algo.
+    Hence this is to be implemented with CFG.
+    """
+
+    def prepare(self, graph: Graph, grammar: CFG):
+        pass
+
+    def prepare_for_solve(self):
+        pass
+
+    def solve(self):
+        pass
+
+    def prepare_for_exctract_paths(self):
+        pass
+
+    def getPaths(self, v_start: int, v_finish: int, nonterminal: str, max_len: int):
+        pass
diff --git a/src/problems/MultipleSource/algo/matrix_bfs/reg_automaton.py b/src/problems/MultipleSource/algo/matrix_bfs/reg_automaton.py
new file mode 100644
index 0000000..2883a2d
--- /dev/null
+++ b/src/problems/MultipleSource/algo/matrix_bfs/reg_automaton.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+from pyformlang.regular_expression.regex import Regex
+
+from pygraphblas.matrix import Matrix
+from pygraphblas.types import BOOL
+
+
+class RegAutomaton:
+    """
+    Automata representation of regular grammar
+    """
+
+    def __init__(self, regex: Regex):
+        self.enfa = regex.to_epsilon_nfa().minimize()
+
+        self.states = self.enfa.states
+        self.num_states = len(self.states)
+
+        self.enum_states = dict(zip(self.states, range(self.num_states)))
+        self.start_states = [
+            self.enum_states[state] for state in self.enfa.start_states
+        ]
+        self.final_states = [
+            self.enum_states[state] for state in self.enfa.final_states
+        ]
+
+        self.matrices = dict()
+        self.load_bool_matrices()
+
+    @staticmethod
+    def from_regex_txt(path) -> RegAutomaton:
+        """
+        Build an automaton from the regex on the first line of a text file
+        """
+        with open(path, "r") as file:
+            regex = Regex(file.readline())
+
+        return RegAutomaton(regex)
+
+    def load_bool_matrices(self) -> None:
+        """
+        Creates boolean matrices for self automata
+        """
+        for src_node, transition in self.enfa.to_dict().items():
+            for symbol, tgt_node in transition.items():
+                if symbol not in self.matrices:
+                    self.matrices[symbol] = Matrix.sparse(
+                        BOOL, self.num_states, self.num_states
+                    )
+
+                matr = self.matrices[symbol]
+                matr[self.enum_states[src_node], self.enum_states[tgt_node]] = True
diff --git a/test/MultipleSource/test_bfs.py b/test/MultipleSource/test_bfs.py
new file mode 100644
index 0000000..eb76752
--- /dev/null
+++ b/test/MultipleSource/test_bfs.py
@@ -0,0 +1,93 @@
+import pytest
+
+from src.graph.graph import Graph
+
+from src.problems.MultipleSource.algo.matrix_bfs.intersection import Intersection
+from src.problems.MultipleSource.algo.matrix_bfs.reg_automaton import RegAutomaton
+
+from src.utils.useful_paths import LOCAL_CFPQ_DATA
+
+
+@pytest.mark.CI
+def test_case_regular_cycle():
+    test_data_path = LOCAL_CFPQ_DATA.joinpath("regular/cycle")
+
+    graph = Graph.from_txt(test_data_path.joinpath("Graphs/graph_1.txt"))
+    grammar = RegAutomaton.from_regex_txt(
+        test_data_path.joinpath("Grammars/regex_1.txt")
+    )
+
+    intersection = Intersection(graph, grammar)
+
+    source_verts = [0]
+    result = intersection.intersect_bfs(source_verts)
+
+    assert result.nvals == 2 * len(source_verts)
+
+
+@pytest.mark.CI
+def test_case_regular_disconnected():
+    test_data_path = LOCAL_CFPQ_DATA.joinpath("regular/disconnected")
+
+    graph = Graph.from_txt(test_data_path.joinpath("Graphs/graph_1.txt"))
+    grammar = RegAutomaton.from_regex_txt(
+        test_data_path.joinpath("Grammars/regex_1.txt")
+    )
+
+    intersection = Intersection(graph, grammar)
+
+    source_verts = [0, 3]
+    result = intersection.intersect_bfs(source_verts)
+
+    assert result.nvals == 2 * len(source_verts)
+
+
+@pytest.mark.CI
+def test_case_regular_loop():
+    test_data_path = LOCAL_CFPQ_DATA.joinpath("regular/loop")
+
+    graph = Graph.from_txt(test_data_path.joinpath("Graphs/graph_1.txt"))
+    grammar = RegAutomaton.from_regex_txt(
+        test_data_path.joinpath("Grammars/regex_1.txt")
+    )
+
+    intersection = Intersection(graph, grammar)
+
+    source_verts = [0, 2]
+    result = intersection.intersect_bfs(source_verts)
+
+    assert result.nvals == 0 * len(source_verts)
+
+
+@pytest.mark.CI
+def test_case_regular_midsymbol():
+    test_data_path = LOCAL_CFPQ_DATA.joinpath("regular/midsymbol")
+
+    graph = Graph.from_txt(test_data_path.joinpath("Graphs/graph_1.txt"))
+    grammar = RegAutomaton.from_regex_txt(
+        test_data_path.joinpath("Grammars/regex_1.txt")
+    )
+
+    intersection = Intersection(graph, grammar)
+
+    source_verts = [0]
+    result = intersection.intersect_bfs(source_verts)
+
+    assert result.nvals == 1 * len(source_verts)
+
+
+@pytest.mark.CI
+def test_case_regular_two_cycles():
+    test_data_path = LOCAL_CFPQ_DATA.joinpath("regular/two_cycles")
+
+    graph = Graph.from_txt(test_data_path.joinpath("Graphs/graph_1.txt"))
+    grammar = RegAutomaton.from_regex_txt(
+        test_data_path.joinpath("Grammars/regex_1.txt")
+    )
+
+    intersection = Intersection(graph, grammar)
+
+    source_verts = [0, 3]
+    result = intersection.intersect_bfs(source_verts)
+
+    assert result.nvals == 2 * len(source_verts)
diff --git a/test/data/regular/cycle/Grammars/regex_1.txt b/test/data/regular/cycle/Grammars/regex_1.txt
new file mode 100644
index 0000000..08c2643
--- /dev/null
+++ b/test/data/regular/cycle/Grammars/regex_1.txt
@@ -0,0 +1 @@
+a a*
\ No newline at end of file
diff --git a/test/data/regular/cycle/Graphs/graph_1.txt b/test/data/regular/cycle/Graphs/graph_1.txt
new file mode 100644
index 0000000..a675802
--- /dev/null
+++ b/test/data/regular/cycle/Graphs/graph_1.txt
@@ -0,0 +1,3 @@
+0 a 1
+1 a 2
+2 a 0
\ No newline at end of file
diff --git a/test/data/regular/disconnected/Grammars/regex_1.txt b/test/data/regular/disconnected/Grammars/regex_1.txt
new file mode 100644
index 0000000..35456ac
--- /dev/null
+++ b/test/data/regular/disconnected/Grammars/regex_1.txt
@@ -0,0 +1 @@
+a* b (a|b)*
\ No newline at end of file
diff --git a/test/data/regular/disconnected/Graphs/graph_1.txt b/test/data/regular/disconnected/Graphs/graph_1.txt
new file mode 100644
index 0000000..7296430
--- /dev/null
+++ b/test/data/regular/disconnected/Graphs/graph_1.txt
@@ -0,0 +1,8 @@
+0 a 1
+0 b 0
+1 a 1
+1 b 2
+2 a 2
+2 b 2
+3 a 3
+3 b 3
\ No newline at end of file
diff --git a/test/data/regular/loop/Grammars/regex_1.txt b/test/data/regular/loop/Grammars/regex_1.txt
new file mode 100644
index 0000000..08c2643
--- /dev/null
+++ b/test/data/regular/loop/Grammars/regex_1.txt
@@ -0,0 +1 @@
+a a*
\ No newline at end of file
diff --git a/test/data/regular/loop/Graphs/graph_1.txt b/test/data/regular/loop/Graphs/graph_1.txt
new file mode 100644
index 0000000..17294dd
--- /dev/null
+++ b/test/data/regular/loop/Graphs/graph_1.txt
@@ -0,0 +1,3 @@
+0 a 0
+0 b 1
+1 c 2
\ No newline at end of file
diff --git a/test/data/regular/midsymbol/Grammars/regex_1.txt b/test/data/regular/midsymbol/Grammars/regex_1.txt
new file mode 100644
index 0000000..76ece65
--- /dev/null
+++ b/test/data/regular/midsymbol/Grammars/regex_1.txt
@@ -0,0 +1 @@
+a* b c*
\ No newline at end of file
diff --git a/test/data/regular/midsymbol/Graphs/graph_1.txt b/test/data/regular/midsymbol/Graphs/graph_1.txt
new file mode 100644
index 0000000..3fe9500
--- /dev/null
+++ b/test/data/regular/midsymbol/Graphs/graph_1.txt
@@ -0,0 +1,3 @@
+0 a 0
+0 b 1
+1 c 1
\ No newline at end of file
diff --git a/test/data/regular/two_cycles/Grammars/regex_1.txt b/test/data/regular/two_cycles/Grammars/regex_1.txt
new file mode 100644
index 0000000..08c2643
--- /dev/null
+++ b/test/data/regular/two_cycles/Grammars/regex_1.txt
@@ -0,0 +1 @@
+a a*
\ No newline at end of file
diff --git a/test/data/regular/two_cycles/Graphs/graph_1.txt b/test/data/regular/two_cycles/Graphs/graph_1.txt
new file mode 100644
index 0000000..17d1fba
--- /dev/null
+++ b/test/data/regular/two_cycles/Graphs/graph_1.txt
@@ -0,0 +1,6 @@
+0 a 1
+1 a 2
+2 b 1
+2 b 3
+3 a 2
+3 c 4
\ No newline at end of file