-
-
Notifications
You must be signed in to change notification settings - Fork 48.9k
Add graphs/frequent_pattern_graph_miner.py #1866
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
37 commits
Select commit
Hold shift + click to select a range
3422b71
Add files via upload
siva1098 c0d38d6
Update graphs/frequent_pattern_graph_miner.py
siva1098 d1ccba5
Update graphs/frequent_pattern_graph_miner.py
siva1098 aadb8d1
Update frequent_pattern_graph_miner.py
siva1098 87d405e
Update frequent_pattern_graph_miner.py
siva1098 6b2ebb0
Update frequent_pattern_graph_miner.py
siva1098 072f971
Update frequent_pattern_graph_miner.py
siva1098 116877a
Update graphs/frequent_pattern_graph_miner.py
siva1098 09f87e1
Update graphs/frequent_pattern_graph_miner.py
siva1098 46e04f8
Update graphs/frequent_pattern_graph_miner.py
siva1098 e4d0b79
Update graphs/frequent_pattern_graph_miner.py
siva1098 7456e09
Update graphs/frequent_pattern_graph_miner.py
siva1098 7321986
Update graphs/frequent_pattern_graph_miner.py
siva1098 9fd973d
Update graphs/frequent_pattern_graph_miner.py
siva1098 3fb1340
Update graphs/frequent_pattern_graph_miner.py
siva1098 0a1ea2f
Update frequent_pattern_graph_miner.py
siva1098 32adf11
Update frequent_pattern_graph_miner.py
siva1098 c519794
Update frequent_pattern_graph_miner.py
siva1098 e38e510
Update frequent_pattern_graph_miner.py
siva1098 e222c53
Update frequent_pattern_graph_miner.py
siva1098 45b2611
Update graphs/frequent_pattern_graph_miner.py
siva1098 937d288
Update frequent_pattern_graph_miner.py
siva1098 9631750
Update frequent_pattern_graph_miner.py
siva1098 36a0c9f
Update frequent_pattern_graph_miner.py
siva1098 67cc254
Update frequent_pattern_graph_miner.py
siva1098 0420044
Update frequent_pattern_graph_miner.py
siva1098 b7c1a93
Update frequent_pattern_graph_miner.py
siva1098 39dbbf2
Update graphs/frequent_pattern_graph_miner.py
siva1098 3558e91
Update frequent_pattern_graph_miner.py
siva1098 81f67ee
Update frequent_pattern_graph_miner.py
siva1098 39d2ddf
Update frequent_pattern_graph_miner.py
siva1098 fdb4c33
Update frequent_pattern_graph_miner.py
siva1098 b229de0
Update frequent_pattern_graph_miner.py
siva1098 860748d
Update frequent_pattern_graph_miner.py
siva1098 9a50cf6
Update frequent_pattern_graph_miner.py
siva1098 358fa99
Whitespace changes
cclauss a8ae4c8
Format with psf/black
cclauss File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,232 @@ | ||
""" | ||
FP-GraphMiner - A Fast Frequent Pattern Mining Algorithm for Network Graphs | ||
|
||
FP-GraphMiner is a frequent pattern graph mining algorithm that compactly | ||
represents a set of network graphs as a Frequent Pattern Graph (or FP-Graph). | ||
This graph can be used to efficiently mine frequent subgraphs, including maximal | ||
frequent subgraphs and maximum common subgraphs. | ||
|
||
URL: https://www.researchgate.net/publication/235255851 | ||
""" | ||
# Sample input: one inner list per graph.  Every entry is a string of the form
# "<edge>-<label>"; preprocess() splits it on "-", and
# freq_subgraphs_edge_list() reads the first two characters of <edge> as the
# two endpoints of the edge.
# NOTE(review): the "eN" part looks like an edge label/weight - confirm against
# the referenced paper.
# fmt: off
edge_array = [
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'bh-e12', 'cd-e2', 'ce-e4',
     'de-e1', 'df-e8', 'dg-e5', 'dh-e10', 'ef-e3', 'eg-e2', 'fg-e6', 'gh-e6', 'hi-e3'],
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'be-e6', 'cd-e2', 'de-e1', 'df-e8',
     'ef-e3', 'eg-e2', 'fg-e6'],
    ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'de-e1', 'df-e8', 'dg-e5', 'ef-e3', 'eg-e2',
     'eh-e12', 'fg-e6', 'fh-e10', 'gh-e6'],
    ['ab-e1', 'ac-e3', 'bc-e4', 'bd-e2', 'bh-e12', 'cd-e2', 'df-e8', 'dh-e10'],
    ['ab-e1', 'ac-e3', 'ad-e5', 'bc-e4', 'bd-e2', 'cd-e2', 'ce-e4', 'de-e1', 'df-e8',
     'dg-e5', 'ef-e3', 'eg-e2', 'fg-e6']
]
# fmt: on
|
||
|
||
def get_distinct_edge(edge_array):
    """
    Collect the distinct first components found across every graph's items.

    Each item contributes ``item[0]``: for a preprocessed item such as
    ``['ab', 'e1']`` that is the edge name ``'ab'``; for a raw string such as
    ``'ab-e1'`` it is the first character ``'a'``.  The values pass through a
    set, so the order of the returned list is unspecified.

    >>> sorted(get_distinct_edge([[['ab', 'e1'], ['ac', 'e3']], [['ab', 'e1']]]))
    ['ab', 'ac']
    >>> sorted(get_distinct_edge([['ab-e1', 'cd-e2']]))
    ['a', 'c']
    """
    return list({item[0] for row in edge_array for item in row})
|
||
|
||
def get_bitcode(edge_array, distinct_edge):
    """
    Return the bitcode of ``distinct_edge``.

    The bitcode is a string with one character per graph (row) of
    ``edge_array``: "1" when the row holds an item whose first component equals
    ``distinct_edge`` and "0" otherwise.

    >>> get_bitcode([[['ab', 'e1']], [['ac', 'e3']], [['ab', 'e1']]], 'ab')
    '101'
    >>> get_bitcode([[['abc', 'e1']], [['ab', 'e2']]], 'ab')
    '01'
    """
    # Compare for equality: a substring test would also match longer edge
    # names that merely contain ``distinct_edge`` (e.g. 'ab' inside 'abc').
    return "".join(
        "1" if any(item[0] == distinct_edge for item in row) else "0"
        for row in edge_array
    )
|
||
|
||
def get_frequency_table(edge_array):
    """
    Build the frequency table of every distinct edge.

    Returns a list of ``[edge, weight, bitcode]`` rows, where ``bitcode`` is
    the result of ``get_bitcode`` for that edge and ``weight`` is the number of
    "1" characters in it.  Rows are sorted by descending weight; rows with the
    same weight are ordered by edge so the result is reproducible between runs.
    """
    frequency_table = []
    # get_distinct_edge() returns the edges in set order, which varies between
    # interpreter runs.  Sorting them first fixes the order of ties, and the
    # stable sort below keeps that order within each weight.
    for edge in sorted(get_distinct_edge(edge_array)):
        bitcode = get_bitcode(edge_array, edge)
        frequency_table.append([edge, bitcode.count("1"), bitcode])
    frequency_table.sort(key=lambda row: row[1], reverse=True)
    return frequency_table
|
||
|
||
def get_nodes(frequency_table):
    """
    Group edges by bitcode.

    Takes rows shaped like ``[edge, weight, bitcode]`` and returns
    ``{bitcode: [edges sharing that bitcode]}``, keeping the row order.

    >>> get_nodes([['ab', 5, '11111'], ['ac', 5, '11111'], ['df', 5, '11111'],
    ...            ['bd', 5, '11111'], ['bc', 5, '11111']])
    {'11111': ['ab', 'ac', 'df', 'bd', 'bc']}
    """
    grouped = {}
    for row in frequency_table:
        edge, bitcode = row[0], row[2]
        if bitcode not in grouped:
            grouped[bitcode] = []
        grouped[bitcode].append(edge)
    return grouped
|
||
|
||
def get_cluster(nodes):
    """
    Group nodes by the weight of their bitcode.

    The weight is the number of "1" characters in the bitcode.  The result has
    the form ``{weight: {bitcode: edges}}``.

    >>> get_cluster({'111': ['ab'], '110': ['cd'], '011': ['ef']})
    {3: {'111': ['ab']}, 2: {'110': ['cd'], '011': ['ef']}}
    """
    cluster = {}
    for bitcode in nodes:
        weight = bitcode.count("1")
        if weight not in cluster:
            cluster[weight] = {}
        cluster[weight][bitcode] = nodes[bitcode]
    return cluster
|
||
|
||
def get_support(cluster):
    """
    Return the support percentage of every weight level in ``cluster``.

    ``cluster`` is expected in the form produced by ``get_cluster``:
    ``{weight: {bitcode: edges}}``.  The support of a level is
    ``weight * 100 / number_of_graphs``, listed in the iteration order of
    ``cluster``.

    >>> get_support({5: {'11111': ['ab', 'ac', 'df', 'bd', 'bc']},
    ...              4: {'11101': ['ef', 'eg', 'de', 'fg'], '11011': ['cd']},
    ...              3: {'11001': ['ad'], '10101': ['dg']},
    ...              2: {'10010': ['dh', 'bh'], '11000': ['be'], '10100': ['gh'],
    ...                  '10001': ['ce']},
    ...              1: {'00100': ['fh', 'eh'], '10000': ['hi']}})
    [100.0, 80.0, 60.0, 40.0, 20.0]
    >>> get_support({5: {'11111': ['ab']}, 3: {'11001': ['ad']}})
    [100.0, 60.0]
    """
    # A bitcode holds one character per graph, so its length is the number of
    # graphs.  The number of weight levels only equals that count when every
    # weight from 1 upwards is present, so it is used purely as a fallback.
    graph_count = 0
    for nodes_by_bitcode in cluster.values():
        if isinstance(nodes_by_bitcode, dict) and nodes_by_bitcode:
            graph_count = len(next(iter(nodes_by_bitcode)))
            break
    if not graph_count:
        graph_count = len(cluster)
    return [weight * 100 / graph_count for weight in cluster]
|
||
|
||
def print_all() -> None:
    """
    Print the module-level results: ``nodes``, ``support``, ``cluster`` (from
    the highest key down), ``graph`` and ``freq_subgraph_edge_list``.

    These names are read as globals, so they must be defined before the call.
    """
    print("\nNodes\n")
    for bitcode in nodes:
        print(bitcode, nodes[bitcode])
    print("\nSupport\n")
    print(support)
    print("\n Cluster \n")
    for weight in sorted(cluster, reverse=True):
        print(weight, cluster[weight])
    print("\n Graph\n")
    for source in graph:
        print(source, graph[source])
    print("\n Edge List of Frequent subgraphs \n")
    for edge_list in freq_subgraph_edge_list:
        print(edge_list)
|
||
|
||
def create_edge(nodes, graph, cluster, c1):
    """
    Link every node of weight ``c1`` to nodes of a higher weight.

    For each bitcode at level ``c1`` the levels ``c1 + 1``, ``c1 + 2``, ... are
    tried in turn, stopping before the highest key of ``cluster``.  At the
    first level holding at least one bitcode that contains all the set bits of
    the lower bitcode, an entry ``graph[tuple(lower edges)]`` is created or
    extended with the edge lists of every matching node of that level.
    ``graph`` is modified in place; nothing is returned.
    """
    for lower in cluster[c1]:
        linked = False
        level = c1 + 1
        while not linked and level < max(cluster):
            for upper in cluster[level]:
                lower_bits = int(lower, 2)
                # An edge is created only when ``upper`` has every bit of
                # ``lower`` set.
                if lower_bits & int(upper, 2) != lower_bits:
                    continue
                source = tuple(nodes[lower])
                target = nodes[upper]
                graph.setdefault(source, []).append(target)
                linked = True
            if not linked:
                level += 1
|
||
|
||
def construct_graph(cluster, nodes):
    """
    Build the graph dictionary from ``cluster`` and ``nodes``.

    A ``("Header",)`` entry is connected in both directions with every node of
    the highest weight level, then ``create_edge`` is run for each level from 1
    up to (but excluding) that highest level.

    Note that ``cluster`` is modified: a ``"Header"`` entry is stored under the
    key one above its highest weight.

    Returns the graph as ``{tuple(edges): [edge lists of linked nodes]}``.
    """
    top_weight = max(cluster.keys())
    top_nodes = cluster[top_weight]
    cluster[top_weight + 1] = "Header"

    header = ("Header",)
    graph = {}
    for bitcode in top_nodes:
        graph.setdefault(header, []).append(top_nodes[bitcode])
    for bitcode in top_nodes:
        graph[tuple(top_nodes[bitcode])] = [["Header"]]

    for weight in range(1, max(cluster) - 1):
        create_edge(nodes, graph, cluster, weight)
    return graph
|
||
|
||
def myDFS(graph, start, end, path=None):
    """
    Record every depth-first walk from ``start`` to ``end``.

    ``graph`` maps a node (a tuple) to a list of neighbours; each neighbour is
    converted with ``tuple()`` before it is visited.  Every completed walk, a
    list of nodes beginning at the first ``start`` and finishing with ``end``,
    is appended to the module-level ``paths`` list, which must exist before
    this function is called.  Nothing is returned.

    ``path`` holds the nodes visited so far and is meant for the recursive
    calls; callers normally leave it out.
    """
    # A ``None`` default avoids sharing one mutable list between calls.
    path = ([] if path is None else path) + [start]
    if start == end:
        paths.append(path)
        # Any continuation would already contain ``end`` and so could never
        # reach it again; stopping here skips that wasted search and does not
        # require ``end`` to have an adjacency entry in ``graph``.
        return
    for node in graph[start]:
        neighbour = tuple(node)
        if neighbour not in path:
            myDFS(graph, neighbour, end, path)
|
||
|
||
def find_freq_subgraph_given_support(s, cluster, graph):
    """
    Start a DFS towards the Header node from every node of one cluster level.

    The level is ``int(s / 100 * (len(cluster) - 1))`` for a support
    percentage ``s``.  The walks are collected by ``myDFS``; this function
    returns nothing.
    NOTE(review): the ``- 1`` presumably discounts the "Header" entry that
    ``construct_graph`` adds to ``cluster`` - confirm.
    """
    level = int(s / 100 * (len(cluster) - 1))
    header = tuple(["Header"])
    for edges in cluster[level].values():
        myDFS(graph, tuple(edges), header)
|
||
|
||
def freq_subgraphs_edge_list(paths):
    """
    Convert paths into edge lists, one list per path.

    Every node of a path except the last one is expanded into
    ``(edge[0], edge[1])`` tuples, one for each edge the node holds.

    >>> freq_subgraphs_edge_list([[('ab', 'ac'), ('bd',), ('Header',)]])
    [[('a', 'b'), ('a', 'c'), ('b', 'd')]]
    """
    edge_lists = []
    for path in paths:
        pairs = [(edge[0], edge[1]) for node in path[:-1] for edge in node]
        edge_lists.append(pairs)
    return edge_lists
|
||
|
||
def preprocess(edge_array):
    """
    Split every item of ``edge_array`` on "-" in place.

    Each string such as ``'ab-e1'`` is replaced by the list ``['ab', 'e1']``.
    The nested lists are modified directly and nothing is returned.

    >>> graphs = [['ab-e1', 'ac-e3'], ['bd-e2']]
    >>> preprocess(graphs)
    >>> graphs
    [[['ab', 'e1'], ['ac', 'e3']], [['bd', 'e2']]]
    """
    for row in edge_array:
        for position, item in enumerate(row):
            row[position] = item.split("-")
|
||
|
||
if __name__ == "__main__":
    # Run the whole pipeline on the sample ``edge_array`` and print the result.
    preprocess(edge_array)
    frequency_table = get_frequency_table(edge_array)
    nodes = get_nodes(frequency_table)
    cluster = get_cluster(nodes)
    support = get_support(cluster)
    graph = construct_graph(cluster, nodes)
    # myDFS appends every walk to the module-level ``paths`` list, so the list
    # has to exist before the search starts.  Creating it afterwards raises
    # NameError during the search and would discard any walk that was found.
    paths = []
    find_freq_subgraph_given_support(60, cluster, graph)
    freq_subgraph_edge_list = freq_subgraphs_edge_list(paths)
    print_all()
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.