enaeseth · yeonsssu26 · Sep 8, 2021 · Nov 30, 2021 · daavve · Oct 13, 2022
diff --git a/Readme.md b/Readme.md
@@ -1,11 +1,11 @@
 Python FP-Growth
 ================
 
-This module provides a pure Python implementation of the FP-growth algorithm for
-finding frequent itemsets. FP-growth exploits an (often-valid) assumption that
-many transactions will have items in common to build a prefix tree. If the
-assumption holds true, this tree produces a compact representation of the actual
-transactions and is used to generate itemsets much faster than *Apriori* can.
+## Concept
+Association Rule (연관규칙)을 적용하기 위해서는 각 item들이 각 itemset에서 어떤 빈도로 나타났고, 어떤 item과 함께 나왔는지 세는 것이 필수이다. 하지만 데이터셋이 큰 경우, 이를 모든 후보 itemset들에 대해서 하나하나 검사하는 것은 굉장히 비효율적이다. 이러한 문제를 해결하기 위해 제시된 것이 FP-growth algorithm이다.
+
+Reference:
+https://process-mining.tistory.com/92
 
 Installation
 ------------
@@ -84,4 +84,4 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 
 [me]: http://github.com/enaeseth/
-[pypi]: http://pypi.python.org/
+[pypi]: http://pypi.python.org/
diff --git a/fp_growth.py b/fp_growth.py
@@ -10,11 +10,13 @@
 """
 
 from collections import defaultdict, namedtuple
-from itertools import imap
 
-__author__ = 'Eric Naeseth <[email protected]>'
-__copyright__ = 'Copyright © 2009 Eric Naeseth'
-__license__ = 'MIT License'
+# from itertools import imap
+
+__author__ = "Eric Naeseth <[email protected]>"
+__copyright__ = "Copyright © 2009 Eric Naeseth"
+__license__ = "MIT License"
+
 
 def find_frequent_itemsets(transactions, minimum_support, include_support=False):
     """
@@ -31,7 +33,7 @@ def find_frequent_itemsets(transactions, minimum_support, include_support=False)
     If `include_support` is true, yield (itemset, support) pairs instead of
     just the itemsets.
     """
-    items = defaultdict(lambda: 0) # mapping from items to their supports
+    items = defaultdict(lambda: 0)  # mapping from items to their supports
 
     # Load the passed-in transactions and count the support that individual
     # items have.
@@ -40,19 +42,25 @@ def find_frequent_itemsets(transactions, minimum_support, include_support=False)
             items[item] += 1
 
     # Remove infrequent items from the item support dictionary.
-    items = dict((item, support) for item, support in items.iteritems()
-        if support >= minimum_support)
+    items = dict(
+        (item, support)
+        # for item, support in items.iteritems()
+        for item, support in items.items()
+        if support >= minimum_support
+    )
 
     # Build our FP-tree. Before any transactions can be added to the tree, they
     # must be stripped of infrequent items and their surviving items must be
     # sorted in decreasing order of frequency.
     def clean_transaction(transaction):
         transaction = filter(lambda v: v in items, transaction)
-        transaction.sort(key=lambda v: items[v], reverse=True)
+        # transaction.sort(key=lambda v: items[v], reverse=True)
+        transaction = sorted(transaction, key=lambda v: items[v], reverse=True)
         return transaction
 
     master = FPTree()
-    for transaction in imap(clean_transaction, transactions):
+    # for transaction in imap(clean_transaction, transactions):
+    for transaction in list(map(clean_transaction, transactions)):
         master.add(transaction)
 
     def find_with_suffix(tree, suffix):
@@ -67,12 +75,13 @@ def find_with_suffix(tree, suffix):
                 # itemsets within it.
                 cond_tree = conditional_tree_from_paths(tree.prefix_paths(item))
                 for s in find_with_suffix(cond_tree, found_set):
-                    yield s # pass along the good news to our caller
+                    yield s  # pass along the good news to our caller
 
     # Search for frequent itemsets, and yield the results we find.
     for itemset in find_with_suffix(master, []):
         yield itemset
 
+
 class FPTree(object):
     """
     An FP tree.
@@ -81,7 +90,7 @@ class FPTree(object):
     (i.e., all items must be valid as dictionary keys or set members).
     """
 
-    Route = namedtuple('Route', 'head tail')
+    Route = namedtuple("Route", "head tail")
 
     def __init__(self):
         # The root node of the tree.
@@ -124,7 +133,7 @@ def _update_route(self, point):
 
         try:
             route = self._routes[point.item]
-            route[1].neighbor = point # route[1] is the tail
+            route[1].neighbor = point  # route[1] is the tail
             self._routes[point.item] = self.Route(route[0], point)
         except KeyError:
             # First node for this item; start a new route.
@@ -136,7 +145,8 @@ def items(self):
         element of the tuple is the item itself, and the second element is a
         generator that will yield the nodes in the tree that belong to the item.
         """
-        for item in self._routes.iterkeys():
+        # for item in self._routes.iterkeys():
+        for item in self._routes:
             yield (item, self.nodes(item))
 
     def nodes(self, item):
@@ -167,15 +177,16 @@ def collect_path(node):
         return (collect_path(node) for node in self.nodes(item))
 
     def inspect(self):
-        print 'Tree:'
+        print("Tree:")
         self.root.inspect(1)
 
-        print
-        print 'Routes:'
+        print()
+        print("Routes:")
         for item, nodes in self.items():
-            print '  %r' % item
+            print("  %r" % item)
             for node in nodes:
-                print '    %r' % node
+                print("    %r" % node)
+
 
 def conditional_tree_from_paths(paths):
     """Build a conditional FP-tree from the given prefix paths."""
@@ -212,6 +223,7 @@ def conditional_tree_from_paths(paths):
 
     return tree
 
+
 class FPNode(object):
     """A node in an FP tree."""
 
@@ -312,7 +324,7 @@ def children(self):
         return tuple(self._children.itervalues())
 
     def inspect(self, depth=0):
-        print ('  ' * depth) + repr(self)
+        print(("  " * depth) + repr(self))
         for child in self.children:
             child.inspect(depth + 1)
 
@@ -322,21 +334,31 @@ def __repr__(self):
         return "<%s %r (%r)>" % (type(self).__name__, self.item, self.count)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     from optparse import OptionParser
     import csv
 
-    p = OptionParser(usage='%prog data_file')
-    p.add_option('-s', '--minimum-support', dest='minsup', type='int',
-        help='Minimum itemset support (default: 2)')
-    p.add_option('-n', '--numeric', dest='numeric', action='store_true',
-        help='Convert the values in datasets to numerals (default: false)')
+    p = OptionParser(usage="%prog data_file")
+    p.add_option(
+        "-s",
+        "--minimum-support",
+        dest="minsup",
+        type="int",
+        help="Minimum itemset support (default: 2)",
+    )
+    p.add_option(
+        "-n",
+        "--numeric",
+        dest="numeric",
+        action="store_true",
+        help="Convert the values in datasets to numerals (default: false)",
+    )
     p.set_defaults(minsup=2)
     p.set_defaults(numeric=False)
 
     options, args = p.parse_args()
     if len(args) < 1:
-        p.error('must provide the path to a CSV file to read')
+        p.error("must provide the path to a CSV file to read")
 
     transactions = []
     with open(args[0]) as database:
@@ -351,8 +373,8 @@ def __repr__(self):
 
     result = []
     for itemset, support in find_frequent_itemsets(transactions, options.minsup, True):
-        result.append((itemset,support))
+        result.append((itemset, support))
 
     result = sorted(result, key=lambda i: i[0])
     for itemset, support in result:
-        print str(itemset) + ' ' + str(support)
+        print(str(itemset) + " " + str(support))