Loading...
The University of Aizu, Aizu-Wakamatsu, Japan
+81-8048340999

Evaluating Multiple Pattern Mining Algorithms on a Dataset

Step 1: Create a pandas data frame to store the results of multiple algorithms

import pandas as pd

# Empty data frame that will hold the measurements of every run.
# 'algorithm' is intentionally the first column.
result = pd.DataFrame(
    columns=['algorithm', 'minSup', 'patterns', 'runtime', 'memory'],
)

Note: The first column of the data frame must be ‘algorithm’; otherwise, the code will raise an error.

Step 2: Declaring the name of the dataset

# General form: inputFile = 'fileName'
# Name of the dataset file that is handed to each mining algorithm.
inputFile = "Transactional_T10I4D100K.csv"

Click here to download the dataset.

Step 3: Specify the range of values for an input parameter

# General form: constraintList = [array of values]
# Minimum-support thresholds to evaluate: 400 through 1000 in steps of 100.
minSupList = list(range(400, 1001, 100))

Step 4: Declare the algorithm name, import and execute it, and store the results in the data frame

# General form: algorithmName = 'name of the algorithm'
# Label written to the 'algorithm' column for every row produced below.
algorithmName = 'Apriori'

# Bring in the module that implements the mining algorithm.
from PAMI.frequentPattern.basic import Apriori as alg

# Run the algorithm once per constraint value
# (general form: for constraint in constraintList:).
for minSup in minSupList:
    # Build the miner for this threshold; the input file is read with a tab separator.
    obj = alg.Apriori(inputFile, minSup, sep='\t')

    # Run the mining process.
    obj.startMine()

    # Add the measurements of this run as a new last row of the data frame.
    result.loc[len(result)] = [
        algorithmName,
        minSup,
        len(obj.getPatterns()),
        obj.getRuntime(),
        obj.getMemoryRSS(),
    ]

Step 5: Repeat Step 4 for each of the other pattern mining algorithms

# General form: algorithmName = 'name of the algorithm'
# Label written to the 'algorithm' column for every row produced below.
algorithmName = 'FPGrowth'

# Bring in the module that implements the mining algorithm.
from PAMI.frequentPattern.basic import FPGrowth as alg

# Run the algorithm once per constraint value
# (general form: for constraint in constraintList:).
for minSup in minSupList:
    # Build the miner for this threshold; the input file is read with a tab separator.
    obj = alg.FPGrowth(inputFile, minSup, sep='\t')

    # Run the mining process.
    obj.startMine()

    # Add the measurements of this run as a new last row of the data frame.
    result.loc[len(result)] = [
        algorithmName,
        minSup,
        len(obj.getPatterns()),
        obj.getRuntime(),
        obj.getMemoryRSS(),
    ]

#---------------------------------
#Repeating above steps for another algorithm

# General form: algorithmName = 'name of the algorithm'
# Label written to the 'algorithm' column for every row produced below.
algorithmName = 'ECLAT'

# Bring in the module that implements the mining algorithm.
from PAMI.frequentPattern.basic import ECLAT as alg

# Run the algorithm once per constraint value
# (general form: for constraint in constraintList:).
for minSup in minSupList:
    # Build the miner for this threshold; the input file is read with a tab separator.
    obj = alg.ECLAT(inputFile, minSup, sep='\t')

    # Run the mining process.
    obj.startMine()

    # Add the measurements of this run as a new last row of the data frame.
    result.loc[len(result)] = [
        algorithmName,
        minSup,
        len(obj.getPatterns()),
        obj.getRuntime(),
        obj.getMemoryRSS(),
    ]

Step 6: Visualizing the comparative results

# Import the plotting helper.
from PAMI.extras.graph import dataFrameInToFigures as dif

# Pass the result data frame to the class.
# These are top-level statements, so they start at column 0; indenting them
# after the import raises "IndentationError: unexpected indent".
ab = dif.dataFrameInToFigures(result)

# Draw the graphs.
ab.plotGraphsFromDataFrame()