python 实现 apriori 算法

测试数据文件为 test,最小支持度计数为 2

下面是源代码:

import string

def getsrcdata(filename):
    """Read a transaction file into a list of item lists.

    Each line of the file is one transaction; items are separated by any
    run of whitespace (spaces or tabs). A blank line yields an empty
    transaction.

    Args:
        filename: path of the text file to read.

    Returns:
        A list with one entry per line, each entry being the list of item
        strings found on that line.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    srcdata = []
    # The context manager closes the file even if reading fails midway.
    with open(filename, 'r') as f:
        for line in f:
            # str.split() with no argument splits on any whitespace and
            # drops the trailing newline.
            srcdata.append(line.split())
    return srcdata

# get the first candidates C1 from source data
# return a dict c1 whose key is frozenset which
# contains each item and its value is its count
def getC1(srcdata):
    """Build the 1-item candidate set C1 with support counts.

    Args:
        srcdata: iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A dict mapping ``frozenset([item])`` to the number of transactions
        that contain ``item``.
    """
    c1 = {}
    for transaction in srcdata:
        # Deduplicate first: support counts transactions, not occurrences,
        # which keeps C1 consistent with how larger candidates are counted.
        for item in set(transaction):
            # A frozenset is hashable, so it can serve as a dict key.
            key = frozenset([item])
            c1[key] = c1.get(key, 0) + 1
    return c1

# compare candidate support count with the given
# minimum support count
def getL(c, supct):
    """Filter a candidate dict down to the frequent itemsets.

    Entries whose count is below ``supct`` are deleted from ``c`` itself;
    the same dict object is then returned.

    Args:
        c: dict mapping candidate itemsets to their support counts.
            Modified in place.
        supct: minimum support count; candidates with a count equal to it
            are kept.

    Returns:
        ``c``, now containing only entries with count >= ``supct``.
    """
    # Collect the keys first: a dict must not change size while it is
    # being iterated.
    infrequent = [key for key in c if c[key] < supct]
    for key in infrequent:
        del c[key]
    return c

# get the next candidate from previous L
# and scan source data for count of each candidate
def getnextcandi(preL, srcdata):
    """Generate the next level of candidates and count their support.

    Two itemsets of the previous level are joined only when they have the
    same size k and their union has exactly k + 1 items. Larger unions
    (for example {a, b} with {c, d}) belong to later levels and are not
    produced here.

    Args:
        preL: dict whose keys are the frequent itemsets (frozensets) of
            the previous level; the values are not used.
        srcdata: iterable of transactions, each an iterable of items.

    Returns:
        A dict mapping each candidate frozenset to the number of
        transactions in ``srcdata`` that contain all of its items. Empty
        when fewer than two itemsets are given or none can be joined.
    """
    itemsets = list(preL)
    c = {}
    # Visit each unordered pair once; a union is symmetric.
    for idx, key1 in enumerate(itemsets):
        for key2 in itemsets[idx + 1:]:
            if len(key1) != len(key2):
                continue
            key = key1.union(key2)
            if len(key) == len(key1) + 1:
                c[key] = 0
    # Count the support of every candidate.
    for transaction in srcdata:
        # Build the set once per transaction rather than once per test.
        items = set(transaction)
        for candidate in c:
            if candidate.issubset(items):
                c[candidate] += 1
    return c

# Apriori algorithm
def Apriori(filename, supct):
    """Run the level-wise Apriori search over a transaction file.

    Args:
        filename: path of the transaction file, one transaction per line
            with whitespace-separated items.
        supct: minimum support count an itemset needs to be frequent.

    Returns:
        A dict mapping frozenset itemsets to their support counts for the
        last level that still had frequent itemsets, or an empty dict when
        no single item reaches ``supct``.
    """
    srcdata = getsrcdata(filename)
    # Level 1 candidates.
    c = getC1(srcdata)
    L = {}
    while True:
        temp_L = getL(c, supct)
        # Stop at the first level with no frequent itemset; L keeps the
        # previous level's result.
        if not temp_L:
            break
        L = temp_L
        # Candidates for the next level, built from the current one.
        c = getnextcandi(L, srcdata)
    return L

def main():
    """Run Apriori on the file "test" with support count 2 and print it.

    One line is printed per itemset, in the form ``{ a b } --> count``.
    """
    L = Apriori("test", 2)
    lines = []
    for item in L:
        members = ''.join(i + ' ' for i in item)
        # '\n' ends each entry; the pasted source had lost the backslash.
        lines.append('{ ' + members + "} --> " + str(L[item]) + '\n')
    # The parenthesized single-argument form works on Python 2 and 3.
    print(''.join(lines))


if __name__ == '__main__':
    main()

Read More:

发表评论?

3 条评论。

  1. 啊哈 我抢到个沙发呢啊 看看先了
    呵呵新建了个QQ群:32240592 欢迎加入

  2. 加上点空格好不。。。。。。。。。

发表评论

注意 - 你可以用以下 HTML tags and attributes:
<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong> <pre lang="" line="" escaped="" highlight="">