测试数据文件为 test,最小支持度计数为 2
下面是源代码:
import string
def getsrcdata(filename):
    """Read a transaction file into a list of item lists.

    Each line of the file is one transaction; the items on a line are
    separated by any run of whitespace (spaces or tabs).

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, each entry being the
        list of item strings found on that line. A blank line yields an
        empty list.
    """
    # The context manager closes the file even if reading raises.
    with open(filename, 'r') as f:
        # str.split() with no argument splits on any whitespace and works
        # on Python 2 and 3 alike (string.split() no longer exists in 3).
        return [line.split() for line in f]
def getC1(srcdata):
    """Build the first candidate set C1 from the source data.

    Args:
        srcdata: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A dict whose keys are single-item frozensets (so they can be used
        as dict keys) and whose values are the number of transactions
        that contain that item.
    """
    c1 = {}
    for transaction in srcdata:
        # dict.fromkeys() drops repeated items, so an item listed twice in
        # one transaction still contributes a single count: support is
        # measured per transaction, not per occurrence.
        for item in dict.fromkeys(transaction):
            key = frozenset([item])
            c1[key] = c1.get(key, 0) + 1
    return c1
def getL(c, supct):
    """Filter a candidate dict down to the frequent itemsets.

    Candidates whose support count is below the minimum are removed from
    ``c`` in place.

    Args:
        c: Dict mapping candidate itemsets to their support counts.
        supct: Minimum support count an itemset needs in order to be kept.

    Returns:
        The same dict object ``c``, now holding only the entries whose
        count is at least ``supct``.
    """
    # Collect the keys first: deleting from a dict while iterating over it
    # is an error.
    infrequent = [key for key, count in c.items() if count < supct]
    for key in infrequent:
        del c[key]
    return c
def getnextcandi(preL, srcdata):
    """Generate the next candidate set from the previous frequent set.

    Pairs of itemsets from ``preL`` are joined, and the source data is then
    scanned to count how many transactions contain each candidate.

    Args:
        preL: Dict (or other iterable) of frozenset itemsets from the
            previous round.
        srcdata: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A dict mapping each candidate frozenset to the number of
        transactions of which it is a subset. Empty when no two itemsets
        of ``preL`` can be joined.
    """
    c = {}
    for key1 in preL:
        for key2 in preL:
            if len(key1) != len(key2):
                continue
            key = key1.union(key2)
            # Join step: keep only unions that are exactly one item larger
            # than their parents. This also rejects key1 == key2, and stops
            # e.g. {a,b} and {c,d} from jumping straight to a 4-itemset.
            if len(key) == len(key1) + 1:
                c[key] = 0

    # Build each transaction's set once instead of once per candidate.
    transactions = [frozenset(transaction) for transaction in srcdata]
    for transaction in transactions:
        for item in c:
            if item.issubset(transaction):
                c[item] += 1
    return c
def Apriori(filename, supct):
    """Run the Apriori algorithm over a transaction file.

    Args:
        filename: Path of the transaction file, read with ``getsrcdata``.
        supct: Minimum support count an itemset needs to be kept.

    Returns:
        The last non-empty dict of frequent itemsets produced by the
        iteration, mapping each frozenset to its support count. An empty
        dict is returned when not even a single item reaches ``supct``.
    """
    srcdata = getsrcdata(filename)
    # Start from the single-item candidates C1.
    c = getC1(srcdata)
    L = {}
    while True:
        # getL filters the candidates; an empty result means the previous
        # L was the final one.
        temp_L = getL(c, supct)
        if not temp_L:
            break
        L = temp_L
        # Grow the next round of candidates from the current L.
        c = getnextcandi(L, srcdata)
    return L
def main():
    """Mine the file "test" with a minimum support count of 2 and print it.

    Each frequent itemset is printed on its own line in the form
    ``{ item item } --> count``.
    """
    L = Apriori("test", 2)
    lines = []
    for item in L:
        members = "".join(i + " " for i in item)
        lines.append("{ " + members + "} --> " + str(L[item]) + "\n")
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("".join(lines))
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
啊哈 我抢到个沙发呢啊 看看先了
呵呵新建了个QQ群:32240592 欢迎加入
QQ群 我用的是linuxqq… 所以 你只能加我了。。 我这里没有办法加。。。 我的q 274107206
加上点空格好不。。。。。。。。。