武翔

add apriori and userfc

This diff is collapsed. Click to expand it.
#!/usr/bin/python
import math
def readFile(file_name):
    """Return every line of a text file as a list of strings.

    Line terminators are kept, exactly as ``readlines()`` yields them.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line; an empty list for an empty file.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading raises.
    with open(file_name, 'r') as f:
        return f.readlines()
def getBooksList(file_name):
    """Load a '|'-separated catalogue file into a dict keyed by integer id.

    Every non-blank line is split on '|'; the first field is converted with
    ``int`` and used as the key, the remaining fields become the value.

    Args:
        file_name: Path of the catalogue file, read through ``readFile``.

    Returns:
        A dict mapping ``int(first_field)`` to the list of the remaining
        fields (strings). If an id occurs more than once, the last line wins.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    books_info = {}
    for line in readFile(file_name):
        # Drop the line terminator so it does not leak into the last field.
        record = line.rstrip('\r\n')
        if not record.strip():
            # Blank lines (e.g. a stray newline at EOF) carry no record.
            continue
        fields = record.split("|")
        books_info[int(fields[0])] = fields[1:]
    return books_info
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into lists of three integers.

    Only the first three columns of each line are used; any further columns
    are ignored. Blank or whitespace-only lines are skipped.

    Args:
        ratings: Iterable of text lines, each holding at least three
            tab-separated integer fields.

    Returns:
        A list of ``[int, int, int]`` lists, one per non-blank input line,
        in input order.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        if not line.strip():
            # A blank line would otherwise fail in int('').
            continue
        fields = line.split('\t')
        rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
    return rates
def createUserRankDic(rates):
    """Index rating triples by their first and by their second element.

    Args:
        rates: Iterable of sequences whose first three elements are
            (user id, item id, rating).

    Returns:
        A tuple ``(user_rate_dic, item_to_user)`` of plain dicts:

        * ``user_rate_dic`` maps each user id to the list of
          ``(item id, rating)`` tuples for that user, in input order.
        * ``item_to_user`` maps each item id to the list of user ids that
          rated it, in input order (one entry per rating).
    """
    user_rate_dic = {}
    item_to_user = {}
    for rate in rates:
        user_id = rate[0]
        item_id = rate[1]
        # setdefault creates the list on first sight of a key, then appends.
        user_rate_dic.setdefault(user_id, []).append((item_id, rate[2]))
        item_to_user.setdefault(item_id, []).append(user_id)
    return user_rate_dic, item_to_user
def clacSimlaryCosDist(user1, user2):
    """Return the mean-centred cosine similarity of two users' ratings.

    Every rating is centred on its owner's average rating. The numerator
    sums the products of centred ratings over items that appear in both
    lists; the denominator is the square root of the product of each user's
    total squared deviation over all of that user's ratings.

    NOTE(review): this function was recovered from text whose indentation
    had been flattened, so the nesting of the ``sum_x`` / ``sum_y``
    accumulations is a reconstruction (each taken over the user's full
    rating list) -- confirm against the original file.

    Args:
        user1: List of ``(item id, rating)`` pairs for the first user.
        user2: List of ``(item id, rating)`` pairs for the second user.
            Item ids must be hashable.

    Returns:
        The similarity as a float, or ``0`` when either list is empty or
        the centred ratings of the shared items sum to zero (including the
        case of no shared items).
    """
    if not user1 or not user2:
        # No ratings means no average to centre on; report "no similarity"
        # instead of dividing by zero.
        return 0

    avg_x = sum(pair[1] for pair in user1) / float(len(user1))
    avg_y = sum(pair[1] for pair in user2) / float(len(user2))

    # Per-item total of user2's centred ratings, so every user1 rating needs
    # a single dict lookup instead of a scan over all of user2.
    dev_y_by_item = {}
    sum_y = 0.0
    for pair in user2:
        dev_y = pair[1] - avg_y
        dev_y_by_item[pair[0]] = dev_y_by_item.get(pair[0], 0.0) + dev_y
        sum_y += dev_y * dev_y

    sum_x = 0.0
    sum_xy = 0.0
    for pair in user1:
        dev_x = pair[1] - avg_x
        sum_x += dev_x * dev_x
        sum_xy += dev_x * dev_y_by_item.get(pair[0], 0.0)

    if sum_xy == 0.0:
        return 0
    # A non-zero sum_xy implies a non-zero deviation on both sides, so the
    # product under the square root is positive here.
    return sum_xy / math.sqrt(sum_x * sum_y)
def calcNearestNeighbor(userid, users_dic, item_dic):
    """Rank every other user who shares at least one rated item with userid.

    Args:
        userid: Key of the target user in ``users_dic``.
        users_dic: Maps user id to a list of ``(item id, rating)`` pairs.
        item_dic: Maps item id to the list of user ids that rated it.

    Returns:
        A list of ``[similarity, neighbor id]`` lists sorted in descending
        order (by similarity first, then by neighbor id). The similarity is
        computed by ``clacSimlaryCosDist``. ``userid`` itself is never
        included.

    Raises:
        KeyError: If ``userid`` is missing from ``users_dic`` or one of its
            items is missing from ``item_dic``.
    """
    own_ratings = users_dic[userid]

    # A set de-duplicates candidates in constant time each; the final sort
    # fully determines the output order, so discovery order is irrelevant.
    candidates = set()
    for rated in own_ratings:
        candidates.update(item_dic[rated[0]])
    candidates.discard(userid)

    neighbors_dist = [
        [clacSimlaryCosDist(own_ratings, users_dic[other]), other]
        for other in candidates
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
def recommendByUserFC(file_name, userid, k=5):
    """Recommend items to a user from the ratings of its k nearest neighbors.

    The ratings file is read and indexed, the neighbors of ``userid`` are
    ranked with ``calcNearestNeighbor`` and the top ``k`` are kept. Every
    item rated by one of those neighbors is scored with the sum of the
    similarities of the neighbors that rated it.

    Items the target user has already rated are NOT filtered out of the
    recommendation list; the user's own items are returned separately.

    Args:
        file_name: Path of the tab-separated ratings file.
        userid: Id of the user to recommend for.
        k: Number of nearest neighbors to use (default 5).

    Returns:
        A tuple ``(recommended, user_books, neighbors)``:

        * ``recommended``: item ids sorted by descending score
          (ties broken by descending item id).
        * ``user_books``: item ids rated by ``userid``, in file order.
        * ``neighbors``: the ``[similarity, neighbor id]`` lists that were
          used, at most ``k`` of them.

    Raises:
        KeyError: If ``userid`` does not occur in the ratings file.
    """
    rates = getRatingInformation(readFile(file_name))
    users_dic, item_to_user = createUserRankDic(rates)

    neighbors = calcNearestNeighbor(userid, users_dic, item_to_user)[:k]

    # Accumulate, per item, the similarity of every neighbor that rated it.
    scores = {}
    for neighbor in neighbors:
        similarity = neighbor[0]
        for rated in users_dic[neighbor[1]]:
            scores[rated[0]] = scores.get(rated[0], 0) + similarity

    # Sorting (score, item) pairs in reverse puts the best-scored items first.
    ranked = sorted(
        ((score, item) for item, score in scores.items()),
        reverse=True,
    )
    user_books = [rated[0] for rated in users_dic[userid]]

    return [pair[1] for pair in ranked], user_books, neighbors
if __name__ == '__main__':
    # Script entry point: show the five best-scored items for user 222,
    # using the ratings stored in ./u.data. recommendByUserFC reads and
    # parses the file itself, so no separate read is needed here.
    recommend_list, user_movie, neighbors = recommendByUserFC("u.data", 222)

    # Parenthesised so the statement is valid on both Python 2 and Python 3.
    print(recommend_list[:5])