Showing
2 changed files
with
122 additions
and
0 deletions
Apriori.ipynb
0 → 100644
This diff is collapsed. Click to expand it.
user_FC.py
0 → 100644
1 | +import math | ||
2 | +#!/usr/bin/python | ||
3 | + | ||
4 | + | ||
5 | + | ||
def readFile(file_name):
    """Return every line of a text file as a list of strings.

    Line terminators are kept on each element, exactly as
    ``file.readlines()`` produces them.

    file_name -- path of the file to read.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading fails part-way,
    # which a manual open()/close() pair does not guarantee.
    with open(file_name, 'r') as f:
        return f.readlines()
12 | + | ||
def getBooksList(file_name):
    """Load a '|'-delimited catalogue file into a dict keyed by integer id.

    Every non-blank line is split on '|'. The first field is converted to
    int and used as the key; the remaining fields (a list of strings) become
    the value. The last field keeps whatever line terminator the file had.
    When an id occurs more than once, the later line replaces the earlier.

    file_name -- path of the catalogue file.

    Raises ValueError if the first field of a non-blank line is not an
    integer, and IOError/OSError if the file cannot be read.
    """
    books_info = {}
    for book in readFile(file_name):
        # A blank line (commonly a trailing one) has no id field; int('')
        # would abort the whole load, so skip it instead.
        if not book.strip():
            continue
        book_info = book.split("|")
        books_info[int(book_info[0])] = book_info[1:]
    return books_info
20 | + | ||
21 | + | ||
22 | + | ||
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into lists of three integers.

    ratings -- iterable of strings, each holding at least three
               tab-separated integer fields. Fields after the third are
               ignored. Other functions in this module consume the triple
               as [user id, item id, rating].

    Returns a list of three-element integer lists, in input order.
    Blank or whitespace-only lines are skipped.

    Raises ValueError for a non-integer field and IndexError when a
    non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        # Tolerate empty lines (e.g. a trailing newline at end of file)
        # rather than failing on int('').
        if not line.strip():
            continue
        fields = line.split('\t')
        rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
    return rates
29 | + | ||
def createUserRankDic(rates):
    """Index rating triples both by user and by item.

    rates -- iterable of sequences whose first three elements are
             (user, item, rating).

    Returns a tuple (user_rate_dic, item_to_user):
      user_rate_dic -- user -> list of (item, rating) tuples, in input order.
      item_to_user  -- item -> list of users that rated it, in input order
                       (a user appears once per rating row, so repeated
                       rows produce repeated entries).
    """
    user_rate_dic = {}
    item_to_user = {}
    for row in rates:
        user, item, rating = row[0], row[1], row[2]
        # setdefault creates the list on first sight of a key and returns
        # the existing one afterwards, replacing the manual if/else.
        user_rate_dic.setdefault(user, []).append((item, rating))
        item_to_user.setdefault(item, []).append(user)
    return user_rate_dic, item_to_user
45 | + | ||
def clacSimlaryCosDist(user1, user2):
    """Return the mean-centred cosine similarity of two users' ratings.

    user1, user2 -- sequences of (item, rating) pairs; items must be
                    hashable (they are used as dictionary keys here).

    Every rating is centred on its owner's mean over all of that user's
    ratings. The numerator sums the products of centred ratings over items
    both users rated; the denominator is the square root of the product of
    each user's total squared deviation.

    Returns a float. The result is 0.0 when either list is empty, when the
    users share no item, or when the centred products sum to zero.
    """
    # An empty rating list has no mean; treat it as "no similarity" instead
    # of dividing by zero.
    if not user1 or not user2:
        return 0.0

    avg_x = sum(pair[1] for pair in user1) / float(len(user1))
    avg_y = sum(pair[1] for pair in user2) / float(len(user2))

    # Accumulate user2's centred ratings per item so that the overlap is
    # found with one dictionary lookup per user1 rating instead of a nested
    # scan over both lists. Summing per item keeps the same numerator the
    # pairwise scan would give even if an item appears more than once.
    dev_y_by_item = {}
    sum_y = 0.0
    for pair in user2:
        dev = pair[1] - avg_y
        sum_y += dev * dev
        dev_y_by_item[pair[0]] = dev_y_by_item.get(pair[0], 0.0) + dev

    # NOTE(review): the norms are taken over each user's full rating list,
    # not only over the shared items -- confirm this matches the intended
    # formula.
    sum_x = 0.0
    sum_xy = 0.0
    for pair in user1:
        dev = pair[1] - avg_x
        sum_x += dev * dev
        sum_xy += dev * dev_y_by_item.get(pair[0], 0.0)

    # A non-zero numerator implies both norms are non-zero, so the division
    # below cannot hit a zero denominator.
    if sum_xy == 0.0:
        return 0.0
    return sum_xy / math.sqrt(sum_x * sum_y)
72 | + | ||
73 | + | ||
74 | + | ||
def calcNearestNeighbor(userid, users_dic, item_dic):
    """Rank every user who shares at least one rated item with ``userid``.

    userid    -- key of the target user in users_dic.
    users_dic -- user -> list of (item, rating) pairs.
    item_dic  -- item -> list of users that rated the item.

    Returns a list of [similarity, neighbor] lists sorted in descending
    order. Because whole lists are compared, equal similarities are ordered
    by descending neighbor id. The target user is never included.

    Raises KeyError if ``userid`` is missing from users_dic or one of the
    user's items is missing from item_dic.
    """
    target_ratings = users_dic[userid]

    # A set gives constant-time de-duplication; collection order does not
    # matter because the result is fully sorted below.
    neighbors = set()
    for item in target_ratings:
        neighbors.update(item_dic[item[0]])
    neighbors.discard(userid)

    neighbors_dist = [
        [clacSimlaryCosDist(target_ratings, users_dic[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
88 | + | ||
89 | + | ||
def recommendByUserFC(file_name, userid, k=5):
    """Recommend items to ``userid`` from the ratings of similar users.

    file_name -- path of a tab-separated ratings file.
    userid    -- id of the user to recommend for.
    k         -- number of most similar neighbours to consult (default 5).

    Every item rated by one of the k nearest neighbours receives a score
    equal to the sum of the similarities of the neighbours that rated it.

    Returns a tuple (recommended_items, user_books, neighbors):
      recommended_items -- item ids ordered by descending score.
      user_books        -- item ids the target user has rated.
      neighbors         -- the k [similarity, neighbor_id] entries used.
    Items the target user has already rated are not removed from
    recommended_items.
    """
    ratings_by_user, users_by_item = createUserRankDic(
        getRatingInformation(readFile(file_name)))

    neighbors = calcNearestNeighbor(userid, ratings_by_user, users_by_item)[:k]

    # Accumulate, per item, the similarity of every neighbour that rated it.
    scores = {}
    for similarity, neighbor_id in neighbors:
        for entry in ratings_by_user[neighbor_id]:
            item = entry[0]
            if item in scores:
                scores[item] += similarity
            else:
                scores[item] = similarity

    # Sorting [score, item] pairs in reverse puts the best score first and
    # breaks ties by descending item id.
    ranked = sorted(([score, item] for item, score in scores.items()),
                    reverse=True)

    user_books = [entry[0] for entry in ratings_by_user[userid]]
    return [pair[1] for pair in ranked], user_books, neighbors
114 | + | ||
115 | + | ||
if __name__ == '__main__':
    # Demo run: print the five top-ranked recommendations for user 222 from
    # the ratings in "u.data". recommendByUserFC reads and parses the file
    # itself, so no separate load is needed here.
    recommend_list, user_movie, neighbors = recommendByUserFC("u.data", 222)

    # The parenthesised single-argument form prints the same text under
    # Python 2's print statement and Python 3's print function.
    print(recommend_list[:5])
122 | + |
-
Please register or login to post a comment