# user_FC.py (2.74 KB)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import math
#!/usr/bin/python
def readFile(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file, opened in text read mode.

    Returns:
        A list with one string per line, exactly as produced by
        ``readlines()`` (line terminators are kept).
    """
    # The context manager closes the handle even when reading raises.
    with open(file_name, 'r') as f:
        return f.readlines()
def getBooksList(file_name):
    """Load a ``|``-separated file into a dict keyed by integer id.

    Every non-blank line is split on ``|``. The first field is converted
    with ``int()`` and used as the key; the remaining fields are stored
    as a list of strings.

    Args:
        file_name: Path of the file to load (read through ``readFile``).

    Returns:
        A dict mapping ``int(first_field)`` to the list of remaining
        fields. The last field of each line still carries the line
        terminator, because lines are split without being stripped.

    Raises:
        ValueError: If the first field of a non-blank line is not an
            integer literal.
    """
    books_info = {}
    for line in readFile(file_name):
        # Blank lines (e.g. a trailing empty line) carry no record and
        # would otherwise make int() fail.
        if not line.strip():
            continue
        fields = line.split("|")
        books_info[int(fields[0])] = fields[1:]
    return books_info
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into integer triples.

    Args:
        ratings: Iterable of strings. Each non-blank line must contain at
            least three tab-separated integer fields; any further fields
            are ignored.

    Returns:
        A list of ``[first, second, third]`` integer lists, one per
        non-blank line, in input order. The rest of this file treats the
        three values as user id, item id and rating.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        # Skip blank lines instead of failing inside int().
        if not line.strip():
            continue
        fields = line.split('\t')
        rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
    return rates
def createUserRankDic(rates):
    """Index rating triples by user and by item.

    Args:
        rates: Iterable of sequences whose first three elements are
            ``(user_id, item_id, rating)``.

    Returns:
        A tuple ``(user_rate_dic, item_to_user)`` of plain dicts:
        ``user_rate_dic`` maps each user id to a list of
        ``(item_id, rating)`` tuples, and ``item_to_user`` maps each item
        id to the list of user ids that rated it. Both lists follow input
        order and keep duplicates.
    """
    user_rate_dic = {}
    item_to_user = {}
    for rate in rates:
        user_id, item_id, rating = rate[0], rate[1], rate[2]
        # setdefault keeps the results plain dicts, so a lookup of an
        # unknown id still raises KeyError for callers.
        user_rate_dic.setdefault(user_id, []).append((item_id, rating))
        item_to_user.setdefault(item_id, []).append(user_id)
    return user_rate_dic, item_to_user
def clacSimlaryCosDist(user1, user2):
    """Return the mean-centred cosine similarity of two users' ratings.

    Each rating is centred on its user's own average. The numerator sums
    the products of centred ratings over items present in both lists; the
    denominator is the square root of the product of each user's total
    squared deviation over all of that user's ratings.

    NOTE(review): the source lost its indentation. This assumes ``sum_x``
    is accumulated once per rating of ``user1`` and ``sum_y`` once per
    rating of ``user2``; confirm against the original file.

    Args:
        user1: List of ``(item_id, rating)`` pairs for the first user.
        user2: List of ``(item_id, rating)`` pairs for the second user.
            Item ids must be hashable.

    Returns:
        ``0`` when either list is empty or the numerator is zero (no
        shared items, or the centred products cancel out); otherwise a
        float similarity.
    """
    # An empty rating list has no average; treat it as "no similarity"
    # instead of dividing by zero.
    if not user1 or not user2:
        return 0

    avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
    avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

    # Index user2's centred ratings by item so shared items are found by
    # lookup rather than by scanning user2 for every rating of user1.
    # Deviations are summed per item so a repeated item id contributes
    # once per matching pair, like a pairwise comparison would.
    sum_y = 0.0
    dev_y_by_item = {}
    for entry in user2:
        dev_y = entry[1] - avg_y
        sum_y += dev_y * dev_y
        dev_y_by_item[entry[0]] = dev_y_by_item.get(entry[0], 0.0) + dev_y

    sum_x = 0.0
    sum_xy = 0.0
    for entry in user1:
        dev_x = entry[1] - avg_x
        sum_x += dev_x * dev_x
        sum_xy += dev_x * dev_y_by_item.get(entry[0], 0.0)

    if sum_xy == 0.0:
        return 0
    # A non-zero numerator implies at least one non-zero deviation on each
    # side, so both sums are positive and the division is safe.
    sx_sy = math.sqrt(sum_x * sum_y)
    return sum_xy / sx_sy
def calcNearestNeighbor(userid, users_dic, item_dic):
    """Rank every user who shares at least one rated item with ``userid``.

    Args:
        userid: Id of the target user; must be a key of ``users_dic``.
        users_dic: Dict mapping user id to a list of
            ``(item_id, rating)`` pairs.
        item_dic: Dict mapping item id to the list of user ids that rated
            the item.

    Returns:
        A list of ``[similarity, neighbor_id]`` lists sorted in descending
        order (by similarity, ties broken by neighbor id). The target user
        is never included.

    Raises:
        KeyError: If ``userid`` is missing from ``users_dic`` or one of
            the user's items is missing from ``item_dic``.
    """
    target_ratings = users_dic[userid]

    # Seeding the seen-set with the target user excludes it from its own
    # neighborhood, and the set makes the duplicate check a hash lookup
    # instead of a list scan.
    seen = set([userid])
    neighbors = []
    for item in target_ratings:
        for neighbor in item_dic[item[0]]:
            if neighbor not in seen:
                seen.add(neighbor)
                neighbors.append(neighbor)

    neighbors_dist = [
        [clacSimlaryCosDist(target_ratings, users_dic[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
def recommendByUserFC(file_name, userid, k=5):
    """Recommend items to a user from the ratings of the k nearest users.

    The ratings file is read and parsed, the ``k`` most similar users are
    selected, and every item rated by one of them is scored with the sum
    of the similarities of the neighbors that rated it.

    Args:
        file_name: Path of the tab-separated ratings file.
        userid: Id of the user to build recommendations for.
        k: Maximum number of neighbors taken into account (default 5).

    Returns:
        A tuple ``(recommended_items, user_books, neighbors)``:
        ``recommended_items`` is the list of item ids ordered by
        descending score, ``user_books`` the item ids ``userid`` has
        rated, and ``neighbors`` the selected ``[similarity, user_id]``
        pairs. Items the user already rated are not removed from
        ``recommended_items``; ``user_books`` is returned so the caller
        can filter them.

    Raises:
        KeyError: If ``userid`` has no rating in the file.
    """
    test_contents = readFile(file_name)
    test_rates = getRatingInformation(test_contents)
    test_dic, test_item_to_user = createUserRankDic(test_rates)
    neighbors = calcNearestNeighbor(userid, test_dic, test_item_to_user)[:k]

    # Score of an item = sum of the similarities of the neighbors that
    # rated it (the rating value itself is not used).
    recommend_dic = {}
    for similarity, neighbor_user_id in neighbors:
        for book in test_dic[neighbor_user_id]:
            recommend_dic[book[0]] = recommend_dic.get(book[0], 0) + similarity

    # Sorting [score, item] pairs in reverse orders by score first and by
    # item id on ties.
    recommend_list = sorted(
        ([score, item_id] for item_id, score in recommend_dic.items()),
        reverse=True)
    user_books = [rating[0] for rating in test_dic[userid]]
    return [entry[1] for entry in recommend_list], user_books, neighbors
if __name__ == '__main__':
    # Script entry point: print the five best recommendations for user 222
    # computed from the ratings in 'u.data'. recommendByUserFC reads and
    # parses the file itself, so no separate load is needed here.
    recommend_list, user_movie, neighbors = recommendByUserFC("u.data", 222)
    # Call form of print works on both Python 2 and Python 3.
    print(recommend_list[:5])