Showing
2 changed files
with
122 additions
and
0 deletions
Apriori.ipynb
0 → 100644
This diff is collapsed. Click to expand it.
user_FC.py
0 → 100644
| 1 | +import math | ||
| 2 | +#!/usr/bin/python | ||
| 3 | + | ||
| 4 | + | ||
| 5 | + | ||
def readFile(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line, each keeping its line terminator
        (exactly what ``file.readlines()`` returns). An empty file yields
        an empty list.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even when reading raises.
    with open(file_name, 'r') as f:
        return f.readlines()
| 12 | + | ||
def getBooksList(file_name):
    """Load a '|'-separated file into a dict keyed by its integer first field.

    Every non-blank line is split on '|'. The first field is converted with
    ``int`` and used as the key; the remaining fields are stored, as
    strings, in a list. If the same id appears more than once, the last
    occurrence wins.

    Args:
        file_name: Path of the file to load (read through ``readFile``).

    Returns:
        dict mapping int id -> list of the remaining string fields.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    books_info = {}
    for line in readFile(file_name):
        # Drop the line terminator so it does not leak into the last field.
        line = line.rstrip('\r\n')
        # A blank (e.g. trailing) line carries no id; int('') would raise.
        if not line.strip():
            continue
        fields = line.split('|')
        books_info[int(fields[0])] = fields[1:]
    return books_info
| 20 | + | ||
| 21 | + | ||
| 22 | + | ||
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into integer triples.

    The first three tab-separated fields of every non-blank line are
    converted with ``int``; any further fields are ignored.
    ``createUserRankDic`` consumes the triples as (user, item, score).

    Args:
        ratings: Iterable of text lines.

    Returns:
        A list of ``[int, int, int]`` lists, in input order.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        # Blank lines (typically a trailing newline at end of file) carry
        # no data and would otherwise make int() raise.
        if not line.strip():
            continue
        fields = line.split('\t')
        rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
    return rates
| 29 | + | ||
def createUserRankDic(rates):
    """Build the user->ratings and item->users lookup tables.

    Args:
        rates: Iterable of sequences whose first three elements are
            (user, item, score). User and item must be hashable.

    Returns:
        A ``(user_rate_dic, item_to_user)`` tuple of plain dicts:
        ``user_rate_dic`` maps each user to a list of ``(item, score)``
        tuples, and ``item_to_user`` maps each item to the list of users
        that rated it. Both lists follow input order.
    """
    user_rate_dic = {}
    item_to_user = {}
    for rate in rates:
        user, item, score = rate[0], rate[1], rate[2]
        # setdefault creates the list on first sight of a key. Plain dicts
        # are kept so that looking up an unknown key still raises KeyError.
        user_rate_dic.setdefault(user, []).append((item, score))
        item_to_user.setdefault(item, []).append(user)
    return user_rate_dic, item_to_user
| 45 | + | ||
def clacSimlaryCosDist(user1, user2):
    """Return the mean-centred cosine similarity of two users' ratings.

    Each user's scores are centred on that user's own mean score. The
    numerator sums the products of centred scores over items present in
    both lists; the denominator is the square root of the product of each
    user's sum of squared centred scores, taken over all of that user's
    ratings.

    Args:
        user1: Sequence of ``(item, score)`` pairs for the first user.
        user2: Sequence of ``(item, score)`` pairs for the second user.
            Items must be hashable.

    Returns:
        A float. ``0.0`` is returned when either list is empty or when the
        numerator is zero (for example, no shared items).
    """
    # Without ratings there is no mean to centre on; report "no similarity"
    # instead of dividing by zero.
    if not user1 or not user2:
        return 0.0

    avg_x = sum((pair[1] for pair in user1), 0.0) / len(user1)
    avg_y = sum((pair[1] for pair in user2), 0.0) / len(user2)

    # Index user2's centred scores by item so matching is a dict lookup
    # rather than a nested scan. Lists preserve repeated entries for an
    # item, keeping the arithmetic identical to a pairwise comparison.
    centred_y = {}
    sum_y = 0.0
    for pair in user2:
        dev_y = pair[1] - avg_y
        centred_y.setdefault(pair[0], []).append(dev_y)
        sum_y += dev_y * dev_y

    sum_xy = 0.0
    sum_x = 0.0
    for pair in user1:
        dev_x = pair[1] - avg_x
        for dev_y in centred_y.get(pair[0], ()):
            sum_xy += dev_x * dev_y
        sum_x += dev_x * dev_x

    # A non-zero numerator implies both sums of squares are non-zero, so
    # the division below cannot hit a zero denominator.
    if sum_xy == 0.0:
        return 0.0
    return sum_xy / math.sqrt(sum_x * sum_y)
| 72 | + | ||
| 73 | + | ||
| 74 | + | ||
def calcNearestNeighbor(userid, users_dic, item_dic):
    """Rank every user who shares at least one rated item with ``userid``.

    Args:
        userid: Key of the target user in ``users_dic``.
        users_dic: dict mapping user -> list of ``(item, score)`` pairs.
        item_dic: dict mapping item -> list of users that rated it.
            User ids must be hashable.

    Returns:
        A list of ``[similarity, neighbor_id]`` lists sorted in descending
        order (list comparison, so equal similarities are ordered by
        descending id). The target user is never included.

    Raises:
        KeyError: If ``userid`` is missing from ``users_dic`` or one of its
            items is missing from ``item_dic``.
    """
    own_ratings = users_dic[userid]

    neighbors = []
    # Companion set gives O(1) duplicate checks; the list keeps discovery
    # order.
    seen = set()
    for item in own_ratings:
        for neighbor in item_dic[item[0]]:
            if neighbor != userid and neighbor not in seen:
                seen.add(neighbor)
                neighbors.append(neighbor)

    neighbors_dist = [
        [clacSimlaryCosDist(own_ratings, users_dic[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
| 88 | + | ||
| 89 | + | ||
def recommendByUserFC(file_name, userid, k=5):
    """Recommend items to ``userid`` from the ratings of its top-k neighbours.

    The ratings file is read and parsed, the ``k`` most similar users are
    selected with ``calcNearestNeighbor``, and every item rated by one of
    them is scored by the sum of the similarities of the neighbours that
    rated it. Items the target user has already rated are not filtered out
    of the recommendation list; they are returned separately.

    Args:
        file_name: Path of the tab-separated ratings file.
        userid: Id of the user to recommend for.
        k: Number of nearest neighbours to use.

    Returns:
        A 3-tuple ``(recommended_items, user_items, neighbors)``:
        item ids ordered by descending accumulated score, the item ids the
        target user rated, and the ``[similarity, neighbor_id]`` entries
        that were used.
    """
    rating_rows = getRatingInformation(readFile(file_name))
    ratings_by_user, users_by_item = createUserRankDic(rating_rows)

    ranked_neighbors = calcNearestNeighbor(userid, ratings_by_user, users_by_item)
    neighbors = ranked_neighbors[:k]

    # Accumulate, per item, the similarity of each neighbour that rated it.
    scores = {}
    for similarity, neighbor_id in neighbors:
        for rated in ratings_by_user[neighbor_id]:
            item = rated[0]
            if item in scores:
                scores[item] += similarity
            else:
                scores[item] = similarity

    ordered = sorted(
        ([total, item] for item, total in scores.items()),
        reverse=True,
    )
    recommended_items = [entry[1] for entry in ordered]
    user_items = [rated[0] for rated in ratings_by_user[userid]]

    return recommended_items, user_items, neighbors
| 114 | + | ||
| 115 | + | ||
if __name__ == '__main__':
    # recommendByUserFC reads and parses 'u.data' itself, so the file is
    # not loaded separately here.
    recommend_list, user_movie, neighbors = recommendByUserFC("u.data", 222)

    # The parenthesised single-argument form prints the same text on
    # Python 2 and is valid syntax on Python 3.
    print(recommend_list[:5])
| 122 | + |
-
Please register or login to post a comment