# user_FC.py (2.74 KB)
import math
#!/usr/bin/python



def readFile(file_name):
	"""Read a text file and return its lines.

	Args:
		file_name: Path of the file, opened in text read mode.

	Returns:
		A list with one string per line. Line terminators are kept,
		exactly as ``readlines()`` returns them. An empty file yields
		an empty list.

	Raises:
		IOError / OSError: If the file cannot be opened or read.
	"""
	# The context manager closes the handle even when readlines() raises,
	# which a manual open()/close() pair does not guarantee.
	with open(file_name, 'r') as f:
		return f.readlines()

def getBooksList(file_name):
	"""Load a '|'-separated file into a dict keyed by its first field.

	Args:
		file_name: Path passed to ``readFile``.

	Returns:
		A dict mapping ``int(first field)`` to the list of remaining
		fields of that line (strings, not stripped, so the last field
		keeps any line terminator). When a key occurs on several lines
		the last line wins.

	Raises:
		ValueError: If a line's first field is not an integer literal.
	"""
	split_rows = (record.split("|") for record in readFile(file_name))
	return {int(fields[0]): fields[1:] for fields in split_rows}



def getRatingInformation(ratings):
	"""Parse tab-separated rating lines into lists of three integers.

	Args:
		ratings: Iterable of strings. The first three tab-separated
			fields of each line are converted with ``int``; any further
			fields are ignored. ``createUserRankDic`` consumes the three
			values as user, item and rating, in that order.

	Returns:
		A list of ``[int, int, int]`` lists in input order. Blank or
		whitespace-only lines are skipped.

	Raises:
		ValueError: If one of the first three fields is not an integer.
		IndexError: If a non-blank line has fewer than three fields.
	"""
	rates = []
	for line in ratings:
		# A trailing empty line is common in data files and used to
		# crash int(); it carries no rating, so skip it.
		if not line.strip():
			continue
		fields = line.split('\t')
		rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
	return rates

def createUserRankDic(rates):
	"""Index rating records by their first and by their second element.

	Args:
		rates: Iterable of indexable records; elements 0, 1 and 2 are
			read (used here as user, item and rating).

	Returns:
		A 2-tuple ``(user_rate_dic, item_to_user)``:
		``user_rate_dic`` maps element 0 to a list of
		``(element 1, element 2)`` tuples, and ``item_to_user`` maps
		element 1 to a list of element-0 values. Both lists follow
		input order and keep duplicates.
	"""
	ratings_by_user = {}
	users_by_item = {}
	for record in rates:
		user_id, item_id, score = record[0], record[1], record[2]
		ratings_by_user.setdefault(user_id, []).append((item_id, score))
		users_by_item.setdefault(item_id, []).append(user_id)
	return ratings_by_user, users_by_item

def clacSimlaryCosDist(user1,user2):
	"""Return a mean-centred similarity score between two rating lists.

	Every rating has its own list's mean subtracted. The numerator sums
	the products of centred ratings over entries whose item ids match;
	the denominator is ``sqrt(sum_x * sum_y)``, where each sum of
	squared centred ratings runs over ALL entries of that list, not
	only the shared ones.

	Args:
		user1: Sequence of indexable entries; element 0 is the item id
			(must be hashable), element 1 the numeric rating.
		user2: Same layout as ``user1``.

	Returns:
		``0`` when either list is empty or the numerator is zero (for
		example when no item is shared); otherwise the float
		``sum_xy / sqrt(sum_x * sum_y)``.
	"""
	# The mean of an empty list is undefined and used to raise
	# ZeroDivisionError; treat "no ratings" as "no similarity".
	if not user1 or not user2:
		return 0

	# The 0.0 start value keeps the division in floating point on Python 2.
	avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
	avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

	# Index user2's centred ratings by item id so shared items are found
	# by lookup instead of rescanning user2 for every user1 entry.
	# Values are lists so repeated item ids still pair up the same way
	# an exhaustive pairwise comparison would.
	centred_y_by_item = {}
	sum_y = 0.0
	for entry in user2:
		centred_y = entry[1] - avg_y
		sum_y += centred_y * centred_y
		centred_y_by_item.setdefault(entry[0], []).append(centred_y)

	sum_x = 0.0
	sum_xy = 0.0
	for entry in user1:
		centred_x = entry[1] - avg_x
		for centred_y in centred_y_by_item.get(entry[0], ()):
			sum_xy += centred_x * centred_y
		sum_x += centred_x * centred_x

	# A non-zero numerator implies both squared sums are positive, so
	# the division below cannot hit a zero denominator.
	if sum_xy == 0.0:
		return 0
	return sum_xy / math.sqrt(sum_x * sum_y)



def calcNearestNeighbor(userid,users_dic,item_dic):
	"""Rank every user who shares at least one item with ``userid``.

	Args:
		userid: Key of the target user in ``users_dic``.
		users_dic: Dict mapping a user id to a list of entries whose
			element 0 is an item id (the layout produced by
			``createUserRankDic``).
		item_dic: Dict mapping an item id to the list of user ids that
			have an entry for it.

	Returns:
		A list of ``[similarity, neighbor_id]`` lists sorted in
		descending order (ties on similarity fall back to descending
		neighbor id). ``userid`` itself is never included. The list is
		empty when no other user shares an item.

	Raises:
		KeyError: If ``userid`` is missing from ``users_dic`` or one of
			the user's items is missing from ``item_dic``.
	"""
	# Track visited ids in a set for constant-time membership tests; the
	# list preserves first-seen order. Seeding the set with the target
	# user keeps that user out of their own neighbor list.
	seen = {userid}
	neighbors = []
	for item in users_dic[userid]:
		for neighbor in item_dic[item[0]]:
			if neighbor not in seen:
				seen.add(neighbor)
				neighbors.append(neighbor)

	target_ratings = users_dic[userid]
	neighbors_dist = [
		[clacSimlaryCosDist(target_ratings, users_dic[neighbor]), neighbor]
		for neighbor in neighbors
	]
	neighbors_dist.sort(reverse=True)
	return neighbors_dist


def recommendByUserFC(file_name,userid,k=5):
	"""Recommend items to ``userid`` from the ratings of similar users.

	The ratings file is loaded with ``readFile``, parsed with
	``getRatingInformation`` and indexed with ``createUserRankDic``.
	The ``k`` highest-ranked neighbors from ``calcNearestNeighbor`` are
	kept, and each item one of them rated is scored with the sum of the
	similarities of the neighbors who rated it.

	Args:
		file_name: Path of the tab-separated ratings file.
		userid: Id of the user to recommend for.
		k: Maximum number of neighbors to use.

	Returns:
		A 3-tuple ``(recommended_items, user_items, neighbors)``:
		item ids ordered by descending score (ties by descending item
		id); the item ids ``userid`` has rated; and the neighbors used,
		as ``[similarity, neighbor_id]`` lists.
		NOTE: ``recommended_items`` is not filtered against
		``user_items``, so it may contain items the user already rated.

	Raises:
		KeyError: If ``userid`` does not occur in the ratings file.
	"""
	ratings_by_user, users_by_item = createUserRankDic(
		getRatingInformation(readFile(file_name)))

	neighbors = calcNearestNeighbor(userid, ratings_by_user, users_by_item)[:k]

	# Accumulate, per item, the similarity of every neighbor who rated it.
	scores = {}
	for similarity, neighbor_id in neighbors:
		for entry in ratings_by_user[neighbor_id]:
			item_id = entry[0]
			if item_id in scores:
				scores[item_id] += similarity
			else:
				scores[item_id] = similarity

	ranked = sorted(
		([scores[item_id], item_id] for item_id in scores),
		reverse=True)
	rated_by_user = [entry[0] for entry in ratings_by_user[userid]]

	return [pair[1] for pair in ranked], rated_by_user, neighbors


if __name__ == '__main__':
	# Demo run: print the first five recommended item ids for user 222,
	# using the ratings file 'u.data' in the current directory.
	# recommendByUserFC loads and parses the file itself, so the extra
	# read/parse that used to precede this call was unused and is gone.
	recommend_list,user_movie,neighbors=recommendByUserFC("u.data",222)

	# A parenthesised single argument prints identically under the
	# Python 2 print statement and is valid Python 3 syntax.
	print(recommend_list[:5])