Toggle navigation
Toggle navigation
This project
Loading...
Sign in
武翔
/
git
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
武翔
2016-11-04 11:07:52 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
0b5e237d0ea7f6cea9b9dd3f9b0bf86d03cf0f97
0b5e237d
1 parent
277372cf
add apriori and userfc
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
122 additions
and
0 deletions
Apriori.ipynb
user_FC.py
Apriori.ipynb
0 → 100644
View file @
0b5e237
This diff is collapsed. Click to expand it.
user_FC.py
0 → 100644
View file @
0b5e237
import
math
#!/usr/bin/python
def readFile(file_name):
    """Read a text file and return its lines.

    Args:
        file_name: Path of the file to open in text read mode.

    Returns:
        A list with one string per line. Each string keeps its line
        terminator, exactly as ``readlines()`` produces it.

    Raises:
        IOError: If the file cannot be opened or read (on Python 3 this
            is ``OSError``, of which ``IOError`` is an alias).
    """
    # The context manager closes the handle even when readlines() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(file_name, 'r') as f:
        return f.readlines()
def getBooksList(file_name):
    """Load a "|"-separated catalogue file into a dict keyed by integer id.

    Every non-blank line is split on "|". The first field is converted
    with ``int()`` and used as the key; the remaining fields are stored,
    in order, as a list of strings.

    Args:
        file_name: Path of the catalogue file, read through ``readFile``.

    Returns:
        A dict mapping ``int(first_field)`` to the list of the other
        fields of that line. If an id occurs on several lines, the last
        occurrence wins.

    Raises:
        ValueError: If the first field of a non-blank line is not an
            integer.
    """
    books_info = {}
    for line in readFile(file_name):
        # Drop the line terminator so it does not end up glued to the
        # last field of every record.
        line = line.rstrip('\r\n')
        if not line.strip():
            # A blank line (typically at end of file) has no id to parse.
            continue
        fields = line.split("|")
        books_info[int(fields[0])] = fields[1:]
    return books_info
def getRatingInformation(ratings):
    """Parse tab-separated rating lines into integer triples.

    Only the first three tab-separated fields of each line are used; any
    further fields are ignored. Elsewhere in this file the triples are
    consumed as ``[user, item, rating]``.

    Args:
        ratings: Iterable of text lines, e.g. the result of ``readFile``.

    Returns:
        A list of ``[int, int, int]`` lists, one per non-blank input line,
        in input order.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    rates = []
    for line in ratings:
        if not line.strip():
            # Blank lines (e.g. a trailing newline at end of file) carry
            # no rating; skipping them avoids int('') raising ValueError.
            continue
        fields = line.split('\t')
        rates.append([int(fields[0]), int(fields[1]), int(fields[2])])
    return rates
def createUserRankDic(rates):
    """Index rating records by their first and by their second element.

    Args:
        rates: Iterable of indexable records whose elements 0, 1 and 2
            are used (``getRatingInformation`` produces such records as
            ``[user, item, rating]``).

    Returns:
        A 2-tuple ``(user_rate_dic, item_to_user)`` of plain dicts:
        ``user_rate_dic`` maps record[0] to the list of
        ``(record[1], record[2])`` tuples seen for it, and
        ``item_to_user`` maps record[1] to the list of record[0] values
        seen for it. Both lists keep input order.
    """
    ratings_by_user = {}
    users_by_item = {}
    for record in rates:
        user, item, rating = record[0], record[1], record[2]
        # setdefault creates the list on first sight of a key, then the
        # same list is extended on every later occurrence.
        ratings_by_user.setdefault(user, []).append((item, rating))
        users_by_item.setdefault(item, []).append(user)
    return ratings_by_user, users_by_item
def clacSimlaryCosDist(user1, user2):
    """Return a mean-centred similarity score for two users' ratings.

    Despite the "Cos" in the name, each rating is first shifted by its
    owner's average rating, so the result is a correlation-style score
    rather than a plain cosine of the raw rating vectors.

    The numerator only covers items that appear in both lists; the two
    sums under the square root cover every rating of each user.

    Args:
        user1: List of ``(item, rating)`` pairs of the first user.
        user2: List of ``(item, rating)`` pairs of the second user.
            Items must be hashable (they are used as dict keys).

    Returns:
        ``0`` if either list is empty or if the shared items contribute
        nothing to the numerator; otherwise the float
        ``sum_xy / sqrt(sum_x * sum_y)``.
    """
    # An empty rating list has no average; report "no similarity" rather
    # than failing with ZeroDivisionError.
    if not user1 or not user2:
        return 0

    # Start the sums at 0.0 so the averages are floats on Python 2 too.
    avg_x = sum((entry[1] for entry in user1), 0.0) / len(user1)
    avg_y = sum((entry[1] for entry in user2), 0.0) / len(user2)

    # Centred rating of user2 per item, for O(1) matching against user1.
    # Values are accumulated so that an item listed more than once still
    # contributes once per occurrence, as a pairwise scan would.
    centred_y = {}
    sum_y = 0.0
    for entry in user2:
        dev_y = entry[1] - avg_y
        centred_y[entry[0]] = centred_y.get(entry[0], 0.0) + dev_y
        sum_y += dev_y * dev_y

    sum_x = 0.0
    sum_xy = 0.0
    for entry in user1:
        dev_x = entry[1] - avg_x
        if entry[0] in centred_y:
            sum_xy += dev_x * centred_y[entry[0]]
        sum_x += dev_x * dev_x

    # A non-zero numerator implies both sums below are positive, so the
    # division is safe once this guard has passed.
    if sum_xy == 0.0:
        return 0
    return sum_xy / math.sqrt(sum_x * sum_y)
def calcNearestNeighbor(userid, users_dic, item_dic):
    """Rank every other user who shares at least one item with ``userid``.

    Args:
        userid: Key of the target user in ``users_dic``.
        users_dic: Mapping of user to a list of ``(item, rating)`` pairs.
        item_dic: Mapping of item to the list of users that rated it.

    Returns:
        A list of ``[similarity, neighbor]`` lists sorted in descending
        order. Because whole lists are compared, equal similarities are
        ordered by descending neighbor key. The similarity comes from
        ``clacSimlaryCosDist``.

    Raises:
        KeyError: If ``userid`` is missing from ``users_dic`` or one of
            the user's items is missing from ``item_dic``.
    """
    target_ratings = users_dic[userid]

    neighbors = []
    # Companion set gives O(1) duplicate checks; the list keeps the
    # first-seen order of the candidates.
    seen = set()
    for item in target_ratings:
        for neighbor in item_dic[item[0]]:
            if neighbor != userid and neighbor not in seen:
                seen.add(neighbor)
                neighbors.append(neighbor)

    neighbors_dist = [
        [clacSimlaryCosDist(target_ratings, users_dic[neighbor]), neighbor]
        for neighbor in neighbors
    ]
    neighbors_dist.sort(reverse=True)
    return neighbors_dist
def recommendByUserFC(file_name, userid, k=5):
    """Rank items for ``userid`` from the ratings of its nearest neighbors.

    The rating file is loaded with ``readFile``, parsed with
    ``getRatingInformation`` and indexed with ``createUserRankDic``. The
    ``k`` best entries returned by ``calcNearestNeighbor`` are kept, and
    every item rated by one of them receives the sum of the similarities
    of the neighbors who rated it.

    Items the target user has already rated are NOT removed from the
    ranking; they are returned separately so the caller can filter.

    Args:
        file_name: Path of the tab-separated rating file.
        userid: User to compute the ranking for.
        k: Maximum number of neighbors to use (default 5).

    Returns:
        A 3-tuple ``(items, user_books, neighbors)``: the item keys sorted
        by descending accumulated score (ties by descending item key), the
        items rated by ``userid`` in file order, and the ``k`` selected
        ``[similarity, neighbor]`` entries.
    """
    rating_lines = readFile(file_name)
    user_ratings, item_users = createUserRankDic(
        getRatingInformation(rating_lines))

    top_neighbors = calcNearestNeighbor(userid, user_ratings, item_users)[:k]

    # Accumulate, per item, the similarity of each neighbor who rated it.
    scores = {}
    for similarity, neighbor_id in top_neighbors:
        for book in user_ratings[neighbor_id]:
            item = book[0]
            if item in scores:
                scores[item] += similarity
            else:
                scores[item] = similarity

    # [score, item] pairs so that sorting orders by score first.
    ranked = sorted(
        ([score, item] for item, score in scores.items()), reverse=True)
    already_rated = [entry[0] for entry in user_ratings[userid]]
    return [pair[1] for pair in ranked], already_rated, top_neighbors
if __name__ == '__main__':
    # Demo entry point: print the five top-ranked items for user 222 from
    # the ratings in "u.data" (resolved against the working directory).
    # recommendByUserFC loads and parses the file itself, so it is not
    # read a second time here.
    recommend_list, user_movie, neighbors = recommendByUserFC("u.data", 222)
    # A parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print(recommend_list[:5])
Please
register
or
login
to post a comment