Toggle navigation
Toggle navigation
This project
Loading...
Sign in
武翔
/
git
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
武翔
2016-11-04 11:11:39 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a42e10e7de07ba679381499d6af9a7493616b009
a42e10e7
1 parent
0b5e237d
add a file
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
62 additions
and
0 deletions
test/kmeans_example.py
test/kmeans_example.py
0 → 100644
View file @
a42e10e
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from
__future__
import
print_function
# $example on$
from
pyspark.ml.clustering
import
KMeans
# $example off$
from
pyspark.sql
import
SparkSession
"""
An example demonstrating k-means clustering.
Run with:
bin/spark-submit examples/src/main/python/ml/kmeans_example.py
This example requires NumPy (http://www.numpy.org/).
"""
if __name__ == "__main__":
    # Script entry point: build (or reuse) the Spark session for this example.
    spark = SparkSession\
        .builder\
        .appName("PythonKMeansExample")\
        .getOrCreate()

    # Run the whole pipeline under try/finally so the session is stopped even
    # when loading, training, or evaluation raises.
    try:
        # Example: load the input data, read in "libsvm" format.
        dataset = spark.read.format("libsvm").load("sample_kmeans_data.txt")

        # Train a k-means model with k=2 and a fixed seed of 1.
        kmeans = KMeans().setK(2).setSeed(1)
        model = kmeans.fit(dataset)

        # Evaluate clustering by computing the Within Set Sum of Squared Errors.
        # NOTE(review): computeCost is documented as deprecated in newer Spark
        # releases in favour of ClusteringEvaluator -- confirm against the
        # Spark version this script targets before upgrading.
        wssse = model.computeCost(dataset)
        print("Within Set Sum of Squared Errors = " + str(wssse))

        # Show the result: print each cluster center on its own line.
        centers = model.clusterCenters()
        print("Cluster Centers: ")
        for center in centers:
            print(center)
        # $example off$
    finally:
        # Always stop the session, on success or failure.
        spark.stop()
Please
register
or
login
to post a comment