武翔

Add kmeans_example.py, a PySpark k-means clustering example.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function

# $example on$
from pyspark.ml.clustering import KMeans
# $example off$

from pyspark.sql import SparkSession

27 +"""
28 +An example demonstrating k-means clustering.
29 +Run with:
30 + bin/spark-submit examples/src/main/python/ml/kmeans_example.py
31 +
32 +This example requires NumPy (http://www.numpy.org/).
33 +"""


if __name__ == "__main__":

    spark = SparkSession\
        .builder\
        .appName("PythonKMeansExample")\
        .getOrCreate()

    # $example on$
    # Loads data.
    dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")

    # Trains a k-means model.
    kmeans = KMeans().setK(2).setSeed(1)
    model = kmeans.fit(dataset)
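
    # The fitted model can assign a cluster to each row with
    # model.transform(dataset), which appends an integer "prediction" column;
    # this is what the evaluator sketch below relies on.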

    # Evaluate clustering by computing Within Set Sum of Squared Errors.
    wssse = model.computeCost(dataset)
    print("Within Set Sum of Squared Errors = " + str(wssse))
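
    # An alternative, left here only as a commented sketch: on Spark 2.3+ the
    # silhouette score from pyspark.ml.evaluation.ClusteringEvaluator can be
    # used instead (computeCost is deprecated in Spark 3.x in favor of this
    # approach). Assumes the default column names "features" and "prediction":
    #
    #   from pyspark.ml.evaluation import ClusteringEvaluator
    #   predictions = model.transform(dataset)
    #   silhouette = ClusteringEvaluator().evaluate(predictions)
    #   print("Silhouette with squared euclidean distance = " + str(silhouette))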

    # Shows the result (cluster centers).
    centers = model.clusterCenters()
    print("Cluster Centers: ")
    for center in centers:
        print(center)
    # $example off$

    spark.stop()