Showing
1 changed file
with
62 additions
and
0 deletions
test/kmeans_example.py
0 → 100644
1 | +# | ||
2 | +# Licensed to the Apache Software Foundation (ASF) under one or more | ||
3 | +# contributor license agreements. See the NOTICE file distributed with | ||
4 | +# this work for additional information regarding copyright ownership. | ||
5 | +# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
6 | +# (the "License"); you may not use this file except in compliance with | ||
7 | +# the License. You may obtain a copy of the License at | ||
8 | +# | ||
9 | +# http://www.apache.org/licenses/LICENSE-2.0 | ||
10 | +# | ||
11 | +# Unless required by applicable law or agreed to in writing, software | ||
12 | +# distributed under the License is distributed on an "AS IS" BASIS, | ||
13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
14 | +# See the License for the specific language governing permissions and | ||
15 | +# limitations under the License. | ||
16 | +# | ||
17 | +#!/usr/bin/env python | ||
18 | +# -*- coding: utf-8 -*- | ||
19 | +from __future__ import print_function | ||
20 | + | ||
21 | +# $example on$ | ||
22 | +from pyspark.ml.clustering import KMeans | ||
23 | +# $example off$ | ||
24 | + | ||
25 | +from pyspark.sql import SparkSession | ||
26 | + | ||
27 | +""" | ||
28 | +An example demonstrating k-means clustering. | ||
29 | +Run with: | ||
30 | + bin/spark-submit examples/src/main/python/ml/kmeans_example.py | ||
31 | + | ||
32 | +This example requires NumPy (http://www.numpy.org/). | ||
33 | +""" | ||
34 | + | ||
35 | + | ||
36 | +if __name__ == "__main__": | ||
37 | + | ||
38 | + spark = SparkSession\ | ||
39 | + .builder\ | ||
40 | + .appName("PythonKMeansExample")\ | ||
41 | + .getOrCreate() | ||
42 | + | ||
43 | + # Example. NOTE(review): this line may have replaced the "$example on$" marker that pairs with "$example off$" below - confirm. | ||
44 | + # Load the input data. | ||
45 | + dataset = spark.read.format("libsvm").load("sample_kmeans_data.txt")# read "sample_kmeans_data.txt" through the "libsvm" data source | ||
46 | + | ||
47 | + # Trains a k-means model. | ||
48 | + kmeans = KMeans().setK(2).setSeed(1) # k-means estimator configured with k=2 and seed=1 | ||
49 | + model = kmeans.fit(dataset) # fit the estimator on the loaded dataset to build the model | ||
50 | + | ||
51 | + # Evaluate clustering by computing the Within Set Sum of Squared Errors (WSSSE). NOTE(review): computeCost may be deprecated/removed in newer Spark versions - confirm against the target version. | ||
52 | + wssse = model.computeCost(dataset) | ||
53 | + print("Within Set Sum of Squared Errors = " + str(wssse)) | ||
54 | + | ||
55 | + # Show the result: print each cluster center returned by clusterCenters(). | ||
56 | + centers = model.clusterCenters() | ||
57 | + print("Cluster Centers: ") | ||
58 | + for center in centers: | ||
59 | + print(center) | ||
60 | + # $example off$ | ||
61 | + | ||
62 | + spark.stop() |
-
Please register or login to post a comment