Showing
1 changed file
with
62 additions
and
0 deletions
test/kmeans_example.py
0 → 100644
| 1 | +# | ||
| 2 | +# Licensed to the Apache Software Foundation (ASF) under one or more | ||
| 3 | +# contributor license agreements. See the NOTICE file distributed with | ||
| 4 | +# this work for additional information regarding copyright ownership. | ||
| 5 | +# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| 6 | +# (the "License"); you may not use this file except in compliance with | ||
| 7 | +# the License. You may obtain a copy of the License at | ||
| 8 | +# | ||
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 | ||
| 10 | +# | ||
| 11 | +# Unless required by applicable law or agreed to in writing, software | ||
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | +# See the License for the specific language governing permissions and | ||
| 15 | +# limitations under the License. | ||
| 16 | +# | ||
| 17 | +#!/usr/bin/env python | ||
| 18 | +# -*- coding: utf-8 -*- | ||
| 19 | +from __future__ import print_function | ||
| 20 | + | ||
| 21 | +# $example on$ | ||
| 22 | +from pyspark.ml.clustering import KMeans | ||
| 23 | +# $example off$ | ||
| 24 | + | ||
| 25 | +from pyspark.sql import SparkSession | ||
| 26 | + | ||
| 27 | +""" | ||
| 28 | +An example demonstrating k-means clustering. | ||
| 29 | +Run with: | ||
| 30 | + bin/spark-submit examples/src/main/python/ml/kmeans_example.py | ||
| 31 | + | ||
| 32 | +This example requires NumPy (http://www.numpy.org/). | ||
| 33 | +""" | ||
| 34 | + | ||
| 35 | + | ||
if __name__ == "__main__":
    # Script entry point: obtain a SparkSession, fit a k-means model with
    # k=2 on a LIBSVM-format dataset, then print the model's cost and its
    # cluster centers.
    spark = SparkSession\
        .builder\
        .appName("PythonKMeansExample")\
        .getOrCreate()

    # Run the example under try/finally so the session is always stopped,
    # even when loading the data or fitting the model raises.
    try:
        # $example on$
        # Load the data (LIBSVM format).
        dataset = spark.read.format("libsvm").load("sample_kmeans_data.txt")

        # Train a k-means model; the seed is fixed via setSeed(1).
        kmeans = KMeans().setK(2).setSeed(1)
        model = kmeans.fit(dataset)

        # Evaluate clustering by computing Within Set Sum of Squared Errors.
        # NOTE(review): KMeansModel.computeCost is deprecated in newer Spark
        # releases (2.4+) and removed in 3.0 -- confirm the target Spark
        # version before relying on it.
        wssse = model.computeCost(dataset)
        print("Within Set Sum of Squared Errors = " + str(wssse))

        # Show the result: the cluster centers.
        centers = model.clusterCenters()
        print("Cluster Centers: ")
        for center in centers:
            print(center)
        # $example off$
    finally:
        spark.stop()
-
Please register or login to post a comment