问题背景
使用 matplotlib 将 DBSCAN 聚类结果以散点图可视化时,在 plt.scatter 中对 DataFrame 使用 X_df[:,0] 这样的切片会提示 TypeError
源代码:
from sklearn.cluster import DBSCAN
import pandas as pd
import matplotlib.pyplot as plt
def devicesDbscan():
    """Cluster device coordinates with DBSCAN and try to plot them.

    This is the FAILING version that reproduces the error discussed in the
    surrounding text: the ``plt.scatter`` call indexes a pandas DataFrame
    with NumPy-style ``[:, 0]`` slicing, which pandas rejects.

    Reads ``./unique_deviceID_lng_lat.csv`` as a header-less CSV whose
    columns are named DEVICE_ID, LNG and LAT, prints its shape and the
    number of distinct cluster labels, then attempts to draw a scatter plot
    coloured by label.
    """
    path = "./unique_deviceID_lng_lat.csv"
    df = pd.read_csv(path, header=None, names=["DEVICE_ID", "LNG", "LAT"])
    # ``shape`` is an attribute holding a tuple, not a method; calling it
    # would raise "'tuple' object is not callable" before the line this
    # example is meant to demonstrate is ever reached.
    print(df.shape)
    X_df = df.drop("DEVICE_ID", axis=1)  # drop the ID column
    # Fit the model and return the predicted label of every row.
    y_pred = DBSCAN(eps=0.1, min_samples=1, n_jobs=20).fit_predict(X_df)
    count_clusters = len(set(y_pred))  # number of distinct cluster labels
    print("clusters的数目:" + str(count_clusters))
    # --> The error is raised here: X_df is still a DataFrame, and
    # DataFrame[...] does not accept a (slice, column-position) key.
    # Intentionally left as-is to illustrate the problem.
    plt.scatter(X_df[:, 0], X_df[:, 1], c=y_pred)
    plt.show()


if __name__ == "__main__":
    devicesDbscan()
解决
DataFrame 不支持 X_df[:,0] 这种 NumPy 风格的二维切片索引;将 DataFrame 对象 X_df 转成 ndarray 数组(X_df.values)后再切片即可
from sklearn.cluster import DBSCAN
import pandas as pd
import matplotlib.pyplot as plt
def devicesDbscan():
    """Cluster device coordinates with DBSCAN and show them as a scatter plot.

    Reads ``./unique_deviceID_lng_lat.csv`` as a header-less CSV whose
    columns are named DEVICE_ID, LNG and LAT, prints its shape and the
    number of distinct cluster labels, then draws the two coordinate
    columns coloured by label.
    """
    path = "./unique_deviceID_lng_lat.csv"
    df = pd.read_csv(path, header=None, names=["DEVICE_ID", "LNG", "LAT"])
    # ``shape`` is an attribute holding a tuple, not a method; calling it
    # raises "'tuple' object is not callable".
    print(df.shape)
    X_df = df.drop("DEVICE_ID", axis=1)  # keep only the coordinate columns
    # Fit the model and return the predicted label of every row.
    y_pred = DBSCAN(eps=0.1, min_samples=1, n_jobs=20).fit_predict(X_df)
    count_clusters = len(set(y_pred))  # number of distinct cluster labels
    print("clusters的数目:" + str(count_clusters))
    # A DataFrame cannot be indexed with NumPy-style ``[:, i]``; convert it
    # to an ndarray once and slice the array's columns instead.
    coords = X_df.values
    plt.scatter(coords[:, 0], coords[:, 1], c=y_pred)
    plt.show()


if __name__ == "__main__":
    devicesDbscan()