如何计算逻辑回归精度

2024-04-06

我是机器学习和 Python 编码的完全初学者,我的任务是从头开始编码逻辑回归,以了解幕后发生的情况。到目前为止,我已经编码了假设函数、成本函数和梯度下降,然后编码了逻辑回归。然而,在打印精度编码时,我得到的输出较低(0.69),该输出不会随着迭代次数的增加或学习率的变化而改变。我的问题是,我下面的准确性代码有问题吗?任何指向正确方向的帮助将不胜感激

X = data[['radius_mean', 'texture_mean', 'perimeter_mean',
   'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
   'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
   'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
   'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
   'fractal_dimension_se', 'radius_worst', 'texture_worst',
   'perimeter_worst', 'area_worst', 'smoothness_worst',
   'compactness_worst', 'concavity_worst', 'concave points_worst',
   'symmetry_worst', 'fractal_dimension_worst']]
X = np.array(X)
X = min_max_scaler.fit_transform(X)
Y = data["diagnosis"].map({'M':1,'B':0})
Y = np.array(Y)

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.25)

X = data["diagnosis"].map(lambda x: float(x))

def Sigmoid(z):
    if z < 0:
        return 1 - 1/(1 + math.exp(z))
    else:
        return 1/(1 + math.exp(-z))

def Hypothesis(theta, x):
    z = 0
    for i in range(len(theta)):
        z += x[i]*theta[i]
    return Sigmoid(z)

def Cost_Function(X,Y,theta,m):
    sumOfErrors = 0
    for i in range(m):
        xi = X[i]
        hi = Hypothesis(theta,xi)
        error = Y[i] * math.log(hi if  hi >0 else 1)
        if Y[i] == 1:
            error = Y[i] * math.log(hi if  hi >0 else 1)
        elif Y[i] == 0:
            error = (1-Y[i]) * math.log(1-hi  if  1-hi >0 else 1)
        sumOfErrors += error

    constant = -1/m
    J = constant * sumOfErrors
    #print ('cost is: ', J ) 
    return J

def Cost_Function_Derivative(X,Y,theta,j,m,alpha):
    sumErrors = 0
    for i in range(m):
        xi = X[i]
        xij = xi[j]
        hi = Hypothesis(theta,X[i])
        error = (hi - Y[i])*xij
        sumErrors += error
    m = len(Y)
    constant = float(alpha)/float(m)
    J = constant * sumErrors
    return J

def Gradient_Descent(X,Y,theta,m,alpha):
    new_theta = []
    constant = alpha/m
    for j in range(len(theta)):
        CFDerivative = Cost_Function_Derivative(X,Y,theta,j,m,alpha)
        new_theta_value = theta[j] - CFDerivative
        new_theta.append(new_theta_value)
    return new_theta


def Accuracy(theta):
    correct = 0
    length = len(X_test, Hypothesis(X,theta))
    for i in range(length):
        prediction = round(Hypothesis(X[i],theta))
        answer = Y[i]
    if prediction == answer.all():
            correct += 1
    my_accuracy = (correct / length)*100
    print ('LR Accuracy %: ', my_accuracy)



def Logistic_Regression(X,Y,alpha,theta,num_iters):
    theta = np.zeros(X.shape[1])
    m = len(Y)
    for x in range(num_iters):
        new_theta = Gradient_Descent(X,Y,theta,m,alpha)
        theta = new_theta
        if x % 100 == 0:
            Cost_Function(X,Y,theta,m)
            print ('theta: ', theta)    
            print ('cost: ', Cost_Function(X,Y,theta,m))
    Accuracy(theta)

initial_theta = [0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]  
alpha = 0.0001
iterations = 1000
Logistic_Regression(X,Y,alpha,initial_theta,iterations)

这是使用来自威斯康星州乳腺癌数据集的数据(https://www.kaggle.com/uciml/breast-cancer-wisconsin-data https://www.kaggle.com/uciml/breast-cancer-wisconsin-data),我权衡了 30 个特征 - 尽管将特征更改为已知相关的特征也不会改变我的准确性。


Python 为我们提供了这个 scikit-learn 库,使我们的工作更加轻松, 这对我有用:

from sklearn.metrics import accuracy_score

y_pred = log.predict(x_test)

score =accuracy_score(y_test,y_pred)
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

如何计算逻辑回归精度 的相关文章

随机推荐