原作者认为,该算法在NLP这类特征维度高、数据稀疏的任务中效果较好。算法为当前的预测向量w维护均值和方差,并对其进行优化。代码实现如下:
# CW (confidence-weighted) learning algorithm
def get_phi(eta=None):
    '''Return the confidence parameter phi for a confidence level eta.

    phi is the standard normal quantile (inverse CDF) evaluated at eta.

    eta -- confidence level strictly between 0 and 1, as a number or a
           numeric string. When None (the default), the value is read
           interactively from standard input.

    Raises ValueError if eta cannot be parsed as a float or does not lie
    in the open interval (0, 1).
    '''
    from scipy import stats

    if eta is None:
        # raw_input only exists on Python 2; on Python 3 the equivalent
        # is input. Python 2's input() evaluates the text, so raw_input
        # must be preferred whenever it is available.
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        eta = read_line("please input confidence parameter between (0 1)\n")
    eta = float(eta)
    # Outside (0, 1) the quantile is nan or infinite, which would silently
    # poison every later computation that uses phi.
    if not 0.0 < eta < 1.0:
        raise ValueError("confidence parameter must be in (0, 1), got %r" % eta)
    return stats.norm.ppf(eta)
def get_data_size(datafile):
    '''Return the number of whitespace-separated fields on the first line.

    datafile -- path of a text file to inspect.

    Only the first line is read; an empty file yields 0.
    '''
    # The with-block closes the file even if reading raises.
    with open(datafile) as f:
        first_line = f.readline()
    # split() with no argument already ignores leading/trailing whitespace.
    return len(first_line.split())
def dot(X, Y):
    '''Return the dot product of the sequences X and Y.

    Elements are paired with zip, so any extra trailing elements of the
    longer sequence are ignored. Two empty sequences give 0.
    '''
    # Use the builtin sum instead of a local accumulator that shadowed it.
    return sum(x * y for x, y in zip(X, Y))
def mul(X, Y):
    '''Return the element-wise product of X and Y as a new list.

    Elements are paired with zip, so the result is as long as the shorter
    of the two inputs.
    '''
    return [x * y for x, y in zip(X, Y)]
def plus(X, Y):
    '''Return the element-wise sum of X and Y as a new list.

    Elements are paired with zip, so the result is as long as the shorter
    of the two inputs.
    '''
    return [x + y for x, y in zip(X, Y)]
def opp(X):
    '''Return a new list holding the reciprocal 1/x of every element of X.

    Raises ZeroDivisionError if any element is zero.
    '''
    # 1.0 forces true division: on Python 2, 1/x with an int x is floor
    # division and would turn the reciprocal of 2 into 0.
    return [1.0 / x for x in X]
def init_u_E(datafile):
#initialize
#inp