Python股票历史数据预处理(二)
从网上下载的股票历史数据往往不能直接使用,需要转换为自己所需要的格式。下面以Python为工具,将csv文件中存储的股票历史数据提取出来并进行处理。处理后得到的结果是30天涨跌幅子数据库,下载地址为:http://download.csdn.net/detail/suiyingy/9688605。
主要步骤有(Python csv数据读写):
- 从csv文件读取股票历史涨跌幅数据;
- 随机选取连续30个历史涨跌幅数据;
- 构建自己的数据库;
- 将处理结果保存为新的csv文件。
具体代码如下:
# -*- coding: utf-8 -*-
"""
Created on Thu Nov 17 23:04:33 2016
csv格式股票历史涨跌幅数据处理
@author: yehxqq1513760265
"""
import numpy
as np
import pandas
as pd
import
random
import
csv
import
sys
reload
(
sys
)
sys.
setdefaultencoding
(
'utf-8'
)
'''
- 加载csv格式数据
'''
def loadCSVfile1(datafile):
    """Load a CSV file into a NumPy array.

    Args:
        datafile: Path of the CSV file to read.

    Returns:
        A NumPy array built from the list of rows produced by ``csv.reader``;
        each row is the list of string fields of one CSV record.
    """
    with open(datafile) as handle:
        # Materialize all rows while the file is still open.
        rows = list(csv.reader(handle))
    return np.array(rows)
#数据处理
#随机选取30个历史涨跌幅数据
#构建自己的数据库
def dataProcess(dataArr, subLen):
    """Randomly sample fixed-length windows from every data column.

    The last row of ``dataArr`` is read as a per-column count, and columns
    0 and 1 are skipped. For a column whose count is ``L``,
    ``L // (2 * subLen)`` start rows are drawn without replacement from
    ``range(totLen - L - 1, totLen - subLen)``; for each drawn row ``i`` the
    slice ``dataArr[i:i + subLen, column]`` is collected.

    Args:
        dataArr: 2-D NumPy array. Values in the last row (from column 2 on)
            must be convertible with ``int()``.
        subLen: Number of consecutive rows in each sampled window.

    Returns:
        A NumPy array built from the list of collected windows (one window
        per row). It is an empty array when no column yields a sample.
    """
    # Take the shape from the argument itself; the previous code read a
    # module-level ``data`` variable and failed when none existed.
    totLen, totWid = np.shape(dataArr)
    print("%d %d" % (totLen, totWid))

    # Per-column counts are stored in the last row, starting at column 2.
    lenArr = dataArr[totLen - 1, 2:totWid]
    dataOut = []
    for columnCnt, lenData in enumerate(lenArr, 2):
        colLen = int(lenData)
        # Floor division keeps the sample count an integer under both
        # Python 2 and Python 3 division rules.
        N60 = colLen // (2 * subLen)
        print(N60)
        if N60 > 0:
            # NOTE(review): assumes colLen <= totLen - 1; otherwise the range
            # starts at a negative row index -- confirm against the data.
            randIndex = random.sample(
                range(totLen - colLen - 1, totLen - subLen), N60)
            for i in randIndex:
                dataOut.append(dataArr[i:(i + subLen), columnCnt])
    dataOut = np.array(dataOut)
    return dataOut
if __name__ == "__main__":
    # Load the raw CSV, sample 30-row windows, and write them to a new CSV.
    datafile = "00100 (3).csv"
    data = loadCSVfile1(datafile)
    dataOut = dataProcess(data, 30)

    # Save the processed result. The context manager closes the file even if
    # writing fails. Binary mode is what the Python 2 csv module expects;
    # under Python 3 this would need mode 'w' with newline=''.
    with open('csvtest.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(dataOut)