#从csv文件创建DataFrame类型的数据结构
>>> df=pd.read_csv("xxx.csv")
#DataFrame类型的形状和长度
>>> df.shape
(38, 39)
>>> len(df)
38
#各列的标题和数据类型
>>> df.columns
>>> df.dtypes
#索引
>>> df.index
RangeIndex(start=0, stop=38, step=1)
#将DataFrame转化成Numpy数组
>>> df.values
#查看变量类型
>>>type(df)
#获取DataFrame的一列(得到的数据类型是Series)
>>>type(df)
>>> col=df['104']
>>> type(col)
#Series中与DataFrame相似的属性
>>> col.shape
(38,)
>>> col.values
array([301, 1051, 1657, 1852, 2057, 2258, 2938, 3418, 3718, 3938, 4148, 4568, 5068])
>>> col.index
RangeIndex(start=0, stop=38, step=1)
>>> col.name
'104'
#截取最后几个元素
>>> col[-2:]
36    65536
37    65536
Name: 104, dtype: int64
>>> type(col[-2:])
#DataFrame的符号
>>> np.sign(df)
>>> last_col=df.columns[-1]
>>> np.sign(df[last_col])
#head(取前几行)和tail(取后几行)
>>> df.head(2)
>>> df.tail(2)
#按索引查找某一行数据
>>> last_col=df.index[-1]
>>> last_col
>>> df.iloc[last_col]
#按索引查找某一行的某一列数据
>>> df.iloc[2:9]
#iloc和iat作用相同
>>> df.iloc[2,3]
>>> df.iat[2,3]
#逻辑查找
>>> df[df>df.mean()]
#统计计算
#描述信息
>>> df.describe()
#非空数据的数量
>>> df.count()
#平均绝对偏差(类似于标准差;注意:DataFrame.mad 在 pandas 2.0 中已被移除)
>>> df.mad()
#中位数
>>> df.median()
#最小值
>>> df.min()
#最大值
>>> df.max()
#众数
>>> df.mode()
#标准差
>>> df.std()
#方差
>>> df.var()
#偏态系数(skewness,表示数据的对称程度)
>>> df.skew()
#峰态系数(kurtosis,表示数据分布图的尖扁程度)
>>> df.kurt()
#用python字典生成DataFrame
>>> df=pd.DataFrame({'weather':['cold','hot'],'food':['soup','ice cream']})
>>> df
        food weather
0       soup    cold
1  ice cream     hot
#对某个属性按类型分组
>>> group=df.groupby('weather')
>>> for name,gro in group:
...     print(name)
...     print(gro)
...
cold
   food weather
0  soup    cold
2  cake    cold
hot
        food weather
1  ice cream     hot
3      bread     hot
>>> group
#各分组第一行、最后一行、平均数
>>> group=df.groupby('weather')
>>> group.first()
              food  price
weather
cold          soup      1
hot      ice cream      2
>>> group.last()
          food  price
weather
cold      cake      3
hot      bread      4
>>> group.mean()
         price
weather
cold         2
hot          3
#查看分组
>>> g=df.groupby(['weather','food'])
>>> g.groups
{('hot', 'bread'): Int64Index([3], dtype='int64'), ('cold', 'cake'): Int64Index([2], dtype='int64'), ('hot', 'ice cream'): Int64Index([1], dtype='int64'), ('cold', 'soup'): Int64Index([0], dtype='int64')}
#为分组追加属性
>>> g.agg([np.mean])
                  price
                   mean
weather food
cold    cake          3
        soup          1
hot     bread         4
        ice cream     2
#截取几行数据并连接
>>> d=pd.concat([df[:2],df[3:]])
>>> d
        food  price weather
0       soup      1    cold
1  ice cream      2     hot
3      bread      4     hot
#注意:DataFrame.append 在 pandas 2.0 中已被移除,新版本请改用 pd.concat([d, df[3:]])
>>> d.append(df[3:])
        food  price weather
0       soup      1    cold
1  ice cream      2     hot
3      bread      4     hot
3      bread      4     hot