Data Analysis学习笔记 --- Python数据清洗之数据聚合

# -*- coding=utf-8 -*-
import numpy as np
import pandas as pd

# Aggregation with built-in functions: group the rows by the 'level' column.
data = pd.DataFrame({'level': ['a', 'b', 'c', 'b', 'a'], 'num': [3, 5, 6, 8, 9]})
newdata = data.groupby('level')
# Per-group mean of 'num' (the only non-grouping column here).
# print() is used as a function so the script parses on Python 3 as well.
print(newdata.agg("mean"))

# Several aggregations at once: one result column per function name.
print(newdata.agg(['mean', 'sum', 'std']))

# Dict-based aggregation: choose a different function for each column.
data = pd.DataFrame(
    {
        'level': ['a', 'b', 'c', 'b', 'a'],
        'num': [3, 5, 6, 8, 9],
        'num1': [2, 5, 9, 6, 8],
    }
)
newdata = data.groupby(by='level')
# 'num' is averaged per group while 'num1' is summed per group.
print(
    newdata.agg(
        {
            'num': 'mean',
            'num1': 'sum',
        }
    )
)

# Group by an external list of labels (one label per row, in row order)
# instead of by a column, then print the mean of every column per label.
# NOTE(review): the original heading here was "transform", but the code
# calls .mean(), which yields one row per group -- confirm whether
# .transform('mean') was the intended demonstration.
data = pd.DataFrame(
    np.random.randn(5, 5),
    index=['li', 'chen', 'wang', 'zhao', 'qian'],
    columns=['a', 'b', 'c', 'd', 'e'],
)
key = ['ss', 'kk', 'kk', 'ss', 'ss']
# print() is used as a function so the script parses on Python 3 as well.
print(data.groupby(key).mean())

# User-defined aggregation helper.
data = pd.DataFrame({'level': ['a', 'b', 'c', 'b', 'a'],
                     'num': [3, 5, 6, 8, 9],
                     'num1': [2, 5, 9, 6, 8]})


def fun(data):
    """Group *data* by its 'level' column and aggregate with mean and sum.

    Returns the result of applying both 'mean' and 'sum' to the
    non-grouping columns, one row per distinct 'level' value.
    """
    return data.groupby('level').agg(['mean', 'sum'])


# NOTE(review): fun() is defined above but never called; this prints the raw
# frame. Presumably print(fun(data)) was intended -- confirm before changing.
# print() is used as a function so the script parses on Python 3 as well.
print(data)



结果: