| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 
 | 
 
 
 
 import pandas as pd
 import datetime
 import numpy as np
 
 # Load the Chengdu second-hand housing CSV into a DataFrame.
 data = pd.read_csv("成都市二手房交易信息.csv")
 print(type(data))

 # value_counts() sorts by frequency (descending), so the first five rows
 # are the five most frequent values of the district column.
 district_counts = data["房屋所属市辖区"].value_counts()
 print(district_counts.head(5))
 
 # Bucket every row by unit price (yuan per square metre):
 #   (0, 10000] -> '心动价', (10000, 20000] -> '大众价', above 20000 -> '奋斗价'.
 label = ['心动价', '大众价', '奋斗价']
 # An open-ended top edge keeps the bin edges strictly increasing even when
 # no price exceeds 20000 (pd.cut rejects non-monotonic edges), and avoids
 # the builtin max(), which is unreliable when the column contains NaN.
 bins = [0, 10000, 20000, float('inf')]

 data_price = pd.cut(data['单价(元/平方米)'], bins, labels=label)

 # Put the band column at position 16 when the frame is wide enough;
 # otherwise append it as the last column instead of failing.
 data.insert(min(16, len(data.columns)), column='价格分布', value=data_price)
 print(data)

 # Highest, lowest and median unit price per district and price band.
 # observed=False pins the long-standing default for categorical groupers
 # (empty bands are kept), so the result does not change with the pandas
 # version and no FutureWarning is emitted.
 data_agg = data.groupby(['房屋所属市辖区', '价格分布'], observed=False)['单价(元/平方米)']

 print(data_agg.agg(['max', 'min', 'median']))
 
 # Show the dtype of the listing-date column as it currently stands.
 print(data['挂牌时间'].dtype)

 # Today's date rendered as a 'YYYY-MM-DD' string (time of day dropped).
 current_time = datetime.datetime.now()
 datedata = f"{current_time:%Y-%m-%d}"
 def g_date(i):
     """Convert one raw listing-date cell to a pandas Timestamp.

     Args:
         i: A single cell value. Ordinary date strings are parsed with
             ``pd.to_datetime``. A value made of exactly five decimal
             digits is treated as a day-count serial number.

     Returns:
         ``pandas.Timestamp`` for the cell, or ``pandas.NaT`` when the cell
         is missing.
     """
     # Missing cells have no length; map them to NaT instead of crashing.
     if pd.isna(i):
         return pd.NaT

     text = str(i).strip()
     if len(text) == 5 and text.isdecimal():
         # NOTE(review): five-digit values are presumably Excel serial dates
         # (1900 date system) -- confirm against the CSV. Excel counts
         # 1900-01-01 as day 1 and also counts a non-existent 1900-02-29,
         # so the effective zero point for modern dates is 1899-12-30;
         # adding the serial to 1900-01-01 lands two days late.
         return pd.Timestamp('1899-12-30') + pd.Timedelta(days=int(text))
     return pd.to_datetime(text)
 
 # Convert every listing-date cell with g_date, then show the resulting
 # dtype and the converted column.
 listing_dates = data['挂牌时间'].apply(g_date)
 data['挂牌时间'] = listing_dates
 print(data['挂牌时间'].dtype)
 print(data['挂牌时间'])
 # Time elapsed between the reference date string and each listing date.
 today = pd.to_datetime(datedata)
 date_time = today - data['挂牌时间']
 def print_days(day):
     """Print the ``days`` attribute of a single time delta.

     Args:
         day: Object exposing a ``days`` attribute, e.g. ``pandas.Timedelta``
             or ``datetime.timedelta``.
     """
     print(day.days)
 # Print the day count of every listing's age, one value per line.
 date_time.apply(print_days)


 # Copy of the frame indexed by listing date, as required for resampling.
 data_new = data.set_index("挂牌时间")
 print(data_new)

 # Mean unit price per month, first by grouping on a 'YYYYMM' string key ...
 month_key = data['挂牌时间'].dt.strftime('%Y%m')
 aver = data.groupby(month_key)['单价(元/平方米)']
 print(aver.mean().round(0))
 # ... then by resampling the date-indexed prices at month-end frequency.
 # NOTE: newer pandas releases prefer the alias 'ME'; 'M' is kept so the
 # script still runs on older versions.
 unit_price_by_date = data_new['单价(元/平方米)']
 avg = unit_price_by_date.resample('M').mean()
 print(avg.round(0))
 
 # Mean unit price for each district (rows) and price band (columns).
 # The string 'mean' selects pandas' own aggregation; passing np.mean is
 # deprecated in recent pandas releases (FutureWarning) while currently
 # producing the same result. observed=False pins the long-standing default
 # for categorical columns so the table shape is version-independent.
 data_povit = pd.pivot_table(
     data,
     index=['房屋所属市辖区'],
     columns='价格分布',
     values=['单价(元/平方米)'],
     aggfunc='mean',
     observed=False,
 )
 print(data_povit.round(0))
 # The same district-by-band mean built with crosstab, rounded to whole yuan.
 data_crosstab = pd.crosstab(
     index=data['房屋所属市辖区'],
     columns=data['价格分布'],
     values=data['单价(元/平方米)'],
     aggfunc='mean',
 ).round(0)
 print(data_crosstab)
 
 
 |