QUANT
滑动窗口rolling
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
df
B
0 0.0
1 1.0
2 2.0
3 NaN
4 4.0
df.rolling(2).sum()
B
0 NaN
1 1.0
2 3.0
3 NaN
4 NaN
收益率
daily_close.pct_change()
daily_pct_change = daily_close / daily_close.shift(1) -1
获取每月第一天数据
# Parse the index into datetimes so it can be formatted by month.
df.index = pd.to_datetime(df.index)
# "YYYY-MM" label for every row; rows of the same month share one label.
time_month = df.index.strftime("%Y-%m")
# NOTE: this overwrites the original datetime index with the month labels.
df.index = time_month
# Take the first entry of each month group. GroupBy.first() returns the first
# non-null value per column, and the result is not assigned here (display only).
df.groupby(df.index).first()
调整x轴横坐标
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdate
# Font setup so Chinese text and the minus sign render in the figure.
plt.rcParams['font.sans-serif'] = ['Simhei']  # font that contains CJK glyphs
plt.rcParams['axes.unicode_minus'] = False  # use ASCII "-" so the sign renders with this font

# Parse the trade dates BEFORE plotting so matplotlib builds a real date axis.
# Plotting raw date strings creates a categorical axis, and any tick value
# that is not an existing trade date would be appended as a new category.
x = pd.to_datetime(df300['tradeDate'])
# Return series to plot
y = df300['avgReturn1M']

# Figure size in inches
plt.figure(figsize=(10, 5))
# Draw the curve (the label is only shown if a legend is added)
plt.plot(x, y, label="沪深300")
plt.title("沪深300指数收益率")
ax = plt.gca()

# One tick at every sixth month-end inside the plotted range. 'ME' is the
# month-end alias already used elsewhere in these notes.
xtricks_list = pd.date_range(x.iloc[0], x.iloc[-1], freq='6ME')
plt.xticks(xtricks_list)
# Render the tick labels as YYYY-MM-DD.
ax.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d'))

plt.xlabel("时间")
plt.ylabel("收益率")
plt.show()
监测数据缺失情况
# check for nan under a single DF column
df['column name'].isnull().values.any()
# count the nan under a single DF column
df['column name'].isnull().sum()
# check for nan under an entire DF
df.isnull().values.any()
# count the nan under an entire DF
df.isnull().sum().sum()
时间序列处理
# Normalizing date strings into a uniform datetime format
df = pd.DataFrame({"Date": ["26-12-2007", "27-12-2007", "28-12-2007"]})
# Parse the strings and re-emit them as ISO dates (YYYY-MM-DD).
# NOTE(review): the inputs are day-first (DD-MM-YYYY); passing dayfirst=True or
# format="%d-%m-%Y" would make the parsing explicit — confirm intended behavior.
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d')
# Same idea with a time component; the input here is the date-only string
# produced on the previous line.
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d %H:%M:%S')
# 时间范围生成range
DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00','2018-01-01 02:00:00'],
dtype='datetime64[ns]', freq='H')
# periods代表生成的数量
dti = pd.date_range('2018-01-01', periods=3, freq='H')
DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
'2018-04-27 00:00:00'],
dtype='datetime64[ns]', freq=None)
pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
# 生成2000-01-01到2001-02-06的月末数据
date_range = pd.date_range(start="2000-01-01", end="2001-02-06", freq='ME')
2000-01-31
2000-02-29
...
...
# 获取该月份第一天的日期
start_date = (date.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)).strftime('%Y-%m-%d')
会计表达
# 以“万”为单位显示（保留两位小数）
f'{num / 10000:.2f} 万'
计算涨跌幅
def calculate_change_pct(data):
    """
    Add the period-over-period percentage change of the closing price.

    change = (current close - previous close) / previous close

    The input frame is modified in place: a ``close_pct`` column is added
    (or overwritten) and the same object is returned. The first row has no
    previous close, so its ``close_pct`` is NaN.

    :param data: DataFrame with a ``close`` column
    :return: the same DataFrame, now carrying a ``close_pct`` column
    """
    # Previous row's close, computed once and reused as both the subtrahend
    # and the denominator.
    prev_close = data['close'].shift(1)
    data['close_pct'] = (data['close'] - prev_close) / prev_close
    return data