滑动窗口rolling

df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
df
     B
0  0.0
1  1.0
2  2.0
3  NaN
4  4.0

df.rolling(2).sum()
     B
0  NaN
1  1.0
2  3.0
3  NaN
4  NaN

收益率

daily_close.pct_change()
daily_pct_change = daily_close / daily_close.shift(1) -1

获取每月第一天数据

df.index = pd.to_datetime(df.index)
time_month = df.index.strftime("%Y-%m")
df.index = time_month
df.groupby(df.index).first()

调整x轴横坐标

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdate

plt.rcParams['font.sans-serif'] = ['Simhei'] #显示中文
plt.rcParams['axes.unicode_minus'] = False #显示负号


x = df300['tradeDate']
# 获取收盘价收益率
y = df300['avgReturn1M']
# 调整图像大小
plt.figure(figsize=(10,5))

# 绘制曲线
plt.plot(x, y, label="沪深300")

plt.title("沪深300指数收益率")
ax = plt.gca()
# 横坐标显示的日期
x = pd.to_datetime(x)
x = x.dt.strftime('%Y-%m-%d')
# 修改横坐标显示的数值
xtricks_list = pd.date_range(x.iloc[0],x.iloc[-1], freq='6m').strftime('%Y-%m-%d').tolist()

plt.xticks(xtricks_list)
plt.xlabel("时间")
plt.ylabel("收益率")

plt.show()

监测数据缺失情况

# check for nan under a single DF column
df['column name'].isnull().values.any()

# count the nan under a single DF column
df['column name'].isnull().sum()

# check for nan under an entire DF
df.isnull().values.any()

# count the nan under an entire DF
df.isnull().sum().sum()

时间序列处理

# 时间序列整理datetime format
df = pd.DataFrame({"Date": ["26-12-2007", "27-12-2007", "28-12-2007"]})
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d')
df["Date"] = pd.to_datetime(df["Date"]).dt.strftime('%Y-%m-%d %H:%M:%S')


# 时间范围生成range
DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00','2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')
# periods代表生成的数量
dti = pd.date_range('2018-01-01', periods=3, freq='H')

DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
               '2018-04-27 00:00:00'],
              dtype='datetime64[ns]', freq=None)
pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)

# 生成2000-01-01到2001-02-06的月末数据
date_range = pd.date_range(start="2000-01-01", end="2001-02-06", freq='ME')
2000-01-31
2000-02-29
...
...

# 获取该月份第一天的日期
start_date = (date.floor('d') + pd.offsets.MonthEnd(0) - pd.offsets.MonthBegin(1)).strftime('%Y-%m-%d')

会计表达

# 万分位
f'{num / 10000:.2f} 万'

计算涨跌幅

def calculate_change_pct(data):
    """
    涨跌幅 = (当期收盘价-前期收盘价) / 前期收盘价
    :param data: dataframe,带有收盘价
    :return: dataframe,带有涨跌幅
    """
    data['close_pct'] = (data['close'] - data['close'].shift(1)) / data['close'].shift(1)
    return data