Time Series Analytics
Examples:
In [1]: from wax_toolbox.tsanalytics import analyse_datetimeindex
In [2]: idx_gap # let's look at that pd.DatetimeIndex
Out[2]:
DatetimeIndex(['2016-03-01 02:00:00+01:00', '2016-03-01 03:00:00+01:00',
'2016-03-01 04:00:00+01:00', '2016-03-01 07:00:00+01:00',
'2016-03-01 08:00:00+01:00', '2016-03-01 09:00:00+01:00',
'2016-03-01 14:00:00+01:00', '2016-03-01 15:00:00+01:00',
'2016-03-01 16:00:00+01:00', '2016-03-01 17:00:00+01:00',
...
'2016-03-30 15:00:00+02:00', '2016-03-30 16:00:00+02:00',
'2016-03-30 17:00:00+02:00', '2016-03-30 18:00:00+02:00',
'2016-03-30 19:00:00+02:00', '2016-03-30 20:00:00+02:00',
'2016-03-30 21:00:00+02:00', '2016-03-30 22:00:00+02:00',
'2016-03-30 23:00:00+02:00', '2016-03-31 00:00:00+02:00'],
dtype='datetime64[ns, CET]', length=712, freq=None)
In [3]: tsinfo = analyse_datetimeindex(idx_gap, start=start, end=end)
In [4]: tsinfo
Out[4]:
freq: <Hour>
sorted: True
continuous: [(Timestamp('2016-03-01 02:00:00+0100', tz='CET'), Timestamp('2016-03-01 04:00:00+0100', tz='CET')), (Timestamp('2016-03-01 07:00:00+0100', tz='CET'), Timestamp('2016-03-01 09:00:00+0100', tz='CET')), (Timestamp('2016-03-01 14:00:00+0100', tz='CET'), Timestamp('2016-03-31 00:00:00+0200', tz='CET'))]
gaps: [(Timestamp('2016-03-01 00:00:00+0100', tz='CET'), Timestamp('2016-03-01 01:00:00+0100', tz='CET')), (Timestamp('2016-03-01 05:00:00+0100', tz='CET'), Timestamp('2016-03-01 06:00:00+0100', tz='CET')), (Timestamp('2016-03-01 10:00:00+0100', tz='CET'), Timestamp('2016-03-01 13:00:00+0100', tz='CET'))]
duplicates: []
In [5]: print('This timeserie got a (minimal) frequency of {}'.format(tsinfo.freq))