@@ -167,11 +167,9 @@ def _slice_categorical_stream(df1, df2, first_ts=None, last_ts=None):
 
     if first_ts is None:
         first_ts = max(df2.loc[0, TIME], df1.loc[0, TIME])
-        first_ts -= pd.Timedelta('1ms')
     if last_ts is None:
         last_ts = min(df2.loc[df2.index[-1], TIME],
                       df1.loc[df1.index[-1], TIME])
-        last_ts += pd.Timedelta('1ms')
 
     df1_tmp = df1.copy()
     df2_tmp = df2.copy()
@@ -189,8 +187,8 @@ def _slice_categorical_stream(df1, df2, first_ts=None, last_ts=None):
     df1[ACTIVITY] = df1[ACTIVITY].ffill()
     df2[ACTIVITY] = df2[ACTIVITY].ffill()
 
-    df1 = df1[(first_ts < df1[TIME]) & (df1[TIME] < last_ts)]
-    df2 = df2[(first_ts < df2[TIME]) & (df2[TIME] < last_ts)]
+    df1 = df1[(first_ts <= df1[TIME]) & (df1[TIME] <= last_ts)]
+    df2 = df2[(first_ts <= df2[TIME]) & (df2[TIME] <= last_ts)]
 
     df = df1.copy()
     df[lbl2_col] = df2[ACTIVITY]
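Reviewer note: a minimal, self-contained sketch (toy data and plain column names, not the library's TIME/ACTIVITY constants) of why the inclusive bounds above make the removed 1 ms padding unnecessary: rows that fall exactly on first_ts or last_ts are now kept by <= instead of having to widen the window by an epsilon.

import pandas as pd

# Toy categorical stream; 'time'/'activity' stand in for the TIME/ACTIVITY constants.
times = pd.to_datetime(['2023-01-01 08:00:00',
                        '2023-01-01 08:05:00',
                        '2023-01-01 08:10:00'])
df = pd.DataFrame({'time': times, 'activity': ['other', 'cook', 'eat']})
first_ts, last_ts = times[0], times[-1]

# Old behaviour: strict bounds drop the boundary rows unless first_ts/last_ts
# are padded by something like pd.Timedelta('1ms').
strict = df[(first_ts < df['time']) & (df['time'] < last_ts)]

# New behaviour: inclusive bounds keep the boundary rows directly.
inclusive = df[(first_ts <= df['time']) & (df['time'] <= last_ts)]

print(len(strict), len(inclusive))  # 1 3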
@@ -211,7 +209,7 @@ def online_confusion_matrix(y_true: pd.DataFrame=None, y_pred: np.ndarray=None,
     Parameters
     ----------
     y_true : pd.DataFrame
-    y_true : pd.DataFrame
+    y_pred : np.ndarray
     times : pd.DataFrame
     df : pd.DataFrame
         The already prepared dataframe
@@ -299,7 +297,7 @@ def add_other(df_acts, add_offset=False):
 
 
 
-def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
+def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, y_times: np.ndarray) -> pd.DataFrame:
     """
 
     CAVE add the 'other' activity
@@ -313,6 +311,10 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
     times : np.ndarray datetime64[ns], shape (N, )
         Contains the times the predictions were made
 
+    Attention
+    ---------
+    The last prediction is not included, since its duration is not known.
+
 
     Example
     -------
         time                  y_pred    y_true    diff    y_pred_idx
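Reviewer note: to make the new Attention paragraph and the Example columns concrete, here is a small standalone sketch with hypothetical values (plain 'time' column instead of the TIME constant):

import pandas as pd

df = pd.DataFrame({
    'time': pd.to_datetime(['2023-01-01 08:00:00', '2023-01-01 08:00:30',
                            '2023-01-01 08:01:10', '2023-01-01 08:02:00']),
    'y_pred': ['other', 'cook', 'cook', 'eat'],
    'y_true': ['other', 'cook', 'eat', 'eat'],
})
# Each prediction lasts until the next prediction time ...
df['diff'] = df['time'].shift(-1) - df['time']
# ... so the last prediction has no known duration (NaT) and is dropped.
df = df.iloc[:-1]
print(df)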
@@ -332,9 +334,9 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
 
     if is_activity_df(y_true):
 
-        y_pred, times = y_pred.squeeze(), times.squeeze()
+        y_pred, y_times = y_pred.squeeze(), y_times.squeeze()
 
-        df_y_pred = pd.DataFrame({TIME: times, 'y_pred': y_pred})
+        df_y_pred = pd.DataFrame({TIME: y_times, 'y_pred': y_pred})
         df_y_pred = df_y_pred.sort_values(by=TIME)[[TIME, 'y_pred']] \
                              .reset_index(drop=True)
 
@@ -352,12 +354,13 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
 
 
         # Clip Ground truth to predictions or pad GT with other such
-        # That both series start and end at the same time
+        # that the ground truth envelops the predictions by an epsilon amount of time
         df_sel_y_true, df_sel_y_pred = df_y_true.copy(), df_y_pred.copy()
         if df_sel_y_pred[TIME].iat[-1] < df_sel_y_true[TIME].iat[-1]:
             # Preds end before GT -> clip GT to preds
             mask = (df_sel_y_true[TIME] < df_sel_y_pred[TIME].iat[-1]).shift(fill_value=True)
             df_sel_y_true = df_sel_y_true[mask].reset_index(drop=True)
+            df_sel_y_true[TIME].iat[-1] = df_sel_y_pred[TIME].iat[-1] + epsilon
         else:
             # GT ends before preds -> add 'other' activity to GT
             df_sel_y_true = pd.concat([df_sel_y_true, pd.DataFrame({
@@ -372,7 +375,7 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
                 'y_true': [OTHER]
             }), df_sel_y_true]).reset_index(drop=True)
             clipped_true_to_preds = False
-        else:
+        else:
             # GT starts before Preds -> clip GT to preds
             mask = (df_sel_y_pred[TIME].iat[0] < df_sel_y_true[TIME]).shift(-1, fill_value=True)
             df_sel_y_true = df_sel_y_true[mask].reset_index(drop=True)
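Reviewer note: a rough sketch of the clipping branch described by the updated comment, using a toy frame, plain column names and an arbitrary epsilon (all hypothetical, not the library code). Ground-truth rows after the last prediction are dropped except one boundary row, whose timestamp is moved to just after the predictions so the ground truth covers the prediction window:

import pandas as pd

epsilon = pd.Timedelta('1ms')
gt = pd.DataFrame({'time': pd.to_datetime(['2023-01-01 08:00', '2023-01-01 08:40',
                                           '2023-01-01 09:00']),
                   'y_true': ['cook', 'eat', 'sleep']})
pred_end = pd.Timestamp('2023-01-01 08:30')  # time of the last prediction

# Keep rows before pred_end plus one boundary row (the shift), then retime
# that boundary row to just past the predictions.
mask = (gt['time'] < pred_end).shift(fill_value=True)
gt = gt[mask].reset_index(drop=True)
gt.loc[gt.index[-1], 'time'] = pred_end + epsilon
print(gt)  # cook @ 08:00, eat @ 08:30:00.001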
@@ -382,17 +385,16 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
         df = _slice_categorical_stream(df_sel_y_pred, df_sel_y_true)
 
     else:
-        y_true, y_pred, times = y_true.squeeze(), y_pred.squeeze(), times.squeeze()
+        y_true, y_pred, y_times = y_true.squeeze(), y_pred.squeeze(), y_times.squeeze()
 
-        df = pd.DataFrame(data=[times, y_true, y_pred],
+        df = pd.DataFrame(data=[y_times, y_true, y_pred],
                           index=[TIME, 'y_true', 'y_pred']).T
         df[TIME] = pd.to_datetime(df[TIME])
         raise
 
     df['diff'] = df[TIME].shift(-1) - df[TIME]
     # Remove last prediction since there is no td and remove first if GT was clipped
-    s_idx = 1 if clipped_true_to_preds else 0
-    df = df.iloc[s_idx:-1]
+    df = df.iloc[:-1]
     df.reset_index(inplace=True)
 
     # Create the new column using the index from df_y_pred
@@ -410,6 +412,8 @@ def _prepare_cat_stream(y_true: pd.DataFrame, y_pred: np.ndarray, times: np.ndarray) -> pd.DataFrame:
         assert prev_idx > 0
         df['y_true_idx'] = df['y_true_idx'].fillna(prev_idx - 1)\
                                            .astype(int)
+    else:
+        df['y_true_idx'] = df['y_true_idx'].astype(int)
 
     return df.drop(columns=['index'])
 
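Reviewer note: the added else-branch only guarantees that y_true_idx is integer-typed on both paths, not just when missing indices had to be filled. A tiny sketch, assuming the column may otherwise carry a float dtype:

import pandas as pd

y_true_idx = pd.Series([0.0, 0.0, 1.0])  # no NaNs left, but still float
y_true_idx = y_true_idx.astype(int)      # mirrors the new else-branch
print(y_true_idx.dtype)                  # int64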
@@ -424,18 +428,27 @@ def online_max_calibration_error(y_true, y_pred, y_conf, y_times, num_bins):
     return bin_data['max_calibration_error']
 
 
-def relative_rate(df_y_true: pd.DataFrame, y_pred: np.ndarray, y_times: np.ndarray, average: str = 'micro'):
-    """ Calculates how often
+def relative_prediction_rate(y_true: pd.DataFrame, y_pred: np.ndarray, y_times: np.ndarray, average: str = 'micro'):
+    """ Calculates how often a prediction changes over the course of a true activity.
 
     Parameters
     ----------
-    df_y_true: pd.DataFrame
+    y_true: pd.DataFrame
+        An activity dataframe with columns ['start_time', 'end_time', 'activity']
+    y_pred: np.ndarray
+
+    y_times: np.ndarray
+
+    average: str, one of ['micro', 'macro'], default='micro'
+
+    Returns
+    -------
 
     """
 
     assert average in ['micro', 'macro']
 
-    df = _prepare_cat_stream(df_y_true, y_pred, y_times)
+    df = _prepare_cat_stream(y_true, y_pred, y_times)
     df['y_pred_changes'] = (df['y_pred'] != df['y_pred'].shift())\
                            & (df['y_true'] == df['y_true'].shift())
     counts_per_activity = df.groupby(['y_true', 'y_true_idx'])['y_pred_changes']\
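Reviewer note: the diff is cut off mid-statement above. For orientation, a hypothetical toy illustration (not the actual _prepare_cat_stream output) of what y_pred_changes marks, together with one plausible aggregation of the started groupby (the .sum() here is an assumption; the real reduction is not visible in this excerpt):

import pandas as pd

df = pd.DataFrame({
    'y_true':     ['cook', 'cook', 'cook', 'eat', 'eat'],
    'y_true_idx': [0, 0, 0, 1, 1],
    'y_pred':     ['cook', 'other', 'cook', 'eat', 'eat'],
})
# A change is flagged only when the prediction flips while the true activity stays the same.
df['y_pred_changes'] = (df['y_pred'] != df['y_pred'].shift()) \
                       & (df['y_true'] == df['y_true'].shift())
print(df.groupby(['y_true', 'y_true_idx'])['y_pred_changes'].sum())
# cook/0 -> 2 switches, eat/1 -> 0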