#!/usr/bin/env python3 # -*- coding: utf-8 -*- import math from .generics import _sum_wo_nan """ In order to shorten the length of the variables, the general convention in this file is to let: - I for a predicted event (start, stop), - Is for a list of predicted events, - J for a ground truth event, - Js for a list of ground truth events. """ def interval_length(J = (1,2)): """ Length of an interval :param J: couple representating the start and stop of an interval, or None :return: length of the interval, and 0 for a None interval """ if J is None: return(0) return(J[1] - J[0]) def sum_interval_lengths(Is = [(1,2),(3,4),(5,6)]): """ Sum of length of the intervals :param Is: list of intervals represented by starts and stops :return: sum of the interval length """ return(sum([interval_length(I) for I in Is])) def interval_intersection(I = (1, 3), J = (2, 4)): """ Intersection between two intervals I and J I and J should be either empty or represent a positive interval (no point) :param I: an interval represented by start and stop :param J: a second interval of the same form :return: an interval representing the start and stop of the intersection (or None if empty) """ if I is None: return(None) if J is None: return(None) I_inter_J = (max(I[0], J[0]), min(I[1], J[1])) if I_inter_J[0] >= I_inter_J[1]: return(None) else: return(I_inter_J) def interval_subset(I = (1, 3), J = (0, 6)): """ Checks whether I is a subset of J :param I: an non empty interval represented by start and stop :param J: a second non empty interval of the same form :return: True if I is a subset of J """ if (I[0] >= J[0]) and (I[1] <= J[1]): return True else: return False def cut_into_three_func(I, J): """ Cut an interval I into a partition of 3 subsets: the elements before J, the elements belonging to J, and the elements after J :param I: an interval represented by start and stop, or None for an empty one :param J: a non empty interval :return: a triplet of three intervals, each represented by either (start, stop) or None """ if I is None: return((None, None, None)) I_inter_J = interval_intersection(I, J) if I == I_inter_J: I_before = None I_after = None elif I[1] <= J[0]: I_before = I I_after = None elif I[0] >= J[1]: I_before = None I_after = I elif (I[0] <= J[0]) and (I[1] >= J[1]): I_before = (I[0], I_inter_J[0]) I_after = (I_inter_J[1], I[1]) elif I[0] <= J[0]: I_before = (I[0], I_inter_J[0]) I_after = None elif I[1] >= J[1]: I_before = None I_after = (I_inter_J[1], I[1]) else: raise ValueError('unexpected unconsidered case') return(I_before, I_inter_J, I_after) def get_pivot_j(I, J): """ Get the single point of J that is the closest to I, called 'pivot' here, with the requirement that I should be outside J :param I: a non empty interval (start, stop) :param J: another non empty interval, with empty intersection with I :return: the element j of J that is the closest to I """ if interval_intersection(I, J) is not None: raise ValueError('I and J should have a void intersection') j_pivot = None # j_pivot is a border of J if max(I) <= min(J): j_pivot = min(J) elif min(I) >= max(J): j_pivot = max(J) else: raise ValueError('I should be outside J') return(j_pivot) def integral_mini_interval(I, J): """ In the specific case where interval I is located outside J, integral of distance from x to J over the interval x \in I. This is the *integral* i.e. the sum. It's not the mean (not divided by the length of I yet) :param I: a interval (start, stop), or None :param J: a non empty interval, with empty intersection with I :return: the integral of distances d(x, J) over x \in I """ if I is None: return(0) j_pivot = get_pivot_j(I, J) a = min(I) b = max(I) return((b-a)*abs((j_pivot - (a+b)/2))) def integral_interval_distance(I, J): """ For any non empty intervals I, J, compute the integral of distance from x to J over the interval x \in I. This is the *integral* i.e. the sum. It's not the mean (not divided by the length of I yet) The interval I can intersect J or not :param I: a interval (start, stop), or None :param J: a non empty interval :return: the integral of distances d(x, J) over x \in I """ # I and J are single intervals (not generic sets) # I is a predicted interval in the range of affiliation of J def f(I_cut): return(integral_mini_interval(I_cut, J)) # If I_middle is fully included into J, it is # the distance to J is always 0 def f0(I_middle): return(0) cut_into_three = cut_into_three_func(I, J) # Distance for now, not the mean: # Distance left: Between cut_into_three[0] and the point min(J) d_left = f(cut_into_three[0]) # Distance middle: Between cut_into_three[1] = I inter J, and J d_middle = f0(cut_into_three[1]) # Distance right: Between cut_into_three[2] and the point max(J) d_right = f(cut_into_three[2]) # It's an integral so summable return(d_left + d_middle + d_right) def integral_mini_interval_P_CDFmethod__min_piece(I, J, E): """ Helper of `integral_mini_interval_Pprecision_CDFmethod` In the specific case where interval I is located outside J, compute the integral $\int_{d_min}^{d_max} \min(m, x) dx$, with: - m the smallest distance from J to E, - d_min the smallest distance d(x, J) from x \in I to J - d_max the largest distance d(x, J) from x \in I to J :param I: a single predicted interval, a non empty interval (start, stop) :param J: ground truth interval, a non empty interval, with empty intersection with I :param E: the affiliation/influence zone for J, represented as a couple (start, stop) :return: the integral $\int_{d_min}^{d_max} \min(m, x) dx$ """ if interval_intersection(I, J) is not None: raise ValueError('I and J should have a void intersection') if not interval_subset(J, E): raise ValueError('J should be included in E') if not interval_subset(I, E): raise ValueError('I should be included in E') e_min = min(E) j_min = min(J) j_max = max(J) e_max = max(E) i_min = min(I) i_max = max(I) d_min = max(i_min - j_max, j_min - i_max) d_max = max(i_max - j_max, j_min - i_min) m = min(j_min - e_min, e_max - j_max) A = min(d_max, m)**2 - min(d_min, m)**2 B = max(d_max, m) - max(d_min, m) C = (1/2)*A + m*B return(C) def integral_mini_interval_Pprecision_CDFmethod(I, J, E): """ Integral of the probability of distances over the interval I. In the specific case where interval I is located outside J, compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. This is the *integral* i.e. the sum (not the mean) :param I: a single predicted interval, a non empty interval (start, stop) :param J: ground truth interval, a non empty interval, with empty intersection with I :param E: the affiliation/influence zone for J, represented as a couple (start, stop) :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ """ integral_min_piece = integral_mini_interval_P_CDFmethod__min_piece(I, J, E) e_min = min(E) j_min = min(J) j_max = max(J) e_max = max(E) i_min = min(I) i_max = max(I) d_min = max(i_min - j_max, j_min - i_max) d_max = max(i_max - j_max, j_min - i_min) integral_linear_piece = (1/2)*(d_max**2 - d_min**2) integral_remaining_piece = (j_max - j_min)*(i_max - i_min) DeltaI = i_max - i_min DeltaE = e_max - e_min output = DeltaI - (1/DeltaE)*(integral_min_piece + integral_linear_piece + integral_remaining_piece) return(output) def integral_interval_probaCDF_precision(I, J, E): """ Integral of the probability of distances over the interval I. Compute the integral $\int_{x \in I} Fbar(dist(x,J)) dx$. This is the *integral* i.e. the sum (not the mean) :param I: a single (non empty) predicted interval in the zone of affiliation of J :param J: ground truth interval :param E: affiliation/influence zone for J :return: the integral $\int_{x \in I} Fbar(dist(x,J)) dx$ """ # I and J are single intervals (not generic sets) def f(I_cut): if I_cut is None: return(0) else: return(integral_mini_interval_Pprecision_CDFmethod(I_cut, J, E)) # If I_middle is fully included into J, it is # integral of 1 on the interval I_middle, so it's |I_middle| def f0(I_middle): if I_middle is None: return(0) else: return(max(I_middle) - min(I_middle)) cut_into_three = cut_into_three_func(I, J) # Distance for now, not the mean: # Distance left: Between cut_into_three[0] and the point min(J) d_left = f(cut_into_three[0]) # Distance middle: Between cut_into_three[1] = I inter J, and J d_middle = f0(cut_into_three[1]) # Distance right: Between cut_into_three[2] and the point max(J) d_right = f(cut_into_three[2]) # It's an integral so summable return(d_left + d_middle + d_right) def cut_J_based_on_mean_func(J, e_mean): """ Helper function for the recall. Partition J into two intervals: before and after e_mean (e_mean represents the center element of E the zone of affiliation) :param J: ground truth interval :param e_mean: a float number (center value of E) :return: a couple partitionning J into (J_before, J_after) """ if J is None: J_before = None J_after = None elif e_mean >= max(J): J_before = J J_after = None elif e_mean <= min(J): J_before = None J_after = J else: # e_mean is across J J_before = (min(J), e_mean) J_after = (e_mean, max(J)) return((J_before, J_after)) def integral_mini_interval_Precall_CDFmethod(I, J, E): """ Integral of the probability of distances over the interval J. In the specific case where interval J is located outside I, compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. This is the *integral* i.e. the sum (not the mean) :param I: a single (non empty) predicted interval :param J: ground truth (non empty) interval, with empty intersection with I :param E: the affiliation/influence zone for J, represented as a couple (start, stop) :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ """ # The interval J should be located outside I # (so it's either the left piece or the right piece w.r.t I) i_pivot = get_pivot_j(J, I) e_min = min(E) e_max = max(E) e_mean = (e_min + e_max) / 2 # If i_pivot is outside E (it's possible), then # the distance is worst that any random element within E, # so we set the recall to 0 if i_pivot <= min(E): return(0) elif i_pivot >= max(E): return(0) # Otherwise, we have at least i_pivot in E and so d < M so min(d,M)=d cut_J_based_on_e_mean = cut_J_based_on_mean_func(J, e_mean) J_before = cut_J_based_on_e_mean[0] J_after = cut_J_based_on_e_mean[1] iemin_mean = (e_min + i_pivot)/2 cut_Jbefore_based_on_iemin_mean = cut_J_based_on_mean_func(J_before, iemin_mean) J_before_closeE = cut_Jbefore_based_on_iemin_mean[0] # before e_mean and closer to e_min than i_pivot ~ J_before_before J_before_closeI = cut_Jbefore_based_on_iemin_mean[1] # before e_mean and closer to i_pivot than e_min ~ J_before_after iemax_mean = (e_max + i_pivot)/2 cut_Jafter_based_on_iemax_mean = cut_J_based_on_mean_func(J_after, iemax_mean) J_after_closeI = cut_Jafter_based_on_iemax_mean[0] # after e_mean and closer to i_pivot than e_max ~ J_after_before J_after_closeE = cut_Jafter_based_on_iemax_mean[1] # after e_mean and closer to e_max than i_pivot ~ J_after_after if J_before_closeE is not None: j_before_before_min = min(J_before_closeE) # == min(J) j_before_before_max = max(J_before_closeE) else: j_before_before_min = math.nan j_before_before_max = math.nan if J_before_closeI is not None: j_before_after_min = min(J_before_closeI) # == j_before_before_max if existing j_before_after_max = max(J_before_closeI) # == max(J_before) else: j_before_after_min = math.nan j_before_after_max = math.nan if J_after_closeI is not None: j_after_before_min = min(J_after_closeI) # == min(J_after) j_after_before_max = max(J_after_closeI) else: j_after_before_min = math.nan j_after_before_max = math.nan if J_after_closeE is not None: j_after_after_min = min(J_after_closeE) # == j_after_before_max if existing j_after_after_max = max(J_after_closeE) # == max(J) else: j_after_after_min = math.nan j_after_after_max = math.nan # <-- J_before_closeE --> <-- J_before_closeI --> <-- J_after_closeI --> <-- J_after_closeE --> # j_bb_min j_bb_max j_ba_min j_ba_max j_ab_min j_ab_max j_aa_min j_aa_max # (with `b` for before and `a` for after in the previous variable names) # vs e_mean m = min(t-e_min, e_max-t) d=|i_pivot-t| min(d,m) \int min(d,m)dt \int d dt \int_(min(d,m)+d)dt \int_{t \in J}(min(d,m)+d)dt # Case J_before_closeE & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = t-e_min t^2/2-e_min*t i_pivot*t-t^2/2 t^2/2-e_min*t+i_pivot*t-t^2/2 = (i_pivot-e_min)*t (i_pivot-e_min)*tB - (i_pivot-e_min)*tA = (i_pivot-e_min)*(tB-tA) # Case J_before_closeI & i_pivot after J before t-e_min i_pivot-t min(i_pivot-t,t-e_min) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # Case J_after_closeI & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = i_pivot-t i_pivot*t-t^2/2 i_pivot*t-t^2/2 i_pivot*t-t^2/2+i_pivot*t-t^2/2 = 2*i_pivot*t-t^2 2*i_pivot*tB-tB^2 - 2*i_pivot*tA + tA^2 = 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # Case J_after_closeE & i_pivot after J after e_max-t i_pivot-t min(i_pivot-t,e_max-t) = e_max-t e_max*t-t^2/2 i_pivot*t-t^2/2 e_max*t-t^2/2+i_pivot*t-t^2/2 = (e_max+i_pivot)*t-t^2 (e_max+i_pivot)*tB-tB^2 - (e_max+i_pivot)*tA + tA^2 = (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # # Case J_before_closeE & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-e_min t^2/2-e_min*t t^2/2-i_pivot*t t^2/2-e_min*t+t^2/2-i_pivot*t = t^2-(e_min+i_pivot)*t tB^2-(e_min+i_pivot)*tB - tA^2 + (e_min+i_pivot)*tA = (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # Case J_before_closeI & i_pivot before J before t-e_min t-i_pivot min(t-i_pivot,t-e_min) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # Case J_after_closeI & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = t-i_pivot t^2/2-i_pivot*t t^2/2-i_pivot*t t^2/2-i_pivot*t+t^2/2-i_pivot*t = t^2-2*i_pivot*t tB^2-2*i_pivot*tB - tA^2 + 2*i_pivot*tA = (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # Case J_after_closeE & i_pivot before J after e_max-t t-i_pivot min(t-i_pivot,e_max-t) = e_max-t e_max*t-t^2/2 t^2/2-i_pivot*t e_max*t-t^2/2+t^2/2-i_pivot*t = (e_max-i_pivot)*t (e_max-i_pivot)*tB - (e_max-i_pivot)*tA = (e_max-i_pivot)*(tB-tA) if i_pivot >= max(J): part1_before_closeE = (i_pivot-e_min)*(j_before_before_max - j_before_before_min) # (i_pivot-e_min)*(tB-tA) # j_before_before_max - j_before_before_min part2_before_closeI = 2*i_pivot*(j_before_after_max-j_before_after_min) - (j_before_after_max**2 - j_before_after_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_before_after_max - j_before_after_min part3_after_closeI = 2*i_pivot*(j_after_before_max-j_after_before_min) - (j_after_before_max**2 - j_after_before_min**2) # 2*i_pivot*(tB-tA) - (tB^2 - tA^2) # j_after_before_max - j_after_before_min part4_after_closeE = (e_max+i_pivot)*(j_after_after_max-j_after_after_min) - (j_after_after_max**2 - j_after_after_min**2) # (e_max+i_pivot)*(tB-tA) - (tB^2 - tA^2) # j_after_after_max - j_after_after_min out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] elif i_pivot <= min(J): part1_before_closeE = (j_before_before_max**2 - j_before_before_min**2) - (e_min+i_pivot)*(j_before_before_max-j_before_before_min) # (tB^2 - tA^2) - (e_min+i_pivot)*(tB-tA) # j_before_before_max - j_before_before_min part2_before_closeI = (j_before_after_max**2 - j_before_after_min**2) - 2*i_pivot*(j_before_after_max-j_before_after_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_before_after_max - j_before_after_min part3_after_closeI = (j_after_before_max**2 - j_after_before_min**2) - 2*i_pivot*(j_after_before_max - j_after_before_min) # (tB^2 - tA^2) - 2*i_pivot*(tB-tA) # j_after_before_max - j_after_before_min part4_after_closeE = (e_max-i_pivot)*(j_after_after_max - j_after_after_min) # (e_max-i_pivot)*(tB-tA) # j_after_after_max - j_after_after_min out_parts = [part1_before_closeE, part2_before_closeI, part3_after_closeI, part4_after_closeE] else: raise ValueError('The i_pivot should be outside J') out_integral_min_dm_plus_d = _sum_wo_nan(out_parts) # integral on all J, i.e. sum of the disjoint parts # We have for each point t of J: # \bar{F}_{t, recall}(d) = 1 - (1/|E|) * (min(d,m) + d) # Since t is a single-point here, and we are in the case where i_pivot is inside E. # The integral is then given by: # C = \int_{t \in J} \bar{F}_{t, recall}(D(t)) dt # = \int_{t \in J} 1 - (1/|E|) * (min(d,m) + d) dt # = |J| - (1/|E|) * [\int_{t \in J} (min(d,m) + d) dt] # = |J| - (1/|E|) * out_integral_min_dm_plus_d DeltaJ = max(J) - min(J) DeltaE = max(E) - min(E) C = DeltaJ - (1/DeltaE) * out_integral_min_dm_plus_d return(C) def integral_interval_probaCDF_recall(I, J, E): """ Integral of the probability of distances over the interval J. Compute the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$. This is the *integral* i.e. the sum (not the mean) :param I: a single (non empty) predicted interval :param J: ground truth (non empty) interval :param E: the affiliation/influence zone for J :return: the integral $\int_{y \in J} Fbar_y(dist(y,I)) dy$ """ # I and J are single intervals (not generic sets) # E is the outside affiliation interval of J (even for recall!) # (in particular J \subset E) # # J is the portion of the ground truth affiliated to I # I is a predicted interval (can be outside E possibly since it's recall) def f(J_cut): if J_cut is None: return(0) else: return integral_mini_interval_Precall_CDFmethod(I, J_cut, E) # If J_middle is fully included into I, it is # integral of 1 on the interval J_middle, so it's |J_middle| def f0(J_middle): if J_middle is None: return(0) else: return(max(J_middle) - min(J_middle)) cut_into_three = cut_into_three_func(J, I) # it's J that we cut into 3, depending on the position w.r.t I # since we integrate over J this time. # # Distance for now, not the mean: # Distance left: Between cut_into_three[0] and the point min(I) d_left = f(cut_into_three[0]) # Distance middle: Between cut_into_three[1] = J inter I, and I d_middle = f0(cut_into_three[1]) # Distance right: Between cut_into_three[2] and the point max(I) d_right = f(cut_into_three[2]) # It's an integral so summable return(d_left + d_middle + d_right)