# coding=utf-8
import datetime
from datetime import timedelta
import logging

import pandas as pd

from web.utils import get_today_midnight_date
from web.models.holiday import Holiday
from web.models.availability import Availability
from web.models.appointment import Appointment
from web.models.appointment_type_link import AppointmentTypeLink
from web.models.constants import AVAILABILITY_EXTRA, AVAILABILITY_HOLIDAY

logger = logging.getLogger(__name__)

# only for plot method
import matplotlib
matplotlib.use('Agg')  # backend has to be chosen before pyplot is imported
import matplotlib.pyplot as plt


class OfficeAvailability(object):
    '''
    Slot-by-slot availability over a time range.

    The state lives in `self.availability`, a pandas Series indexed by
    `pd.date_range(start, end, freq=minimum_slot)` whose values are
    0 (not available) or 1 (available). Every slot starts at 0.

    name: label used in log messages, in the plot title and in the plot file name
    start: datetime-like indicating when the range starts. If None, then today midnight
    end: datetime-like indicating when the range ends. If None, then tomorrow midnight
    office_start: when the office hours begin
    office_end: when the office hours finish
    minimum_slot: frequency of the pandas series. T stands for minutes.
    Docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html
    '''

    def __init__(self, name, start=None, end=None, office_start='8:00', office_end='18:00', minimum_slot='1T'):
        today_midnight = get_today_midnight_date()
        tomorrow_midnight = today_midnight + datetime.timedelta(days=1)

        self.start = today_midnight if start is None else start
        self.end = tomorrow_midnight if end is None else end
        self.name = name
        self.office_start = office_start
        self.office_end = office_end
        self.minimum_slot = minimum_slot
        self.range = pd.date_range(start=self.start, end=self.end, freq=self.minimum_slot)
        logger.debug(u'Name: {}. Min index: {}. Max index: {}'.format(self.name, self.start, self.end))
        self.availability = pd.Series(index=self.range, data=0)  # initialize range at 0

    def _get_duration(self):
        '''
        Private method. Returns the difference between the last and the first
        timestamp of the availability index.
        '''
        index = self.availability.index
        return index[-1] - index[0]

    def _set_range(self, date_range, value, only_working_hours):
        '''
        Private method. Writes `value` into every slot of `date_range`.

        The timestamps are rounded to `minimum_slot` first and, when
        `only_working_hours` is true, restricted to the office hours.
        Slots that are not part of the availability index are ignored, which
        is the same "limit to the object time space" policy used by
        `consider_this`.
        '''
        slots = date_range.round(self.minimum_slot)
        if only_working_hours:
            slots = slots.to_series().between_time(self.office_start, self.office_end).index
        # A boolean mask never references missing labels, so ranges that are
        # partly (or completely) out of bounds cannot make the assignment fail.
        self.availability.loc[self.availability.index.isin(slots)] = value

    def add_availability(self, range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to one for the specific interval of the provided range.
        Timestamps outside the tracked range are ignored.
        '''
        self._set_range(range, 1, only_working_hours)

    def remove_availability(self, range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to zero for the specific interval of the provided range.
        Timestamps outside the tracked range are ignored.
        '''
        self._set_range(range, 0, only_working_hours)

    def _ensure_dates_are_in_bounds(self, given_start, given_end):
        '''
        Clamps the given period to the limits of the availability index.

        given_start and given_end should not be a string but if so,
        they must comply with pd.Timestamp requirements.

        Returns the tuple (start, end) as pd.Timestamp, with start <= end.
        Raises ValueError if the period does not overlap the availability index.
        '''
        # sort dates to ensure start <= end
        start, end = sorted([pd.Timestamp(given_start), pd.Timestamp(given_end)])
        lower = self.availability.index.min()
        upper = self.availability.index.max()

        # Since start <= end, the period misses the index only when both dates
        # are above its maximum or both are below its minimum.
        if start > upper or end < lower:
            raise ValueError('Period from {} to {} does not overlap the availability range'.format(start, end))

        if start < lower:
            start = lower
        if end > upper:
            end = upper
        return start, end

    def _select_bounded_portion(self, start, end, label):
        '''
        Private method. Returns the slice of the availability series between
        start and end (both inclusive) after clamping them to the index limits,
        or None if the period does not overlap the index at all.

        label: name of the kind of object being considered, only used for logging.
        '''
        # Clamping also avoids memory issues (for example, someone asking from
        # 1960 to 2120 would otherwise describe a huge range).
        try:
            start, end = self._ensure_dates_are_in_bounds(start, end)
        except ValueError:
            logger.debug('{0} range does not overlap the availability range. Ignoring {0}.'.format(label))
            return None
        # Label slicing picks the existing slots inside [start, end]. Building a
        # fresh pd.date_range from `start` would produce labels that are missing
        # from the index whenever `start` is not aligned to the slot grid.
        return self.availability.loc[start:end]

    def consider_this(self, appointment_availability_or_holiday, only_working_hours=False):
        '''
        :appointment_availability_or_holiday can be an object from the following classes: Availability, Holiday, Appointment, AppointmentTypeLink.
        :only_working_hours if true, changes are limited to the defined working hours

        Availability repeats every week.
        Availability always refers to a moment in which the worker should be working. Never the opposite.
        Holiday has higher preference because it refers to extraordinary events like extra availability or lack of availability.
        Holiday modifies the status of Availability for specific periods of time.

        Effect per class:
        - Availability: sets to 1 the slots of the matching weekday between available_from and available_till.
        - Holiday: sets the period to 1 if its kind is AVAILABILITY_EXTRA, otherwise to 0.
        - Appointment and AppointmentTypeLink: set the period to 0.

        Periods that extend beyond the limits of the time series are cut to the
        object time space; periods that do not overlap it at all are ignored
        (a debug message is logged). The alternative, extending the object time
        space, is not implemented. Notwithstanding, this shouldn't be needed
        because in previous steps we should receive the availabilities queried
        to the limits of this object's time space.

        Raises TypeError if the object is not an instance of the expected classes.
        '''
        item = appointment_availability_or_holiday

        if isinstance(item, Availability):
            start = item.available_from
            end = item.available_till
            weekday = item.day_number
            logger.debug('Considering Availability from {} to {} for weekday {}'.format(start, end, weekday))
            # pandas counts weekdays from 0 (Monday); the -1 presumably maps a
            # day_number that starts at 1 - TODO confirm against the model.
            same_weekday = self.availability[self.availability.index.weekday == (weekday - 1)]
            portion = same_weekday.between_time(start, end)  # then the specific hours
            set_to = 1
        elif isinstance(item, Holiday):
            start = item.datetime_start
            end = item.datetime_end
            is_extra = item.kind == AVAILABILITY_EXTRA
            logger.debug('Considering {} from {} to {}'.format(
                'Extra Availability' if is_extra else 'Holiday', start, end))
            portion = self._select_bounded_portion(start, end, 'Holiday')
            set_to = 1 if is_extra else 0
        elif isinstance(item, Appointment):
            start = item.datetime_when
            end = start + datetime.timedelta(minutes=item.length)
            logger.debug('Considering General Appointment from {} to {}'.format(start, end))
            portion = self._select_bounded_portion(start, end, 'Appointment')
            set_to = 0
        elif isinstance(item, AppointmentTypeLink):
            start = item.date_when
            end = start + datetime.timedelta(minutes=item.appointment_type.default_duration)
            logger.debug('Considering Subject Appointment from {} to {}'.format(start, end))
            portion = self._select_bounded_portion(start, end, 'AppointmentTypeLink')
            set_to = 0
        else:
            message = 'Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.'
            logger.error(message)
            raise TypeError(message)

        if portion is None:
            # the period does not overlap the availability range: nothing to change
            return

        if only_working_hours:
            portion = portion.between_time(self.office_start, self.office_end)

        # limit portion to be changed to the bounds of the object time space
        lower = self.availability.index.min()
        upper = self.availability.index.max()
        portion = portion[(lower <= portion.index) & (portion.index <= upper)]
        self.availability[portion.index] = set_to

    def get_availability_percentage(self, only_working_hours=False):
        '''
        Returns the percentage (0 to 100) of slots that are available.
        If only_working_hours is true, only the slots within the office hours are counted.

        For multiple values this is the solution: return self.availability.value_counts().div(len(s))[1] * 100
        But since it's 0 or 1, this works as well and is faster: return self.availability.mean() * 100
        To test it:
        import pandas as pd
        range = pd.date_range(start='2018-10-1', end='2018-10-2 01:00', freq='5T', closed=None)
        s = pd.Series(index=range, data=0)
        range2 = pd.date_range(start='2018-10-1 1:00', end='2018-10-1 2:30', freq='5T')
        s[range2] = 1
        print(s.value_counts().div(len(s))[1]*100) # prints 6.312292358803987
        print(s.mean()*100) # prints 6.312292358803987
        %timeit s.value_counts().div(len(s))[1]*100
        # 504 µs ± 19.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
        %timeit s.mean()*100
        # 56.3 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
        '''
        availability = self.availability
        if only_working_hours:
            availability = availability.between_time(self.office_start, self.office_end)
        return availability.mean() * 100  # better to isolate the operation in case we change it later

    def is_available(self, only_working_hours=False):
        '''
        Returns True if the selected period is available more than 50% of the time.
        Otherwise returns False (exactly 50% counts as not available).
        '''
        return self.get_availability_percentage(only_working_hours=only_working_hours) > 50.0

    def plot_availability(self):
        '''
        Plot availability chart.

        Draws the business hours as a shaded area and the availability as a
        hatched line, then saves the chart as '<name>_<start>_<end>.pdf'
        (relative path). The business hours series is also stored in
        `self.business_hours`.
        '''
        # rc_context restores the previous hatch width on exit instead of
        # changing the global matplotlib configuration for good.
        with matplotlib.rc_context({'hatch.linewidth': 1}):
            fig = plt.figure()  # draw on a dedicated figure instead of the current one
            try:
                ax = fig.gca()  # axes of the new figure

                logger.debug('business_hours: {} {}'.format(self.office_start, self.office_end))
                business_hours = self.business_hours = pd.Series(index=self.range, data=0)
                mask = business_hours.between_time(self.office_start, self.office_end).index
                business_hours[mask] = 1
                ax = business_hours.plot(kind='area', alpha = 0.33, color='#1190D8', label='Business Hours', legend=True, ax=ax)

                # calculate good xticks: one tick every `whole days` hours, or,
                # for ranges shorter than a day, every `whole hours` minutes
                # (at least one minute)
                total_seconds = self._get_duration().total_seconds()
                hours = total_seconds/3600
                n_ticks = int(hours/24)
                if n_ticks == 0:
                    minutes = total_seconds/60
                    n_ticks = int(minutes/60)
                    if n_ticks == 0:
                        n_ticks = 1
                    xticks = self.availability.asfreq('{}T'.format(n_ticks)).index
                else:
                    xticks = self.availability.asfreq('{}H'.format(n_ticks)).index

                title = 'Availability for {} from {} to {}'.format(self.name, self.start.strftime('%Y/%m/%d %H:%M'), self.end.strftime('%Y/%m/%d %H:%M'))

                ax = self.availability.plot(figsize=(16, 8), grid = True, title=title, legend=True, label='Availability', color='#00af52', xticks=xticks, ax=ax, yticks=[0,1])
                ax.fill_between(self.availability.index, self.availability.tolist(), facecolor="none", hatch='//', edgecolor="#00af52", alpha=1, linewidth=0.5)

                ax.set_axisbelow(True)
                ax.yaxis.grid(color='gray', linewidth=0.5, alpha=0)
                ax.xaxis.grid(color='gray', linewidth=0.5, alpha=1)
                ax.set_yticklabels(['False', 'True'])
                ax.set_ylabel('Is Available ?')
                ax.set_xlabel('Date & Time')

                fig.tight_layout()
                fig.savefig('{}_{}_{}.pdf'.format(self.name, self.start.strftime('%Y%m%d%H%M'), self.end.strftime('%Y%m%d%H%M')))
            finally:
                # pyplot keeps a reference to every figure it creates; close it
                # so repeated calls do not accumulate figures in memory.
                plt.close(fig)