-
Carlos Vega authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
officeAvailability.py 11.25 KiB
# coding=utf-8
import datetime
from datetime import timedelta
import logging
import pandas as pd
from web.utils import get_today_midnight_date
from web.models.holiday import Holiday
from web.models.availability import Availability
from web.models.appointment import Appointment
from web.models.appointment_type_link import AppointmentTypeLink
from web.models.constants import AVAILABILITY_EXTRA, AVAILABILITY_HOLIDAY
logger = logging.getLogger(__name__)
#only for plot method
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
class OfficeAvailability(object):
    '''
    Availability of an office/worker over a time range, kept as a pandas time series.

    The state lives in `self.availability`: a pd.Series indexed by a regular pd.date_range
    (from `start` to `end`, one point every `minimum_slot`) whose values are
    1 (available) or 0 (not available). Every slot starts at 0.

    name: label used in the log messages, the plot title and the plot file name
    start: datetime-like indicating when the range starts. If None, then today midnight
    end: datetime-like indicating when the range ends. If None, then tomorrow midnight
    office_start: when the office hours begin
    office_end: when the office hours finish
    minimum_slot: frequency of the pandas series. T stands for minutes. Docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html
                  NOTE: recent pandas releases deprecate the 'T' alias in favour of 'min'; callers may pass e.g. '1min'.
    '''

    def __init__(self, name, start=None, end=None, office_start='8:00', office_end='18:00', minimum_slot='1T'):
        today_midnight = get_today_midnight_date()
        self.start = today_midnight if start is None else start
        # the default end is tomorrow midnight, regardless of the given start
        self.end = today_midnight + datetime.timedelta(days=1) if end is None else end
        self.name = name
        self.office_start = office_start
        self.office_end = office_end
        self.minimum_slot = minimum_slot
        self.range = pd.date_range(start=self.start, end=self.end, freq=self.minimum_slot)
        logger.debug(u'Name: {}. Min index: {}. Max index: {}'.format(self.name, self.start, self.end))
        self.availability = pd.Series(index=self.range, data=0)  # initialize range at 0

    def _get_duration(self):
        '''
        Private method. Returns the difference (a timedelta) between the last and the
        first timestamps of the availability series.
        '''
        index = self.availability.index
        return index[-1] - index[0]

    def _set_availability(self, slots, value, only_working_hours):
        '''
        Private method. Writes `value` (0 or 1) on the given timestamps.

        slots: pandas DatetimeIndex; it is rounded to `minimum_slot` before being used
        only_working_hours: if True, timestamps outside the office hours are left untouched
        '''
        slots = slots.round(self.minimum_slot)
        if only_working_hours:
            slots = slots.to_series().between_time(self.office_start, self.office_end).index
        self.availability[slots] = value

    def add_availability(self, range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to one for the specific interval of the provided range.
        If only_working_hours is True, only the slots within the office hours are changed.
        '''
        # `range` shadows the builtin but is kept because it is part of the public signature
        self._set_availability(range, 1, only_working_hours)

    def remove_availability(self, range, only_working_hours=False):
        '''
        Receives a pandas date_range `pd.date_range` object.
        Sets the availability to zero for the specific interval of the provided range.
        If only_working_hours is True, only the slots within the office hours are changed.
        '''
        # `range` shadows the builtin but is kept because it is part of the public signature
        self._set_availability(range, 0, only_working_hours)

    def _ensure_dates_are_in_bounds(self, given_start, given_end):
        '''
        Sorts the two given dates and clamps them to the limits of the availability series.

        given_start and given_end should not be a string but if so, they must comply with pd.Timestamp requirements

        Returns a (start, end) tuple of pd.Timestamp with start <= end, both within the series limits.
        Raises ValueError if the given period does not overlap the availability series at all.
        '''
        # sort dates to ensure start <= end
        start, end = sorted([pd.Timestamp(given_start), pd.Timestamp(given_end)])
        lower = self.availability.index.min()
        upper = self.availability.index.max()
        # since the dates are sorted, this only happens when both of them are beyond the same limit,
        # which means the period does not overlap the availability series
        if start > upper or end < lower:
            raise ValueError('Period from {} to {} does not overlap the availability range ({} to {}).'.format(start, end, lower, upper))
        return max(start, lower), min(end, upper)

    def _select_clamped_portion(self, start, end, description):
        '''
        Private method. Returns the part of self.availability between start and end (both
        inclusive) once they are clamped to the series limits, or None (after logging it)
        when the period does not overlap the series.

        Clamping also avoids memory issues (for example, someone asking from 1960 to 2120
        would otherwise create a huge range).
        description: name of the kind of object being considered; only used in the log message
        '''
        try:
            start, end = self._ensure_dates_are_in_bounds(start, end)
        except ValueError:
            logger.debug('{0} range does not overlap the availability range. Ignoring {0}.'.format(description))
            return None
        # Label slicing only returns slots that exist in the series, so dates that are not
        # aligned with minimum_slot no longer produce missing keys.
        return self.availability.loc[start:end]

    def consider_this(self, appointment_availability_or_holiday, only_working_hours=False):
        '''
        Updates the availability series according to the given object.

        :appointment_availability_or_holiday can be an object from the following classes: Availability, Holiday, Appointment, AppointmentTypeLink.
        :only_working_hours if true, changes are limited to the defined working hours

        Availability repeats every week.
        Availability always refers to a moment in which the worker should be working. Never the opposite.
        Holiday has higher preference because it refers to extraordinary events like extra availability or lack of availability.
        Holiday modifies the status of Availability for specific periods of time.
        Appointment and AppointmentTypeLink set their period as not available.

        Periods extending beyond the limits of this object's time series are clamped to those
        limits; periods that do not overlap the series at all are ignored (a debug message is logged).

        Raises TypeError if the object is not an instance of one of the supported classes.
        '''
        item = appointment_availability_or_holiday
        if isinstance(item, Availability):
            start = item.available_from
            end = item.available_till
            weekday = item.day_number
            logger.debug('Considering Availability from {} to {} for weekday {}'.format(start, end, weekday))
            # pandas numbers weekdays from Monday=0, hence the -1
            # (presumably day_number starts at Monday=1 -- confirm against the Availability model)
            same_weekday = self.availability[self.availability.index.weekday == (weekday - 1)]
            portion = same_weekday.between_time(start, end)  # selects the weekdays and then the specific hours
            set_to = 1
        elif isinstance(item, Holiday):
            start = item.datetime_start
            end = item.datetime_end
            is_extra = item.kind == AVAILABILITY_EXTRA
            logger.debug('Considering {} from {} to {}'.format('Extra Availability' if is_extra else 'Holiday', start, end))
            portion = self._select_clamped_portion(start, end, 'Holiday')
            set_to = 1 if is_extra else 0
        elif isinstance(item, Appointment):
            start = item.datetime_when
            end = start + datetime.timedelta(minutes=item.length)
            logger.debug('Considering General Appointment from {} to {}'.format(start, end))
            portion = self._select_clamped_portion(start, end, 'Appointment')
            set_to = 0
        elif isinstance(item, AppointmentTypeLink):
            start = item.date_when
            end = start + datetime.timedelta(minutes=item.appointment_type.default_duration)
            logger.debug('Considering Subject Appointment from {} to {}'.format(start, end))
            portion = self._select_clamped_portion(start, end, 'AppointmentTypeLink')
            set_to = 0
        else:
            logger.error('Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.')
            raise TypeError('Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.')
        if portion is None:
            # the period does not overlap this object's time series: nothing to change
            return
        if only_working_hours:
            portion = portion.between_time(self.office_start, self.office_end)
        # every portion is selected from self.availability itself, so its index is always within bounds
        self.availability[portion.index] = set_to

    def get_availability_percentage(self, only_working_hours=False):
        '''
        Returns the percentage (0 to 100) of slots that are available.
        If only_working_hours is True, only the slots within the office hours are taken into account.
        The result is NaN if no slot is selected.

        For multiple values this is the solution: return s.value_counts().div(len(s))[1] * 100
        But since it's 0 or 1, this works as well and is faster: return s.mean() * 100
        To test it:
            import pandas as pd
            range = pd.date_range(start='2018-10-1', end='2018-10-2 01:00', freq='5T')
            s = pd.Series(index=range, data=0)
            range2 = pd.date_range(start='2018-10-1 1:00', end='2018-10-1 2:30', freq='5T')
            s[range2] = 1
            print(s.value_counts().div(len(s))[1]*100) # prints 6.312292358803987
            print(s.mean()*100) # prints 6.312292358803987
            %timeit s.value_counts().div(len(s))[1]*100 # 504 µs ± 19.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
            %timeit s.mean()*100 # 56.3 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
        '''
        if only_working_hours:
            availability = self.availability.between_time(self.office_start, self.office_end)
        else:
            availability = self.availability
        return availability.mean() * 100  # better to isolate the operation in case we change it later

    def is_available(self, only_working_hours=False):
        '''
        Returns True if the selected period is available more than 50% of the time
        Otherwise returns False
        '''
        return self.get_availability_percentage(only_working_hours=only_working_hours) > 50.0

    def plot_availability(self):
        '''
        Plot availability chart.

        Draws the business hours (area) and the availability (hatched line) and saves the
        chart as '<name>_<start>_<end>.pdf' in the current working directory.
        As a side effect, the business hours series is stored in self.business_hours and the
        global matplotlib 'hatch.linewidth' setting is set to 1.
        '''
        fig = plt.figure()  # dedicated figure so the drawing does not land on a figure created elsewhere
        try:
            ax = fig.gca()  # get current axes
            matplotlib.rcParams['hatch.linewidth'] = 1
            logger.debug('business_hours: {} {}'.format(self.office_start, self.office_end))
            business_hours = self.business_hours = pd.Series(index=self.range, data=0)
            mask = business_hours.between_time(self.office_start, self.office_end).index
            business_hours[mask] = 1
            ax = business_hours.plot(kind='area', alpha=0.33, color='#1190D8', label='Business Hours', legend=True, ax=ax)
            # calculate good xticks: one tick every <number of days> hours or, for periods
            # shorter than a day, one tick every <number of hours> minutes (at least 1)
            total_seconds = self._get_duration().total_seconds()
            n_ticks = int(total_seconds / 3600 / 24)
            if n_ticks == 0:
                n_ticks = int(total_seconds / 60 / 60)
                if n_ticks == 0:
                    n_ticks = 1
                xticks = self.availability.asfreq('{}T'.format(n_ticks)).index
            else:
                xticks = self.availability.asfreq('{}H'.format(n_ticks)).index
            title = 'Availability for {} from {} to {}'.format(self.name, self.start.strftime('%Y/%m/%d %H:%M'), self.end.strftime('%Y/%m/%d %H:%M'))
            ax = self.availability.plot(figsize=(16, 8), grid=True,
                                        title=title, legend=True, label='Availability', color='#00af52',
                                        xticks=xticks, ax=ax, yticks=[0, 1])
            ax.fill_between(self.availability.index, self.availability.tolist(), facecolor="none", hatch='//', edgecolor="#00af52", alpha=1, linewidth=0.5)
            ax.set_axisbelow(True)
            ax.yaxis.grid(color='gray', linewidth=0.5, alpha=0)
            ax.xaxis.grid(color='gray', linewidth=0.5, alpha=1)
            ax.set_yticklabels(['False', 'True'])
            ax.set_ylabel('Is Available ?')
            ax.set_xlabel('Date & Time')
            fig.tight_layout()
            fig.savefig('{}_{}_{}.pdf'.format(self.name, self.start.strftime('%Y%m%d%H%M'), self.end.strftime('%Y%m%d%H%M')))
        finally:
            # pyplot keeps every figure alive until it is explicitly closed; without this
            # each call would leak a figure
            plt.close(fig)