Skip to content
Snippets Groups Projects
officeAvailability.py 11.25 KiB
# coding=utf-8
import datetime
from datetime import timedelta
import logging
import pandas as pd
from web.utils import get_today_midnight_date
from web.models.holiday import Holiday
from web.models.availability import Availability
from web.models.appointment import Appointment
from web.models.appointment_type_link import AppointmentTypeLink
from web.models.constants import AVAILABILITY_EXTRA, AVAILABILITY_HOLIDAY

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# matplotlib is only needed by the plot method (OfficeAvailability.plot_availability).
import matplotlib
# Select the non-interactive 'Agg' backend before pyplot is imported, so figures are rendered to files.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

class OfficeAvailability(object):
	'''
		start: datetime-like indicating when the range starts. If none, then today midnight
		end: datetime-like indicating when the range ends. If none, then tomorrow midnight
		office_start: when the office hours begin
		office_end: when the office hours finish 
		minimum_slot: frequency of the pandas series. T stands for minutes. Docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html
	'''
	def __init__(self, name, start=None, end=None, office_start='8:00', office_end='18:00', minimum_slot='1T'):
		'''
		Builds the time space of the object: a pandas series indexed from start to end, one entry per minimum_slot.
		When start is None, today midnight is used. When end is None, the midnight after today's is used.
		'''
		default_start = get_today_midnight_date()
		default_end = default_start + datetime.timedelta(days=1)

		self.name = name
		self.start = default_start if start is None else start
		self.end = default_end if end is None else end
		self.office_start, self.office_end = office_start, office_end
		self.minimum_slot = minimum_slot

		# index shared by the availability series and by the business hours drawn in the plot
		self.range = pd.date_range(start=self.start, end=self.end, freq=self.minimum_slot)
		logger.debug(u'Name: {}. Min index: {}. Max index: {}'.format(self.name, self.start, self.end))
		# every slot starts as not available (0)
		self.availability = pd.Series(data=0, index=self.range)
	
	def _get_duration(self):
		'''
		Private method. Returns the differ
		'''
		return self.availability.index[-1] - self.availability.index[0]

	def add_availability(self, range, only_working_hours=False):
		'''
		Receives a pandas date_range `pd.date_range` object.
		Sets the availability to one for the specific interval of the provided range.
		'''
		range = range.round(self.minimum_slot)
		if only_working_hours:
			range = range.to_series().between_time(self.office_start, self.office_end).index
		self.availability[range] = 1

	def remove_availability(self, range, only_working_hours=False):
		'''
		Receives a pandas date_range `pd.date_range` object.
		Sets the availability to zero for the specific interval of the provided range.
		'''
		range = range.round(self.minimum_slot)
		if only_working_hours:
			range = range.to_series().between_time(self.office_start, self.office_end).index
		self.availability[range] = 0

	def _ensure_dates_are_in_bounds(self, given_start, given_end):
		'''
		given_start and given_end should not be a string but if so, they must comply with pd.Timestamp requirements
		'''
		# sort dates to ensure start < end
		start, end = sorted([pd.Timestamp(given_start), pd.Timestamp(given_end)])

		if start < self.availability.index.min():
			start = self.availability.index.min() 

		# check if end is in bounds
		if end > self.availability.index.max():
			end = self.availability.index.max()

		# this could only happen if both start and end dates are higher or lower than index.max/min since start and end dates are sorted
		# this means that the two dates are out of bounds and then its time range doesn't overlap with the self.availability
		if start > self.availability.index.max() or end < self.availability.index.min():
			raise ValueError

		return start, end

	def consider_this(self, appointment_availability_or_holiday, only_working_hours=False):
		'''
			Updates self.availability according to the given object.

			:appointment_availability_or_holiday can be an object from the following classes: Availability, Holiday, Appointment, AppointmentTypeLink.
			:only_working_hours if true, changes are limited to the slots between self.office_start and self.office_end.

			Availability: sets to 1 the slots between available_from and available_till of every day of the time space
			whose weekday matches day_number (day_number - 1 is compared with the pandas weekday, in which Monday is 0).
			Availability always refers to a moment in which the worker should be working. Never the opposite.

			Holiday: refers to extraordinary events like extra availability or lack of availability.
			Sets to 1 the period from datetime_start to datetime_end if its kind is AVAILABILITY_EXTRA, otherwise sets it to 0.

			Appointment: sets to 0 the period that begins at datetime_when and lasts `length` minutes.

			AppointmentTypeLink: sets to 0 the period that begins at date_when and lasts appointment_type.default_duration minutes.

			Every call overwrites whatever previous calls set for the same slots.

			The periods of Holiday, Appointment and AppointmentTypeLink are first limited to the object time space
			(see _ensure_dates_are_in_bounds). If a period doesn't overlap the time space at all, the object is ignored.
			The limited period is selected with a label slice (both limits included), so its limits don't need to
			coincide with a slot of the series and no huge date range is ever created.

			Raises TypeError if the object is not an instance of one of the four expected classes.
		'''
		item = appointment_availability_or_holiday

		if isinstance(item, Availability):
			start   = item.available_from
			end     = item.available_till
			weekday = item.day_number
			logger.debug('Considering Availability from {} to {} for weekday {}'.format(start, end, weekday))
			# selects the weekdays and then the specific hours
			portion = self.availability[self.availability.index.weekday == (weekday-1)].between_time(start, end)
			set_to  = 1
		else:
			# the three remaining classes describe a single period of time: start, end and the value to set
			if isinstance(item, Holiday):
				start    = item.datetime_start
				end      = item.datetime_end
				is_extra = item.kind == AVAILABILITY_EXTRA
				logger.debug('Considering {} from {} to {}'.format('Extra Availability' if is_extra else 'Holiday', start, end))
				label    = 'Holiday'
				set_to   = 1 if is_extra else 0
			elif isinstance(item, Appointment):
				start  = item.datetime_when
				end    = start + datetime.timedelta(minutes=item.length)
				logger.debug('Considering General Appointment from {} to {}'.format(start, end))
				label  = 'Appointment'
				set_to = 0
			elif isinstance(item, AppointmentTypeLink):
				start  = item.date_when
				end    = start + datetime.timedelta(minutes=item.appointment_type.default_duration)
				logger.debug('Considering Subject Appointment from {} to {}'.format(start, end))
				label  = 'AppointmentTypeLink'
				set_to = 0
			else:
				logger.error('Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.')
				raise TypeError('Expected Availability, Holiday, Appointment or AppointmentTypeLink objects.')

			# limit the period to the object time space (for example, someone asking from 1960 to 2120)
			try:
				start, end = self._ensure_dates_are_in_bounds(start, end)
			except ValueError:
				logger.debug('{0} range does not overlap the availability range. Ignoring {0}.'.format(label))
				return
			# label slice: takes every slot within [start, end] even if start or end fall between two slots
			portion = self.availability.loc[start:end]

		if only_working_hours:
			portion = portion.between_time(self.office_start, self.office_end)

		# portion is always a selection of self.availability, hence its index is within the object time space
		self.availability[portion.index] = set_to

	def get_availability_percentage(self, only_working_hours=False):
		'''
		For multiple values this is the solution: return self.availability.value_counts().div(len(s))[1] * 100 
		But since it's 0 or 1, this works as well and is faster: return self.availability.mean() * 100

		To test it:
		import pandas as pd
		range = pd.date_range(start='2018-10-1', end='2018-10-2 01:00', freq='5T', closed=None)
		s = pd.Series(index=range, data=0)
		range2 = pd.date_range(start='2018-10-1 1:00', end='2018-10-1 2:30', freq='5T')
		s[range2] = 1
		print(s.value_counts().div(len(s))[1]*100)   # prints 6.312292358803987
		print(s.mean()*100)							 # prints 6.312292358803987
		%timeit s.value_counts().div(len(s))[1]*100  # 504 µs ± 19.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
		%timeit s.mean()*100                         # 56.3 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
		'''
		if only_working_hours:
			availability = self.availability.between_time(self.office_start, self.office_end)
		else:
			availability = self.availability

		return availability.mean() * 100 #better to isolate the operation in case we change it later

	def is_available(self, only_working_hours=False):
		'''
		Returns True if on the selected period is available at least 50% of the time
		Otherwise returns False
		'''
		return self.get_availability_percentage(only_working_hours=only_working_hours) > 50.0

	def plot_availability(self):
		'''
		Plot availability chart.

		Draws the business hours (area) and the availability (hatched line) over the time space of the object and
		saves the chart in the file '<name>_<start>_<end>.pdf' (relative path), where start and end are formatted as %Y%m%d%H%M.

		Side effects: stores the business hours series in self.business_hours and sets the global
		matplotlib rcParams['hatch.linewidth'] to 1.

		The figure is always closed when the method finishes: figures created with pyplot are kept
		registered by pyplot until they are closed, hence they would pile up in memory call after call.
		'''
		fig = plt.figure() #create a new figure instead of drawing on the current one
		try:
			ax = fig.gca() #axes of the new figure
			matplotlib.rcParams['hatch.linewidth'] = 1
			logger.debug('business_hours: {} {}'.format(self.office_start, self.office_end))

			# series with 1 within the office hours and 0 elsewhere
			business_hours = self.business_hours = pd.Series(index=self.range, data=0)
			mask = business_hours.between_time(self.office_start, self.office_end).index
			business_hours[mask] = 1
			ax = business_hours.plot(kind='area', alpha = 0.33, color='#1190D8', label='Business Hours', legend=True, ax=ax)

			#calculate good xticks:
			# - ranges of one day or more: one tick every N hours, N being the number of whole days
			# - shorter ranges: one tick every M minutes, M being the number of whole hours (at least 1)
			duration_in_seconds = self._get_duration().total_seconds()
			n_ticks = int((duration_in_seconds/3600)/24)
			if n_ticks == 0:
				n_ticks = int((duration_in_seconds/60)/60)
				if n_ticks == 0:
					n_ticks = 1
				xticks = self.availability.asfreq('{}T'.format(n_ticks)).index
			else:
				xticks = self.availability.asfreq('{}H'.format(n_ticks)).index

			title = 'Availability for {} from {} to {}'.format(self.name, self.start.strftime('%Y/%m/%d %H:%M'), self.end.strftime('%Y/%m/%d %H:%M'))

			ax = self.availability.plot(figsize=(16, 8), grid = True,
				title=title, legend=True, label='Availability', color='#00af52',
				xticks=xticks, ax=ax, yticks=[0,1])

			# hatch the area below the availability line
			ax.fill_between(self.availability.index, self.availability.tolist(), facecolor="none", hatch='//', edgecolor="#00af52", alpha=1, linewidth=0.5)
			ax.set_axisbelow(True)
			# horizontal grid lines are made invisible (alpha=0), only the vertical ones are shown
			ax.yaxis.grid(color='gray', linewidth=0.5, alpha=0)
			ax.xaxis.grid(color='gray', linewidth=0.5, alpha=1)
			ax.set_yticklabels(['False', 'True'])
			ax.set_ylabel('Is Available ?')
			ax.set_xlabel('Date & Time')

			fig.tight_layout()
			fig.savefig('{}_{}_{}.pdf'.format(self.name, self.start.strftime('%Y%m%d%H%M'), self.end.strftime('%Y%m%d%H%M')))
		finally:
			# release the figure even if plotting or saving failed
			plt.close(fig)