# -*- coding: utf-8 -*- """ Created on Tue Jun 25 11:20:27 2019 @author: mor96805 Door events analysis Show number of events per day plot all events for time of day UPDATED Thur Nov 14 2019 """ designated_csv = 'C:\\Users\\PATH TO YOUR CSV HERE\\door_tracker.csv' import pandas as pd pd.set_option('display.max_columns', 100) pd.set_option('display.max_rows', 500) import matplotlib.pyplot as plt plt.rcParams.update({'font.size': 8}) import numpy as np import datetime df = pd.read_csv(designated_csv) # Totals total_days = df.Date.drop_duplicates().size print("\nTotal days recorded: " + str(total_days)) total_events = df[['Door(YN)', 'Buzzer(YN)']].sum().sum() print("Total events: " + str(total_events)) print("Average per day: " + str(round(total_events/total_days, 2))) stamp = datetime.datetime.now() date_stamp = str(stamp.strftime("%d/%m/%Y")) if date_stamp in df.Date.tolist(): print("No. events today: " + str(df.loc[df.Date == date_stamp][['Buzzer(YN)', 'Door(YN)']].sum().sum())) else: print("No Rory events today.") # Get Rory % but then proceed for all values as either Rory/non Rory total_rory_events = df.loc[df['Non-RoryEvent(YN)']==0][['Buzzer(YN)', 'Door(YN)']].sum().sum() print("Total Rory Events : " + str(total_rory_events)) print("Rory % of total: " + str(round(100*total_rory_events/total_events,2)) + " %") deliveries = df.loc[df['Delivery(YN)']==1][['Door(YN)', 'Buzzer(YN)']].sum().sum() print("% of total events from deliveries: " + str(round(100*deliveries/total_events,2))+" %") glasgow_staff = df.loc[df['BVGlasgowEmployee(YN)']==1][['Door(YN)', 'Buzzer(YN)']].sum().sum() print("% of total events from Glasgow office staff: " + str(round(100*glasgow_staff/total_events,2))+" %") # 'Date', 'Time', 'weekday', 'Buzzer(YN)', 'Door(YN)', 'Delivery(YN)', # 'BVGlasgowEmployee(YN)', 'Non-RoryEvent(YN)' date_table = pd.pivot_table(df, index='Date', values=['Door(YN)', 'Buzzer(YN)'], aggfunc=np.sum) date_table['Sum'] = date_table['Buzzer(YN)'] + date_table['Door(YN)'] date_table = date_table.sort_values(by=['Sum']) print("-"*50) print("\nMinimum events in a day: ") print(date_table.head(10)) print("\nNumber of no-event days: " + str(sum(date_table.Sum == 0))) print("\nMaximum events in a day: ") print(date_table.tail(10)) # ============================================================================= # Graphs # ============================================================================= #plt.figure(figsize=(12,8)) #plt.rcParams.update({'font.size': 16}) #plt.grid(linestyle="--") #plt.hist(date_table.Sum, edgecolor='black', linewidth=1.0, bins=max(date_table.Sum)) #plt.title("Total number of events histogram") #plt.ylabel("Days") #plt.xlabel("Events") #plt.savefig('Hist.jpg') times = df.Time fig, ax1 = plt.subplots(figsize=(8,3), constrained_layout=True) ax1.grid(linestyle="--") ax1.hist(date_table.Sum, edgecolor='black', linewidth=1.0, bins=max(date_table.Sum)) ax1.set_xlabel("Events") ax1.set_ylabel("Days") ax2 = ax1.twinx() ax2.set_ylabel("Prob", color='red') ax2.tick_params(axis='y', labelcolor='red') ax2.hist(date_table.Sum, bins=max(date_table.Sum), density=True, histtype='step', cumulative=1, color='red') plt.title("Total number of events histogram") #ax1.plot([0.5, 6.5, 12.5], [12, 8, 0]) plt.show() # ============================================================================= # For the hist of hourly events # ============================================================================= df['interval'] = "" for i in range(0, len(df)): x = str(df.loc[i, 'Time']) y = x[:3] if x[3] < "3": y += str("00") else: y+= str("30") df.loc[i, 'interval'] = y df['interval'] = "" for i in range(0, len(df)): x = str(df.loc[i, 'Time']) y = x[:3] if x[3] < "3": y += str("00") else: y+= str("30") df.loc[i, 'interval'] = y time = [] for i in range(7,10): time.append("0"+str(i)+":00:00") time.append("0"+str(i)+":30:00") for i in range(10,19): time.append(str(i)+":00:00") time.append(str(i)+":30:00") time_table = pd.DataFrame({'interval':time, 'count':[0]*len(time)}) for i in range(0,len(time_table)-1): time_table.loc[i, 'count'] = len(df[(df.Time > time_table.loc[i, 'interval']) & (df.Time < time_table.loc[i+1, 'interval'])]) # make up some data x = time_table['interval'] y = time_table['count'] # plot from matplotlib.pyplot import figure figure(num=1, figsize=(8, 3), dpi=80, facecolor='w', edgecolor='black') plt.bar(x, y, edgecolor='black') plt.gcf().autofmt_xdate() #plt.yticks(range(0, 1+max(time_table['count']))) plt.title('Time of day Hist') plt.xlabel('Time of Day') plt.ylabel('Total Events') plt.grid(linestyle="--") plt.show() # ============================================================================= # Day of the week count # ============================================================================= weekday_table = pd.pivot_table(df, index='weekday', values=['Door(YN)', 'Buzzer(YN)'], aggfunc=np.sum) weekday_count_table = pd.pivot_table(df, index='Date', values=['weekday']) weekdays = list(range(0,5)) weekdays_count = [np.sum([int(j == i) for j in weekday_count_table['weekday'].tolist()])for i in weekdays] weekday_av = [(weekday_table['Buzzer(YN)'] + weekday_table['Door(YN)']).tolist()[i]/weekdays_count[i] for i in range(0,5)] figure(num=2, figsize=(8, 3), dpi=80, facecolor='w', edgecolor='black') plt.bar(weekdays, weekday_av, edgecolor='black') #plt.gcf().autofmt_xdate() #plt.yticks(range(0, 1+max(time_table['count']))) plt.title('Day of Week Hist') plt.xlabel('Weekday') plt.ylabel('Total Events') plt.grid(linestyle="--") plt.show()