# Installing the libraries with the specified version.
# Not necessary in Google Colab
#!pip install numpy==1.25.2 pandas==1.5.3 matplotlib==3.7.1 seaborn==0.13.1 -q --user

# import libraries for data manipulation
import numpy as np
import pandas as pd

# import libraries for data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Mount Google Drive at /content/drive so the dataset stored there can be read by the load step below
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

# Read the FoodHub order export from the mounted Drive folder into a DataFrame
FOODHUB_CSV_PATH = '/content/drive/MyDrive/UT/Projects/FoodHub/foodhub_order.csv'
foodhub_data = pd.read_csv(FOODHUB_CSV_PATH)

# Work on a copy so the frame loaded above stays untouched
data = foodhub_data.copy()

# Preview the first 5 rows
data.head(n=5)

# Determine the shape of the data as a (rows, columns) tuple
data.shape

(1898, 9)

# Determine the datatypes: info() prints each column's name, non-null count and dtype
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1898 entries, 0 to 1897
Data columns (total 9 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   order_id               1898 non-null   int64  
 1   customer_id            1898 non-null   int64  
 2   restaurant_name        1898 non-null   object 
 3   cuisine_type           1898 non-null   object 
 4   cost_of_the_order      1898 non-null   float64
 5   day_of_the_week        1898 non-null   object 
 6   rating                 1898 non-null   object 
 7   food_preparation_time  1898 non-null   int64  
 8   delivery_time          1898 non-null   int64  
dtypes: float64(1), int64(4), object(4)
memory usage: 133.6+ KB

# Count the null entries in every column (isna is an alias of isnull)
missing_values = data.isna().sum()
missing_values

# Find the minimum, average, and maximum values for the time it takes for food to be prepared once an order is placed.
prep_time = data['food_preparation_time']
max_prep_time = prep_time.max()
mean_prep_time = prep_time.mean()
min_prep_time = prep_time.min()

print(f"Maximum time for food to be prepared: {max_prep_time:.2f} minutes")
print(f"Average time for food to be prepared: {mean_prep_time:.2f} minutes")
print(f"Minimum time for food to be prepared: {min_prep_time:.2f} minutes")

# Statistical summary of every numeric column. This must be the LAST expression
# of the cell: a notebook only renders the value of the final expression, so
# calling describe() before the assignments above silently discarded the table.
data.describe()

Maximum time for food to be prepared: 35.00 minutes
Average time for food to be prepared: 27.37 minutes
Minimum time for food to be prepared: 20.00 minutes

# Work out how many orders carry the 'Not given' placeholder instead of a rating
total_orders = len(data)
unrated_mask = data['rating'] == 'Not given'
orders_not_rated = len(data[unrated_mask])
percent_orders_not_rated = orders_not_rated / total_orders * 100

# Report the absolute count, the total, and the share of unrated orders
print(f"Number of orders not rated: {orders_not_rated}")
print(f"Total number of orders: {total_orders}")
print(f"Percentage of orders not rated: {percent_orders_not_rated:.2f}%")

Number of orders not rated: 736
Total number of orders: 1898
Percentage of orders not rated: 38.78%

# Data exploration of Restaurants using a Barplot

# Number of orders received by each restaurant, most ordered first
restaurant_counts = data['restaurant_name'].value_counts()

# Keep only the 10 restaurants with the most orders for the chart
top_restaurants = restaurant_counts.nlargest(10)

# Find and print the total number of distinct restaurants
total_restaurants = data['restaurant_name'].nunique()

print(f"Total number of restaurants {total_restaurants}\n\n")

# Horizontal bar chart of the top 10 restaurants.
# The restaurant name is passed as `hue` as well (legend suppressed) because
# seaborn deprecated `palette` without `hue` and will remove it in v0.14.
plt.figure(figsize=(10, 6))
sns.barplot(
    x=top_restaurants.values,
    y=top_restaurants.index,
    hue=top_restaurants.index,
    palette='viridis',
    legend=False,
)
plt.title('Top 10 Restaurants by Number of Orders')
plt.xlabel('Number of Orders')
plt.ylabel('Restaurant Name')
plt.show()

Total number of restaurants 178

<ipython-input-21-1059dba21aed>:16: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_restaurants.values, y=top_restaurants.index, palette='viridis')

# Order count for every restaurant, sorted from most to fewest orders
data['restaurant_name'].value_counts()

# Data exploration of Cost with Histogram, Boxplot, and Countplot

# Bin edges and matching labels used to bucket the order cost
bins = [0, 5, 10, 15, 20, 30, 50, 100]
labels = ['0-5', '5-10', '10-15', '15-20', '20-30', '30-50', '50-100']
data['cost_bins'] = pd.cut(data['cost_of_the_order'], bins=bins, labels=labels)

# Create a figure with three subplots arranged horizontally
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Plot a histogram with KDE on the first subplot
sns.histplot(data['cost_of_the_order'], kde=True, ax=axes[0])
axes[0].set_title('Distribution of Cost')
axes[0].set_xlabel('Cost')
axes[0].set_ylabel('Frequency')

# Plot a boxplot on the second subplot
sns.boxplot(x=data['cost_of_the_order'], ax=axes[1])
axes[1].set_title('Boxplot of Cost')
axes[1].set_xlabel('Cost')

# Plot a countplot of cost bins on the third subplot
sns.countplot(x='cost_bins', data=data, ax=axes[2])
axes[2].set_title('Frequency of Cost Bins')
axes[2].set_xlabel('Cost Range')
axes[2].set_ylabel('Frequency')
# Rotate the tick labels through tick_params: re-assigning the labels with
# set_xticklabels() without fixing the tick positions first triggers
# matplotlib's "set_ticklabels() should only be used with a fixed number of
# ticks" UserWarning.
axes[2].tick_params(axis='x', rotation=45)

# Adjust the spacing to prevent overlap
plt.tight_layout()

# Display the plots
plt.show()

<ipython-input-13-172b67177985>:27: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  axes[2].set_xticklabels(axes[2].get_xticklabels(), rotation=45)

# Summary statistics (count, mean, std, min, quartiles, max) of the order cost
data['cost_of_the_order'].describe()

# Data exploration of Days of the Week with Count Plot

# Plot a countplot for 'day_of_the_week'.
# The x variable is also assigned to `hue` (legend suppressed) because seaborn
# deprecated passing `palette` without `hue` and will remove it in v0.14.
plt.figure(figsize=(6, 4))
sns.countplot(
    x='day_of_the_week',
    hue='day_of_the_week',
    data=data,
    palette='pastel',
    legend=False,
)
plt.title('Number of Orders by Day of the Week')
plt.xlabel('Day of the Week')
plt.ylabel('Number of Orders')

# Display the plot
plt.show()

<ipython-input-15-885c28c9806e>:5: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x='day_of_the_week', data=data, palette='pastel')

# Absolute and relative frequency of weekday vs. weekend orders
day_column = data['day_of_the_week']
day_of_the_week_counts = day_column.value_counts()
day_of_the_week_proportion = day_column.value_counts(normalize=True)

# Each heading is followed by its table on the next line
print("Day of the Week Counts:", day_of_the_week_counts, sep="\n")
print("\nDay of the Week Proportions:", day_of_the_week_proportion, sep="\n")

Day of the Week Counts:
day_of_the_week
Weekend    1351
Weekday     547
Name: count, dtype: int64

Day of the Week Proportions:
day_of_the_week
Weekend    0.711802
Weekday    0.288198
Name: proportion, dtype: float64

# Data transformation of Ratings data

# Swap the 'Not given' placeholder for NaN so unrated orders are treated as missing values
data['rating'] = data['rating'].replace({'Not given': np.nan})

# Tally every rating value, keeping the NaN bucket in the result
rating_counts = data['rating'].value_counts(dropna=False)
print(rating_counts)

rating
NaN    736
5      588
4      386
3      188
Name: count, dtype: int64

# Data exploration of Ratings with Countplot

# Count each rating value, keeping the NaN (unrated) bucket
rating_counts = data['rating'].value_counts(dropna=False)

# Convert the index to strings so the NaN bucket gets its own bar label
rating_labels = rating_counts.index.astype('str')

# Plot a barplot using the rating_counts.
# The labels are also assigned to `hue` (legend suppressed) because seaborn
# deprecated passing `palette` without `hue` and will remove it in v0.14.
plt.figure(figsize=(8, 5))
sns.barplot(
    x=rating_labels,
    y=rating_counts.values,
    hue=rating_labels,
    palette='coolwarm',
    legend=False,
)
plt.title('Distribution of Ratings Including Missing Values')
plt.xlabel('Rating')
plt.ylabel('Number of Orders')

# Display the plot
plt.show()

<ipython-input-18-9fbcd6cecdb7>:8: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=rating_counts.index.astype('str'), y=rating_counts.values, palette='coolwarm')

# Share of orders per rating value, with the NaN (unrated) bucket included
ratings_count_proportion = data['rating'].value_counts(normalize=True, dropna=False)

print("Ratings Proportions:", ratings_count_proportion, sep="\n")

Ratings Proportions:
rating
NaN    0.387777
5      0.309800
4      0.203372
3      0.099052
Name: proportion, dtype: float64

# Data exploration of Food Preparation Time with Histogram and Boxplot

# One row, two panels: histogram on the left, boxplot on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, box_ax = axes
prep_minutes = data['food_preparation_time']

# Left panel: histogram with a KDE overlay
sns.histplot(prep_minutes, bins=20, kde=True, ax=hist_ax, color='skyblue')
hist_ax.set(
    title='Histogram of Food Preparation Time',
    xlabel='Preparation Time (minutes)',
    ylabel='Frequency',
)

# Right panel: boxplot of the same column
sns.boxplot(x=prep_minutes, ax=box_ax, color='lightgreen')
box_ax.set(
    title='Boxplot of Food Preparation Time',
    xlabel='Preparation Time (minutes)',
)

# Tighten the layout and render both panels
plt.tight_layout()
plt.show()

# Summary statistics (count, mean, std, min, quartiles, max) of the food preparation time
data['food_preparation_time'].describe()

# Data exploration of Delivery Time with Histogram and Boxplot

# One row, two panels: histogram on the left, boxplot on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, box_ax = axes
delivery_minutes = data['delivery_time']

# Left panel: histogram with a KDE overlay and outlined bars
sns.histplot(
    delivery_minutes,
    bins=20,
    kde=True,
    ax=hist_ax,
    color='dodgerblue',
    edgecolor='black',
)
hist_ax.set(
    title='Histogram of Delivery Time',
    xlabel='Delivery Time (minutes)',
    ylabel='Frequency',
)

# Right panel: boxplot of the same column
sns.boxplot(x=delivery_minutes, ax=box_ax, color='lightcoral')
box_ax.set(
    title='Boxplot of Delivery Time',
    xlabel='Delivery Time (minutes)',
)

# Tighten the layout and render both panels
plt.tight_layout()
plt.show()

# Summary statistics (count, mean, std, min, quartiles, max) of the delivery time
data['delivery_time'].describe()

# Number of orders per restaurant
restaurant_names = data['restaurant_name']
restaurant_counts = restaurant_names.value_counts()

# Keep and print the 5 restaurants with the most orders
top_5_restaurants = restaurant_counts.nlargest(n=5)
print(top_5_restaurants)

restaurant_name
Shake Shack                  219
The Meatball Shop            132
Blue Ribbon Sushi            119
Blue Ribbon Fried Chicken     96
Parm                          68
Name: count, dtype: int64

# Filter the data to weekend orders, then count the orders per cuisine type
weekend_data = data[data['day_of_the_week'] == 'Weekend']
weekend_cuisine = weekend_data.groupby('cuisine_type').size().sort_values(ascending=False)

# Keep the 5 cuisines with the most weekend orders
top_five_weekend_cuisines = weekend_cuisine.head(5)

# Horizontal bar chart of the top 5 weekend cuisines.
# The cuisine is also assigned to `hue` (legend suppressed) because seaborn
# deprecated passing `palette` without `hue` and will remove it in v0.14.
plt.figure(figsize=(10, 6))
sns.barplot(
    x=top_five_weekend_cuisines.values,
    y=top_five_weekend_cuisines.index,
    hue=top_five_weekend_cuisines.index,
    palette='coolwarm',
    legend=False,
)
plt.title('Top 5 Cuisines on Weekends')
plt.xlabel('Number of Orders')
plt.ylabel('Cuisine Type')
plt.xticks(rotation=45)
plt.show()


print(f'Top Five Weekend Cuisines: \n\n{top_five_weekend_cuisines}')

# Calculate the total number of weekend orders
total_weekend_orders = weekend_data.shape[0]

# Share of all weekend orders taken by each of the top five cuisines, in percent
top_five_percentages = (top_five_weekend_cuisines / total_weekend_orders) * 100

# Print out the top five cuisines with their percentages
print(f'\n\nTop Five Weekend Cuisines and their percentages:\n\n{top_five_percentages}')

<ipython-input-25-24c309a9417e>:10: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_five_weekend_cuisines.values, y=top_five_weekend_cuisines.index, palette='coolwarm')

Top Five Weekend Cuisines: 

cuisine_type
American    415
Japanese    335
Italian     207
Chinese     163
Mexican      53
dtype: int64


Top Five Weekend Cuisines and their percentages:

cuisine_type
American    30.717987
Japanese    24.796447
Italian     15.321984
Chinese     12.065137
Mexican      3.923020
dtype: float64

# Split the orders into "over $20" and "$20 or less"
over_20_mask = data['cost_of_the_order'] > 20
orders_over_20 = len(data[over_20_mask])
total_orders = len(data)
orders_20_or_less = total_orders - orders_over_20

# Share of orders over $20 relative to the whole data set, in percent
percent_orders_over_20 = (orders_over_20 / total_orders) * 100

# Slice names and slice sizes for the pie chart, in matching order
labels = ['Over $20', '$20 or Less']
sizes = [orders_over_20, orders_20_or_less]

# Draw the pie; the equal axis keeps it circular
plt.figure(figsize=(4, 4))
plt.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    startangle=140,
    colors=['#ff9999', '#66b3ff'],
)
plt.title('Percentage of Orders Over $20')
plt.axis('equal')
plt.show()

print(f"Number of orders over $20: {orders_over_20}")
print(f"Percentage of orders over $20: {percent_orders_over_20:.2f}%")

Number of orders over $20: 555
Percentage of orders over $20: 29.24%

# Average delivery time over all rows of `data`
delivery_minutes = data['delivery_time']
mean_delivery_time = delivery_minutes.mean()

# Histogram of delivery times with a dashed vertical marker at the mean
mean_label = f'Mean: {mean_delivery_time:.2f}'
plt.figure(figsize=(8, 6))
sns.histplot(delivery_minutes, bins=30, kde=True, color='skyblue')
plt.axvline(mean_delivery_time, color='red', linestyle='--', label=mean_label)
plt.title('Distribution of Order Delivery Times with Mean')
plt.xlabel('Delivery Time (minutes)')
plt.ylabel('Frequency')
plt.legend()
plt.grid(axis='y')
plt.show()

# Report the mean to three decimals
print(f"\n\nMean Order Delivery Time: {mean_delivery_time:.3f} minutes")


Mean Order Delivery Time: 24.162 minutes

# Get the top 3 customers and their order counts
top_customers = data['customer_id'].value_counts().head(3)

# Bar chart of the top customers.
# The customer id is also assigned to `hue` (legend suppressed) because seaborn
# deprecated passing `palette` without `hue` and will remove it in v0.14.
plt.figure(figsize=(8, 6))
sns.barplot(
    x=top_customers.index,
    y=top_customers.values,
    hue=top_customers.index,
    palette='pastel',
    legend=False,
)
plt.title('Top 3 Customers by Number of Orders')
plt.xlabel('Customer ID')
plt.ylabel('Number of Orders')
plt.xticks(rotation=45)  # Rotate customer IDs
plt.show()

# Print the top customers and their order counts
print("\nTop 3 Customers by Number of Orders:\n")
print(top_customers)

<ipython-input-28-b20e16d518e1>:6: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_customers.index, y=top_customers.values, palette='pastel')

Top 3 Customers by Number of Orders:

customer_id
52832    13
47440    10
83287     9
Name: count, dtype: int64

# Pairwise correlation between order cost, preparation time, and delivery time,
# to see whether cost moves together with either of the two time columns.
numeric_columns = ['cost_of_the_order', 'food_preparation_time', 'delivery_time']

# Correlation matrix of the three columns
correlation_matrix = data[numeric_columns].corr()
print(correlation_matrix)

# Annotated heatmap of the same matrix
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

                       cost_of_the_order  food_preparation_time  delivery_time
cost_of_the_order               1.000000               0.041527      -0.029949
food_preparation_time           0.041527               1.000000       0.011094
delivery_time                  -0.029949               0.011094       1.000000

# Hexbin joint plot of order cost against food preparation time
sns.jointplot(
    data=data,
    x='cost_of_the_order',
    y='food_preparation_time',
    kind='hex',
)
print('Cost vs Food Preparation Time')
plt.show()

Cost vs Food Preparation Time

# Scatter plot of cost against preparation time, coloured by cuisine type.
# Preparation times are whole minutes, so a small Gaussian jitter (std 0.5)
# is added to a copy of the data to spread out points that would otherwise
# sit on the same horizontal lines.
# A seeded generator keeps the jitter, and therefore the figure, identical
# on every run; the original unseeded np.random.normal call produced a
# different picture each time the cell was executed.
jitter_rng = np.random.default_rng(42)
jittered_data = data.copy()
jittered_data['food_preparation_time'] = (
    jittered_data['food_preparation_time']
    + jitter_rng.normal(0, 0.5, size=len(jittered_data))
)

plt.figure(figsize=(10, 6))
sns.scatterplot(x='cost_of_the_order', y='food_preparation_time', hue='cuisine_type', data=jittered_data, alpha=0.7)

# Moving the legend outside
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title('Cost vs Food Preparation Time with Cuisine Type')
plt.xlabel('Cost of Order')
plt.ylabel('Food Preparation Time (minutes)')
plt.show()

# Three views of cuisine type against day of the week, side by side
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 6))
count_ax, heat_ax, stacked_ax = axes

# Panel 1: grouped count plot, one bar per cuisine within each day
sns.countplot(ax=count_ax, x='day_of_the_week', hue='cuisine_type', data=data)
count_ax.set(title='Count Plot by Day', xlabel='Day of the Week', ylabel='Count')
count_ax.legend(title='Cuisine Type', bbox_to_anchor=(1.05, 1), loc='upper left')

# Panel 2: heatmap of order counts (rows: day, columns: cuisine; missing pairs shown as 0)
pivot_table = data.pivot_table(
    index='day_of_the_week',
    columns='cuisine_type',
    aggfunc='size',
    fill_value=0,
)
sns.heatmap(pivot_table, annot=True, fmt='d', cmap='YlGnBu', ax=heat_ax)
heat_ax.set(title='Heatmap of Cuisine by Day', xlabel='Cuisine Type', ylabel='Day of the Week')

# Panel 3: stacked bars, one bar per day split by cuisine
grouped_sizes = data.groupby(['day_of_the_week', 'cuisine_type']).size()
day_cuisine_counts = grouped_sizes.unstack()
day_cuisine_counts.plot(kind='bar', stacked=True, ax=stacked_ax)
stacked_ax.set(title='Stacked Bar Plot', xlabel='Day of the Week', ylabel='Count')
stacked_ax.legend(title='Cuisine Type', bbox_to_anchor=(1.3, 1), loc='upper left')

plt.tight_layout()
plt.show()

# Boxplots of preparation time and delivery time, split by whether the order cost exceeds $20.
# Each entry: (column to plot, figure title, y-axis label)
boxplot_specs = (
    ('food_preparation_time', 'Food Preparation Time by Cost Category', 'Preparation Time (minutes)'),
    ('delivery_time', 'Delivery Time by Cost Category', 'Delivery Time (minutes)'),
)

# One separate figure per time column, preparation time first
for time_column, plot_title, axis_label in boxplot_specs:
    plt.figure(figsize=(8, 6))
    sns.boxplot(x=data['cost_of_the_order'] > 20, y=time_column, data=data)
    plt.title(plot_title)
    plt.xlabel('Cost > $20')
    plt.ylabel(axis_label)
    plt.show()

# KDE pair plot across the cost column and the two time columns
pairplot_columns = ['cost_of_the_order', 'food_preparation_time', 'delivery_time']
sns.pairplot(data[pairplot_columns], kind='kde')
# y=1.02 lifts the figure title slightly above the grid of panels
plt.suptitle('KDE Pair Plot of Cost, Preparation, and Delivery Times', y=1.02)
plt.show()

# Convert the `rating` column to numeric, coercing anything non-numeric to NaN
data['rating_numeric'] = pd.to_numeric(data['rating'], errors='coerce')

# Keep the rated orders in a SEPARATE frame. Re-assigning `data` here would
# permanently drop every unrated order, and any later analysis that reads
# `data` (revenue, total delivery time, weekday/weekend comparison) would
# silently run on rated orders only instead of on all orders.
rated_orders = data.dropna(subset=['rating_numeric'])

# Per restaurant: number of ratings received and their average
restaurant_ratings = rated_orders.groupby('restaurant_name')['rating_numeric'].agg(
    rating_count='count',
    average_rating='mean',
)

# Filter restaurants meeting the criteria: more than 50 ratings and an average above 4
eligible_restaurants = restaurant_ratings[
    (restaurant_ratings['rating_count'] > 50)
    & (restaurant_ratings['average_rating'] > 4)
]

# Display the eligible restaurants
print(eligible_restaurants)

                           rating_count  average_rating
restaurant_name                                        
Blue Ribbon Fried Chicken            64        4.328125
Blue Ribbon Sushi                    73        4.219178
Shake Shack                         133        4.278195
The Meatball Shop                    84        4.511905

# Net revenue must be computed across ALL orders, so the costs are read from
# `foodhub_data` (the frame loaded from the CSV, which is never modified)
# rather than from `data`, whose rows may have been filtered by earlier steps.
all_order_costs = foodhub_data['cost_of_the_order']

# 25% commission on orders costing more than $20
revenue_greater_than_20 = all_order_costs[all_order_costs > 20].sum() * 0.25

# 15% commission on orders costing more than $5 and up to and including $20
revenue_greater_than_5 = all_order_costs[(all_order_costs > 5) & (all_order_costs <= 20)].sum() * 0.15

# Orders of $5 or less contribute no commission; the total is the sum of both tiers
total_net_revenue = revenue_greater_than_20 + revenue_greater_than_5

# Display the total net revenue
print(f"The total net revenue generated by the company is: ${total_net_revenue:.2f}")

The total net revenue generated by the company is: $3865.57

# Add the total time (preparation + delivery) of each order as a column of `data`
data.loc[:, 'total_delivery_time'] = data['food_preparation_time'] + data['delivery_time']

# The percentage must cover ALL orders, so it is computed from `foodhub_data`
# (the frame loaded from the CSV, which is never modified) rather than from
# `data`, whose rows may have been filtered by earlier steps.
all_orders_total_time = foodhub_data['food_preparation_time'] + foodhub_data['delivery_time']

# Count the number of orders taking more than 60 minutes
orders_over_60_min = int((all_orders_total_time > 60).sum())

# Calculate the percentage of such orders among all orders
percentage_over_60_min = (orders_over_60_min / len(foodhub_data)) * 100

# Display the percentage
print(f"{percentage_over_60_min:.2f}% of orders take more than 60 minutes to be delivered.")

10.24% of orders take more than 60 minutes to be delivered.

# Split the orders into weekday and weekend subsets.
# The split is taken from `foodhub_data` (the frame loaded from the CSV, which
# is never modified) so the means cover ALL orders rather than whatever rows
# remain in `data` after earlier filtering steps.
weekday_data = foodhub_data[foodhub_data['day_of_the_week'] == 'Weekday']
weekend_data = foodhub_data[foodhub_data['day_of_the_week'] == 'Weekend']

# Calculate mean delivery time for weekdays
mean_delivery_time_weekday = weekday_data['delivery_time'].mean()

# Calculate mean delivery time for weekends
mean_delivery_time_weekend = weekend_data['delivery_time'].mean()

# Display the mean delivery times
print(f"Mean delivery time on weekdays: {mean_delivery_time_weekday:.2f} minutes")
print(f"Mean delivery time on weekends: {mean_delivery_time_weekend:.2f} minutes")

Mean delivery time on weekdays: 28.31 minutes
Mean delivery time on weekends: 22.44 minutes

	cost_of_the_order
count	1898.000000
mean	16.498851
std	7.483812
min	4.470000
25%	12.080000
50%	14.140000
75%	22.297500
max	35.410000

	food_preparation_time
count	1898.000000
mean	27.371970
std	4.632481
min	20.000000
25%	23.000000
50%	27.000000
75%	31.000000
max	35.000000

	delivery_time
count	1898.000000
mean	24.161749
std	4.972637
min	15.000000
25%	20.000000
50%	25.000000
75%	28.000000
max	33.000000

	order_id	customer_id	restaurant_name	cuisine_type	cost_of_the_order	day_of_the_week	rating	food_preparation_time	delivery_time
0	1477147	337525	Hangawi	Korean	30.75	Weekend	Not given	25	20
1	1477685	358141	Blue Ribbon Sushi Izakaya	Japanese	12.08	Weekend	Not given	25	23
2	1477070	66393	Cafe Habana	Mexican	12.23	Weekday	5	23	28
3	1477334	106968	Blue Ribbon Fried Chicken	American	29.20	Weekend	3	25	15
4	1478249	76942	Dirty Bird to Go	American	11.59	Weekday	4	25	24

	0
order_id	0
customer_id	0
restaurant_name	0
cuisine_type	0
cost_of_the_order	0
day_of_the_week	0
rating	0
food_preparation_time	0
delivery_time	0

	count
restaurant_name
Shake Shack	219
The Meatball Shop	132
Blue Ribbon Sushi	119
Blue Ribbon Fried Chicken	96
Parm	68
...	...
Sushi Choshi	1
Dos Caminos Soho	1
La Follia	1
Philippe Chow	1
'wichcraft	1

Univariate and Multivariate Data Analysis with Python and Jupyter Notebooks ¶

Alexander Stevenson

Context¶

Objective¶

Data Description¶

Data Dictionary¶

Let us start by importing the required libraries¶

Understanding the structure of the data¶

Question 1: How many rows and columns are present in the data? [0.5 mark]¶

Observations:¶

Question 2: What are the datatypes of the different columns in the dataset? (The info() function can be used) [0.5 mark]¶

Observations:¶

Question 3: Are there any missing values in the data? If yes, treat them using an appropriate method. [1 mark]¶

Observations:¶

Question 4: Check the statistical summary of the data. What is the minimum, average, and maximum time it takes for food to be prepared once an order is placed? [2 marks]¶

Observations:¶

Question 5: How many orders are not rated? [1 mark]¶

Observations:¶

Exploratory Data Analysis (EDA)¶

Univariate Analysis¶

Question 6: Explore all the variables and provide observations on their distributions. (Generally, histograms, boxplots, countplots, etc. are used for univariate exploration.) [9 marks]¶

Observations:¶

Observations:¶

Observations:¶

Observations:¶

Observations:¶

Observations:¶

Question 7: Which are the top 5 restaurants in terms of the number of orders received? [1 mark]¶

Observations:¶

Question 8: Which is the most popular cuisine on weekends? [1 mark]¶

Observations:¶

Question 9: What percentage of the orders cost more than 20 dollars? [2 marks]¶

Observations:¶

Question 10: What is the mean order delivery time? [1 mark]¶

Observations:¶

Question 11: The company has decided to give 20% discount vouchers to the top 3 most frequent customers. Find the IDs of these customers and the number of orders they placed. [1 mark]¶

Observations:¶

Multivariate Analysis¶

Question 12: Perform a multivariate analysis to explore relationships between the important variables in the dataset. (It is a good idea to explore relations between numerical variables as well as relations between numerical and categorical variables) [10 marks]¶

Observations:¶

Question 14: The company charges the restaurant 25% on the orders having cost greater than 20 dollars and 15% on the orders having cost greater than 5 dollars. Find the net revenue generated by the company across all orders. [3 marks]¶

Observations:¶

Question 15: The company wants to analyze the total time required to deliver the food. What percentage of orders take more than 60 minutes to get delivered from the time the order is placed? (The food has to be prepared and then delivered.) [2 marks]¶

Observations:¶

Question 16: The company wants to analyze the delivery time of the orders on weekdays and weekends. How does the mean delivery time vary during weekdays and weekends? [2 marks]¶

Observations:¶

Conclusion and Recommendations¶

Question 17: What are your conclusions from the analysis? What recommendations would you like to share to help improve the business? (You can use cuisine type and feedback ratings to drive your business recommendations.) [6 marks]¶

Conclusions:¶

Recommendations:¶