Part 1: Solutions
Solution 1: Time Series Resampling
Show Solution
import pandas as pd
import numpy as np
data = {
'DateTime': pd.to_datetime([
'2025-08-15 00:00:00', '2025-08-15 01:00:00', '2025-08-15 02:30:00',
'2025-08-15 03:30:00', '2025-08-15 04:00:00', '2025-08-15 07:00:00'
]),
'Value': [10, 12, 8, 15, 18, 20]
}
ts_df = pd.DataFrame(data)
# Solution:
resampled_df = ts_df.resample('3H', on='DateTime')['Value'].mean()
print(resampled_df)
Solution 2: Using `pipe()`
Show Solution
data = {'Price': [100, 150, 200], 'Quantity': [5, 10, 4]}
pipe_df = pd.DataFrame(data)
# Solution:
def add_revenue_col(df):
df['Revenue'] = df['Price'] * df['Quantity']
return df
def apply_discount(df, discount=0.10):
df['Price'] = df['Price'] * (1 - discount)
return df
result_df = pipe_df.pipe(add_revenue_col).pipe(apply_discount, discount=0.10)
print(result_df)
Solution 3: `transform()`
Show Solution
data = {'Category': ['A', 'A', 'B', 'B'], 'Value': [10, 30, 20, 80]}
transform_df = pd.DataFrame(data)
# Solution:
transform_df['Pct_of_Category_Value'] = transform_df['Value'] / transform_df.groupby('Category')['Value'].transform('sum')
print(transform_df)
Solution 4: MultiIndex
Show Solution
data = {
'Year': [2024, 2024, 2025, 2025],
'Quarter': ['Q1', 'Q2', 'Q1', 'Q2'],
'Sales': [1000, 1200, 1100, 1300]
}
multi_df = pd.DataFrame(data)
# Solution:
multi_indexed_df = multi_df.set_index(['Year', 'Quarter'])
print(multi_indexed_df)
Solution 5: `agg()` vs. `transform()`
Show Solution
data = {'Group': ['X', 'X', 'Y', 'Y'], 'Value': [15, 25, 30, 40]}
agg_trans_df = pd.DataFrame(data)
# Solution a:
avg_value_df = agg_trans_df.groupby('Group')[['Value']].mean()
# Solution b:
agg_trans_df['Group_Avg_Value'] = agg_trans_df.groupby('Group')['Value'].transform('mean')
print("--- Aggregated DataFrame (a) ---")
print(avg_value_df)
print("\n--- Original DataFrame with Transformed Column (b) ---")
print(agg_trans_df)
Part 2: Solutions
Solution 6: Filtered Aggregation
Show Solution
# Setup from lab sheet must be run first
electronics_revenue = sales_df[sales_df['Product'] == 'Electronics'].groupby('Region').apply(lambda df: (df['Units Sold'] * df['Price']).sum())
print(electronics_revenue)
Solution 7: Time-Series Growth
Show Solution
# Setup from lab sheet must be run first
sales_df['Revenue'] = sales_df['Units Sold'] * sales_df['Price']
north_sales = sales_df[sales_df['Region'] == 'North'].copy()
monthly_growth = north_sales.set_index('Date').resample('M')['Revenue'].sum().pct_change()
month_with_highest_growth = monthly_growth.idxmax()
print(month_with_highest_growth)
Solution 8: Pivot Table
Show Solution
# Setup from lab sheet must be run first
pivot = sales_df.pivot_table(index='Product', columns='Region', values='Units Sold', aggfunc='mean')
print(pivot)
Solution 9: N-Largest
Show Solution
# Setup from lab sheet must be run first
top_2_per_region = sales_df.groupby('Region', group_keys=False).apply(lambda x: x.nlargest(2, 'Units Sold'))
print(top_2_per_region)
Solution 10: Filtering with Transform
Show Solution
# Setup from lab sheet must be run first
avg_units_by_product = sales_df.groupby('Product')['Units Sold'].transform('mean')
above_avg_sales = sales_df[sales_df['Units Sold'] > avg_units_by_product]
print(above_avg_sales.shape)
Solution 11: Missing Data Imputation
Show Solution
# Solution:
url = 'https://raw.githubusercontent.com/Gaurav0963/pandas-data-analysis/refs/heads/main/aps_failure_sample.csv'
df = pd.read_csv(url, na_values='na')
# 1. Calculate total missing values
total_missing = df.isnull().sum().sum()
print(f"Total missing values: {total_missing}")
# 2. Impute missing values with the mean of each column
df_imputed = df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Verification (optional): Check if any NaNs are left in the new DataFrame
print(f"Missing values after imputation: {df_imputed.isnull().sum().sum()}")