# -*- coding: utf-8 -*-
"""Huynh_Do_LAB1.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1XlOfPlKMp9s0w6x7KKahi2WRsDrpFv8M
"""

# Import required Python packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns  # Optional for better visuals
from google.colab import files

# Upload 'Cereals.csv' file.
# files.upload() opens Colab's file picker and returns a dict keyed by the
# name each uploaded file was stored under.
uploaded = files.upload()

# Load the dataset.
# Read the file that was actually uploaded in this run rather than a fixed
# name: when the notebook is re-run, Colab stores the new upload under a
# de-duplicated name such as 'Cereals (1).csv', so a hard-coded "Cereals.csv"
# would silently load the stale copy from the earlier run. If nothing matching
# was uploaded (e.g. the dialog was cancelled), fall back to "Cereals.csv" so
# a file already present in the working directory still loads as before.
csv_name = next(
    (
        name
        for name in uploaded
        if name.startswith("Cereals") and name.endswith(".csv")
    ),
    "Cereals.csv",
)
df = pd.read_csv(csv_name)
df.head()

# Summary statistics: one row per column described by DataFrame.describe(),
# with an extra 'median' column aligned on the column names.
summary_stats = (
    df.describe()
    .transpose()
    .assign(median=df.median(numeric_only=True))
)
summary_stats

# Correlation matrix
# Keep only the numeric columns, then compute their pairwise Pearson
# correlations (Pearson is the DataFrame.corr default, spelled out here).
quantitative_df = df.select_dtypes(include="number")
correlation_matrix = quantitative_df.corr(method="pearson")
correlation_matrix

# Plot histogram for 'rating'
# Same figure as before, built through the object-oriented Axes API instead
# of the implicit pyplot "current figure" state.
rating_fig, rating_ax = plt.subplots(figsize=(8, 5))
rating_ax.hist(df['rating'], bins=15, edgecolor='black')
rating_ax.set_title('Histogram of Cereal Ratings')
rating_ax.set_xlabel('Rating')
rating_ax.set_ylabel('Frequency')
rating_ax.grid(True)
plt.show()