# Program 6
# Identify frequent item sets for a given transaction data set.
# The exercise names the Apriori algorithm; this script mines the itemsets
# with mlxtend's FP-Growth implementation instead (see
# find_frequent_itemsets for how to switch to Apriori).
# Dataset Link: https://www.kaggle.com/datasets/prasad22/retail-transactions-dataset
#
# Requires the third-party package mlxtend:
#     pip install mlxtend
# (in a notebook, run `%pip install mlxtend` in its own cell; the
# `!pip ...` shell escape is not valid Python and must not live in this file).

import ast

import pandas as pd

# Step 2 input: path of the downloaded CSV file. Left blank on purpose;
# set it before running.
DATASET_PATH = r" "

# Step 4: number of rows to sample to reduce runtime.
# Increase for better results if the system allows; None disables sampling.
SAMPLE_SIZE = 20000
RANDOM_STATE = 1

# Step 7: minimum fraction of transactions an itemset must appear in.
MIN_SUPPORT = 0.0005

# Step 8: how many of the most frequent itemsets to display.
TOP_N = 10


def parse_products(value):
    """Return the normalised product names held in one 'Product' cell.

    A cell may hold a single product name or a stringified Python list such
    as "['Ketchup', 'Soap']". Every name is stripped of surrounding
    whitespace and lower-cased. Text that looks like a list but cannot be
    parsed is kept as a single item.

    NOTE(review): the linked Kaggle dataset appears to store each basket as
    a stringified list in 'Product' -- confirm against the CSV.
    """
    text = str(value).strip()
    items = [text]
    if text.startswith("[") and text.endswith("]"):
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            parsed = None
        if isinstance(parsed, (list, tuple)):
            items = [str(item) for item in parsed]
    return [item.strip().lower() for item in items]


def build_transactions(df, sample_size=SAMPLE_SIZE, random_state=RANDOM_STATE):
    """Group cleaned product names into one list per Transaction_ID.

    Args:
        df: DataFrame with 'Transaction_ID' and 'Product' columns.
        sample_size: maximum number of rows to keep; None keeps all rows.
        random_state: seed for the row sample, for reproducible results.

    Returns:
        A Series indexed by Transaction_ID whose values are lists of
        product names.
    """
    # Only sample when there is something to cut: DataFrame.sample raises
    # ValueError if n exceeds the number of rows.
    # NOTE(review): rows, not transactions, are sampled; if one transaction
    # spans several rows this can truncate its basket.
    if sample_size is not None and len(df) > sample_size:
        df = df.sample(n=sample_size, random_state=random_state)

    # Step 3: clean the 'Product' column (one list of names per row).
    products = df["Product"].map(parse_products)

    # Step 5: merge the per-row lists of each transaction into one basket.
    return products.groupby(df["Transaction_ID"]).apply(
        lambda baskets: [item for basket in baskets for item in basket]
    )


def find_frequent_itemsets(transactions, min_support=MIN_SUPPORT):
    """One-hot encode the transactions and mine frequent itemsets.

    Args:
        transactions: iterable of item lists, one per transaction.
        min_support: minimum support threshold passed to FP-Growth.

    Returns:
        The DataFrame produced by mlxtend's fpgrowth, with 'support' and
        'itemsets' columns.
    """
    # Imported here so the data-preparation helpers above stay importable
    # without mlxtend installed. `mlxtend.frequent_patterns.apriori`
    # accepts the same three arguments used below if the Apriori algorithm
    # itself is required.
    from mlxtend.frequent_patterns import fpgrowth
    from mlxtend.preprocessing import TransactionEncoder

    baskets = list(transactions)

    # Step 6: one-hot encode the transaction data.
    encoder = TransactionEncoder()
    encoded = encoder.fit(baskets).transform(baskets)
    df_encoded = pd.DataFrame(encoded, columns=encoder.columns_)

    # Step 7: apply FP-Growth to find frequent itemsets.
    return fpgrowth(df_encoded, min_support=min_support, use_colnames=True)


def main(csv_path=DATASET_PATH):
    """Load the dataset, mine frequent itemsets and print the top ones.

    Raises:
        ValueError: if csv_path is blank.
    """
    if not csv_path.strip():
        raise ValueError(
            "DATASET_PATH is blank; set it to the downloaded CSV file."
        )

    # Step 2: load the dataset.
    df = pd.read_csv(csv_path)

    transactions = build_transactions(df)
    frequent_itemsets = find_frequent_itemsets(transactions)

    # Step 8: display top frequent itemsets.
    print("Frequent Itemsets:")
    print(
        frequent_itemsets.sort_values(by="support", ascending=False).head(TOP_N)
    )
    return frequent_itemsets


if __name__ == "__main__":
    main()