Update transform.py: dynamic brand mapping, transaction-date age, remove API upload

2026-04-28 17:54:16 +00:00
--- a/transform.py
+++ b/transform.py
@@ -4,15 +4,16 @@

 # بسم الله الرحمن الرحيم

+# بسم الله الرحمن الرحيم
+
 import base64
 import json
-import requests
-import os
 import pandas as pd
+import os
+import numpy as np
 from datetime import datetime
 import io
-import numpy as np
-import re
+from difflib import SequenceMatcher

 def sanitize_text(value):
    """
@@ -71,47 +72,99 @@ def clean_site_name(name):
    
    return name.title()

-def clean_brand(brand):
+def create_brand_mapping(series):
    """
-    Clean Brand names by standardizing similar values
+    Dynamically create brand mapping by analyzing unique brand names
+    Groups names by their leading characters (up to 5) and by substring containment
+    """
+    # Get unique brand values (excluding nulls and unknowns)
+    unique_brands = series.dropna().unique()
+    unique_brands = [str(b).strip() for b in unique_brands if str(b).strip() != "" and str(b).strip().lower() != "unknown"]
+    
+    # Dictionary to store mappings
+    brand_map = {}
+    
+    # First, group by the longest prefix that fits: 5, then 4, then 3 characters
+    brand_groups = {}
+    
+    for brand in unique_brands:
+        brand_lower = brand.lower()
+        
+        # Try different prefix lengths
+        for prefix_len in [5, 4, 3]:
+            if len(brand_lower) >= prefix_len:
+                prefix = brand_lower[:prefix_len]
+                if prefix not in brand_groups:
+                    brand_groups[prefix] = []
+                brand_groups[prefix].append(brand)
+                break
+    
+    # For each group, find the most common/canonical name
+    for prefix, brands in brand_groups.items():
+        if len(brands) == 1:
+            # Single brand - use it as is (title-cased)
+            brand_map[brands[0].lower()] = brands[0].title()
+        else:
+            # Multiple brands with same prefix - find the most frequent or common one
+            # Count occurrences in the original series
+            brand_counts = series.value_counts()
+            
+            # Find the brand with highest count in this group
+            best_match = max(brands, key=lambda b: brand_counts.get(b, 0))
+            canonical_name = best_match.title()
+            
+            # Map all variations to the canonical name
+            for brand in brands:
+                brand_map[brand.lower()] = canonical_name
+    
+    # Also check for brands that are substrings of others
+    sorted_brands = sorted(unique_brands, key=len, reverse=True)
+    for i, long_brand in enumerate(sorted_brands):
+        long_lower = long_brand.lower()
+        for short_brand in sorted_brands[i+1:]:
+            short_lower = short_brand.lower()
+            if short_lower in long_lower and len(short_lower) > 3:
+                # Short brand is a substring of long brand
+                if short_brand.lower() not in brand_map:
+                    brand_map[short_lower] = long_brand.title()
+    
+    return brand_map
+
+def clean_brand_dynamic(brand, brand_mapping):
+    """
+    Clean Brand names using dynamic mapping
    """
    if pd.isna(brand) or brand == "":
        return "Unknown"
    
-    brand = str(brand).strip().lower()
+    brand_str = str(brand).strip()
+    brand_lower = brand_str.lower()
    
-    # Brand variations mapping
-    brand_mapping = {
-        'nike': 'Nike',
-        'nik e': 'Nike',
-        'ni ke': 'Nike',
-        'adidas': 'Adidas',
-        'addidas': 'Adidas',
-        'adidas ': 'Adidas',
-        'puma': 'Puma',
-        'pum a': 'Puma',
-        'reebok': 'Reebok',
-        'reebok ': 'Reebok',
-        'reeb ok': 'Reebok',
-        'gucci': 'Gucci',
-        'gucc i': 'Gucci',
-        'chanel': 'Chanel',
-        'chan el': 'Chanel'
-    }
+    # Check if we have a mapping for this brand
+    if brand_lower in brand_mapping:
+        return brand_mapping[brand_lower]
    
+    # Try partial matching using the first three characters
    for key, value in brand_mapping.items():
-        if key in brand:
-            return value
+        # Check if brand shares its first three characters with a known key
+        if len(brand_lower) >= 3 and len(key) >= 3:
+            if brand_lower[:3] == key[:3]:
+                return value
    
-    return brand.title()
+    # If not found, return title case
+    return brand_str.title()

-def calculate_age_from_dob(dob_value):
+def calculate_age_from_dob(dob_value, transaction_date):
    """
-    Convert DOB to age, handle 1900-01-01 as Unknown
+    Convert DOB to age based on transaction date, not today's date
+    Handles 1900-01-01 as Unknown
    """
    if pd.isna(dob_value) or dob_value == "":
        return "Unknown"
    
+    if pd.isna(transaction_date) or transaction_date == "":
+        return "Unknown"
+    
    dob_str = str(dob_value).strip()
    
    # Check for the placeholder date
@@ -119,16 +172,25 @@ def calculate_age_from_dob(dob_value):
        return "Unknown"
    
    try:
-        # Try to parse the date
+        # Parse DOB
        if '-' in dob_str:
-            dob = pd.to_datetime(dob_str.split()[0])  # Handle datetime strings
+            dob = pd.to_datetime(dob_str.split()[0])
        elif '/' in dob_str:
            dob = pd.to_datetime(dob_str)
        else:
            return "Unknown"
        
-        today = datetime.now()
-        age = today.year - dob.year - ((today.month, today.day) < (dob.month, dob.day))
+        # Parse Transaction Date
+        trans_date_str = str(transaction_date).strip()
+        if '-' in trans_date_str:
+            trans_date = pd.to_datetime(trans_date_str.split()[0])
+        elif '/' in trans_date_str:
+            trans_date = pd.to_datetime(trans_date_str)
+        else:
+            return "Unknown"
+        
+        # Calculate age at time of transaction
+        age = trans_date.year - dob.year - ((trans_date.month, trans_date.day) < (dob.month, dob.day))
        
        if age < 0 or age > 120:  # Sanity check
            return "Unknown"
@@ -137,6 +199,48 @@ def calculate_age_from_dob(dob_value):
    except:
        return "Unknown"

+def calculate_registration_duration(registration_date, transaction_date):
+    """
+    Calculate number of days between registration and transaction
+    """
+    if pd.isna(registration_date) or registration_date == "":
+        return "Unknown"
+    
+    if pd.isna(transaction_date) or transaction_date == "":
+        return "Unknown"
+    
+    try:
+        # Parse Registration Date
+        reg_str = str(registration_date).strip()
+        if '-' in reg_str:
+            reg_date = pd.to_datetime(reg_str.split()[0])
+        elif '/' in reg_str:
+            reg_date = pd.to_datetime(reg_str)
+        else:
+            return "Unknown"
+        
+        # Parse Transaction Date
+        trans_str = str(transaction_date).strip()
+        if '-' in trans_str:
+            trans_date = pd.to_datetime(trans_str.split()[0])
+        elif '/' in trans_str:
+            trans_date = pd.to_datetime(trans_str)
+        else:
+            return "Unknown"
+        
+        # Calculate days difference
+        days_diff = (trans_date - reg_date).days
+        
+        if days_diff < 0:
+            return "0"  # Transaction before registration - treat as 0
+        
+        if days_diff > 3650:  # Cap at 10 years (sanity check)
+            return "3650+"
+        
+        return days_diff
+    except:
+        return "Unknown"
+
 def merge_contact_methods(row):
    """
    Merge Email, SMS, Mail, Phone into one column with priority order
@@ -157,7 +261,7 @@ def merge_contact_methods(row):
    
    return ','.join(contact_methods)  # Return all methods as comma-separated

-def extract_date_components(date_value, column_name):
+def extract_date_components(date_value, column_name, reference_date=None):
    """
    Extract Year, Month, TimeOfMonth, Day from date
    """
@@ -269,15 +373,21 @@ def transform_dataframe(df):
    if 'Gender' in df.columns:
        print("   ✅ Keeping 'Gender'")
    
-    # G > Convert DOB to Age
-    if 'DOB' in df.columns:
-        df['Age'] = df['DOB'].apply(calculate_age_from_dob)
+    # G > Convert DOB to Age (using TransactionDate as reference)
+    if 'DOB' in df.columns and 'TransactionDate' in df.columns:
+        df['Age'] = df.apply(lambda row: calculate_age_from_dob(row['DOB'], row['TransactionDate']), axis=1)
        df = df.drop(columns=['DOB'])
-        print("   ✅ Converted 'DOB' to 'Age' (1900-01-01 → Unknown)")
+        print("   ✅ Converted 'DOB' to 'Age' (using TransactionDate as reference)")
+    elif 'DOB' in df.columns:
+        print("   ⚠️ 'DOB' found but 'TransactionDate' missing - cannot calculate Age properly")
    
-    # H > Keep RegistrationDate
-    if 'RegistrationDate' in df.columns:
-        print("   ✅ Keeping 'RegistrationDate'")
+    # H > Convert RegistrationDate to duration (days since registration)
+    if 'RegistrationDate' in df.columns and 'TransactionDate' in df.columns:
+        df['DaysSinceRegistration'] = df.apply(lambda row: calculate_registration_duration(row['RegistrationDate'], row['TransactionDate']), axis=1)
+        df = df.drop(columns=['RegistrationDate'])
+        print("   ✅ Converted 'RegistrationDate' to 'DaysSinceRegistration' (days between registration and transaction)")
+    elif 'RegistrationDate' in df.columns:
+        print("   ⚠️ 'RegistrationDate' found but 'TransactionDate' missing - keeping as-is")
    
    # I > Drop FirstLoginDate
    if 'FirstLoginDate' in df.columns:
@@ -370,19 +480,18 @@ def transform_dataframe(df):
    if 'Tier' in df.columns:
        print("   ✅ Keeping 'Tier'")
    
-    # AE, AF > Merge TransactionDate and CreateDate into date components
-    date_columns_to_process = []
+    # AE > Convert TransactionDate into date components
    if 'TransactionDate' in df.columns:
-        date_columns_to_process.append(('TransactionDate', 'Transaction'))
-    if 'CreateDate' in df.columns:
-        date_columns_to_process.append(('CreateDate', 'Create'))
-    
-    for date_col, prefix in date_columns_to_process:
-        date_components = df[date_col].apply(lambda x: extract_date_components(x, prefix))
+        date_components = df['TransactionDate'].apply(lambda x: extract_date_components(x, 'Transaction'))
        date_df = pd.DataFrame(date_components.tolist())
        df = pd.concat([df, date_df], axis=1)
-        df = df.drop(columns=[date_col])
-        print(f"   ✅ Converted '{date_col}' into 4 columns ({prefix}_Year, {prefix}_Month, {prefix}_TimeOfMonth, {prefix}_Day)")
+        df = df.drop(columns=['TransactionDate'])
+        print("   ✅ Converted 'TransactionDate' into 4 columns (Transaction_Year, Transaction_Month, Transaction_TimeOfMonth, Transaction_Day)")
+    
+    # AF > Drop CreateDate (as requested - it's the same as TransactionDate)
+    if 'CreateDate' in df.columns:
+        df = df.drop(columns=['CreateDate'])
+        print("   🗑️ Dropped 'CreateDate' (duplicate of TransactionDate)")
    
    # AG > Drop MemberId
    if 'MemberId' in df.columns:
@@ -394,135 +503,128 @@ def transform_dataframe(df):
        df = df.drop(columns=['SiteId'])
        print("   🗑️ Dropped 'SiteId'")
    
-    # AI > Drop ParentSiteId
-    if 'ParentSiteId' in df.columns:
-        df = df.drop(columns=['ParentSiteId'])
-        print("   🗑️ Dropped 'ParentSiteId'")
-    
-    # AJ > Keep and clean SiteName
+    # AI > Clean and keep SiteName
    if 'SiteName' in df.columns:
        df['SiteName'] = df['SiteName'].apply(clean_site_name)
        print("   ✅ Kept and cleaned 'SiteName'")
    
-    # AK > Drop SiteType
-    if 'SiteType' in df.columns:
-        df = df.drop(columns=['SiteType'])
-        print("   🗑️ Dropped 'SiteType'")
-    
-    # AL > Keep Quantity
+    # AJ > Keep Quantity
    if 'Quantity' in df.columns:
        print("   ✅ Keeping 'Quantity'")
    
-    # AM > Keep Amount
+    # AK > Keep Amount
    if 'Amount' in df.columns:
        print("   ✅ Keeping 'Amount'")
    
-    # AN > Drop RewardType
+    # AL > Drop RewardType
    if 'RewardType' in df.columns:
        df = df.drop(columns=['RewardType'])
        print("   🗑️ Dropped 'RewardType'")
    
-    # AO > Keep Points
+    # AM > Keep Points
    if 'Points' in df.columns:
        print("   ✅ Keeping 'Points'")
    
-    # AP > Drop trxDetailId
+    # AN > Drop trxDetailId
    if 'trxDetailId' in df.columns:
        df = df.drop(columns=['trxDetailId'])
        print("   🗑️ Dropped 'trxDetailId'")
    
-    # AQ > Drop TrxId
+    # AO > Drop TrxId
    if 'TrxId' in df.columns:
        df = df.drop(columns=['TrxId'])
        print("   🗑️ Dropped 'TrxId'")
    
-    # AR > Drop TransactionStatusId
+    # AP > Drop TransactionStatusId
    if 'TransactionStatusId' in df.columns:
        df = df.drop(columns=['TransactionStatusId'])
        print("   🗑️ Dropped 'TransactionStatusId'")
    
-    # AS > Keep TransactionStatusName
+    # AQ > Keep TransactionStatusName
    if 'TransactionStatusName' in df.columns:
        print("   ✅ Keeping 'TransactionStatusName'")
    
-    # AT > Drop TransactionTypeId
+    # AR > Drop TransactionTypeId
    if 'TransactionTypeId' in df.columns:
        df = df.drop(columns=['TransactionTypeId'])
        print("   🗑️ Dropped 'TransactionTypeId'")
    
-    # AU > Keep TransactionTypeName
+    # AS > Keep TransactionTypeName
    if 'TransactionTypeName' in df.columns:
        print("   ✅ Keeping 'TransactionTypeName'")
    
-    # AV > Drop Reportable
+    # AT > Drop Reportable
    if 'Reportable' in df.columns:
        df = df.drop(columns=['Reportable'])
        print("   🗑️ Dropped 'Reportable'")
    
-    # AW > Keep TransactionItemCode
+    # AU > Keep TransactionItemCode
    if 'TransactionItemCode' in df.columns:
        print("   ✅ Keeping 'TransactionItemCode'")
    
-    # AX > Keep AnalysisCode1
+    # AV > Keep AnalysisCode1
    if 'AnalysisCode1' in df.columns:
        print("   ✅ Keeping 'AnalysisCode1'")
    
-    # AY > Keep AnalysisCode2
+    # AW > Keep AnalysisCode2
    if 'AnalysisCode2' in df.columns:
        print("   ✅ Keeping 'AnalysisCode2'")
    
-    # AZ > Keep AnalysisCode3
+    # AX > Keep AnalysisCode3
    if 'AnalysisCode3' in df.columns:
        print("   ✅ Keeping 'AnalysisCode3'")
    
-    # BA > Keep AnalysisCode4
+    # AY > Keep AnalysisCode4
    if 'AnalysisCode4' in df.columns:
        print("   ✅ Keeping 'AnalysisCode4'")
    
-    # BB > Keep and clean Brand
+    # AZ > Dynamically clean Brand
    if 'Brand' in df.columns:
-        df['Brand'] = df['Brand'].apply(clean_brand)
-        print("   ✅ Kept and cleaned 'Brand'")
+        print("   🔍 Analyzing unique brand names to create dynamic mapping...")
+        brand_mapping = create_brand_mapping(df['Brand'])
+        print(f"   📊 Created mapping for {len(brand_mapping)} unique brand variations")
+        df['Brand'] = df['Brand'].apply(lambda x: clean_brand_dynamic(x, brand_mapping))
+        print("   ✅ Kept and dynamically cleaned 'Brand'")
    
-    # BC > Keep AnalysisCode6
+    # BA > Keep AnalysisCode6
    if 'AnalysisCode6' in df.columns:
        print("   ✅ Keeping 'AnalysisCode6'")
    
-    # BD > Keep AnalysisCode7
+    # BB > Keep AnalysisCode7
    if 'AnalysisCode7' in df.columns:
        print("   ✅ Keeping 'AnalysisCode7'")
    
-    # BE > Keep AnalysisCode8
+    # BC > Keep AnalysisCode8
    if 'AnalysisCode8' in df.columns:
        print("   ✅ Keeping 'AnalysisCode8'")
    
-    # BF > Keep Price
+    # BD > Keep Price
    if 'Price' in df.columns:
        print("   ✅ Keeping 'Price'")
    
-    # BG > Keep AnalysisCode10
+    # BE > Keep AnalysisCode10
    if 'AnalysisCode10' in df.columns:
        print("   ✅ Keeping 'AnalysisCode10'")
    
-    # BH > Keep InvalidReason
+    # BF > Keep InvalidReason
    if 'InvalidReason' in df.columns:
        print("   ✅ Keeping 'InvalidReason'")
    
-    # BI > Drop Description
+    # BG > Drop Description
    if 'Description' in df.columns:
        df = df.drop(columns=['Description'])
        print("   🗑️ Dropped 'Description'")
    
-    # BJ > Drop PromotionId
+    # BH > Drop PromotionId
    if 'PromotionId' in df.columns:
        df = df.drop(columns=['PromotionId'])
        print("   🗑️ Dropped 'PromotionId'")
    
-    # BK > Keep PromotionName
+    # BI > Keep PromotionName
    if 'PromotionName' in df.columns:
        print("   ✅ Keeping 'PromotionName'")
    
-    # BL > Convert PromotionStartDate into 4 columns
+    # BJ > Convert PromotionStartDate into 4 columns
    if 'PromotionStartDate' in df.columns:
        date_components = df['PromotionStartDate'].apply(lambda x: extract_date_components(x, 'PromotionStart'))
        date_df = pd.DataFrame(date_components.tolist())
@@ -530,32 +632,32 @@ def transform_dataframe(df):
        df = df.drop(columns=['PromotionStartDate'])
        print("   ✅ Converted 'PromotionStartDate' into 4 columns (PromotionStart_Year, PromotionStart_Month, PromotionStart_TimeOfMonth, PromotionStart_Day)")
    
-    # BM > Drop PromotionEndDate
+    # BK > Drop PromotionEndDate
    if 'PromotionEndDate' in df.columns:
        df = df.drop(columns=['PromotionEndDate'])
        print("   🗑️ Dropped 'PromotionEndDate'")
    
-    # BN > Drop PromotionOfferTypeId
+    # BL > Drop PromotionOfferTypeId
    if 'PromotionOfferTypeId' in df.columns:
        df = df.drop(columns=['PromotionOfferTypeId'])
        print("   🗑️ Dropped 'PromotionOfferTypeId'")
    
-    # BO > Drop PromotionOfferTypeName
+    # BM > Drop PromotionOfferTypeName
    if 'PromotionOfferTypeName' in df.columns:
        df = df.drop(columns=['PromotionOfferTypeName'])
        print("   🗑️ Dropped 'PromotionOfferTypeName'")
    
-    # BP > Drop PromotionSiteId
+    # BN > Drop PromotionSiteId
    if 'PromotionSiteId' in df.columns:
        df = df.drop(columns=['PromotionSiteId'])
        print("   🗑️ Dropped 'PromotionSiteId'")
    
-    # BQ > Drop PromotionSite
+    # BO > Drop PromotionSite
    if 'PromotionSite' in df.columns:
        df = df.drop(columns=['PromotionSite'])
        print("   🗑️ Dropped 'PromotionSite'")
    
-    # BR > Drop QualifyingProductQuantity
+    # BP > Drop QualifyingProductQuantity
    if 'QualifyingProductQuantity' in df.columns:
        df = df.drop(columns=['QualifyingProductQuantity'])
        print("   🗑️ Dropped 'QualifyingProductQuantity'")
@@ -619,64 +721,6 @@ def read_and_process_file(file_path, max_rows=5000):
        traceback.print_exc()
        return None, None, None

-def encode_file_to_base64(file_content):
-    """
-    Encode file content to base64 string
-    """
-    try:
-        base64_encoded = base64.b64encode(file_content).decode('ascii')
-        return base64_encoded
-    except Exception as e:
-        print(f"❌ Error encoding to base64: {e}")
-        cleaned_content = bytes([b for b in file_content if b < 128])
-        base64_encoded = base64.b64encode(cleaned_content).decode('ascii')
-        return base64_encoded
-
-def send_to_api(file_name, base64_data):
-    """
-    Send the encoded file data to the API
-    """
-    api_url = "https://problab-api-0004c00ee319.hosted.ghaymah.systems/process_dataset"
-    
-    payload = {
-        "event": {
-            "data": {
-                "new": {
-                    "id": "snipp_transformed",
-                    "file_data": base64_data,
-                    "file_name": file_name,
-                    "hasHeader": True,
-                    "delimiter": ","
-                }
-            }
-        }
-    }
-    
-    headers = {
-        'Content-Type': 'application/json',
-        'User-Agent': 'Data-Transformer/1.0',
-        'Accept': 'application/json'
-    }
-    
-    try:
-        print(f"\n🔄 Sending transformed file '{file_name}' to API...")
-        print(f"📊 Base64 data size: {len(base64_data)} characters")
-        
-        response = requests.post(api_url, json=payload, headers=headers, timeout=60)
-        
-        if response.status_code == 200:
-            print("✅ File sent successfully!")
-            print(f"📋 Response status: {response.status_code}")
-        else:
-            print(f"❌ Failed to send file. Status code: {response.status_code}")
-            print(f"📋 Response: {response.text[:500]}")
-        
-        return response
-    
-    except Exception as e:
-        print(f"❌ Error occurred while sending to API: {e}")
-        return None
-
 def save_clean_dataset(df, file_name):
    """
    Save the transformed dataset locally
@@ -693,10 +737,10 @@ def save_clean_dataset(df, file_name):

 def main():
    """
-    Main function to execute all transformations and upload
+    Main function to execute all transformations
    """
    print("=" * 80)
-    print("🚢 Ship Performance Dataset - Complete Transformation & Upload")
+    print("🚢 Ship Performance Dataset - Complete Transformation")
    print("=" * 80)
    
    # Specify the path to your Excel file
@@ -710,21 +754,12 @@ def main():
        print("\n❌ Process failed. Please check if the file exists.")
        return
    
-    # Encode to base64
-    print("\n2️⃣ Encoding transformed file to base64...")
-    base64_data = encode_file_to_base64(file_content)
-    print(f"   ✅ Encoding complete ({len(base64_data)} characters)")
-    
-    # Send to API
-    print("\n3️⃣ Sending transformed data to API...")
-    response = send_to_api(modified_file_name, base64_data)
-    
    # Save locally
    save_clean_dataset(df, modified_file_name)
    
    # Save transformation summary
    summary_file = f'transformation_summary_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt'
-    with open(summary_file, 'w') as f:
+    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("TRANSFORMATION SUMMARY\n")
        f.write("=" * 50 + "\n\n")
        f.write(f"Original file: {excel_file_path}\n")
@@ -733,23 +768,34 @@ def main():
        f.write("Final columns list:\n")
        for col in df.columns:
            f.write(f"  - {col}\n")
+        f.write("\n" + "=" * 50 + "\n\n")
+        f.write("Key transformations applied:\n")
+        f.write("  - Added IsRecurringCustomer flag (based on multiple transactions per Userid)\n")
+        f.write("  - Converted DOB to Age (using TransactionDate as reference, not today's date)\n")
+        f.write("  - Converted RegistrationDate to DaysSinceRegistration (days between registration and transaction)\n")
+        f.write("  - Dropped CreateDate (duplicate of TransactionDate)\n")
+        f.write("  - Dynamically cleaned Brand names using prefix matching\n")
+        f.write("  - Cleaned SiteName variations\n")
+        f.write("  - Merged contact method columns into single ContactMethod field\n")
+        f.write("  - Split date columns into Year, Month, TimeOfMonth, Day components\n")
+        f.write("  - Removed redundant columns (StoreId, Store, SiteType, etc.)\n")
    
    print(f"\n📄 Transformation summary saved: {summary_file}")
    
    print("\n" + "=" * 80)
-    if response and response.status_code == 200:
-        print("🎉 All transformations completed and file uploaded successfully! إن شاء الله")
-        print(f"   ✅ {len(df)} rows processed")
-        print(f"   ✅ {len(df.columns)} columns in final dataset")
-        print("   ✅ Recurring customer flag added")
-        print("   ✅ DOB converted to Age")
-        print("   ✅ Contact methods merged")
-        print("   ✅ Date columns split into components")
-        print("   ✅ SiteName and Brand cleaned")
-    else:
-        print("⚠️ Process completed but API upload may have failed.")
-        print("   💡 Transformed file saved locally for inspection.")
+    print("🎉 All transformations completed successfully! إن شاء الله")
+    print(f"   ✅ {len(df)} rows processed")
+    print(f"   ✅ {len(df.columns)} columns in final dataset")
+    print("   ✅ Recurring customer flag added")
+    print("   ✅ DOB converted to Age (using transaction date)")
+    print("   ✅ RegistrationDate converted to DaysSinceRegistration")
+    print("   ✅ CreateDate dropped (duplicate)")
+    print("   ✅ Contact methods merged")
+    print("   ✅ Date columns split into components")
+    print("   ✅ SiteName and Brand dynamically cleaned")
    print("=" * 80)

 if __name__ == "__main__":
-    main()
+    main()
+
+