import os
import re
import shutil

# 📁 Input folder containing HTML files
input_folder = "v-html"

# 📁 Output base folder
output_base = "v-html-dir"

# 🧠 Regex to extract topic name (between number and "Class")
topic_pattern = r'^\d+\.\s*(.*?)\s+Class'

# Compiled once up front so the per-file loop does not repeat the lookup.
_TOPIC_RE = re.compile(topic_pattern)


def extract_safe_topic(filename):
    """Return the directory-safe topic name parsed from *filename*.

    The topic is the text between the leading "<number>." prefix and the
    word "Class". Characters other than word characters, whitespace and
    hyphens are removed, and spaces are replaced with underscores.

    Args:
        filename: Bare file name (no directory part) to parse.

    Returns:
        The sanitized topic name, or ``None`` when the file name does not
        match the pattern or the topic is empty after sanitizing.
    """
    match = _TOPIC_RE.search(filename)
    if match is None:
        return None

    topic = match.group(1).strip()
    safe_topic = re.sub(r'[^\w\s-]', '', topic).replace(" ", "_")

    # An empty name would make os.path.join() resolve to the output base
    # itself, dumping the file there instead of into a topic directory.
    return safe_topic or None


def organize_html_files(source_dir, target_base):
    """Move each ``*.html`` file in *source_dir* into a per-topic directory.

    Every regular file whose name ends in ".html" and yields a topic is moved
    to ``<target_base>/<topic>/<filename>``; the topic directory is created on
    demand. Files without a usable topic are reported and left in place.

    Args:
        source_dir: Directory that is scanned (non-recursively).
        target_base: Directory under which topic directories are created.

    Returns:
        The number of files that were moved.
    """
    try:
        # Sorted so that the processing order and log output are deterministic.
        filenames = sorted(os.listdir(source_dir))
    except (FileNotFoundError, NotADirectoryError):
        print(f"Input folder '{source_dir}' not found; nothing to do.")
        return 0

    moved = 0
    for filename in filenames:
        if not filename.endswith(".html"):
            continue

        file_path = os.path.join(source_dir, filename)
        # os.listdir() also returns directories; only regular files are moved.
        if not os.path.isfile(file_path):
            continue

        # 🔍 Extract topic name
        safe_topic = extract_safe_topic(filename)
        if safe_topic is None:
            print(f"Skipped '{filename}': Topic name not found.")
            continue

        # 🗂️ Create topic directory
        topic_dir = os.path.join(target_base, safe_topic)
        os.makedirs(topic_dir, exist_ok=True)

        # 🚚 Move file
        shutil.move(file_path, os.path.join(topic_dir, filename))
        print(f"Moved '{filename}' to '{topic_dir}'")
        moved += 1

    return moved


# 🔁 Process all HTML files in the configured input folder
organize_html_files(input_folder, output_base)