import os
import time
from bs4 import BeautifulSoup
from markdownify import markdownify as md

def is_inside_ul(tag):
    """Return True if ``tag`` is a <ul> element or has one among its ancestors.

    The walk starts at ``tag`` itself and follows ``.parent`` links until a
    node named 'ul' is found or a falsy node (e.g. ``None``) ends the chain.
    """
    node = tag
    # Keep climbing while there is a node and it is not the <ul> we want.
    while node and node.name != 'ul':
        node = node.parent
    # A truthy node here means the loop stopped on a <ul>.
    return bool(node)

def _is_attached(node, root):
    """Return True when ``root`` is ``node`` itself or one of its ancestors.

    Follows ``.parent`` links and compares by identity, so the cost is
    proportional to the node's depth and a detached element that merely
    looks like an attached one is never mistaken for it.
    """
    while node is not None:
        if node is root:
            return True
        node = node.parent
    return False


def custom_html_to_md(html):
    """Convert an HTML string to Markdown with two custom rewrites.

    Before handing the document to ``markdownify``:

    1. Every <strong> that is not inside a <ul> becomes an <h2> holding the
       <strong>'s stripped text. If the <strong> has an ancestor <span> that
       is still part of the document, that whole <span> is replaced by the
       heading; otherwise only the <strong> is replaced.
    2. Every <span> whose ``style`` attribute contains the exact substring
       'text-decoration: underline' is replaced by the text ``==...==``
       wrapping the span's stripped text.

    Args:
        html: HTML markup to convert.

    Returns:
        The Markdown produced by ``markdownify`` with ATX headings and '-'
        as the bullet character.

    Replacement failures are reported with ``print`` and skipped; they do
    not abort the conversion.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Pass 1: promote <strong> outside of <ul> to <h2>.
    for strong in soup.find_all('strong'):
        if is_inside_ul(strong):
            continue

        heading_text = strong.get_text(strip=True)
        heading_tag = soup.new_tag('h2')
        heading_tag.string = heading_text

        parent_span = strong.find_parent('span')
        try:
            # An earlier iteration may already have replaced this span (or
            # one of its ancestors); only swap it out while it is still
            # reachable from the document root.
            # NOTE(review): replacing the span drops any other content the
            # span holds besides this <strong> -- confirm that is intended.
            if parent_span is not None and _is_attached(parent_span, soup):
                parent_span.replace_with(heading_tag)
            else:
                strong.replace_with(heading_tag)
        except Exception as e:
            print(f"⚠️ Skipped heading '{heading_text}' due to error: {e}")

    # Pass 2: replace underlined spans with ==text==.
    for span in soup.find_all('span'):
        style = span.get('style', '')
        if 'text-decoration: underline' not in style:
            continue

        underline_text = span.get_text(strip=True)
        markdown_underline = soup.new_string(f"=={underline_text}==")

        # Spans nested in an already-replaced span are no longer part of
        # the document; leave them alone.
        if not _is_attached(span, soup):
            continue
        try:
            span.replace_with(markdown_underline)
        except Exception as e:
            print(f"⚠️ Skipped underline '{underline_text}' due to error: {e}")

    # Convert to Markdown with '-' for bullets
    return md(str(soup), heading_style="ATX", bullets='-')

def convert_directory(input_dir, output_dir):
    """Convert every ``.html`` file under ``input_dir`` to Markdown.

    The tree is walked recursively. Each matching file is read as UTF-8,
    passed through ``custom_html_to_md`` and written as UTF-8 to the same
    relative location under ``output_dir`` with a ``.md`` extension.
    Destination folders are created on demand. A failure on one file is
    printed and does not stop the run; a summary with the number of
    converted files and the elapsed time is printed at the end.

    Args:
        input_dir: Root folder to scan for ``.html`` files.
        output_dir: Root folder that receives the ``.md`` files.
    """
    started_at = time.time()
    converted = 0

    for current_dir, _subdirs, names in os.walk(input_dir):
        for name in names:
            if not name.endswith(".html"):
                continue

            source_path = os.path.join(current_dir, name)

            # Mirror the source folder layout below the output root.
            subpath = os.path.relpath(current_dir, input_dir)
            target_dir = os.path.join(output_dir, subpath)
            os.makedirs(target_dir, exist_ok=True)

            stem, _extension = os.path.splitext(name)
            target_path = os.path.join(target_dir, stem + ".md")

            try:
                with open(source_path, "r", encoding="utf-8") as source_file:
                    html_text = source_file.read()

                markdown_text = custom_html_to_md(html_text)

                with open(target_path, "w", encoding="utf-8") as target_file:
                    target_file.write(markdown_text)

                print(f"✅ Converted: {source_path} → {target_path}")
                converted += 1
            except Exception as error:
                print(f"❌ Error converting {source_path}: {error}")

    duration = time.time() - started_at
    print(f"\n📁  Total files converted: {converted}")
    print(f"⏱ Time taken: {duration:.2f} seconds")

if __name__ == "__main__":
    # Script entry point: edit the two folder names below to point at your
    # own HTML source tree and Markdown destination tree.
    source_root = "v-html-dir"
    target_root = "v-md"

    convert_directory(input_dir=source_root, output_dir=target_root)