import datetime
import hashlib
import html
import re
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from flask import Flask, request, jsonify

# WSGI application object; the route decorators in this module register on it.
app = Flask(import_name=__name__)

def generate_external_id(name, date_start, notice_type, state_code):
    """Return a short, deterministic identifier for a notice.

    The four values are formatted and concatenated in argument order with
    no separator, encoded as UTF-8 and hashed with SHA-256.

    Returns:
        The first 16 characters of the hexadecimal digest.
    """
    fields = (name, date_start, notice_type, state_code)
    fingerprint = "".join(format(field) for field in fields)
    digest = hashlib.sha256(fingerprint.encode("utf-8"))
    return digest.hexdigest()[:16]

# Hostname suffixes of state government sites mapped to their state codes.
# Extend this table as needed.
_STATE_DOMAINS = {
    "ohio.gov": "OH",
    "mn.gov": "MN",
    "ca.gov": "CA",
    "ny.gov": "NY",
    "tx.gov": "TX",
}

# Only the lead-in phrase is case-insensitive. The captured name must be two
# or more capitalized words; a pattern-wide (?i) would let the capture run on
# into ordinary lowercase prose.
_HONOREE_RE = re.compile(
    r"(?i:honor(?:ing| of)? (?:the )?(?:late )?(?:former )?(?:Vice President )?)"
    r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
)

# Weekday, month, day and year are captured separately so the optional comma
# after the weekday cannot break the strptime() call.
_DATE_RE = re.compile(r"(\w+day),?\s+(\w+)\s+(\d{1,2}),\s+(\d{4})")

_DATE_PHRASE_RE = re.compile(
    r"(?:from|through|until).{0,120}(?:sunset|sunrise|day|week)", re.I
)

_GOVERNOR_RE = re.compile(r"Governor\s+[A-Z][a-z]+\s+[A-Z][a-z]+")


def _state_code_for_url(url):
    """Return the state code whose domain matches the URL's hostname, or ""."""
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken IPv6 literal).
        return ""
    for domain, code in _STATE_DOMAINS.items():
        # Match on a label boundary so "america.gov" is not taken for "ca.gov".
        if host == domain or host.endswith("." + domain):
            return code
    return ""


def _extract_dates(text):
    """Return (date_start, date_end) as YYYY-MM-DD strings, or ("", "").

    Every "Weekday, Month D, YYYY" occurrence is parsed independently and
    unparseable ones are skipped; the first and last parseable dates are used.
    """
    parsed = []
    for weekday, month, day, year in _DATE_RE.findall(text):
        try:
            when = datetime.datetime.strptime(
                f"{weekday} {month} {day} {year}", "%A %B %d %Y"
            )
        except ValueError:
            # e.g. "holiday" matched as a weekday, or an abbreviated month.
            continue
        parsed.append(when.date().isoformat())
    if not parsed:
        return "", ""
    return parsed[0], parsed[-1]


def _build_content_html(headline_text, paragraphs, url):
    """Return the notice body as HTML, escaping all scraped text."""
    parts = [f"<h2>{html.escape(headline_text, quote=False)}</h2>"]
    parts.extend(f"<p>{html.escape(p, quote=False)}</p>" for p in paragraphs)
    parts.append(f"<p>Source: {html.escape(url, quote=False)}</p>")
    return "".join(parts)


@app.route("/parse", methods=["GET"])
def parse_notice():
    """Fetch the page named by the ``url`` query parameter and extract a notice.

    Returns:
        A JSON object with the keys ``title``, ``content``, ``notice_type``,
        ``state_code``, ``date_start``, ``date_end``, ``source_url``,
        ``source_name``, ``external_id``, ``dateString``, ``status`` and
        ``official_name``.  Responds with HTTP 400 and an ``error`` message
        when ``url`` is missing, and HTTP 500 when the page cannot be fetched.
    """
    url = request.args.get("url")
    if not url:
        return jsonify({"error": "Missing url parameter"}), 400

    # NOTE(review): the URL is caller-supplied and fetched server-side, so this
    # endpoint can be used to reach internal hosts (SSRF). Consider an
    # allow-list of domains before exposing it publicly.
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
    except Exception as e:
        # Request boundary: any fetch failure is reported to the caller as JSON.
        return jsonify({"error": f"Failed to fetch: {e}"}), 500

    soup = BeautifulSoup(res.text, "html.parser")

    # --- Basic content extraction ---
    text = soup.get_text(separator="\n", strip=True)
    paragraphs = [
        para
        for para in (p.get_text(" ", strip=True) for p in soup.find_all("p"))
        if para
    ]

    # --- Honoree name ---
    name_match = _HONOREE_RE.search(text)
    honoree = name_match.group(1).strip() if name_match else ""

    # --- Title ---
    title = f"Honoring {honoree}" if honoree else "Flag Notice"

    # --- Notice type ---
    lowered = text.lower()
    is_federal = "white house" in lowered or "president" in lowered
    notice_type = "Federal" if is_federal else "Local"

    # --- State code (from the URL's hostname; federal notices carry none) ---
    state_code = "" if is_federal else _state_code_for_url(url)

    # --- Dates ---
    date_start, date_end = _extract_dates(text)

    # --- Date string ---
    match_phrase = _DATE_PHRASE_RE.search(text)
    date_phrase = match_phrase.group(0).strip() if match_phrase else ""

    # --- Source name ---
    gov_match = _GOVERNOR_RE.search(text)
    source_name = (
        f"Office of {gov_match.group(0)}" if gov_match else "The State Government"
    )

    # --- Content field (HTML) ---
    headline = soup.find("h1") or soup.find("title")
    # Fall back to the generated title when the heading is missing or empty.
    headline_text = (headline.get_text(strip=True) if headline else "") or title
    content_html = _build_content_html(headline_text, paragraphs, url)

    # --- External ID ---
    external_id = generate_external_id(honoree, date_start, notice_type, state_code)

    # --- Confidence & status ---
    # Without a recognised honoree the notice is left as a draft.
    status = "Publish" if honoree else "Draft"

    result = {
        "title": title,
        "content": content_html,
        "notice_type": notice_type,
        "state_code": state_code,
        "date_start": date_start,
        "date_end": date_end,
        "source_url": url,
        "source_name": source_name,
        "external_id": external_id,
        "dateString": date_phrase,
        "status": status,
        "official_name": honoree,
    }

    return jsonify(result)

if __name__ == "__main__":
    # Direct execution only: start Flask's built-in server on all interfaces.
    listen_options = {"host": "0.0.0.0", "port": 8080}
    app.run(**listen_options)
