import json
from lxml import etree

# Rows buffered per executemany()+commit; trades peak memory for fewer commits.
BATCH_SIZE = 5000

def import_sms_xml(conn, xml_path: str, service: str = "sms_backup"):
  """
  Stream-import SMS records from an XML backup into the `messages` table.

  Uses lxml's iterparse so the full document is never loaded into memory;
  rows are inserted in batches of BATCH_SIZE, with a commit per batch so
  progress survives an interrupted import.

  Parameters:
    conn: an open DB-API connection (e.g. sqlite3) with a `messages` table.
    xml_path: path to the XML file. Supports SMS Backup & Restore-like
      structures where <sms ... body="..."/> is common.
    service: value stored in the `service` column for every imported row.
  """
  sql = ("INSERT INTO messages(address,date_ms,type,body,thread_id,service,raw_json) "
         "VALUES (?,?,?,?,?,?,?)")
  cur = conn.cursor()
  to_insert = []

  def flush():
    # Write the pending batch and commit; single definition avoids the
    # duplicated SQL the batch path and the tail path previously carried.
    cur.executemany(sql, to_insert)
    conn.commit()
    to_insert.clear()

  # iterparse on "end" so each element is complete when seen and can be
  # cleared promptly. recover=True tolerates malformed backup files.
  context = etree.iterparse(xml_path, events=("end",), recover=True, huge_tree=True)

  for _event, elem in context:
    # recover=True can surface comments/PIs whose .tag is a callable, not a
    # string — calling .lower() on those raised AttributeError before.
    if isinstance(elem.tag, str) and elem.tag.lower() == "sms":
      address = elem.get("address")
      date_ms = safe_int(elem.get("date"))
      msg_type = safe_int(elem.get("type"))
      body = elem.get("body") or ""
      thread_id = safe_int(elem.get("thread_id"))

      # Preserve every original attribute so no backup data is lost.
      raw = {k: elem.get(k) for k in elem.keys()}
      to_insert.append((address, date_ms, msg_type, body, thread_id, service, json.dumps(raw)))

      if len(to_insert) >= BATCH_SIZE:
        flush()

    # Critical: free memory for EVERY element, not only <sms>. The old code
    # cleared inside the <sms> branch, so other nodes (e.g. <mms>) were
    # never released and accumulated for the life of the parse.
    elem.clear()
    parent = elem.getparent()
    if parent is not None:
      while elem.getprevious() is not None:
        del parent[0]

  # Flush whatever is left after the final partial batch.
  if to_insert:
    flush()

def safe_int(v):
  """Convert *v* to int, returning None for None or unparseable values.

  Catches only TypeError/ValueError so a bare except no longer swallows
  unrelated errors such as KeyboardInterrupt or SystemExit.
  """
  if v is None:
    return None
  try:
    return int(v)
  except (TypeError, ValueError):
    return None
