#!/usr/bin/env python3
"""Web fetch extension - fetch content from URLs"""

import urllib.request
import re

@register_tool(
    "fetch",
    "Fetch content from URL (supports http/https)",
    {"url": "string", "timeout": "number?"}
)
def fetch(args):
    """Fetch a URL and return its visible text content.

    Args:
        args: dict with "url" (required, http/https only) and an optional
            "timeout" in seconds (default 10).

    Returns:
        Page text with <script>/<style> blocks and all tags stripped and
        whitespace collapsed, truncated to 5000 characters, or an
        "error: ..." string on any failure (tool contract: never raises).
    """
    url = args["url"]
    timeout = args.get("timeout", 10)

    # Enforce the documented scheme whitelist. Without this, urllib also
    # accepts file:// and ftp:// URLs — a local-file-read risk when the
    # URL comes from untrusted input.
    if not url.lower().startswith(("http://", "https://")):
        return "error: only http/https URLs are supported"

    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'nanocode/1.0')
        # Context manager ensures the connection is closed even if
        # read/decode fails (the original leaked the response object).
        with urllib.request.urlopen(req, timeout=timeout) as response:
            content = response.read().decode('utf-8', errors='ignore')

        # Crude HTML-to-text cleanup: drop script/style bodies, then tags,
        # then collapse runs of whitespace.
        content = re.sub(r'<script[^>]*>.*?</script>', '', content, flags=re.DOTALL)
        content = re.sub(r'<style[^>]*>.*?</style>', '', content, flags=re.DOTALL)
        content = re.sub(r'<[^>]+>', ' ', content)
        content = re.sub(r'\s+', ' ', content).strip()

        # Cap output length; len(content) on the RHS is evaluated before
        # assignment, so it reports the pre-truncation total.
        if len(content) > 5000:
            content = content[:5000] + f"\n... (truncated, total {len(content)} chars)"

        return content
    except Exception as e:
        # Best-effort tool: surface failures as a string rather than raising.
        return f"error: {e}"