#!/usr/bin/env python3
"""Web fetch extension - fetch content from URLs"""
import urllib.request
import re
@register_tool(
    "fetch",
    "Fetch content from URL (supports http/https)",
    {"url": "string", "timeout": "number?"}
)
def fetch(args):
    """Fetch a URL and return its visible text with HTML markup stripped.

    Args:
        args: dict with key "url" (required, the address to fetch) and
            optional "timeout" in seconds (defaults to 10).

    Returns:
        The cleaned page text, capped at 5000 characters (with a
        truncation notice appended), or an "error: ..." string if the
        request fails for any reason.
    """
    url = args["url"]
    timeout = args.get("timeout", 10)
    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'nanocode/1.0')
        # Use a context manager so the connection is closed even if
        # read()/decode() raises (the original leaked the response).
        with urllib.request.urlopen(req, timeout=timeout) as response:
            content = response.read().decode('utf-8', errors='ignore')
        # Basic HTML cleanup: drop <script>/<style> bodies first so their
        # JS/CSS text doesn't survive the tag-strip below. (The original
        # patterns here were empty strings, making these subs no-ops.)
        content = re.sub(r'<script\b[^>]*>.*?</script>', '', content,
                         flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<style\b[^>]*>.*?</style>', '', content,
                         flags=re.DOTALL | re.IGNORECASE)
        # Replace remaining tags with spaces, then collapse whitespace.
        content = re.sub(r'<[^>]+>', ' ', content)
        content = re.sub(r'\s+', ' ', content).strip()
        # Cap output length; the f-string on the RHS is evaluated before
        # the reassignment, so len(content) reports the full length.
        if len(content) > 5000:
            content = content[:5000] + f"\n... (truncated, total {len(content)} chars)"
        return content
    except Exception as e:
        # Tool convention: surface failures as a string, never raise.
        return f"error: {e}"