#!/usr/bin/env python3
"""Web fetch extension - fetch content from URLs"""
import urllib.request
import re


@register_tool(
    "fetch",
    "Fetch content from URL (supports http/https)",
    {"url": "string", "timeout": "number?"}
)
def fetch(args):
    """Fetch web content and return it as cleaned plain text.

    Args:
        args: dict with "url" (required string) and optional "timeout"
              (seconds, default 10).

    Returns:
        Page text with <script>/<style> blocks and HTML tags stripped and
        whitespace collapsed, truncated to 5000 characters — or an
        "error: ..." string if the fetch fails.
    """
    url = args["url"]
    timeout = args.get("timeout", 10)
    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'nanocode/1.0')
        # Context manager guarantees the connection is closed even if
        # decoding below raises (the original leaked the response object).
        with urllib.request.urlopen(req, timeout=timeout) as response:
            content = response.read().decode('utf-8', errors='ignore')
        # Simple HTML cleanup: drop <script>/<style> blocks entirely (their
        # contents are not page text), then strip remaining tags and collapse
        # whitespace. IGNORECASE also catches <SCRIPT>/<Style> variants.
        content = re.sub(r'<script[^>]*>.*?</script>', '', content,
                         flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<style[^>]*>.*?</style>', '', content,
                         flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<[^>]+>', ' ', content)
        content = re.sub(r'\s+', ' ', content).strip()
        # Limit length. Note: the RHS is evaluated before reassignment, so
        # len(content) in the f-string is the full pre-truncation length.
        if len(content) > 5000:
            content = content[:5000] + f"\n... (truncated, total {len(content)} chars)"
        return content
    except Exception as e:
        # Tool boundary: surface any failure as a string result rather than
        # raising into the tool-dispatch layer.
        return f"error: {e}"