liusijin revised this gist 1 month ago. Go to revision
1 file changed, 35 insertions
web_fetch.py(file created)
| @@ -0,0 +1,35 @@ | |||
| 1 | + | #!/usr/bin/env python3 | |
| 2 | + | """Web fetch extension - fetch content from URLs""" | |
| 3 | + | ||
| 4 | + | import urllib.request | |
| 5 | + | import re | |
| 6 | + | ||
@register_tool(
    "fetch",
    "Fetch content from URL (supports http/https)",
    {"url": "string", "timeout": "number?"}
)
def fetch(args):
    """Fetch a URL and return its text content with HTML tags stripped.

    Args:
        args: dict with "url" (string, must be http/https) and optional
            "timeout" (seconds, default 10).

    Returns:
        Cleaned page text (capped at 5000 chars), or an "error: ..."
        string on any failure — this tool never raises.
    """
    url = args["url"]
    timeout = args.get("timeout", 10)

    # Enforce the documented contract: only http/https. Without this guard,
    # urlopen also accepts schemes such as file://, which would let a caller
    # read arbitrary local files through this tool.
    if not url.lower().startswith(("http://", "https://")):
        return f"error: unsupported URL scheme (expected http/https): {url}"

    try:
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'nanocode/1.0')
        # Context manager closes the underlying connection even if read()
        # fails (the original never closed the response — resource leak).
        with urllib.request.urlopen(req, timeout=timeout) as response:
            # NOTE(review): charset is assumed utf-8 regardless of the
            # Content-Type header; non-utf-8 pages degrade via errors='ignore'.
            content = response.read().decode('utf-8', errors='ignore')

        # Crude HTML-to-text cleanup: drop script/style bodies, then all
        # remaining tags, then collapse runs of whitespace.
        content = re.sub(r'<script[^>]*>.*?</script>', '', content, flags=re.DOTALL)
        content = re.sub(r'<style[^>]*>.*?</style>', '', content, flags=re.DOTALL)
        content = re.sub(r'<[^>]+>', ' ', content)
        content = re.sub(r'\s+', ' ', content).strip()

        # Cap output size. The f-string is evaluated before the reassignment,
        # so len(content) here is the original (pre-truncation) length.
        if len(content) > 5000:
            content = content[:5000] + f"\n... (truncated, total {len(content)} chars)"

        return content
    except Exception as e:
        # Best-effort tool: surface failures as text instead of raising.
        return f"error: {e}"
Newer
Older