web_fetch.py
· 1.1 KiB · Python
Raw
#!/usr/bin/env python3
"""Web fetch extension - fetch content from URLs"""
import urllib.request
import re
@register_tool(
    "fetch",
    "Fetch content from URL (supports http/https)",
    {"url": "string", "timeout": "number?"}
)
def fetch(args):
    """Fetch a web page and return its text with HTML markup stripped.

    Args:
        args: Tool arguments. ``args["url"]`` is the http/https URL to
            fetch. The optional ``args["timeout"]`` is the timeout in
            seconds passed to ``urlopen`` (10 when absent or ``None``).

    Returns:
        The response body with ``<script>``/``<style>`` blocks and all
        remaining tags removed and whitespace runs collapsed to single
        spaces. Text longer than 5000 characters is cut to 5000 and a
        note with the original length is appended. On any failure
        (unsupported scheme, network error, ...) a string of the form
        ``"error: <message>"`` is returned instead.

    Raises:
        KeyError: If ``args`` has no ``"url"`` entry.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import urlsplit

    url = args["url"]
    timeout = args.get("timeout")
    if timeout is None:
        # An explicit None would make urlopen block indefinitely.
        timeout = 10

    try:
        # urlopen also understands file:, ftp: and data: URLs; the tool only
        # advertises http/https, so refuse everything else rather than let a
        # caller-supplied URL read local files.
        scheme = urlsplit(url).scheme.lower()
        if scheme not in ("http", "https"):
            return f"error: unsupported URL scheme {scheme!r} (only http/https)"

        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'nanocode/1.0')

        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            raw = response.read()
            charset = response.headers.get_content_charset() or 'utf-8'

        try:
            content = raw.decode(charset, errors='ignore')
        except LookupError:
            # Server declared a charset Python does not know; fall back.
            content = raw.decode('utf-8', errors='ignore')

        # Simple HTML cleanup: drop script/style blocks (any letter case),
        # then every remaining tag, then collapse whitespace.
        block_flags = re.DOTALL | re.IGNORECASE
        content = re.sub(r'<script\b[^>]*>.*?</script\s*>', '', content, flags=block_flags)
        content = re.sub(r'<style\b[^>]*>.*?</style\s*>', '', content, flags=block_flags)
        content = re.sub(r'<[^>]+>', ' ', content)
        content = re.sub(r'\s+', ' ', content).strip()

        # Limit the length of the returned text.
        total = len(content)
        if total > 5000:
            content = content[:5000] + f"\n... (truncated, total {total} chars)"

        return content
    except Exception as e:
        # Tool boundary: report failures as text instead of raising.
        return f"error: {e}"
| 1 | #!/usr/bin/env python3 |
| 2 | """Web fetch extension - fetch content from URLs""" |
| 3 | |
| 4 | import urllib.request |
| 5 | import re |
| 6 | |
| 7 | @register_tool( |
| 8 | "fetch", |
| 9 | "Fetch content from URL (supports http/https)", |
| 10 | {"url": "string", "timeout": "number?"} |
| 11 | ) |
| 12 | def fetch(args): |
| 13 | """Fetch web content""" |
| 14 | url = args["url"] |
| 15 | timeout = args.get("timeout", 10) |
| 16 | |
| 17 | try: |
| 18 | req = urllib.request.Request(url) |
| 19 | req.add_header('User-Agent', 'nanocode/1.0') |
| 20 | response = urllib.request.urlopen(req, timeout=timeout) |
| 21 | content = response.read().decode('utf-8', errors='ignore') |
| 22 | |
| 23 | # Simple HTML cleanup |
| 24 | content = re.sub(r'<script[^>]*>.*?</script>', '', content, flags=re.DOTALL) |
| 25 | content = re.sub(r'<style[^>]*>.*?</style>', '', content, flags=re.DOTALL) |
| 26 | content = re.sub(r'<[^>]+>', ' ', content) |
| 27 | content = re.sub(r'\s+', ' ', content).strip() |
| 28 | |
| 29 | # Limit output length |
| 30 | if len(content) > 5000: |
| 31 | content = content[:5000] + f"\n... (truncated, total {len(content)} chars)" |
| 32 | |
| 33 | return content |
| 34 | except Exception as e: |
| 35 | return f"error: {e}" |