Claude 的 Vision(视觉)能力让它可以直接理解图片内容——分析截图、识别文字、理解图表、把设计稿转成代码。本文展示所有实用场景。
支持的图片格式
- JPEG、PNG、GIF、WebP
- 最大单张:5MB(base64)或 URL 引用
- 每次请求最多 20 张图片
基础 API 用法
方式 1:本地图片(base64)
python
import anthropic, base64
client = anthropic.Anthropic()
with open('screenshot.png', 'rb') as f:
image_data = base64.standard_b64encode(f.read()).decode('utf-8')
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
messages=[{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": image_data
}
},
{"type": "text", "text": "Describe what you see in this screenshot."}
]
}]
)
print(response.content[0].text)方式 2:URL 图片
python
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
messages=[{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "url",
"url": "https://example.com/chart.png"
}
},
{"type": "text", "text": "Analyze this chart and extract the key data points."}
]
}]
)场景 1:截图转 React 代码
python
def screenshot_to_react(image_path):
    """Ask the model to convert a UI screenshot into a React component.

    Args:
        image_path: Path of the screenshot file. The media type sent with
            the image is chosen from the file extension (JPEG, PNG, GIF,
            WebP); any other extension falls back to ``image/png``, which
            is what was previously sent for every file.

    Returns:
        The text of the first content block of the response.
    """
    import os  # local import: only needed for the extension lookup

    # Declare the media type that matches the file rather than assuming
    # PNG, so a .jpg or .webp screenshot is not mislabelled.
    media_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    extension = os.path.splitext(image_path)[1].lower()
    media_type = media_types.get(extension, 'image/png')

    with open(image_path, 'rb') as f:
        data = base64.standard_b64encode(f.read()).decode('utf-8')

    # Same prompt text as before, spelled out line by line so it does not
    # depend on how the surrounding code is indented.
    prompt = (
        "\n"
        "Convert this UI screenshot to a React component.\n"
        "Requirements:\n"
        "- TypeScript\n"
        "- Tailwind CSS for styling\n"
        "- Match the layout and colors as closely as possible\n"
        "- Make it responsive (mobile-first)\n"
        "- Use semantic HTML\n"
        "Output only the component code.\n"
    )

    response = client.messages.create(
        model="claude-sonnet-4-5",
        max_tokens=4096,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": data,
                    },
                },
                {"type": "text", "text": prompt},
            ],
        }],
    )
    return response.content[0].text
code = screenshot_to_react('figma-design.png')

场景 2:OCR 文字提取
python
def extract_text(image_path):
    """Extract the text contained in an image (OCR) through the Messages API.

    Args:
        image_path: Path of the image file. The media type is chosen from
            the file extension (JPEG, PNG, GIF, WebP); anything else falls
            back to ``image/png``.

    Returns:
        The text of the first content block of the response.
    """
    import os  # local import: only needed for the extension lookup

    with open(image_path, 'rb') as f:
        data = base64.standard_b64encode(f.read()).decode('utf-8')

    # GIF is included so every supported format maps to its own media type
    # instead of silently falling back to PNG.
    media_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    ext = os.path.splitext(image_path)[1].lower()
    media_type = media_types.get(ext, 'image/png')

    response = client.messages.create(
        # "claude-haiku-3-5" is not a valid model ID; Claude 3.5 Haiku is
        # addressed as "claude-3-5-haiku-<date>" or via the "-latest" alias.
        model="claude-3-5-haiku-latest",
        max_tokens=2048,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": data,
                    },
                },
                {
                    "type": "text",
                    "text": "Extract all text from this image. Preserve formatting (tables, lists). Output only the extracted text.",
                },
            ],
        }],
    )
    return response.content[0].text
# 批量处理扫描文档
import glob
for img in glob.glob('scanned/*.png'):
text = extract_text(img)
with open(img.replace('.png', '.txt'), 'w') as f:
f.write(text)场景 3:数据图表分析
python
response = client.messages.create(
model="claude-sonnet-4-5",
max_tokens=1024,
messages=[{
"role": "user",
"content": [
{"type": "image", "source": {"type": "url", "url": chart_url}},
{"type": "text", "text": """
Analyze this chart:
1. What type of chart is this?
2. Extract all data points as JSON
3. Identify the trend (increasing/decreasing/stable)
4. What's the highest and lowest value?
5. Key insight in one sentence
"""}
]
}]
)场景 4:设计稿审查
python
def review_design(design_img, spec_img=None):
content = []
with open(design_img, 'rb') as f:
d = base64.standard_b64encode(f.read()).decode()
content.append({"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": d}})
if spec_img:
with open(spec_img, 'rb') as f:
d2 = base64.standard_b64encode(f.read()).decode()
content.append({"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": d2}})
content.append({"type": "text", "text": "First image is the implementation, second is the spec. Find differences."})
else:
content.append({"type": "text", "text": "Review this UI for: accessibility issues, spacing inconsistencies, color contrast, missing hover states."})
response = client.messages.create(
model="claude-sonnet-4-5", max_tokens=1024,
messages=[{"role": "user", "content": content}]
)
return response.content[0].text在 Claude Code 终端中使用图片
bash
# Interactive mode: start Claude Code, then paste a screenshot directly
claude
# Then press Ctrl+V to paste the screenshot (supported on macOS/Linux)
# Or drag and drop an image file into the terminal
# Non-interactive mode: reference the image by its path inside the prompt
# (--image is not a documented Claude Code CLI flag)
claude -p "Convert the design in ./design.png to a React component"

多图对比
python
# 对比两个版本的 UI
def compare_screenshots(before_path, after_path):
images = []
for path in [before_path, after_path]:
with open(path, 'rb') as f:
d = base64.standard_b64encode(f.read()).decode()
images.append({"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": d}})
images.append({"type": "text", "text": "Compare these two screenshots. List all visual differences."})
response = client.messages.create(
model="claude-sonnet-4-5", max_tokens=1024,
messages=[{"role": "user", "content": images}]
)
return response.content[0].text来源:Vision API - Anthropic 官方文档