Extract valuable metadata from web pages alongside screenshots. Analyze SEO tags, fonts, images, and more for comprehensive content analysis.
Beyond capturing screenshots, extract structured metadata from web pages. Get Open Graph tags for SEO analysis, font information for design audits, and image metadata for content inventory.
When to extract metadata?
Extract Open Graph meta tags from the page. Returns structured data including title, description, image, URL, and other OG tags used for social media sharing.
Option `metadata_open_graph` — type: boolean, default: false.
const response = await fetch('https://www.snapshotai.dev/api/v1/screenshots', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_YOUR_API_KEY',
'Content-Type': 'application/json',
},
body: JSON.stringify({
url: 'https://example.com',
options: {
metadata_open_graph: true
}
})
});
const data = await response.json();
console.log('Screenshot:', data.data.id);
console.log('Open Graph Data:', data.data.metadata.open_graph);
// Example response:
// {
// "og:title": "Example Page Title",
// "og:description": "This is an example page description",
// "og:image": "https://example.com/image.jpg",
// "og:url": "https://example.com",
// "og:type": "website",
// "og:site_name": "Example Site"
// }
import requests
response = requests.post(
'https://www.snapshotai.dev/api/v1/screenshots',
headers={'Authorization': 'Bearer sk_live_YOUR_API_KEY'},
json={
'url': 'https://blog.example.com/article',
'options': {
'metadata_open_graph': True
}
}
)
data = response.json()
og_data = data['data']['metadata']['open_graph']
# Validate required OG tags
required_tags = ['og:title', 'og:description', 'og:image']
missing_tags = [tag for tag in required_tags if tag not in og_data]
if missing_tags:
    print(f"Missing OG tags: {missing_tags}")
else:
    print("✓ All required OG tags present")
    print(f"Title: {og_data['og:title']}")
    print(f"Image: {og_data['og:image']}")
Extract a list of all fonts used on the page. Useful for design system audits, branding consistency checks, and typography analysis.
Option `metadata_fonts` — type: boolean, default: false.
const response = await fetch('https://www.snapshotai.dev/api/v1/screenshots', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_YOUR_API_KEY',
'Content-Type': 'application/json',
},
body: JSON.stringify({
url: 'https://example.com',
options: {
metadata_fonts: true
}
})
});
const data = await response.json();
console.log('Fonts used:', data.data.metadata.fonts);
// Example response:
// {
// fonts: [
// "Inter, sans-serif",
// "Roboto, Arial, sans-serif",
// "Fira Code, monospace"
// ]
// }
import requests
pages = [
'https://example.com',
'https://example.com/about',
'https://example.com/blog'
]
all_fonts = {}
for url in pages:
    response = requests.post(
        'https://www.snapshotai.dev/api/v1/screenshots',
        headers={'Authorization': 'Bearer sk_live_YOUR_API_KEY'},
        json={
            'url': url,
            'options': {
                'metadata_fonts': True
            }
        }
    )
    fonts = response.json()['data']['metadata']['fonts']
    all_fonts[url] = fonts
    print(f"{url}: {len(fonts)} fonts")
# Check consistency
unique_fonts = set()
for fonts in all_fonts.values():
    unique_fonts.update(fonts)
print(f"\nTotal unique fonts across site: {len(unique_fonts)}")
Extract information about all images on the page including URLs, dimensions, alt text, and file sizes. Perfect for content inventory and image optimization audits.
Option `metadata_image_size` — type: boolean, default: false.
curl -X POST https://www.snapshotai.dev/api/v1/screenshots \
-H "Authorization: Bearer sk_live_YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"url": "https://example.com",
"options": {
"metadata_image_size": true
}
}'
# Example response:
# {
# images: [
# {
# url: "https://example.com/logo.png",
# width: 200,
# height: 50,
# alt: "Company Logo",
# size_bytes: 15000
# },
# {
# url: "https://example.com/hero.jpg",
# width: 1920,
# height: 1080,
# alt: "Hero Image",
# size_bytes: 250000
# }
# ]
# }
const response = await fetch('https://www.snapshotai.dev/api/v1/screenshots', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_YOUR_API_KEY',
'Content-Type': 'application/json',
},
body: JSON.stringify({
url: 'https://example.com',
options: {
metadata_image_size: true
}
})
});
const data = await response.json();
const images = data.data.metadata.images;
// Find images over 100KB
const largeImages = images.filter(img => img.size_bytes > 100000);
console.log(`Found ${largeImages.length} images over 100KB:`);
largeImages.forEach(img => {
const sizeKB = (img.size_bytes / 1024).toFixed(2);
console.log(`- ${img.url} (${sizeKB}KB, ${img.width}x${img.height})`);
});
// Find images missing alt text
const missingAlt = images.filter(img => !img.alt || img.alt.trim() === '');
console.log(`\n${missingAlt.length} images missing alt text`);
Extract all metadata types in a single request for comprehensive analysis:
import requests
response = requests.post(
'https://www.snapshotai.dev/api/v1/screenshots',
headers={'Authorization': 'Bearer sk_live_YOUR_API_KEY'},
json={
'url': 'https://example.com',
'options': {
# Extract all metadata
'metadata_open_graph': True,
'metadata_fonts': True,
'metadata_image_size': True,
# Optional: Also capture screenshot
'viewport_width': 1920,
'viewport_height': 1080
}
}
)
data = response.json()
metadata = data['data']['metadata']
# Analyze all metadata
print("=== SEO Analysis ===")
og = metadata.get('open_graph', {})
print(f"Title: {og.get('og:title', 'Missing')}")
print(f"Description: {og.get('og:description', 'Missing')}")
print(f"Image: {og.get('og:image', 'Missing')}")
print("\n=== Typography ===")
fonts = metadata.get('fonts', [])
print(f"Fonts used: {len(fonts)}")
for font in fonts:
    print(f" - {font}")
print("\n=== Images ===")
images = metadata.get('images', [])
print(f"Total images: {len(images)}")
total_size = sum(img['size_bytes'] for img in images)
print(f"Total size: {(total_size / 1024 / 1024):.2f}MB")
Validate Open Graph tags across multiple pages:
const pages = await crawlSitemap('https://example.com/sitemap.xml');
for (const url of pages) {
const response = await fetch('https://www.snapshotai.dev/api/v1/screenshots', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_YOUR_API_KEY',
'Content-Type': 'application/json',
},
body: JSON.stringify({
url,
options: { metadata_open_graph: true }
})
});
const og = (await response.json()).data.metadata.open_graph;
validateOGTags(url, og);
}
Check typography consistency across your site:
urls = get_all_pages('https://example.com')
font_usage = {}
for url in urls:
    metadata = extract_metadata(url, metadata_fonts=True)
    for font in metadata['fonts']:
        font_usage[font] = font_usage.get(font, 0) + 1
print("Font usage across site:")
for font, count in sorted(font_usage.items(), key=lambda x: -x[1]):
    print(f"{font}: {count} pages")
Catalog all images and their metadata:
{
"url": "https://example.com",
"options": {
"metadata_image_size": true
}
}
// Build inventory of all images
// Track duplicates, sizes, missing alt text
// Generate optimization recommendations
Request Only Needed Metadata
Only enable the metadata extraction options you need to minimize processing time and response size.
Wait for Full Page Load
Use wait_for_network_idle to ensure all fonts and images are loaded before extraction.
Cache Results
Metadata doesn't change frequently. Cache results to avoid unnecessary API calls.
Validate Extracted Data
Always check if metadata fields exist before accessing them, as not all pages have all tags.
Our team can help you build comprehensive SEO and content analysis workflows.
Contact Support