Robots.txt and meta robots are polite suggestions. Malicious crawlers ignore them. You need actual security to protect sensitive content.
✅ Good Security Practices: authentication on sensitive routes, rate limiting, HTTPS enforcement, security headers, crawler verification, and monitoring for suspicious traffic.
❌ Don't Rely On: robots.txt or meta robots tags; they only keep out crawlers that choose to respect them.
Protect your Vue/Nuxt app from unwanted crawlers:
// server/middleware/security.ts
export default defineEventHandler((event) => {
  // Keep non-production environments out of search indexes
  if (process.env.NODE_ENV !== 'production') {
    setHeader(event, 'X-Robots-Tag', 'noindex, nofollow')
  }
  // Enforce HTTPS in production
  if (process.env.NODE_ENV === 'production' && getRequestProtocol(event) !== 'https') {
    return sendRedirect(event, `https://${getRequestHost(event)}${event.path}`, 301)
  }
})
// nuxt.config.ts
export default defineNuxtConfig({
  nitro: {
    routeRules: {
      '/**': {
        headers: {
          // Basic security headers
          'X-Frame-Options': 'DENY',
          'X-Content-Type-Options': 'nosniff',
          'Referrer-Policy': 'strict-origin-when-cross-origin'
        }
      }
    }
  }
})
// server/middleware/rate-limit.ts
// In-memory rate limiting for API routes (use a shared store like Redis in production)
const hits = new Map<string, { count: number, resetAt: number }>()

export default defineEventHandler((event) => {
  if (!event.path.startsWith('/api')) return
  const ip = getRequestIP(event, { xForwardedFor: true }) ?? 'unknown'
  const now = Date.now()
  const entry = hits.get(ip)
  if (!entry || entry.resetAt < now) {
    hits.set(ip, { count: 1, resetAt: now + 15 * 60 * 1000 }) // 15-minute window
  } else if (++entry.count > 100) { // limit each IP to 100 requests per window
    throw createError({ statusCode: 429, statusMessage: 'Too Many Requests' })
  }
})
Always block search engines in non-production environments:
// server/middleware/block-non-production.ts
export default defineEventHandler((event) => {
  const isProd = process.env.NODE_ENV === 'production'
  const isMainDomain = getRequestHost(event) === 'mysite.com'
  if (!isProd || !isMainDomain) {
    setHeader(event, 'X-Robots-Tag', 'noindex, nofollow')
    // Also consider basic auth for staging
    if (!event.headers.get('authorization')) {
      setResponseStatus(event, 401)
      setHeader(event, 'WWW-Authenticate', 'Basic')
      return 'Authentication required'
    }
  }
})
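The snippet above only checks that an Authorization header is present. If you gate staging behind Basic auth, validate the credentials too. A minimal sketch, assuming hypothetical STAGING_USER and STAGING_PASS environment variables that you set yourself:

// server/middleware/staging-auth.ts
// Sketch only: STAGING_USER / STAGING_PASS are assumed env vars, not Nuxt built-ins
export default defineEventHandler((event) => {
  if (process.env.NODE_ENV === 'production') return
  const header = getHeader(event, 'authorization') ?? ''
  const [user, pass] = Buffer.from(header.replace('Basic ', ''), 'base64')
    .toString()
    .split(':')
  if (user !== process.env.STAGING_USER || pass !== process.env.STAGING_PASS) {
    setHeader(event, 'WWW-Authenticate', 'Basic realm="Staging"')
    throw createError({ statusCode: 401, statusMessage: 'Authentication required' })
  }
})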
Protect admin and user areas:
// server/middleware/protect-routes.ts
export default defineEventHandler((event) => {
  const protectedPaths = ['/admin', '/dashboard', '/user']
  if (protectedPaths.some(path => event.path.startsWith(path))) {
    // Ensure the user is authenticated
    if (!event.context.auth?.user) {
      return sendRedirect(event, '/login')
    }
    // Block indexing of protected content
    setHeader(event, 'X-Robots-Tag', 'noindex, nofollow')
  }
})
Identify legitimate crawlers through reverse DNS verification:
// server/utils/verify-crawler.ts
import { promises as dns } from 'node:dns'

export async function isLegitCrawler(ip: string, userAgent: string) {
  // Example: verify Googlebot with a reverse DNS lookup
  if (userAgent.includes('Googlebot')) {
    try {
      const [hostname] = await dns.reverse(ip)
      // Real Googlebot hosts resolve to googlebot.com or google.com
      return hostname.endsWith('.googlebot.com') || hostname.endsWith('.google.com')
    } catch {
      return false
    }
  }
  return false
}
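A middleware can then drop requests that claim to be Googlebot but fail verification. A usage sketch (for a stricter check, Google also recommends forward-confirming the hostname back to the original IP):

// server/middleware/fake-googlebot.ts
// Uses the isLegitCrawler helper above (server/utils are auto-imported in Nuxt 3)
export default defineEventHandler(async (event) => {
  const ua = getHeader(event, 'user-agent') ?? ''
  const ip = getRequestIP(event, { xForwardedFor: true }) ?? ''
  if (ua.includes('Googlebot') && !(await isLegitCrawler(ip, ua))) {
    throw createError({ statusCode: 403, statusMessage: 'Forbidden' })
  }
})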
Implement tiered rate limiting:
// server/middleware/tiered-rate-limit.ts
// Different limits for different kinds of traffic (in-memory sketch)
const buckets = new Map<string, { count: number, resetAt: number }>()

function isLimited(key: string, windowMs: number, max: number) {
  const now = Date.now()
  const entry = buckets.get(key)
  if (!entry || entry.resetAt < now) {
    buckets.set(key, { count: 1, resetAt: now + windowMs })
    return false
  }
  return ++entry.count > max
}

export default defineEventHandler((event) => {
  const ip = getRequestIP(event, { xForwardedFor: true }) ?? 'unknown'
  const ua = getHeader(event, 'user-agent') ?? ''
  // API routes: 100 requests per 15 minutes per IP
  if (event.path.startsWith('/api') && isLimited(`api:${ip}`, 15 * 60 * 1000, 100)) {
    throw createError({ statusCode: 429, statusMessage: 'Too Many Requests' })
  }
  // Self-identified bots: 10 requests per minute per IP
  if (ua.toLowerCase().includes('bot') && isLimited(`bot:${ip}`, 60 * 1000, 10)) {
    throw createError({ statusCode: 429, statusMessage: 'Too Many Requests' })
  }
})
Always redirect HTTP to HTTPS:
export default defineEventHandler((event) => {
  const proto = event.headers.get('x-forwarded-proto')
  if (proto === 'http') {
    return sendRedirect(
      event,
      `https://${getRequestHost(event)}${event.path}`,
      301
    )
  }
})
Add security headers:
export default defineNuxtConfig({
  nitro: {
    routeRules: {
      '/**': {
        headers: {
          // Prevent clickjacking
          'X-Frame-Options': 'DENY',
          // Prevent MIME type sniffing
          'X-Content-Type-Options': 'nosniff',
          // Control referrer information
          'Referrer-Policy': 'strict-origin-when-cross-origin',
          // Enable a strict CSP in production
          ...(process.env.NODE_ENV === 'production'
            ? {
                'Content-Security-Policy': "default-src 'self';"
              }
            : {})
        }
      }
    }
  }
})
Monitor incoming traffic so you can spot abusive crawlers early:
// server/middleware/crawler-monitor.ts
export default defineEventHandler((event) => {
  const ua = event.headers.get('user-agent')
  const ip = getRequestIP(event)
  // Log suspicious patterns (isSuspiciousPattern is a helper you define in server/utils)
  if (isSuspiciousPattern(ua, ip)) {
    console.warn(`Suspicious crawler: ${ip} with UA: ${ua}`)
    // Consider blocking or rate limiting
  }
})
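What counts as suspicious is up to you. A minimal sketch of such a helper; the empty user agent and known-scraper checks below are illustrative assumptions, not a complete detection strategy:

// server/utils/suspicious.ts (sketch)
const SCRAPER_UA_HINTS = ['python-requests', 'curl', 'scrapy'] // illustrative list
const BLOCKED_IPS = new Set<string>() // fill from your own logs

export function isSuspiciousPattern(ua?: string | null, ip?: string | null) {
  // Requests from IPs you have already flagged
  if (ip && BLOCKED_IPS.has(ip)) return true
  // A missing user agent is a common sign of a cheap scraper
  if (!ua) return true
  // Self-identified scraping tools
  return SCRAPER_UA_HINTS.some(hint => ua.toLowerCase().includes(hint))
}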
Services like Cloudflare or AWS WAF can block known bad bots, rate-limit abusive clients at the edge, and absorb DDoS traffic before it ever reaches your server.
Opinion: If you're running a small blog, a WAF is overkill. Add it when you're actually getting attacked.
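One practical detail if you do sit behind a proxy like Cloudflare: rate limiting and monitoring should key on the real client IP, which Cloudflare forwards in the cf-connecting-ip header. A minimal sketch, assuming a Cloudflare-proxied deployment (other proxies use different headers):

// server/utils/client-ip.ts (sketch for a Cloudflare-proxied setup)
import type { H3Event } from 'h3'

export function getClientIP(event: H3Event) {
  // Cloudflare puts the original visitor IP in cf-connecting-ip;
  // fall back to h3's own resolution when the header is absent
  return getHeader(event, 'cf-connecting-ip')
    ?? getRequestIP(event, { xForwardedFor: true })
    ?? 'unknown'
}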
Prevent automated content theft:
export default defineEventHandler(async (event) => {
  // getRequestCount and isBot are helpers you define (e.g. in server/utils)
  const requests = getRequestCount(getRequestIP(event))
  if (requests > 100) {
    setResponseStatus(event, 429)
    return 'Too Many Requests'
  }
  // Slow down automated requests slightly
  if (isBot(event.headers.get('user-agent'))) {
    await new Promise(r => setTimeout(r, 500))
  }
})
Protect forms from bot submissions:
// server/api/contact.post.ts
export default defineEventHandler(async (event) => {
  const body = await readBody(event)
  // Honeypot check: real users never fill this hidden field
  if (body.website) {
    return { success: false }
  }
  // Rate limiting (exceedsRateLimit is a helper you define)
  if (exceedsRateLimit(getRequestIP(event))) {
    throw createError({
      statusCode: 429,
      message: 'Too many attempts'
    })
  }
  // Process the legitimate submission
  return { success: true }
})
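On the client side, the honeypot is just a field that humans never see or fill in. A minimal sketch of the matching form; the component name and styling are assumptions, and the field is named website to line up with the handler above:

<!-- components/ContactForm.vue (sketch) -->
<script setup lang="ts">
const form = reactive({ email: '', message: '', website: '' }) // website is the honeypot

async function submit() {
  // Bots that auto-fill every input will populate `website` and be rejected server-side
  await $fetch('/api/contact', { method: 'POST', body: form })
}
</script>

<template>
  <form @submit.prevent="submit">
    <input v-model="form.email" type="email" placeholder="Email" />
    <textarea v-model="form.message" placeholder="Message" />
    <!-- Honeypot: visually hidden, skipped by humans, often filled by bots -->
    <input v-model="form.website" tabindex="-1" autocomplete="off" style="position:absolute;left:-9999px" />
    <button type="submit">Send</button>
  </form>
</template>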