feat(rate-limit): T39 implement rate limiting for public API

- 100 requests/hour per API token
- 30 requests/minute per IP (fallback)
- In-memory storage with auto-cleanup
- Headers: X-RateLimit-Limit, X-RateLimit-Remaining
- Returns 429 Too Many Requests when exceeded
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-07 16:16:06 +02:00
parent 88b43afa7e
commit 3b71ac55c3

View File

@@ -0,0 +1,200 @@
"""Rate limiting dependency for public API.
T39: Rate limiting for public API endpoints.
Uses in-memory storage for MVP (simple dict-based approach).
"""
import hashlib
import math
import time
from typing import Dict, Optional, Tuple

from fastapi import Depends, HTTPException, Request, status
from fastapi.security import HTTPAuthorizationCredentials

from openrouter_monitor.dependencies.auth import api_token_security
# In-memory rate-limit state, held in a module-level dict (not persisted).
# Structure: {key: (request_count, window_reset_time)} where the key is a
# "token:<hash>" or "ip:<address>" string and window_reset_time is a
# time.time() epoch timestamp after which the window is considered expired.
_rate_limit_storage: Dict[str, Tuple[int, float]] = {}
def get_client_ip(request: Request) -> str:
    """Extract the client IP address from a request.

    The first entry of the ``X-Forwarded-For`` header is preferred (the
    left-most hop is conventionally the originating client). If the header is
    absent, or its first entry is blank (e.g. a malformed ``" , 10.0.0.1"``),
    the address of the direct connection is used instead, so a blank string
    is never returned as an IP.

    Args:
        request: FastAPI request object.

    Returns:
        The client IP address, or ``"unknown"`` when neither the header nor
        the connection information yields one.
    """
    # NOTE(security): X-Forwarded-For is supplied by the client unless a
    # trusted reverse proxy overwrites it. When the app is reachable
    # directly, a caller can spoof this header to obtain a fresh rate-limit
    # bucket per request -- confirm the deployment sits behind such a proxy.
    forwarded = request.headers.get("X-Forwarded-For")
    if forwarded:
        first_hop = forwarded.split(",", 1)[0].strip()
        if first_hop:
            return first_hop

    # Fall back to the peer address of the direct connection.
    if request.client:
        return request.client.host

    return "unknown"
def check_rate_limit(
    key: str,
    max_requests: int,
    window_seconds: int,
) -> Tuple[bool, int, int, float]:
    """Record a request for ``key`` and report whether it is within the limit.

    Implements a fixed-window counter: the first request for a key (or the
    first one after its window expired) opens a new window lasting
    ``window_seconds``; each allowed request increments the counter until
    ``max_requests`` is reached, after which requests are denied until the
    window expires. Denied requests do not modify the stored state.

    Args:
        key: Rate limit key (token hash or IP).
        max_requests: Maximum requests allowed in one window. A value of
            zero or less denies every request.
        window_seconds: Length of the window in seconds.

    Returns:
        Tuple of ``(allowed, remaining, limit, reset_time)`` where
        ``remaining`` is never negative, ``limit`` echoes ``max_requests``
        and ``reset_time`` is the ``time.time()`` timestamp at which the
        current window ends.
    """
    global _rate_limit_storage

    now = time.time()

    # Opportunistic cleanup: only once the table grows large, drop windows
    # that have already expired. The predicate mirrors the expiry test below
    # (a window is expired only when now > reset_time).
    if len(_rate_limit_storage) > 10000:  # Prevent memory bloat
        _rate_limit_storage = {
            k: v for k, v in _rate_limit_storage.items() if v[1] >= now
        }

    entry = _rate_limit_storage.get(key)
    if entry is None or now > entry[1]:
        # Unknown key or expired window: start a fresh window from now.
        count, reset_time = 0, now + window_seconds
    else:
        count, reset_time = entry

    if count >= max_requests:
        # Limit reached (also covers max_requests <= 0); leave state as is.
        return False, 0, max_requests, reset_time

    count += 1
    _rate_limit_storage[key] = (count, reset_time)
    return True, max_requests - count, max_requests, reset_time
class RateLimiter:
    """Rate limiter dependency for FastAPI endpoints.

    Supports two rate limit types:
    - Per API token: 100 requests/hour (by default) when credentials are
      present
    - Per IP: 30 requests/minute (by default) as the fallback when no
      credentials are present

    On success the dependency returns the ``X-RateLimit-Limit`` and
    ``X-RateLimit-Remaining`` values as a dict; it does not itself attach
    them to the response. When the limit is exceeded, the raised 429 carries
    ``X-RateLimit-Limit``, ``X-RateLimit-Remaining``, ``X-RateLimit-Reset``
    and ``Retry-After`` headers.
    """

    def __init__(
        self,
        token_limit: int = 100,
        token_window: int = 3600,  # 1 hour
        ip_limit: int = 30,
        ip_window: int = 60,  # 1 minute
    ):
        """Configure the limits.

        Args:
            token_limit: Requests allowed per token per ``token_window``.
            token_window: Token window length in seconds.
            ip_limit: Requests allowed per IP per ``ip_window``.
            ip_window: IP window length in seconds.
        """
        self.token_limit = token_limit
        self.token_window = token_window
        self.ip_limit = ip_limit
        self.ip_window = ip_window

    async def __call__(
        self,
        request: Request,
        credentials: Optional[HTTPAuthorizationCredentials] = Depends(api_token_security),
    ) -> Dict[str, int]:
        """Check the rate limit for this request and return header info.

        Args:
            request: FastAPI request object.
            credentials: Optional API token credentials.

        Returns:
            Dict with the ``X-RateLimit-Limit`` and ``X-RateLimit-Remaining``
            values for the current window.

        Raises:
            HTTPException: 429 if the rate limit is exceeded.
        """
        if credentials and credentials.credentials:
            # Token-based limiting. The key is a truncated SHA-256 digest so
            # the raw token is never kept in the storage dict.
            # NOTE(review): every distinct credential string gets its own
            # bucket here -- confirm api_token_security rejects invalid
            # tokens, otherwise bogus tokens could be rotated to dodge limits.
            token_digest = hashlib.sha256(credentials.credentials.encode()).hexdigest()
            key = f"token:{token_digest[:16]}"
            max_requests = self.token_limit
            window_seconds = self.token_window
        else:
            # IP-based limiting (fallback).
            client_ip = get_client_ip(request)
            key = f"ip:{client_ip}"
            max_requests = self.ip_limit
            window_seconds = self.ip_window

        allowed, remaining, limit, reset_time = check_rate_limit(
            key, max_requests, window_seconds
        )

        if not allowed:
            # Round up so clients are never told to retry before the window
            # has actually ended; truncation could advertise "0" seconds
            # while the request would still be rejected.
            retry_after = max(1, math.ceil(reset_time - time.time()))
            raise HTTPException(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                detail="Rate limit exceeded. Please try again later.",
                headers={
                    "X-RateLimit-Limit": str(limit),
                    "X-RateLimit-Remaining": "0",
                    "X-RateLimit-Reset": str(math.ceil(reset_time)),
                    "Retry-After": str(retry_after),
                },
            )

        # Rate limit info for the caller to expose as response headers.
        return {
            "X-RateLimit-Limit": limit,
            "X-RateLimit-Remaining": remaining,
        }
# Shared limiter instance built with the default limits
rate_limiter = RateLimiter()


async def rate_limit_dependency(
    request: Request,
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(api_token_security),
) -> Dict[str, int]:
    """Apply the default rate limits to the current request.

    Delegates to the module-level ``rate_limiter`` instance:
    - 100 requests per hour per API token
    - 30 requests per minute per IP (fallback)

    Args:
        request: FastAPI request object.
        credentials: Optional API token credentials.

    Returns:
        Dict with rate limit headers info, exactly as produced by
        ``rate_limiter``.
    """
    header_info = await rate_limiter(request, credentials)
    return header_info