// llmCacheRateLimit_8js_source.html

/**

 * @file LLM Caching and Rate Limiting Middleware

 * @module middleware/llmCacheRateLimit

 * @description

 * Express middleware for Large Language Model (LLM) request caching and per-tenant

 * rate limiting. Checks Redis cache for previous LLM results, enforces rate limits,

 * and caches successful LLM responses to reduce API costs and improve performance.

 *

 * Features:

 * - **Response caching**: Stores LLM outputs by (type, text) key

 * - **Rate limiting**: Per-tenant limits using Redis counters

 * - **Cost optimization**: Prevents redundant LLM API calls

 * - **Performance**: Returns cached results instantly (no LLM latency)

 * @requires llmRedis - Redis caching and rate limit logic from services/llmRedis

 * @see {@link module:services/llmRedis} for cache storage implementation

 */


// llmCacheRateLimit.js - Express middleware for LLM caching and rate limiting

const { getCachedLLMResult, rateLimit } = require('../services/llmRedis');


/**

 * LLM caching and rate limiting middleware.

 *

 * Validates request body, checks rate limits, queries cache, and either returns

 * cached result or passes to LLM handler. Stores cache key in res.locals for

 * response caching in route handler.

 * @function llmCacheRateLimit

 * @param {object} req - Express request object

 * @param {object} req.body - Request body

 * @param {string} req.body.text - Input text for LLM processing

 * @param {string} req.body.type - LLM operation type (e.g., "summarize", "classify")

 * @param {number} req.body.tenantId - Tenant ID for rate limiting

 * @param {object} res - Express response object

 * @param {object} res.locals - Response locals object

 * @param {object} res.locals.llmCacheKey - Cache key {type, text} for result storage

 * @param {Function} next - Express next middleware function

 * @returns {void} Returns cached result or calls next() for LLM processing

 * @throws {400} Missing required fields - text, type, or tenantId missing

 * @throws {429} Rate limit exceeded - Tenant exceeded LLM request quota

 * @example

 * // Apply to LLM routes

 * const llmCacheRateLimit = require('./middleware/llmCacheRateLimit');

 * const { setCachedLLMResult } = require('./services/llmRedis');

 * router.post('/ai/summarize', authenticateToken, llmCacheRateLimit, async (req, res) => {

 *   const summary = await llmService.summarize(req.body.text);

 *

 *   // Cache result for future requests

 *   await setCachedLLMResult(res.locals.llmCacheKey.type, res.locals.llmCacheKey.text, summary);

 *

 *   res.json({ output: summary, cached: false });

 * });

 * @example

 * // Cached response

 * {

 *   "output": "This text summarizes...",

 *   "cached": true

 * }

 */

module.exports = async function llmCacheRateLimit(req, res, next) {

  const { text, type, tenantId } = req.body;

  if (!text || !type || !tenantId) return res.status(400).json({ error: 'Missing required fields.' });


  // Rate limit per tenant

  if (await rateLimit(tenantId)) {

    return res.status(429).json({ error: 'Rate limit exceeded.' });

  }


  // Check cache

  const cached = await getCachedLLMResult(type, text);

  if (cached) {

    return res.json({ output: cached, cached: true });

  }


  // Continue to LLM processing

  res.locals.llmCacheKey = { type, text };

  next();

};