// llamaCppService.js - HTTP client for llama.cpp server on droplet
const fetch = require('node-fetch');

// Base URL of the llama.cpp HTTP server (override via env to point at the droplet).
const LLAMA_CPP_ENDPOINT = process.env.LLAMA_CPP_ENDPOINT || 'http://localhost:8080';
// Model identifier forwarded with requests (llama.cpp serves whatever model it loaded).
const DEFAULT_MODEL = process.env.LLAMA_CPP_MODEL || 'qwen2.5:0.5b';
// Per-request timeout in ms. Fix: always pass a radix to parseInt. The `|| 60000`
// fallback is intentional here (not `??`): it also catches NaN from a non-numeric
// env value, which nullish coalescing would not.
const REQUEST_TIMEOUT = Number.parseInt(process.env.LLAMA_CPP_TIMEOUT, 10) || 60000; // 60 seconds
/**
 * Call llama.cpp completion API (POST /completion, non-streaming).
 * @param {string} prompt - The prompt to send
 * @param {object} options - Additional options
 * @param {string} [options.model] - Model name forwarded to the server
 * @param {number} [options.max_tokens] - Max tokens to generate (mapped to n_predict)
 * @param {number} [options.temperature] - Sampling temperature
 * @returns {Promise<string>} - The generated text ('' when the server returns none)
 * @throws {Error} on non-2xx responses, network failures, or timeout
 */
async function completion(prompt, options = {}) {
  const {
    model = DEFAULT_MODEL,
    max_tokens = 256, // NOTE(review): original defaults not visible in this excerpt — confirm
    temperature = 0.7, // NOTE(review): original defaults not visible in this excerpt — confirm
  } = options;

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;
  const requestBody = {
    model,
    prompt,
    n_predict: max_tokens, // llama.cpp uses n_predict instead of max_tokens
    temperature,
    stream: false, // single JSON response, not SSE
  };

  // Abort the request if the server takes longer than REQUEST_TIMEOUT.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT);

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody),
      signal: controller.signal
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`llama.cpp API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();

    // llama.cpp returns the completion in the 'content' field
    return data.content || data.response || '';
  } catch (error) {
    if (error.name === 'AbortError') {
      console.error('[llama.cpp] Request timeout');
      throw new Error('llama.cpp request timeout');
    }
    console.error('[llama.cpp] Error:', error.message);
    throw error;
  } finally {
    // Fix: the original cleared the timer only on the success path, so a
    // rejected fetch left the abort timer alive; `finally` guarantees cleanup.
    clearTimeout(timeoutId);
  }
}
/**
 * Call llama.cpp with streaming support (SSE over POST /completion).
 * Each `data: {...}` line's `content` field is forwarded to onChunk as it arrives.
 * @param {string} prompt
 * @param {Function} onChunk - Callback for each chunk of generated text
 * @param {object} options
 * @param {string} [options.model] - Model name forwarded to the server
 * @param {number} [options.max_tokens] - Max tokens to generate (mapped to n_predict)
 * @param {number} [options.temperature] - Sampling temperature
 * @returns {Promise<void>} resolves when the stream ends, rejects on stream error
 * @throws {Error} on non-2xx responses or network failures
 */
async function completionStream(prompt, onChunk, options = {}) {
  const {
    model = DEFAULT_MODEL,
    max_tokens = 256, // NOTE(review): original defaults not visible in this excerpt — confirm
    temperature = 0.7, // NOTE(review): original defaults not visible in this excerpt — confirm
  } = options;

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;
  const requestBody = {
    model,
    prompt,
    n_predict: max_tokens,
    temperature,
    stream: true // ask llama.cpp for SSE chunks
  };

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody)
    });

    if (!response.ok) {
      // Fix: include the response body for parity with completion()'s error message.
      const errorText = await response.text();
      throw new Error(`llama.cpp API error: ${response.status} - ${errorText}`);
    }

    // node-fetch exposes the body as a Node.js readable stream.
    const reader = response.body;
    let buffer = '';

    reader.on('data', (chunk) => {
      buffer += chunk.toString();
      const lines = buffer.split('\n');

      // Keep the last incomplete line in the buffer
      buffer = lines.pop() || '';

      for (const line of lines) {
        if (line.startsWith('data: ')) {
          const data = line.slice(6);
          if (data === '[DONE]') {
            continue; // end-of-stream sentinel; 'end' event resolves the promise
          }
          try {
            const parsed = JSON.parse(data);
            if (parsed.content) {
              onChunk(parsed.content);
            }
          } catch (e) {
            // Best-effort: a malformed chunk is logged and skipped, not fatal.
            console.error('[llama.cpp] Failed to parse chunk:', e.message);
          }
        }
      }
    });

    // Settle when the underlying stream ends or errors.
    return new Promise((resolve, reject) => {
      reader.on('end', resolve);
      reader.on('error', reject);
    });
  } catch (error) {
    console.error('[llama.cpp] Streaming error:', error.message);
    throw error;
  }
}
157 * High-level function for different AI task types
158 * @param {string} text - Input text
159 * @param {string} type - Task type: 'rewrite', 'summary', 'expand', 'analyze'
160 * @param {object} options - Additional options
// NOTE(review): only the prompt-assignment lines of this dispatcher survive in
// this excerpt — the branch/case labels between them are missing, so the task
// labels noted below are inferred from prompt wording; confirm against the
// full file. Builds a task-specific prompt, then delegates to completion().
162async function callLlama(text, type = 'rewrite', options = {}) {
// presumably the 'summary' branch
167    prompt = `Summarize the following text concisely:\n\n${text}`;
// presumably the 'expand' branch (bullet points -> paragraphs)
170    prompt = `Expand the following bullet points into full paragraphs:\n\n${text}`;
// presumably the 'analyze' branch
173    prompt = `Analyze the following and provide insights:\n\n${text}`;
// presumably a support-ticket triage branch (summary, category, solution, priority, SLA, tags)
176    prompt = `Given this support ticket, generate:\n- Short summary\n- Suggested category\n- Possible solution\n- Follow-up questions\n- Priority level\n- SLA impact\n- Auto-tag hardware/software\n\nTicket: ${text}`;
// presumably an alert-classification branch (false alarm / needs ticket / auto-resolve)
179    prompt = `Classify this alert. Is it a false alarm, needs ticket, or can be auto-resolved?\nExplain error/logs if present.\n\nAlert: ${text}`;
// presumably an email resolution-status branch — asks the model for strict JSON,
// so callers of this branch likely JSON.parse the result
182    prompt = `Analyze the following email.\nDetermine whether the issue is resolved, unresolved, or needs more details.\nRespond with JSON:\n{ "status": "resolved"|"unresolved"|"followup", "reason": "...", "confidence": 0-1 }\n\nEmail: ${text}`;
// presumably an event-remediation branch (script / recommended action)
185    prompt = `Given this event, generate a remediation script or recommended action.\nEvent: ${text}`;
// presumably a contract/policy-compliance branch
188    prompt = `Given this contract and agent data, suggest policy changes or compliance actions.\nData: ${text}`;
// default branch: 'rewrite' (matches the type parameter's default value)
191    prompt = `Rewrite the following in a clear, professional tone. Preserve technical meaning:\n\n${text}`;
// Delegate to the non-streaming completion API; options pass straight through.
194    return await completion(prompt, options);
/**
 * Health check for llama.cpp endpoint (GET /health).
 * Never throws: any failure is logged and reported as `false`.
 * @returns {Promise<boolean>} true when the server responds with a 2xx status
 */
async function healthCheck() {
  // A health probe should fail fast rather than hang for the full
  // REQUEST_TIMEOUT; 5s is a deliberate short bound for this check only.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 5000);

  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/health`, {
      method: 'GET',
      signal: controller.signal
    });
    // NOTE(review): original return lines are missing from this excerpt;
    // `response.ok` is the natural boolean result — confirm against full file.
    return response.ok;
  } catch (error) {
    console.error('[llama.cpp] Health check failed:', error.message);
    return false;
  } finally {
    clearTimeout(timeoutId);
  }
}
/**
 * Get model info (if supported by llama.cpp) via GET /props.
 * Never throws: failures are logged as "unavailable" and yield null.
 * @returns {Promise<object|null>} parsed /props payload, or null when unavailable
 */
async function getModelInfo() {
  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/props`, {
      method: 'GET'
    });

    // Fix: guard the status before parsing — a 404 from older servers without
    // /props would otherwise surface as a confusing JSON parse error.
    if (!response.ok) {
      throw new Error(`llama.cpp props error: ${response.status}`);
    }

    return await response.json();
  } catch (error) {
    console.error('[llama.cpp] Model info unavailable:', error.message);
    // NOTE(review): original post-catch return is missing from this excerpt;
    // null matches the "unavailable" log semantics — confirm against full file.
    return null;
  }
}
  // Backward compatibility aliases