// llamaCppService.js - HTTP client for llama.cpp server on droplet
const fetch = require('node-fetch');

// Base URL of the llama.cpp HTTP server (override via env to point at the droplet).
const LLAMA_CPP_ENDPOINT = process.env.LLAMA_CPP_ENDPOINT || 'http://localhost:8080';
// Model identifier forwarded with requests (llama.cpp serves whatever model it loaded).
const DEFAULT_MODEL = process.env.LLAMA_CPP_MODEL || 'qwen2.5:0.5b';
// Per-request timeout in ms. Fix: always pass a radix to parseInt. The `|| 60000`
// fallback is intentional here (not `??`): it also catches NaN from a non-numeric
// env value, which nullish coalescing would not.
const REQUEST_TIMEOUT = Number.parseInt(process.env.LLAMA_CPP_TIMEOUT, 10) || 60000; // 60 seconds
/**
 * Call llama.cpp completion API (POST /completion, non-streaming).
 * @param {string} prompt - The prompt to send
 * @param {object} options - Additional options
 * @param {string} [options.model] - Model name forwarded to the server
 * @param {number} [options.max_tokens] - Max tokens to generate (mapped to n_predict)
 * @param {number} [options.temperature] - Sampling temperature
 * @returns {Promise<string>} - The generated text ('' when the server returns none)
 * @throws {Error} on non-2xx responses, network failures, or timeout
 */
async function completion(prompt, options = {}) {
  const {
    model = DEFAULT_MODEL,
    max_tokens = 256, // NOTE(review): original defaults not visible in this excerpt — confirm
    temperature = 0.7, // NOTE(review): original defaults not visible in this excerpt — confirm
  } = options;

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;
  const requestBody = {
    model,
    prompt,
    n_predict: max_tokens, // llama.cpp uses n_predict instead of max_tokens
    temperature,
    stream: false, // single JSON response, not SSE
  };

  // Abort the request if the server takes longer than REQUEST_TIMEOUT.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT);

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody),
      signal: controller.signal
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`llama.cpp API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();

    // llama.cpp returns the completion in the 'content' field
    return data.content || data.response || '';
  } catch (error) {
    if (error.name === 'AbortError') {
      console.error('[llama.cpp] Request timeout');
      throw new Error('llama.cpp request timeout');
    }
    console.error('[llama.cpp] Error:', error.message);
    throw error;
  } finally {
    // Fix: the original cleared the timer only on the success path, so a
    // rejected fetch left the abort timer alive; `finally` guarantees cleanup.
    clearTimeout(timeoutId);
  }
}
/**
 * Call llama.cpp with streaming support (SSE over POST /completion).
 * Each `data: {...}` line's `content` field is forwarded to onChunk as it arrives.
 * @param {string} prompt
 * @param {Function} onChunk - Callback for each chunk of generated text
 * @param {object} options
 * @param {string} [options.model] - Model name forwarded to the server
 * @param {number} [options.max_tokens] - Max tokens to generate (mapped to n_predict)
 * @param {number} [options.temperature] - Sampling temperature
 * @returns {Promise<void>} resolves when the stream ends, rejects on stream error
 * @throws {Error} on non-2xx responses or network failures
 */
async function completionStream(prompt, onChunk, options = {}) {
  const {
    model = DEFAULT_MODEL,
    max_tokens = 256, // NOTE(review): original defaults not visible in this excerpt — confirm
    temperature = 0.7, // NOTE(review): original defaults not visible in this excerpt — confirm
  } = options;

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;
  const requestBody = {
    model,
    prompt,
    n_predict: max_tokens,
    temperature,
    stream: true // ask llama.cpp for SSE chunks
  };

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody)
    });

    if (!response.ok) {
      // Fix: include the response body for parity with completion()'s error message.
      const errorText = await response.text();
      throw new Error(`llama.cpp API error: ${response.status} - ${errorText}`);
    }

    // node-fetch exposes the body as a Node.js readable stream.
    const reader = response.body;
    let buffer = '';

    reader.on('data', (chunk) => {
      buffer += chunk.toString();
      const lines = buffer.split('\n');

      // Keep the last incomplete line in the buffer
      buffer = lines.pop() || '';

      for (const line of lines) {
        if (line.startsWith('data: ')) {
          const data = line.slice(6);
          if (data === '[DONE]') {
            continue; // end-of-stream sentinel; 'end' event resolves the promise
          }
          try {
            const parsed = JSON.parse(data);
            if (parsed.content) {
              onChunk(parsed.content);
            }
          } catch (e) {
            // Best-effort: a malformed chunk is logged and skipped, not fatal.
            console.error('[llama.cpp] Failed to parse chunk:', e.message);
          }
        }
      }
    });

    // Settle when the underlying stream ends or errors.
    return new Promise((resolve, reject) => {
      reader.on('end', resolve);
      reader.on('error', reject);
    });
  } catch (error) {
    console.error('[llama.cpp] Streaming error:', error.message);
    throw error;
  }
}
157 * High-level function for different AI task types
158 * @param {string} text - Input text
159 * @param {string} type - Task type: 'rewrite', 'summary', 'expand', 'analyze'
160 * @param {object} options - Additional options
// NOTE(review): only the prompt-assignment lines of this dispatcher survive in
// this excerpt — the branch/case labels between them are missing, so the task
// labels noted below are inferred from prompt wording; confirm against the
// full file. Builds a task-specific prompt, then delegates to completion().
162async function callLlama(text, type = 'rewrite', options = {}) {
// presumably the 'summary' branch
167    prompt = `Summarize the following text concisely:\n\n${text}`;
// presumably the 'expand' branch (bullet points -> paragraphs)
170    prompt = `Expand the following bullet points into full paragraphs:\n\n${text}`;
// presumably the 'analyze' branch
173    prompt = `Analyze the following and provide insights:\n\n${text}`;
// presumably a support-ticket triage branch (summary, category, solution, priority, SLA, tags)
176    prompt = `Given this support ticket, generate:\n- Short summary\n- Suggested category\n- Possible solution\n- Follow-up questions\n- Priority level\n- SLA impact\n- Auto-tag hardware/software\n\nTicket: ${text}`;
// presumably an alert-classification branch (false alarm / needs ticket / auto-resolve)
179    prompt = `Classify this alert. Is it a false alarm, needs ticket, or can be auto-resolved?\nExplain error/logs if present.\n\nAlert: ${text}`;
// presumably an email resolution-status branch — asks the model for strict JSON,
// so callers of this branch likely JSON.parse the result
182    prompt = `Analyze the following email.\nDetermine whether the issue is resolved, unresolved, or needs more details.\nRespond with JSON:\n{ "status": "resolved"|"unresolved"|"followup", "reason": "...", "confidence": 0-1 }\n\nEmail: ${text}`;
// presumably an event-remediation branch (script / recommended action)
185    prompt = `Given this event, generate a remediation script or recommended action.\nEvent: ${text}`;
// presumably a contract/policy-compliance branch
188    prompt = `Given this contract and agent data, suggest policy changes or compliance actions.\nData: ${text}`;
// default branch: 'rewrite' (matches the type parameter's default value)
191    prompt = `Rewrite the following in a clear, professional tone. Preserve technical meaning:\n\n${text}`;
// Delegate to the non-streaming completion API; options pass straight through.
194    return await completion(prompt, options);
/**
 * Health check for llama.cpp endpoint (GET /health).
 * Never throws: any failure is logged and reported as `false`.
 * @returns {Promise<boolean>} true when the server responds with a 2xx status
 */
async function healthCheck() {
  // A health probe should fail fast rather than hang for the full
  // REQUEST_TIMEOUT; 5s is a deliberate short bound for this check only.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 5000);

  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/health`, {
      method: 'GET',
      signal: controller.signal
    });
    // NOTE(review): original return lines are missing from this excerpt;
    // `response.ok` is the natural boolean result — confirm against full file.
    return response.ok;
  } catch (error) {
    console.error('[llama.cpp] Health check failed:', error.message);
    return false;
  } finally {
    clearTimeout(timeoutId);
  }
}
/**
 * Get model info (if supported by llama.cpp) via GET /props.
 * Never throws: failures are logged as "unavailable" and yield null.
 * @returns {Promise<object|null>} parsed /props payload, or null when unavailable
 */
async function getModelInfo() {
  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/props`, {
      method: 'GET'
    });

    // Fix: guard the status before parsing — a 404 from older servers without
    // /props would otherwise surface as a confusing JSON parse error.
    if (!response.ok) {
      throw new Error(`llama.cpp props error: ${response.status}`);
    }

    return await response.json();
  } catch (error) {
    console.error('[llama.cpp] Model info unavailable:', error.message);
    // NOTE(review): original post-catch return is missing from this excerpt;
    // null matches the "unavailable" log semantics — confirm against full file.
    return null;
  }
}
  // Backward compatibility aliases