EverydayTech Platform - Developer Reference
Complete Source Code Documentation - All Applications
Loading...
Searching...
No Matches
llamaCppService.js
Go to the documentation of this file.
// llamaCppService.js - HTTP client for llama.cpp server on droplet
const fetch = require('node-fetch');

// Configuration (all overridable via environment variables)
const LLAMA_CPP_ENDPOINT = process.env.LLAMA_CPP_ENDPOINT || 'http://localhost:8080';
const DEFAULT_MODEL = process.env.LLAMA_CPP_MODEL || 'qwen2.5:0.5b';
// Request timeout in ms. Explicit radix avoids legacy octal/hex surprises;
// an unset or non-numeric env value parses to NaN, which is falsy and
// falls through to the 60s default.
const REQUEST_TIMEOUT = Number.parseInt(process.env.LLAMA_CPP_TIMEOUT, 10) || 60000; // 60 seconds
/**
 * Call the llama.cpp completion API (non-streaming).
 *
 * @param {string} prompt - The prompt to send.
 * @param {object} [options] - Generation options.
 * @param {number} [options.temperature=0.7] - Sampling temperature.
 * @param {number} [options.max_tokens=512] - Max tokens to generate (sent as n_predict).
 * @param {string[]|null} [options.stop=null] - Stop sequences ([] when null).
 * @param {boolean} [options.stream=false] - Request a streamed response body.
 * @param {number} [options.top_p=0.9] - Nucleus sampling threshold.
 * @param {number} [options.top_k=40] - Top-k sampling cutoff.
 * @param {number} [options.repeat_penalty=1.1] - Repetition penalty.
 * @returns {Promise<string>} The generated text ('' when the server returns none).
 * @throws {Error} On non-2xx responses, network failures, or when the request
 *   exceeds REQUEST_TIMEOUT (reported as 'llama.cpp request timeout').
 */
async function completion(prompt, options = {}) {
  const {
    temperature = 0.7,
    max_tokens = 512,
    stop = null,
    stream = false,
    top_p = 0.9,
    top_k = 40,
    repeat_penalty = 1.1
  } = options;
  // NOTE: options.model is intentionally not read — llama.cpp's /completion
  // endpoint serves whichever model the server process was launched with.

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;

  const requestBody = {
    prompt,
    temperature,
    n_predict: max_tokens, // llama.cpp uses n_predict instead of max_tokens
    stop: stop || [],
    stream,
    top_p,
    top_k,
    repeat_penalty
  };

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT);

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody),
      signal: controller.signal
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`llama.cpp API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();

    // llama.cpp returns the completion in the 'content' field
    return data.content || data.response || '';
  } catch (error) {
    if (error.name === 'AbortError') {
      console.error('[llama.cpp] Request timeout');
      throw new Error('llama.cpp request timeout');
    }
    console.error('[llama.cpp] Error:', error.message);
    throw error;
  } finally {
    // Always clear the timer — the previous version only cleared it on the
    // success path, leaving a live timer (and a pointless abort) after errors.
    clearTimeout(timeoutId);
  }
}
73
/**
 * Call llama.cpp with streaming (SSE) support.
 *
 * Parses `data: {...}` lines from the response stream and invokes onChunk
 * with each parsed `content` fragment as it arrives.
 *
 * @param {string} prompt - The prompt to send.
 * @param {Function} onChunk - Callback invoked with each text fragment.
 * @param {object} [options] - Generation options (see completion()).
 * @returns {Promise<void>} Resolves when the stream ends, rejects on stream error.
 * @throws {Error} On non-2xx responses or network failures.
 */
async function completionStream(prompt, onChunk, options = {}) {
  const {
    temperature = 0.7,
    max_tokens = 512,
    stop = null,
    top_p = 0.9,
    top_k = 40,
    repeat_penalty = 1.1
  } = options;
  // NOTE: options.model is intentionally not read — llama.cpp serves the
  // model its server process was launched with.

  const url = `${LLAMA_CPP_ENDPOINT}/completion`;

  const requestBody = {
    prompt,
    temperature,
    n_predict: max_tokens,
    stop: stop || [],
    stream: true,
    top_p,
    top_k,
    repeat_penalty
  };

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(requestBody)
    });

    if (!response.ok) {
      throw new Error(`llama.cpp API error: ${response.status}`);
    }

    // Parse SSE stream (node-fetch exposes the body as a Node Readable).
    const reader = response.body;
    let buffer = '';
    let finished = false; // set once '[DONE]' is seen; later lines are ignored

    const handleLine = (line) => {
      if (finished || !line.startsWith('data: ')) {
        return;
      }
      const data = line.slice(6);
      if (data === '[DONE]') {
        finished = true;
        return;
      }
      try {
        const parsed = JSON.parse(data);
        if (parsed.content) {
          onChunk(parsed.content);
        }
      } catch (e) {
        console.error('[llama.cpp] Failed to parse chunk:', e.message);
      }
    };

    reader.on('data', (chunk) => {
      buffer += chunk.toString();
      const lines = buffer.split('\n');

      // Keep the last incomplete line in the buffer
      buffer = lines.pop() || '';

      for (const line of lines) {
        handleLine(line);
      }
    });

    return new Promise((resolve, reject) => {
      reader.on('end', () => {
        // Flush a trailing line that arrived without a final newline —
        // previously this last fragment was silently dropped.
        if (buffer) {
          handleLine(buffer);
        }
        resolve();
      });
      reader.on('error', reject);
    });
  } catch (error) {
    console.error('[llama.cpp] Streaming error:', error.message);
    throw error;
  }
}
155
/**
 * High-level helper that builds a task-specific prompt and delegates
 * to completion().
 *
 * @param {string} text - Input text to embed in the prompt.
 * @param {string} [type='rewrite'] - Task type: 'rewrite', 'summary', 'expand',
 *   'analyze', 'ticket', 'alert', 'email', 'remediation', 'contract'.
 *   Unknown types fall back to 'rewrite'.
 * @param {object} [options] - Passed straight through to completion().
 * @returns {Promise<string>} The generated text.
 */
async function callLlama(text, type = 'rewrite', options = {}) {
  // Prompt templates keyed by task type.
  const promptBuilders = {
    summary: (t) => `Summarize the following text concisely:\n\n${t}`,
    expand: (t) => `Expand the following bullet points into full paragraphs:\n\n${t}`,
    analyze: (t) => `Analyze the following and provide insights:\n\n${t}`,
    ticket: (t) => `Given this support ticket, generate:\n- Short summary\n- Suggested category\n- Possible solution\n- Follow-up questions\n- Priority level\n- SLA impact\n- Auto-tag hardware/software\n\nTicket: ${t}`,
    alert: (t) => `Classify this alert. Is it a false alarm, needs ticket, or can be auto-resolved?\nExplain error/logs if present.\n\nAlert: ${t}`,
    email: (t) => `Analyze the following email.\nDetermine whether the issue is resolved, unresolved, or needs more details.\nRespond with JSON:\n{ "status": "resolved"|"unresolved"|"followup", "reason": "...", "confidence": 0-1 }\n\nEmail: ${t}`,
    remediation: (t) => `Given this event, generate a remediation script or recommended action.\nEvent: ${t}`,
    contract: (t) => `Given this contract and agent data, suggest policy changes or compliance actions.\nData: ${t}`,
    rewrite: (t) => `Rewrite the following in a clear, professional tone. Preserve technical meaning:\n\n${t}`
  };

  // Own-property check so arbitrary type strings (e.g. 'constructor')
  // cannot reach inherited Object.prototype members.
  const buildPrompt = Object.hasOwn(promptBuilders, type)
    ? promptBuilders[type]
    : promptBuilders.rewrite;

  return completion(buildPrompt(text), options);
}
196
/**
 * Health check for the llama.cpp endpoint.
 *
 * @returns {Promise<boolean>} true when GET /health responds 2xx within 5s,
 *   false on any error or timeout (never throws).
 */
async function healthCheck() {
  // Enforce the 5s cap with AbortController, consistent with completion().
  // The previous `timeout: 5000` fetch option is a node-fetch v2 extension
  // that native fetch silently ignores.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 5000);
  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/health`, {
      method: 'GET',
      signal: controller.signal
    });
    return response.ok;
  } catch (error) {
    console.error('[llama.cpp] Health check failed:', error.message);
    return false;
  } finally {
    clearTimeout(timeoutId);
  }
}
212
/**
 * Get model/server properties from llama.cpp's /props endpoint
 * (if supported by the running server version).
 *
 * @returns {Promise<object|null>} Parsed props JSON, or null on any
 *   failure or non-2xx response (never throws).
 */
async function getModelInfo() {
  // Enforce the 5s cap with AbortController, consistent with completion().
  // The previous `timeout: 5000` fetch option is a node-fetch v2 extension
  // that native fetch silently ignores.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 5000);
  try {
    const response = await fetch(`${LLAMA_CPP_ENDPOINT}/props`, {
      method: 'GET',
      signal: controller.signal
    });
    if (response.ok) {
      return await response.json();
    }
    return null;
  } catch (error) {
    console.error('[llama.cpp] Model info unavailable:', error.message);
    return null;
  } finally {
    clearTimeout(timeoutId);
  }
}
231
// Public API of this service module.
module.exports = {
  completion,
  completionStream,
  callLlama,
  healthCheck,
  getModelInfo,
  // Backward compatibility aliases
  // callQwen predates the llama.cpp migration; kept so older call sites work.
  callQwen: callLlama
};