Files
WeFlow/electron/services/llamaService.ts

372 lines
14 KiB
TypeScript

import fs from "fs";
import { app, BrowserWindow } from "electron";
import path from "path";
import { ConfigService } from './config';
// Loose local aliases for the node-llama-cpp objects used by LlamaService.
// The library is loaded with a dynamic import() at runtime, so its types are
// deliberately not imported statically (they might not be available or could
// cause issues at build time).
type LlamaModel = any;
type LlamaContext = any;
type LlamaChatSession = any;
/**
 * Wraps node-llama-cpp for the Electron main process: lazily loads the
 * library, loads one model with one context/sequence/chat session, streams chat
 * completions, and downloads model files over HTTP(S).
 */
export class LlamaService {
  /** Headers sent with every download-related request (probe, chunk, fallback). */
  private static readonly DOWNLOAD_HEADERS: Record<string, string> = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://www.modelscope.cn/'
  };
  /** HTTP status codes that are followed as redirects. */
  private static readonly REDIRECT_STATUS_CODES: readonly number[] = [301, 302, 303, 307, 308];
  /** Upper bound on redirect hops so a redirect loop cannot recurse forever. */
  private static readonly MAX_REDIRECTS = 10;
  /** Maximum number of parallel range requests used for one download. */
  private static readonly DOWNLOAD_THREADS = 4;

  private _model: LlamaModel | null = null;
  private _context: LlamaContext | null = null;
  private _sequence: any = null;
  private _session: LlamaChatSession | null = null;
  private _llama: any = null;
  private _nodeLlamaCpp: any = null;
  private configService = new ConfigService();
  private _initialized = false;
  /** In-flight init() call, shared so concurrent callers do not initialize twice. */
  private _initPromise: Promise<void> | null = null;

  constructor() {
    // Lazy initialization: the library is only loaded when first needed.
  }

  /**
   * Loads node-llama-cpp and obtains the llama instance.
   *
   * Failures are logged, not thrown; callers detect them through the missing
   * llama instance (loadModel() then throws "Llama not initialized"). A failed
   * attempt can be retried by calling init() again.
   */
  public async init() {
    if (this._initialized) return;
    if (!this._initPromise) {
      this._initPromise = (async () => {
        try {
          // Dynamic import to handle ESM module in CJS context
          this._nodeLlamaCpp = await import("node-llama-cpp");
          this._llama = await this._nodeLlamaCpp.getLlama();
          this._initialized = true;
          console.log("[LlamaService] Llama initialized");
        } catch (error) {
          console.error("[LlamaService] Failed to initialize Llama:", error);
        } finally {
          this._initPromise = null;
        }
      })();
    }
    await this._initPromise;
  }

  /**
   * Loads the model at `modelPath`, creates a context, a sequence and a fresh
   * chat session on it.
   *
   * The new resources are built first and only swapped in once everything
   * succeeded; the previously loaded model/context are then disposed. If any
   * step fails, the partially created resources are disposed and the previous
   * state is left untouched.
   *
   * @returns `true` on success.
   * @throws If the library could not be initialized or any loading step fails.
   */
  public async loadModel(modelPath: string) {
    if (!this._llama) await this.init();
    let model: LlamaModel | null = null;
    let context: LlamaContext | null = null;
    try {
      console.log("[LlamaService] Loading model from:", modelPath);
      if (!this._llama) {
        throw new Error("Llama not initialized");
      }
      model = await this._llama.loadModel({
        modelPath: modelPath,
        gpuLayers: 'max', // Offload all layers to GPU if possible
        useMlock: false // Disable mlock to avoid "VirtualLock" errors (common on Windows)
      });
      if (!model) throw new Error("Failed to load model");
      context = await model.createContext({
        contextSize: 8192, // Balanced context size for better performance
        batchSize: 2048 // Increase batch size for better prompt processing speed
      });
      if (!context) throw new Error("Failed to create context");
      const sequence = context.getSequence();
      const { LlamaChatSession } = this._nodeLlamaCpp;
      const session = new LlamaChatSession({
        contextSequence: sequence
      });

      // Commit the new state atomically, then free what it replaced.
      const previousContext = this._context;
      const previousModel = this._model;
      this._model = model;
      this._context = context;
      this._sequence = sequence;
      this._session = session;
      await this.disposeQuietly(previousContext);
      await this.disposeQuietly(previousModel);

      console.log("[LlamaService] Model loaded successfully");
      return true;
    } catch (error) {
      console.error("[LlamaService] Failed to load model:", error);
      // Free whatever was created for this attempt but never committed.
      if (context && context !== this._context) await this.disposeQuietly(context);
      if (model && model !== this._model) await this.disposeQuietly(model);
      throw error;
    }
  }

  /**
   * Replaces the current chat session with a new one on the existing sequence.
   *
   * @param systemPrompt Optional system prompt for the new session.
   * @returns `true` once the session has been created.
   * @throws If no model is loaded or the library is unavailable.
   */
  public async createSession(systemPrompt?: string) {
    if (!this._context) throw new Error("Model not loaded");
    if (!this._nodeLlamaCpp) await this.init();
    if (!this._nodeLlamaCpp) throw new Error("Llama not initialized");
    const { LlamaChatSession } = this._nodeLlamaCpp;
    if (!this._sequence) {
      this._sequence = this._context.getSequence();
    }
    this._session = new LlamaChatSession({
      contextSequence: this._sequence,
      systemPrompt: systemPrompt
    });
    return true;
  }

  /**
   * Sends `message` to the current session and streams the reply.
   *
   * @param message User message.
   * @param options `thinking` selects the sampling preset (defaults to false).
   * @param onToken Called with every text chunk as it is generated.
   * @returns The complete response returned by the session.
   * @throws If no session exists or generation fails.
   */
  public async chat(message: string, options: { thinking?: boolean } = {}, onToken: (token: string) => void) {
    if (!this._session) throw new Error("Session not initialized");
    const thinking = options.thinking ?? false;
    // Sampling parameters based on mode.
    // NOTE(review): node-llama-cpp appears to document `repeatPenalty` as an
    // options object (or `false`) rather than a number, so the bare 1.5 below
    // may be ignored — confirm against the installed version.
    const samplingParams = thinking ? {
      temperature: 0.6,
      topP: 0.95,
      topK: 20,
      repeatPenalty: 1.5 // PresencePenalty=1.5
    } : {
      temperature: 0.7,
      topP: 0.8,
      topK: 20,
      repeatPenalty: 1.5
    };
    try {
      const response = await this._session.prompt(message, {
        ...samplingParams,
        onTextChunk: (chunk: string) => {
          onToken(chunk);
        }
      });
      return response;
    } catch (error) {
      console.error("[LlamaService] Chat error:", error);
      throw error;
    }
  }

  /**
   * Reports whether a file exists at `modelPath` and, if so, its size in bytes.
   * Never throws: unexpected errors are returned in the `error` field.
   */
  public async getModelStatus(modelPath: string) {
    try {
      // A single stat() avoids the exists-then-stat race and does not block.
      const stats = await fs.promises.stat(modelPath);
      return {
        exists: true,
        path: modelPath,
        size: stats.size
      };
    } catch (error) {
      const code = (error as { code?: string } | null)?.code;
      if (code === 'ENOENT' || code === 'ENOTDIR') {
        return { exists: false, path: modelPath };
      }
      return { exists: false, path: modelPath, error: String(error) };
    }
  }

  /**
   * Returns the configured model directory, or `<documents>/WeFlow/models`.
   * NOTE(review): this reads the 'whisperModelDir' setting — confirm that
   * sharing the directory with that setting is intended.
   */
  private resolveModelDir(): string {
    const configured = this.configService.get('whisperModelDir') as string | undefined;
    if (configured) return configured;
    return path.join(app.getPath('documents'), 'WeFlow', 'models');
  }

  /**
   * Downloads `url` to `savePath`, replacing any existing file.
   *
   * The URL is probed first; when the server reports a total size and range
   * support, the file is fetched with up to four parallel range requests
   * written at their offsets into one file. Otherwise a single plain request
   * is used. Progress is reported about once per second and once on completion.
   *
   * On failure every outstanding request is aborted and the partial file is
   * removed, so a broken download is never mistaken for a usable model.
   *
   * @throws The first error encountered by the probe fallback or any chunk.
   */
  public async downloadModel(url: string, savePath: string, onProgress: (payload: { downloaded: number; total: number; speed: number }) => void): Promise<void> {
    // Ensure directory exists
    fs.mkdirSync(path.dirname(savePath), { recursive: true });
    console.info(`[LlamaService] Multi-threaded download check for: ${savePath}`);
    if (fs.existsSync(savePath)) {
      fs.unlinkSync(savePath);
    }

    // 1. Get total size and check range support
    let probeResult: { totalSize: number; acceptRanges: boolean; finalUrl: string };
    try {
      probeResult = await this.probeUrl(url);
    } catch (err) {
      console.warn("[LlamaService] Probe failed, falling back to single-thread.", err);
      return this.downloadSingleThread(url, savePath, onProgress);
    }
    const { totalSize, acceptRanges, finalUrl } = probeResult;
    console.log(`[LlamaService] Total size: ${totalSize}, Accept-Ranges: ${acceptRanges}`);
    if (!Number.isFinite(totalSize) || totalSize <= 0 || !acceptRanges) {
      console.warn("[LlamaService] Ranges not supported or size unknown, falling back to single-thread.");
      return this.downloadSingleThread(finalUrl, savePath, onProgress);
    }

    // 2. Fetch all ranges in parallel into one file descriptor.
    const ranges = LlamaService.planChunks(totalSize, LlamaService.DOWNLOAD_THREADS);
    const fd = fs.openSync(savePath, 'w');
    // Shared with every chunk so a failure can stop the others from writing.
    const control: { cancelled: boolean; requests: Array<{ destroy: () => void }> } = {
      cancelled: false,
      requests: []
    };
    let downloadedLength = 0;
    let lastDownloadedLength = 0;
    let lastTime = Date.now();
    let speed = 0;
    let completed = false;
    const speedInterval = setInterval(() => {
      const now = Date.now();
      const duration = (now - lastTime) / 1000;
      if (duration > 0) {
        speed = (downloadedLength - lastDownloadedLength) / duration;
        lastDownloadedLength = downloadedLength;
        lastTime = now;
        onProgress({ downloaded: downloadedLength, total: totalSize, speed });
      }
    }, 1000);
    try {
      const tasks = ranges.map(({ start, end }) =>
        this.downloadChunk(finalUrl, fd, start, end, (bytes) => {
          downloadedLength += bytes;
        }, control)
      );
      await Promise.all(tasks);
      completed = true;
      console.log("[LlamaService] Multi-threaded download complete");
      // Final progress update
      onProgress({ downloaded: totalSize, total: totalSize, speed: 0 });
    } catch (err) {
      console.error("[LlamaService] Multi-threaded download failed:", err);
      throw err;
    } finally {
      clearInterval(speedInterval);
      // Stop every chunk BEFORE closing the descriptor: a late write on a
      // closed (and possibly reused) fd could corrupt an unrelated file.
      control.cancelled = true;
      if (!completed) {
        for (const req of control.requests) {
          try {
            req.destroy();
          } catch (destroyError) {
            // Best effort: the request may already be finished.
          }
        }
      }
      fs.closeSync(fd);
      if (!completed) {
        try {
          fs.unlinkSync(savePath);
        } catch (unlinkError) {
          // Best effort: keep the original download error as the reported one.
        }
      }
    }
  }

  /** Picks the Node client module matching the URL scheme. */
  private static httpClientFor(url: string): any {
    return url.startsWith('https') ? require('https') : require('http');
  }

  /**
   * Validates a redirect and resolves its target against the current URL.
   * @throws If the hop budget is exhausted or the Location header is missing/invalid.
   */
  private static resolveRedirect(location: unknown, currentUrl: string, redirectsLeft: number): string {
    if (redirectsLeft <= 0) {
      throw new Error(`Too many redirects while requesting ${currentUrl}`);
    }
    if (typeof location !== 'string' || location.length === 0) {
      throw new Error(`Redirect without Location header from ${currentUrl}`);
    }
    return new URL(location, currentUrl).href;
  }

  /**
   * Extracts the full resource size from probe response headers.
   * Returns 0 when the size is unknown (e.g. `Content-Range: bytes 0-0/*`, or a
   * 206 response whose Content-Length only describes the returned part).
   */
  private static parseTotalSize(statusCode: number, headers: Record<string, any>): number {
    const contentRange = headers['content-range'];
    let raw: string;
    if (typeof contentRange === 'string' && contentRange.length > 0) {
      raw = contentRange.slice(contentRange.lastIndexOf('/') + 1);
    } else if (statusCode === 206) {
      return 0;
    } else {
      raw = String(headers['content-length'] ?? '0');
    }
    const parsed = parseInt(raw, 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
  }

  /**
   * Splits `[0, totalSize)` into at most `maxChunks` contiguous inclusive byte
   * ranges. Files smaller than `maxChunks` bytes simply get fewer ranges.
   */
  private static planChunks(totalSize: number, maxChunks: number): Array<{ start: number; end: number }> {
    const ranges: Array<{ start: number; end: number }> = [];
    if (!Number.isFinite(totalSize) || totalSize <= 0 || maxChunks <= 0) return ranges;
    const chunkSize = Math.ceil(totalSize / maxChunks);
    for (let start = 0; start < totalSize; start += chunkSize) {
      ranges.push({ start, end: Math.min(start + chunkSize, totalSize) - 1 });
    }
    return ranges;
  }

  /** Disposes a node-llama-cpp resource if it exposes dispose(); never throws. */
  private async disposeQuietly(resource: any): Promise<void> {
    if (!resource || typeof resource.dispose !== 'function') return;
    try {
      await resource.dispose();
    } catch (error) {
      console.warn("[LlamaService] Failed to dispose resource:", error);
    }
  }

  /**
   * Requests the first byte of `url` to learn the total size, whether byte
   * ranges are supported, and the URL reached after following redirects.
   */
  private async probeUrl(url: string, redirectsLeft: number = LlamaService.MAX_REDIRECTS): Promise<{ totalSize: number, acceptRanges: boolean, finalUrl: string }> {
    const client = LlamaService.httpClientFor(url);
    const options = {
      method: 'GET',
      headers: {
        ...LlamaService.DOWNLOAD_HEADERS,
        'Range': 'bytes=0-0'
      }
    };
    return new Promise((resolve, reject) => {
      const req = client.get(url, options, (res: any) => {
        const status: number = res.statusCode;
        if (LlamaService.REDIRECT_STATUS_CODES.includes(status)) {
          res.resume(); // Drain the redirect body so the socket is released.
          try {
            const nextUrl = LlamaService.resolveRedirect(res.headers.location, url, redirectsLeft);
            this.probeUrl(nextUrl, redirectsLeft - 1).then(resolve, reject);
          } catch (err) {
            reject(err);
          }
          return;
        }
        if (status !== 206 && status !== 200) {
          res.resume();
          reject(new Error(`Probe failed: HTTP ${status}`));
          return;
        }
        const contentRange = res.headers['content-range'];
        const totalSize = LlamaService.parseTotalSize(status, res.headers);
        const acceptRanges = res.headers['accept-ranges'] === 'bytes' || !!contentRange;
        resolve({ totalSize, acceptRanges, finalUrl: url });
        // Only the headers are needed; drop the body (it may be the whole file).
        res.destroy();
      });
      req.on('error', reject);
    });
  }

  /**
   * Downloads the inclusive byte range `start..end` of `url` and writes it at
   * the same offsets into `fd`.
   *
   * @param onData Called with the number of bytes written after each write.
   * @param control Optional shared state: the request is registered in
   *   `requests` so the caller can abort it, and no write happens once
   *   `cancelled` is set.
   * @throws (rejects) On a non-206 status, a write error, a connection error,
   *   or when fewer/more bytes than requested were received.
   */
  private async downloadChunk(url: string, fd: number, start: number, end: number, onData: (bytes: number) => void, control?: { cancelled: boolean; requests: Array<{ destroy: () => void }> }): Promise<void> {
    const client = LlamaService.httpClientFor(url);
    const options = {
      headers: {
        ...LlamaService.DOWNLOAD_HEADERS,
        'Range': `bytes=${start}-${end}`
      }
    };
    return new Promise<void>((resolve, reject) => {
      let settled = false;
      const fail = (err: unknown) => {
        if (settled) return;
        settled = true;
        reject(err);
      };
      const req = client.get(url, options, (res: any) => {
        if (res.statusCode !== 206) {
          res.resume();
          fail(new Error(`Chunk download failed: HTTP ${res.statusCode}`));
          return;
        }
        let currentOffset = start;
        res.on('data', (chunk: Buffer) => {
          // Never touch the descriptor after failure/cancellation: the caller
          // may already have closed it.
          if (settled || control?.cancelled) return;
          try {
            let written = 0;
            while (written < chunk.length) {
              const count = fs.writeSync(fd, chunk, written, chunk.length - written, currentOffset + written);
              if (count <= 0) throw new Error("Chunk download failed: write made no progress");
              written += count;
            }
            currentOffset += chunk.length;
            onData(chunk.length);
          } catch (err) {
            fail(err);
            res.destroy();
          }
        });
        res.on('end', () => {
          const expected = end - start + 1;
          const received = currentOffset - start;
          if (received !== expected) {
            fail(new Error(`Chunk download incomplete: expected ${expected} bytes, received ${received}`));
            return;
          }
          if (!settled) {
            settled = true;
            resolve();
          }
        });
        res.on('error', fail);
        // 'close' without a prior successful 'end' means the connection dropped.
        res.on('close', () => fail(new Error("Chunk download failed: connection closed before the chunk completed")));
      });
      req.on('error', fail);
      control?.requests.push(req);
    });
  }

  /**
   * Downloads `url` to `savePath` with one plain GET request, following
   * redirects. Progress is reported about once per second and once at the end.
   *
   * If the response errors, is cut short, or does not match its
   * Content-Length, the partial file is removed and the promise rejects.
   */
  private async downloadSingleThread(url: string, savePath: string, onProgress: (payload: { downloaded: number; total: number; speed: number }) => void, redirectsLeft: number = LlamaService.MAX_REDIRECTS): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const client = LlamaService.httpClientFor(url);
      const options = {
        headers: { ...LlamaService.DOWNLOAD_HEADERS }
      };
      const request = client.get(url, options, (response: any) => {
        const status: number = response.statusCode;
        if (LlamaService.REDIRECT_STATUS_CODES.includes(status)) {
          response.resume(); // Drain the redirect body so the socket is released.
          try {
            const nextUrl = LlamaService.resolveRedirect(response.headers.location, url, redirectsLeft);
            this.downloadSingleThread(nextUrl, savePath, onProgress, redirectsLeft - 1).then(resolve, reject);
          } catch (err) {
            reject(err);
          }
          return;
        }
        if (status !== 200) {
          response.resume();
          reject(new Error(`Fallback download failed: HTTP ${status}`));
          return;
        }
        const parsedLength = parseInt(response.headers['content-length'] || '0', 10);
        const totalLength = Number.isFinite(parsedLength) && parsedLength > 0 ? parsedLength : 0;
        let downloadedLength = 0;
        let lastDownloadedLength = 0;
        let lastTime = Date.now();
        let speed = 0;
        let settled = false;
        const fileStream = fs.createWriteStream(savePath);
        const speedInterval = setInterval(() => {
          const now = Date.now();
          const duration = (now - lastTime) / 1000;
          if (duration > 0) {
            speed = (downloadedLength - lastDownloadedLength) / duration;
            lastDownloadedLength = downloadedLength;
            lastTime = now;
            onProgress({ downloaded: downloadedLength, total: totalLength, speed });
          }
        }, 1000);
        const fail = (err: unknown) => {
          if (settled) return;
          settled = true;
          clearInterval(speedInterval);
          response.unpipe(fileStream);
          response.destroy();
          // Remove the partial file only after the stream released it, and
          // reject afterwards so a retry cannot race with the cleanup.
          fileStream.once('close', () => {
            fs.unlink(savePath, () => reject(err));
          });
          fileStream.destroy();
        };
        response.on('data', (chunk: any) => {
          downloadedLength += chunk.length;
        });
        response.on('error', fail);
        // pipe() does not end the file when the connection drops, so a
        // truncated body has to be detected here or the promise never settles.
        response.on('close', () => {
          if (!response.complete) {
            fail(new Error("Fallback download failed: connection closed before the download completed"));
          }
        });
        fileStream.on('error', fail);
        fileStream.on('finish', () => {
          if (settled) return;
          if (totalLength > 0 && downloadedLength !== totalLength) {
            fail(new Error(`Fallback download incomplete: expected ${totalLength} bytes, received ${downloadedLength}`));
            return;
          }
          settled = true;
          clearInterval(speedInterval);
          // Final progress update, mirroring the multi-threaded path.
          onProgress({ downloaded: downloadedLength, total: totalLength, speed: 0 });
          resolve();
        });
        response.pipe(fileStream);
      });
      request.on('error', reject);
    });
  }

  /** Returns the directory where model files are stored. */
  public getModelsPath() {
    return this.resolveModelDir();
  }
}
/**
 * Shared module-level LlamaService instance, created when this module is first
 * imported. Loading node-llama-cpp is deferred until init()/loadModel() runs.
 */
export const llamaService = new LlamaService();