You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 lines
1.2 KiB
40 lines
1.2 KiB
/** Deterministic 512-d L2-normalized pseudo-embedding. */
import { createHash } from 'node:crypto';

/**
 * Creates a deterministic pseudo-random generator backed by a 32-bit linear
 * congruential generator (LCG).
 *
 * @param seed - Initial state. It is coerced to an unsigned 32-bit integer
 *   with `>>> 0`, so fractional parts are dropped and negative values wrap.
 * @returns A function that advances the state on every call and returns it
 *   divided by `0xffffffff`, i.e. a value in the closed range [0, 1].
 */
function seededRandom(seed: number): () => number {
  const MULTIPLIER = 1664525;
  const INCREMENT = 1013904223;
  const MAX_UINT32 = 0xffffffff;

  let state = seed >>> 0;

  return () => {
    // state < 2^32, so state * MULTIPLIER + INCREMENT stays below 2^53 and is
    // computed exactly before `>>> 0` reduces it modulo 2^32.
    state = (state * MULTIPLIER + INCREMENT) >>> 0;
    return state / MAX_UINT32;
  };
}
/**
 * Builds a deterministic, L2-normalized 512-dimensional pseudo-embedding.
 *
 * Each component is an approximately Gaussian value obtained by summing 12
 * uniform samples and subtracting 6. The same `seed` and `jitter` always
 * produce the same vector.
 *
 * @param seed - Seed for the base component stream.
 * @param jitter - Noise amplitude. When greater than 0, a second
 *   approximately Gaussian sample scaled by `jitter` is added to every
 *   component. The noise stream is seeded with
 *   `seed + Math.floor(jitter * 1000) + 7`, so jitter values that fall into
 *   the same 0.001-wide bucket share one noise stream and differ only in
 *   scale. Values that are not greater than 0 add no noise.
 * @returns A new array of 512 numbers divided by their Euclidean norm.
 */
export function makeEmbedding(seed: number, jitter = 0): number[] {
  const DIMENSIONS = 512;
  const TAPS = 12;

  // Gaussian approximation: sum of 12 uniform samples, re-centred around 0.
  const gaussianish = (next: () => number): number => {
    let sum = 0;
    for (let k = 0; k < TAPS; k++) sum += next();
    return sum - TAPS / 2;
  };

  const rng = seededRandom(seed);
  const jitterRng = seededRandom(seed + Math.floor(jitter * 1000) + 7);
  const applyJitter = jitter > 0;

  const raw: number[] = [];
  let sumSquares = 0;
  for (let i = 0; i < DIMENSIONS; i++) {
    const base = gaussianish(rng);
    // The jitter stream is only consumed when jitter is enabled.
    const noise = applyJitter ? gaussianish(jitterRng) * jitter : 0;
    const value = base + noise;
    raw.push(value);
    sumSquares += value * value;
  }

  // Fall back to 1 when the norm is 0 (or NaN) to avoid dividing by zero.
  const norm = Math.sqrt(sumSquares) || 1;
  return raw.map((value) => value / norm);
}
/**
 * Short fingerprint of an embedding, intended for log output.
 *
 * Only the first 32 components are hashed: they are joined with commas,
 * hashed with SHA-1, and the first 12 hex characters of the digest are kept.
 *
 * @param embedding - Vector to fingerprint; it is not modified.
 * @returns A 12-character lowercase hexadecimal string.
 */
export function embeddingFingerprint(embedding: readonly number[]): string {
  const head = embedding.slice(0, 32).join(',');
  return createHash('sha1').update(head).digest('hex').slice(0, 12);
}