Files
autogame-17_capability-evolver/src/gep/selector.js

416 lines
16 KiB
JavaScript
Raw Normal View History

const { scoreTagOverlap } = require('./learningSignals');
const { captureEnvFingerprint } = require('./envFingerprint');
// ---------------------------------------------------------------------------
// Lightweight semantic similarity (bag-of-words cosine) for Gene selection.
// Acts as a complement to exact signals_match pattern matching.
// When EMBEDDING_PROVIDER is configured, can be replaced with real embeddings.
// ---------------------------------------------------------------------------
// Default multiplier applied to the bag-of-words cosine score when it is
// blended into a gene's total score.
const DEFAULT_SEMANTIC_WEIGHT = 0.4;

/**
 * Parse the SEMANTIC_MATCH_WEIGHT override.
 *
 * Unlike a `parseFloat(x) || default` chain, an explicit `0` is honoured so the
 * semantic component can be switched off. Missing, blank, non-numeric and
 * non-finite values fall back to the default.
 *
 * @param {string|undefined|null} raw - Raw environment value.
 * @returns {number} A finite weight.
 */
function parseSemanticWeight(raw) {
  if (raw == null || String(raw).trim() === '') return DEFAULT_SEMANTIC_WEIGHT;
  const parsed = parseFloat(raw);
  return Number.isFinite(parsed) ? parsed : DEFAULT_SEMANTIC_WEIGHT;
}

const SEMANTIC_WEIGHT = parseSemanticWeight(process.env.SEMANTIC_MATCH_WEIGHT);
// Common English filler words that are dropped during tokenization.
const STOP_WORDS = new Set([
  'the', 'and', 'for', 'with', 'from', 'that', 'this', 'into', 'when',
  'are', 'was', 'has', 'had', 'not', 'but', 'its', 'can', 'will', 'all',
  'any', 'use', 'may', 'also', 'should', 'would', 'could',
]);

/**
 * Split free text into lower-case word tokens.
 *
 * Every run of characters outside `a-z`, `0-9`, `_` and `-` acts as a
 * separator. Tokens shorter than two characters and stop words are discarded.
 *
 * @param {*} text - Value to tokenize; falsy values are treated as ''.
 * @returns {string[]} Tokens in their original order (duplicates kept).
 */
function tokenize(text) {
  const normalized = String(text || '').toLowerCase().replace(/[^a-z0-9_\-]+/g, ' ');
  const kept = [];
  for (const word of normalized.split(/\s+/)) {
    if (word.length < 2) continue;
    if (STOP_WORDS.has(word)) continue;
    kept.push(word);
  }
  return kept;
}
/**
 * Count how often each token occurs.
 *
 * The counter has no prototype, so tokens that happen to be named like
 * `Object.prototype` members ("constructor", "__proto__") are counted as
 * ordinary keys instead of picking up inherited values.
 *
 * @param {string[]} tokens - Tokens to count.
 * @returns {Object<string, number>} Null-prototype map of token -> occurrences.
 */
function buildTermFrequency(tokens) {
  const tf = Object.create(null);
  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    tf[token] = (tf[token] || 0) + 1;
  }
  return tf;
}
/**
 * Cosine similarity between two term-frequency maps.
 *
 * Only own, finite numeric entries contribute. Inherited properties (for
 * example `constructor` on a plain object) and non-numeric values count as 0,
 * so the result is never NaN because of a key collision. A null or undefined
 * vector is treated as empty.
 *
 * @param {Object<string, number>} tfA - First term-frequency map.
 * @param {Object<string, number>} tfB - Second term-frequency map.
 * @returns {number} dot(A, B) / (|A| * |B|), or 0 when either vector has zero norm.
 */
function cosineSimilarity(tfA, tfB) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const weightOf = function (tf, key) {
    if (tf == null || !hasOwn.call(tf, key)) return 0;
    const value = Number(tf[key]);
    return Number.isFinite(value) ? value : 0;
  };

  const keysA = tfA == null ? [] : Object.keys(tfA);
  const keysB = tfB == null ? [] : Object.keys(tfB);
  const keys = new Set(keysA.concat(keysB));

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  keys.forEach(function (key) {
    const a = weightOf(tfA, key);
    const b = weightOf(tfB, key);
    dotProduct += a * b;
    normA += a * a;
    normB += b * b;
  });

  if (normA === 0 || normB === 0) return 0;
  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
/**
 * Bag-of-words similarity between the incoming signals and a gene.
 *
 * The gene side is built from its `signals_match` entries (when that is an
 * array), its `summary` and its `id`. Both sides are tokenized, turned into
 * term-frequency maps and compared with cosine similarity.
 *
 * @param {Object} gene - Gene record.
 * @param {Array} signals - Signals describing the current situation.
 * @returns {number} Cosine similarity, or 0 when either side yields no tokens.
 */
function scoreGeneSemantic(gene, signals) {
  if (!gene) return 0;
  if (!signals || signals.length === 0) return 0;

  const signalWords = [];
  signals.forEach(function (signal) {
    for (const word of tokenize(signal)) signalWords.push(word);
  });
  if (signalWords.length === 0) return 0;

  // Gather every text fragment that describes the gene, then tokenize in order.
  const geneTexts = Array.isArray(gene.signals_match) ? gene.signals_match.slice() : [];
  if (gene.summary) geneTexts.push(gene.summary);
  if (gene.id) geneTexts.push(gene.id);

  const geneWords = [];
  geneTexts.forEach(function (text) {
    for (const word of tokenize(text)) geneWords.push(word);
  });
  if (geneWords.length === 0) return 0;

  return cosineSimilarity(buildTermFrequency(signalWords), buildTermFrequency(geneWords));
}
/**
 * Test whether a single pattern matches at least one signal.
 *
 * Three pattern forms are recognised, checked in this order:
 *   1. `/body/flags` - compiled as a RegExp (flags default to `i`); if it does
 *      not compile, the pattern falls through to the literal check.
 *   2. `a|b|c` (not starting with `/`) - alias list; any non-empty branch found
 *      as a case-insensitive substring counts as a hit.
 *   3. anything else - case-insensitive substring match of the whole pattern.
 *
 * @param {*} pattern - Pattern; stringified before use. Falsy never matches.
 * @param {Array} signals - Signals; each one is stringified before matching.
 * @returns {boolean} True when some signal matches.
 */
function matchPatternToSignals(pattern, signals) {
  if (!pattern) return false;
  if (!signals || signals.length === 0) return false;

  const patternText = String(pattern);
  const signalTexts = signals.map(function (signal) { return String(signal); });
  const containsIgnoringCase = function (needle) {
    return signalTexts.some(function (text) { return text.toLowerCase().includes(needle); });
  };

  const leadingSlash = patternText.startsWith('/');
  const closingSlash = patternText.lastIndexOf('/');
  if (patternText.length >= 2 && leadingSlash && closingSlash > 0) {
    const source = patternText.slice(1, closingSlash);
    const flags = patternText.slice(closingSlash + 1) || 'i';
    try {
      const matcher = new RegExp(source, flags);
      return signalTexts.some(function (text) { return matcher.test(text); });
    } catch (err) {
      // Not a valid regex (e.g. a file path): treat it as a literal below.
    }
  }

  // Multi-language alias list: "en_term|zh_term|ja_term".
  if (!leadingSlash && patternText.includes('|')) {
    return patternText
      .split('|')
      .map(function (alias) { return alias.trim().toLowerCase(); })
      .filter(Boolean)
      .some(function (alias) { return containsIgnoringCase(alias); });
  }

  return containsIgnoringCase(patternText.toLowerCase());
}
/**
 * Base relevance score of a gene for the given signals.
 *
 * Sums three components: one point per `signals_match` pattern that matches,
 * the tag-overlap score scaled by 0.6, and the semantic similarity scaled by
 * SEMANTIC_WEIGHT. A gene without patterns is scored on tag overlap alone.
 *
 * @param {Object} gene - Candidate; anything whose `type` is not 'Gene' scores 0.
 * @param {Array} signals - Signals describing the current situation.
 * @returns {number} Combined score.
 */
function scoreGene(gene, signals) {
  const isGene = Boolean(gene) && gene.type === 'Gene';
  if (!isGene) return 0;

  const matchPatterns = Array.isArray(gene.signals_match) ? gene.signals_match : [];
  const tagOverlap = scoreTagOverlap(gene, signals);

  if (matchPatterns.length === 0) {
    if (tagOverlap > 0) return tagOverlap * 0.6;
    return 0;
  }

  const exactHits = matchPatterns.reduce(function (hits, pattern) {
    return matchPatternToSignals(pattern, signals) ? hits + 1 : hits;
  }, 0);
  const semanticPart = scoreGeneSemantic(gene, signals) * SEMANTIC_WEIGHT;

  return exactHits + (tagOverlap * 0.6) + semanticPart;
}
/**
 * Look up the epigenetic boost recorded for the current environment.
 *
 * The environment key is `platform/arch/node_version`, built from whichever of
 * those fingerprint fields are present, or 'unknown' when none are. The first
 * mark whose `context` equals that key supplies the boost.
 *
 * @param {Object} gene - Gene that may carry an `epigenetic_marks` array.
 * @param {Object} envFingerprint - Fingerprint with optional platform, arch, node_version.
 * @returns {number} The mark's numeric boost, or 0 when absent or not numeric.
 */
function getEpigeneticBoostLocal(gene, envFingerprint) {
  if (!gene) return 0;
  const marks = gene.epigenetic_marks;
  if (!Array.isArray(marks)) return 0;

  const fingerprint = envFingerprint || {};
  const parts = [];
  for (const field of ['platform', 'arch', 'node_version']) {
    const text = fingerprint[field] ? String(fingerprint[field]) : '';
    if (text) parts.push(text);
  }
  const envContext = parts.length > 0 ? parts.join('/') : 'unknown';

  for (const mark of marks) {
    if (mark && mark.context === envContext) {
      return Number(mark.boost) || 0;
    }
  }
  return 0;
}
/**
 * Learning-based adjustment added on top of a gene's base score.
 *
 * Combines three sources and clamps the total to [-1.5, 1.5]:
 *   - the last 8 `learning_history` entries: +0.12 per success, otherwise
 *     -0.22 for mode 'hard' and -0.08 for mode 'soft';
 *   - the epigenetic boost for the current environment;
 *   - the last 6 `anti_patterns`: each one sharing a learning signal with the
 *     expanded current signals costs 0.4 (mode 'hard') or 0.18 (any other mode).
 *
 * @param {Object} gene - Candidate; anything whose `type` is not 'Gene' yields 0.
 * @param {Array} signals - Signals describing the current situation.
 * @param {Object} envFingerprint - Environment fingerprint for the epigenetic lookup.
 * @returns {number} Adjustment in the range [-1.5, 1.5].
 */
function scoreGeneLearning(gene, signals, envFingerprint) {
  if (!gene || gene.type !== 'Gene') return 0;

  let adjustment = 0;

  const recentHistory = Array.isArray(gene.learning_history) ? gene.learning_history.slice(-8) : [];
  for (const record of recentHistory) {
    if (!record) continue;
    if (record.outcome === 'success') {
      adjustment += 0.12;
      continue;
    }
    switch (record.mode) {
      case 'hard':
        adjustment -= 0.22;
        break;
      case 'soft':
        adjustment -= 0.08;
        break;
      default:
        break;
    }
  }

  adjustment += getEpigeneticBoostLocal(gene, envFingerprint);

  const antiPatterns = Array.isArray(gene.anti_patterns) ? gene.anti_patterns : [];
  if (antiPatterns.length > 0) {
    const currentTags = new Set(require('./learningSignals').expandSignals(signals, ''));
    let penalty = 0;
    antiPatterns.slice(-6).forEach(function (anti) {
      if (!anti || !Array.isArray(anti.learning_signals)) return;
      const sharesTag = anti.learning_signals.some(function (tag) {
        return currentTags.has(String(tag));
      });
      if (!sharesTag) return;
      penalty += anti.mode === 'hard' ? 0.4 : 0.18;
    });
    adjustment -= penalty;
  }

  const cappedAbove = Math.min(1.5, adjustment);
  return Math.max(-1.5, cappedAbove);
}
/**
 * Drift intensity as a continuous value: 0 = pure selection, 1 = pure drift.
 *
 * Modelled on genetic drift, which is stronger in small populations. With Ne
 * the effective population size (falling back to the gene pool size when Ne is
 * missing or 0):
 *   - drift explicitly enabled: min(1, 1/sqrt(Ne) + 0.3) when Ne > 1, else 0.7;
 *   - otherwise, Ne > 0:        min(1, 1/sqrt(Ne))
 *                               (Ne=1 -> 1.0, Ne=25 -> 0.2, Ne=100 -> 0.1);
 *   - no usable population size: 0.
 *
 * @param {Object} [opts]
 * @param {boolean} [opts.driftEnabled] - Explicit drift request.
 * @param {number} [opts.effectivePopulationSize] - Effective population size (Ne).
 * @param {number} [opts.genePoolSize] - Fallback population size.
 * @returns {number} Intensity in [0, 1].
 */
function computeDriftIntensity(opts) {
  const settings = opts || {};
  const toFiniteOrNull = function (value) {
    const numeric = Number(value);
    return Number.isFinite(numeric) ? numeric : null;
  };

  const explicitDrift = Boolean(settings.driftEnabled);
  // `||` deliberately skips 0 as well as null, so `ne` is a non-zero number or null.
  const ne = toFiniteOrNull(settings.effectivePopulationSize)
    || toFiniteOrNull(settings.genePoolSize)
    || null;

  if (explicitDrift) {
    if (ne !== null && ne > 1) {
      return Math.min(1, 1 / Math.sqrt(ne) + 0.3);
    }
    return 0.7;
  }

  if (ne !== null && ne > 0) {
    return Math.min(1, 1 / Math.sqrt(ne));
  }

  return 0;
}
/**
 * Choose a gene for the given signals.
 *
 * Steps:
 *   1. Score every gene (base score + learning adjustment). Positive scores of
 *      genes whose id starts with 'gene_distilled_' are multiplied by 0.8.
 *      Genes with a non-positive score are dropped; the rest are sorted by
 *      descending score.
 *   2. If `preferredGeneId` is among the scored candidates (and is not banned,
 *      unless drift is in use) it is selected directly.
 *   3. Otherwise banned genes are removed (unless drift is in use) and the top
 *      candidate is selected, except that with probability `driftIntensity` a
 *      drift pick replaces it: the candidate covering the most capability gaps
 *      when gap data matches, else a uniformly random pick among the top N.
 *
 * NOTE(review): `genePoolSize` is always passed to computeDriftIntensity, so
 * when no `effectivePopulationSize` is supplied the intensity is
 * 1/sqrt(genes.length), which exceeds the 0.15 `useDrift` threshold for pools
 * of up to 44 genes. In that case bans are not applied at all - confirm this is
 * intended.
 *
 * @param {Array} genes - Candidate genes.
 * @param {Array} signals - Signals describing the current situation.
 * @param {Object} [opts]
 * @param {Set|Array} [opts.bannedGeneIds] - Gene ids to suppress (any object with `has`, or an array).
 * @param {boolean} [opts.driftEnabled] - Force drift on.
 * @param {string} [opts.preferredGeneId] - Gene id preferred by the memory graph.
 * @param {string[]} [opts.capabilityGaps] - Gap signals; only the first 5 are considered.
 * @param {number} [opts.noveltyScore] - Novelty; null/undefined/blank means unknown.
 * @param {number} [opts.effectivePopulationSize] - Effective population size for drift.
 * @returns {{selected: Object|null, alternatives: Object[], driftIntensity: number, driftMode: string}}
 *   driftMode is one of 'none', 'memory_preferred', 'selection',
 *   'diversity_directed', 'random_weighted', 'random'.
 */
function selectGene(genes, signals, opts) {
  const genesList = Array.isArray(genes) ? genes : [];

  // Accept anything Set-like, and tolerate a plain array of ids.
  const rawBans = opts ? opts.bannedGeneIds : null;
  let bannedGeneIds;
  if (rawBans && typeof rawBans.has === 'function') {
    bannedGeneIds = rawBans;
  } else if (Array.isArray(rawBans)) {
    bannedGeneIds = new Set(rawBans);
  } else {
    bannedGeneIds = new Set();
  }

  const driftEnabled = !!(opts && opts.driftEnabled);
  const preferredGeneId = opts && typeof opts.preferredGeneId === 'string' ? opts.preferredGeneId : null;
  // Diversity-directed drift: capability_gaps from Hub heartbeat
  const capabilityGaps = opts && Array.isArray(opts.capabilityGaps) ? opts.capabilityGaps : [];

  // Number(null) and Number('') are both 0, so only real numbers and non-blank
  // strings are parsed; everything else means "novelty unknown".
  const rawNovelty = opts ? opts.noveltyScore : null;
  const noveltyProvided = typeof rawNovelty === 'number'
    || (typeof rawNovelty === 'string' && rawNovelty.trim() !== '');
  const noveltyScore = noveltyProvided && Number.isFinite(Number(rawNovelty)) ? Number(rawNovelty) : null;

  // Compute continuous drift intensity based on effective population size
  const driftIntensity = computeDriftIntensity({
    driftEnabled: driftEnabled,
    effectivePopulationSize: opts && opts.effectivePopulationSize,
    genePoolSize: genesList.length,
  });
  const useDrift = driftEnabled || driftIntensity > 0.15;

  const DISTILLED_PREFIX = 'gene_distilled_';
  const DISTILLED_SCORE_FACTOR = 0.8;
  const envFingerprint = captureEnvFingerprint();

  const scored = genesList
    .map(g => {
      let s = scoreGene(g, signals);
      s += scoreGeneLearning(g, signals, envFingerprint);
      if (s > 0 && g.id && String(g.id).startsWith(DISTILLED_PREFIX)) s *= DISTILLED_SCORE_FACTOR;
      return { gene: g, score: s };
    })
    .filter(x => x.score > 0)
    .sort((a, b) => b.score - a.score);

  if (scored.length === 0) {
    return { selected: null, alternatives: [], driftIntensity: driftIntensity, driftMode: 'none' };
  }

  // Memory graph preference: only override when the preferred gene is already a match candidate.
  if (preferredGeneId) {
    const preferred = scored.find(x => x.gene && x.gene.id === preferredGeneId);
    if (preferred && (useDrift || !bannedGeneIds.has(preferredGeneId))) {
      const rest = scored.filter(x => x.gene && x.gene.id !== preferredGeneId);
      const filteredRest = useDrift ? rest : rest.filter(x => x.gene && !bannedGeneIds.has(x.gene.id));
      return {
        selected: preferred.gene,
        alternatives: filteredRest.slice(0, 4).map(x => x.gene),
        driftIntensity: driftIntensity,
        driftMode: 'memory_preferred',
      };
    }
  }

  // Low-efficiency suppression: do not repeat low-confidence paths unless drift is active.
  const filtered = useDrift ? scored : scored.filter(x => x.gene && !bannedGeneIds.has(x.gene.id));
  if (filtered.length === 0) {
    // Every match is banned: select nothing, but still report what matched.
    return {
      selected: null,
      alternatives: scored.slice(0, 4).map(x => x.gene),
      driftIntensity: driftIntensity,
      driftMode: 'none',
    };
  }

  // Diversity-directed drift: when capability gaps are available, prefer genes that
  // cover gap areas instead of pure random selection.
  let selectedIdx = 0;
  let driftMode = 'selection';
  if (driftIntensity > 0 && filtered.length > 1 && Math.random() < driftIntensity) {
    if (capabilityGaps.length > 0) {
      // Directed drift: count how many gap signals each candidate's patterns cover.
      const gapScores = filtered.map(function (entry, idx) {
        const g = entry.gene;
        const patterns = Array.isArray(g.signals_match) ? g.signals_match : [];
        let gapHits = 0;
        for (let gi = 0; gi < capabilityGaps.length && gi < 5; gi++) {
          const gapSignal = capabilityGaps[gi];
          if (typeof gapSignal === 'string' && patterns.some(function (p) { return matchPatternToSignals(p, [gapSignal]); })) {
            gapHits++;
          }
        }
        return { idx: idx, gapHits: gapHits, baseScore: entry.score };
      });
      const hasGapHits = gapScores.some(function (gs) { return gs.gapHits > 0; });
      if (hasGapHits) {
        // Sort by gap coverage first, then by base score
        gapScores.sort(function (a, b) {
          return b.gapHits - a.gapHits || b.baseScore - a.baseScore;
        });
        selectedIdx = gapScores[0].idx;
        driftMode = 'diversity_directed';
      } else {
        // No gap match: fall back to novelty-weighted random selection
        let topN = Math.min(filtered.length, Math.max(2, Math.ceil(filtered.length * driftIntensity)));
        // If novelty score is low (agent is too similar to others), increase exploration range
        if (noveltyScore != null && noveltyScore < 0.3 && topN < filtered.length) {
          topN = Math.min(filtered.length, topN + 1);
        }
        selectedIdx = Math.floor(Math.random() * topN);
        driftMode = 'random_weighted';
      }
    } else {
      // No capability gap data: original random drift behavior
      const topN = Math.min(filtered.length, Math.max(2, Math.ceil(filtered.length * driftIntensity)));
      selectedIdx = Math.floor(Math.random() * topN);
      driftMode = 'random';
    }
  }

  return {
    selected: filtered[selectedIdx].gene,
    alternatives: filtered.filter(function (_, i) { return i !== selectedIdx; }).slice(0, 4).map(x => x.gene),
    driftIntensity: driftIntensity,
    driftMode: driftMode,
  };
}
/**
 * Pick the capsule whose triggers match the most signals.
 *
 * A capsule scores one point per `trigger` pattern that matches. The capsule
 * with the highest positive score wins; on a tie the one that appears first in
 * `capsules` is kept. Null entries and entries without a `trigger` array are
 * skipped rather than raising.
 *
 * @param {Array} capsules - Candidate capsules; a non-array yields null.
 * @param {Array} signals - Signals describing the current situation.
 * @returns {Object|null} Best capsule, or null when nothing matches.
 */
function selectCapsule(capsules, signals) {
  if (!Array.isArray(capsules)) return null;

  let best = null;
  let bestScore = 0;
  for (const capsule of capsules) {
    if (!capsule || !Array.isArray(capsule.trigger)) continue;
    let score = 0;
    for (const trigger of capsule.trigger) {
      if (matchPatternToSignals(trigger, signals)) score += 1;
    }
    // Strict comparison keeps the earliest capsule among equal scores.
    if (score > bestScore) {
      best = capsule;
      bestScore = score;
    }
  }
  return best;
}
/**
 * Fraction of `signalsA` entries that also occur in `signalsB`.
 *
 * Entries are compared as lower-cased strings. Duplicates in `signalsA` are
 * counted individually; the denominator is the length of `signalsA`.
 *
 * @param {Array} signalsA - Signals whose coverage is measured.
 * @param {Array} signalsB - Reference signals.
 * @returns {number} Value in [0, 1]; 0 when either input is not a non-empty array.
 */
function computeSignalOverlap(signalsA, signalsB) {
  const bothArrays = Array.isArray(signalsA) && Array.isArray(signalsB);
  if (!bothArrays) return 0;
  if (signalsA.length === 0 || signalsB.length === 0) return 0;

  const normalize = function (signal) { return String(signal).toLowerCase(); };
  const reference = new Set(signalsB.map(function (signal) { return normalize(signal); }));

  let matched = 0;
  for (const signal of signalsA) {
    if (reference.has(normalize(signal))) matched += 1;
  }
  return matched / Math.max(signalsA.length, 1);
}
// A gene is banned once this many sufficiently-similar failed capsules reference it.
const FAILED_CAPSULE_BAN_THRESHOLD = 2;
// Minimum share of the current signals that must appear in a failed capsule's
// trigger for that failure to count.
const FAILED_CAPSULE_OVERLAP_MIN = 0.6;

/**
 * Extend a ban set with genes that repeatedly failed under similar signals.
 *
 * Each failed capsule whose `trigger` overlaps the current signals by at least
 * FAILED_CAPSULE_OVERLAP_MIN adds one strike to its `gene`. Genes reaching
 * FAILED_CAPSULE_BAN_THRESHOLD strikes are added to the result.
 *
 * Strikes are tallied in a null-prototype object so that a gene id such as
 * "constructor" or "__proto__" is counted like any other id.
 *
 * @param {Array} failedCapsules - Failed capsule records with `gene` and `trigger`.
 * @param {Array} signals - Signals describing the current situation.
 * @param {Set} [existingBans] - Bans to start from; copied, never mutated.
 * @returns {Set<string>} New set containing existing and newly derived bans.
 */
function banGenesFromFailedCapsules(failedCapsules, signals, existingBans) {
  const bans = existingBans instanceof Set ? new Set(existingBans) : new Set();
  if (!Array.isArray(failedCapsules) || failedCapsules.length === 0) return bans;

  const geneFailCounts = Object.create(null);
  for (let i = 0; i < failedCapsules.length; i++) {
    const fc = failedCapsules[i];
    if (!fc || !fc.gene) continue;
    const overlap = computeSignalOverlap(signals, fc.trigger || []);
    if (overlap < FAILED_CAPSULE_OVERLAP_MIN) continue;
    const gid = String(fc.gene);
    geneFailCounts[gid] = (geneFailCounts[gid] || 0) + 1;
  }

  const keys = Object.keys(geneFailCounts);
  for (let j = 0; j < keys.length; j++) {
    if (geneFailCounts[keys[j]] >= FAILED_CAPSULE_BAN_THRESHOLD) {
      bans.add(keys[j]);
    }
  }
  return bans;
}
/**
 * Top-level selection: pick a gene and a capsule and explain the decision.
 *
 * Bans from `memoryAdvice` are extended with genes that repeatedly failed
 * under similar signals, then gene selection, capsule selection and the
 * selector decision record are produced.
 *
 * @param {Object} [params]
 * @param {Array} [params.genes] - Candidate genes.
 * @param {Array} [params.capsules] - Candidate capsules.
 * @param {Array} [params.signals] - Signals describing the current situation.
 * @param {Object} [params.memoryAdvice] - Optional `bannedGeneIds` (Set),
 *   `preferredGeneId` and `explanation` from the memory graph.
 * @param {boolean} [params.driftEnabled] - Force drift on.
 * @param {Array} [params.failedCapsules] - Failed capsule records.
 * @param {string[]} [params.capabilityGaps] - Gap signals for directed drift.
 * @param {number} [params.noveltyScore] - Novelty; null/undefined/blank means unknown.
 * @returns {{selectedGene: Object|null, capsuleCandidates: Object[], selector: Object, driftIntensity: number}}
 */
function selectGeneAndCapsule({ genes, capsules, signals, memoryAdvice, driftEnabled, failedCapsules, capabilityGaps, noveltyScore } = {}) {
  const bannedGeneIds =
    memoryAdvice && memoryAdvice.bannedGeneIds instanceof Set ? memoryAdvice.bannedGeneIds : new Set();
  const preferredGeneId = memoryAdvice && memoryAdvice.preferredGeneId ? memoryAdvice.preferredGeneId : null;

  const effectiveBans = banGenesFromFailedCapsules(
    Array.isArray(failedCapsules) ? failedCapsules : [],
    signals,
    bannedGeneIds
  );

  // Number(null) and Number('') are 0, which would read as "very low novelty".
  // Forward a number only when one was really supplied; otherwise pass
  // undefined so the selector treats novelty as unknown.
  const noveltyProvided = typeof noveltyScore === 'number'
    || (typeof noveltyScore === 'string' && noveltyScore.trim() !== '');
  const normalizedNovelty = noveltyProvided && Number.isFinite(Number(noveltyScore))
    ? Number(noveltyScore)
    : undefined;

  const { selected, alternatives, driftIntensity } = selectGene(genes, signals, {
    bannedGeneIds: effectiveBans,
    preferredGeneId,
    driftEnabled: !!driftEnabled,
    capabilityGaps: Array.isArray(capabilityGaps) ? capabilityGaps : [],
    noveltyScore: normalizedNovelty,
  });

  const capsule = selectCapsule(capsules, signals);
  const selector = buildSelectorDecision({
    gene: selected,
    capsule,
    signals,
    alternatives,
    memoryAdvice,
    driftEnabled,
    driftIntensity,
  });

  return {
    selectedGene: selected,
    capsuleCandidates: capsule ? [capsule] : [],
    selector,
    driftIntensity,
  };
}
/**
 * Build the decision record describing what was selected and why.
 *
 * `reason` lists, in a fixed order, whichever of these apply: gene matched,
 * capsule matched, no gene found, the signals, the memory-graph explanation,
 * the explicit drift override, and the drift intensity (3 decimals).
 *
 * @param {Object} params
 * @param {Object|null} params.gene - Selected gene, if any.
 * @param {Object|null} params.capsule - Selected capsule, if any.
 * @param {Array} params.signals - Signals describing the current situation.
 * @param {Array} params.alternatives - Runner-up genes.
 * @param {Object} params.memoryAdvice - May carry an `explanation` array.
 * @param {boolean} params.driftEnabled - Whether drift was explicitly enabled.
 * @param {number} params.driftIntensity - Drift intensity used for the selection.
 * @returns {{selected: (string|null), reason: string[], alternatives: Array}}
 */
function buildSelectorDecision({ gene, capsule, signals, alternatives, memoryAdvice, driftEnabled, driftIntensity }) {
  const hasSignals = Boolean(signals && signals.length);
  const hasExplanation = Boolean(
    memoryAdvice && Array.isArray(memoryAdvice.explanation) && memoryAdvice.explanation.length
  );
  const hasDriftIntensity = Number.isFinite(driftIntensity) && driftIntensity > 0;

  // One slot per possible reason, in output order; unused slots stay null.
  const slots = [
    gene ? 'signals match gene.signals_match' : null,
    capsule ? 'capsule trigger matches signals' : null,
    gene ? null : 'no matching gene found; new gene may be required',
    hasSignals ? `signals: ${signals.join(', ')}` : null,
    hasExplanation ? `memory_graph: ${memoryAdvice.explanation.join(' | ')}` : null,
    driftEnabled ? 'random_drift_override: true' : null,
    hasDriftIntensity ? `drift_intensity: ${driftIntensity.toFixed(3)}` : null,
  ];
  const reason = slots.filter(function (entry) { return entry !== null; });

  let alternativeIds = [];
  if (Array.isArray(alternatives)) {
    alternativeIds = alternatives.map(function (candidate) { return candidate.id; });
  }

  return {
    selected: gene ? gene.id : null,
    reason,
    alternatives: alternativeIds,
  };
}
// Public API of the selector module.
// Functions defined above but not listed here (scoreGene, scoreGeneLearning,
// getEpigeneticBoostLocal, computeDriftIntensity, computeSignalOverlap,
// banGenesFromFailedCapsules, buildTermFrequency) are not exported.
module.exports = {
// Selection entry points.
selectGeneAndCapsule,
selectGene,
selectCapsule,
buildSelectorDecision,
// Matching and similarity helpers.
matchPatternToSignals,
scoreGeneSemantic,
cosineSimilarity,
tokenize,
};