feat(impression): add impression recaller for projected entities

fix(impression): compare active entities by runtimeId
Remove empty impression capability methods
2026-06-27 17:49:16 +08:00 · 2026-06-09 11:43:28 +08:00 · 2026-06-09 11:32:29 +08:00 · 2026-06-07 22:42:37 +08:00 · 2026-06-06 23:02:48 +08:00 · 2026-06-06 22:59:43 +08:00
12 changed files with 1151 additions and 11 deletions
--- a/.codegraph/.gitignore
+++ b/.codegraph/.gitignore
@@ -0,0 +1,16 @@
+# CodeGraph data files
+# These are local to each machine and should not be committed
+
+# Database
+*.db
+*.db-wal
+*.db-shm
+
+# Cache
+cache/
+
+# Logs
+*.log
+
+# Hook markers
+.dirty
--- a/Partner-Core/pom.xml
+++ b/Partner-Core/pom.xml
@@ -65,6 +65,11 @@
            <artifactId>cron-utils</artifactId>
            <version>9.2.1</version>
        </dependency>
+        <dependency>
+            <groupId>com.huaban</groupId>
+            <artifactId>jieba-analysis</artifactId>
+            <version>1.0.2</version>
+        </dependency>
    </dependencies>

    <properties>
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/CognitionCapability.java
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/CognitionCapability.java
@@ -2,10 +2,12 @@ package work.slhaf.partner.core.cognition;

 import org.w3c.dom.Element;
 import work.slhaf.partner.core.cognition.context.ContextWorkspace;
+import work.slhaf.partner.core.cognition.impression.ActiveEntity;
 import work.slhaf.partner.framework.agent.factory.capability.annotation.Capability;
 import work.slhaf.partner.framework.agent.model.pojo.Message;

 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.locks.Lock;

@Capability("cognition")
@@ -27,4 +29,6 @@ public interface CognitionCapability {

    Lock getMessageLock();

+    /**
+     * Recalls the active entities associated with the given input text.
+     * <p>
+     * NOTE(review): presumably served by {@code ImpressionCore#projectEntity} through the
+     * capability framework — confirm the binding.
+     *
+     * @param input raw input content to match against known and active entities
+     * @return the active entities recalled for this input
+     */
+    Set<ActiveEntity> projectEntity(String input);
+
 }
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/ActiveEntity.kt
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/ActiveEntity.kt
@@ -112,6 +112,16 @@ class ActiveEntity @JvmOverloads constructor(

    private fun modelTime(time: Instant): String =
        time.atZone(ZoneId.systemDefault()).toString()
+
+    /**
+     * Two active entities are equal when they carry the same [runtimeId];
+     * no other state takes part in the comparison.
+     */
+    override fun equals(other: Any?): Boolean =
+        this === other || (other is ActiveEntity && runtimeId == other.runtimeId)
+
+    /** Hash code derived solely from [runtimeId], matching the identity used by [equals]. */
+    override fun hashCode(): Int = runtimeId.hashCode()
 }

 private fun newActiveEntityRuntimeId(): String =
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/ImpressionCore.java
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/ImpressionCore.java
@@ -3,6 +3,7 @@ package work.slhaf.partner.core.cognition.impression;
 import com.alibaba.fastjson2.JSONArray;
 import com.alibaba.fastjson2.JSONObject;
 import org.jetbrains.annotations.NotNull;
+import work.slhaf.partner.core.cognition.impression.search.*;
 import work.slhaf.partner.framework.agent.factory.capability.annotation.CapabilityCore;
 import work.slhaf.partner.framework.agent.factory.capability.annotation.CapabilityMethod;
 import work.slhaf.partner.framework.agent.state.State;
@@ -10,10 +11,9 @@ import work.slhaf.partner.framework.agent.state.StateSerializable;
 import work.slhaf.partner.framework.agent.state.StateValue;

 import java.nio.file.Path;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;

@CapabilityCore(value = "cognition")
 public class ImpressionCore implements StateSerializable {
@@ -23,21 +23,177 @@ public class ImpressionCore implements StateSerializable {
     */
    private final ConcurrentHashMap<String, Entity> knownEntitiesByUuid = new ConcurrentHashMap<>();
    private final ImpressionVectorIndex vectorIndex = new ImpressionVectorIndex();
+    private final Set<ActiveEntity> activeEntities = new HashSet<>();
+    private final ImpressionTextSearch textSearch = new SimpleTextSearch();

+    private static final int TEXT_SEARCH_LIMIT = 20;
+    private static final int ASSOCIATION_MATCH_LIMIT = 8;
+    private static final double SUPPORTING_HIT_FACTOR = 0.3;
+    private static final double ASSOCIATION_CONFIDENCE_DIVISOR = 5.0;
+
+    /**
+     * Recalls the entities related to a new input and appends the input to the evidence of
+     * every recalled ActiveEntity, whether it was already active or has just been activated.
+     *
+     * @param input content of the current input
+     * @return the active entities recalled for this input (including ones that were already
+     *         active); an empty set when the input is null/blank or nothing matches
+     */
     @CapabilityMethod
-    public void updateRelation() {
+    public Set<ActiveEntity> projectEntity(String input) {
+        if (input == null || input.isBlank()) {
+            return Set.of();
+        }
+
+        // Text hits are per document; fold them into one scored match per target entity.
+        List<ImpressionSearchHit> textSearchHits = textSearch.search(input, TEXT_SEARCH_LIMIT);
+        List<EntityAssociationMatch> associationMatches = aggregateMatches(textSearchHits, ASSOCIATION_MATCH_LIMIT);
+        if (associationMatches.isEmpty()) {
+            return Set.of();
+        }
+
+        Set<ActiveEntity> projected = new HashSet<>();
+        for (EntityAssociationMatch match : associationMatches) {
+            // A target that cannot be resolved to an active entity is skipped.
+            Optional<ActiveEntity> activeEntity = resolveActiveEntity(match.getTarget());
+            if (activeEntity.isEmpty()) {
+                continue;
+            }
+
+            ActiveEntity entity = activeEntity.get();
+            entity.addEvidence(
+                    input,
+                    associationConfidence(match),
+                    EntityEvidence.Source.USER_INPUT
+            );
+            // Re-index the entity's search documents after its evidence changed.
+            refreshActiveEntityTextSearch(entity);
+            projected.add(entity);
+        }
+
+        return projected;
     }

-    @CapabilityMethod
-    public void updateImpression() {
+    /**
+     * Folds per-document search hits into one association match per target.
+     * <p>
+     * Hits are grouped by their document's target (first-seen order), each group is sorted by
+     * score descending with the document id as tie-breaker, and the resulting matches are
+     * ordered by aggregated score descending, then target type name, then target id.
+     *
+     * @param hits  search hits to aggregate; may be null or empty
+     * @param limit maximum number of matches to return
+     * @return at most {@code limit} matches, best first; empty when there is nothing to aggregate
+     */
+    private List<EntityAssociationMatch> aggregateMatches(
+            List<ImpressionSearchHit> hits,
+            int limit
+    ) {
+        boolean nothingToAggregate = hits == null || hits.isEmpty() || limit <= 0;
+        if (nothingToAggregate) {
+            return List.of();
+        }
+
+        Comparator<ImpressionSearchHit> bestHitFirst = Comparator
+                .comparingDouble(ImpressionSearchHit::getScore)
+                .reversed()
+                .thenComparing(hit -> hit.getDocument().getId());
+        Comparator<EntityAssociationMatch> bestMatchFirst = Comparator
+                .comparingDouble(EntityAssociationMatch::getScore)
+                .reversed()
+                .thenComparing(match -> match.getTarget().getType().name())
+                .thenComparing(match -> match.getTarget().getId());
+
+        var hitsByTarget = hits.stream()
+                .collect(Collectors.groupingBy(
+                        hit -> hit.getDocument().getTarget(),
+                        LinkedHashMap::new,
+                        Collectors.toList()
+                ));
+
+        return hitsByTarget.entrySet()
+                .stream()
+                .map(group -> {
+                    List<ImpressionSearchHit> rankedHits = group.getValue()
+                            .stream()
+                            .sorted(bestHitFirst)
+                            .toList();
+                    return new EntityAssociationMatch(group.getKey(), aggregateScore(rankedHits), rankedHits);
+                })
+                .sorted(bestMatchFirst)
+                .limit(limit)
+                .toList();
     }

-    @CapabilityMethod
-    public void showImpressions() {
+    /**
+     * Scores a target from its hits: the best hit counts in full, and the next two hits each
+     * contribute their score scaled by {@code SUPPORTING_HIT_FACTOR}.
+     *
+     * @param sortedHits hits of one target, best first
+     * @return the aggregated score, or {@code 0.0} for an empty list
+     */
+    private double aggregateScore(List<ImpressionSearchHit> sortedHits) {
+        int hitCount = sortedHits.size();
+        if (hitCount == 0) {
+            return 0.0;
+        }
+
+        // Positions 1 and 2 (when present) are the supporting hits.
+        List<ImpressionSearchHit> supportingHits = sortedHits.subList(1, Math.min(hitCount, 3));
+        double supportingScore = supportingHits.stream()
+                .mapToDouble(hit -> hit.getScore() * SUPPORTING_HIT_FACTOR)
+                .sum();
+        return sortedHits.getFirst().getScore() + supportingScore;
     }

-    @CapabilityMethod
-    public void projectEntity(Set<ActiveEntity> activeEntities) {
+    /**
+     * Resolves a search target to an active entity: an ACTIVE_ENTITY target is looked up by
+     * runtime id, an ENTITY target is activated from the known entities.
+     *
+     * @param target target of a search hit
+     * @return the matching active entity, or empty when the target cannot be resolved
+     */
+    private Optional<ActiveEntity> resolveActiveEntity(ImpressionSearchTarget target) {
+        String targetId = target.getId();
+        return switch (target.getType()) {
+            case ENTITY -> activateKnownEntity(targetId);
+            case ACTIVE_ENTITY -> findActiveEntityByRuntimeId(targetId);
+        };
+    }
+
+    /**
+     * Looks up an active entity by its runtime id while holding the {@code activeEntities} lock.
+     *
+     * @param runtimeId runtime id to search for
+     * @return the first active entity with that runtime id, or empty
+     */
+    private Optional<ActiveEntity> findActiveEntityByRuntimeId(String runtimeId) {
+        synchronized (activeEntities) {
+            for (ActiveEntity candidate : activeEntities) {
+                if (candidate.getRuntimeId().equals(runtimeId)) {
+                    return Optional.of(candidate);
+                }
+            }
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Looks up the active entity bound to a known entity while holding the
+     * {@code activeEntities} lock.
+     *
+     * @param uuid uuid of the known entity; must not be null
+     * @return the first active entity whose bound entity uuid equals {@code uuid}, or empty
+     */
+    private Optional<ActiveEntity> findActiveEntityByBoundEntityUuid(String uuid) {
+        synchronized (activeEntities) {
+            for (ActiveEntity candidate : activeEntities) {
+                if (uuid.equals(candidate.getBoundEntityUuid())) {
+                    return Optional.of(candidate);
+                }
+            }
+            return Optional.empty();
+        }
+    }
+
+    /**
+     * Returns the active entity bound to the known entity {@code uuid}, activating it if needed.
+     * <p>
+     * The lookup and the insertion run under a single lock on {@code activeEntities}. With two
+     * separate critical sections, concurrent callers could both miss the existing entry and
+     * register two active entities for the same known entity.
+     *
+     * @param uuid uuid of the known entity
+     * @return the existing or newly created active entity, or empty when {@code uuid} is not a
+     *         known entity
+     */
+    private Optional<ActiveEntity> activateKnownEntity(String uuid) {
+        Entity knownEntity = knownEntitiesByUuid.get(uuid);
+        if (knownEntity == null) {
+            return Optional.empty();
+        }
+
+        ActiveEntity activated;
+        synchronized (activeEntities) {
+            // Intrinsic locks are re-entrant, so the finder may lock the same monitor again.
+            Optional<ActiveEntity> existing = findActiveEntityByBoundEntityUuid(uuid);
+            if (existing.isPresent()) {
+                return existing;
+            }
+
+            activated = new ActiveEntity();
+            activated.updateSubject(knownEntity.getSubject());
+            activated.bindEntity(uuid);
+            activeEntities.add(activated);
+        }
+
+        // Index the new entity so later searches can hit it as an ACTIVE_ENTITY target.
+        refreshActiveEntityTextSearch(activated);
+        return Optional.of(activated);
+    }
+
+    /**
+     * Converts a match score into an evidence confidence: the score is divided by
+     * {@code ASSOCIATION_CONFIDENCE_DIVISOR} and clamped to the range [0.05, 1.0].
+     *
+     * @param match aggregated association match
+     * @return confidence between 0.05 and 1.0
+     */
+    private double associationConfidence(EntityAssociationMatch match) {
+        return Math.clamp(match.getScore() / ASSOCIATION_CONFIDENCE_DIVISOR, 0.05, 1.0);
+    }
+
+    /**
+     * Re-indexes one active entity: removes every search document targeting its runtime id,
+     * then upserts the documents generated from its current state.
+     *
+     * @param activeEntity entity whose search documents are refreshed
+     */
+    private void refreshActiveEntityTextSearch(ActiveEntity activeEntity) {
+        textSearch.removeByTarget(new ImpressionSearchTarget(
+                ImpressionSearchTarget.Type.ACTIVE_ENTITY,
+                activeEntity.getRuntimeId()
+        ));
+        ImpressionSearchDocuments.INSTANCE
+                .fromActiveEntity(activeEntity)
+                .forEach(textSearch::upsert);
+    }
+
+    /**
+     * Rebuilds the text index from scratch using the documents of all known entities followed
+     * by those of all active entities (the latter collected under the {@code activeEntities} lock).
+     */
+    private void rebuildTextSearch() {
+        List<ImpressionSearchDocument> allDocuments = new ArrayList<>();
+        for (Entity knownEntity : knownEntitiesByUuid.values()) {
+            allDocuments.addAll(ImpressionSearchDocuments.INSTANCE.fromEntity(knownEntity));
+        }
+        synchronized (activeEntities) {
+            for (ActiveEntity activeEntity : activeEntities) {
+                allDocuments.addAll(ImpressionSearchDocuments.INSTANCE.fromActiveEntity(activeEntity));
+            }
+        }
+        textSearch.rebuild(allDocuments);
     }

    @Override
@@ -70,9 +226,9 @@ public class ImpressionCore implements StateSerializable {
            vectorIndex.sync(entity);
            knownEntitiesByUuid.put(uuid, entity);
        }
+        rebuildTextSearch();
    }

-
    @Override
    public @NotNull State convert() {
        State state = new State();
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionTokenizer.kt
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionTokenizer.kt
@@ -0,0 +1,5 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+/**
+ * Splits text into the terms used to index and query impression search documents.
+ */
+interface ImpressionTokenizer {
+    /**
+     * Tokenizes [text] into its distinct terms.
+     *
+     * @param text text to tokenize
+     * @return the set of terms found in [text]
+     */
+    fun tokenize(text: String): Set<String>
+}
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/JiebaImpressionTokenizer.kt
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/JiebaImpressionTokenizer.kt
@@ -0,0 +1,37 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+import com.huaban.analysis.jieba.JiebaSegmenter
+
+/**
+ * [ImpressionTokenizer] backed by the jieba segmenter.
+ *
+ * Text is lower-cased and whitespace-collapsed, segmented by jieba, and the result is unioned
+ * with ASCII alphanumeric runs (optionally joined by `-`, `_`, `.` or `/`) so that identifiers
+ * such as `a-b.c` remain searchable as a single term.
+ *
+ * @param segmenter jieba segmenter used for word segmentation
+ * @param mode jieba segmentation mode; defaults to [JiebaSegmenter.SegMode.SEARCH]
+ */
+class JiebaImpressionTokenizer(
+    private val segmenter: JiebaSegmenter = JiebaSegmenter(),
+    private val mode: JiebaSegmenter.SegMode = JiebaSegmenter.SegMode.SEARCH,
+) : ImpressionTokenizer {
+
+    /**
+     * Tokenizes [text] into distinct, normalized terms.
+     *
+     * @param text text to tokenize
+     * @return the term set; empty when [text] is blank
+     */
+    override fun tokenize(text: String): Set<String> {
+        val normalized = normalize(text)
+        if (normalized.isBlank()) {
+            return emptySet()
+        }
+
+        val jiebaTerms = segmenter.process(normalized, mode)
+            .asSequence()
+            .map { normalize(it.word) }
+            // jieba also emits standalone punctuation/symbol tokens. Indexing them would let
+            // unrelated texts match on a shared comma or full stop, so only terms carrying at
+            // least one letter or digit are kept (this also drops blank tokens).
+            .filter { term -> term.any { it.isLetterOrDigit() } }
+
+        return (jiebaTerms + alphaNumericTerms(normalized)).toSet()
+    }
+
+    /** Extracts ASCII alphanumeric runs from already lower-cased [text]. */
+    private fun alphaNumericTerms(text: String): Sequence<String> =
+        ALPHA_NUMERIC_REGEX.findAll(text).map { it.value }
+
+    /** Lower-cases [text], collapses whitespace runs to a single space and trims the ends. */
+    private fun normalize(text: String): String =
+        text.lowercase()
+            .replace(WHITESPACE_REGEX, " ")
+            .trim()
+
+    companion object {
+        private val WHITESPACE_REGEX = Regex("\\s+")
+        private val ALPHA_NUMERIC_REGEX = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
+    }
+}
--- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearch.kt
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearch.kt
@@ -0,0 +1,136 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+/**
+ * In-memory [ImpressionTextSearch] built on an inverted index (term -> document ids).
+ *
+ * Document text and queries are lower-cased and whitespace-collapsed, then split into terms
+ * by [tokenizer]. Every public operation is `@Synchronized` on this instance.
+ *
+ * @param tokenizer tokenizer applied to both document text and queries
+ */
+class SimpleTextSearch(
+    private val tokenizer: ImpressionTokenizer = JiebaImpressionTokenizer(),
+) : ImpressionTextSearch {
+
+    // Indexed documents keyed by document id.
+    private val documents = linkedMapOf<String, IndexedDocument>()
+
+    // Term -> ids of the documents whose term set contains it.
+    private val invertedIndex = linkedMapOf<String, MutableSet<String>>()
+
+    /** Replaces the whole index with [documents]; when an id repeats, the last document wins. */
+    @Synchronized
+    override fun rebuild(documents: Collection<ImpressionSearchDocument>) {
+        this.documents.clear()
+        invertedIndex.clear()
+        documents.forEach(::upsertInternal)
+    }
+
+    /** Indexes [document], replacing any document previously stored under the same id. */
+    @Synchronized
+    override fun upsert(document: ImpressionSearchDocument) {
+        upsertInternal(document)
+    }
+
+    /** Removes every indexed document whose target equals [target]. */
+    @Synchronized
+    override fun removeByTarget(target: ImpressionSearchTarget) {
+        // Collect the ids first: removal mutates the map being iterated.
+        documents.values
+            .asSequence()
+            .filter { it.document.target == target }
+            .map { it.document.id }
+            .toList()
+            .forEach(::removeByDocumentId)
+    }
+
+    /**
+     * Returns up to [limit] hits for [query], ordered by score descending and then by
+     * document id.
+     *
+     * Candidates are the documents sharing at least one term with the query; when the query
+     * yields no terms at all, every document is a candidate and only a substring match of the
+     * whole normalized query can produce a hit.
+     *
+     * @param query raw query text
+     * @param limit maximum number of hits; non-positive values yield an empty list
+     * @return the best hits with a positive score
+     */
+    @Synchronized
+    override fun search(query: String, limit: Int): List<ImpressionSearchHit> {
+        if (limit <= 0) {
+            return emptyList()
+        }
+
+        val normalizedQuery = normalize(query)
+        if (normalizedQuery.isBlank()) {
+            return emptyList()
+        }
+
+        val queryTerms = tokenizer.tokenize(normalizedQuery)
+        val candidateIds = if (queryTerms.isEmpty()) {
+            documents.keys.toSet()
+        } else {
+            queryTerms
+                .asSequence()
+                .flatMap { invertedIndex[it].orEmpty().asSequence() }
+                .toSet()
+        }
+
+        return candidateIds
+            .asSequence()
+            .mapNotNull { documentId ->
+                documents[documentId]?.let { scoreDocument(it, normalizedQuery, queryTerms) }
+            }
+            .filter { it.score > 0.0 }
+            .sortedWith(compareByDescending<ImpressionSearchHit> { it.score }.thenBy { it.document.id })
+            .take(limit)
+            .toList()
+    }
+
+    /** Tokenizes and stores [document], keeping [documents] and [invertedIndex] in step. */
+    private fun upsertInternal(document: ImpressionSearchDocument) {
+        // Drop any previous version first; otherwise its terms would linger in the inverted
+        // index (this also covers duplicate ids inside a single rebuild()).
+        removeByDocumentId(document.id)
+
+        val normalizedText = normalize(document.text)
+        val terms = tokenizer.tokenize(normalizedText)
+        documents[document.id] = IndexedDocument(document, normalizedText, terms)
+        terms.forEach { term ->
+            invertedIndex.getOrPut(term) { linkedSetOf() }.add(document.id)
+        }
+    }
+
+    /** Removes the document with [documentId] and prunes index entries left empty by it. */
+    private fun removeByDocumentId(documentId: String) {
+        val indexedDocument = documents.remove(documentId) ?: return
+        indexedDocument.terms.forEach { term ->
+            val ids = invertedIndex[term] ?: return@forEach
+            ids.remove(documentId)
+            if (ids.isEmpty()) {
+                invertedIndex.remove(term)
+            }
+        }
+    }
+
+    /**
+     * Scores one document against the query.
+     *
+     * score = (matched term count + query term coverage + exact phrase bonus + field bonus)
+     * multiplied by the document weight.
+     *
+     * @return the hit, or null when neither a term nor the whole normalized query matches
+     */
+    private fun scoreDocument(
+        indexedDocument: IndexedDocument,
+        normalizedQuery: String,
+        queryTerms: Set<String>,
+    ): ImpressionSearchHit? {
+        val matchedTerms = if (queryTerms.isEmpty()) {
+            emptySet()
+        } else {
+            queryTerms.intersect(indexedDocument.terms)
+        }
+        val exactPhraseMatched = indexedDocument.normalizedText.contains(normalizedQuery)
+
+        if (matchedTerms.isEmpty() && !exactPhraseMatched) {
+            return null
+        }
+
+        val coverage = if (queryTerms.isEmpty()) 0.0 else matchedTerms.size.toDouble() / queryTerms.size.toDouble()
+        val termScore = matchedTerms.size.toDouble()
+        val exactPhraseBonus = if (exactPhraseMatched) EXACT_PHRASE_BONUS else 0.0
+        val fieldBonus = fieldBonus(indexedDocument.document.field)
+        val score = (termScore + coverage + exactPhraseBonus + fieldBonus) * indexedDocument.document.weight
+
+        return ImpressionSearchHit(
+            document = indexedDocument.document,
+            score = score,
+            matchedTerms = matchedTerms,
+        )
+    }
+
+    /** Additive bonus per field; SUBJECT gets the largest bonus and EVIDENCE none. */
+    private fun fieldBonus(field: ImpressionSearchField): Double = when (field) {
+        ImpressionSearchField.SUBJECT -> 0.8
+        ImpressionSearchField.FEATURE -> 0.35
+        ImpressionSearchField.IMPRESSION -> 0.25
+        ImpressionSearchField.RELATION -> 0.15
+        ImpressionSearchField.EVIDENCE -> 0.0
+    }
+
+    /** Lower-cases [text], collapses whitespace runs to a single space and trims the ends. */
+    private fun normalize(text: String): String =
+        text.lowercase()
+            .replace(WHITESPACE_REGEX, " ")
+            .trim()
+
+    // A document together with its normalized text and term set as stored in the index.
+    private data class IndexedDocument(
+        val document: ImpressionSearchDocument,
+        val normalizedText: String,
+        val terms: Set<String>,
+    )
+
+    companion object {
+        private const val EXACT_PHRASE_BONUS = 1.5
+        private val WHITESPACE_REGEX = Regex("\\s+")
+    }
+}
--- a/Partner-Core/src/main/java/work/slhaf/partner/module/impression/ImpressionRecaller.java
+++ b/Partner-Core/src/main/java/work/slhaf/partner/module/impression/ImpressionRecaller.java
@@ -0,0 +1,54 @@
+package work.slhaf.partner.module.impression;
+
+import lombok.val;
+import org.jetbrains.annotations.NotNull;
+import work.slhaf.partner.core.cognition.CognitionCapability;
+import work.slhaf.partner.core.cognition.context.ContextBlock;
+import work.slhaf.partner.framework.agent.factory.capability.annotation.InjectCapability;
+import work.slhaf.partner.framework.agent.factory.component.abstracts.AbstractAgentModule;
+import work.slhaf.partner.runtime.PartnerRunningFlowContext;
+
+import java.util.Collection;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Running module that recalls the active entities related to the current inputs and
+ * registers them in the context workspace.
+ */
+public class ImpressionRecaller extends AbstractAgentModule.Running<PartnerRunningFlowContext> {
+
+    @InjectCapability
+    private CognitionCapability cognitionCapability;
+
+    /**
+     * Projects the content of every input through {@link CognitionCapability#projectEntity},
+     * de-duplicates the recalled active entities and registers one {@link ContextBlock} per
+     * entity in the context workspace.
+     *
+     * @param context running flow context providing the inputs of this round
+     */
+    @Override
+    protected void doExecute(@NotNull PartnerRunningFlowContext context) {
+        val contextWorkspace = cognitionCapability.contextWorkspace();
+        context.getInputs()
+                .stream()
+                .map(inputEntry -> cognitionCapability.projectEntity(inputEntry.getContent()))
+                .flatMap(Collection::stream)
+                // Collect into a set first so an entity recalled by several inputs is registered once.
+                .collect(Collectors.toSet())
+                .forEach(activeEntity -> contextWorkspace.register(new ContextBlock(
+                        activeEntity,
+                        activeEntity,
+                        activeEntity,
+                        Set.of(
+                                ContextBlock.FocusedDomain.COGNITION,
+                                ContextBlock.FocusedDomain.MEMORY
+                        ),
+                        100,
+                        0.5,
+                        20
+                )));
+    }
+
+    /** Execution order of this module among the running modules. */
+    @Override
+    public int order() {
+        return 2;
+    }
+}
--- a/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt
+++ b/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt
@@ -0,0 +1,226 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertFalse
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+import work.slhaf.partner.core.cognition.impression.ActiveEntity
+
+/**
+ * Tests for [SimpleTextSearch]: ranking, recall from generated active-entity documents,
+ * index maintenance (upsert / removeByTarget / rebuild) and empty-result edge cases.
+ *
+ * All tests use [TestTokenizer] instead of the jieba tokenizer so the term sets are
+ * deterministic and independent of the jieba dictionary.
+ */
+class SimpleTextSearchTest {
+
+    @Test
+    fun `search ranks subject hit before evidence hit when both match similar terms`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val targetA = activeTarget("a")
+        val targetB = activeTarget("b")
+
+        search.rebuild(
+            listOf(
+                document("a-subject", targetA, ImpressionSearchField.SUBJECT, "城南旧书店老板", 1.0),
+                document("b-evidence", targetB, ImpressionSearchField.EVIDENCE, "用户提到城南旧书店附近有一家打印店", 0.8),
+            )
+        )
+
+        val hits = search.search("城南旧书店", limit = 10)
+
+        assertEquals(listOf("a-subject", "b-evidence"), hits.map { it.document.id })
+        assertTrue(hits.first().score > hits[1].score)
+        assertTrue(hits.first().matchedTerms.containsAll(setOf("城南", "旧书店")))
+    }
+
+    @Test
+    fun `exact phrase match can beat partial subject match`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val partialSubject = activeTarget("partial")
+        val exactEvidence = activeTarget("exact")
+
+        search.rebuild(
+            listOf(
+                document("partial-subject", partialSubject, ImpressionSearchField.SUBJECT, "工程教材", 1.0),
+                document("exact-evidence", exactEvidence, ImpressionSearchField.EVIDENCE, "旧书店老板推荐过工程教材", 0.8),
+            )
+        )
+
+        val hits = search.search("旧书店老板推荐过工程教材", limit = 10)
+
+        assertEquals("exact-evidence", hits.first().document.id)
+        assertTrue(hits.first().matchedTerms.containsAll(setOf("旧书店", "老板", "推荐", "工程", "教材")))
+    }
+
+    @Test
+    fun `search recalls bookstore owner from generated active entity documents`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val bookstoreOwner = activeEntity("bookstore", "城南旧书店老板") {
+            addEvidence("用户上周提到城南旧书店老板推荐过一本水利工程教材")
+            addProjectedFeatures("熟悉工程类旧书" to 0.9)
+        }
+        val technicalPartner = activeEntity("technical", "Java 技术搭子") {
+            addEvidence("用户正在讨论 Jieba 分词、SimpleTextSearch 和倒排索引")
+            addProjectedFeatures("熟悉 Kotlin 与检索实现" to 0.9)
+        }
+        val reportRoommate = activeEntity("report", "实验报告室友") {
+            addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+        }
+
+        search.rebuild(
+            listOf(bookstoreOwner, technicalPartner, reportRoommate)
+                .flatMap(ImpressionSearchDocuments::fromActiveEntity)
+        )
+
+        val hits = search.search("旧书店老板推荐的工程教材", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("bookstore", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `search recalls technical active entity from implementation terms`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val technicalPartner = activeEntity("technical", "Java 技术搭子") {
+            addEvidence("用户正在讨论 Jieba 分词、SimpleTextSearch 和倒排索引")
+            addProjectedImpressions("需要补充搜索召回测试" to 0.8)
+        }
+        val reportRoommate = activeEntity("report", "实验报告室友") {
+            addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+        }
+
+        search.rebuild(
+            listOf(technicalPartner, reportRoommate)
+                .flatMap(ImpressionSearchDocuments::fromActiveEntity)
+        )
+
+        val hits = search.search("jieba 分词 SimpleTextSearch 倒排索引", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("technical", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `search recalls report active entity from document task terms`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val technicalPartner = activeEntity("technical", "Java 技术搭子") {
+            addEvidence("用户正在讨论 Kotlin、Jieba 分词和 SimpleTextSearch")
+        }
+        val reportRoommate = activeEntity("report", "实验报告室友") {
+            addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+        }
+
+        search.rebuild(
+            listOf(technicalPartner, reportRoommate)
+                .flatMap(ImpressionSearchDocuments::fromActiveEntity)
+        )
+
+        val hits = search.search("Vivado 实验报告模板", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("report", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `upsert replaces previous index terms for the same document id`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val target = activeTarget("entity")
+
+        search.upsert(document("doc", target, ImpressionSearchField.EVIDENCE, "旧书店老板", 1.0))
+        assertEquals(listOf("doc"), search.search("老板", limit = 10).map { it.document.id })
+
+        search.upsert(document("doc", target, ImpressionSearchField.EVIDENCE, "实验报告模板", 1.0))
+
+        assertTrue(search.search("老板", limit = 10).isEmpty())
+        assertEquals(listOf("doc"), search.search("实验报告", limit = 10).map { it.document.id })
+    }
+
+    @Test
+    fun `removeByTarget removes all documents belonging to that target`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val removed = activeTarget("removed")
+        val kept = activeTarget("kept")
+
+        search.rebuild(
+            listOf(
+                document("removed-subject", removed, ImpressionSearchField.SUBJECT, "旧书店老板", 1.0),
+                document("removed-evidence", removed, ImpressionSearchField.EVIDENCE, "工程教材", 0.8),
+                document("kept-evidence", kept, ImpressionSearchField.EVIDENCE, "实验报告模板", 0.8),
+            )
+        )
+
+        search.removeByTarget(removed)
+
+        val hits = search.search("实验报告", limit = 10)
+        assertEquals(listOf("kept-evidence"), hits.map { it.document.id })
+        assertFalse(hits.any { it.document.target == removed })
+        assertTrue(search.search("旧书店", limit = 10).isEmpty())
+    }
+
+    @Test
+    fun `rebuild clears previous documents and index terms`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val target = activeTarget("entity")
+
+        search.rebuild(listOf(document("old", target, ImpressionSearchField.SUBJECT, "旧书店老板", 1.0)))
+        assertEquals(listOf("old"), search.search("老板", limit = 10).map { it.document.id })
+
+        search.rebuild(listOf(document("new", target, ImpressionSearchField.SUBJECT, "实验报告模板", 1.0)))
+
+        assertTrue(search.search("老板", limit = 10).isEmpty())
+        assertEquals(listOf("new"), search.search("实验报告", limit = 10).map { it.document.id })
+    }
+
+    @Test
+    fun `blank unmatched and zero limit queries return empty hits`() {
+        val search = SimpleTextSearch(TestTokenizer())
+        val target = activeTarget("entity")
+        search.rebuild(listOf(document("doc", target, ImpressionSearchField.SUBJECT, "旧书店老板", 1.0)))
+
+        assertTrue(search.search("   ", limit = 10).isEmpty())
+        assertTrue(search.search("完全不存在", limit = 10).isEmpty())
+        assertTrue(search.search("旧书店", limit = 0).isEmpty())
+    }
+
+    /** Builds an ACTIVE_ENTITY search target with the given id. */
+    private fun activeTarget(id: String) =
+        ImpressionSearchTarget(ImpressionSearchTarget.Type.ACTIVE_ENTITY, id)
+
+    /** Creates an [ActiveEntity] with a fixed runtime id and subject, then applies [configure]. */
+    private fun activeEntity(
+        runtimeId: String,
+        subject: String,
+        configure: ActiveEntity.() -> Unit,
+    ): ActiveEntity = ActiveEntity(runtimeId = runtimeId).apply {
+        updateSubject(subject)
+        configure()
+    }
+
+    /** Shorthand for constructing an [ImpressionSearchDocument] with positional arguments. */
+    private fun document(
+        id: String,
+        target: ImpressionSearchTarget,
+        field: ImpressionSearchField,
+        text: String,
+        weight: Double,
+    ) = ImpressionSearchDocument(
+        id = id,
+        target = target,
+        field = field,
+        text = text,
+        weight = weight,
+    )
+
+    /**
+     * Deterministic tokenizer for tests: emits every dictionary word that occurs as a
+     * substring of the lower-cased text, plus all ASCII alphanumeric runs.
+     */
+    private class TestTokenizer : ImpressionTokenizer {
+        private val dictionary = listOf(
+            "城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书",
+            "java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回",
+            "vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件"
+        )
+        private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
+
+        override fun tokenize(text: String): Set<String> {
+            val normalized = text.lowercase().trim()
+            if (normalized.isBlank()) {
+                return emptySet()
+            }
+
+            return buildSet {
+                dictionary.filterTo(this) { normalized.contains(it) }
+                alphaNumericRegex.findAll(normalized).mapTo(this) { it.value }
+            }
+        }
+    }
+}
--- a/doc/design/first-encounter-module.md
+++ b/doc/design/first-encounter-module.md
@@ -0,0 +1,281 @@
+# First Encounter Module / 初见模块设计草案
+
+## 背景
+
+Partner 当前已经不是“不能跑”的项目，但用户面对一个新的 agent 时，仍然会有明显的启动成本。
+
+这个启动成本不完全来自工程状态，更多来自互动预期的不确定：
+
+- 不知道该怎么和它说话；
+- 不知道它知道什么、不知道什么；
+- 不知道它会不会误解用户；
+- 不知道它能不能被纠正；
+- 不知道纠正之后会不会真正改变后续行为。
+
+因此，Partner 需要一个“初见模块”。
+
+它解决的不是程序启动问题，而是关系和预期建立问题。
+
+## 定位
+
+初见模块不应该只是 `InitModule`。
+
+`InitModule` 更像加载配置、初始化资源、检查运行状态；而初见模块面对的是用户第一次或重新面对 Partner 时的交互问题。
+
+因此，代码层可以命名为：
+
+```text
+FirstEncounterModule
+```
+
+产品/概念层称为：
+
+```text
+初见模块
+```
+
+它的职责是：
+
+> 在新用户、长时间未使用、上下文断裂、版本升级，或用户主动询问“你现在知道我什么”时，组织一次清醒、温和、可校准的开场。
+
+## 与 Impression 模块的关系
+
+初见模块应当依托 Impression，但不属于 ImpressionCore。
+
+边界如下：
+
+```text
+ImpressionCore
+  负责存储、召回、更新关于用户、agent 自身、关系契约、项目上下文等印象。
+
+FirstEncounterModule
+  负责判断是否进入初见/重逢模式，并将召回的印象组织成本轮对话可用的 EncounterFrame。
+
+EncounterState
+  负责记录初见流程是否已经完成，以及哪些环节已经向用户公开。
+```
+
+也就是说：
+
+> Impression 负责“我对你有什么印象”。
+> FirstEncounterModule 负责“第一次见面时，我该如何使用这些印象”。
+
+不应把开场策略、纠错协议、对话引导逻辑直接塞进 ImpressionCore，否则记忆模块会被迫承担表达和流程控制职责。
+
+## 触发场景
+
+初见模块可以在以下场景触发：
+
+- 新用户第一次进入；
+- 当前 session 没有足够上下文；
+- 长时间未使用后重新进入；
+- Partner 发生较大版本升级；
+- Impression 召回结果置信度较低；
+- 用户主动询问：
+  - “你知道我什么？”
+  - “你现在能做什么？”
+  - “我该怎么和你说话？”
+  - “你是不是还记得之前的事？”
+- 系统检测到当前对话存在明显预期不稳定，例如用户多次纠正 agent 的语气、事实或任务边界。
+
+## 核心流程
+
+推荐流程：
+
+```text
+User Input
+  ↓
+InteractionHub
+  ↓
+EncounterDetector
+  ↓
+ImpressionRecaller
+  ↓
+FirstEncounterModule
+  ↓
+EncounterFrame
+  ↓
+PromptContributor / AppendPrompt
+  ↓
+CoreModel Reply
+  ↓
+ImpressionUpdater
+```
+
+其中：
+
+1. `EncounterDetector` 判断是否需要进入初见/重逢模式；
+2. `ImpressionRecaller` 召回相关印象；
+3. `FirstEncounterModule` 将召回结果整理成 EncounterFrame；
+4. `PromptContributor` 将 EncounterFrame 注入模型上下文；
+5. 对话结束后，`ImpressionUpdater` 根据用户反馈更新印象。
+
+## EncounterFrame
+
+`EncounterFrame` 是初见模块的核心输出。它不是长期记忆，而是本轮对话使用的临时认知框架。
+
+示例结构：
+
+```kotlin
+data class EncounterFrame(
+    val mode: EncounterMode,
+    val knownAboutUser: List<ImpressionProjection>,
+    val knownAboutSelf: List<ImpressionProjection>,
+    val knownAboutRelationship: List<ImpressionProjection>,
+    val uncertainty: List<String>,
+    val correctionProtocol: CorrectionProtocol,
+    val openingStrategy: OpeningStrategy
+)
+```
+
+其中：
+
+- `mode`：当前是初见、重逢、版本升级后再介绍、用户主动询问，还是多次纠正后的关系校准；
+- `knownAboutUser`：关于用户的可靠印象；
+- `knownAboutSelf`：Partner 对自身能力和边界的描述；
+- `knownAboutRelationship`：关于互动方式、纠错方式、语气偏好等印象；
+- `uncertainty`：当前不能确定的部分；
+- `correctionProtocol`：用户如何纠正 Partner；
+- `openingStrategy`：本次开场应采用的表达策略。
+
+## Impression Subject 建议
+
+为了支持初见模块，Impression 可以支持一些特殊 subject：
+
+```text
+user
+agent_self
+relationship_contract
+interaction_preference
+project_context
+```
+
+例如：
+
+```text
+user:
+- 用户偏好技术回答直接，不喜欢客服腔。
+- 用户面对陌生 agent 时会在意互动预期是否稳定。
+- 用户更容易接受从一个小切口开始推进。
+
+agent_self:
+- Partner 当前不是完全成熟的 agent。
+- Partner 应公开自己的已知、未知和不确定。
+- Partner 不应该在缺少依据时假装熟悉用户。
+
+relationship_contract:
+- 用户可以直接纠正 Partner。
+- Partner 需要区分事实错误、语气偏差、理解偏差和任务边界偏差。
+- 纠正应作为后续 impression 更新的重要信号。
+```
+
+## 初见开场策略
+
+初见模块不应一上来问很多问题，也不应假装已经充分了解用户。
+
+更合适的开场结构是：
+
+```text
+我现在对你还没有足够稳定的了解。
+
+我会先说明：
+- 我目前知道什么；
+- 我不知道什么；
+- 你可以怎么纠正我；
+- 我会如何处理这些纠正。
+
+接下来我们可以从一个很小的任务开始。
+```
+
+在 prompt 中可组织为：
+
+```text
+你正在与用户进行初见/重逢式对话。
+
+你目前可靠知道：
+- 用户希望技术讨论直接、少废话；
+- 用户对陌生 agent 的互动预期尚未建立；
+- 用户不喜欢 agent 在缺少依据时假装熟悉。
+
+你应该主动说明：
+- 你知道什么；
+- 你不知道什么；
+- 用户可以如何纠正你；
+- 你会如何处理纠正。
+
+不要一次性问很多问题。
+不要假装亲近。
+先从一个很小的任务或对话入口开始。
+```
+
+## EncounterState
+
+初见模块需要少量流程状态，但这些状态不一定属于 Impression。
+
+示例：
+
+```kotlin
+data class EncounterState(
+    val hasIntroducedSelf: Boolean,
+    val hasShownKnownUnknown: Boolean,
+    val hasExplainedCorrectionProtocol: Boolean,
+    val firstEncounterCompleted: Boolean,
+    val lastEncounterVersion: String?
+)
+```
+
+这些状态表示流程是否完成，而不是关于用户的长期印象。
+
+真正应该进入 Impression 的，是对用户、关系、互动方式的理解，例如：
+
+```text
+用户面对新的 agent 时，会担心互动预期不稳定。
+用户希望 agent 明确边界，而不是一上来装熟。
+用户能接受通过纠正来校准 agent。
+```
+
+## 最小实现方案
+
+第一版可以很轻，不需要完整工程化。
+
+建议步骤：
+
+1. 新增 `FirstEncounterPromptContributor`；
+2. 新增 `EncounterDetector`，先用简单规则判断是否触发；
+3. 从 `ImpressionRecaller` 召回 `user`、`agent_self`、`relationship_contract`、`interaction_preference`、`project_context` 相关印象；
+4. 生成一个简化版 `EncounterFrame`；
+5. 将 EncounterFrame 注入 AppendPrompt；
+6. 用户纠正后，将纠正内容作为 evidence 交给 ImpressionUpdater。
+
+第一版不需要复杂策略模型，规则足够：
+
+```text
+新 session + 低熟悉度 → 初见模式
+长时间未使用 + 有历史 impression → 重逢模式
+用户主动询问已知/未知 → 自我公开模式
+多次纠正 → 关系校准模式
+```
+
+## 不做什么
+
+初见模块第一版不做以下内容：
+
+- 不做完整 onboarding 表单；
+- 不一次性询问大量偏好；
+- 不把用户画像写死；
+- 不假装已经理解用户；
+- 不替代 ImpressionCore；
+- 不直接负责长期记忆写入；
+- 不在每轮对话中重复自我介绍。
+
+它只负责在关系尚未稳定时，提供一个清醒、可纠正、可继续的开场。
+
+## 价值
+
+初见模块的价值不只是“第一次使用体验更好”。
+
+它实际上补上了 Partner 作为 agent 的一个关键能力：
+
+> 在上下文断裂、长期未见、版本变化或记忆不确定时，仍然能让用户知道该如何继续与它相处。
+
+这使 Partner 不只是一个能运行的程序，而是一个能够建立互动预期、暴露不确定性、接受校准，并逐步形成稳定关系的 agent。
--- a/doc/design/impression-vector-fusion.md
+++ b/doc/design/impression-vector-fusion.md
@@ -0,0 +1,210 @@
+# Impression Vector Fusion Plan
+
+## Context
+
+The current `ImpressionCore.projectEntity` already connects text recall to active entity projection:
+
+```text
+input
+-> SimpleTextSearch.search(input)
+-> group document hits by ImpressionSearchTarget
+-> aggregate into EntityAssociationMatch
+-> resolve ACTIVE_ENTITY or ENTITY target
+-> append EntityEvidence
+-> refresh active entity text-search documents
+```
+
+This gives the Impression module a first explainable recall path. Vector recall should not replace this path. It should become another recall signal that is fused with text recall before projection.
+
+## Why not implement vector fusion immediately
+
+Vector fusion is a recall-source enhancement, not the next foundation step.
+
+Before adding more recall sources, the module still needs a clearer organization pipeline:
+
+- how an unmatched input becomes a new `ActiveEntity`;
+- how runtime evidence is accumulated, merged, or decayed;
+- how an `ActiveEntity` is rolled into a long-term `Entity`;
+- how extracted features and impressions update known entities;
+- when `textSearch` and `vectorIndex` are refreshed after entity updates.
+
+Unmatched entity creation and `ActiveEntity` rolling are closely related: both decide how temporary evidence becomes a stable entity-level impression. They should be considered as one organization chain rather than two unrelated features.
+
+## Target shape
+
+Future `projectEntity` should have this shape:
+
+```text
+input
+-> text recall signals
+-> vector recall signals
+-> normalize scores
+-> fuse signals by ImpressionSearchTarget
+-> resolve or create ActiveEntity
+-> append evidence
+-> refresh runtime indexes
+```
+
+The latter half should stay shared. Text recall, vector recall, relation recall, and recency recall should all produce association signals. Projection should not depend on which recall source produced a match.
+
+## First vector scope
+
+The first vector implementation should only recall long-term `ENTITY` targets.
+
+Reason:
+
+- `ImpressionVectorIndex` already syncs known `Entity` data.
+- Known entities have relatively stable features and impressions.
+- Active entity evidence changes frequently; embedding every new evidence item would add update cost and lifecycle complexity too early.
+
+So the first vector target should be:
+
+```text
+Entity feature / impression vector
+-> ImpressionSearchTarget(Type.ENTITY, entityUuid)
+```
+
+Later, after the active entity organization chain is stable, active evidence vectors can be added as:
+
+```text
+ActiveEntity evidence / projected feature / projected impression vector
+-> ImpressionSearchTarget(Type.ACTIVE_ENTITY, runtimeId)
+```
+
+## Signal model
+
+`EntityAssociationMatch` is currently text-oriented because it stores `List<ImpressionSearchHit>`.
+
+For fusion, introduce a source-neutral signal model:
+
+```kotlin
+data class EntityAssociationSignal(
+    val target: ImpressionSearchTarget,
+    val source: Source,
+    val score: Double,
+    val reason: String,
+    val textHit: ImpressionSearchHit? = null,
+    val vectorHit: ImpressionVectorHit? = null,
+) {
+    enum class Source {
+        TEXT,
+        VECTOR,
+        RELATION,
+        RECENCY
+    }
+}
+```
+
+Then change or extend `EntityAssociationMatch` toward:
+
+```kotlin
+data class EntityAssociationMatch(
+    val target: ImpressionSearchTarget,
+    val score: Double,
+    val signals: List<EntityAssociationSignal> = emptyList(),
+)
+```
+
+This keeps fusion explainable. A match can still tell the model or logs why an entity was recalled.
+
+## Score normalization
+
+Text search score and vector similarity should not be added directly.
+
+Text search currently produces an internal score based on token hits, coverage, exact phrase bonus, field bonus, and document weight. Vector search is usually cosine-like similarity. Normalize both into association-strength-like values before fusion.
+
+Possible first normalization:
+
+```text
+textScore01 = clamp(textScore / 5.0, 0.0, 1.0)
+
+vectorScore01 =
+  similarity < 0.55 -> 0.0
+  otherwise -> clamp((similarity - 0.55) / 0.35, 0.0, 1.0)
+```
+
+The constants are placeholders. They should be tuned with tests and logs.
+
+## Fusion rule
+
+Use strong-hit priority with multi-source support, not simple averaging.
+
+A first rule can be:
+
+```text
+targetScore =
+  max(bestTextScore, bestVectorScore * 0.9)
+  + sameTargetCrossSourceBonus
+  + supportingSignalBonus
+```
+
+Suggested behavior:
+
+- direct subject or phrase text match should beat vague vector similarity;
+- vector recall should recover semantically related entities when text recall is weak or empty;
+- if text and vector both hit the same target, the target should receive a small confidence boost;
+- long documents or many weak signals should not dominate a single strong subject/evidence hit.
+
+## Execution strategy
+
+The first implementation can be conservative:
+
+```text
+always run TextSearch
+run VectorSearch only when:
+  - text recall is empty; or
+  - top text match confidence is low; or
+  - input is long and semantic rather than name-like
+```
+
+If the embedding model is local and cheap enough, this can later become parallel text + vector recall.
+
+## Implementation phases
+
+### Phase 1: organization chain first
+
+Implement before vector fusion:
+
+- unmatched input -> new `ActiveEntity` candidate;
+- active evidence update and dedup/merge rules;
+- active entity rolling into known `Entity`;
+- known entity feature/impression update;
+- index refresh after entity updates.
+
+### Phase 2: signal abstraction
+
+Introduce `EntityAssociationSignal` and make text hits convert into signals.
+
+Keep behavior equivalent to the current implementation after the refactor.
+
+### Phase 3: long-term entity vector recall
+
+Add vector recall only for known `Entity` targets:
+
+```text
+input embedding
+-> ImpressionVectorIndex.search(...)
+-> vector hits
+-> EntityAssociationSignal(source = VECTOR)
+-> fuse with text signals
+```
+
+### Phase 4: active entity vector recall
+
+Only after the active entity lifecycle is stable:
+
+- vectorize active evidence or projected features;
+- update active vector index when evidence changes;
+- fuse `ACTIVE_ENTITY` vector hits with text hits.
+
+## Non-goals for first vector pass
+
+Do not start with:
+
+- vectorizing every raw evidence item immediately;
+- replacing text search ranking;
+- using vector score as direct `associationConfidence` without normalization;
+- adding opaque fusion that cannot explain why an entity was recalled;
+- expanding `projectEntity` into a large source-specific method.
+
+The intended direction is: multiple recall sources produce explainable signals, then `ImpressionCore` performs one shared entity projection flow.
Author	SHA1	Message	Date
slhafzjw	0567837dfe	feat(impression): add impression recaller for projected entities	2026-06-09 11:43:28 +08:00
slhafzjw	6dad6fdd6f	fix(impression): compare active entities by runtimeId	2026-06-09 11:32:29 +08:00
slhafzjw	e583276938	Remove empty impression capability methods	2026-06-07 22:42:37 +08:00
slhafzjw	42407567b1	docs(impression): document vector fusion plan	2026-06-06 23:02:48 +08:00
slhafzjw	e5d19f31ca	doc: adjust location of design documents	2026-06-06 22:59:43 +08:00
slhafzjw	ddf7f8da98	feat(impression): project text search hits into active entities	2026-06-06 22:57:55 +08:00
slhafzjw	9269d4f678	chore: update gitignore	2026-06-06 22:57:55 +08:00
slhaf	03087fb259	docs: note first encounter module design	2026-06-05 22:45:57 +08:00
slhafzjw	b73696cc24	feat(impression): Add impression text search	2026-05-31 21:06:02 +08:00