diff --git a/Partner-Core/pom.xml b/Partner-Core/pom.xml
index d6eafe5b..4e875887 100644
--- a/Partner-Core/pom.xml
+++ b/Partner-Core/pom.xml
@@ -65,6 +65,11 @@
cron-utils
9.2.1
+
+ com.huaban
+ jieba-analysis
+ 1.0.2
+
diff --git a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionTokenizer.kt b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionTokenizer.kt
new file mode 100644
index 00000000..984c961d
--- /dev/null
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionTokenizer.kt
@@ -0,0 +1,5 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+/**
+ * Turns a piece of text into the set of terms it contains.
+ *
+ * Declared as a functional interface so that a tokenizer can also be supplied as a lambda,
+ * e.g. `ImpressionTokenizer { text -> text.split(" ").toSet() }`.
+ */
+fun interface ImpressionTokenizer {
+    /**
+     * Extracts the terms of [text].
+     *
+     * @param text the raw text to split
+     * @return the distinct terms found in [text]
+     */
+    fun tokenize(text: String): Set<String>
+}
\ No newline at end of file
diff --git a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/JiebaImpressionTokenizer.kt b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/JiebaImpressionTokenizer.kt
new file mode 100644
index 00000000..4b5d7f86
--- /dev/null
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/JiebaImpressionTokenizer.kt
@@ -0,0 +1,37 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+import com.huaban.analysis.jieba.JiebaSegmenter
+
+/**
+ * [ImpressionTokenizer] backed by a Jieba segmenter.
+ *
+ * The input is lower-cased and whitespace-collapsed, segmented with [segmenter] in the
+ * configured [mode], and the segmenter output is merged with the lower-case ASCII
+ * alphanumeric runs matched by a regular expression (for example `docx` or `v1.2-beta`).
+ *
+ * @param segmenter the Jieba segmenter used to split the text
+ * @param mode the segmentation mode passed to [segmenter]
+ */
+class JiebaImpressionTokenizer(
+    private val segmenter: JiebaSegmenter = JiebaSegmenter(),
+    private val mode: JiebaSegmenter.SegMode = JiebaSegmenter.SegMode.SEARCH,
+) : ImpressionTokenizer {
+
+    /**
+     * Returns the distinct terms of [text]; input that is blank after normalization
+     * yields an empty set.
+     */
+    override fun tokenize(text: String): Set<String> {
+        val cleaned = normalize(text)
+        if (cleaned.isBlank()) return emptySet()
+
+        val terms = LinkedHashSet<String>()
+
+        // Segmenter output first: each word is normalized again and skipped when blank.
+        for (token in segmenter.process(cleaned, mode)) {
+            val word = normalize(token.word)
+            if (word.isNotBlank()) {
+                terms.add(word)
+            }
+        }
+
+        // Then the regex-matched alphanumeric runs, taken from the normalized input.
+        for (match in ALPHA_NUMERIC_REGEX.findAll(cleaned)) {
+            terms.add(match.value)
+        }
+
+        return terms
+    }
+
+    /** Lower-cases [text], collapses every whitespace run to one space and trims the ends. */
+    private fun normalize(text: String): String =
+        WHITESPACE_REGEX.replace(text.lowercase(), " ").trim()
+
+    companion object {
+        private val WHITESPACE_REGEX = Regex("\\s+")
+        private val ALPHA_NUMERIC_REGEX = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
+    }
+}
diff --git a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearch.kt b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearch.kt
new file mode 100644
index 00000000..e6174af2
--- /dev/null
+++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearch.kt
@@ -0,0 +1,136 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+/**
+ * In-memory [ImpressionTextSearch] backed by an inverted index from term to document ids.
+ *
+ * Text is lower-cased and whitespace-collapsed, then split into terms by [tokenizer].
+ * A hit is scored as
+ * `(matched term count + query coverage + exact phrase bonus + field bonus) x document weight`;
+ * hits with a non-positive score are dropped and the rest are returned best-first, with the
+ * document id as tie-breaker.
+ *
+ * Every public operation is `@Synchronized` on this instance.
+ *
+ * @param tokenizer splits both document text and queries into terms
+ */
+class SimpleTextSearch(
+    private val tokenizer: ImpressionTokenizer = JiebaImpressionTokenizer(),
+) : ImpressionTextSearch {
+
+    /** Indexed documents by document id. */
+    private val documents = linkedMapOf<String, IndexedDocument>()
+
+    /** Postings: term -> ids of the documents whose text produced that term. */
+    private val invertedIndex = linkedMapOf<String, MutableSet<String>>()
+
+    /**
+     * Discards the current index and indexes [documents] from scratch.
+     * When several entries share an id, the last one replaces the earlier ones.
+     */
+    @Synchronized
+    override fun rebuild(documents: Collection<ImpressionSearchDocument>) {
+        this.documents.clear()
+        invertedIndex.clear()
+        documents.forEach(::upsertInternal)
+    }
+
+    /** Adds [document], replacing any document previously stored under the same id. */
+    @Synchronized
+    override fun upsert(document: ImpressionSearchDocument) {
+        upsertInternal(document)
+    }
+
+    /** Removes every document whose target equals [target]. */
+    @Synchronized
+    override fun removeByTarget(target: ImpressionSearchTarget) {
+        // Collect the ids first: removing while iterating `documents.values` would
+        // modify the map underneath its own iterator.
+        val doomedIds = documents.values
+            .filter { it.document.target == target }
+            .map { it.document.id }
+        doomedIds.forEach(::removeByDocumentId)
+    }
+
+    /**
+     * Returns at most [limit] hits for [query], best score first.
+     *
+     * @return an empty list when [limit] is not positive, when [query] is blank, or when
+     * nothing matches
+     */
+    @Synchronized
+    override fun search(query: String, limit: Int): List<ImpressionSearchHit> {
+        if (limit <= 0) {
+            return emptyList()
+        }
+
+        val normalizedQuery = normalize(query)
+        if (normalizedQuery.isBlank()) {
+            return emptyList()
+        }
+
+        val queryTerms = tokenizer.tokenize(normalizedQuery)
+        val candidateIds: Set<String> = if (queryTerms.isEmpty()) {
+            // The tokenizer produced nothing usable: fall back to scanning every document
+            // so that the exact-phrase check can still find a match.
+            documents.keys.toSet()
+        } else {
+            queryTerms.flatMapTo(linkedSetOf<String>()) { term -> invertedIndex[term].orEmpty() }
+        }
+
+        return candidateIds
+            .asSequence()
+            .mapNotNull { documentId -> documents[documentId] }
+            .mapNotNull { indexed -> scoreDocument(indexed, normalizedQuery, queryTerms) }
+            .filter { it.score > 0.0 }
+            .sortedWith(compareByDescending<ImpressionSearchHit> { it.score }.thenBy { it.document.id })
+            .take(limit)
+            .toList()
+    }
+
+    /**
+     * Indexes [document], first dropping any earlier document stored under the same id.
+     */
+    private fun upsertInternal(document: ImpressionSearchDocument) {
+        // Tokenize before touching any state, so a failing tokenizer leaves the index as it was.
+        val normalizedText = normalize(document.text)
+        val terms = tokenizer.tokenize(normalizedText)
+
+        // Unindex the earlier version, if any. Without this a repeated id (for example twice
+        // in one rebuild batch) would overwrite the stored document but keep its old postings,
+        // which could then never be cleaned up by removeByDocumentId.
+        removeByDocumentId(document.id)
+
+        documents[document.id] = IndexedDocument(document, normalizedText, terms)
+        for (term in terms) {
+            invertedIndex.getOrPut(term) { linkedSetOf() }.add(document.id)
+        }
+    }
+
+    /** Removes the document with [documentId] and its postings; no-op when the id is unknown. */
+    private fun removeByDocumentId(documentId: String) {
+        val removed = documents.remove(documentId) ?: return
+        for (term in removed.terms) {
+            val postings = invertedIndex[term] ?: continue
+            postings.remove(documentId)
+            // Drop terms that no longer point at any document.
+            if (postings.isEmpty()) {
+                invertedIndex.remove(term)
+            }
+        }
+    }
+
+    /**
+     * Scores [indexedDocument] against the query.
+     *
+     * @return the hit, or `null` when the document shares no term with the query and does
+     * not contain the normalized query as a substring
+     */
+    private fun scoreDocument(
+        indexedDocument: IndexedDocument,
+        normalizedQuery: String,
+        queryTerms: Set<String>,
+    ): ImpressionSearchHit? {
+        // Intersecting with an empty query term set yields an empty set.
+        val matchedTerms = queryTerms.intersect(indexedDocument.terms)
+        val exactPhraseMatched = indexedDocument.normalizedText.contains(normalizedQuery)
+
+        if (matchedTerms.isEmpty() && !exactPhraseMatched) {
+            return null
+        }
+
+        // Share of the query terms found in the document.
+        val coverage = if (queryTerms.isEmpty()) 0.0 else matchedTerms.size.toDouble() / queryTerms.size.toDouble()
+        val termScore = matchedTerms.size.toDouble()
+        val exactPhraseBonus = if (exactPhraseMatched) EXACT_PHRASE_BONUS else 0.0
+        val fieldBonus = fieldBonus(indexedDocument.document.field)
+        val score = (termScore + coverage + exactPhraseBonus + fieldBonus) * indexedDocument.document.weight
+
+        return ImpressionSearchHit(
+            document = indexedDocument.document,
+            score = score,
+            matchedTerms = matchedTerms,
+        )
+    }
+
+    /** Additive score bonus for the field a document was generated from. */
+    private fun fieldBonus(field: ImpressionSearchField): Double = when (field) {
+        ImpressionSearchField.SUBJECT -> 0.8
+        ImpressionSearchField.FEATURE -> 0.35
+        ImpressionSearchField.IMPRESSION -> 0.25
+        ImpressionSearchField.RELATION -> 0.15
+        ImpressionSearchField.EVIDENCE -> 0.0
+    }
+
+    /** Lower-cases [text], collapses every whitespace run to one space and trims the ends. */
+    private fun normalize(text: String): String =
+        text.lowercase()
+            .replace(WHITESPACE_REGEX, " ")
+            .trim()
+
+    /** A stored document together with its normalized text and the terms derived from it. */
+    private data class IndexedDocument(
+        val document: ImpressionSearchDocument,
+        val normalizedText: String,
+        val terms: Set<String>,
+    )
+
+    companion object {
+        private const val EXACT_PHRASE_BONUS = 1.5
+        private val WHITESPACE_REGEX = Regex("\\s+")
+    }
+}
diff --git a/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt b/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt
new file mode 100644
index 00000000..3dcf73d7
--- /dev/null
+++ b/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt
@@ -0,0 +1,226 @@
+package work.slhaf.partner.core.cognition.impression.search
+
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Assertions.assertFalse
+import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Test
+import work.slhaf.partner.core.cognition.impression.ActiveEntity
+
+/**
+ * Behavioural tests for [SimpleTextSearch], run against a small dictionary-based
+ * tokenizer rather than the Jieba-backed default.
+ */
+class SimpleTextSearchTest {
+
+    @Test
+    fun `search ranks subject hit before evidence hit when both match similar terms`() {
+        val index = newSearch()
+        index.rebuild(
+            listOf(
+                document("a-subject", activeTarget("a"), ImpressionSearchField.SUBJECT, "城南旧书店老板", 1.0),
+                document("b-evidence", activeTarget("b"), ImpressionSearchField.EVIDENCE, "用户提到城南旧书店附近有一家打印店", 0.8),
+            )
+        )
+
+        val hits = index.search("城南旧书店", limit = 10)
+
+        assertEquals(listOf("a-subject", "b-evidence"), hits.map { it.document.id })
+        val (subjectHit, evidenceHit) = hits
+        assertTrue(subjectHit.score > evidenceHit.score)
+        assertTrue(subjectHit.matchedTerms.containsAll(setOf("城南", "旧书店")))
+    }
+
+    @Test
+    fun `exact phrase match can beat partial subject match`() {
+        val index = newSearch()
+        index.rebuild(
+            listOf(
+                document("partial-subject", activeTarget("partial"), ImpressionSearchField.SUBJECT, "工程教材", 1.0),
+                document("exact-evidence", activeTarget("exact"), ImpressionSearchField.EVIDENCE, "旧书店老板推荐过工程教材", 0.8),
+            )
+        )
+
+        val best = index.search("旧书店老板推荐过工程教材", limit = 10).first()
+
+        assertEquals("exact-evidence", best.document.id)
+        assertTrue(best.matchedTerms.containsAll(setOf("旧书店", "老板", "推荐", "工程", "教材")))
+    }
+
+    @Test
+    fun `search recalls bookstore owner from generated active entity documents`() {
+        val index = newSearch()
+        val entities = listOf(
+            activeEntity("bookstore", "城南旧书店老板") {
+                addEvidence("用户上周提到城南旧书店老板推荐过一本水利工程教材")
+                addProjectedFeatures("熟悉工程类旧书" to 0.9)
+            },
+            activeEntity("technical", "Java 技术搭子") {
+                addEvidence("用户正在讨论 Jieba 分词、SimpleTextSearch 和倒排索引")
+                addProjectedFeatures("熟悉 Kotlin 与检索实现" to 0.9)
+            },
+            activeEntity("report", "实验报告室友") {
+                addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+            },
+        )
+        index.rebuild(entities.flatMap { ImpressionSearchDocuments.fromActiveEntity(it) })
+
+        val hits = index.search("旧书店老板推荐的工程教材", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("bookstore", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `search recalls technical active entity from implementation terms`() {
+        val index = newSearch()
+        val entities = listOf(
+            activeEntity("technical", "Java 技术搭子") {
+                addEvidence("用户正在讨论 Jieba 分词、SimpleTextSearch 和倒排索引")
+                addProjectedImpressions("需要补充搜索召回测试" to 0.8)
+            },
+            activeEntity("report", "实验报告室友") {
+                addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+            },
+        )
+        index.rebuild(entities.flatMap { ImpressionSearchDocuments.fromActiveEntity(it) })
+
+        val hits = index.search("jieba 分词 SimpleTextSearch 倒排索引", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("technical", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `search recalls report active entity from document task terms`() {
+        val index = newSearch()
+        val entities = listOf(
+            activeEntity("technical", "Java 技术搭子") {
+                addEvidence("用户正在讨论 Kotlin、Jieba 分词和 SimpleTextSearch")
+            },
+            activeEntity("report", "实验报告室友") {
+                addEvidence("用户帮室友整理 Vivado 进阶仿真实验报告模板和 docx 文件")
+            },
+        )
+        index.rebuild(entities.flatMap { ImpressionSearchDocuments.fromActiveEntity(it) })
+
+        val hits = index.search("Vivado 实验报告模板", limit = 10)
+
+        assertFalse(hits.isEmpty())
+        assertEquals("report", hits.first().document.target.id)
+    }
+
+    @Test
+    fun `upsert replaces previous index terms for the same document id`() {
+        val index = newSearch()
+        val owner = activeTarget("entity")
+
+        // First version of the document is findable by its own terms.
+        index.upsert(document("doc", owner, ImpressionSearchField.EVIDENCE, "旧书店老板", 1.0))
+        assertEquals(listOf("doc"), index.idsFor("老板"))
+
+        // Re-upserting under the same id must drop the old terms and index the new ones.
+        index.upsert(document("doc", owner, ImpressionSearchField.EVIDENCE, "实验报告模板", 1.0))
+
+        assertTrue(index.search("老板", limit = 10).isEmpty())
+        assertEquals(listOf("doc"), index.idsFor("实验报告"))
+    }
+
+    @Test
+    fun `removeByTarget removes all documents belonging to that target`() {
+        val index = newSearch()
+        val removed = activeTarget("removed")
+        val kept = activeTarget("kept")
+        index.rebuild(
+            listOf(
+                document("removed-subject", removed, ImpressionSearchField.SUBJECT, "旧书店老板", 1.0),
+                document("removed-evidence", removed, ImpressionSearchField.EVIDENCE, "工程教材", 0.8),
+                document("kept-evidence", kept, ImpressionSearchField.EVIDENCE, "实验报告模板", 0.8),
+            )
+        )
+
+        index.removeByTarget(removed)
+
+        val remaining = index.search("实验报告", limit = 10)
+        assertEquals(listOf("kept-evidence"), remaining.map { it.document.id })
+        assertFalse(remaining.any { it.document.target == removed })
+        assertTrue(index.search("旧书店", limit = 10).isEmpty())
+    }
+
+    @Test
+    fun `rebuild clears previous documents and index terms`() {
+        val index = newSearch()
+        val owner = activeTarget("entity")
+
+        index.rebuild(listOf(document("old", owner, ImpressionSearchField.SUBJECT, "旧书店老板", 1.0)))
+        assertEquals(listOf("old"), index.idsFor("老板"))
+
+        index.rebuild(listOf(document("new", owner, ImpressionSearchField.SUBJECT, "实验报告模板", 1.0)))
+
+        assertTrue(index.search("老板", limit = 10).isEmpty())
+        assertEquals(listOf("new"), index.idsFor("实验报告"))
+    }
+
+    @Test
+    fun `blank unmatched and zero limit queries return empty hits`() {
+        val index = newSearch()
+        index.rebuild(
+            listOf(document("doc", activeTarget("entity"), ImpressionSearchField.SUBJECT, "旧书店老板", 1.0))
+        )
+
+        assertTrue(index.search(" ", limit = 10).isEmpty())
+        assertTrue(index.search("完全不存在", limit = 10).isEmpty())
+        assertTrue(index.search("旧书店", limit = 0).isEmpty())
+    }
+
+    /** Creates a search instance wired to the dictionary-based [TestTokenizer]. */
+    private fun newSearch() = SimpleTextSearch(TestTokenizer())
+
+    /** Runs [query] with a limit of 10 and returns the ids of the hits in result order. */
+    private fun SimpleTextSearch.idsFor(query: String): List<String> =
+        search(query, limit = 10).map { it.document.id }
+
+    /** Builds an active-entity search target with the given [id]. */
+    private fun activeTarget(id: String) =
+        ImpressionSearchTarget(ImpressionSearchTarget.Type.ACTIVE_ENTITY, id)
+
+    /** Creates an [ActiveEntity], sets its subject and then applies [configure] to it. */
+    private fun activeEntity(
+        runtimeId: String,
+        subject: String,
+        configure: ActiveEntity.() -> Unit,
+    ): ActiveEntity {
+        val entity = ActiveEntity(runtimeId = runtimeId)
+        entity.updateSubject(subject)
+        entity.configure()
+        return entity
+    }
+
+    /** Shorthand for constructing an [ImpressionSearchDocument]. */
+    private fun document(
+        id: String,
+        target: ImpressionSearchTarget,
+        field: ImpressionSearchField,
+        text: String,
+        weight: Double,
+    ) = ImpressionSearchDocument(
+        id = id,
+        target = target,
+        field = field,
+        text = text,
+        weight = weight,
+    )
+
+    /**
+     * Tokenizer for tests: a term is every dictionary word contained in the lower-cased
+     * text, plus every ASCII alphanumeric run matched by the regex.
+     */
+    private class TestTokenizer : ImpressionTokenizer {
+        private val dictionary = listOf(
+            "城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书",
+            "java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回",
+            "vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件"
+        )
+        private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
+
+        override fun tokenize(text: String): Set<String> {
+            val normalized = text.lowercase().trim()
+            if (normalized.isBlank()) return emptySet()
+
+            val terms = linkedSetOf<String>()
+            for (word in dictionary) {
+                if (word in normalized) {
+                    terms.add(word)
+                }
+            }
+            for (match in alphaNumericRegex.findAll(normalized)) {
+                terms.add(match.value)
+            }
+            return terms
+        }
+    }
+}