diff --git a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/Entity.kt b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/Entity.kt index 494a5889..02db42e7 100644 --- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/Entity.kt +++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/Entity.kt @@ -6,6 +6,7 @@ import work.slhaf.partner.framework.agent.state.State import work.slhaf.partner.framework.agent.state.StateSerializable import work.slhaf.partner.framework.agent.state.StateValue import java.nio.file.Path +import java.time.Instant import java.util.* import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.locks.ReentrantLock @@ -13,15 +14,67 @@ import kotlin.concurrent.withLock class Entity @JvmOverloads constructor( val uuid: String = UUID.randomUUID().toString(), - val subject: String, + subject: String, private val relations: MutableMap> = mutableMapOf(), private val impressions: MutableMap = mutableMapOf(), - private val features: MutableMap = mutableMapOf() + private val features: MutableMap = mutableMapOf(), + private val aliases: MutableMap = mutableMapOf() ) : StateSerializable { + private var _subject: String = normalizeIdentityText(subject) + private val impressionLock = ReentrantLock() private val relationLock = ReentrantLock() private val featureLock = ReentrantLock() + private val identityLock = ReentrantLock() + + val subject: String + get() = identityLock.withLock { _subject } + + @JvmOverloads + fun renameSubject(newSubject: String, keepOldSubjectAsAlias: Boolean = true): Boolean = identityLock.withLock { + val normalizedSubject = normalizeIdentityText(newSubject) + if (normalizedSubject.isBlank() || normalizedSubject == _subject) { + return@withLock false + } + + val previousSubject = _subject + if (keepOldSubjectAsAlias && previousSubject.isNotBlank()) { + aliases[previousSubject] = aliases[previousSubject]?.copy(deprecated = true) + ?: AliasMetadata(Instant.now(), deprecated = true) + } + + aliases.remove(normalizedSubject) + _subject = normalizedSubject + true + } + + @JvmOverloads + fun addAlias(alias: String, deprecated: Boolean = false): Boolean = identityLock.withLock { + val normalizedAlias = normalizeIdentityText(alias) + if (normalizedAlias.isBlank() || normalizedAlias == _subject) { + return@withLock false + } + + aliases[normalizedAlias] = aliases[normalizedAlias]?.copy(deprecated = deprecated) + ?: AliasMetadata(Instant.now(), deprecated) + true + } + + @JvmOverloads + fun showAliases(includeDeprecated: Boolean = false): Set = identityLock.withLock { + aliases.asSequence() + .filter { (_, metadata) -> includeDeprecated || !metadata.deprecated } + .map { (alias, metadata) -> + AliasView(alias, metadata.instant, metadata.deprecated) + } + .sortedWith(compareBy { it.createdAt }.thenBy { it.alias }) + .toCollection(LinkedHashSet()) + } + + fun snapshotAliases(): Map = identityLock.withLock { + aliases.mapValues { (_, metadata) -> metadata.copy() } + } @JvmOverloads fun updateRelation( @@ -154,18 +207,56 @@ class Entity @JvmOverloads constructor( } } + identityLock.withLock { + state.getString("subject") + ?.let(::normalizeIdentityText) + ?.takeIf(String::isNotBlank) + ?.let { _subject = it } + } + state.getJSONObject("features")?.let { loadedFeatures -> featureLock.withLock { features.clear() features.putAll(loadIndexableDataMap(loadedFeatures)) } } + + state.getJSONObject("aliases")?.let { loadedAliases -> + identityLock.withLock { + aliases.clear() + loadedAliases.forEach { (alias, metadataValue) -> + val normalizedAlias = normalizeIdentityText(alias) + if (normalizedAlias.isBlank() || normalizedAlias == _subject) { + return@forEach + } + + val metadata = when (metadataValue) { + is JSONObject -> loadAliasMetadata(metadataValue) + else -> AliasMetadata(Instant.now(), deprecated = false) + } + aliases[normalizedAlias] = metadata + } + } + } } override fun convert(): State { val state = State() state.append("uuid", StateValue.str(uuid)) - state.append("subject", StateValue.str(subject)) + + val identityState = identityLock.withLock { + IdentityState( + subject = _subject, + aliases = aliases.mapValues { (_, metadata) -> + mapOf( + "timestamp" to metadata.instant.toEpochMilli(), + "deprecated" to metadata.deprecated + ) + } + ) + } + state.append("subject", StateValue.str(identityState.subject)) + state.append("aliases", StateValue.obj(identityState.aliases)) val relationState = relationLock.withLock { relations.mapValues { (_, relationMap) -> relationMap.toMap() } @@ -187,6 +278,22 @@ class Entity @JvmOverloads constructor( override fun autoLoadOnRegister(): Boolean = false + private fun normalizeIdentityText(value: String): String = + value.replace(IDENTITY_WHITESPACE_REGEX, " ").trim() + + private fun loadAliasMetadata(state: JSONObject): AliasMetadata { + val instant = state.getLong("timestamp") + ?.let(Instant::ofEpochMilli) + ?: state.getString("instant") + ?.let { runCatching { Instant.parse(it) }.getOrNull() } + ?: Instant.now() + + return AliasMetadata( + instant = instant, + deprecated = state.getBoolean("deprecated") ?: false + ) + } + private fun loadIndexableDataMap(state: JSONObject): Map { val loaded = mutableMapOf() state.forEach { (key, value) -> @@ -269,4 +376,24 @@ class Entity @JvmOverloads constructor( val confidence: Double, val vector: FloatArray? ) + + private data class IdentityState( + val subject: String, + val aliases: Map> + ) + + data class AliasView( + val alias: String, + val createdAt: Instant, + val deprecated: Boolean + ) + + data class AliasMetadata( + val instant: Instant, + val deprecated: Boolean + ) + + companion object { + private val IDENTITY_WHITESPACE_REGEX = Regex("\\s+") + } } diff --git a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionSearchDocuments.kt b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionSearchDocuments.kt index 0538f9ca..9eee4283 100644 --- a/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionSearchDocuments.kt +++ b/Partner-Core/src/main/java/work/slhaf/partner/core/cognition/impression/search/ImpressionSearchDocuments.kt @@ -84,6 +84,18 @@ object ImpressionSearchDocuments { ) ) + entity.showAliases(includeDeprecated = true).forEachIndexed { index, alias -> + add( + ImpressionSearchDocument( + id = "entity:${entity.uuid}:alias:$index", + target = target, + field = ImpressionSearchField.SUBJECT, + text = alias.alias, + weight = SUBJECT_WEIGHT * ALIAS_WEIGHT_FACTOR, + ) + ) + } + entity.snapshotFeatures().keys.forEachIndexed { index, feature -> add( ImpressionSearchDocument( @@ -131,6 +143,7 @@ object ImpressionSearchDocuments { } private const val SUBJECT_WEIGHT = 1.0 + private const val ALIAS_WEIGHT_FACTOR = 0.9 private const val FEATURE_WEIGHT = 0.85 private const val IMPRESSION_WEIGHT = 0.75 private const val RELATION_WEIGHT = 0.65 diff --git a/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt b/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt index 3dcf73d7..a9483cea 100644 --- a/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt +++ b/Partner-Core/src/test/java/work/slhaf/partner/core/cognition/impression/search/SimpleTextSearchTest.kt @@ -5,6 +5,7 @@ import org.junit.jupiter.api.Assertions.assertFalse import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test import work.slhaf.partner.core.cognition.impression.ActiveEntity +import work.slhaf.partner.core.cognition.impression.Entity class SimpleTextSearchTest { @@ -116,6 +117,21 @@ class SimpleTextSearchTest { assertEquals("report", hits.first().document.target.id) } + @Test + fun `search recalls known entity by alias documents`() { + val search = SimpleTextSearch(TestTokenizer()) + val entity = Entity("entity-1", "Partner") + entity.addAlias("智能体项目") + + search.rebuild(ImpressionSearchDocuments.fromEntity(entity)) + + val hits = search.search("智能体项目", limit = 10) + + assertFalse(hits.isEmpty()) + assertEquals(ImpressionSearchTarget.Type.ENTITY, hits.first().document.target.type) + assertEquals("entity-1", hits.first().document.target.id) + } + @Test fun `upsert replaces previous index terms for the same document id`() { val search = SimpleTextSearch(TestTokenizer()) @@ -207,7 +223,8 @@ class SimpleTextSearchTest { private val dictionary = listOf( "城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书", "java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回", - "vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件" + "vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件", + "智能体", "项目", "智能体项目" ) private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")