feat(impression): support entity aliases

Separate canonical entity subject from aliases and persist alias metadata for recall.

Index aliases as subject-like search documents and cover alias recall in SimpleTextSearch tests.
This commit is contained in:
2026-06-10 14:44:50 +08:00
parent 371b4a01d7
commit a23657ec0c
3 changed files with 161 additions and 4 deletions

View File

@@ -6,6 +6,7 @@ import work.slhaf.partner.framework.agent.state.State
import work.slhaf.partner.framework.agent.state.StateSerializable
import work.slhaf.partner.framework.agent.state.StateValue
import java.nio.file.Path
import java.time.Instant
import java.util.*
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.locks.ReentrantLock
@@ -13,15 +14,67 @@ import kotlin.concurrent.withLock
class Entity @JvmOverloads constructor(
val uuid: String = UUID.randomUUID().toString(),
val subject: String,
subject: String,
private val relations: MutableMap<String, MutableMap<String, Double>> = mutableMapOf(),
private val impressions: MutableMap<String, IndexableData> = mutableMapOf(),
private val features: MutableMap<String, IndexableData> = mutableMapOf()
private val features: MutableMap<String, IndexableData> = mutableMapOf(),
private val aliases: MutableMap<String, AliasMetadata> = mutableMapOf()
) : StateSerializable {
// Canonical subject in normalized form (see normalizeIdentityText). Reassigned by
// renameSubject and when a persisted "subject" value is loaded.
private var _subject: String = normalizeIdentityText(subject)
private val impressionLock = ReentrantLock()
private val relationLock = ReentrantLock()
private val featureLock = ReentrantLock()
// Held whenever _subject or the aliases map is read or written in this class.
private val identityLock = ReentrantLock()
/** Current canonical subject; the read is performed while holding [identityLock]. */
val subject: String
get() = identityLock.withLock { _subject }
/**
 * Replaces the canonical subject of this entity.
 *
 * The new subject is normalized first. Nothing changes when the normalized value is
 * blank or equal to the current subject. Otherwise, the previous subject is optionally
 * retained as an alias flagged as deprecated (an already existing alias entry keeps its
 * creation instant), any alias equal to the new subject is dropped, and the subject is
 * switched over.
 *
 * @param newSubject raw text of the new canonical subject
 * @param keepOldSubjectAsAlias whether the previous, non-blank subject is kept as a deprecated alias
 * @return `true` when the subject was changed, `false` otherwise
 */
@JvmOverloads
fun renameSubject(newSubject: String, keepOldSubjectAsAlias: Boolean = true): Boolean {
    val candidate = normalizeIdentityText(newSubject)
    return identityLock.withLock {
        val current = _subject
        when {
            candidate.isBlank() || candidate == current -> false
            else -> {
                if (keepOldSubjectAsAlias && current.isNotBlank()) {
                    val existing = aliases[current]
                    aliases[current] = if (existing != null) {
                        existing.copy(deprecated = true)
                    } else {
                        AliasMetadata(Instant.now(), deprecated = true)
                    }
                }
                // The promoted name must not remain listed as an alias of itself.
                aliases.remove(candidate)
                _subject = candidate
                true
            }
        }
    }
}
/**
 * Registers an alias for this entity, or updates the deprecated flag of an existing one.
 *
 * The alias is normalized first; a blank alias or one equal to the current subject is
 * rejected. An alias that is already present keeps its creation instant and only has its
 * deprecated flag overwritten.
 *
 * @param alias raw alias text
 * @param deprecated flag stored with the alias
 * @return `true` when the alias was stored, `false` when it was rejected
 */
@JvmOverloads
fun addAlias(alias: String, deprecated: Boolean = false): Boolean {
    val candidate = normalizeIdentityText(alias)
    return identityLock.withLock {
        if (candidate.isNotBlank() && candidate != _subject) {
            val known = aliases[candidate]
            aliases[candidate] = if (known != null) {
                known.copy(deprecated = deprecated)
            } else {
                AliasMetadata(Instant.now(), deprecated)
            }
            true
        } else {
            false
        }
    }
}
/**
 * Returns a read-only view of the aliases, ordered by creation instant and then by alias text.
 *
 * @param includeDeprecated when `false`, aliases flagged as deprecated are left out
 * @return an insertion-ordered set of [AliasView] built while holding the identity lock
 */
@JvmOverloads
fun showAliases(includeDeprecated: Boolean = false): Set<AliasView> = identityLock.withLock {
    val views = ArrayList<AliasView>()
    for ((name, meta) in aliases) {
        if (!includeDeprecated && meta.deprecated) continue
        views.add(AliasView(name, meta.instant, meta.deprecated))
    }
    views.sortWith(compareBy<AliasView> { it.createdAt }.thenBy { it.alias })
    LinkedHashSet<AliasView>(views)
}
/**
 * Returns a new map from alias text to a copy of its metadata, taken while holding the
 * identity lock.
 */
fun snapshotAliases(): Map<String, AliasMetadata> = identityLock.withLock {
    aliases.entries.associate { entry -> entry.key to entry.value.copy() }
}
@JvmOverloads
fun updateRelation(
@@ -154,18 +207,56 @@ class Entity @JvmOverloads constructor(
}
}
identityLock.withLock {
state.getString("subject")
?.let(::normalizeIdentityText)
?.takeIf(String::isNotBlank)
?.let { _subject = it }
}
state.getJSONObject("features")?.let { loadedFeatures ->
featureLock.withLock {
features.clear()
features.putAll(loadIndexableDataMap(loadedFeatures))
}
}
state.getJSONObject("aliases")?.let { loadedAliases ->
identityLock.withLock {
aliases.clear()
loadedAliases.forEach { (alias, metadataValue) ->
val normalizedAlias = normalizeIdentityText(alias)
if (normalizedAlias.isBlank() || normalizedAlias == _subject) {
return@forEach
}
val metadata = when (metadataValue) {
is JSONObject -> loadAliasMetadata(metadataValue)
else -> AliasMetadata(Instant.now(), deprecated = false)
}
aliases[normalizedAlias] = metadata
}
}
}
}
override fun convert(): State {
val state = State()
state.append("uuid", StateValue.str(uuid))
state.append("subject", StateValue.str(subject))
val identityState = identityLock.withLock {
IdentityState(
subject = _subject,
aliases = aliases.mapValues { (_, metadata) ->
mapOf(
"timestamp" to metadata.instant.toEpochMilli(),
"deprecated" to metadata.deprecated
)
}
)
}
state.append("subject", StateValue.str(identityState.subject))
state.append("aliases", StateValue.obj(identityState.aliases))
val relationState = relationLock.withLock {
relations.mapValues { (_, relationMap) -> relationMap.toMap() }
@@ -187,6 +278,22 @@ class Entity @JvmOverloads constructor(
override fun autoLoadOnRegister(): Boolean = false
/**
 * Normalizes subject/alias text: every run matched by [IDENTITY_WHITESPACE_REGEX] becomes a
 * single space, then leading and trailing whitespace is trimmed.
 */
private fun normalizeIdentityText(value: String): String {
    val collapsed = IDENTITY_WHITESPACE_REGEX.replace(value, " ")
    return collapsed.trim()
}
/**
 * Rebuilds [AliasMetadata] from its persisted JSON form.
 *
 * The creation instant is taken from the numeric "timestamp" field (epoch milliseconds)
 * when present; otherwise from the "instant" field parsed as an ISO-8601 instant; and
 * falls back to the current time when neither is usable. A missing "deprecated" field is
 * treated as `false`.
 */
private fun loadAliasMetadata(state: JSONObject): AliasMetadata {
    val epochMillis = state.getLong("timestamp")
    val createdAt = if (epochMillis != null) {
        Instant.ofEpochMilli(epochMillis)
    } else {
        val text = state.getString("instant")
        // An unparsable "instant" string is ignored rather than propagated.
        val parsed = if (text != null) runCatching { Instant.parse(text) }.getOrNull() else null
        parsed ?: Instant.now()
    }
    val deprecatedFlag = state.getBoolean("deprecated") ?: false
    return AliasMetadata(instant = createdAt, deprecated = deprecatedFlag)
}
private fun loadIndexableDataMap(state: JSONObject): Map<String, IndexableData> {
val loaded = mutableMapOf<String, IndexableData>()
state.forEach { (key, value) ->
@@ -269,4 +376,24 @@ class Entity @JvmOverloads constructor(
val confidence: Double,
val vector: FloatArray?
)
/**
 * Holder for the subject together with the serialized alias map
 * (alias text mapped to a map of its metadata fields).
 */
private data class IdentityState(
    val subject: String,
    val aliases: Map<String, Map<String, Any>>,
)
/**
 * Read-only description of one alias.
 *
 * @property alias the alias text
 * @property createdAt instant recorded for the alias
 * @property deprecated whether the alias is flagged as deprecated
 */
data class AliasView(
    val alias: String,
    val createdAt: Instant,
    val deprecated: Boolean,
)
/**
 * Metadata stored for each alias.
 *
 * @property instant instant recorded for the alias
 * @property deprecated whether the alias is flagged as deprecated
 */
data class AliasMetadata(
    val instant: Instant,
    val deprecated: Boolean,
)
companion object {
    /**
     * Matches one or more consecutive whitespace characters in subject/alias text.
     *
     * A plain `\s` in a Java regex covers only ASCII whitespace, while `String.trim()`
     * strips Unicode whitespace as well. `\p{Z}` (Unicode separator characters, e.g. the
     * ideographic space U+3000 and the no-break space U+00A0) is included so that interior
     * runs are collapsed under the same notion of whitespace that trimming uses at the edges.
     */
    private val IDENTITY_WHITESPACE_REGEX = Regex("[\\s\\p{Z}]+")
}
}

View File

@@ -84,6 +84,18 @@ object ImpressionSearchDocuments {
)
)
entity.showAliases(includeDeprecated = true).forEachIndexed { index, alias ->
add(
ImpressionSearchDocument(
id = "entity:${entity.uuid}:alias:$index",
target = target,
field = ImpressionSearchField.SUBJECT,
text = alias.alias,
weight = SUBJECT_WEIGHT * ALIAS_WEIGHT_FACTOR,
)
)
}
entity.snapshotFeatures().keys.forEachIndexed { index, feature ->
add(
ImpressionSearchDocument(
@@ -131,6 +143,7 @@ object ImpressionSearchDocuments {
}
// Weights assigned to search documents, one constant per kind of indexed text.
private const val SUBJECT_WEIGHT = 1.0
// Multiplier applied to SUBJECT_WEIGHT for the documents generated from entity aliases.
private const val ALIAS_WEIGHT_FACTOR = 0.9
private const val FEATURE_WEIGHT = 0.85
private const val IMPRESSION_WEIGHT = 0.75
private const val RELATION_WEIGHT = 0.65

View File

@@ -5,6 +5,7 @@ import org.junit.jupiter.api.Assertions.assertFalse
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
import work.slhaf.partner.core.cognition.impression.ActiveEntity
import work.slhaf.partner.core.cognition.impression.Entity
class SimpleTextSearchTest {
@@ -116,6 +117,21 @@ class SimpleTextSearchTest {
assertEquals("report", hits.first().document.target.id)
}
// An entity whose subject does not match the query must still be found through its alias.
@Test
fun `search recalls known entity by alias documents`() {
    val aliasText = "智能体项目"
    val partner = Entity("entity-1", "Partner").apply { addAlias(aliasText) }
    val index = SimpleTextSearch(TestTokenizer())
    index.rebuild(ImpressionSearchDocuments.fromEntity(partner))

    val results = index.search(aliasText, limit = 10)

    assertFalse(results.isEmpty())
    val topTarget = results.first().document.target
    assertEquals(ImpressionSearchTarget.Type.ENTITY, topTarget.type)
    assertEquals("entity-1", topTarget.id)
}
@Test
fun `upsert replaces previous index terms for the same document id`() {
val search = SimpleTextSearch(TestTokenizer())
@@ -207,7 +223,8 @@ class SimpleTextSearchTest {
private val dictionary = listOf(
"城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书",
"java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回",
"vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件"
"vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件",
"智能体", "项目", "智能体项目"
)
private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")