mirror of
https://github.com/slhaf/Partner.git
synced 2026-06-27 17:49:16 +08:00
feat(impression): support entity aliases
Separate canonical entity subject from aliases and persist alias metadata for recall. Index aliases as subject-like search documents and cover alias recall in SimpleTextSearch tests.
This commit is contained in:
@@ -6,6 +6,7 @@ import work.slhaf.partner.framework.agent.state.State
|
|||||||
import work.slhaf.partner.framework.agent.state.StateSerializable
|
import work.slhaf.partner.framework.agent.state.StateSerializable
|
||||||
import work.slhaf.partner.framework.agent.state.StateValue
|
import work.slhaf.partner.framework.agent.state.StateValue
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
import java.time.Instant
|
||||||
import java.util.*
|
import java.util.*
|
||||||
import java.util.concurrent.ConcurrentHashMap
|
import java.util.concurrent.ConcurrentHashMap
|
||||||
import java.util.concurrent.locks.ReentrantLock
|
import java.util.concurrent.locks.ReentrantLock
|
||||||
@@ -13,15 +14,67 @@ import kotlin.concurrent.withLock
|
|||||||
|
|
||||||
class Entity @JvmOverloads constructor(
|
class Entity @JvmOverloads constructor(
|
||||||
val uuid: String = UUID.randomUUID().toString(),
|
val uuid: String = UUID.randomUUID().toString(),
|
||||||
val subject: String,
|
subject: String,
|
||||||
private val relations: MutableMap<String, MutableMap<String, Double>> = mutableMapOf(),
|
private val relations: MutableMap<String, MutableMap<String, Double>> = mutableMapOf(),
|
||||||
private val impressions: MutableMap<String, IndexableData> = mutableMapOf(),
|
private val impressions: MutableMap<String, IndexableData> = mutableMapOf(),
|
||||||
private val features: MutableMap<String, IndexableData> = mutableMapOf()
|
private val features: MutableMap<String, IndexableData> = mutableMapOf(),
|
||||||
|
private val aliases: MutableMap<String, AliasMetadata> = mutableMapOf()
|
||||||
) : StateSerializable {
|
) : StateSerializable {
|
||||||
|
|
||||||
|
private var _subject: String = normalizeIdentityText(subject)
|
||||||
|
|
||||||
private val impressionLock = ReentrantLock()
|
private val impressionLock = ReentrantLock()
|
||||||
private val relationLock = ReentrantLock()
|
private val relationLock = ReentrantLock()
|
||||||
private val featureLock = ReentrantLock()
|
private val featureLock = ReentrantLock()
|
||||||
|
private val identityLock = ReentrantLock()
|
||||||
|
|
||||||
|
/**
 * Canonical subject of this entity.
 *
 * The backing value is read while holding [identityLock], the same lock that
 * [renameSubject] holds when it replaces the subject.
 */
val subject: String
    get() {
        return identityLock.withLock { _subject }
    }
|
||||||
|
|
||||||
|
/**
 * Replaces the canonical subject of this entity.
 *
 * The candidate is normalized with [normalizeIdentityText] before any comparison.
 * All reads and writes of the subject and the alias map happen under [identityLock].
 *
 * @param newSubject the requested subject text.
 * @param keepOldSubjectAsAlias when `true` (the default) and the previous subject is
 *   not blank, the previous subject is stored as a deprecated alias. If an alias
 *   entry for it already exists, its timestamp is kept and only the flag is set.
 * @return `false` when the normalized candidate is blank or equal to the current
 *   subject (nothing is modified); `true` after the subject has been replaced.
 */
@JvmOverloads
fun renameSubject(newSubject: String, keepOldSubjectAsAlias: Boolean = true): Boolean {
    return identityLock.withLock {
        val candidate = normalizeIdentityText(newSubject)
        val current = _subject

        if (candidate.isBlank() || candidate == current) {
            false
        } else {
            if (keepOldSubjectAsAlias && current.isNotBlank()) {
                val existing = aliases[current]
                aliases[current] = existing?.copy(deprecated = true)
                    ?: AliasMetadata(Instant.now(), deprecated = true)
            }

            // The new subject must not remain listed as an alias of itself.
            aliases.remove(candidate)
            _subject = candidate
            true
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Registers an alias for this entity, or updates the deprecation flag of an
 * alias that is already registered.
 *
 * The alias is normalized with [normalizeIdentityText]. The operation runs under
 * [identityLock].
 *
 * @param alias the alias text to register.
 * @param deprecated the deprecation flag to store for the alias; defaults to `false`.
 * @return `false` when the normalized alias is blank or equals the current subject
 *   (nothing is stored); `true` otherwise.
 */
@JvmOverloads
fun addAlias(alias: String, deprecated: Boolean = false): Boolean {
    return identityLock.withLock {
        val candidate = normalizeIdentityText(alias)
        when {
            candidate.isBlank() -> false
            candidate == _subject -> false
            else -> {
                // An existing entry keeps its original timestamp; only the flag changes.
                val refreshed = aliases[candidate]?.copy(deprecated = deprecated)
                    ?: AliasMetadata(Instant.now(), deprecated)
                aliases[candidate] = refreshed
                true
            }
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Returns a view of the registered aliases, ordered by creation time and then
 * by alias text.
 *
 * The alias map is read under [identityLock]; the returned set is a new
 * [LinkedHashSet] that preserves the sorted order.
 *
 * @param includeDeprecated when `false` (the default), aliases flagged as
 *   deprecated are left out.
 * @return the matching aliases as [AliasView] values.
 */
@JvmOverloads
fun showAliases(includeDeprecated: Boolean = false): Set<AliasView> = identityLock.withLock {
    val views = ArrayList<AliasView>()
    for ((name, metadata) in aliases) {
        if (includeDeprecated || !metadata.deprecated) {
            views.add(AliasView(name, metadata.instant, metadata.deprecated))
        }
    }

    // Primary key: creation time; tie-breaker: alias text.
    views.sortWith(compareBy<AliasView>({ it.createdAt }, { it.alias }))
    LinkedHashSet(views)
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of the alias map taken under [identityLock].
 *
 * Each [AliasMetadata] value is copied, so the result shares neither the map
 * nor the value instances with this entity.
 */
fun snapshotAliases(): Map<String, AliasMetadata> {
    return identityLock.withLock {
        aliases.entries.associate { entry -> entry.key to entry.value.copy() }
    }
}
|
||||||
|
|
||||||
@JvmOverloads
|
@JvmOverloads
|
||||||
fun updateRelation(
|
fun updateRelation(
|
||||||
@@ -154,18 +207,56 @@ class Entity @JvmOverloads constructor(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
identityLock.withLock {
|
||||||
|
state.getString("subject")
|
||||||
|
?.let(::normalizeIdentityText)
|
||||||
|
?.takeIf(String::isNotBlank)
|
||||||
|
?.let { _subject = it }
|
||||||
|
}
|
||||||
|
|
||||||
state.getJSONObject("features")?.let { loadedFeatures ->
|
state.getJSONObject("features")?.let { loadedFeatures ->
|
||||||
featureLock.withLock {
|
featureLock.withLock {
|
||||||
features.clear()
|
features.clear()
|
||||||
features.putAll(loadIndexableDataMap(loadedFeatures))
|
features.putAll(loadIndexableDataMap(loadedFeatures))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
state.getJSONObject("aliases")?.let { loadedAliases ->
|
||||||
|
identityLock.withLock {
|
||||||
|
aliases.clear()
|
||||||
|
loadedAliases.forEach { (alias, metadataValue) ->
|
||||||
|
val normalizedAlias = normalizeIdentityText(alias)
|
||||||
|
if (normalizedAlias.isBlank() || normalizedAlias == _subject) {
|
||||||
|
return@forEach
|
||||||
|
}
|
||||||
|
|
||||||
|
val metadata = when (metadataValue) {
|
||||||
|
is JSONObject -> loadAliasMetadata(metadataValue)
|
||||||
|
else -> AliasMetadata(Instant.now(), deprecated = false)
|
||||||
|
}
|
||||||
|
aliases[normalizedAlias] = metadata
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun convert(): State {
|
override fun convert(): State {
|
||||||
val state = State()
|
val state = State()
|
||||||
state.append("uuid", StateValue.str(uuid))
|
state.append("uuid", StateValue.str(uuid))
|
||||||
state.append("subject", StateValue.str(subject))
|
|
||||||
|
val identityState = identityLock.withLock {
|
||||||
|
IdentityState(
|
||||||
|
subject = _subject,
|
||||||
|
aliases = aliases.mapValues { (_, metadata) ->
|
||||||
|
mapOf(
|
||||||
|
"timestamp" to metadata.instant.toEpochMilli(),
|
||||||
|
"deprecated" to metadata.deprecated
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
}
|
||||||
|
state.append("subject", StateValue.str(identityState.subject))
|
||||||
|
state.append("aliases", StateValue.obj(identityState.aliases))
|
||||||
|
|
||||||
val relationState = relationLock.withLock {
|
val relationState = relationLock.withLock {
|
||||||
relations.mapValues { (_, relationMap) -> relationMap.toMap() }
|
relations.mapValues { (_, relationMap) -> relationMap.toMap() }
|
||||||
@@ -187,6 +278,22 @@ class Entity @JvmOverloads constructor(
|
|||||||
|
|
||||||
override fun autoLoadOnRegister(): Boolean = false
|
override fun autoLoadOnRegister(): Boolean = false
|
||||||
|
|
||||||
|
/**
 * Normalizes subject/alias text: every match of [IDENTITY_WHITESPACE_REGEX]
 * is replaced by a single space, then the result is trimmed.
 */
private fun normalizeIdentityText(value: String): String {
    val collapsed = IDENTITY_WHITESPACE_REGEX.replace(value, " ")
    return collapsed.trim()
}
|
||||||
|
|
||||||
|
/**
 * Rebuilds an [AliasMetadata] from its serialized form.
 *
 * The creation time is resolved in this order:
 * 1. the `timestamp` entry, read as epoch milliseconds;
 * 2. the `instant` entry, parsed with [Instant.parse] (a parse failure is ignored);
 * 3. the current time.
 *
 * A missing `deprecated` entry is treated as `false`.
 */
private fun loadAliasMetadata(state: JSONObject): AliasMetadata {
    val epochMillis = state.getLong("timestamp")
    val createdAt: Instant = if (epochMillis != null) {
        Instant.ofEpochMilli(epochMillis)
    } else {
        // The textual form is only consulted when no epoch timestamp is present.
        val text = state.getString("instant")
        val parsed = if (text != null) runCatching { Instant.parse(text) }.getOrNull() else null
        parsed ?: Instant.now()
    }

    val isDeprecated = state.getBoolean("deprecated") ?: false
    return AliasMetadata(instant = createdAt, deprecated = isDeprecated)
}
|
||||||
|
|
||||||
private fun loadIndexableDataMap(state: JSONObject): Map<String, IndexableData> {
|
private fun loadIndexableDataMap(state: JSONObject): Map<String, IndexableData> {
|
||||||
val loaded = mutableMapOf<String, IndexableData>()
|
val loaded = mutableMapOf<String, IndexableData>()
|
||||||
state.forEach { (key, value) ->
|
state.forEach { (key, value) ->
|
||||||
@@ -269,4 +376,24 @@ class Entity @JvmOverloads constructor(
|
|||||||
val confidence: Double,
|
val confidence: Double,
|
||||||
val vector: FloatArray?
|
val vector: FloatArray?
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/**
 * Holder for the identity fields captured together in one step.
 *
 * @property subject the canonical subject text.
 * @property aliases alias text mapped to its serializable metadata entries.
 */
private data class IdentityState(
    val subject: String,
    val aliases: Map<String, Map<String, Any>>,
)
|
||||||
|
|
||||||
|
/**
 * Read-only description of a single alias.
 *
 * @property alias the alias text.
 * @property createdAt the instant recorded for the alias.
 * @property deprecated whether the alias is flagged as deprecated.
 */
data class AliasView(
    val alias: String,
    val createdAt: Instant,
    val deprecated: Boolean,
)
|
||||||
|
|
||||||
|
/**
 * Metadata stored for each alias.
 *
 * @property instant the instant recorded for the alias.
 * @property deprecated whether the alias is flagged as deprecated.
 */
data class AliasMetadata(
    val instant: Instant,
    val deprecated: Boolean,
)
|
||||||
|
|
||||||
|
companion object {
    /** Matches one or more consecutive whitespace characters. */
    private val IDENTITY_WHITESPACE_REGEX: Regex = "\\s+".toRegex()
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -84,6 +84,18 @@ object ImpressionSearchDocuments {
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
entity.showAliases(includeDeprecated = true).forEachIndexed { index, alias ->
|
||||||
|
add(
|
||||||
|
ImpressionSearchDocument(
|
||||||
|
id = "entity:${entity.uuid}:alias:$index",
|
||||||
|
target = target,
|
||||||
|
field = ImpressionSearchField.SUBJECT,
|
||||||
|
text = alias.alias,
|
||||||
|
weight = SUBJECT_WEIGHT * ALIAS_WEIGHT_FACTOR,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
entity.snapshotFeatures().keys.forEachIndexed { index, feature ->
|
entity.snapshotFeatures().keys.forEachIndexed { index, feature ->
|
||||||
add(
|
add(
|
||||||
ImpressionSearchDocument(
|
ImpressionSearchDocument(
|
||||||
@@ -131,6 +143,7 @@ object ImpressionSearchDocuments {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private const val SUBJECT_WEIGHT = 1.0
|
private const val SUBJECT_WEIGHT = 1.0
|
||||||
|
private const val ALIAS_WEIGHT_FACTOR = 0.9
|
||||||
private const val FEATURE_WEIGHT = 0.85
|
private const val FEATURE_WEIGHT = 0.85
|
||||||
private const val IMPRESSION_WEIGHT = 0.75
|
private const val IMPRESSION_WEIGHT = 0.75
|
||||||
private const val RELATION_WEIGHT = 0.65
|
private const val RELATION_WEIGHT = 0.65
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import org.junit.jupiter.api.Assertions.assertFalse
|
|||||||
import org.junit.jupiter.api.Assertions.assertTrue
|
import org.junit.jupiter.api.Assertions.assertTrue
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
import work.slhaf.partner.core.cognition.impression.ActiveEntity
|
import work.slhaf.partner.core.cognition.impression.ActiveEntity
|
||||||
|
import work.slhaf.partner.core.cognition.impression.Entity
|
||||||
|
|
||||||
class SimpleTextSearchTest {
|
class SimpleTextSearchTest {
|
||||||
|
|
||||||
@@ -116,6 +117,21 @@ class SimpleTextSearchTest {
|
|||||||
assertEquals("report", hits.first().document.target.id)
|
assertEquals("report", hits.first().document.target.id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * An entity indexed through [ImpressionSearchDocuments.fromEntity] must be
 * returned as the top hit when the query is one of its aliases.
 */
@Test
fun `search recalls known entity by alias documents`() {
    val knownEntity = Entity("entity-1", "Partner").apply {
        addAlias("智能体项目")
    }
    val index = SimpleTextSearch(TestTokenizer())
    index.rebuild(ImpressionSearchDocuments.fromEntity(knownEntity))

    val results = index.search("智能体项目", limit = 10)

    assertFalse(results.isEmpty())
    val topTarget = results.first().document.target
    assertEquals(ImpressionSearchTarget.Type.ENTITY, topTarget.type)
    assertEquals("entity-1", topTarget.id)
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
fun `upsert replaces previous index terms for the same document id`() {
|
fun `upsert replaces previous index terms for the same document id`() {
|
||||||
val search = SimpleTextSearch(TestTokenizer())
|
val search = SimpleTextSearch(TestTokenizer())
|
||||||
@@ -207,7 +223,8 @@ class SimpleTextSearchTest {
|
|||||||
private val dictionary = listOf(
|
private val dictionary = listOf(
|
||||||
"城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书",
|
"城南", "旧书店", "老板", "推荐", "工程", "教材", "水利", "熟悉", "旧书",
|
||||||
"java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回",
|
"java", "kotlin", "jieba", "分词", "simpletextsearch", "倒排", "索引", "检索", "测试", "召回",
|
||||||
"vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件"
|
"vivado", "实验报告", "实验", "报告", "模板", "docx", "室友", "整理", "文件",
|
||||||
|
"智能体", "项目", "智能体项目"
|
||||||
)
|
)
|
||||||
private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
|
private val alphaNumericRegex = Regex("[a-z0-9]+(?:[-_./][a-z0-9]+)*")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user