mirror of
https://github.com/slhaf/Partner.git
synced 2026-06-27 17:49:16 +08:00
Compare commits
3 Commits
23a1b7093e
...
96817d84fe
| Author | SHA1 | Date | |
|---|---|---|---|
| 96817d84fe | |||
| dd64599154 | |||
| 4b638b756e |
@@ -3,18 +3,30 @@ package work.slhaf.partner.core.cognition.impression
|
||||
import org.w3c.dom.Document
|
||||
import org.w3c.dom.Element
|
||||
import work.slhaf.partner.core.cognition.context.BlockContent
|
||||
import java.time.Instant
|
||||
import java.time.ZoneId
|
||||
import java.util.*
|
||||
import java.util.concurrent.atomic.AtomicReference
|
||||
|
||||
class ActiveEntity @JvmOverloads constructor(
|
||||
timestamp: Long = System.currentTimeMillis(),
|
||||
private val _evidences: MutableList<String> = mutableListOf(),
|
||||
) : BlockContent("active_entity_$timestamp", "impression") {
|
||||
val evidences: List<String>
|
||||
val runtimeId: String = newActiveEntityRuntimeId(),
|
||||
val createdAt: Instant = Instant.now(),
|
||||
boundEntityUuid: String? = null,
|
||||
private val _evidences: MutableList<EntityEvidence> = mutableListOf(),
|
||||
) : BlockContent("active_entity_$runtimeId", "impression") {
|
||||
val evidences: List<EntityEvidence>
|
||||
get() = synchronized(_evidences) { _evidences.toList() }
|
||||
|
||||
@Volatile
|
||||
var lastMentionedAt: Instant = createdAt
|
||||
private set
|
||||
|
||||
private val _subject = AtomicReference("UNKNOWN")
|
||||
val subject: String get() = _subject.get()
|
||||
|
||||
private val _boundEntityUuid = AtomicReference<String?>(boundEntityUuid)
|
||||
val boundEntityUuid: String? get() = _boundEntityUuid.get()
|
||||
|
||||
private val _projectedFeatures: MutableMap<String, Double> = mutableMapOf()
|
||||
val projectedFeatures: Map<String, Double>
|
||||
get() = synchronized(_projectedFeatures) { _projectedFeatures.toMap() }
|
||||
@@ -23,12 +35,27 @@ class ActiveEntity @JvmOverloads constructor(
|
||||
val projectedImpressions: Map<String, Double>
|
||||
get() = synchronized(_projectedImpressions) { _projectedImpressions.toMap() }
|
||||
|
||||
fun addEvidence(evidence: String) = synchronized(_evidences) {
|
||||
/**
 * Convenience overload that wraps the arguments in an [EntityEvidence] and
 * forwards to `addEvidence(EntityEvidence)`.
 *
 * `@JvmOverloads` exposes the defaulted parameters to Java callers as
 * separate overloads.
 *
 * @param content evidence text.
 * @param associationConfidence passed through to [EntityEvidence]; defaults to 1.0.
 * @param source origin of the evidence; defaults to [EntityEvidence.Source.USER_INPUT].
 * @param timestamp epoch milliseconds of the evidence; defaults to the current time.
 */
@JvmOverloads
fun addEvidence(
    content: String,
    associationConfidence: Double = 1.0,
    source: EntityEvidence.Source = EntityEvidence.Source.USER_INPUT,
    timestamp: Long = System.currentTimeMillis(),
) = addEvidence(EntityEvidence(content, associationConfidence, source, timestamp))
|
||||
|
||||
/**
 * Appends [evidence] to the evidence list and moves `lastMentionedAt` to the
 * evidence's own timestamp.
 *
 * Both steps run while holding the monitor of the backing evidence list.
 *
 * NOTE(review): the timestamp is applied unconditionally, so evidence that
 * carries an older timestamp moves `lastMentionedAt` backwards — confirm
 * that this is intended.
 */
fun addEvidence(evidence: EntityEvidence) = synchronized(_evidences) {
    _evidences.add(evidence)
    touch(Instant.ofEpochMilli(evidence.timestamp))
}
|
||||
|
||||
/** Replaces the current subject with [subject] through the backing atomic reference. */
fun updateSubject(subject: String) {
    _subject.set(subject)
}
|
||||
|
||||
/**
 * Stores [uuid] as the bound entity UUID through the backing atomic reference.
 * Passing `null` clears the binding.
 */
fun bindEntity(uuid: String?) {
    _boundEntityUuid.set(uuid)
}
|
||||
|
||||
/**
 * Sets `lastMentionedAt` to [time] (the current instant by default).
 *
 * The assignment is unconditional: passing an instant earlier than the
 * current value moves `lastMentionedAt` backwards.
 */
fun touch(time: Instant = Instant.now()) {
    lastMentionedAt = time
}
|
||||
|
||||
/**
 * Inserts or overwrites the given feature entries in the projected-feature
 * map while holding that map's monitor.
 *
 * When the same key appears more than once in [features], the last
 * occurrence wins.
 *
 * @param features pairs of feature text to its numeric value.
 */
fun addProjectedFeatures(vararg features: Pair<String, Double>) = synchronized(_projectedFeatures) {
    _projectedFeatures.putAll(features)
}
|
||||
@@ -38,6 +65,11 @@ class ActiveEntity @JvmOverloads constructor(
|
||||
}
|
||||
|
||||
override fun fillXml(document: Document, root: Element) {
|
||||
root.setAttribute("runtime_id", runtimeId)
|
||||
boundEntityUuid?.let { root.setAttribute("bound_entity_uuid", it) }
|
||||
root.setAttribute("created_at", modelTime(createdAt))
|
||||
root.setAttribute("last_mentioned_at", modelTime(lastMentionedAt))
|
||||
|
||||
appendTextElement(document, root, "subject", subject)
|
||||
|
||||
appendListElement(
|
||||
@@ -46,7 +78,14 @@ class ActiveEntity @JvmOverloads constructor(
|
||||
"evidences",
|
||||
"evidence",
|
||||
synchronized(_evidences) { _evidences.toList() }
|
||||
)
|
||||
) { evidence ->
|
||||
setAttribute("association_confidence", evidence.associationConfidence.toString())
|
||||
setAttribute("source", evidence.source.name)
|
||||
setAttribute("timestamp", evidence.timestamp.toString())
|
||||
setAttribute("truncated", evidence.isContentTruncated().toString())
|
||||
setAttribute("original_length", evidence.content.length.toString())
|
||||
textContent = evidence.contentForContext()
|
||||
}
|
||||
|
||||
appendListElement(
|
||||
document,
|
||||
@@ -70,4 +109,10 @@ class ActiveEntity @JvmOverloads constructor(
|
||||
textContent = entry.key
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Renders [time] in the JVM's default time zone, using the `toString()` form
 * of the resulting zoned date-time.
 */
private fun modelTime(time: Instant): String =
    time.atZone(ZoneId.systemDefault()).toString()
|
||||
}
|
||||
|
||||
/**
 * Generates a runtime id for an active entity: the first 12 characters of a
 * random UUID's string form with the dashes removed.
 */
private fun newActiveEntityRuntimeId(): String =
    UUID.randomUUID().toString().replace("-", "").take(12)
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package work.slhaf.partner.core.cognition.impression
|
||||
|
||||
/**
 * Runtime evidence associated with an active entity.
 *
 * The confidence describes how strongly this evidence is associated with the
 * current active entity, not whether the evidence content itself is true.
 *
 * @property content full evidence text; it is stored as given and only
 *   abbreviated on demand by [contentForContext].
 * @property associationConfidence strength of the association with the active entity.
 * @property source origin of the evidence, see [Source].
 * @property timestamp epoch milliseconds; defaults to the time of construction.
 */
data class EntityEvidence @JvmOverloads constructor(
    val content: String,
    val associationConfidence: Double = 1.0,
    val source: Source = Source.USER_INPUT,
    val timestamp: Long = System.currentTimeMillis(),
) {
    /** Where a piece of evidence came from. */
    enum class Source {
        USER_INPUT,
        ASSISTANT_REPLY
    }

    /** Returns `true` when [content] is longer than [maxLength] characters. */
    fun isContentTruncated(maxLength: Int = CONTEXT_CONTENT_MAX_LENGTH): Boolean =
        content.length > maxLength

    /**
     * Returns [content] abbreviated for use as context.
     *
     * - Content of at most [maxLength] characters is returned unchanged.
     * - When [maxLength] leaves no room for the omission marker, a plain
     *   prefix of at most [maxLength] characters is returned.
     * - Otherwise the result is `head + marker + tail`. The remaining budget
     *   is split between head and tail, then each cut point is snapped to a
     *   nearby line break or, failing that, to sentence punctuation.
     *
     * The head cut is snapped forwards and the tail cut backwards, so both
     * parts may grow by up to the search window; in that case the result can
     * be longer than [maxLength].
     *
     * No cut is ever placed between the two halves of a UTF-16 surrogate
     * pair, so the result never contains a lone surrogate that the original
     * content did not already have.
     *
     * @throws IllegalArgumentException if [maxLength] is negative.
     */
    fun contentForContext(maxLength: Int = CONTEXT_CONTENT_MAX_LENGTH): String {
        require(maxLength >= 0) { "maxLength must not be negative: $maxLength" }
        if (content.length <= maxLength) {
            return content
        }

        val available = maxLength - OMITTED_MARKER.length
        if (available <= 0) {
            // Not even the marker fits: fall back to a bare prefix.
            return takeWholeCodePoints(content, maxLength)
        }

        val headBudget = available / 2
        val tailBudget = available - headBudget
        val headEnd = adjustHeadEnd(content, headBudget)
        val tailStart = adjustTailStart(content, content.length - tailBudget)

        if (tailStart <= headEnd) {
            // Boundary snapping made head and tail meet or overlap; nothing
            // would be omitted, so use a bare prefix instead.
            return takeWholeCodePoints(content, maxLength).trimEnd()
        }

        return content.substring(0, headEnd).trimEnd() +
            OMITTED_MARKER +
            content.substring(tailStart).trimStart()
    }

    /**
     * Picks the exclusive end index of the head: just after the first line
     * break found at or after [preferredEnd], else just after the first
     * sentence punctuation, else [preferredEnd] itself (moved back by one if
     * it would split a surrogate pair).
     */
    private fun adjustHeadEnd(source: String, preferredEnd: Int): Int {
        val safePreferredEnd = preferredEnd.coerceIn(0, source.length)
        val boundary =
            findForwardBoundary(source, safePreferredEnd, STRONG_BOUNDARY_SEARCH_WINDOW, ::isStrongBoundary)
                ?: findForwardBoundary(source, safePreferredEnd, SOFT_BOUNDARY_SEARCH_WINDOW, ::isSoftBoundary)
        return when {
            boundary != null -> boundary + 1
            splitsSurrogatePair(source, safePreferredEnd) -> safePreferredEnd - 1
            else -> safePreferredEnd
        }
    }

    /**
     * Picks the start index of the tail: just after the nearest line break
     * found before [preferredStart], else just after the nearest sentence
     * punctuation, else [preferredStart] itself (moved forward by one if it
     * would split a surrogate pair).
     */
    private fun adjustTailStart(source: String, preferredStart: Int): Int {
        val safePreferredStart = preferredStart.coerceIn(0, source.length)
        val boundary =
            findBackwardBoundary(source, safePreferredStart, STRONG_BOUNDARY_SEARCH_WINDOW, ::isStrongBoundary)
                ?: findBackwardBoundary(source, safePreferredStart, SOFT_BOUNDARY_SEARCH_WINDOW, ::isSoftBoundary)
        return when {
            boundary != null -> boundary
            splitsSurrogatePair(source, safePreferredStart) -> safePreferredStart + 1
            else -> safePreferredStart
        }
    }

    /**
     * Returns the index of the first character in `[start, start + window)`
     * (clamped to the string) that satisfies [predicate], or `null`.
     */
    private fun findForwardBoundary(
        source: String,
        start: Int,
        window: Int,
        predicate: (Char) -> Boolean,
    ): Int? {
        val end = (start + window).coerceAtMost(source.length)
        for (index in start until end) {
            if (predicate(source[index])) {
                return index
            }
        }
        return null
    }

    /**
     * Walks backwards from [start] over at most [window] characters and
     * returns the index just after the first character that satisfies
     * [predicate], or `null`.
     */
    private fun findBackwardBoundary(
        source: String,
        start: Int,
        window: Int,
        predicate: (Char) -> Boolean,
    ): Int? {
        val end = (start - window).coerceAtLeast(0)
        for (index in start downTo end + 1) {
            if (predicate(source[index - 1])) {
                return index
            }
        }
        return null
    }

    /** True when cutting [source] at [index] would separate a high surrogate from its low surrogate. */
    private fun splitsSurrogatePair(source: String, index: Int): Boolean =
        index in 1 until source.length &&
            source[index - 1].isHighSurrogate() &&
            source[index].isLowSurrogate()

    /** Prefix of at most [count] characters that never ends in the middle of a surrogate pair. */
    private fun takeWholeCodePoints(source: String, count: Int): String {
        val end = count.coerceAtMost(source.length)
        return source.substring(0, if (splitsSurrogatePair(source, end)) end - 1 else end)
    }

    private fun isStrongBoundary(char: Char): Boolean = char == '\n'

    private fun isSoftBoundary(char: Char): Boolean = when (char) {
        '。', '!', '?', ';', ';', '.' -> true
        // Fullwidth exclamation and question marks, matching the fullwidth
        // semicolon and ideographic full stop handled above.
        '\uFF01', '\uFF1F' -> true
        else -> false
    }

    companion object {
        const val CONTEXT_CONTENT_MAX_LENGTH = 480
        private const val OMITTED_MARKER = "\n...[omitted]...\n"
        private const val STRONG_BOUNDARY_SEARCH_WINDOW = 120
        private const val SOFT_BOUNDARY_SEARCH_WINDOW = 80
    }
}
|
||||
@@ -22,7 +22,7 @@ public class ImpressionVectorIndex {
|
||||
}
|
||||
|
||||
public void upsert(String text, Entity.IndexableData indexableData){
|
||||
if (VectorClient.status){
|
||||
if (!VectorClient.status){
|
||||
return;
|
||||
}
|
||||
String modelId = VectorClient.VECTOR_MODEL_ID;
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/**
 * A candidate association between a query and a search target.
 *
 * @property target the target this match refers to.
 * @property score numeric score of the match. NOTE(review): the scale and
 *   ordering (higher = better?) are not defined here — confirm with the producer.
 * @property hits the individual search hits attached to this match; empty by default.
 */
data class EntityAssociationMatch(
    val target: ImpressionSearchTarget,
    val score: Double,
    val hits: List<ImpressionSearchHit> = emptyList(),
)
|
||||
@@ -0,0 +1,10 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/**
 * One searchable piece of text belonging to a search target.
 *
 * @property id identifier of this document.
 * @property target the target the text belongs to.
 * @property field which kind of text this is (subject, feature, ...).
 * @property text the text to be searched.
 * @property weight numeric weight of this document; 1.0 by default.
 * @property metadata free-form string key/value pairs; empty by default.
 */
data class ImpressionSearchDocument(
    val id: String,
    val target: ImpressionSearchTarget,
    val field: ImpressionSearchField,
    val text: String,
    val weight: Double = 1.0,
    val metadata: Map<String, String> = emptyMap(),
)
|
||||
@@ -0,0 +1,138 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
import work.slhaf.partner.core.cognition.impression.ActiveEntity
|
||||
import work.slhaf.partner.core.cognition.impression.Entity
|
||||
|
||||
/**
 * Factory functions that flatten an [ActiveEntity] or an [Entity] into a list
 * of [ImpressionSearchDocument]s, one document per searchable text fragment.
 *
 * Document ids have the form `<kind>:<owner id>:<field>[:<index>]`, where the
 * index is the position of the fragment in the collection it was read from.
 */
object ImpressionSearchDocuments {

    /**
     * Builds the documents for [activeEntity]: one for the subject, then one
     * per evidence, projected feature and projected impression, in that order.
     *
     * Evidence text is the abbreviated form returned by
     * `EntityEvidence.contentForContext()`. Evidence, feature and impression
     * weights are the field's base weight multiplied by the entry's own value
     * (association confidence or map value). When the entity has a bound
     * entity UUID, every document carries it under the `boundEntityUuid`
     * metadata key; otherwise the metadata is empty.
     */
    fun fromActiveEntity(activeEntity: ActiveEntity): List<ImpressionSearchDocument> {
        val target = ImpressionSearchTarget(
            ImpressionSearchTarget.Type.ACTIVE_ENTITY,
            activeEntity.runtimeId,
        )
        val metadata = activeEntity.boundEntityUuid
            ?.let { mapOf("boundEntityUuid" to it) }
            .orEmpty()

        // Single place where the id prefix, target and metadata are attached.
        fun document(idSuffix: String, field: ImpressionSearchField, text: String, weight: Double) =
            ImpressionSearchDocument(
                id = "active:${activeEntity.runtimeId}:$idSuffix",
                target = target,
                field = field,
                text = text,
                weight = weight,
                metadata = metadata,
            )

        return buildList {
            add(document("subject", ImpressionSearchField.SUBJECT, activeEntity.subject, SUBJECT_WEIGHT))

            activeEntity.evidences.forEachIndexed { index, evidence ->
                add(
                    document(
                        "evidence:$index",
                        ImpressionSearchField.EVIDENCE,
                        evidence.contentForContext(),
                        EVIDENCE_WEIGHT * evidence.associationConfidence,
                    )
                )
            }

            activeEntity.projectedFeatures.entries.forEachIndexed { index, entry ->
                add(
                    document(
                        "feature:$index",
                        ImpressionSearchField.FEATURE,
                        entry.key,
                        FEATURE_WEIGHT * entry.value,
                    )
                )
            }

            activeEntity.projectedImpressions.entries.forEachIndexed { index, entry ->
                add(
                    document(
                        "impression:$index",
                        ImpressionSearchField.IMPRESSION,
                        entry.key,
                        IMPRESSION_WEIGHT * entry.value,
                    )
                )
            }
        }
    }

    /**
     * Builds the documents for [entity]: one for the subject, then one per
     * feature key, impression key and relation, in that order.
     *
     * Only the keys of the feature and impression snapshots are indexed;
     * their values are not used. A relation's text is its target followed by
     * the relation names, separated by single spaces. All documents use the
     * fixed base weight of their field and the default (empty) metadata.
     */
    fun fromEntity(entity: Entity): List<ImpressionSearchDocument> {
        val target = ImpressionSearchTarget(
            ImpressionSearchTarget.Type.ENTITY,
            entity.uuid,
        )

        // Single place where the id prefix and target are attached.
        fun document(idSuffix: String, field: ImpressionSearchField, text: String, weight: Double) =
            ImpressionSearchDocument(
                id = "entity:${entity.uuid}:$idSuffix",
                target = target,
                field = field,
                text = text,
                weight = weight,
            )

        return buildList {
            add(document("subject", ImpressionSearchField.SUBJECT, entity.subject, SUBJECT_WEIGHT))

            entity.snapshotFeatures().keys.forEachIndexed { index, feature ->
                add(document("feature:$index", ImpressionSearchField.FEATURE, feature, FEATURE_WEIGHT))
            }

            entity.snapshotImpressions().keys.forEachIndexed { index, impression ->
                add(document("impression:$index", ImpressionSearchField.IMPRESSION, impression, IMPRESSION_WEIGHT))
            }

            entity.showRelations().forEachIndexed { index, relation ->
                val relationText = buildString {
                    append(relation.target)
                    relation.relations.keys.forEach { name ->
                        append(' ')
                        append(name)
                    }
                }

                add(document("relation:$index", ImpressionSearchField.RELATION, relationText, RELATION_WEIGHT))
            }
        }
    }

    // Base weight per field; subject is the reference value.
    private const val SUBJECT_WEIGHT = 1.0
    private const val FEATURE_WEIGHT = 0.85
    private const val IMPRESSION_WEIGHT = 0.75
    private const val RELATION_WEIGHT = 0.65
    private const val EVIDENCE_WEIGHT = 0.8
}
|
||||
@@ -0,0 +1,9 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/** The kind of text an [ImpressionSearchDocument] holds. */
enum class ImpressionSearchField {
    SUBJECT,
    FEATURE,
    IMPRESSION,
    RELATION,
    EVIDENCE
}
|
||||
@@ -0,0 +1,7 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/**
 * A single result of a text search.
 *
 * @property document the document that was hit.
 * @property score numeric score of the hit. NOTE(review): the scale and
 *   ordering are defined by the search implementation — confirm there.
 * @property matchedTerms terms reported as matched for this hit; empty by default.
 */
data class ImpressionSearchHit(
    val document: ImpressionSearchDocument,
    val score: Double,
    val matchedTerms: Set<String> = emptySet(),
)
|
||||
@@ -0,0 +1,11 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/**
 * Identifies the owner of a search document.
 *
 * @property type which kind of object [id] refers to.
 * @property id identifier of that object.
 */
data class ImpressionSearchTarget(
    val type: Type,
    val id: String,
) {
    /** The kinds of object a search document can belong to. */
    enum class Type {
        ACTIVE_ENTITY,
        ENTITY
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package work.slhaf.partner.core.cognition.impression.search
|
||||
|
||||
/**
 * Text search over [ImpressionSearchDocument]s.
 *
 * NOTE(review): the contracts below are inferred from the member names and
 * signatures only; confirm them against the implementing classes.
 */
interface ImpressionTextSearch {

    /** Rebuilds the index from [documents] — presumably discarding whatever was indexed before. */
    fun rebuild(documents: Collection<ImpressionSearchDocument>)

    /** Inserts or updates a single [document] — presumably keyed by its id. */
    fun upsert(document: ImpressionSearchDocument)

    /** Removes the documents that belong to [target]. */
    fun removeByTarget(target: ImpressionSearchTarget)

    /**
     * Searches the indexed documents for [query].
     *
     * @param limit upper bound on the number of hits requested; 20 by default.
     * @return the hits found for the query.
     */
    fun search(
        query: String,
        limit: Int = 20,
    ): List<ImpressionSearchHit>
}
|
||||
Reference in New Issue
Block a user