3 Commits

10 changed files with 362 additions and 8 deletions

View File

@@ -3,18 +3,30 @@ package work.slhaf.partner.core.cognition.impression
import org.w3c.dom.Document
import org.w3c.dom.Element
import work.slhaf.partner.core.cognition.context.BlockContent
import java.time.Instant
import java.time.ZoneId
import java.util.*
import java.util.concurrent.atomic.AtomicReference
class ActiveEntity @JvmOverloads constructor(
timestamp: Long = System.currentTimeMillis(),
private val _evidences: MutableList<String> = mutableListOf(),
) : BlockContent("active_entity_$timestamp", "impression") {
val evidences: List<String>
val runtimeId: String = newActiveEntityRuntimeId(),
val createdAt: Instant = Instant.now(),
boundEntityUuid: String? = null,
private val _evidences: MutableList<EntityEvidence> = mutableListOf(),
) : BlockContent("active_entity_$runtimeId", "impression") {
val evidences: List<EntityEvidence>
get() = synchronized(_evidences) { _evidences.toList() }
@Volatile
var lastMentionedAt: Instant = createdAt
private set
private val _subject = AtomicReference("UNKNOWN")
val subject: String get() = _subject.get()
private val _boundEntityUuid = AtomicReference<String?>(boundEntityUuid)
val boundEntityUuid: String? get() = _boundEntityUuid.get()
private val _projectedFeatures: MutableMap<String, Double> = mutableMapOf()
val projectedFeatures: Map<String, Double>
get() = synchronized(_projectedFeatures) { _projectedFeatures.toMap() }
@@ -23,12 +35,27 @@ class ActiveEntity @JvmOverloads constructor(
val projectedImpressions: Map<String, Double>
get() = synchronized(_projectedImpressions) { _projectedImpressions.toMap() }
fun addEvidence(evidence: String) = synchronized(_evidences) {
/**
 * Convenience overload: wraps the arguments in an [EntityEvidence] and hands
 * it to the [EntityEvidence]-taking overload.
 *
 * @param content evidence text.
 * @param associationConfidence association strength, `1.0` when omitted.
 * @param source origin of the evidence, [EntityEvidence.Source.USER_INPUT] when omitted.
 * @param timestamp epoch milliseconds, the current time when omitted.
 */
@JvmOverloads
fun addEvidence(
    content: String,
    associationConfidence: Double = 1.0,
    source: EntityEvidence.Source = EntityEvidence.Source.USER_INPUT,
    timestamp: Long = System.currentTimeMillis(),
) {
    val evidence = EntityEvidence(
        content = content,
        associationConfidence = associationConfidence,
        source = source,
        timestamp = timestamp,
    )
    addEvidence(evidence)
}
/**
 * Appends [evidence] to the evidence list and updates the last-mentioned
 * time to the evidence timestamp, both while holding the evidence-list lock.
 *
 * Note: the last-mentioned time is overwritten unconditionally, so evidence
 * carrying an older timestamp moves it backwards.
 */
fun addEvidence(evidence: EntityEvidence) {
    synchronized(_evidences) {
        _evidences += evidence
        val mentionedAt = Instant.ofEpochMilli(evidence.timestamp)
        touch(mentionedAt)
    }
}
/** Atomically replaces the current subject with [subject]. */
fun updateSubject(subject: String) {
    _subject.set(subject)
}
/** Atomically stores [uuid] as the bound entity id; `null` clears the binding. */
fun bindEntity(uuid: String?) {
    _boundEntityUuid.set(uuid)
}
/**
 * Records [time] as the moment this entity was last mentioned.
 *
 * @param time the instant to store; the current instant when omitted.
 */
fun touch(time: Instant = Instant.now()) {
    this.lastMentionedAt = time
}
/**
 * Stores every `(name, value)` pair in the projected-feature map while
 * holding that map's lock; a later pair with the same name overwrites an
 * earlier one.
 */
fun addProjectedFeatures(vararg features: Pair<String, Double>) {
    synchronized(_projectedFeatures) {
        for ((name, value) in features) {
            _projectedFeatures[name] = value
        }
    }
}
@@ -38,6 +65,11 @@ class ActiveEntity @JvmOverloads constructor(
}
override fun fillXml(document: Document, root: Element) {
root.setAttribute("runtime_id", runtimeId)
boundEntityUuid?.let { root.setAttribute("bound_entity_uuid", it) }
root.setAttribute("created_at", modelTime(createdAt))
root.setAttribute("last_mentioned_at", modelTime(lastMentionedAt))
appendTextElement(document, root, "subject", subject)
appendListElement(
@@ -46,7 +78,14 @@ class ActiveEntity @JvmOverloads constructor(
"evidences",
"evidence",
synchronized(_evidences) { _evidences.toList() }
)
) { evidence ->
setAttribute("association_confidence", evidence.associationConfidence.toString())
setAttribute("source", evidence.source.name)
setAttribute("timestamp", evidence.timestamp.toString())
setAttribute("truncated", evidence.isContentTruncated().toString())
setAttribute("original_length", evidence.content.length.toString())
textContent = evidence.contentForContext()
}
appendListElement(
document,
@@ -70,4 +109,10 @@ class ActiveEntity @JvmOverloads constructor(
textContent = entry.key
}
}
}
/** Renders [time] in the JVM's default time zone using `ZonedDateTime.toString()`. */
private fun modelTime(time: Instant): String {
    val zoned = time.atZone(ZoneId.systemDefault())
    return zoned.toString()
}
}
/**
 * Produces a fresh runtime id: the first 12 characters of a random UUID's
 * string form with the dashes removed.
 */
private fun newActiveEntityRuntimeId(): String {
    val compact = UUID.randomUUID().toString().filterNot { it == '-' }
    return compact.substring(0, 12)
}

View File

@@ -0,0 +1,112 @@
package work.slhaf.partner.core.cognition.impression
/**
 * Runtime evidence associated with an active entity.
 *
 * The confidence describes how strongly this evidence is associated with the
 * current active entity, not whether the evidence content itself is true.
 *
 * @property content raw evidence text; this class never modifies it.
 * @property associationConfidence association strength, `1.0` by default.
 * @property source origin of the evidence, [Source.USER_INPUT] by default.
 * @property timestamp epoch milliseconds, the current time by default.
 */
data class EntityEvidence @JvmOverloads constructor(
    val content: String,
    val associationConfidence: Double = 1.0,
    val source: Source = Source.USER_INPUT,
    val timestamp: Long = System.currentTimeMillis(),
) {
    enum class Source {
        USER_INPUT,
        ASSISTANT_REPLY
    }

    /** Returns `true` when [content] is longer than [maxLength] characters. */
    fun isContentTruncated(maxLength: Int = CONTEXT_CONTENT_MAX_LENGTH): Boolean =
        content.length > maxLength

    /**
     * Returns [content] shortened for use in a model context.
     *
     * Content no longer than [maxLength] is returned unchanged. Longer content
     * keeps a head and a tail joined by an omission marker. Both cut points are
     * moved outward to a nearby line break, or failing that to sentence
     * punctuation, so the result may be longer than [maxLength] by up to the
     * boundary search windows. When [maxLength] leaves no room for the marker,
     * or the head and tail would overlap, the content is cut at [maxLength].
     *
     * Cuts never fall between the two halves of a UTF-16 surrogate pair.
     *
     * @param maxLength target length in UTF-16 code units; must not be negative.
     * @throws IllegalArgumentException if [maxLength] is negative.
     */
    fun contentForContext(maxLength: Int = CONTEXT_CONTENT_MAX_LENGTH): String {
        require(maxLength >= 0) { "maxLength must not be negative: $maxLength" }
        if (content.length <= maxLength) {
            return content
        }
        val available = maxLength - OMITTED_MARKER.length
        if (available <= 0) {
            // No room for the marker: plain hard cut.
            return content.substring(0, alignCutBackward(content, maxLength))
        }
        val headBudget = available / 2
        val tailBudget = available - headBudget
        val headEnd = adjustHeadEnd(content, headBudget)
        val tailStart = adjustTailStart(content, content.length - tailBudget)
        if (tailStart <= headEnd) {
            // Boundary adjustment made head and tail meet; omitting nothing is pointless.
            return content.substring(0, alignCutBackward(content, maxLength)).trimEnd()
        }
        return content.substring(0, headEnd).trimEnd() +
            OMITTED_MARKER +
            content.substring(tailStart).trimStart()
    }

    /** Exclusive end of the head: just past the first boundary at or after [preferredEnd], else [preferredEnd]. */
    private fun adjustHeadEnd(source: String, preferredEnd: Int): Int {
        val safePreferredEnd = preferredEnd.coerceIn(0, source.length)
        findForwardBoundary(source, safePreferredEnd, STRONG_BOUNDARY_SEARCH_WINDOW, ::isStrongBoundary)?.let {
            return it + 1
        }
        findForwardBoundary(source, safePreferredEnd, SOFT_BOUNDARY_SEARCH_WINDOW, ::isSoftBoundary)?.let {
            return it + 1
        }
        return alignCutBackward(source, safePreferredEnd)
    }

    /** Inclusive start of the tail: just past the nearest boundary before [preferredStart], else [preferredStart]. */
    private fun adjustTailStart(source: String, preferredStart: Int): Int {
        val safePreferredStart = preferredStart.coerceIn(0, source.length)
        findBackwardBoundary(source, safePreferredStart, STRONG_BOUNDARY_SEARCH_WINDOW, ::isStrongBoundary)?.let {
            return it
        }
        findBackwardBoundary(source, safePreferredStart, SOFT_BOUNDARY_SEARCH_WINDOW, ::isSoftBoundary)?.let {
            return it
        }
        return alignCutForward(source, safePreferredStart)
    }

    /** Index of the first character in `[start, start + window)` matching [predicate], or `null`. */
    private fun findForwardBoundary(
        source: String,
        start: Int,
        window: Int,
        predicate: (Char) -> Boolean,
    ): Int? {
        val end = (start + window).coerceAtMost(source.length)
        for (index in start until end) {
            if (predicate(source[index])) {
                return index
            }
        }
        return null
    }

    /**
     * Scans backwards from [start] over at most [window] characters and returns
     * the index right after the first character matching [predicate], or `null`.
     */
    private fun findBackwardBoundary(
        source: String,
        start: Int,
        window: Int,
        predicate: (Char) -> Boolean,
    ): Int? {
        val end = (start - window).coerceAtLeast(0)
        for (index in start downTo end + 1) {
            if (predicate(source[index - 1])) {
                return index
            }
        }
        return null
    }

    /** `true` when cutting at [index] would separate a high surrogate from its low surrogate. */
    private fun splitsSurrogatePair(source: String, index: Int): Boolean =
        index > 0 &&
            index < source.length &&
            source[index - 1].isHighSurrogate() &&
            source[index].isLowSurrogate()

    /** Moves a head cut one step back when it would split a surrogate pair. */
    private fun alignCutBackward(source: String, index: Int): Int =
        if (splitsSurrogatePair(source, index)) index - 1 else index

    /** Moves a tail cut one step forward when it would split a surrogate pair. */
    private fun alignCutForward(source: String, index: Int): Int =
        if (splitsSurrogatePair(source, index)) index + 1 else index

    private fun isStrongBoundary(char: Char): Boolean = char == '\n'

    // NOTE(review): three literals in this list were unreadable in the reviewed
    // source. They are reconstructed here as the fullwidth exclamation mark,
    // question mark and semicolon (U+FF01, U+FF1F, U+FF1B) - confirm against the
    // repository. Escapes are used so the characters survive re-encoding.
    private fun isSoftBoundary(char: Char): Boolean = when (char) {
        '\u3002', '\uFF01', '\uFF1F', ';', '\uFF1B', '.' -> true
        else -> false
    }

    companion object {
        const val CONTEXT_CONTENT_MAX_LENGTH = 480
        private const val OMITTED_MARKER = "\n...[omitted]...\n"
        private const val STRONG_BOUNDARY_SEARCH_WINDOW = 120
        private const val SOFT_BOUNDARY_SEARCH_WINDOW = 80
    }
}

View File

@@ -22,7 +22,7 @@ public class ImpressionVectorIndex {
}
public void upsert(String text, Entity.IndexableData indexableData){
if (VectorClient.status){
if (!VectorClient.status){
return;
}
String modelId = VectorClient.VECTOR_MODEL_ID;

View File

@@ -0,0 +1,7 @@
package work.slhaf.partner.core.cognition.impression.search
/**
 * A scored association with a single [ImpressionSearchTarget].
 *
 * @property target the entity or active entity that was matched.
 * @property score score assigned to the match.
 * @property hits search hits attached to the match; empty by default.
 *   NOTE(review): presumably the hits that produced [score] - confirm with the producer.
 */
data class EntityAssociationMatch(
    val target: ImpressionSearchTarget,
    val score: Double,
    val hits: List<ImpressionSearchHit> = emptyList(),
)

View File

@@ -0,0 +1,10 @@
package work.slhaf.partner.core.cognition.impression.search
/**
 * One searchable piece of text belonging to an [ImpressionSearchTarget].
 *
 * @property id identifier of this document.
 * @property target the entity or active entity the text belongs to.
 * @property field which part of the target the text was taken from.
 * @property text the text to search.
 * @property weight weight of this document, `1.0` by default.
 * @property metadata additional string key/value pairs; empty by default.
 */
data class ImpressionSearchDocument(
    val id: String,
    val target: ImpressionSearchTarget,
    val field: ImpressionSearchField,
    val text: String,
    val weight: Double = 1.0,
    val metadata: Map<String, String> = emptyMap(),
)

View File

@@ -0,0 +1,138 @@
package work.slhaf.partner.core.cognition.impression.search
import work.slhaf.partner.core.cognition.impression.ActiveEntity
import work.slhaf.partner.core.cognition.impression.Entity
/**
 * Factory for the [ImpressionSearchDocument]s that describe an [ActiveEntity]
 * or an [Entity].
 *
 * Each document id has the form `<kind>:<target id>:<field>[:<position>]`,
 * where the position is the element's index in the snapshot taken while
 * building the list.
 */
object ImpressionSearchDocuments {
    private const val SUBJECT_WEIGHT = 1.0
    private const val FEATURE_WEIGHT = 0.85
    private const val IMPRESSION_WEIGHT = 0.75
    private const val RELATION_WEIGHT = 0.65
    private const val EVIDENCE_WEIGHT = 0.8

    /**
     * Builds one document for the subject, then one per evidence, projected
     * feature and projected impression of [activeEntity], in that order.
     *
     * Evidence text is the context-shortened content, and its weight is scaled
     * by the evidence's association confidence. Feature and impression weights
     * are scaled by their map values. When the active entity is bound to an
     * entity, every document carries that uuid under the `boundEntityUuid`
     * metadata key.
     */
    fun fromActiveEntity(activeEntity: ActiveEntity): List<ImpressionSearchDocument> {
        val runtimeId = activeEntity.runtimeId
        val target = ImpressionSearchTarget(ImpressionSearchTarget.Type.ACTIVE_ENTITY, runtimeId)
        val boundUuid = activeEntity.boundEntityUuid
        val metadata: Map<String, String> =
            if (boundUuid == null) emptyMap() else mapOf("boundEntityUuid" to boundUuid)
        val idPrefix = "active:$runtimeId"

        return buildList<ImpressionSearchDocument> {
            add(
                ImpressionSearchDocument(
                    id = "$idPrefix:subject",
                    target = target,
                    field = ImpressionSearchField.SUBJECT,
                    text = activeEntity.subject,
                    weight = SUBJECT_WEIGHT,
                    metadata = metadata,
                )
            )
            activeEntity.evidences.mapIndexedTo(this) { position, evidence ->
                ImpressionSearchDocument(
                    id = "$idPrefix:evidence:$position",
                    target = target,
                    field = ImpressionSearchField.EVIDENCE,
                    text = evidence.contentForContext(),
                    weight = EVIDENCE_WEIGHT * evidence.associationConfidence,
                    metadata = metadata,
                )
            }
            activeEntity.projectedFeatures.entries.mapIndexedTo(this) { position, (feature, strength) ->
                ImpressionSearchDocument(
                    id = "$idPrefix:feature:$position",
                    target = target,
                    field = ImpressionSearchField.FEATURE,
                    text = feature,
                    weight = FEATURE_WEIGHT * strength,
                    metadata = metadata,
                )
            }
            activeEntity.projectedImpressions.entries.mapIndexedTo(this) { position, (impression, strength) ->
                ImpressionSearchDocument(
                    id = "$idPrefix:impression:$position",
                    target = target,
                    field = ImpressionSearchField.IMPRESSION,
                    text = impression,
                    weight = IMPRESSION_WEIGHT * strength,
                    metadata = metadata,
                )
            }
        }
    }

    /**
     * Builds one document for the subject, then one per feature, impression
     * and relation of [entity], in that order, each with its fixed field weight.
     *
     * A relation's text is its target followed by every relation name, separated
     * by single spaces.
     */
    fun fromEntity(entity: Entity): List<ImpressionSearchDocument> {
        val target = ImpressionSearchTarget(ImpressionSearchTarget.Type.ENTITY, entity.uuid)
        val idPrefix = "entity:${entity.uuid}"

        return buildList<ImpressionSearchDocument> {
            add(
                ImpressionSearchDocument(
                    id = "$idPrefix:subject",
                    target = target,
                    field = ImpressionSearchField.SUBJECT,
                    text = entity.subject,
                    weight = SUBJECT_WEIGHT,
                )
            )
            entity.snapshotFeatures().keys.mapIndexedTo(this) { position, feature ->
                ImpressionSearchDocument(
                    id = "$idPrefix:feature:$position",
                    target = target,
                    field = ImpressionSearchField.FEATURE,
                    text = feature,
                    weight = FEATURE_WEIGHT,
                )
            }
            entity.snapshotImpressions().keys.mapIndexedTo(this) { position, impression ->
                ImpressionSearchDocument(
                    id = "$idPrefix:impression:$position",
                    target = target,
                    field = ImpressionSearchField.IMPRESSION,
                    text = impression,
                    weight = IMPRESSION_WEIGHT,
                )
            }
            entity.showRelations().mapIndexedTo(this) { position, relation ->
                val relationText = buildString {
                    append(relation.target)
                    for (name in relation.relations.keys) {
                        append(' ').append(name)
                    }
                }
                ImpressionSearchDocument(
                    id = "$idPrefix:relation:$position",
                    target = target,
                    field = ImpressionSearchField.RELATION,
                    text = relationText,
                    weight = RELATION_WEIGHT,
                )
            }
        }
    }
}

View File

@@ -0,0 +1,9 @@
package work.slhaf.partner.core.cognition.impression.search
/** The part of a search target that an [ImpressionSearchDocument]'s text was taken from. */
enum class ImpressionSearchField {
    SUBJECT,
    FEATURE,
    IMPRESSION,
    RELATION,
    EVIDENCE
}

View File

@@ -0,0 +1,7 @@
package work.slhaf.partner.core.cognition.impression.search
/**
 * A single search result.
 *
 * @property document the document that was hit.
 * @property score score assigned to the hit.
 * @property matchedTerms terms reported as matched; empty by default.
 */
data class ImpressionSearchHit(
    val document: ImpressionSearchDocument,
    val score: Double,
    val matchedTerms: Set<String> = emptySet(),
)

View File

@@ -0,0 +1,11 @@
package work.slhaf.partner.core.cognition.impression.search
/**
 * Identifies what a search document belongs to.
 *
 * @property type whether the target is an active entity or an entity.
 * @property id identifier of the target within its [type].
 */
data class ImpressionSearchTarget(
    val type: Type,
    val id: String,
) {
    /** Kinds of search target. */
    enum class Type {
        ACTIVE_ENTITY,
        ENTITY
    }
}

View File

@@ -0,0 +1,15 @@
package work.slhaf.partner.core.cognition.impression.search
/**
 * Text search over [ImpressionSearchDocument]s.
 *
 * NOTE(review): the semantics below are inferred from the method names only;
 * confirm them against the implementations.
 */
interface ImpressionTextSearch {
    /** Rebuilds the search state from [documents]. */
    fun rebuild(documents: Collection<ImpressionSearchDocument>)

    /** Adds [document], or updates it if already present. */
    fun upsert(document: ImpressionSearchDocument)

    /** Removes the documents that belong to [target]. */
    fun removeByTarget(target: ImpressionSearchTarget)

    /**
     * Searches for [query].
     *
     * @param limit limit on the number of hits, `20` by default.
     */
    fun search(
        query: String,
        limit: Int = 20,
    ): List<ImpressionSearchHit>
}