Data Models
Detailed Convex schema definitions, indexes, and data structures for all Cortex tables.
Overview
Cortex uses 12+ Convex tables to implement the 4-layer architecture plus coordination and governance:
| Table | Layer | Purpose | Scoped By | Versioned | Retention |
|---|---|---|---|---|---|
| conversations | 1a | ACID message threads | memorySpaceId | Append-only | Forever |
| immutable | 1b | Shared versioned data | NOT scoped | Auto | Configurable (20 versions) |
| mutable | 1c | Shared live data | NOT scoped | No | N/A (overwrites) |
| memories | 2 | Vector index | memorySpaceId | Auto | Configurable (10 versions) |
| facts | 3 | Structured knowledge | memorySpaceId | Auto | Configurable (10 versions) |
| factHistory | 3 | Belief Revision audit | memorySpaceId | No | Configurable |
| memorySpaces | Coord | Memory space registry | memorySpaceId | No | Until archived |
| contexts | Coord | Workflow coordination | memorySpaceId | Auto | Configurable |
| sessions | Coord | Session lifecycle (v0.27.0+) | userId | No | Timeout-based |
| agents | Coord | Agent metadata (optional) | agentId | No | Until unregistered |
| governancePolicies | Gov | Retention rules | org/space | No | Until removed |
| governanceEnforcement | Gov | Enforcement audit | org/space | No | Configurable |
| graphSyncQueue | Graph | Real-time sync queue | NOT scoped | No | Cleared after sync |
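Note: Tables support an optional tenantId for multi-tenancy (v0.26.0+). In the schema below, the exceptions are factHistory, governancePolicies, governanceEnforcement, and graphSyncQueue, which do not define a tenantId field.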
Complete Schema Definition
Source: convex-dev/schema.ts — This documentation reflects the actual deployed schema.

convex/schema.ts
import { defineSchema, defineTable } from "convex/server";
import { v } from "convex/values";
export default defineSchema({
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1a: Conversations (ACID, Immutable)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1a table: append-only message threads, keyed by conversationId and
// isolated by memorySpaceId.
conversations: defineTable({
// Identity
conversationId: v.string(),
// Memory Space (fundamental isolation boundary)
memorySpaceId: v.string(),
participantId: v.optional(v.string()), // Hive Mode tracking
// Multi-tenancy
tenantId: v.optional(v.string()),
// Type
type: v.union(v.literal("user-agent"), v.literal("agent-agent")),
// Participants (every member is optional; which ones are set depends on `type`)
participants: v.object({
userId: v.optional(v.string()),
agentId: v.optional(v.string()),
participantId: v.optional(v.string()), // Hive Mode
memorySpaceIds: v.optional(v.array(v.string())), // For agent-agent
}),
// Messages (append-only, immutable)
messages: v.array(
v.object({
id: v.string(),
role: v.union(
v.literal("user"),
v.literal("agent"),
v.literal("system"),
),
content: v.string(),
timestamp: v.number(),
participantId: v.optional(v.string()), // Who sent this message
metadata: v.optional(v.any()),
}),
),
// NOTE(review): presumably mirrors messages.length — confirm against the writer.
messageCount: v.number(),
metadata: v.optional(v.any()),
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_conversationId", ["conversationId"]) // Lookup by conversationId
.index("by_memorySpace", ["memorySpaceId"]) // Memory space's conversations
.index("by_tenantId", ["tenantId"]) // Tenant's conversations
.index("by_tenant_space", ["tenantId", "memorySpaceId"]) // Tenant + space compound
.index("by_type", ["type"]) // Separate user-agent from agent-agent
.index("by_user", ["participants.userId"]) // Find user's conversations (GDPR)
.index("by_agent", ["participants.agentId"]) // Find agent's conversations
.index("by_memorySpace_user", ["memorySpaceId", "participants.userId"]) // Space + user compound
.index("by_memorySpace_agent", ["memorySpaceId", "participants.agentId"]) // Space + agent compound
.index("by_created", ["createdAt"]), // Chronological ordering
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1b: Immutable Store (ACID, Versioned, Shared)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1b table: shared, versioned records addressed by (type, id).
// Has no memorySpaceId field, i.e. it is not scoped by memory space.
immutable: defineTable({
// Identity (composite key: type + id)
type: v.string(), // 'kb-article', 'policy', 'audit-log', 'feedback', 'user'
id: v.string(), // Type-specific logical ID
// Data (flexible, immutable once stored)
data: v.any(),
// GDPR support
userId: v.optional(v.string()),
// Multi-tenancy
tenantId: v.optional(v.string()),
// Versioning: `version` is the current version number; earlier versions are
// kept inline in `previousVersions`.
version: v.number(),
previousVersions: v.array(
v.object({
version: v.number(),
data: v.any(),
timestamp: v.number(),
metadata: v.optional(v.any()),
}),
),
// Metadata
metadata: v.optional(v.any()),
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_type_id", ["type", "id"]) // Primary lookup by type + id
.index("by_type", ["type"]) // List all of a type
.index("by_tenantId", ["tenantId"]) // Tenant's records
.index("by_tenant_type_id", ["tenantId", "type", "id"]) // Tenant-scoped lookup
.index("by_userId", ["userId"]) // Find all records for user (GDPR)
.index("by_created", ["createdAt"]), // Chronological
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1c: Mutable Store (ACID, No Versioning, Shared)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 1c table: shared current-value records addressed by (namespace, key).
// No version fields and no memorySpaceId field.
mutable: defineTable({
// Composite key
namespace: v.string(), // 'inventory', 'config', 'counters', etc.
key: v.string(), // Unique within namespace
// Value (flexible, mutable)
value: v.any(),
// GDPR support
userId: v.optional(v.string()),
// Multi-tenancy
tenantId: v.optional(v.string()),
// Metadata
metadata: v.optional(v.any()),
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_namespace_key", ["namespace", "key"]) // Primary lookup by namespace + key
.index("by_namespace", ["namespace"]) // List all in namespace
.index("by_tenantId", ["tenantId"]) // Tenant's records
.index("by_tenant_namespace", ["tenantId", "namespace"]) // Tenant-scoped namespace listing
.index("by_tenant_namespace_key", ["tenantId", "namespace", "key"]) // Tenant-scoped lookup
.index("by_userId", ["userId"]) // Find all records for user (GDPR)
.index("by_updated", ["updatedAt"]), // Sort by update time
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 2: Vector Memory (Searchable, memorySpace-scoped, Versioned)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 2 table: searchable memories, isolated by memorySpaceId. Carries a
// full-text search index on `content` and a vector index on `embedding`.
memories: defineTable({
// Identity
memoryId: v.string(),
memorySpaceId: v.string(), // PRIMARY: Memory space isolation
participantId: v.optional(v.string()), // Hive Mode participant tracking
// Multi-tenancy
tenantId: v.optional(v.string()),
// Content
content: v.string(),
contentType: v.union(
v.literal("raw"),
v.literal("summarized"),
v.literal("fact"), // NEW: For facts indexed in vector layer
),
// Optional: a memory may be stored without an embedding.
embedding: v.optional(v.array(v.float64())),
// Source (flattened for indexing performance)
sourceType: v.union(
v.literal("conversation"),
v.literal("system"),
v.literal("tool"),
v.literal("a2a"),
v.literal("fact-extraction"), // NEW: For facts
),
sourceUserId: v.optional(v.string()),
sourceUserName: v.optional(v.string()),
sourceTimestamp: v.number(),
// Message role (for conversation memories)
messageRole: v.optional(
v.union(v.literal("user"), v.literal("agent"), v.literal("system")),
),
// Owner Attribution
userId: v.optional(v.string()), // For user-owned memories (GDPR)
agentId: v.optional(v.string()), // For agent-owned memories (deletion)
// References to Layer 1
conversationRef: v.optional(
v.object({
conversationId: v.string(),
messageIds: v.array(v.string()),
}),
),
immutableRef: v.optional(
v.object({
type: v.string(),
id: v.string(),
version: v.optional(v.number()),
}),
),
// Mutable records are not versioned, so the reference carries a snapshot of
// the value together with the time it was taken.
mutableRef: v.optional(
v.object({
namespace: v.string(),
key: v.string(),
snapshotValue: v.any(),
snapshotAt: v.number(),
}),
),
// NEW: Reference to Layer 3 fact
factsRef: v.optional(
v.object({
factId: v.string(),
version: v.optional(v.number()),
}),
),
// Metadata (flattened for indexing/filtering)
importance: v.number(), // 0-100 (flattened for filtering)
tags: v.array(v.string()), // Flattened for filtering
// Enrichment Fields (for bullet-proof retrieval)
enrichedContent: v.optional(v.string()),
factCategory: v.optional(v.string()),
// Flexible metadata
metadata: v.optional(v.any()),
// Versioning: earlier versions keep their own content and embedding inline.
version: v.number(),
previousVersions: v.array(
v.object({
version: v.number(),
content: v.string(),
embedding: v.optional(v.array(v.float64())),
timestamp: v.number(),
}),
),
// Timestamps & Access
createdAt: v.number(),
updatedAt: v.number(),
lastAccessed: v.optional(v.number()),
accessCount: v.number(),
// Streaming support (NEW - v0.23.0+)
isPartial: v.optional(v.boolean()),
partialMetadata: v.optional(v.any()),
})
.index("by_memorySpace", ["memorySpaceId"]) // Memory space's memories (primary)
.index("by_memoryId", ["memoryId"]) // Lookup by memoryId
.index("by_tenantId", ["tenantId"]) // Tenant's memories
.index("by_tenant_space", ["tenantId", "memorySpaceId"]) // Tenant + space compound
.index("by_userId", ["userId"]) // GDPR cascade
.index("by_agentId", ["agentId"]) // Agent deletion cascade
.index("by_memorySpace_created", ["memorySpaceId", "createdAt"]) // Chronological within space
.index("by_memorySpace_userId", ["memorySpaceId", "userId"]) // Space + user compound
.index("by_memorySpace_agentId", ["memorySpaceId", "agentId"]) // Space + agent compound
.index("by_participantId", ["participantId"]) // Hive Mode tracking
// Full-text keyword search over `content`.
.searchIndex("by_content", {
searchField: "content",
filterFields: [
"memorySpaceId",
"tenantId",
"sourceType",
"userId",
"agentId",
"participantId",
],
})
// Semantic similarity search over `embedding`.
.vectorIndex("by_embedding", {
vectorField: "embedding",
dimensions: 1536, // Default: OpenAI text-embedding-3-small
filterFields: [
"memorySpaceId",
"tenantId",
"userId",
"agentId",
"participantId",
],
}),
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 3: Facts Store (NEW - memorySpace-scoped, Versioned)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Layer 3 table: structured facts, isolated by memorySpaceId. Revisions are
// linked through supersededBy / supersedes rather than an inline version list.
facts: defineTable({
// Identity
factId: v.string(),
memorySpaceId: v.string(),
participantId: v.optional(v.string()),
userId: v.optional(v.string()),
// Multi-tenancy
tenantId: v.optional(v.string()),
// Fact content
fact: v.string(),
factType: v.union(
v.literal("preference"),
v.literal("identity"),
v.literal("knowledge"),
v.literal("relationship"),
v.literal("event"),
v.literal("observation"),
v.literal("custom"),
),
// Triple structure (subject-predicate-object); each part is optional
subject: v.optional(v.string()),
predicate: v.optional(v.string()),
object: v.optional(v.string()),
// Quality & Source
confidence: v.number(),
sourceType: v.union(
v.literal("conversation"),
v.literal("system"),
v.literal("tool"),
v.literal("manual"),
v.literal("a2a"),
),
sourceRef: v.optional(
v.object({
conversationId: v.optional(v.string()),
messageIds: v.optional(v.array(v.string())),
memoryId: v.optional(v.string()),
}),
),
// Metadata & Tags
metadata: v.optional(v.any()),
tags: v.array(v.string()),
// Enrichment Fields (v0.15.0+)
category: v.optional(v.string()),
searchAliases: v.optional(v.array(v.string())),
semanticContext: v.optional(v.string()),
entities: v.optional(
v.array(
v.object({
name: v.string(),
type: v.string(),
fullValue: v.optional(v.string()),
}),
),
),
relations: v.optional(
v.array(
v.object({
subject: v.string(),
predicate: v.string(),
object: v.string(),
}),
),
),
// Temporal validity
validFrom: v.optional(v.number()),
validUntil: v.optional(v.number()),
// Versioning (belief revision)
version: v.number(),
supersededBy: v.optional(v.string()), // factId of newer version
supersedes: v.optional(v.string()), // factId this replaces
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_factId", ["factId"]) // Lookup by factId
.index("by_memorySpace", ["memorySpaceId"]) // Memory space's facts
.index("by_tenantId", ["tenantId"]) // Tenant's facts
.index("by_tenant_space", ["tenantId", "memorySpaceId"]) // Tenant + space compound
.index("by_memorySpace_subject", ["memorySpaceId", "subject"]) // Entity-centric queries
.index("by_participantId", ["participantId"]) // Hive Mode tracking
.index("by_userId", ["userId"]) // GDPR cascade
// Full-text search over the `fact` text.
.searchIndex("by_content", {
searchField: "fact",
filterFields: ["memorySpaceId", "tenantId", "factType"],
}),
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Fact History (Belief Revision Audit Trail)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Audit trail for belief revision: one row per change event on a fact.
// Note: this table defines no tenantId field.
factHistory: defineTable({
// Identity
eventId: v.string(),
factId: v.string(),
memorySpaceId: v.string(),
// Action recorded by this event
action: v.union(
v.literal("CREATE"),
v.literal("UPDATE"),
v.literal("SUPERSEDE"),
v.literal("DELETE"),
),
// Values before / after the change
oldValue: v.optional(v.string()),
newValue: v.optional(v.string()),
// Relationships
supersededBy: v.optional(v.string()),
supersedes: v.optional(v.string()),
// Decision context
reason: v.optional(v.string()),
confidence: v.optional(v.number()),
// Pipeline info: flags for the slot-matching, semantic-matching and
// LLM-resolution stages.
pipeline: v.optional(
v.object({
slotMatching: v.optional(v.boolean()),
semanticMatching: v.optional(v.boolean()),
llmResolution: v.optional(v.boolean()),
}),
),
// Source context
userId: v.optional(v.string()),
participantId: v.optional(v.string()),
conversationId: v.optional(v.string()),
timestamp: v.number(),
})
.index("by_eventId", ["eventId"]) // Lookup by eventId
.index("by_factId", ["factId"]) // Get history for a fact
.index("by_memorySpace", ["memorySpaceId"]) // All changes in a space
.index("by_memorySpace_timestamp", ["memorySpaceId", "timestamp"]) // Time-range queries
.index("by_action", ["action"]) // Filter by action type
.index("by_userId", ["userId"]) // GDPR cascade
.index("by_timestamp", ["timestamp"]), // Chronological
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Memory Spaces Registry (Hive/Collaboration Mode Management)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Registry of memory spaces and their participants.
memorySpaces: defineTable({
// Identity
memorySpaceId: v.string(),
name: v.optional(v.string()),
tenantId: v.optional(v.string()),
type: v.union(
v.literal("personal"),
v.literal("team"),
v.literal("project"),
v.literal("custom"),
),
// Participants (for Hive Mode)
participants: v.array(
v.object({
id: v.string(), // Participant ID
type: v.string(), // 'ai-tool', 'human', 'ai-agent', 'system'
joinedAt: v.number(),
}),
),
// Unlike most other tables, metadata is required here (v.any(), not optional).
metadata: v.any(),
status: v.union(v.literal("active"), v.literal("archived")),
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_memorySpaceId", ["memorySpaceId"]) // Lookup by memorySpaceId
.index("by_tenantId", ["tenantId"]) // Tenant's memory spaces
.index("by_tenant_memorySpaceId", ["tenantId", "memorySpaceId"]) // Tenant-scoped lookup
.index("by_tenant_status", ["tenantId", "status"]) // Tenant + status
.index("by_status", ["status"]) // Filter active/archived
.index("by_type", ["type"]) // Filter by type
.index("by_created", ["createdAt"]), // Chronological
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Contexts (Hierarchical Coordination, memorySpace-scoped with cross-space support)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Hierarchical workflow/task contexts. Each context belongs to one memory
// space; parentId and grantedAccess allow links across spaces.
contexts: defineTable({
// Identity
contextId: v.string(),
memorySpaceId: v.string(),
tenantId: v.optional(v.string()),
// Purpose
purpose: v.string(),
description: v.optional(v.string()),
// Hierarchy
parentId: v.optional(v.string()), // Can be cross-space
rootId: v.optional(v.string()),
depth: v.number(), // 0 = root
childIds: v.array(v.string()),
// Status
status: v.union(
v.literal("active"),
v.literal("completed"),
v.literal("cancelled"),
v.literal("blocked"),
),
// Source conversation
conversationRef: v.optional(
v.object({
conversationId: v.string(),
messageIds: v.optional(v.array(v.string())),
}),
),
// User association
userId: v.optional(v.string()),
// Participants
participants: v.array(v.string()),
// Cross-space access control: one entry per memory space granted access
grantedAccess: v.optional(
v.array(
v.object({
memorySpaceId: v.string(),
scope: v.string(),
grantedAt: v.number(),
}),
),
),
// Data
data: v.optional(v.any()),
metadata: v.optional(v.any()),
// Versioning: earlier versions record the status and data at that version.
version: v.number(),
previousVersions: v.array(
v.object({
version: v.number(),
status: v.string(),
data: v.optional(v.any()),
timestamp: v.number(),
updatedBy: v.optional(v.string()), // Agent/participant that made the change
}),
),
createdAt: v.number(),
updatedAt: v.number(),
completedAt: v.optional(v.number()),
})
.index("by_contextId", ["contextId"]) // Lookup by contextId
.index("by_memorySpace", ["memorySpaceId"]) // Memory space's contexts
.index("by_tenantId", ["tenantId"]) // Tenant's contexts
.index("by_tenant_contextId", ["tenantId", "contextId"]) // Tenant + context ID compound
.index("by_tenant_space", ["tenantId", "memorySpaceId"]) // Tenant + space compound
.index("by_parentId", ["parentId"]) // Get children
.index("by_rootId", ["rootId"]) // Get all in workflow tree
.index("by_status", ["status"]) // Filter by status
.index("by_memorySpace_status", ["memorySpaceId", "status"]) // Space + status compound
.index("by_userId", ["userId"]) // GDPR cascade
.index("by_created", ["createdAt"]), // Chronological ordering
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Agents Registry (Optional Metadata Layer)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Optional registry of agent metadata, keyed by agentId.
agents: defineTable({
agentId: v.string(),
tenantId: v.optional(v.string()),
name: v.string(),
description: v.optional(v.string()),
metadata: v.optional(v.any()), // Team, capabilities, version, etc.
config: v.optional(v.any()), // Agent-specific configuration
status: v.union(
v.literal("active"),
v.literal("inactive"),
v.literal("archived"),
),
registeredAt: v.number(),
updatedAt: v.number(),
lastActive: v.optional(v.number()),
})
.index("by_agentId", ["agentId"]) // Lookup by agentId
.index("by_tenantId", ["tenantId"]) // Agents of a tenant
.index("by_tenant_status", ["tenantId", "status"]) // Tenant + status compound
.index("by_status", ["status"]) // Filter by status
.index("by_registered", ["registeredAt"]), // Ordered by registration time
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Sessions (Native Session Management - v0.27.0+)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Session lifecycle records (v0.27.0+). userId is required; the memory space
// association is optional.
sessions: defineTable({
// Identity
sessionId: v.string(),
userId: v.string(),
tenantId: v.optional(v.string()),
memorySpaceId: v.optional(v.string()),
// Session state
status: v.union(v.literal("active"), v.literal("idle"), v.literal("ended")),
startedAt: v.number(),
lastActiveAt: v.number(),
endedAt: v.optional(v.number()),
expiresAt: v.optional(v.number()),
// Extensible metadata
metadata: v.optional(v.any()),
// Statistics
messageCount: v.number(),
memoryCount: v.number(),
})
.index("by_sessionId", ["sessionId"]) // Lookup by sessionId
.index("by_userId", ["userId"]) // User's sessions
.index("by_tenantId", ["tenantId"]) // Tenant's sessions
.index("by_tenant_user", ["tenantId", "userId"]) // Tenant + user sessions
.index("by_status", ["status"]) // Active/idle/ended
.index("by_memorySpace", ["memorySpaceId"]) // Sessions in memory space
.index("by_lastActive", ["lastActiveAt"]) // For expiration cleanup
.index("by_tenant_status", ["tenantId", "status"]), // Tenant + status
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Governance Policies (Data Retention, Purging, and Compliance)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Governance policies (retention rules). A policy can be attached to an
// organization and/or a memory space; both references are optional.
// Note: this table defines no tenantId field.
governancePolicies: defineTable({
organizationId: v.optional(v.string()),
memorySpaceId: v.optional(v.string()),
policy: v.any(), // Policy payload; its shape is not validated by the schema
isActive: v.boolean(),
appliedBy: v.optional(v.string()),
createdAt: v.number(),
updatedAt: v.number(),
})
.index("by_organization", ["organizationId"]) // Policies of an organization
.index("by_memorySpace", ["memorySpaceId"]) // Policies of a memory space
.index("by_active", ["isActive", "organizationId"]) // Active flag first, then organization
.index("by_updated", ["updatedAt"]), // Ordered by update time
// Audit log of governance enforcement runs: what triggered a run, which
// layers/rules it covered, and how much was removed.
// Note: this table defines no tenantId field.
governanceEnforcement: defineTable({
organizationId: v.optional(v.string()),
memorySpaceId: v.optional(v.string()),
enforcementType: v.union(v.literal("automatic"), v.literal("manual")),
layers: v.array(v.string()),
rules: v.array(v.string()),
versionsDeleted: v.number(),
recordsPurged: v.number(),
storageFreed: v.number(), // NOTE(review): unit not stated in the schema — confirm
triggeredBy: v.optional(v.string()),
executedAt: v.number(),
})
.index("by_organization", ["organizationId", "executedAt"]) // Organization's runs, ordered by executedAt
.index("by_memorySpace", ["memorySpaceId", "executedAt"]) // Memory space's runs, ordered by executedAt
.index("by_executed", ["executedAt"]), // All runs, ordered by executedAt
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Graph Sync Queue (Real-time Graph Database Synchronization)
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
// Queue of pending changes to mirror into the graph database.
// Note: this table defines no tenantId or memorySpaceId field.
graphSyncQueue: defineTable({
table: v.string(), // Name of the source table the change belongs to
entityId: v.string(),
operation: v.union(
v.literal("insert"),
v.literal("update"),
v.literal("delete"),
),
entity: v.optional(v.any()),
// Sync bookkeeping
synced: v.boolean(),
syncedAt: v.optional(v.number()),
failedAttempts: v.optional(v.number()),
lastError: v.optional(v.string()),
priority: v.optional(v.string()),
createdAt: v.number(),
})
.index("by_synced", ["synced"]) // Filter by synced flag
.index("by_table", ["table"]) // Entries for one source table
.index("by_table_entity", ["table", "entityId"]) // Entries for one entity
.index("by_priority", ["priority", "synced"]) // Priority first, then synced flag
.index("by_created", ["createdAt"]), // Ordered by creation time
});
Table Details
conversations (Layer 1a)
Purpose: Immutable conversation threads (memorySpace-scoped)
Document Structure:
{
_id: Id<"conversations">,
// Identity & Isolation
conversationId: string,
memorySpaceId: string, // PRIMARY: Isolation boundary
participantId?: string, // NEW: Hive Mode tracking
tenantId?: string, // NEW: Multi-tenancy
// Type
type: "user-agent" | "agent-agent",
// Participants
participants: {
userId?: string,
agentId?: string,
participantId?: string, // NEW: Hive Mode
memorySpaceIds?: string[], // For agent-agent
},
// Messages (append-only)
messages: Array<{
id: string,
role: "user" | "agent" | "system",
content: string,
timestamp: number,
participantId?: string, // NEW: Who sent this
metadata?: any,
}>,
messageCount: number,
metadata?: any,
createdAt: number,
updatedAt: number,
}
Indexes:
- `by_conversationId` - Unique lookup
- `by_memorySpace` - Memory space's conversations
- `by_tenantId` - Tenant's conversations
- `by_tenant_space` - Tenant + space compound
- `by_type` - Separate user-agent from agent-agent
- `by_user` - Find user's conversations (GDPR)
- `by_agent` - Find agent's conversations
- `by_memorySpace_user` - Space + user compound
- `by_memorySpace_agent` - Space + agent compound
- `by_created` - Chronological ordering
Query Patterns:
// Find conversations in memory space
await ctx.db
.query("conversations")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.collect();
// Find user's conversations in specific space
await ctx.db
.query("conversations")
.withIndex("by_memorySpace_user", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("participants.userId", userId),
)
.collect();
// GDPR: Find all user's conversations across ALL spaces
await ctx.db
.query("conversations")
.withIndex("by_user", (q) => q.eq("participants.userId", userId))
.collect();
immutable (Layer 1b)
Purpose: Shared, versioned, immutable data (TRULY shared - NOT scoped by memorySpace)
Document Structure:
{
_id: Id<"immutable">,
// Composite key
type: string, // 'kb-article', 'policy', 'user', 'feedback', etc.
id: string, // Type-specific logical ID
// Data (flexible, immutable once stored)
data: any,
// GDPR support
userId?: string,
// Multi-tenancy (NEW)
tenantId?: string,
// Metadata
metadata?: any,
// Versioning
version: number,
previousVersions: Array<{
version: number,
data: any,
timestamp: number,
metadata?: any,
}>,
createdAt: number,
updatedAt: number,
}
Indexes:
- `by_type_id` - Primary lookup (unique per type+id)
- `by_type` - List all of a type
- `by_tenantId` - Tenant's records
- `by_tenant_type_id` - Tenant-scoped lookup
- `by_userId` - Find all records for user (GDPR)
- `by_created` - Chronological
Special Case: type='user' (ONLY Special Type)
The ONLY special type is 'user'. It is accessed via the cortex.users.* wrapper, which provides GDPR cascade deletion:
// User profile (SPECIAL - has cortex.users.* wrapper)
{
type: "user", // ← ONLY special type
id: "user-123",
data: {
displayName: "Alex Johnson",
email: "alex@example.com",
preferences: { theme: "dark" },
},
version: 5,
previousVersions: [ /* ... */ ],
}
// Accessed via:
await cortex.users.get('user-123'); // ← Wrapper API
// Equivalent to:
await cortex.immutable.get('user', 'user-123'); // ← Direct API
All Other Types (Developer-Defined Examples):
Every other type is just an example - you define whatever types you need:
// Example: KB article (no special treatment)
{
type: "kb-article", // ← Just an example, not special
id: "refund-policy",
data: { title: "Refund Policy", content: "..." },
metadata: { importance: 90, tags: ["policy"] },
version: 3,
}
// Example: Feedback (no special treatment)
{
type: "user-feedback", // ← Your custom type
id: "feedback-456",
data: { rating: 5, comment: "Great!" },
userId: "user-123", // ← GDPR-enabled
version: 1,
}
// Example: Whatever you want (no special treatment)
{
type: "my-custom-type", // ← Anything you want
id: "my-id",
data: { /* your structure */ },
}
Key Point: Only type='user' gets special API treatment (cortex.users.* wrapper). All other types are accessed via cortex.immutable.* directly.
Query Patterns:
// Get current version
await ctx.db
.query("immutable")
.withIndex("by_type_id", (q) =>
q.eq("type", "kb-article").eq("id", "refund-policy"),
)
.first();
// Multi-tenant lookup
await ctx.db
.query("immutable")
.withIndex("by_tenant_type_id", (q) =>
q
.eq("tenantId", tenantId)
.eq("type", "kb-article")
.eq("id", "refund-policy"),
)
.first();
// Get all user profiles
await ctx.db
.query("immutable")
.withIndex("by_type", (q) => q.eq("type", "user"))
.collect();
// GDPR: Get all immutable records for a user (this table is not scoped by memory space)
await ctx.db
.query("immutable")
.withIndex("by_userId", (q) => q.eq("userId", "user-123"))
.collect();
mutable (Layer 1c)
Purpose: Shared, mutable, current-value data (TRULY shared - NOT scoped by memorySpace)
Document Structure:
{
_id: Id<"mutable">,
// Composite key
namespace: string, // 'inventory', 'config', 'counters', etc.
key: string, // Unique within namespace
// Value (flexible, mutable)
value: any,
// GDPR support
userId?: string,
// Multi-tenancy (NEW)
tenantId?: string,
// Metadata
metadata?: any,
createdAt: number,
updatedAt: number,
}
Indexes:
- `by_namespace_key` - Primary lookup (unique per namespace+key)
- `by_namespace` - List all in namespace
- `by_tenantId` - Tenant's records
- `by_tenant_namespace` - Tenant-scoped namespace listing
- `by_tenant_namespace_key` - Tenant-scoped lookup
- `by_userId` - Find all records for user (GDPR)
- `by_updated` - Sort by update time
Examples:
// Inventory
{
namespace: "inventory",
key: "store-15:produce:apples",
value: {
quantity: 150,
price: 2.99,
unit: "lbs",
},
userId: undefined, // System data
updatedAt: 1729900000000,
}
// User session
{
namespace: "user-sessions",
key: "session-abc123",
value: {
startedAt: 1729900000000,
pagesViewed: 5,
},
userId: "user-123", // ← GDPR-enabled
updatedAt: 1729900500000,
}
Query Patterns:
// Get by namespace + key
await ctx.db
.query("mutable")
.withIndex("by_namespace_key", (q) =>
q.eq("namespace", "inventory").eq("key", "widget-qty"),
)
.unique();
// Multi-tenant lookup
await ctx.db
.query("mutable")
.withIndex("by_tenant_namespace_key", (q) =>
q
.eq("tenantId", tenantId)
.eq("namespace", "inventory")
.eq("key", "widget-qty"),
)
.unique();
// List all in namespace
await ctx.db
.query("mutable")
.withIndex("by_namespace", (q) => q.eq("namespace", "inventory"))
.collect();
// Tenant-scoped namespace
await ctx.db
.query("mutable")
.withIndex("by_tenant_namespace", (q) =>
q.eq("tenantId", tenantId).eq("namespace", "inventory"),
)
.collect();
memories (Layer 2)
Purpose: Searchable memories with semantic and keyword search (memorySpace-scoped)
Document Structure:
{
_id: Id<"memories">,
// Identity & Isolation
memoryId: string,
memorySpaceId: string, // PRIMARY: Isolation boundary
participantId?: string, // NEW: Hive Mode tracking
tenantId?: string, // NEW: Multi-tenancy
// Content
content: string,
contentType: "raw" | "summarized" | "fact", // NEW: "fact" option
embedding?: number[], // 1536-dim default
// Source (flattened for indexing)
sourceType: "conversation" | "system" | "tool" | "a2a" | "fact-extraction",
sourceUserId?: string,
sourceUserName?: string,
sourceTimestamp: number,
// Message role (for conversation memories)
messageRole?: "user" | "agent" | "system",
// Owner Attribution
userId?: string, // For GDPR cascade
agentId?: string, // For agent deletion cascade
// Layer 1 References
conversationRef?: {
conversationId: string,
messageIds: string[],
},
immutableRef?: {
type: string,
id: string,
version?: number,
},
mutableRef?: {
namespace: string,
key: string,
snapshotValue: any,
snapshotAt: number,
},
// NEW: Layer 3 Reference
factsRef?: {
factId: string,
version?: number,
},
// Metadata (flattened for indexing)
importance: number, // 0-100 (flattened)
tags: string[], // Flattened
// Enrichment Fields
enrichedContent?: string,
factCategory?: string,
metadata?: any,
// Versioning
version: number,
previousVersions: Array<{
version: number,
content: string,
embedding?: number[],
timestamp: number,
}>,
// Timestamps & Access
createdAt: number,
updatedAt: number,
lastAccessed?: number,
accessCount: number,
// Streaming support (v0.23.0+)
// Used during progressive storage of streaming responses
isPartial?: boolean, // true when memory is being streamed/stored incrementally
partialMetadata?: any, // Metadata for partial/streaming memories (chunk info, progress, etc.)
}
Indexes:
- `by_memorySpace` - Memory space's memories (primary)
- `by_memoryId` - Unique lookup
- `by_tenantId` - Tenant's memories
- `by_tenant_space` - Tenant + space compound
- `by_userId` - GDPR cascade
- `by_agentId` - Agent deletion cascade
- `by_memorySpace_created` - Chronological within space
- `by_memorySpace_userId` - Space + user compound
- `by_memorySpace_agentId` - Space + agent compound
- `by_participantId` - Hive Mode tracking
- `by_content` (search) - Full-text keyword search
- `by_embedding` (vector) - Semantic similarity search
Query Patterns:
// Semantic search (memorySpace-scoped)
// Convex exposes vector search as ctx.vectorSearch(), which is available in
// actions only; ctx.db.query() has no vector operator. A filter expression is
// either q.eq() on one filterField or q.or() of several equalities (there is
// no AND), so the index filter enforces memorySpaceId and any additional
// constraint (tenantId, userId) is applied to the loaded documents afterwards.
const spaceMatches = await ctx.vectorSearch("memories", "by_embedding", {
  vector: queryVector,
  limit: 10,
  filter: (q) => q.eq("memorySpaceId", memorySpaceId),
});
// spaceMatches is an array of { _id, _score }. Load the documents through a
// query, then keep those whose tenantId equals the caller's tenantId.

// Semantic search (user-specific within space)
// Over-fetch: post-filtering by userId can drop some of the nearest matches.
const userCandidates = await ctx.vectorSearch("memories", "by_embedding", {
  vector: queryVector,
  limit: 50,
  filter: (q) => q.eq("memorySpaceId", memorySpaceId),
});
// Load the documents through a query, then keep the first 10 whose userId
// equals the requested userId.
// Keyword search
await ctx.db
.query("memories")
.withSearchIndex("by_content", (q) =>
q.search("content", "password").eq("memorySpaceId", memorySpaceId),
)
.collect();
// Participant's memories (Hive Mode)
await ctx.db
.query("memories")
.withIndex("by_participantId", (q) => q.eq("participantId", "cursor"))
.collect();
// GDPR: All user memories across ALL spaces
await ctx.db
.query("memories")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
facts (Layer 3)
Purpose: Structured knowledge with belief revision (memorySpace-scoped)
Document Structure:
{
_id: Id<"facts">,
// Identity & Isolation
factId: string,
memorySpaceId: string,
participantId?: string,
userId?: string,
tenantId?: string,
// Fact content
fact: string,
factType: "preference" | "identity" | "knowledge" | "relationship" | "event" | "observation" | "custom",
// Triple structure (subject-predicate-object)
subject?: string,
predicate?: string,
object?: string,
// Quality & Source
confidence: number, // 0-100
sourceType: "conversation" | "system" | "tool" | "manual" | "a2a",
sourceRef?: {
conversationId?: string,
messageIds?: string[],
memoryId?: string,
},
// Metadata & Tags
metadata?: any,
tags: string[],
// Enrichment Fields (v0.15.0+)
category?: string,
searchAliases?: string[],
semanticContext?: string,
entities?: Array<{
name: string,
type: string,
fullValue?: string,
}>,
relations?: Array<{
subject: string,
predicate: string,
object: string,
}>,
// Temporal validity
validFrom?: number,
validUntil?: number,
// Versioning (belief revision)
version: number,
supersededBy?: string, // factId of newer version
supersedes?: string, // factId this replaces
createdAt: number,
updatedAt: number,
}
Indexes:
- `by_factId` - Unique lookup
- `by_memorySpace` - Memory space's facts
- `by_tenantId` - Tenant's facts
- `by_tenant_space` - Tenant + space compound
- `by_memorySpace_subject` - Entity-centric queries
- `by_participantId` - Hive Mode tracking
- `by_userId` - GDPR cascade
- `by_content` (search) - Full-text search
Query Patterns:
// Get fact by ID
await ctx.db
.query("facts")
.withIndex("by_factId", (q) => q.eq("factId", factId))
.first();
// Get all facts in memory space
await ctx.db
.query("facts")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.collect();
// Query by subject (entity-centric)
await ctx.db
.query("facts")
.withIndex("by_memorySpace_subject", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("subject", "user-123"),
)
.collect();
// Search facts
await ctx.db
.query("facts")
.withSearchIndex("by_content", (q) =>
q.search("fact", "favorite color").eq("memorySpaceId", memorySpaceId),
)
.collect();
factHistory (Belief Revision Audit)
Purpose: Audit trail for Belief Revision System (v0.24.0+)
Document Structure:
{
_id: Id<"factHistory">,
// Identity
eventId: string,
factId: string,
memorySpaceId: string,
// Action
action: "CREATE" | "UPDATE" | "SUPERSEDE" | "DELETE",
// Values
oldValue?: string,
newValue?: string,
// Relationships
supersededBy?: string,
supersedes?: string,
// Decision context
reason?: string,
confidence?: number,
// Pipeline info
pipeline?: {
slotMatching?: boolean,
semanticMatching?: boolean,
llmResolution?: boolean,
},
// Source context
userId?: string,
participantId?: string,
conversationId?: string,
timestamp: number,
}
Indexes:
- `by_eventId` - Unique lookup
- `by_factId` - Get history for a fact
- `by_memorySpace` - All changes in a space
- `by_memorySpace_timestamp` - Time-range queries
- `by_action` - Filter by action type
- `by_userId` - GDPR cascade
- `by_timestamp` - Chronological
memorySpaces (Coordination)
Purpose: Memory space registry for Hive/Collaboration modes
Document Structure:
{
_id: Id<"memorySpaces">,
// Identity
memorySpaceId: string,
name?: string,
tenantId?: string,
type: "personal" | "team" | "project" | "custom",
// Participants (for Hive Mode)
participants: Array<{
id: string, // Participant ID
type: string, // 'ai-tool', 'human', 'ai-agent', 'system'
joinedAt: number,
}>,
// Metadata
metadata: any,
status: "active" | "archived",
createdAt: number,
updatedAt: number,
}
Indexes:
- `by_memorySpaceId` - Unique lookup
- `by_tenantId` - Tenant's memory spaces
- `by_tenant_memorySpaceId` - Tenant-scoped lookup
- `by_tenant_status` - Tenant + status
- `by_status` - Filter active/archived
- `by_type` - Filter by type
- `by_created` - Chronological
sessions (Coordination - v0.27.0+)
Purpose: Session lifecycle management
Document Structure:
{
_id: Id<"sessions">,
// Identity
sessionId: string,
userId: string,
tenantId?: string,
memorySpaceId?: string,
// Session state
status: "active" | "idle" | "ended",
startedAt: number,
lastActiveAt: number,
endedAt?: number,
expiresAt?: number,
// Extensible metadata
metadata?: any,
// Statistics
messageCount: number,
memoryCount: number,
}
Indexes:
- by_sessionId - Unique lookup
- by_userId - User's sessions
- by_tenantId - Tenant's sessions
- by_tenant_user - Tenant + user sessions
- by_status - Active/idle/ended
- by_memorySpace - Sessions in memory space
- by_lastActive - For expiration cleanup
- by_tenant_status - Tenant + status
contexts (Coordination)
Purpose: Workflow and task coordination
Document Structure:
{
_id: Id<"contexts">,
contextId: string, // Unique context identifier (indexed via by_contextId)
purpose: string,
description?: string,
memorySpaceId: string,
userId?: string,
// Hierarchy
parentId?: string,
rootId: string, // Self if root
depth: number, // 0 = root
childIds: string[],
participants: string[], // Memory spaces or participants involved (not just agents)
conversationRef?: {
conversationId: string,
messageIds: string[],
},
data: any, // Context-specific data
status: "active" | "completed" | "cancelled" | "blocked",
createdAt: number,
updatedAt: number,
completedAt?: number,
version: number,
previousVersions: Array<{
version: number,
status: string,
data?: any,
timestamp: number,
updatedBy?: string, // Agent/participant that made the change
}>,
}
Indexes:
- by_contextId - Unique lookup
- by_memorySpace - Memory space's contexts
- by_tenantId - Tenant's contexts
- by_tenant_contextId - Tenant + context ID compound
- by_tenant_space - Tenant + space compound
- by_parentId - Get children
- by_rootId - Get all in workflow tree
- by_status - Filter by status
- by_memorySpace_status - Space + status compound
- by_userId - GDPR cascade
- by_created - Chronological ordering
Query Patterns:
// Get active contexts in memory space
await ctx.db
.query("contexts")
.withIndex("by_memorySpace_status", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("status", "active"),
)
.collect();
// Get all children of a context
await ctx.db
.query("contexts")
.withIndex("by_parentId", (q) => q.eq("parentId", parentId))
.collect();
// Get all contexts in workflow tree
await ctx.db
.query("contexts")
.withIndex("by_rootId", (q) => q.eq("rootId", rootId))
.collect();
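// Filter by status via the index, then filter by participant in application code
// (Convex filter expressions have no array-membership operator)
const activeContexts = await ctx.db
.query("contexts")
.withIndex("by_status", (q) => q.eq("status", "active"))
.collect();
const participantContexts = activeContexts.filter((context) =>
context.participants.includes(participantId),
);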
// Get contexts for user (GDPR)
await ctx.db
.query("contexts")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
facts (Layer 3)
Purpose: Structured knowledge with belief revision (memorySpace-scoped)
Document Structure: See complete schema above
Key Features:
- Belief Revision System (v0.24.0+) - Automatic conflict detection and resolution
- Triple structure - Subject-predicate-object for knowledge graphs
- Enrichment fields - searchAliases, semanticContext, entities, relations
- Temporal validity - validFrom, validUntil for time-bound facts
- Supersede chains - supersedes/supersededBy for version tracking
Query Patterns:
// Get facts by subject
await ctx.db
.query("facts")
.withIndex("by_memorySpace_subject", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("subject", "user-123"),
)
.collect();
// Search facts
await ctx.db
.query("facts")
.withSearchIndex("by_content", (q) =>
q.search("fact", query).eq("memorySpaceId", memorySpaceId),
)
.collect();
factHistory (Belief Revision Audit)
Purpose: Complete audit trail for fact changes
Document Structure: See complete schema above
Query Patterns:
// Get history for a fact
await ctx.db
.query("factHistory")
.withIndex("by_factId", (q) => q.eq("factId", factId))
.order("desc")
.collect();
// Get recent changes in memory space
await ctx.db
.query("factHistory")
.withIndex("by_memorySpace_timestamp", (q) =>
q.eq("memorySpaceId", memorySpaceId).gte("timestamp", startTime),
)
.collect();
memorySpaces (Coordination)
Purpose: Memory space registry for Hive/Collaboration modes
Document Structure: See complete schema above
Query Patterns:
// Get memory space
await ctx.db
.query("memorySpaces")
.withIndex("by_memorySpaceId", (q) => q.eq("memorySpaceId", memorySpaceId))
.first();
// List tenant's active spaces
await ctx.db
.query("memorySpaces")
.withIndex("by_tenant_status", (q) =>
q.eq("tenantId", tenantId).eq("status", "active"),
)
.collect();
sessions (Coordination - v0.27.0+)
Purpose: Session lifecycle management
Document Structure: See complete schema above
Query Patterns:
// Get session
await ctx.db
.query("sessions")
.withIndex("by_sessionId", (q) => q.eq("sessionId", sessionId))
.first();
// Get user's active sessions
await ctx.db
.query("sessions")
.withIndex("by_tenant_user", (q) =>
q.eq("tenantId", tenantId).eq("userId", userId),
)
.filter((q) => q.eq(q.field("status"), "active"))
.collect();
// Find idle sessions for cleanup
await ctx.db
.query("sessions")
.withIndex("by_lastActive", (q) => q.lt("lastActiveAt", cutoffTime))
.filter((q) => q.eq(q.field("status"), "active"))
.collect();
agents (Coordination - Optional Metadata)
Purpose: Agent metadata registry for analytics, discovery, and team organization
Note: This is an optional metadata layer complementary to memorySpaces. Memory spaces define isolation boundaries; the agent registry provides optional tracking and analytics.
Document Structure: See complete schema above
Query Patterns:
// Get agent metadata
await ctx.db
.query("agents")
.withIndex("by_agentId", (q) => q.eq("agentId", agentId))
.unique();
governancePolicies (Governance)
Purpose: Data retention, purging, and compliance rules
Document Structure: See complete schema above
Query Patterns:
// Get active policies for organization
await ctx.db
.query("governancePolicies")
.withIndex("by_active", (q) =>
q.eq("isActive", true).eq("organizationId", orgId),
)
.collect();
// Get policy for specific memory space
await ctx.db
.query("governancePolicies")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.first();
governanceEnforcement (Governance Audit)
Purpose: Audit trail for policy enforcement
Document Structure: See complete schema above
Query Patterns:
// Get recent enforcement events for organization
await ctx.db
.query("governanceEnforcement")
.withIndex("by_organization", (q) =>
q.eq("organizationId", orgId).gte("executedAt", startTime),
)
.collect();
graphSyncQueue (Graph Integration)
Purpose: Real-time graph database synchronization queue
Document Structure: See complete schema above
Query Patterns:
// Get unsynced items (reactive query for worker)
await ctx.db
.query("graphSyncQueue")
.withIndex("by_synced", (q) => q.eq("synced", false))
.order("asc")
.take(100); // take() already returns the results; no collect() needed
// Check if entity is synced
await ctx.db
.query("graphSyncQueue")
.withIndex("by_table_entity", (q) =>
q.eq("table", "memories").eq("entityId", memoryId),
)
.first();
Data Relationships
conversationRef (Memory → ACID Conversation)
// Vector memory (Layer 2)
{
_id: "mem_abc",
memorySpaceId: "support-space",
participantId: "support-bot",
content: "User password is Blue",
conversationRef: {
conversationId: "conv_xyz", // ← Points to Layer 1a
messageIds: ["msg_001"],
}
}
// ACID conversation (Layer 1a)
{
_id: "conv_xyz",
conversationId: "conv_xyz",
memorySpaceId: "support-space",
messages: [
{
id: "msg_001", // ← Referenced by memory
content: "My password is Blue",
participantId: "support-bot",
}
]
}
// Relationship: memories (L2) → conversations (L1a) - many-to-one
immutableRef (Vector → Immutable Record)
// Vector memory
{
_id: "mem_def",
content: "Refund policy allows 30 days",
immutableRef: {
type: "kb-article",
id: "refund-policy",
version: 2, // ← Specific version
}
}
// Immutable record
{
_id: "imm_ghi",
type: "kb-article",
id: "refund-policy",
version: 2,
data: {
title: "Refund Policy",
content: "...",
}
}
// Relationship: memories → immutable (many-to-one)
mutableRef (Vector → Mutable Snapshot)
// Vector memory
{
_id: "mem_jkl",
content: "API endpoint changed to v2",
mutableRef: {
namespace: "config",
key: "api-endpoint",
snapshotValue: "https://api.example.com/v2", // ← Value at time
snapshotAt: 1729900000000,
}
}
// Mutable record (current value might have changed!)
{
_id: "mut_mno",
namespace: "config",
key: "api-endpoint",
value: "https://api.example.com/v3", // ← Current (different!)
updatedAt: 1729950000000,
}
// Relationship: memories (L2) → mutable (L1c) - snapshot, not live
factsRef (Memory → Fact)
// Vector memory (Layer 2) indexing a fact
{
_id: "mem_xyz",
memorySpaceId: "user-123-personal",
content: "User prefers dark mode", // Fact in vector layer for search
contentType: "fact",
factsRef: {
factId: "fact_abc", // ← Points to Layer 3
version: 1,
}
}
// Fact (Layer 3)
{
_id: "fact_abc",
factId: "fact_abc",
memorySpaceId: "user-123-personal",
fact: "User prefers dark mode",
factType: "preference",
subject: "user-123",
predicate: "theme_preference",
object: "dark",
confidence: 95,
}
// Relationship: memories (L2) → facts (L3) - many-to-one
// Enables facts to be searchable via vector index
contextId (Memory → Context)
// Memory metadata
{
_id: "mem_pqr",
memorySpaceId: "finance-space",
participantId: "finance-bot",
metadata: {
contextId: "ctx_stu", // ← Links to workflow
}
}
// Context
{
_id: "ctx_stu",
contextId: "ctx_stu",
purpose: "Approve budget increase",
memorySpaceId: "supervisor-space", // Can be different space!
}
// Relationship: memories ← contexts (one-to-many, can be cross-space)
// Note: Stored in metadata, not separate field
Storage Estimates
Size Per Document
| Table | Typical Size | Max Size | Notes |
|---|---|---|---|
conversations | 5-50KB | 1MB | Grows with messages |
immutable | 1-10KB | 100KB | Data payload size |
mutable | 100B-5KB | 1MB | Usually small |
memories | 2-8KB | 50KB | Includes embedding |
facts | 500B-2KB | 10KB | Compact facts |
factHistory | 200B-1KB | 5KB | Event records |
memorySpaces | 500B-2KB | 10KB | Metadata only |
contexts | 500B-2KB | 10KB | Usually small |
sessions | 300B-1KB | 5KB | Metadata only |
agents | 500B | 5KB | Optional metadata |
governancePolicies | 1-5KB | 50KB | Policy definitions |
governanceEnforcement | 500B | 5KB | Audit records |
graphSyncQueue | 1-10KB | 100KB | Entity snapshot + meta |
Embedding sizes:
- 768-dim × 8 bytes = 6KB
- 1024-dim × 8 bytes = 8KB
- 1536-dim × 8 bytes = 12KB (default)
- 3072-dim × 8 bytes = 24KB
Growth Estimates
Small deployment (1K users, 10 memory spaces):
- Conversations: 5K conversations × 10KB = 50MB
- Memories: 50K memories × 5KB = 250MB
- Facts: 100K facts × 1KB = 100MB
- Immutable: 100 records × 5KB = 500KB
- Mutable: 1K records × 1KB = 1MB
- Sessions: 2K sessions × 500B = 1MB
- Total: ~400MB
Medium deployment (100K users, 50 memory spaces):
- Conversations: 500K × 10KB = 5GB
- Memories: 5M × 5KB = 25GB
- Facts: 10M × 1KB = 10GB
- Immutable: 10K × 5KB = 50MB
- Mutable: 100K × 1KB = 100MB
- Sessions: 200K × 500B = 100MB
- factHistory: 1M × 500B = 500MB
- Total: ~41GB
Large deployment (1M users, 200 memory spaces):
- Conversations: 5M × 10KB = 50GB
- Memories: 50M × 5KB = 250GB
- Facts: 100M × 1KB = 100GB
- Immutable: 100K × 5KB = 500MB
- Mutable: 1M × 1KB = 1GB
- Sessions: 2M × 500B = 1GB
- factHistory: 10M × 500B = 5GB
- Total: ~407GB
Convex pricing (estimate):
- Storage: $0.50/GB/month
- Bandwidth: $0.10/GB
- Medium deployment: ~$15-30/month storage
- Large deployment: ~$150-300/month storage
Index Strategy
Compound Indexes for Common Queries
// Single field (basic)
.index("by_memorySpace", ["memorySpaceId"])
// Compound (optimized)
.index("by_tenant_space", ["tenantId", "memorySpaceId"])
.index("by_memorySpace_userId", ["memorySpaceId", "userId"])
.index("by_memorySpace_agentId", ["memorySpaceId", "agentId"])
// Why: Pre-filter before expensive operations
// Example: Get user's memories in a memory space
await ctx.db
.query("memories")
.withIndex("by_memorySpace_userId", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("userId", userId)
)
.collect();
// Uses compound index (fast)
// vs
await ctx.db
.query("memories")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.filter((q) => q.eq(q.field("userId"), userId))
.collect();
// Filters after index (slower)
Vector Index with Filters
.vectorIndex("by_embedding", {
vectorField: "embedding",
dimensions: 1536, // Default
filterFields: ["memorySpaceId", "tenantId", "userId", "agentId", "participantId"],
});
// Query with pre-filter (fast)
await ctx.db
.query("memories")
.withIndex("by_embedding", (q) =>
q.similar("embedding", vector, 10)
.eq("memorySpaceId", memorySpaceId) // ← Filtered BEFORE similarity
.eq("tenantId", tenantId) // ← Filtered BEFORE similarity
.eq("userId", userId) // ← Filtered BEFORE similarity
)
.collect();
// Only searches vectors for this space+tenant+user
Search Indexes for Keywords
.searchIndex("by_content", {
searchField: "content",
filterFields: ["memorySpaceId", "tenantId", "sourceType", "userId", "agentId", "participantId"],
});
// Keyword search with filters
await ctx.db
.query("memories")
.withSearchIndex("by_content", (q) =>
q.search("content", "password")
.eq("memorySpaceId", memorySpaceId)
.eq("tenantId", tenantId)
)
.collect();
Versioning Strategy
Automatic Version Arrays
All versioned entities store history in previousVersions array:
{
version: 3, // Current
previousVersions: [
{ version: 1, ..., timestamp: T0 },
{ version: 2, ..., timestamp: T1 },
], // Subject to retention
}
Retention rules:
- Memories: Keep last 10 versions (configurable)
- Immutable: Keep last 20 versions (configurable by type)
- Contexts: Keep last 5 versions (configurable)
- Users: Keep all versions (no limit)
Cleanup: Governance policies automatically trim old versions.
Version Lookup
// Get current version
const record = await ctx.db.get(recordId);
// Get specific version
const v2 = record.previousVersions.find((v) => v.version === 2);
// Get all versions
const allVersions = [
...record.previousVersions,
{
version: record.version,
data: record.data,
timestamp: record.updatedAt,
},
];
GDPR Compliance Schema
userId Propagation
Tables that hold user-linked data carry a userId field for GDPR compliance (optional everywhere except sessions, where it is required):
// Conversations (nested in participants)
{
memorySpaceId: "support-space",
participants: { userId: "user-123", ... }
// ↑ Indexed via by_user, by_memorySpace_user
}
// Immutable (direct field)
{
type: "user",
id: "user-123",
userId: "user-123", // ← Direct field
// ↑ Indexed via by_userId
}
// Mutable (direct field)
{
namespace: "user-sessions",
key: "session-abc",
userId: "user-123", // ← Direct field
// ↑ Indexed via by_userId
}
// Memories (direct field)
{
memorySpaceId: "user-123-personal",
userId: "user-123", // ← Direct field
// ↑ Indexed via by_userId, by_memorySpace_userId
}
// Facts (direct field)
{
memorySpaceId: "user-123-personal",
userId: "user-123", // ← Direct field
subject: "user-123",
// ↑ Indexed via by_userId
}
// Sessions (direct field)
{
sessionId: "sess-abc",
userId: "user-123", // ← Direct field (required)
// ↑ Indexed via by_userId, by_tenant_user
}
// Contexts (direct field)
{
contextId: "ctx-abc",
memorySpaceId: "support-space",
userId: "user-123", // ← Optional field
// ↑ Indexed via by_userId
}
// factHistory (direct field)
{
eventId: "evt-abc",
userId: "user-123", // ← Optional field
// ↑ Indexed via by_userId
}
Cascade Deletion Query Plan (Planned - Cloud Mode)
// 1. Find all conversations (across ALL memory spaces)
const convos = await ctx.db
.query("conversations")
.withIndex("by_user", (q) => q.eq("participants.userId", userId))
.collect();
// 2. Find all immutable records
const immutable = await ctx.db
.query("immutable")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 3. Find all mutable records
const mutable = await ctx.db
.query("mutable")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 4. Find all memories (across ALL memory spaces)
const memories = await ctx.db
.query("memories")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 5. Find all facts (across ALL memory spaces)
const facts = await ctx.db
.query("facts")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 6. Find all sessions
const sessions = await ctx.db
.query("sessions")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 7. Find all contexts
const contexts = await ctx.db
.query("contexts")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 8. Find all factHistory events
const factHistory = await ctx.db
.query("factHistory")
.withIndex("by_userId", (q) => q.eq("userId", userId))
.collect();
// 9. Delete all (in transaction)
for (const record of [
...convos,
...immutable,
...mutable,
...memories,
...facts,
...sessions,
...contexts,
...factHistory,
]) {
await ctx.db.delete(record._id);
}
Performance: With proper indexes, each table costs an O(log n) index lookup plus deletion work proportional to the number of matching records.
Coverage: User data deleted from ALL layers and all memory spaces.
Flexible Fields with v.any()
Why v.any() for metadata, data, value
Convex allows flexible schemas:
metadata: v.any(), // ← Any JSON-serializable value
// Can be:
metadata: { importance: 85, tags: ["test"] }
metadata: { customField: "value", nested: { deep: true } }
metadata: null
metadata: { anything: "you want" }
TypeScript SDK enforces structure:
// SDK enforces MemoryMetadata structure
interface MemoryMetadata {
importance: number; // REQUIRED at SDK level
tags: string[]; // REQUIRED at SDK level
[key: string]: any; // Custom fields allowed
}
// But Convex schema is flexible (allows evolution)
Benefits:
- Schema evolution without migrations
- Custom fields per use case
- TypeScript type safety at SDK level
- Database flexibility at storage level
Query Optimization
Use Appropriate Index
// Slow: No index
const memories = await ctx.db
.query("memories")
.filter((q) => q.eq(q.field("memorySpaceId"), memorySpaceId))
.collect();
// Scans entire table!
// Fast: With index
const memories = await ctx.db
.query("memories")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.collect();
// O(log n) lookup
Compound Indexes for Multiple Filters
// Inefficient: Filter after index
await ctx.db
.query("memories")
.withIndex("by_memorySpace", (q) => q.eq("memorySpaceId", memorySpaceId))
.filter((q) => q.eq(q.field("userId"), userId))
.collect();
// Efficient: Compound index
await ctx.db
.query("memories")
.withIndex("by_memorySpace_userId", (q) =>
q.eq("memorySpaceId", memorySpaceId).eq("userId", userId),
)
.collect();
// Multi-tenant compound index
await ctx.db
.query("memories")
.withIndex("by_tenant_space", (q) =>
q.eq("tenantId", tenantId).eq("memorySpaceId", memorySpaceId),
)
.collect();
Vector Search with Pre-Filtering
// Fast: Filter before similarity
.vectorIndex("by_embedding", {
vectorField: "embedding",
dimensions: 1536,
filterFields: ["memorySpaceId", "tenantId", "userId", "agentId", "participantId"],
});
// Query only searches relevant subset
await ctx.db
.query("memories")
.withIndex("by_embedding", (q) =>
q.similar("embedding", vector, 10)
.eq("memorySpaceId", memorySpaceId) // ← Filters BEFORE similarity
.eq("tenantId", tenantId) // ← Multi-tenant isolation
)
.collect();
Next Steps
- Convex Integration - How we use Convex features
- Vector Embeddings - Embedding strategy and dimensions
- Search Strategy - Multi-strategy search implementation
- Performance - Optimization techniques
Questions? Ask in GitHub Discussions.