Skip to main content

AI Inference

Package: com.mimik.mimoe.inference

OpenAI-compatible chat completions, streaming, and embeddings.

ChatCompletion

@Serializable
data class ChatCompletion(
val id: String,
val objectType: String = "chat.completion",
val created: Long = 0,
val model: String = "",
val choices: List<ChatCompletion.Choice> = emptyList(),
val usage: ChatCompletion.Usage? = null
)

Response from a chat completion request.

Constructors

constructor(
id: String,
objectType: String = "chat.completion",
created: Long = 0,
model: String = "",
choices: List<ChatCompletion.Choice> = emptyList(),
usage: ChatCompletion.Usage? = null
)

Types

Name | Summary
Choice — @Serializable
data class Choice(val index: Int = 0, val message: ChatMessage, val finishReason: String? = null)
Usage — @Serializable
data class Usage(val promptTokens: Int = 0, val completionTokens: Int = 0, val totalTokens: Int = 0)

Properties

Name | Summary
choices — val choices: List<ChatCompletion.Choice>
created — val created: Long = 0
id — val id: String
model — val model: String
objectType — @SerialName(value = "object")
val objectType: String
usage — val usage: ChatCompletion.Usage? = null

ChatCompletionChunk

@Serializable
data class ChatCompletionChunk(
val id: String,
val objectType: String = "chat.completion.chunk",
val created: Long = 0,
val model: String = "",
val choices: List<ChatCompletionChunk.ChunkChoice> = emptyList()
)

A streaming chunk from a chat completion.

Constructors

constructor(
id: String,
objectType: String = "chat.completion.chunk",
created: Long = 0,
model: String = "",
choices: List<ChatCompletionChunk.ChunkChoice> = emptyList()
)

Types

Name | Summary
ChunkChoice — @Serializable
data class ChunkChoice(val index: Int = 0, val delta: ChatCompletionChunk.Delta, val finishReason: String? = null)
Delta — @Serializable
data class Delta(val role: String? = null, val content: String? = null)

Properties

Name | Summary
choices — val choices: List<ChatCompletionChunk.ChunkChoice>
created — val created: Long = 0
id — val id: String
model — val model: String
objectType — @SerialName(value = "object")
val objectType: String

ChatMessage

@Serializable
data class ChatMessage(val role: String, val content: String)

A message in a chat conversation.

Constructors

constructor(role: String, content: String)

Properties

Name | Summary
content — val content: String
Message content.
role — val role: String
Role of the message sender (system, user, assistant).

EmbeddingData

@Serializable
data class EmbeddingData(
val objectType: String = "embedding",
val embedding: List<Float>,
val index: Int = 0
)

Constructors

constructor(
objectType: String = "embedding",
embedding: List<Float>,
index: Int = 0
)

Properties

Name | Summary
embedding — val embedding: List<Float>
index — val index: Int = 0
objectType — @SerialName(value = "object")
val objectType: String

EmbeddingResponse

@Serializable
data class EmbeddingResponse(
val objectType: String = "list",
val data: List<EmbeddingData> = emptyList(),
val model: String = "",
val usage: EmbeddingUsage? = null
)

Response from an embeddings request.

Constructors

constructor(
objectType: String = "list",
data: List<EmbeddingData> = emptyList(),
model: String = "",
usage: EmbeddingUsage? = null
)

Properties

Name | Summary
data — val data: List<EmbeddingData>
model — val model: String
objectType — @SerialName(value = "object")
val objectType: String
usage — val usage: EmbeddingUsage? = null

EmbeddingUsage

@Serializable
data class EmbeddingUsage(
val promptTokens: Int = 0,
val totalTokens: Int = 0
)

Constructors

constructor(promptTokens: Int = 0, totalTokens: Int = 0)

Properties

Name | Summary
promptTokens — @SerialName(value = "prompt_tokens")
val promptTokens: Int = 0
totalTokens — @SerialName(value = "total_tokens")
val totalTokens: Int = 0

InferenceClient

class InferenceClient

Inference sub-client.

Provides OpenAI-compatible chat completions, streaming, embeddings, and model cache management. Base path: /mimik-ai/openai/v1 Requires the ai-foundation addon.

Access via client.inference.

Functions

chatCompletion

suspend fun chatCompletion(model: String, messages: List<ChatMessage>, temperature: Double? = null, topP: Double? = null, maxTokens: Int? = null): Result<ChatCompletion>

Generate a chat completion.

chatCompletionStream

fun chatCompletionStream(model: String, messages: List<ChatMessage>, temperature: Double? = null, topP: Double? = null, maxTokens: Int? = null): Flow<ChatCompletionChunk>

Stream a chat completion via SSE.

createEmbeddings

suspend fun createEmbeddings(model: String, input: List<String>): Result<EmbeddingResponse>

Generate text embeddings.

listModels

suspend fun listModels(): Result<List<LoadedModel>>

List models currently loaded in the runtime cache.

loadModel

suspend fun loadModel(model: String, chatTemplateHint: String? = null, contextSize: Int? = null, gpuLayerSize: Int? = null): Result<LoadedModel>

Load a model from the Model Registry into runtime cache.

unloadModel

suspend fun unloadModel(modelId: String): Result<Unit>

Unload a model from runtime cache.


LoadedModel

@Serializable
data class LoadedModel(
val id: String,
val objectType: String = "model",
val created: Long = 0,
val ownedBy: String = "",
val info: ModelInfoDetail? = null,
val metrics: ModelMetrics? = null
)

A model loaded in the inference runtime cache.

Returned by GET /mimik-ai/openai/v1/models and POST /mimik-ai/openai/v1/models.

Constructors

constructor(
id: String,
objectType: String = "model",
created: Long = 0,
ownedBy: String = "",
info: ModelInfoDetail? = null,
metrics: ModelMetrics? = null
)

Properties

Name | Summary
created — val created: Long = 0
Creation timestamp (Unix epoch seconds).
id — val id: String
Model identifier.
info — val info: ModelInfoDetail? = null
Model metadata.
metrics — val metrics: ModelMetrics? = null
Runtime performance metrics.
objectType — @SerialName(value = "object")
val objectType: String
Object type (always "model").
ownedBy — @SerialName(value = "owned_by")
val ownedBy: String
Model owner.

ModelInfoDetail

@Serializable
data class ModelInfoDetail(
val kind: String = "",
val chatTemplateHint: String = "",
val nGpuLayers: Int = 0,
val maxContext: Int = 0,
val nVocab: Int = 0,
val nCtxTrain: Int = 0,
val nEmbd: Int = 0,
val nParams: Long = 0,
val modelSize: Long = 0
)

Detailed model metadata.

Constructors

constructor(
kind: String = "",
chatTemplateHint: String = "",
nGpuLayers: Int = 0,
maxContext: Int = 0,
nVocab: Int = 0,
nCtxTrain: Int = 0,
nEmbd: Int = 0,
nParams: Long = 0,
modelSize: Long = 0
)

Properties

Name | Summary
chatTemplateHint — @SerialName(value = "chat_template_hint")
val chatTemplateHint: String
Applied chat template.
kind — val kind: String
Model kind: "llm", "vlm", or "embed".
maxContext — @SerialName(value = "max_context")
val maxContext: Int = 0
Maximum context size.
modelSize — @SerialName(value = "model_size")
val modelSize: Long = 0
Model file size in bytes.
nCtxTrain — @SerialName(value = "n_ctx_train")
val nCtxTrain: Int = 0
Training context length.
nEmbd — @SerialName(value = "n_embd")
val nEmbd: Int = 0
Embedding dimension.
nGpuLayers — @SerialName(value = "n_gpu_layers")
val nGpuLayers: Int = 0
Number of GPU-offloaded layers.
nParams — @SerialName(value = "n_params")
val nParams: Long = 0
Total parameter count.
nVocab — @SerialName(value = "n_vocab")
val nVocab: Int = 0
Vocabulary size.

ModelMetrics

@Serializable
data class ModelMetrics(
val inferenceCount: Int = 0,
val lastUsed: Long = 0,
val loadedAt: Long = 0,
val tokensPerSecond: Double = 0.0,
val avgTokensPerSecond: Double = 0.0,
val lastLatencyMs: Double = 0.0,
val avgLatencyMs: Double = 0.0
)

Runtime performance metrics for a loaded model.

Constructors

constructor(
inferenceCount: Int = 0,
lastUsed: Long = 0,
loadedAt: Long = 0,
tokensPerSecond: Double = 0.0,
avgTokensPerSecond: Double = 0.0,
lastLatencyMs: Double = 0.0,
avgLatencyMs: Double = 0.0
)

Properties

Name | Summary
avgLatencyMs — @SerialName(value = "avg_latency_ms")
val avgLatencyMs: Double = 0.0
Average latency (ms). Embed only.
avgTokensPerSecond — @SerialName(value = "avg_tokens_per_second")
val avgTokensPerSecond: Double = 0.0
Average throughput (tokens/sec). LLM/VLM only.
inferenceCount — @SerialName(value = "inference_count")
val inferenceCount: Int = 0
Total inference calls.
lastLatencyMs — @SerialName(value = "last_latency_ms")
val lastLatencyMs: Double = 0.0
Most recent latency (ms). Embed only.
lastUsed — @SerialName(value = "last_used")
val lastUsed: Long = 0
Most recent usage (Unix epoch seconds).
loadedAt — @SerialName(value = "loaded_at")
val loadedAt: Long = 0
When the model was loaded (Unix epoch seconds).
tokensPerSecond — @SerialName(value = "tokens_per_second")
val tokensPerSecond: Double = 0.0
Most recent throughput (tokens/sec). LLM/VLM only.