AI on demand: NVIDIA/NVIDIA-Nemotron-3-Super-120B-A12B
Jump to navigation
Jump to search
Calling the model
# Send one chat-completion request to the stoney cloud LLM endpoint and
# pretty-print the JSON response. Requires curl and jq.

# Set your personal key:
STONEY_KEY=sk-...
# Set the desired model:
MODEL=NVIDIA/NVIDIA-Nemotron-3-Super-120B-A12B
# Set your prompt:
PROMPT='Hello.'
# Set the maximum number of tokens:
MAX_TOKENS=100

# Build the request body with jq instead of pasting the variables into a JSON
# string literal: --arg JSON-escapes quotes, backslashes and newlines in the
# model name and prompt, and --argjson rejects a MAX_TOKENS value that is not
# valid JSON (e.g. empty or non-numeric) before any request is sent.
REQUEST_BODY=$(
  jq --null-input --compact-output \
    --arg model "$MODEL" \
    --arg prompt "$PROMPT" \
    --argjson max_tokens "$MAX_TOKENS" \
    '{
      model: $model,
      messages: [
        {role: "user", content: $prompt}
      ],
      max_tokens: $max_tokens
    }'
)

curl https://llm.stoney-cloud.com/v1/chat/completions \
  --silent --fail --show-error \
  --header "Authorization: Bearer $STONEY_KEY" \
  --header 'Content-Type: application/json' \
  --data "$REQUEST_BODY" \
  | jq
Example output:
{
  "id": "chatcmpl-af98aba2443fdd6f",
  "object": "chat.completion",
  "created": 1778156848,
  "model": "NVIDIA/NVIDIA-Nemotron-3-Super-120B-A12B",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "\n\nHello! How can I assist you today?",
        "refusal": null,
        "annotations": null,
        "audio": null,
        "function_call": null,
        "tool_calls": [],
        "reasoning": "We need to respond with a friendly greeting. No special instructions.\n\n"
      },
      "logprobs": null,
      "finish_reason": "stop",
      "stop_reason": null,
      "token_ids": null
    }
  ],
  "service_tier": null,
  "system_fingerprint": null,
  "usage": {
    "prompt_tokens": 18,
    "total_tokens": 43,
    "completion_tokens": 25,
    "prompt_tokens_details": null
  },
  "prompt_logprobs": null,
  "prompt_token_ids": null,
  "kv_transfer_params": null
}