| Llama 4 Maverick |
llama-4-maverick
|
0.27 |
0.85 |
-
|
|
| Llama 4 Scout |
llama-4-scout
|
0.18 |
0.59 |
-
|
|
| Llama 3.3 70B Instruct Turbo |
llama-3-3-70b-instruct-turbo
|
0.88 |
0.88 |
-
|
|
| Llama 3.2 3B Instruct Turbo |
llama-3-2-3b-instruct-turbo
|
0.06 |
0.06 |
-
|
|
| Llama 3.1 405B Instruct Turbo |
llama-3-1-405b-instruct-turbo
|
3.50 |
3.50 |
-
|
|
| Llama 3.1 70B Instruct Turbo |
llama-3-1-70b-instruct-turbo
|
0.88 |
0.88 |
-
|
|
| Llama 3.1 8B Instruct Turbo |
llama-3-1-8b-instruct-turbo
|
0.18 |
0.18 |
-
|
|
| Llama 3 8B Instruct Lite |
llama-3-8b-instruct-lite
|
0.10 |
0.10 |
-
|
|
| Llama 3 70B Instruct Reference |
llama-3-70b-instruct-reference
|
0.88 |
0.88 |
-
|
|
| Llama 3 70B Instruct Turbo |
llama-3-70b-instruct-turbo
|
0.88 |
0.88 |
-
|
|
| Llama 2 |
llama-2
|
0.90 |
0.90 |
-
|
|
| DeepSeek-R1 |
deepseek-r1
|
3.00 |
7.00 |
-
|
|
| DeepSeek R1 Distilled Qwen 14B |
deepseek-r1-distilled-qwen-14b
|
0.18 |
0.18 |
-
|
|
| DeepSeek R1 Distilled Llama 70B |
deepseek-r1-distilled-llama-70b
|
2.00 |
2.00 |
-
|
|
| DeepSeek R1-0528-tput |
deepseek-r1-0528-tput
|
0.55 |
2.19 |
-
|
|
| DeepSeek-V3-1 |
deepseek-v3-1
|
0.60 |
1.70 |
-
|
|
| DeepSeek-V3 |
deepseek-v3
|
1.25 |
1.25 |
-
|
|
| gpt-oss-120B |
gpt-oss-120b
|
0.15 |
0.60 |
-
|
|
| gpt-oss-20B |
gpt-oss-20b
|
0.05 |
0.20 |
-
|
|
| Qwen3 Next 80B A3B Instruct |
qwen3-next-80b-a3b-instruct
|
0.15 |
1.50 |
-
|
|
| Qwen3 Next 80B A3B Thinking |
qwen3-next-80b-a3b-thinking
|
0.15 |
1.50 |
-
|
|
| Qwen3-VL 32B Instruct |
qwen3-vl-32b-instruct
|
0.50 |
1.50 |
-
|
|
| Qwen3-Coder 480B A35B Instruct |
qwen3-coder-480b-a35b-instruct
|
2.00 |
2.00 |
-
|
|
| Qwen3 235B A22B Instruct 2507 FP8 |
qwen3-235b-a22b-instruct-2507-fp8
|
0.20 |
0.60 |
-
|
|
| Qwen3 235B A22B Thinking 2507 FP8 |
qwen3-235b-a22b-thinking-2507-fp8
|
0.65 |
3.00 |
-
|
|
| Qwen3 235B A22B FP8 Throughput |
qwen3-235b-a22b-fp8-throughput
|
0.20 |
0.60 |
-
|
|
| Qwen 2.5 72B |
qwen-2-5-72b
|
1.20 |
1.20 |
-
|
|
| Qwen2.5-VL 72B Instruct |
qwen2-5-vl-72b-instruct
|
1.95 |
8.00 |
-
|
|
| Qwen2.5 Coder 32B Instruct |
qwen2-5-coder-32b-instruct
|
0.80 |
0.80 |
-
|
|
| Qwen2.5 7B Instruct Turbo |
qwen2-5-7b-instruct-turbo
|
0.30 |
0.30 |
-
|
|
| Qwen QwQ-32B |
qwen-qwq-32b
|
1.20 |
1.20 |
-
|
|
| GLM-4.6 |
glm-4-6
|
0.60 |
2.20 |
-
|
|
| GLM-4.5-Air |
glm-4-5-air
|
0.20 |
1.10 |
-
|
|
| Kimi K2 Instruct |
kimi-k2-instruct
|
1.00 |
3.00 |
-
|
|
| Kimi K2 Thinking |
kimi-k2-thinking
|
1.20 |
4.00 |
-
|
|
| Kimi K2 0905 |
kimi-k2-0905
|
1.00 |
3.00 |
-
|
|
| Mistral (7B) Instruct v0.2 |
mistral-7b-instruct-v0-2
|
0.20 |
0.20 |
-
|
|
| Mistral Instruct |
mistral-instruct
|
0.20 |
0.20 |
-
|
|
| Mistral Small 3 |
mistral-small-3
|
0.80 |
0.80 |
-
|
|
| Mixtral 8x7B Instruct v0.1 |
mixtral-8x7b-instruct-v0-1
|
0.60 |
0.60 |
-
|
|
| Marin 8B Instruct |
marin-8b-instruct
|
0.18 |
0.18 |
-
|
|
| Arcee AI AFM-4.5B |
arcee-ai-afm-4-5b
|
0.10 |
0.40 |
-
|
|
| Arcee AI Coder-Large |
arcee-ai-coder-large
|
0.50 |
0.80 |
-
|
|
| Arcee AI Maestro |
arcee-ai-maestro
|
0.90 |
3.30 |
-
|
|
| Arcee AI Virtuoso-Large |
arcee-ai-virtuoso-large
|
0.75 |
1.20 |
-
|
|
| Cogito v2 preview - 109B MoE |
cogito-v2-preview-109b-moe
|
0.18 |
0.59 |
-
|
|
| Cogito v2 preview - 405B |
cogito-v2-preview-405b
|
3.50 |
3.50 |
-
|
|
| Cogito v2 preview - 671B MoE |
cogito-v2-preview-671b-moe
|
1.25 |
1.25 |
-
|
|
| Cogito v2 preview - 70B |
cogito-v2-preview-70b
|
0.88 |
0.88 |
-
|
|
| Refuel LLM-2 |
refuel-llm-2
|
0.60 |
0.60 |
-
|
|
| Refuel LLM-2 Small |
refuel-llm-2-small
|
0.20 |
0.20 |
-
|
|
| Typhoon 2 70B Instruct |
typhoon-2-70b-instruct
|
0.88 |
0.88 |
-
|
|
| gemma-3n-E4B-it |
gemma-3n-e4b-it
|
0.02 |
0.04 |
-
|
|