AI Providers
Explore the latency and prices of leading AI providers
Provider
Best Price
Best Latency
# of Models
Deepinfra
AI Provider
Best Price
66 Models
Best Latency
59 Models
# Models
77 Models
| Model | Latency | Blended $ | Input $ | Output $ |
|---|---|---|---|---|
| llama-3.2-1b | 1st 0.44 | 2nd $0.01 | $0.01 | $0.01 |
| llama-3.1-8b-turbo-fp8 | 1st 0.18 | 1st $0.02 | $0.02 | $0.03 |
| llama-3.2-3b | 1st 0.46 | 1st $0.02 | $0.02 | $0.02 |
| mistral-nemo | 1st 0.19 | 1st $0.03 | $0.02 | $0.04 |
| mistral-7b | 3rd 0.45 | 1st $0.03 | $0.03 | $0.05 |
| llama-3.1-8b | 0.28 | 3rd $0.04 | $0.03 | $0.05 |
| llama-3-8b | 1st 0.31 | 1st $0.04 | $0.03 | $0.06 |
| llama-3.2-11b-vision | 2nd 1.92 | 1st $0.05 | $0.05 | $0.05 |
| gemma-3-4b | 1st 0.35 | 1st $0.05 | $0.04 | $0.08 |
| ocr | 1st 0.17 | 2nd $0.05 | $0.03 | $0.10 |
| gpt-oss-20b-high | 2nd 0.18 | 2nd $0.06 | $0.03 | $0.14 |
| gemma-3-12b | 1st 0.29 | 1st $0.06 | $0.04 | $0.13 |
| mistral-small-3 | 3rd 0.39 | 1st $0.06 | $0.05 | $0.08 |
| mistral-small-3.1 | 3rd 0.39 | 1st $0.06 | $0.05 | $0.10 |
| nvidia-nemotron-nano-9b-v2 | 1st 0.42 | 1st $0.07 | $0.04 | $0.16 |
| devstral-small-may | 1st 0.25 | 1st $0.07 | $0.06 | $0.12 |
| gpt-oss-120b-high | 2nd 0.26 | 1st $0.08 | $0.04 | $0.19 |
| qwen2.5-coder-32b | 2nd 0.40 | 1st $0.08 | $0.06 | $0.15 |
| phi-4 | 1st 0.31 | 1st $0.09 | $0.07 | $0.14 |
| nvidia-nemotron-3-nano | 1st 0.24 | 1st $0.10 | $0.06 | $0.24 |
| mistral-small-3.2-fp8 | 1st 0.25 | 1st $0.11 | $0.07 | $0.20 |
| gemma-3-27b | 1st 0.36 | 1st $0.11 | $0.09 | $0.16 |
| qwen3-14b-fp8 | 1st 0.21 | 1st $0.12 | $0.08 | $0.24 |
| devstral-small | 1st 0.26 | 1st $0.12 | $0.07 | $0.28 |
| qwen3-coder-30b-a3b-fp8 | 1st 0.22 | 1st $0.12 | $0.07 | $0.26 |
| qwen3-30b-fp8 | 1st 0.21 | 1st $0.13 | $0.08 | $0.29 |
| llama-4-scout | 0.32 | 2nd $0.14 | $0.08 | $0.30 |
| qwq-32b-preview | 1st 0.37 | 1st $0.14 | $0.12 | $0.18 |
| qwen3-32b-fp8 | 1st 0.54 | 1st $0.15 | $0.10 | $0.30 |
| llama-3.3-70b-turbo-fp8 | 1st 0.52 | 1st $0.15 | $0.10 | $0.32 |
| qwen3-235b-2507 | 2nd 0.41 | 1st $0.17 | $0.07 | $0.46 |
| llama-nemotron-super-49b-v1.5 | 1st 0.44 | 1st $0.17 | $0.10 | $0.40 |
| qwen2.5-72b | 1st 0.31 | 2nd $0.19 | $0.12 | $0.39 |
| qwen3-vl-4b-fp8 | 1st 0.45 | 1st $0.23 | $0.10 | $0.60 |
| deepseek-v3.2-exp | 1st 0.62 | 1st $0.24 | $0.21 | $0.32 |
| qwen3-235b-fp8 | 1st 0.39 | 1st $0.25 | $0.13 | $0.60 |
| gpt-oss-120b-high-turbo | 1st 0.19 | 1st $0.26 | $0.15 | $0.60 |
| llama-4-maverick-fp8 | 1st 0.41 | 1st $0.26 | $0.15 | $0.60 |
| qwen3-vl-30b-a3b-fp8 | 1st 0.19 | 1st $0.26 | $0.15 | $0.60 |
| llama-3.3-70b | 0.55 | 2nd $0.27 | $0.23 | $0.40 |
| deepseek-r1-distill-qwen-32b | 1st 0.37 | 1st $0.27 | $0.27 | $0.27 |
| deepseek-v3.2 | 1st 0.61 | 1st $0.29 | $0.26 | $0.39 |
| hermes-3-llama-3.1-70b | 1st 0.30 | 1st $0.30 | $0.30 | $0.30 |
| nvidia-nemotron-nano-12b-v2-vl-fp8 | 1st 0.21 | 1st $0.30 | $0.20 | $0.60 |
| olmo-3.1-32b-instruct | 1st 0.24 | 1st $0.30 | $0.20 | $0.60 |
| llama-3-70b | 1st 0.35 | 1st $0.33 | $0.30 | $0.40 |
| qwen3-next-80b-a3b | 1st 0.30 | 2nd $0.34 | $0.09 | $1.10 |
| deepseek-v3.1-terminus-fp4 | 1st 0.42 | 1st $0.35 | $0.21 | $0.79 |
| deepseek-v3.1-fp4 | 1st 0.75 | 1st $0.35 | $0.21 | $0.79 |
| llama-3.2-90b-vision | 2nd 0.57 | 1st $0.36 | $0.35 | $0.40 |
| deepseek-v3-0324 | 0.57 | 1st $0.37 | $0.20 | $0.88 |
| llama-3.1-70b | 1st 0.42 | 1st $0.40 | $0.40 | $0.40 |
| llama-3.1-70b-turbo-fp8 | 1st 0.33 | 1st $0.40 | $0.40 | $0.40 |
| glm-4.5-air | 1st 0.17 | 2nd $0.42 | $0.20 | $1.10 |
| minimax-m2 | 1st 0.27 | 1st $0.45 | $0.25 | $1.02 |
| glm-4.6v-fp8 | 1st 0.25 | 1st $0.45 | $0.30 | $0.90 |
| qwen3-vl-235b-a22b-fp8 | 1st 0.34 | 1st $0.45 | $0.20 | $1.20 |
| deepseek-v3-dec | 1st 0.38 | 1st $0.46 | $0.32 | $0.89 |
| llama-4-maverick-turbo-fp8 | 1st 0.47 | 1st $0.50 | $0.50 | $0.50 |
| qwen3-coder-480b-turbo-fp4 | 1st 0.23 | 1st $0.51 | $0.28 | $1.20 |
| minimax-m2.1-fp8 | 1st 0.27 | 1st $0.51 | $0.28 | $1.20 |
| mixtral-8x7b | 2nd 0.32 | 1st $0.54 | $0.54 | $0.54 |
| qwen3-vl-8b-fp8 | 1st 15.00 | 1st $0.66 | $0.18 | $2.09 |
| glm-4.5 | 1st 0.36 | 1st $0.69 | $0.38 | $1.60 |
| qwen3-coder-480b-fp8 | 1st 0.28 | 2nd $0.70 | $0.40 | $1.60 |
| deepseek-r1-distill-llama-70b | 1st 0.36 | 1st $0.75 | $0.60 | $1.20 |
| glm-4.7-fp4 | 1st 0.31 | 1st $0.76 | $0.43 | $1.75 |
| glm-4.6-fp4 | 1st 0.38 | 1st $0.76 | $0.43 | $1.75 |
| qwen3-235b-a22b-2507-fp8 | 1st 0.30 | 2nd $0.77 | $0.23 | $2.39 |
| kimi-k2-0905 | 0.78 | 1st $0.80 | $0.40 | $2.00 |
| kimi-k2-thinking | 0.61 | 1st $0.85 | $0.47 | $2.00 |
| kimi-k2 | 0.76 | 1st $0.88 | $0.50 | $2.00 |
| deepseek-r1-0528 | 1st 0.38 | 1st $0.91 | $0.50 | $2.15 |
| glm-4.6-fp8 | 2nd 35.49 | 1st $0.93 | $0.60 | $1.90 |
| deepseek-r1-jan | 1st 0.35 | 1st $1.13 | $0.70 | $2.40 |
| llama-3.1-nemotron-70b | 1st 0.32 | 1st $1.20 | $1.20 | $1.20 |
| deepseek-r1-jan-turbo-fp4 | 1st 0.41 | 1st $1.50 | $1.00 | $3.00 |
llama-3.2-1b
Latency
0.44
Blended $
$0.01
Input $
$0.01
Output $
$0.01
llama-3.1-8b-turbo-fp8
Latency
0.18
Blended $
$0.02
Input $
$0.02
Output $
$0.03
llama-3.2-3b
Latency
0.46
Blended $
$0.02
Input $
$0.02
Output $
$0.02
mistral-nemo
Latency
0.19
Blended $
$0.03
Input $
$0.02
Output $
$0.04
mistral-7b
Latency
0.45
Blended $
$0.03
Input $
$0.03
Output $
$0.05
llama-3.1-8b
Latency
0.28
Blended $
$0.04
Input $
$0.03
Output $
$0.05
llama-3-8b
Latency
0.31
Blended $
$0.04
Input $
$0.03
Output $
$0.06
llama-3.2-11b-vision
Latency
1.92
Blended $
$0.05
Input $
$0.05
Output $
$0.05
gemma-3-4b
Latency
0.35
Blended $
$0.05
Input $
$0.04
Output $
$0.08
ocr
Latency
0.17
Blended $
$0.05
Input $
$0.03
Output $
$0.10
gpt-oss-20b-high
Latency
0.18
Blended $
$0.06
Input $
$0.03
Output $
$0.14
gemma-3-12b
Latency
0.29
Blended $
$0.06
Input $
$0.04
Output $
$0.13
mistral-small-3
Latency
0.39
Blended $
$0.06
Input $
$0.05
Output $
$0.08
mistral-small-3.1
Latency
0.39
Blended $
$0.06
Input $
$0.05
Output $
$0.10
nvidia-nemotron-nano-9b-v2
Latency
0.42
Blended $
$0.07
Input $
$0.04
Output $
$0.16
devstral-small-may
Latency
0.25
Blended $
$0.07
Input $
$0.06
Output $
$0.12
gpt-oss-120b-high
Latency
0.26
Blended $
$0.08
Input $
$0.04
Output $
$0.19
qwen2.5-coder-32b
Latency
0.40
Blended $
$0.08
Input $
$0.06
Output $
$0.15
phi-4
Latency
0.31
Blended $
$0.09
Input $
$0.07
Output $
$0.14
nvidia-nemotron-3-nano
Latency
0.24
Blended $
$0.10
Input $
$0.06
Output $
$0.24
mistral-small-3.2-fp8
Latency
0.25
Blended $
$0.11
Input $
$0.07
Output $
$0.20
gemma-3-27b
Latency
0.36
Blended $
$0.11
Input $
$0.09
Output $
$0.16
qwen3-14b-fp8
Latency
0.21
Blended $
$0.12
Input $
$0.08
Output $
$0.24
devstral-small
Latency
0.26
Blended $
$0.12
Input $
$0.07
Output $
$0.28
qwen3-coder-30b-a3b-fp8
Latency
0.22
Blended $
$0.12
Input $
$0.07
Output $
$0.26
qwen3-30b-fp8
Latency
0.21
Blended $
$0.13
Input $
$0.08
Output $
$0.29
llama-4-scout
Latency
0.32
Blended $
$0.14
Input $
$0.08
Output $
$0.30
qwq-32b-preview
Latency
0.37
Blended $
$0.14
Input $
$0.12
Output $
$0.18
qwen3-32b-fp8
Latency
0.54
Blended $
$0.15
Input $
$0.10
Output $
$0.30
llama-3.3-70b-turbo-fp8
Latency
0.52
Blended $
$0.15
Input $
$0.10
Output $
$0.32
qwen3-235b-2507
Latency
0.41
Blended $
$0.17
Input $
$0.07
Output $
$0.46
llama-nemotron-super-49b-v1.5
Latency
0.44
Blended $
$0.17
Input $
$0.10
Output $
$0.40
qwen2.5-72b
Latency
0.31
Blended $
$0.19
Input $
$0.12
Output $
$0.39
qwen3-vl-4b-fp8
Latency
0.45
Blended $
$0.23
Input $
$0.10
Output $
$0.60
deepseek-v3.2-exp
Latency
0.62
Blended $
$0.24
Input $
$0.21
Output $
$0.32
qwen3-235b-fp8
Latency
0.39
Blended $
$0.25
Input $
$0.13
Output $
$0.60
gpt-oss-120b-high-turbo
Latency
0.19
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-4-maverick-fp8
Latency
0.41
Blended $
$0.26
Input $
$0.15
Output $
$0.60
qwen3-vl-30b-a3b-fp8
Latency
0.19
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-3.3-70b
Latency
0.55
Blended $
$0.27
Input $
$0.23
Output $
$0.40
deepseek-r1-distill-qwen-32b
Latency
0.37
Blended $
$0.27
Input $
$0.27
Output $
$0.27
deepseek-v3.2
Latency
0.61
Blended $
$0.29
Input $
$0.26
Output $
$0.39
hermes-3-llama-3.1-70b
Latency
0.30
Blended $
$0.30
Input $
$0.30
Output $
$0.30
nvidia-nemotron-nano-12b-v2-vl-fp8
Latency
0.21
Blended $
$0.30
Input $
$0.20
Output $
$0.60
olmo-3.1-32b-instruct
Latency
0.24
Blended $
$0.30
Input $
$0.20
Output $
$0.60
llama-3-70b
Latency
0.35
Blended $
$0.33
Input $
$0.30
Output $
$0.40
qwen3-next-80b-a3b
Latency
0.30
Blended $
$0.34
Input $
$0.09
Output $
$1.10
deepseek-v3.1-terminus-fp4
Latency
0.42
Blended $
$0.35
Input $
$0.21
Output $
$0.79
deepseek-v3.1-fp4
Latency
0.75
Blended $
$0.35
Input $
$0.21
Output $
$0.79
llama-3.2-90b-vision
Latency
0.57
Blended $
$0.36
Input $
$0.35
Output $
$0.40
deepseek-v3-0324
Latency
0.57
Blended $
$0.37
Input $
$0.20
Output $
$0.88
llama-3.1-70b
Latency
0.42
Blended $
$0.40
Input $
$0.40
Output $
$0.40
llama-3.1-70b-turbo-fp8
Latency
0.33
Blended $
$0.40
Input $
$0.40
Output $
$0.40
glm-4.5-air
Latency
0.17
Blended $
$0.42
Input $
$0.20
Output $
$1.10
minimax-m2
Latency
0.27
Blended $
$0.45
Input $
$0.25
Output $
$1.02
glm-4.6v-fp8
Latency
0.25
Blended $
$0.45
Input $
$0.30
Output $
$0.90
qwen3-vl-235b-a22b-fp8
Latency
0.34
Blended $
$0.45
Input $
$0.20
Output $
$1.20
deepseek-v3-dec
Latency
0.38
Blended $
$0.46
Input $
$0.32
Output $
$0.89
llama-4-maverick-turbo-fp8
Latency
0.47
Blended $
$0.50
Input $
$0.50
Output $
$0.50
qwen3-coder-480b-turbo-fp4
Latency
0.23
Blended $
$0.51
Input $
$0.28
Output $
$1.20
minimax-m2.1-fp8
Latency
0.27
Blended $
$0.51
Input $
$0.28
Output $
$1.20
mixtral-8x7b
Latency
0.32
Blended $
$0.54
Input $
$0.54
Output $
$0.54
qwen3-vl-8b-fp8
Latency
15.00
Blended $
$0.66
Input $
$0.18
Output $
$2.09
glm-4.5
Latency
0.36
Blended $
$0.69
Input $
$0.38
Output $
$1.60
qwen3-coder-480b-fp8
Latency
0.28
Blended $
$0.70
Input $
$0.40
Output $
$1.60
deepseek-r1-distill-llama-70b
Latency
0.36
Blended $
$0.75
Input $
$0.60
Output $
$1.20
glm-4.7-fp4
Latency
0.31
Blended $
$0.76
Input $
$0.43
Output $
$1.75
glm-4.6-fp4
Latency
0.38
Blended $
$0.76
Input $
$0.43
Output $
$1.75
qwen3-235b-a22b-2507-fp8
Latency
0.30
Blended $
$0.77
Input $
$0.23
Output $
$2.39
kimi-k2-0905
Latency
0.78
Blended $
$0.80
Input $
$0.40
Output $
$2.00
kimi-k2-thinking
Latency
0.61
Blended $
$0.85
Input $
$0.47
Output $
$2.00
kimi-k2
Latency
0.76
Blended $
$0.88
Input $
$0.50
Output $
$2.00
deepseek-r1-0528
Latency
0.38
Blended $
$0.91
Input $
$0.50
Output $
$2.15
glm-4.6-fp8
Latency
35.49
Blended $
$0.93
Input $
$0.60
Output $
$1.90
deepseek-r1-jan
Latency
0.35
Blended $
$1.13
Input $
$0.70
Output $
$2.40
llama-3.1-nemotron-70b
Latency
0.32
Blended $
$1.20
Input $
$1.20
Output $
$1.20
deepseek-r1-jan-turbo-fp4
Latency
0.41
Blended $
$1.50
Input $
$1.00
Output $
$3.00