No results found.

AI Providers

Explore the latency and prices of leading AI providers

Provider
Best Price
Best Latency
# of Models
Deepinfra
Deepinfra
AI Provider
Best Price
57 Models
Best Latency
34 Models
# Models
64 Models
Model
Latency
Blended $
Input $
Output $
llama-3.2-1b
1st
0.44
1st
$0.01
$0.01$0.01
llama-3.1-8b-turbo-fp8
1st
0.51
1st
$0.02
$0.02$0.03
llama-3.2-3b
1st
0.55
1st
$0.02
$0.02$0.02
mistral-nemo
1st
0.19
1st
$0.03
$0.02$0.04
mistral-7b
3rd
0.45
1st
$0.03
$0.03$0.05
llama-3.1-8b
0.38
2nd
$0.04
$0.03$0.05
llama-3-8b
1st
0.30
1st
$0.04
$0.03$0.06
llama-3.2-11b-vision
2nd
2.79
1st
$0.05
$0.05$0.05
gemma-3-4b
1st
0.33
1st
$0.05
$0.04$0.08
gpt-oss-20b-high
13.22
2nd
$0.06
$0.03$0.14
gemma-3-12b
2nd
0.57
1st
$0.06
$0.04$0.13
mistral-small-3
2nd
0.22
1st
$0.06
$0.05$0.08
mistral-small-3.1
3rd
0.39
1st
$0.06
$0.05$0.10
nvidia-nemotron-nano-9b-v2
1st
0.23
1st
$0.07
$0.04$0.16
devstral-small-may
2nd
0.35
1st
$0.07
$0.06$0.12
qwen2.5-coder-32b
2nd
0.38
1st
$0.08
$0.06$0.15
phi-4
2nd
0.40
1st
$0.09
$0.07$0.14
gpt-oss-120b-high
23.42
1st
$0.10
$0.05$0.27
qwen3-14b-fp8
1st
0.16
1st
$0.10
$0.06$0.24
mistral-small-3.2-fp8
1st
0.49
1st
$0.11
$0.07$0.20
gemma-3-27b
2nd
0.68
1st
$0.11
$0.09$0.16
devstral-small
2nd
0.31
1st
$0.12
$0.07$0.28
qwen3-30b-fp8
1st
0.56
1st
$0.13
$0.08$0.29
llama-4-scout
0.33
2nd
$0.14
$0.08$0.30
qwq-32b-preview
1st
22.23
1st
$0.14
$0.12$0.18
qwen3-32b-fp8
1st
0.55
1st
$0.15
$0.10$0.30
llama-nemotron-super-49b-v1.5
1st
0.31
1st
$0.17
$0.10$0.40
qwen2.5-72b
1st
0.57
1st
$0.19
$0.12$0.39
llama-3.3-70b-turbo-fp8
1st
0.51
1st
$0.20
$0.13$0.39
qwen3-235b-2507
2.65
1st
$0.22
$0.09$0.60
qwen3-vl-4b-fp8
1st
0.45
1st
$0.23
$0.10$0.60
qwen3-235b-fp8
3rd
3.46
1st
$0.25
$0.13$0.60
gpt-oss-120b-high-turbo
1st
4.61
1st
$0.26
$0.15$0.60
llama-4-maverick-fp8
1st
0.30
1st
$0.26
$0.15$0.60
llama-3.3-70b
0.55
2nd
$0.27
$0.23$0.40
deepseek-r1-distill-qwen-32b
1st
24.24
1st
$0.27
$0.27$0.27
deepseek-v3.2-exp
1st
0.43
1st
$0.30
$0.27$0.40
deepseek-r1-distill-llama-70b
3rd
23.27
1st
$0.30
$0.20$0.60
hermes-3-llama-3.1-70b
1st
0.35
1st
$0.30
$0.30$0.30
llama-3-70b
2nd
0.59
1st
$0.33
$0.30$0.40
llama-3.2-90b-vision
2nd
0.42
1st
$0.36
$0.35$0.40
qwen3-next-80b-a3b
1st
0.24
2nd
$0.38
$0.14$1.10
llama-3.1-70b
2nd
0.64
1st
$0.40
$0.40$0.40
llama-3.1-70b-turbo-fp8
1st
0.57
1st
$0.40
$0.40$0.40
deepseek-v3-0324
0.67
1st
$0.41
$0.25$0.88
glm-4.5-air
1st
10.46
1st
$0.42
$0.20$1.10
deepseek-v3.1-terminus-fp4
1st
0.33
1st
$0.45
$0.27$1.00
deepseek-v3.1-fp4
1st
0.49
1st
$0.45
$0.27$1.00
qwen3-vl-30b-a3b-fp8
1st
14.75
1st
$0.47
$0.29$0.99
llama-4-maverick-turbo-fp8
1st
0.36
1st
$0.50
$0.50$0.50
deepseek-v3-dec
1st
0.45
1st
$0.51
$0.38$0.89
qwen3-coder-480b-turbo-fp4
1st
0.25
1st
$0.52
$0.29$1.20
mixtral-8x7b
2nd
0.31
1st
$0.54
$0.54$0.54
llama-3.1-nemotron-70b
1st
0.65
1st
$0.60
$0.60$0.60
qwen3-vl-8b-fp8
1st
15.00
1st
$0.66
$0.18$2.09
glm-4.5
2nd
33.04
1st
$0.69
$0.38$1.60
qwen3-coder-480b-fp8
1st
0.29
2nd
$0.70
$0.40$1.60
kimi-k2-0905
2nd
0.35
1st
$0.88
$0.50$2.00
kimi-k2
2nd
0.32
1st
$0.88
$0.50$2.00
deepseek-r1-0528
118.92
1st
$0.91
$0.50$2.15
glm-4.6-fp8
2nd
35.49
1st
$0.93
$0.60$1.90
qwen3-235b-a22b-2507-fp8
3rd
51.48
2nd
$0.95
$0.30$2.90
deepseek-r1-jan
3rd
30.02
1st
$1.13
$0.70$2.40
deepseek-r1-jan-turbo-fp4
1st
23.88
1st
$1.50
$1.00$3.00
llama-3.2-1b
Latency
0.44
Blended $
$0.01
Input $
$0.01
Output $
$0.01
llama-3.1-8b-turbo-fp8
Latency
0.51
Blended $
$0.02
Input $
$0.02
Output $
$0.03
llama-3.2-3b
Latency
0.55
Blended $
$0.02
Input $
$0.02
Output $
$0.02
mistral-nemo
Latency
0.19
Blended $
$0.03
Input $
$0.02
Output $
$0.04
mistral-7b
Latency
0.45
Blended $
$0.03
Input $
$0.03
Output $
$0.05
llama-3.1-8b
Latency
0.38
Blended $
$0.04
Input $
$0.03
Output $
$0.05
llama-3-8b
Latency
0.30
Blended $
$0.04
Input $
$0.03
Output $
$0.06
llama-3.2-11b-vision
Latency
2.79
Blended $
$0.05
Input $
$0.05
Output $
$0.05
gemma-3-4b
Latency
0.33
Blended $
$0.05
Input $
$0.04
Output $
$0.08
gpt-oss-20b-high
Latency
13.22
Blended $
$0.06
Input $
$0.03
Output $
$0.14
gemma-3-12b
Latency
0.57
Blended $
$0.06
Input $
$0.04
Output $
$0.13
mistral-small-3
Latency
0.22
Blended $
$0.06
Input $
$0.05
Output $
$0.08
mistral-small-3.1
Latency
0.39
Blended $
$0.06
Input $
$0.05
Output $
$0.10
nvidia-nemotron-nano-9b-v2
Latency
0.23
Blended $
$0.07
Input $
$0.04
Output $
$0.16
devstral-small-may
Latency
0.35
Blended $
$0.07
Input $
$0.06
Output $
$0.12
qwen2.5-coder-32b
Latency
0.38
Blended $
$0.08
Input $
$0.06
Output $
$0.15
phi-4
Latency
0.40
Blended $
$0.09
Input $
$0.07
Output $
$0.14
gpt-oss-120b-high
Latency
23.42
Blended $
$0.10
Input $
$0.05
Output $
$0.27
qwen3-14b-fp8
Latency
0.16
Blended $
$0.10
Input $
$0.06
Output $
$0.24
mistral-small-3.2-fp8
Latency
0.49
Blended $
$0.11
Input $
$0.07
Output $
$0.20
gemma-3-27b
Latency
0.68
Blended $
$0.11
Input $
$0.09
Output $
$0.16
devstral-small
Latency
0.31
Blended $
$0.12
Input $
$0.07
Output $
$0.28
qwen3-30b-fp8
Latency
0.56
Blended $
$0.13
Input $
$0.08
Output $
$0.29
llama-4-scout
Latency
0.33
Blended $
$0.14
Input $
$0.08
Output $
$0.30
qwq-32b-preview
Latency
22.23
Blended $
$0.14
Input $
$0.12
Output $
$0.18
qwen3-32b-fp8
Latency
0.55
Blended $
$0.15
Input $
$0.10
Output $
$0.30
llama-nemotron-super-49b-v1.5
Latency
0.31
Blended $
$0.17
Input $
$0.10
Output $
$0.40
qwen2.5-72b
Latency
0.57
Blended $
$0.19
Input $
$0.12
Output $
$0.39
llama-3.3-70b-turbo-fp8
Latency
0.51
Blended $
$0.20
Input $
$0.13
Output $
$0.39
qwen3-235b-2507
Latency
2.65
Blended $
$0.22
Input $
$0.09
Output $
$0.60
qwen3-vl-4b-fp8
Latency
0.45
Blended $
$0.23
Input $
$0.10
Output $
$0.60
qwen3-235b-fp8
Latency
3.46
Blended $
$0.25
Input $
$0.13
Output $
$0.60
gpt-oss-120b-high-turbo
Latency
4.61
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-4-maverick-fp8
Latency
0.30
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-3.3-70b
Latency
0.55
Blended $
$0.27
Input $
$0.23
Output $
$0.40
deepseek-r1-distill-qwen-32b
Latency
24.24
Blended $
$0.27
Input $
$0.27
Output $
$0.27
deepseek-v3.2-exp
Latency
0.43
Blended $
$0.30
Input $
$0.27
Output $
$0.40
deepseek-r1-distill-llama-70b
Latency
23.27
Blended $
$0.30
Input $
$0.20
Output $
$0.60
hermes-3-llama-3.1-70b
Latency
0.35
Blended $
$0.30
Input $
$0.30
Output $
$0.30
llama-3-70b
Latency
0.59
Blended $
$0.33
Input $
$0.30
Output $
$0.40
llama-3.2-90b-vision
Latency
0.42
Blended $
$0.36
Input $
$0.35
Output $
$0.40
qwen3-next-80b-a3b
Latency
0.24
Blended $
$0.38
Input $
$0.14
Output $
$1.10
llama-3.1-70b
Latency
0.64
Blended $
$0.40
Input $
$0.40
Output $
$0.40
llama-3.1-70b-turbo-fp8
Latency
0.57
Blended $
$0.40
Input $
$0.40
Output $
$0.40
deepseek-v3-0324
Latency
0.67
Blended $
$0.41
Input $
$0.25
Output $
$0.88
glm-4.5-air
Latency
10.46
Blended $
$0.42
Input $
$0.20
Output $
$1.10
deepseek-v3.1-terminus-fp4
Latency
0.33
Blended $
$0.45
Input $
$0.27
Output $
$1.00
deepseek-v3.1-fp4
Latency
0.49
Blended $
$0.45
Input $
$0.27
Output $
$1.00
qwen3-vl-30b-a3b-fp8
Latency
14.75
Blended $
$0.47
Input $
$0.29
Output $
$0.99
llama-4-maverick-turbo-fp8
Latency
0.36
Blended $
$0.50
Input $
$0.50
Output $
$0.50
deepseek-v3-dec
Latency
0.45
Blended $
$0.51
Input $
$0.38
Output $
$0.89
qwen3-coder-480b-turbo-fp4
Latency
0.25
Blended $
$0.52
Input $
$0.29
Output $
$1.20
mixtral-8x7b
Latency
0.31
Blended $
$0.54
Input $
$0.54
Output $
$0.54
llama-3.1-nemotron-70b
Latency
0.65
Blended $
$0.60
Input $
$0.60
Output $
$0.60
qwen3-vl-8b-fp8
Latency
15.00
Blended $
$0.66
Input $
$0.18
Output $
$2.09
glm-4.5
Latency
33.04
Blended $
$0.69
Input $
$0.38
Output $
$1.60
qwen3-coder-480b-fp8
Latency
0.29
Blended $
$0.70
Input $
$0.40
Output $
$1.60
kimi-k2-0905
Latency
0.35
Blended $
$0.88
Input $
$0.50
Output $
$2.00
kimi-k2
Latency
0.32
Blended $
$0.88
Input $
$0.50
Output $
$2.00
deepseek-r1-0528
Latency
118.92
Blended $
$0.91
Input $
$0.50
Output $
$2.15
glm-4.6-fp8
Latency
35.49
Blended $
$0.93
Input $
$0.60
Output $
$1.90
qwen3-235b-a22b-2507-fp8
Latency
51.48
Blended $
$0.95
Input $
$0.30
Output $
$2.90
deepseek-r1-jan
Latency
30.02
Blended $
$1.13
Input $
$0.70
Output $
$2.40
deepseek-r1-jan-turbo-fp4
Latency
23.88
Blended $
$1.50
Input $
$1.00
Output $
$3.00

FAQ