Contact Us
No results found.

AI Providers

Explore the latency and prices of leading AI providers

Provider
Best Price
Best Latency
# of Models
Deepinfra
Deepinfra
AI Provider
Best Price
66 Models
Best Latency
59 Models
# Models
77 Models
Model
Latency
Blended $
Input $
Output $
llama-3.2-1b
1st
0.44
2nd
$0.01
$0.01$0.01
llama-3.1-8b-turbo-fp8
1st
0.18
1st
$0.02
$0.02$0.03
llama-3.2-3b
1st
0.46
1st
$0.02
$0.02$0.02
mistral-nemo
1st
0.19
1st
$0.03
$0.02$0.04
mistral-7b
3rd
0.45
1st
$0.03
$0.03$0.05
llama-3.1-8b
0.28
3rd
$0.04
$0.03$0.05
llama-3-8b
1st
0.31
1st
$0.04
$0.03$0.06
llama-3.2-11b-vision
2nd
1.92
1st
$0.05
$0.05$0.05
gemma-3-4b
1st
0.35
1st
$0.05
$0.04$0.08
ocr
1st
0.17
2nd
$0.05
$0.03$0.10
gpt-oss-20b-high
2nd
0.18
2nd
$0.06
$0.03$0.14
gemma-3-12b
1st
0.29
1st
$0.06
$0.04$0.13
mistral-small-3
3rd
0.39
1st
$0.06
$0.05$0.08
mistral-small-3.1
3rd
0.39
1st
$0.06
$0.05$0.10
nvidia-nemotron-nano-9b-v2
1st
0.42
1st
$0.07
$0.04$0.16
devstral-small-may
1st
0.25
1st
$0.07
$0.06$0.12
gpt-oss-120b-high
2nd
0.26
1st
$0.08
$0.04$0.19
qwen2.5-coder-32b
2nd
0.40
1st
$0.08
$0.06$0.15
phi-4
1st
0.31
1st
$0.09
$0.07$0.14
nvidia-nemotron-3-nano
1st
0.24
1st
$0.10
$0.06$0.24
mistral-small-3.2-fp8
1st
0.25
1st
$0.11
$0.07$0.20
gemma-3-27b
1st
0.36
1st
$0.11
$0.09$0.16
qwen3-14b-fp8
1st
0.21
1st
$0.12
$0.08$0.24
devstral-small
1st
0.26
1st
$0.12
$0.07$0.28
qwen3-coder-30b-a3b-fp8
1st
0.22
1st
$0.12
$0.07$0.26
qwen3-30b-fp8
1st
0.21
1st
$0.13
$0.08$0.29
llama-4-scout
0.32
2nd
$0.14
$0.08$0.30
qwq-32b-preview
1st
0.37
1st
$0.14
$0.12$0.18
qwen3-32b-fp8
1st
0.54
1st
$0.15
$0.10$0.30
llama-3.3-70b-turbo-fp8
1st
0.52
1st
$0.15
$0.10$0.32
qwen3-235b-2507
2nd
0.41
1st
$0.17
$0.07$0.46
llama-nemotron-super-49b-v1.5
1st
0.44
1st
$0.17
$0.10$0.40
qwen2.5-72b
1st
0.31
2nd
$0.19
$0.12$0.39
qwen3-vl-4b-fp8
1st
0.45
1st
$0.23
$0.10$0.60
deepseek-v3.2-exp
1st
0.62
1st
$0.24
$0.21$0.32
qwen3-235b-fp8
1st
0.39
1st
$0.25
$0.13$0.60
gpt-oss-120b-high-turbo
1st
0.19
1st
$0.26
$0.15$0.60
llama-4-maverick-fp8
1st
0.41
1st
$0.26
$0.15$0.60
qwen3-vl-30b-a3b-fp8
1st
0.19
1st
$0.26
$0.15$0.60
llama-3.3-70b
0.55
2nd
$0.27
$0.23$0.40
deepseek-r1-distill-qwen-32b
1st
0.37
1st
$0.27
$0.27$0.27
deepseek-v3.2
1st
0.61
1st
$0.29
$0.26$0.39
hermes-3-llama-3.1-70b
1st
0.30
1st
$0.30
$0.30$0.30
nvidia-nemotron-nano-12b-v2-vl-fp8
1st
0.21
1st
$0.30
$0.20$0.60
olmo-3.1-32b-instruct
1st
0.24
1st
$0.30
$0.20$0.60
llama-3-70b
1st
0.35
1st
$0.33
$0.30$0.40
qwen3-next-80b-a3b
1st
0.30
2nd
$0.34
$0.09$1.10
deepseek-v3.1-terminus-fp4
1st
0.42
1st
$0.35
$0.21$0.79
deepseek-v3.1-fp4
1st
0.75
1st
$0.35
$0.21$0.79
llama-3.2-90b-vision
2nd
0.57
1st
$0.36
$0.35$0.40
deepseek-v3-0324
0.57
1st
$0.37
$0.20$0.88
llama-3.1-70b
1st
0.42
1st
$0.40
$0.40$0.40
llama-3.1-70b-turbo-fp8
1st
0.33
1st
$0.40
$0.40$0.40
glm-4.5-air
1st
0.17
2nd
$0.42
$0.20$1.10
minimax-m2
1st
0.27
1st
$0.45
$0.25$1.02
glm-4.6v-fp8
1st
0.25
1st
$0.45
$0.30$0.90
qwen3-vl-235b-a22b-fp8
1st
0.34
1st
$0.45
$0.20$1.20
deepseek-v3-dec
1st
0.38
1st
$0.46
$0.32$0.89
llama-4-maverick-turbo-fp8
1st
0.47
1st
$0.50
$0.50$0.50
qwen3-coder-480b-turbo-fp4
1st
0.23
1st
$0.51
$0.28$1.20
minimax-m2.1-fp8
1st
0.27
1st
$0.51
$0.28$1.20
mixtral-8x7b
2nd
0.32
1st
$0.54
$0.54$0.54
qwen3-vl-8b-fp8
1st
15.00
1st
$0.66
$0.18$2.09
glm-4.5
1st
0.36
1st
$0.69
$0.38$1.60
qwen3-coder-480b-fp8
1st
0.28
2nd
$0.70
$0.40$1.60
deepseek-r1-distill-llama-70b
1st
0.36
1st
$0.75
$0.60$1.20
glm-4.7-fp4
1st
0.31
1st
$0.76
$0.43$1.75
glm-4.6-fp4
1st
0.38
1st
$0.76
$0.43$1.75
qwen3-235b-a22b-2507-fp8
1st
0.30
2nd
$0.77
$0.23$2.39
kimi-k2-0905
0.78
1st
$0.80
$0.40$2.00
kimi-k2-thinking
0.61
1st
$0.85
$0.47$2.00
kimi-k2
0.76
1st
$0.88
$0.50$2.00
deepseek-r1-0528
1st
0.38
1st
$0.91
$0.50$2.15
glm-4.6-fp8
2nd
35.49
1st
$0.93
$0.60$1.90
deepseek-r1-jan
1st
0.35
1st
$1.13
$0.70$2.40
llama-3.1-nemotron-70b
1st
0.32
1st
$1.20
$1.20$1.20
deepseek-r1-jan-turbo-fp4
1st
0.41
1st
$1.50
$1.00$3.00
llama-3.2-1b
Latency
0.44
Blended $
$0.01
Input $
$0.01
Output $
$0.01
llama-3.1-8b-turbo-fp8
Latency
0.18
Blended $
$0.02
Input $
$0.02
Output $
$0.03
llama-3.2-3b
Latency
0.46
Blended $
$0.02
Input $
$0.02
Output $
$0.02
mistral-nemo
Latency
0.19
Blended $
$0.03
Input $
$0.02
Output $
$0.04
mistral-7b
Latency
0.45
Blended $
$0.03
Input $
$0.03
Output $
$0.05
llama-3.1-8b
Latency
0.28
Blended $
$0.04
Input $
$0.03
Output $
$0.05
llama-3-8b
Latency
0.31
Blended $
$0.04
Input $
$0.03
Output $
$0.06
llama-3.2-11b-vision
Latency
1.92
Blended $
$0.05
Input $
$0.05
Output $
$0.05
gemma-3-4b
Latency
0.35
Blended $
$0.05
Input $
$0.04
Output $
$0.08
ocr
Latency
0.17
Blended $
$0.05
Input $
$0.03
Output $
$0.10
gpt-oss-20b-high
Latency
0.18
Blended $
$0.06
Input $
$0.03
Output $
$0.14
gemma-3-12b
Latency
0.29
Blended $
$0.06
Input $
$0.04
Output $
$0.13
mistral-small-3
Latency
0.39
Blended $
$0.06
Input $
$0.05
Output $
$0.08
mistral-small-3.1
Latency
0.39
Blended $
$0.06
Input $
$0.05
Output $
$0.10
nvidia-nemotron-nano-9b-v2
Latency
0.42
Blended $
$0.07
Input $
$0.04
Output $
$0.16
devstral-small-may
Latency
0.25
Blended $
$0.07
Input $
$0.06
Output $
$0.12
gpt-oss-120b-high
Latency
0.26
Blended $
$0.08
Input $
$0.04
Output $
$0.19
qwen2.5-coder-32b
Latency
0.40
Blended $
$0.08
Input $
$0.06
Output $
$0.15
phi-4
Latency
0.31
Blended $
$0.09
Input $
$0.07
Output $
$0.14
nvidia-nemotron-3-nano
Latency
0.24
Blended $
$0.10
Input $
$0.06
Output $
$0.24
mistral-small-3.2-fp8
Latency
0.25
Blended $
$0.11
Input $
$0.07
Output $
$0.20
gemma-3-27b
Latency
0.36
Blended $
$0.11
Input $
$0.09
Output $
$0.16
qwen3-14b-fp8
Latency
0.21
Blended $
$0.12
Input $
$0.08
Output $
$0.24
devstral-small
Latency
0.26
Blended $
$0.12
Input $
$0.07
Output $
$0.28
qwen3-coder-30b-a3b-fp8
Latency
0.22
Blended $
$0.12
Input $
$0.07
Output $
$0.26
qwen3-30b-fp8
Latency
0.21
Blended $
$0.13
Input $
$0.08
Output $
$0.29
llama-4-scout
Latency
0.32
Blended $
$0.14
Input $
$0.08
Output $
$0.30
qwq-32b-preview
Latency
0.37
Blended $
$0.14
Input $
$0.12
Output $
$0.18
qwen3-32b-fp8
Latency
0.54
Blended $
$0.15
Input $
$0.10
Output $
$0.30
llama-3.3-70b-turbo-fp8
Latency
0.52
Blended $
$0.15
Input $
$0.10
Output $
$0.32
qwen3-235b-2507
Latency
0.41
Blended $
$0.17
Input $
$0.07
Output $
$0.46
llama-nemotron-super-49b-v1.5
Latency
0.44
Blended $
$0.17
Input $
$0.10
Output $
$0.40
qwen2.5-72b
Latency
0.31
Blended $
$0.19
Input $
$0.12
Output $
$0.39
qwen3-vl-4b-fp8
Latency
0.45
Blended $
$0.23
Input $
$0.10
Output $
$0.60
deepseek-v3.2-exp
Latency
0.62
Blended $
$0.24
Input $
$0.21
Output $
$0.32
qwen3-235b-fp8
Latency
0.39
Blended $
$0.25
Input $
$0.13
Output $
$0.60
gpt-oss-120b-high-turbo
Latency
0.19
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-4-maverick-fp8
Latency
0.41
Blended $
$0.26
Input $
$0.15
Output $
$0.60
qwen3-vl-30b-a3b-fp8
Latency
0.19
Blended $
$0.26
Input $
$0.15
Output $
$0.60
llama-3.3-70b
Latency
0.55
Blended $
$0.27
Input $
$0.23
Output $
$0.40
deepseek-r1-distill-qwen-32b
Latency
0.37
Blended $
$0.27
Input $
$0.27
Output $
$0.27
deepseek-v3.2
Latency
0.61
Blended $
$0.29
Input $
$0.26
Output $
$0.39
hermes-3-llama-3.1-70b
Latency
0.30
Blended $
$0.30
Input $
$0.30
Output $
$0.30
nvidia-nemotron-nano-12b-v2-vl-fp8
Latency
0.21
Blended $
$0.30
Input $
$0.20
Output $
$0.60
olmo-3.1-32b-instruct
Latency
0.24
Blended $
$0.30
Input $
$0.20
Output $
$0.60
llama-3-70b
Latency
0.35
Blended $
$0.33
Input $
$0.30
Output $
$0.40
qwen3-next-80b-a3b
Latency
0.30
Blended $
$0.34
Input $
$0.09
Output $
$1.10
deepseek-v3.1-terminus-fp4
Latency
0.42
Blended $
$0.35
Input $
$0.21
Output $
$0.79
deepseek-v3.1-fp4
Latency
0.75
Blended $
$0.35
Input $
$0.21
Output $
$0.79
llama-3.2-90b-vision
Latency
0.57
Blended $
$0.36
Input $
$0.35
Output $
$0.40
deepseek-v3-0324
Latency
0.57
Blended $
$0.37
Input $
$0.20
Output $
$0.88
llama-3.1-70b
Latency
0.42
Blended $
$0.40
Input $
$0.40
Output $
$0.40
llama-3.1-70b-turbo-fp8
Latency
0.33
Blended $
$0.40
Input $
$0.40
Output $
$0.40
glm-4.5-air
Latency
0.17
Blended $
$0.42
Input $
$0.20
Output $
$1.10
minimax-m2
Latency
0.27
Blended $
$0.45
Input $
$0.25
Output $
$1.02
glm-4.6v-fp8
Latency
0.25
Blended $
$0.45
Input $
$0.30
Output $
$0.90
qwen3-vl-235b-a22b-fp8
Latency
0.34
Blended $
$0.45
Input $
$0.20
Output $
$1.20
deepseek-v3-dec
Latency
0.38
Blended $
$0.46
Input $
$0.32
Output $
$0.89
llama-4-maverick-turbo-fp8
Latency
0.47
Blended $
$0.50
Input $
$0.50
Output $
$0.50
qwen3-coder-480b-turbo-fp4
Latency
0.23
Blended $
$0.51
Input $
$0.28
Output $
$1.20
minimax-m2.1-fp8
Latency
0.27
Blended $
$0.51
Input $
$0.28
Output $
$1.20
mixtral-8x7b
Latency
0.32
Blended $
$0.54
Input $
$0.54
Output $
$0.54
qwen3-vl-8b-fp8
Latency
15.00
Blended $
$0.66
Input $
$0.18
Output $
$2.09
glm-4.5
Latency
0.36
Blended $
$0.69
Input $
$0.38
Output $
$1.60
qwen3-coder-480b-fp8
Latency
0.28
Blended $
$0.70
Input $
$0.40
Output $
$1.60
deepseek-r1-distill-llama-70b
Latency
0.36
Blended $
$0.75
Input $
$0.60
Output $
$1.20
glm-4.7-fp4
Latency
0.31
Blended $
$0.76
Input $
$0.43
Output $
$1.75
glm-4.6-fp4
Latency
0.38
Blended $
$0.76
Input $
$0.43
Output $
$1.75
qwen3-235b-a22b-2507-fp8
Latency
0.30
Blended $
$0.77
Input $
$0.23
Output $
$2.39
kimi-k2-0905
Latency
0.78
Blended $
$0.80
Input $
$0.40
Output $
$2.00
kimi-k2-thinking
Latency
0.61
Blended $
$0.85
Input $
$0.47
Output $
$2.00
kimi-k2
Latency
0.76
Blended $
$0.88
Input $
$0.50
Output $
$2.00
deepseek-r1-0528
Latency
0.38
Blended $
$0.91
Input $
$0.50
Output $
$2.15
glm-4.6-fp8
Latency
35.49
Blended $
$0.93
Input $
$0.60
Output $
$1.90
deepseek-r1-jan
Latency
0.35
Blended $
$1.13
Input $
$0.70
Output $
$2.40
llama-3.1-nemotron-70b
Latency
0.32
Blended $
$1.20
Input $
$1.20
Output $
$1.20
deepseek-r1-jan-turbo-fp4
Latency
0.41
Blended $
$1.50
Input $
$1.00
Output $
$3.00

FAQ