# POST /predict

Run inference on input data.

## Overview

The `/predict` endpoint runs model inference on the inputs supplied in the request body and returns the model's outputs as JSON.

## Request
### Method

POST

### URL

`/predict` on the running server, e.g. `http://localhost:8080/predict` in the examples below.
### Headers

`Content-Type: application/json` (required)

### Body

A JSON object mapping input names to values; see the example after the list of input types.
Input Types:
- Arrays: For tensor inputs
- Strings: For text inputs
- Numbers: For scalar inputs
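For instance, a request body for the tokenized sentiment model used in the examples below might look like this (input names such as `input_ids` are defined by the model, not by the endpoint):

```json
{
  "input_ids": [101, 2054, 2003, 102],
  "attention_mask": [1, 1, 1, 1]
}
```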
## Response

### Success Response
Status: 200 OK
Content-Type: application/json
Body:
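The body maps the model's output names to values. Output names and shapes are model-specific; for the sentiment model used in the examples below, a response might look like this (values illustrative):

```json
{
  "sentiment": [0.0234, 0.9766]
}
```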
### Error Responses
400 Bad Request:
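Returned when the request body is malformed or does not match the model's declared inputs. The exact payload wording is illustrative:

```json
{
  "error": "Missing required input: input_ids"
}
```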
500 Internal Server Error:
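Returned when inference itself fails on an otherwise valid request. Illustrative payload:

```json
{
  "error": "Inference failed"
}
```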
## Examples

### Sentiment Analysis
Request:
```bash
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{
    "input_ids": [101, 1045, 2293, 2023, 102],
    "attention_mask": [1, 1, 1, 1, 1]
  }'
```
Response:
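Illustrative values; the output name depends on the model:

```json
{
  "sentiment": [0.0231, 0.9769]
}
```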
### Image Classification
Request:
```bash
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{
    "image": [[[[0.5, 0.3, ...]]]]
  }'
```
Response:
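Illustrative; here the model is assumed to expose a `probabilities` output over its classes:

```json
{
  "probabilities": [0.001, 0.003, 0.912, ...]
}
```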
### Text Input
Request:
```bash
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{"text": "I love this product!"}'
```
Response:
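Illustrative values, matching the `sentiment` output read by the Python client below:

```json
{
  "sentiment": [0.012, 0.988]
}
```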
## Client Examples

### Python
```python
import requests
import numpy as np

# Text input
response = requests.post(
    "http://localhost:8080/predict",
    json={"text": "I love GPUX!"}
)
result = response.json()
print(f"Sentiment: {result['sentiment']}")

# Tensor input
response = requests.post(
    "http://localhost:8080/predict",
    json={
        "input_ids": [101, 2054, 2003, 102],
        "attention_mask": [1, 1, 1, 1]
    }
)
print(response.json())
```
### JavaScript
```javascript
// Fetch API
const response = await fetch('http://localhost:8080/predict', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    text: 'I love GPUX!'
  })
});
const result = await response.json();
console.log(result.sentiment);
```

```javascript
// Axios
import axios from 'axios';

const { data } = await axios.post('http://localhost:8080/predict', {
  text: 'I love GPUX!'
});
console.log(data.sentiment);
```
### Go
```go
package main

import (
	"bytes"
	"encoding/json"
	"net/http"
)

type PredictRequest struct {
	Text string `json:"text"`
}

type PredictResponse struct {
	Sentiment []float64 `json:"sentiment"`
}

func predict(text string) (*PredictResponse, error) {
	reqBody, err := json.Marshal(PredictRequest{Text: text})
	if err != nil {
		return nil, err
	}

	resp, err := http.Post(
		"http://localhost:8080/predict",
		"application/json",
		bytes.NewBuffer(reqBody),
	)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var result PredictResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}
	return &result, nil
}
```
## Error Handling

### Missing Input
Request:
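For example, omitting the `input_ids` input that the sentiment model above expects:

```bash
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{"attention_mask": [1, 1, 1, 1]}'
```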
Response (400):
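Illustrative payload; the exact message comes from the model's input definitions:

```json
{
  "error": "Missing required input: input_ids"
}
```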
### Invalid Input Type
Request:
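For example, sending a string where the model expects an integer array:

```bash
curl -X POST http://localhost:8080/predict \
  -H "Content-Type: application/json" \
  -d '{"input_ids": "not-an-array", "attention_mask": [1, 1, 1, 1]}'
```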
Response (400):
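Illustrative payload:

```json
{
  "error": "Invalid type for input 'input_ids': expected an array of integers"
}
```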
### Inference Failure
Response (500):
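Returned when the model run itself fails, for example on a runtime shape mismatch. Illustrative payload:

```json
{
  "error": "Inference failed: invalid input shape"
}
```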
## Best Practices

**Batch Requests**
Send multiple items in a batch for better throughput:
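A sketch in Python, assuming the served model accepts a leading batch dimension on the same input names used above (whether batching is supported depends on the model):

```python
import requests

# Two items stacked along the batch dimension; shapes are model-dependent.
batch = {
    "input_ids": [
        [101, 2054, 2003, 102],
        [101, 2023, 2003, 102],
    ],
    "attention_mask": [
        [1, 1, 1, 1],
        [1, 1, 1, 1],
    ],
}

response = requests.post("http://localhost:8080/predict", json=batch)
print(response.json())
```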
**Connection Pooling**
Use connection pooling for high-throughput applications:
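For example, with Python's `requests`, a `Session` keeps connections alive between calls instead of opening a new one per request:

```python
import requests

# The Session reuses the underlying TCP connection across requests.
session = requests.Session()

texts = ["I love GPUX!", "Not great.", "Works as expected."]
for text in texts:
    response = session.post("http://localhost:8080/predict", json={"text": text})
    print(response.json())
```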
**Error Handling**
Always handle errors gracefully:
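A sketch with Python's `requests`, distinguishing HTTP error responses from network-level failures:

```python
import requests

try:
    response = requests.post(
        "http://localhost:8080/predict",
        json={"text": "I love GPUX!"},
        timeout=10,
    )
    response.raise_for_status()  # raise on 4xx/5xx responses
except requests.exceptions.HTTPError as exc:
    # The server answered with an error status; the body carries the message.
    print(f"Prediction failed ({exc.response.status_code}): {exc.response.text}")
except requests.exceptions.RequestException as exc:
    # Network-level failure: connection refused, timeout, etc.
    print(f"Request error: {exc}")
else:
    print(response.json())
```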