Version: 0.0.0

Introduction

Anyscale Endpoints offers the best open-source large language models (LLMs) as fully managed API endpoints. This allows you to focus on building applications powered by LLMs without the need to worry about the underlying infrastructure.

Ease of use: Our platform provides simple APIs to query and, soon, fine-tune LLMs.
Fully managed: With features such as auto-scaling and pay-as-you-go, we keep the models up and running so you don't have to.

Get started

Register an account. (If you are viewing this from Anyscale Endpoints, you can skip this step.)
Generate an API key.
Run your first query:

See the "Query a model" page for more details on querying models.

Python SDK streaming
Python SDK
Node streaming
Node
cURL

import openai

# Prompt sent as the user message of the chat request.
query = "Write a program to load data from S3 with Ray and train using PyTorch."

# OpenAI client configured with the Anyscale Endpoints base URL.
# Replace the placeholder key with your own API key.
client = openai.OpenAI(
    base_url = "https://api.endpoints.anyscale.com/v1",
    api_key = "esecret_ANYSCALE_API_KEY"
)
# Note: not all arguments are currently supported; unsupported ones are ignored by the backend.
chat_completion = client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    messages=[{"role": "system", "content": "You are a helpful assistant."},
              {"role": "user", "content": query}],
    temperature=0.1,
    stream=True
)
# With stream=True the call returns an iterable of chunks; print each text
# fragment as soon as it arrives instead of waiting for the full reply.
for message in chat_completion:
    # Skip chunks that carry no choice at all rather than raising IndexError.
    if not message.choices:
        continue
    # `delta.content` can be None on chunks without text (e.g. the final
    # chunk); fall back to "" so the literal string "None" is never printed.
    print(message.choices[0].delta.content or "", end="", flush=True)

import openai

# Prompt sent as the user message of the chat request.
user_prompt = "Write a program to load data from S3 with Ray and train using PyTorch."

# OpenAI client configured with the Anyscale Endpoints base URL.
# Replace the placeholder key with your own API key.
anyscale_client = openai.OpenAI(
    base_url="https://api.endpoints.anyscale.com/v1",
    api_key="esecret_ANYSCALE_API_KEY",
)

# Conversation: a system instruction followed by the user's prompt.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": user_prompt},
]

# Note: not all arguments are currently supported; unsupported ones are ignored by the backend.
response = anyscale_client.chat.completions.create(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    messages=conversation,
    temperature=0.1,
)

# Print the message content of the first returned choice.
print(response.choices[0].message.content)

import OpenAI from "openai";

// OpenAI client configured with the Anyscale Endpoints base URL.
// Replace the placeholder key with your own API key.
const anyscale = new OpenAI({
    baseURL: "https://api.endpoints.anyscale.com/v1",
    apiKey: "esecret_ANYSCALE_API_KEY"
});

/**
 * Requests a streamed chat completion for `prompt` and writes each text
 * fragment to stdout as it arrives.
 *
 * @param {string} prompt - Text sent as the user message.
 * @returns {Promise<void>} Settles once the stream has been fully consumed;
 *   rejects if the request or the stream fails.
 */
async function chat_complete(prompt) {
    const completion = await anyscale.chat.completions.create({
        model: "mistralai/Mistral-7B-Instruct-v0.1",
        messages: [{"role": "system", "content": "You are a helpful assistant."},
                   {"role": "user", "content": prompt}],
        temperature: 0.1,
        stream: true
    });
    for await (const chunk of completion) {
        // A chunk may carry no choice or no text; write an empty string then.
        process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
    }
}

const query = "Write a program to load data from S3 with Ray and train using PyTorch.";

// Handle rejections explicitly instead of leaving the promise floating.
chat_complete(query).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

import OpenAI from "openai";

// OpenAI client configured with the Anyscale Endpoints base URL.
// Replace the placeholder key with your own API key.
const anyscale = new OpenAI({
    baseURL: "https://api.endpoints.anyscale.com/v1",
    apiKey: "esecret_ANYSCALE_API_KEY"
});

/**
 * Requests a (non-streamed) chat completion for `prompt` and writes the
 * message content of the first choice to stdout.
 *
 * @param {string} prompt - Text sent as the user message.
 * @returns {Promise<void>} Settles after the reply has been written;
 *   rejects if the request fails.
 */
async function chat_complete(prompt) {
    const completion = await anyscale.chat.completions.create({
        model: "mistralai/Mistral-7B-Instruct-v0.1",
        messages: [{"role": "system", "content": "You are a helpful assistant."},
                   {"role": "user", "content": prompt}],
        temperature: 0.1
    });
    // `process.stdout.write` throws a TypeError when given undefined/null, so
    // fall back to an empty string if the optional chain finds no content.
    process.stdout.write(completion.choices[0]?.message?.content ?? '');
}

const query = "Write a program to load data from S3 with Ray and train using PyTorch.";

// Handle rejections explicitly instead of leaving the promise floating.
chat_complete(query).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

# Base URL and API key used by the request below.
# Replace the key placeholder with your own API key.
export ANYSCALE_BASE_URL="https://api.endpoints.anyscale.com/v1"
export ANYSCALE_API_KEY="YOUR_ANYSCALE_ENDPOINT_API_KEY"

# POST a chat completion request (system + user message) as JSON,
# authenticating with the API key as a Bearer token.
curl "${ANYSCALE_BASE_URL}/chat/completions" \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer ${ANYSCALE_API_KEY}" \
  --data '{
    "model": "mistralai/Mistral-7B-Instruct-v0.1",
    "messages": [{"role": "system", "content": "You are a helpful assistant."}, 
                 {"role": "user", "content": "Write a program to load data from S3 with Ray and train using PyTorch."}],
    "temperature": 0.7
  }'

Introduction

Get started

Get started