import requests
import base64
import json
from IPython.display import Image
api_key = '<api-key>'
base_url = '<endpoint>'
API Examples¶
The API is a REST API that exposes HTTP endpoints. The following demonstrates using Python's requests library, but you can use any technique capable of making HTTP requests to access the API.
Authenticating & Headers¶
Your API key should be passed as a Bearer token in the Authorization header.
The value of the Content-Type header should be application/json.
endpoint = f'{base_url}/api/v1/chat/completions'
headers = {'accept': 'application/json', 'authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}
Minimal Chat Example¶
The simplest possible chat with just a model and a user prompt.
data = {'model': 'llama_4_maverick', 'messages': [{'role': 'user', 'content': 'Hello'}]}
resp = requests.post(endpoint, json=data, headers=headers)
if resp.status_code >= 300:
print('error', resp.status_code)
response_object = resp.json()
response_object
{'id': 'chatcmpl-688626cc-fb3e-477a-9a0d-1da7ea49bb87',
'object': 'chat.completion',
'created': 1772229798,
'model': 'llama_4_maverick',
'choices': [{'index': 0,
'message': {'role': 'assistant',
'content': "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?"},
'finish_reason': 'stop'}],
'usage': {'prompt_tokens': 36, 'completion_tokens': 24, 'total_tokens': 60}}
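As an alternative to checking status_code by hand, the requests library can raise an exception for error responses. A minimal sketch of the same call using raise_for_status():
# raise_for_status() raises requests.HTTPError for any 4xx/5xx response
resp = requests.post(endpoint, json=data, headers=headers)
resp.raise_for_status()
response_object = resp.json()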
Using a system prompt¶
The messages array can contain multiple messages of various roles. Including a system prompt influences how the model behaves.
data = {
'model': 'llama_4_maverick',
'messages': [
{'role': 'system', 'content': 'You are a pirate and only speak in pirate'},
{'role': 'user', 'content': 'Hello'},
],
}
resp = requests.post(endpoint, json=data, headers=headers)
if resp.status_code >= 300:
print('error', resp.status_code)
response_object = resp.json()
print(response_object['choices'][0]['message']['content'])
Arrrr, 'ello there, matey! Yer lookin' fer a swashbucklin' adventure, eh? Yer in fer a treat, I be Captain Blackbeak, the scurviest pirate to ever sail the seven seas! What be bringin' ye to these waters?
Including image data with a prompt¶
Images must be encoded as a data URI like data:image/jpeg;base64,<base64 data>
Include the image as another content item:
{
    "type": "image_url",
    "image_url": {
        "url": "<your base64-encoded data URI>"
    },
    "detail": "auto"
}
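If your image lives on disk, one way to build the data URI is to guess the MIME type from the file name. Here is a minimal sketch using only the standard library (the helper name to_data_uri is ours, not part of the API):
import base64
import mimetypes

def to_data_uri(image_path):
    # guess the MIME type from the file extension, e.g. image/jpeg
    mime, _ = mimetypes.guess_type(image_path)
    with open(image_path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode('utf-8')
    return f'data:{mime};base64,{b64}'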
Below we have a request object with three items:
- System prompt
- User prompt
- Image content
The example also demonstrates crafting the prompt to encourage the model to respond with valid JSON data.
seal_image_path = 'images/ous_white.jpg'
stapler_image_path = 'images/red_stapler.jpg'
Image(filename=seal_image_path)
def make_prompt(image_path, description):
    image = Image(filename=image_path)
    image_bytes = image.data
    # encode the image bytes as base64 and build a data URI
    # (the sample images are JPEGs)
    s = base64.b64encode(image_bytes).decode('utf-8')
    encoded = 'data:image/jpeg;base64,' + s
    return {
        'model': 'gemini-2.5-flash',
        'messages': [
            {
                'role': 'system',
                'content': """
                I want to directly parse your response as JSON. Do not include any text other than the plain JSON.
                Do not include the word json, do not include newline characters. Only the exact JSON text.
                """,
            },
            {
                'role': 'user',
                'content': [
                    {
                        'type': 'text',
                        'text': f"""I have the following description of this image:
                        {description}
                        I would like to know if this description is generally correct. The description does not need to be
                        specific or detailed. The general category is still considered correct if it matches
                        the image. Format your answer as a JSON object with two pieces of information: a boolean indicating
                        whether the description matches, and a description, which is your identification of the image:
                        {{"is_correct": boolean indicating if the description is generally correct, "description": your description of the image}}.
                        """,
                    },
                    {'type': 'image_url', 'image_url': {'url': encoded}, 'detail': 'auto'},
                ],
            },
        ],
    }
prompt = make_prompt(seal_image_path, 'a red stapler')
resp = requests.post(endpoint, json=prompt, headers=headers)
response_object = resp.json()
response_content = response_object['choices'][0]['message']['content']
json.loads(response_content)
{'is_correct': False,
'description': "A circular emblem or seal featuring a cartoon computer with a grumpy face, surrounded by stars, and the text 'OFFICE OF UNSPECIFIED SERVICES' on a blue and gold ribbon."}
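Even with a strict system prompt, models sometimes wrap JSON in markdown code fences anyway, which would make json.loads fail. A small defensive parser is cheap insurance; this helper (parse_json_reply, our own name) is a sketch, not part of the API:
def parse_json_reply(text):
    # best-effort parse of a reply that should be plain JSON;
    # strips markdown code fences if the model added them anyway
    text = text.strip()
    if text.startswith('```'):
        text = text.split('\n', 1)[1]    # drop the opening fence (possibly ```json)
        text = text.rsplit('```', 1)[0]  # drop the closing fence
    return json.loads(text)

parse_json_reply(response_content)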
Including documents¶
Similar to images, documents are included as another content item with base64 data.
def make_document_prompt(model, document_path, prompt):
with open(document_path, 'rb') as f:
pdf_bytes = f.read()
pdf_base_64 = base64.b64encode(pdf_bytes).decode('utf-8')
encoded = 'data:application/pdf;base64,' + pdf_base_64
return {
'model': model,
'messages': [
{
'role': 'user',
'content': [
{'type': 'text', 'text': prompt},
{'type': 'file', 'file': {'file_name': 'Test', 'file_data': encoded}},
],
}
],
}
prompt = make_document_prompt('gemini-2.5-flash', 'documents/AWorldofLovesample.pdf', 'Please summarize this document.')
resp = requests.post(endpoint, json=prompt, headers=headers)
print(resp)
response_object = resp.json()
response_object['choices'][0]['message']['content']
<Response [200]>
'The document vividly describes an unusual June sunrise over an Irish landscape, highlighting a river gorge, distant mountains, and the mansion of Montefort. Montefort itself is depicted as a once grand estate now in picturesque disrepair, with neglected grounds and dilapidated farm buildings contrasting with remnants of its former elegance, like a stone archway, and its dramatic position overlooking the gorge.\n\nA 20-year-old woman named Jane emerges from the house, dressed in an anachronistic Edwardian muslin dress. She walks to an obelisk, rereads a letter, and then intently observes two drawn upstairs windows of Montefort. The text concludes with an intimate glimpse inside a dimly lit, claret-red room, featuring a large four-poster bed and an assortment of personal effects on a bedside table, hinting at a complex inner world or domestic setting.'
Model Differences¶
Different models have different capabilities. The document AWorldofLovesample.pdf, which we passed to the model above, is a scanned PDF that contains only an image of the text; it doesn't contain any text content. Gemini is capable of performing OCR on the image and extracting text from it.
On the other hand, Llama does not do this (at the moment) and responds that it sees a blank document. To use this API effectively, it is important to understand the differences between the underlying models.
prompt = make_document_prompt('llama_4_maverick', 'documents/AWorldofLovesample.pdf', 'Please summarize this document.')
resp = requests.post(endpoint, json=prompt, headers=headers)
response_object = resp.json()
response_object['choices'][0]['message']['content']
'There is no document content to summarize. The document "Untitled.pdf" is listed, but its content is empty.'
Tool Calls¶
You can provide the API with functions that it can ask you to call if it needs more information. This is the basis for agentic AI and is a powerful technique for making domain-specific information available to models.
This requires two calls to the API:
- Pass the prompt along with a list of tools available to the LLM
- Extract any tool calls the LLM requests from its response
- Call the function(s) with the provided arguments
- Call the API again with the results of the function execution
import random
# An arbitrary function you would like the LLM to have at its disposal.
# This function could be anything: something that interfaces with a DB, calls an API, etc.
# This is a toy example; there's nothing magic about this number, but the LLM doesn't need to know that.
def magic_number(factor):
"""Generate a magic number that is a multiple of factor"""
return random.randint(10000, 50000) * factor
# Tell the LLM about this function and its expected parameters
prompt = {
'model': 'gemini-2.5-flash',
'messages': [{'role': 'user', 'content': 'Generate a magic number that is a multiple of 100'}],
'tools': [
{
'type': 'function',
'function': {
'name': 'magic_number',
'description': 'Generates a magic number that is a multiple of a given factor',
'parameters': {
'type': 'object',
'properties': {
'factor': {
'type': 'number',
'description': 'The number that will be a factor of the magic number',
}
},
'required': ['factor'],
},
},
}
],
}
resp = requests.post(endpoint, json=prompt, headers=headers)
response_object = resp.json()
# The LLM responds in the `tool_calls` section of the response that it
# would like to call this function in order to answer the user's prompt
function_call = response_object['choices'][0]['message']['tool_calls'][0]
tool_id = function_call['id']
tool_name = function_call['function']['name']
args = function_call['function']['arguments']
print('id:', tool_id, 'name:', tool_name, 'args:', args)
id: call_3757dadc6d name: magic_number args: {"factor": 100}
# Call the function
args = json.loads(args)
func_result = {'magic_number': magic_number(args['factor'])}
func_result
{'magic_number': 2683700}
# Craft a prompt that contains the result of the tool call
prompt = {
'model': 'gemini-2.5-flash',
'messages': [
{'role': 'user', 'content': 'Generate a magic number that is a multiple of 100'},
{
'role': 'assistant',
'tool_calls': [
{'id': tool_id, 'type': 'function', 'function': {'name': tool_name, 'arguments': json.dumps(args)}}
],
},
{'role': 'tool', 'tool_call_id': tool_id, 'content': json.dumps(func_result)},
],
}
resp = requests.post(endpoint, json=prompt, headers=headers)
response_object = resp.json()
# The LLM can now answer the original prompt
# using the result obtained from calling the function
response_object['choices'][0]['message']
{'role': 'assistant', 'content': 'The magic number is 2683700.'}
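To handle more than one round of tool calls, you can wrap this exchange in a loop that keeps calling the API until the model stops requesting tools. The following is a sketch that assumes the same message format used above; run_with_tools and available_tools are our own names:
# map tool names to the Python functions that implement them
available_tools = {'magic_number': magic_number}

def run_with_tools(messages, tools, model='gemini-2.5-flash', max_rounds=5):
    for _ in range(max_rounds):
        prompt = {'model': model, 'messages': messages, 'tools': tools}
        resp = requests.post(endpoint, json=prompt, headers=headers)
        message = resp.json()['choices'][0]['message']
        tool_calls = message.get('tool_calls')
        if not tool_calls:
            return message['content']
        # echo the assistant's tool request, then append each tool result
        messages.append(message)
        for call in tool_calls:
            func = available_tools[call['function']['name']]
            args = json.loads(call['function']['arguments'])
            messages.append({
                'role': 'tool',
                'tool_call_id': call['id'],
                'content': json.dumps(func(**args)),
            })
    raise RuntimeError('model was still requesting tools after max_rounds rounds')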
Using third party libraries for tool calls¶
As you can see above, managing conversation state and message formats over the course of even a toy example can be tricky.
Since this API is modelled after the OpenAI Chat Completions API, you can use many tools that expect this format.
Here's an example of using Pydantic AI to simplify calling the same tool we created above. You will need to install the Pydantic AI Python library (pydantic-ai) for the following to work.
import random
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider
model = OpenAIChatModel('gemini-2.5-flash', provider=OpenAIProvider(base_url=base_url + '/api/v1', api_key=api_key))
agent = Agent(model)
# Use Pydantic's decorator to make this function
# available as a tool to the LLM.
@agent.tool_plain
def magic_number(factor: int) -> int:
"""Generate a magic number that is a multiple of factor"""
print('Calling magic number tool with factor:', factor)
return random.randint(10000, 50000) * factor
# We can give it more than one tool to use
@agent.tool_plain
def special_number() -> int:
"""Generate a special number"""
sn = random.randint(1, 20)
print('Calling tool to get special number which is:', sn)
return sn
# In a notebook we can await the agent directly; in a plain
# script, use agent.run_sync(...) instead.
resp = await agent.run(
    'Please get a special number and then give me a magic number that is a multiple of the special number',
)
print(resp.output)
Calling tool to get special number which is: 15
Calling magic number tool with factor: 15
The special number is 15. The magic number is 438000.
Embeddings¶
endpoint = f'{base_url}/api/v1/embeddings'
headers = {'accept': 'application/json', 'authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}
request = {
'model': 'cohere_english_v3',
'encodingFormat': 'float',
'input': [
'Sweet is the breath of morn, her rising sweet, With charm of earliest birds',
'With Midnight to the North of Her — And Midnight to the South of Her — And Maelstrom — in the Sky',
],
'input_type': 'search_document',
}
resp = requests.post(endpoint, json=request, headers=headers)
result = resp.json()
result['data'][0]['embedding'][0:10]
[-0.0036468506, -0.023422241, -0.038757324, -0.038635254, -0.008544922, -0.0076293945, 0.004508972, 0.022476196, 0.0019721985, 0.017059326]
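Once you have vectors, cosine similarity is the usual way to compare them. A minimal sketch with NumPy (the helper name is ours), comparing the two sentences we just embedded:
import numpy as np

def cosine_similarity(a, b):
    a, b = np.asarray(a), np.asarray(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

cosine_similarity(result['data'][0]['embedding'], result['data'][1]['embedding'])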
Using embeddings for simple clustering¶
Embeddings are frequently used in Retrieval Augmented Generation (RAG) applications. But the semantic data captured in the embedding vectors has uses beyond RAG.
This is a simple example that creates embeddings for 96 headlines. (We chose 96 here for simplicity, since Cohere allows us to embed up to 96 documents in one call.) These embeddings are put through a dimensionality-reduction algorithm and then plotted, showing clustering based on semantic information.
# If you need to install these:
# !pip -q install umap-learn   # note: install umap-learn, not umap
# !pip -q install datasets
import random
import numpy as np
import matplotlib.pyplot as plt
import umap.umap_ as umap
from datasets import load_dataset
Create a small corpus of news headlines¶
This uses a dataset provided by Hugging Face datasets and selects a random sample with equal representation from three categories.
dataset = load_dataset('ag_news', split='train') # 120k rows
label_names = {0: 'World', 1: 'Sports', 2: 'Business', 3: 'Sci/Tech'}
target_labels = [0, 1, 3] # World, Sports, Sci/Tech
sample_size = 32
rows = []
for lbl in target_labels:
subset = [r for r in dataset if r['label'] == lbl]
rows.extend(random.sample(subset, sample_size))
texts = [r['text'] for r in rows]
labels = [label_names[r['label']] for r in rows]
print(f'Loaded {len(texts)} headlines: {[f"{label}={labels.count(label)}" for label in set(labels)]}\n')
print('Example text:', texts[0])
Loaded 96 headlines: ['World=32', 'Sci/Tech=32', 'Sports=32']

Example text: Budget fight deals setback to Sharon The government of Prime Minister Ariel Sharon was thrown into crisis last night after Israel #39;s parliament rejected the proposed 2005 budget and Sharon
Use the API to get embeddings for each headline¶
Because we limited the number of headlines to 96, we can do this in one request to the API. If we had a larger set, we would need to split it into batches of 96 (see the sketch after this example). If the documents were larger than headlines, we would also need to split each document into chunks smaller than the Cohere token limit.
request = {'model': 'cohere_english_v3', 'encodingFormat': 'float', 'input': texts, 'input_type': 'clustering'}
resp = requests.post(endpoint, json=request, headers=headers)
result = resp.json()
raw_embeddings = [data['embedding'] for data in result['data']]
embeddings = np.array(raw_embeddings)
embeddings.shape
(96, 1024)
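For a corpus larger than 96 documents, the same request can be wrapped in a simple batching loop. A sketch (embed_in_batches is our own helper name; it assumes the request format used above):
def embed_in_batches(texts, batch_size=96):
    # Cohere accepts at most 96 documents per embeddings request
    embeddings = []
    for i in range(0, len(texts), batch_size):
        request = {
            'model': 'cohere_english_v3',
            'encodingFormat': 'float',
            'input': texts[i:i + batch_size],
            'input_type': 'clustering',
        }
        resp = requests.post(endpoint, json=request, headers=headers)
        embeddings.extend(d['embedding'] for d in resp.json()['data'])
    return embeddings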
Reduce dimensions¶
Cohere embeddings have 1024 dimensions. This allows the embedding to capture semantic meaning on many different levels, but it also makes the embeddings challenging to visualize. UMAP (from the umap-learn package) is a non-linear dimensionality-reduction algorithm that tries to keep points that are close together in the high-dimensional space close together in the low-dimensional map. This is useful for visualization.
From the plot below, we can see that these headlines cluster by category based solely on the text of the headlines.
reducer = umap.UMAP(random_state=42, n_jobs=1, min_dist=0.2)
emb_2d = reducer.fit_transform(embeddings)
plt.figure(figsize=(9, 7))
unique_labels = sorted(set(labels))
colors = {lbl: idx for idx, lbl in enumerate(unique_labels)}
for lbl in unique_labels:
idxs = [i for i, label in enumerate(labels) if label == lbl]
plt.scatter(emb_2d[idxs, 0], emb_2d[idxs, 1], label=lbl, s=45)
plt.title('UMAP projection of 96 AG-News headlines')
plt.legend()
plt.axis('off')
plt.show()