Client wrapper library for OpenAI, Anthropic, and Ollama APIs
Warning
OpenAI and Anthropic clients have not been thoroughly tested due to lack of API keys.
pip install git+https://github.com/dlg1206/llumpy.git
or add to requirements.txt
llumpy @ git+https://github.com/dlg1206/llumpy.git
from llumpy import AnthropicClient, OpenAIClient, OllamaClient
# API key env var: OPENAI_API_KEY
gpt = OpenAIClient('gpt-5.4')
# API key env var: ANTHROPIC_API_KEY
claude = AnthropicClient('claude-sonnet-4-6')
# Ollama Host env var: OLLAMA_HOST (Default: localhost)
# Ollama Port env var: OLLAMA_PORT (Default: 11434)
ollama = OllamaClient('llama3', '8b')
Warning
Clients will fail to initialize if the API key is invalid, the model does not exist, or the key does not have access to that model.
One-shot prompt to an LLM. Useful for simple, one-off prompts
from llumpy import OllamaClient
ollama = OllamaClient('llama3', '8b')
response = ollama.prompt_one("Hello!")
print(response)
Few-shot prompt to an LLM. Useful for advanced, chain-of-thought prompting
import textwrap
from llumpy import OllamaClient, Conversation, ConversationBuilder
ollama = OllamaClient('llama3', '8b')
conversation = (
ConversationBuilder()
.system(
"You are a pirate. You must speak as a pirate at all times, using phrases like 'Arrr', 'matey', and 'shiver me timbers'.")
.user("What is the weather like today?")
.assistant(
"Arrr matey! The skies be grey as Davy Jones' locker and the winds be howlin' somethin' fierce! Shiver me timbers, tis a fine day fer sailin'!")
.user("What should I wear?")
.build()
)
response = ollama.prompt_many(conversation)
print(textwrap.fill(response, width=100))
The ConversationBuilder is the main builder for LLM conversations. It ensures the resulting conversation is in a valid
order to send to the LLM. Using the file param allows prompts to be read directly from files like so:
from llumpy import ConversationBuilder
conversation = ConversationBuilder().user(file="prompts/user.prompt").build()
The builder also supports ephemeral messages, allowing a root conversation to be reused with only the final prompt swapped out like so:
from llumpy import ConversationBuilder
builder = ConversationBuilder().system("Foo")
for tail in ['bar', 'baz']:
    print(builder.build_with_user(tail))
print(builder.build())
build_with_user() and build_with_assistant() also support the file arg.
Stream token responses from the LLM instead of waiting for the complete response
from llumpy import OllamaClient, ConversationBuilder
ollama = OllamaClient('llama3', '8b')
conversation = ConversationBuilder().user("Hello!").build()
for chunk in ollama.prompt_stream(conversation):
    print(ollama.extract_text(chunk), end="_", flush=True)
Other LLM params can be provided as additional params in the prompt method
from llumpy import OllamaClient, ConversationBuilder
ollama = OllamaClient('llama3', '8b')
conversation = (ConversationBuilder()
.system("You are an color expert. Return a single line of text without extra explanation")
.user("Create a name for a shade of red")
.build())
print(ollama.prompt_many(conversation, temperature=0.0))
print("---")
print(ollama.prompt_many(conversation, temperature=1.0))
Async versions are available for OpenAI, Anthropic, and Ollama clients and their corresponding methods
import asyncio
from llumpy import AsyncOllamaClient
async def main():
    """Send one prompt through the async Ollama client and print the reply."""
    client = AsyncOllamaClient('llama3', '8b')
    print(await client.prompt_one("Hello!"))
if __name__ == '__main__':
    asyncio.run(main())
Retry handlers validate the LLM response and automatically reprompt if validation fails
from llumpy import OllamaClient, ConversationBuilder, JSONRetryHandler
ollama = OllamaClient('llama3', '8b')
print(ollama.prompt_one("Hello!", handler=JSONRetryHandler(), retries=2))
from llumpy import OllamaClient, ConversationBuilder, JSONRetryHandler
ollama = OllamaClient('llama3', '8b')
conversation = ConversationBuilder().system("Only reply in JSON").user("Hello!").build()
print(ollama.prompt_many(conversation, handler=JSONRetryHandler()))
Currently, JSONRetryHandler is the only handler that parses the LLM response into a JSON object.
See Custom Handlers for custom handlers.
To access vendor-specific LLM responses, use the
vendor_prompt() or vendor_prompt_stream() methods
from llumpy import OllamaClient, ConversationBuilder, JSONRetryHandler
ollama = OllamaClient('llama3', '8b')
conversation = ConversationBuilder().user("Hello!").build()
response = ollama.vendor_prompt(conversation)
# ollama uses OpenAI APIs
print(type(response))
Inherit the
ModelClient or AsyncModelClient classes and methods
from typing import Any
from llumpy import ModelClient, Conversation, AsyncModelClient
class MyLLMClient(ModelClient):
    """Template for a custom synchronous client built on ModelClient.

    Every method here is a stub; replace each body with vendor-specific logic.
    """

    def vendor_prompt(self, conversation: Conversation, **prompt_kwargs: Any) -> Any:
        """Make the raw API call and return the vendor-specific response object."""

    def vendor_prompt_stream(self, conversation: Conversation, **prompt_kwargs: Any) -> Any:
        """Make the raw API call and return the vendor-specific response stream object."""

    def extract_text(self, response: Any) -> str | None:
        """Pull the text out of a vendor-specific response object."""

    def validate(self) -> None:
        """Check that the client is ready to use."""
class MyAsyncLLMClient(AsyncModelClient):
    """Template for a custom asynchronous client built on AsyncModelClient.

    Every method here is a stub; replace each body with vendor-specific logic.
    Note that extract_text is a regular method while the others are coroutines.
    """

    async def vendor_prompt(self, conversation: Conversation, **prompt_kwargs: Any) -> Any:
        """Make the raw API call and return the vendor-specific response object."""

    async def vendor_prompt_stream(self, conversation: Conversation, **prompt_kwargs: Any) -> Any:
        """Make the raw API call and return the vendor-specific response stream object."""

    def extract_text(self, response: Any) -> str | None:
        """Pull the text out of a vendor-specific response object."""

    async def validate(self) -> None:
        """Check that the client is ready to use."""
pass
Inherit the
RetryHandler or AsyncRetryHandler classes and methods
from typing import Any, Tuple, Type
from llumpy import RetryHandler, AsyncRetryHandler
class MyRetryHandler(RetryHandler):
    """Template for a custom retry handler built on RetryHandler.

    Both members are stubs; replace each body with your own logic.
    """

    def _format(self, response: str) -> Any:
        """Try to format the response, which validates it; raise an exception on failure."""

    @property
    def _retry_on(self) -> Tuple[Type[Exception], ...]:
        """(Optional) The tuple of exception types thrown by _format."""
class MyAsyncRetryHandler(AsyncRetryHandler):
    """Template for a custom retry handler built on AsyncRetryHandler.

    Both members are stubs; replace each body with your own logic.
    """

    def _format(self, response: str) -> Any:
        """Try to format the response, which validates it; raise an exception on failure."""

    @property
    def _retry_on(self) -> Tuple[Type[Exception], ...]:
        """(Optional) The tuple of exception types thrown by _format."""







