
Streaming chatbot with LangChain

Add your Anyscale Endpoints token and run the following code to create a local chatbot with LangChain. To handle streaming in LangChain, you have to respond to callbacks, which complicates the program flow: in this example, a callback handler pushes tokens onto a queue that the main thread consumes. A callback-free alternative using stream() appears after the example.

Install the openai>=1.0.0 and langchain>=0.1.7 packages for this example.
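For example, with pip (the langchain-community package provides the ChatAnyscale integration used below):

pip install "openai>=1.0.0" "langchain>=0.1.7" langchain-community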

from langchain_community.chat_models import ChatAnyscale
from langchain.callbacks.base import BaseCallbackHandler
from langchain.memory import ChatMessageHistory
from queue import Queue
from threading import Thread
import sys

INPUTMARKER_END = "-- END --"
ANYSCALE_ENDPOINT_TOKEN = "YOUR_ANYSCALE_ENDPOINT_TOKEN"

class LangchainChatAgent:

    class StreamingCBH(BaseCallbackHandler):
        """Callback handler that forwards streamed tokens to a queue."""

        def __init__(self, q):
            self.q = q

        def on_llm_new_token(
            self,
            token,
            *,
            run_id,
            parent_run_id=None,
            **kwargs,
        ) -> None:
            # Push each newly generated token onto the queue.
            self.q.put(token)

        def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
            # Signal the consumer that generation is finished.
            self.q.put(INPUTMARKER_END)


    def __init__(self, model: str = None):
        # This simple example doesn't modify the past conversation.
        # Eventually you run out of context window, but this should be
        # enough for a conversation of roughly 30 turns. For longer
        # conversations, either trim the message history or summarize it
        # (see the trimming sketch after this example).
        self.message_history = ChatMessageHistory()
        self.model = model
        self.llm = ChatAnyscale(anyscale_api_key=ANYSCALE_ENDPOINT_TOKEN,
                                temperature=0, model_name=self.model,
                                streaming=True)

    def process_input(self, user_message: str):
        self.message_history.add_user_message(user_message)
        myq = Queue()

        # Handle streaming for LangChain with callbacks: kick off a thread
        # that calls invoke(), whose callback handler puts tokens into the
        # queue. The main thread then waits in the loop below for elements
        # to be pushed into the queue.
        thread = Thread(target=self.llm.invoke, kwargs={
            'input': self.message_history.messages,
            'config': {'callbacks': [self.StreamingCBH(myq)]}})
        thread.start()
        ai_message = ''
        while True:
            token = myq.get()
            if token == INPUTMARKER_END:
                break
            ai_message += token
            yield token

        self.message_history.add_ai_message(ai_message)

agent = LangchainChatAgent("meta-llama/Llama-2-70b-chat-hf")
sys.stdout.write("Let's have a chat. (Enter `quit` to exit)\n")
while True:
    sys.stdout.write('> ')
    inp = input()
    if inp == 'quit':
        break
    for word in agent.process_input(inp):
        sys.stdout.write(word)
        sys.stdout.flush()
    sys.stdout.write('\n')
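
If you prefer to avoid the callback-plus-thread machinery, chat models in langchain>=0.1 also expose a synchronous stream() method that yields chunks directly. A sketch of process_input rewritten on top of it (not part of the original example; same behavior assumed):

    def process_input(self, user_message: str):
        self.message_history.add_user_message(user_message)
        ai_message = ''
        # stream() yields AIMessageChunk objects; .content holds the new text.
        for chunk in self.llm.stream(self.message_history.messages):
            ai_message += chunk.content
            yield chunk.content
        self.message_history.add_ai_message(ai_message)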
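
For longer chats, you need to keep the history from outgrowing the model's context window. A minimal trimming sketch, assuming a hypothetical MAX_MESSAGES cap and that dropping the oldest turns is acceptable (summarizing them instead would preserve more context):

MAX_MESSAGES = 30  # hypothetical cap; tune it to your model's context window

def trim_history(history: ChatMessageHistory) -> None:
    # Keep only the most recent messages.
    if len(history.messages) > MAX_MESSAGES:
        history.messages = history.messages[-MAX_MESSAGES:]

You could call trim_history(self.message_history) at the end of process_input.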