
Streaming chatbot with LangChain

Add your Anyscale Endpoints token and run the following code to create a local chatbot with LangChain. To handle streaming in LangChain, you have to respond to callbacks, which complicates the program flow: in this example, a callback handler pushes tokens onto a queue that the main thread consumes. A callback-free alternative using stream() appears after the example.

Install the openai>=1.0.0 and langchain>=0.1.7 packages for this example.
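For example, with pip (the langchain-community package provides the ChatAnyscale integration used below):

pip install "openai>=1.0.0" "langchain>=0.1.7" langchain-community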

from langchain_community.chat_models import ChatAnyscale
from langchain.callbacks.base import BaseCallbackHandler
from langchain.memory import ChatMessageHistory
from queue import Queue
from threading import Thread
import sys

INPUTMARKER_END = "-- END --"
ANYSCALE_ENDPOINT_TOKEN = "YOUR_ANYSCALE_ENDPOINT_TOKEN"

class LangchainChatAgent:

    class StreamingCBH(BaseCallbackHandler):
        """Callback handler that forwards streamed tokens to a queue."""

        def __init__(self, q):
            self.q = q

        def on_llm_new_token(
            self,
            token,
            *,
            run_id,
            parent_run_id=None,
            **kwargs,
        ) -> None:
            # Push each newly generated token onto the queue.
            self.q.put(token)

        def on_llm_end(self, response, *, run_id, parent_run_id, **kwargs):
            # Signal the consumer that generation is finished.
            self.q.put(INPUTMARKER_END)


    def __init__(self, model: str = None):
        # This simple example doesn't modify the past conversation.
        # Eventually you run out of context window, but this should be
        # enough for a conversation of roughly 30 turns. For longer
        # conversations, either trim the message history or summarize it
        # (see the trimming sketch after this example).
        self.message_history = ChatMessageHistory()
        self.model = model
        self.llm = ChatAnyscale(anyscale_api_key=ANYSCALE_ENDPOINT_TOKEN,
                                temperature=0, model_name=self.model,
                                streaming=True)

    def process_input(self, user_message: str):
        self.message_history.add_user_message(user_message)
        myq = Queue()

        # Handle streaming for LangChain with callbacks: kick off a thread
        # that calls invoke(), whose callback handler puts tokens into the
        # queue. The main thread then waits in the loop below for elements
        # to be pushed into the queue.
        thread = Thread(target=self.llm.invoke, kwargs={
            'input': self.message_history.messages,
            'config': {'callbacks': [self.StreamingCBH(myq)]}})
        thread.start()
        ai_message = ''
        while True:
            token = myq.get()
            if token == INPUTMARKER_END:
                break
            ai_message += token
            yield token

        self.message_history.add_ai_message(ai_message)

agent = LangchainChatAgent("meta-llama/Llama-2-70b-chat-hf")
sys.stdout.write("Let's have a chat. (Enter `quit` to exit)\n")
while True:
    sys.stdout.write('> ')
    inp = input()
    if inp == 'quit':
        break
    for word in agent.process_input(inp):
        sys.stdout.write(word)
        sys.stdout.flush()
    sys.stdout.write('\n')
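
If you prefer to avoid the callback-plus-thread machinery, chat models in langchain>=0.1 also expose a synchronous stream() method that yields chunks directly. A sketch of process_input rewritten on top of it (not part of the original example; same behavior assumed):

    def process_input(self, user_message: str):
        self.message_history.add_user_message(user_message)
        ai_message = ''
        # stream() yields AIMessageChunk objects; .content holds the new text.
        for chunk in self.llm.stream(self.message_history.messages):
            ai_message += chunk.content
            yield chunk.content
        self.message_history.add_ai_message(ai_message)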
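
For longer chats, you need to keep the history from outgrowing the model's context window. A minimal trimming sketch, assuming a hypothetical MAX_MESSAGES cap and that dropping the oldest turns is acceptable (summarizing them instead would preserve more context):

MAX_MESSAGES = 30  # hypothetical cap; tune it to your model's context window

def trim_history(history: ChatMessageHistory) -> None:
    # Keep only the most recent messages.
    if len(history.messages) > MAX_MESSAGES:
        history.messages = history.messages[-MAX_MESSAGES:]

You could call trim_history(self.message_history) at the end of process_input.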