from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

# Load the pretrained RAG tokenizer, retriever, and model.
# use_dummy_dataset=True loads a small demo index instead of the full wiki_dpr index.
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)

# Encode the question, retrieve supporting passages, and generate an answer.
input_text = "What is the capital of France?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
outputs = model.generate(input_ids=input_ids)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
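A note on this setup: use_dummy_dataset=True keeps the download small and is only meant for demonstration; pointing the retriever at a full wiki_dpr index gives far better retrieval but is a very large download (on the order of tens of gigabytes). RagRetriever also expects the datasets and faiss packages to be installed.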
RAG combines information retrieval with language generation: a retriever first finds documents relevant to the query, and the generator then conditions its answer on those documents. This reduces hallucinations and ensures answers are grounded in retrieved evidence rather than only in the model's parameters. The example below builds the same pipeline by hand for a small set of user-supplied documents: embed the documents, retrieve the best match with cosine similarity, and generate an answer with a seq2seq model.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, util
import torch
# Example user documents
user_docs = [
    "Paris is the capital city of France.",
    "France is located in Western Europe.",
    "The Eiffel Tower is one of the most famous landmarks in Paris."
]
# Step 1: Embed user documents
embedder = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = embedder.encode(user_docs, convert_to_tensor=True)
# Step 2: Accept user query
query = "Where is the Eiffel Tower?"
query_embedding = embedder.encode(query, convert_to_tensor=True)
# Step 3: Find most relevant document using cosine similarity
scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)
best_doc_idx = torch.argmax(scores).item()
context = user_docs[best_doc_idx]
# Step 4: Generate an answer using context
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
prompt = f"Answer the question based on the context:\nContext: {context}\nQuestion: {query}"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
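The retrieval step above keeps only the single highest-scoring document. As a minimal sketch, continuing the script above (it reuses scores, user_docs, query, tokenizer, and model; the value of k is an assumption, not part of the original example), a top-k variant can join the best k documents into one context:

# Hypothetical top-k retrieval: join the k best-matching documents into one context
# instead of using only the single best match.
k = 2  # assumed value; tune for your corpus
top_k = torch.topk(scores, k=min(k, len(user_docs)), dim=-1)
context = "\n".join(user_docs[i] for i in top_k.indices[0].tolist())

prompt = f"Answer the question based on the context:\nContext: {context}\nQuestion: {query}"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Retrieving several documents makes the answer less sensitive to a single bad retrieval, at the cost of a longer prompt.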