|
import uuid |
|
from qdrant_client.http import models |
|
from qdrant_client import QdrantClient |
|
|
|
|
|
class QdrantU:
    """Convenience wrapper around a ``QdrantClient`` bound to one collection.

    The wrapper uploads documents (title, content, publish date, embedding)
    as points, reports the collection's point count, and runs vector searches.
    It can be used as a context manager so the client is closed on exit.
    """

    # SECURITY(review): these literals were hard-coded in the constructor and
    # are kept ONLY as a last-resort fallback so existing ``QdrantU(name)``
    # callers keep working. The API key is exposed in source control: rotate
    # it, supply the new one via the ``api_key`` argument or the
    # ``QDRANT_API_KEY`` environment variable, and then delete these constants.
    _DEFAULT_URL = "https://5c32ac64-b1f7-4665-91eb-e321a98c02f6.europe-west3-0.gcp.cloud.qdrant.io:6333"
    _LEGACY_API_KEY = "Wd_RTregmznFMCyDLagJHM_7a5TjJJuFLVTuMgfjQD44-BHLnhYbUg"

    def __init__(self, collection_name, url=None, api_key=None):
        """Create the client and remember the target collection.

        Args:
            collection_name: Name of the collection every method operates on.
            url: Qdrant endpoint. When omitted, the ``QDRANT_URL`` environment
                variable is used, then the legacy built-in endpoint.
            api_key: Qdrant API key. When omitted, the ``QDRANT_API_KEY``
                environment variable is used, then the legacy built-in key.
        """
        # Function-scope import keeps this block self-contained without
        # touching the file's import section.
        import os

        resolved_url = url or os.environ.get("QDRANT_URL") or self._DEFAULT_URL
        resolved_key = (
            api_key or os.environ.get("QDRANT_API_KEY") or self._LEGACY_API_KEY
        )
        self.client = QdrantClient(url=resolved_url, api_key=resolved_key)
        self.collection_name = collection_name

    def __enter__(self):
        """Return this wrapper so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client on leaving the ``with`` block; never suppress errors."""
        self.close_connection()

    def _upload_documents_to_Qdrant(self, data, source):
        """Upsert one batch of documents into the collection.

        Args:
            data: Object whose ``"title"``, ``"content"``, ``"publishdate"``
                and ``"embedding"`` items are iterated in lockstep.
            source: Value stored under the ``"source"`` payload key of every
                point in this batch.
        """
        columns = zip(
            data["title"], data["content"], data["publishdate"], data["embedding"]
        )
        # Each point gets a fresh random UUID, so re-uploading the same
        # documents creates new points rather than overwriting old ones.
        points = [
            models.PointStruct(
                id=str(uuid.uuid4()),
                vector=embedding,
                payload={
                    "title": title,
                    "content": content,
                    "publishdate": publishdate,
                    "source": source,
                },
            )
            for title, content, publishdate, embedding in columns
        ]

        self.client.upsert(
            collection_name=self.collection_name,
            points=points,
        )

        print("Uploaded:", len(data["embedding"]), "documents to the Qdrant database")

    def upload_to_Qdrant(self, data, batch_size=35, source=''):
        """Upload ``data`` to the collection in consecutive batches.

        Args:
            data: Sliceable container of documents; it must support
                ``len(data)``, ``data[i:j]`` and column access by name on
                each slice (see ``_upload_documents_to_Qdrant``).
            batch_size: Maximum number of documents per upsert request.
            source: Value stored under the ``"source"`` payload key.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A negative step would make ``range`` empty and silently upload
        # nothing; zero already raised ValueError. Fail loudly for both.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        for start in range(0, len(data), batch_size):
            batch = data[start:start + batch_size]
            self._upload_documents_to_Qdrant(batch, source)
            # Count rows through the embedding column: ``len(batch)`` is the
            # number of columns when a slice is a mapping of column lists.
            uploaded_so_far = start + len(batch["embedding"])
            print(f"Uploaded {uploaded_so_far} documents")

    def get_number_of_vectors(self):
        """Return the ``points_count`` reported for the collection."""
        collection_info = self.client.get_collection(self.collection_name)
        return collection_info.points_count

    def close_connection(self):
        """Close the underlying client."""
        self.client.close()

    def search(self, query, text_embedder, limit):
        """Embed ``query`` and return the closest points with their payloads.

        Args:
            query: Query text passed to ``text_embedder.embed_query``.
            text_embedder: Object exposing ``embed_query(query_text=...)``.
                Presumably it returns a batch of array-like vectors; only the
                first element is used, via ``.tolist()`` (confirm with the
                embedder implementation).
            limit: Maximum number of results to return.

        Returns:
            Whatever ``client.search`` returns for the query vector.
        """
        query_vector = text_embedder.embed_query(query_text=query)
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector[0].tolist(),
            limit=limit,
            with_payload=True,
        )