oceansweep committed
Commit 904ea9e · verified · 1 Parent(s): f9d0201

Delete App_Function_Libraries/RAG_Libary_2.py

App_Function_Libraries/RAG_Libary_2.py DELETED
@@ -1,721 +0,0 @@
- # Import necessary modules and functions
- import configparser
- from typing import Dict, Any
- # Local Imports
- #from App_Function_Libraries.ChromaDB_Library import process_and_store_content, vector_search, chroma_client
- from Article_Extractor_Lib import scrape_article
- from SQLite_DB import search_db, db
- # 3rd-Party Imports
- #import openai
- # Initialize OpenAI client (adjust this based on your API key management)
- #openai.api_key = "your-openai-api-key"
-
-
- # Main RAG pipeline function
- def rag_pipeline(url: str, query: str, api_choice=None) -> Dict[str, Any]:
-     # Extract content
-     # article_data = scrape_article(url)
-     # content = article_data['content']
-
-     # Process and store content
-     # collection_name = "article_" + str(hash(url))
-     # process_and_store_content(content, collection_name)
-
-     # Perform searches
-     # vector_results = vector_search(collection_name, query, k=5)
-     # fts_results = search_db(query, ["content"], "", page=1, results_per_page=5)
-
-     # Combine results
-     # all_results = vector_results + [result['content'] for result in fts_results]
-     # context = "\n".join(all_results)
-
-     # Generate answer using the selected API
-     # answer = generate_answer(api_choice, context, query)
-
-     # return {
-     #     "answer": answer,
-     #     "context": context
-     # }
-     pass
-
- config = configparser.ConfigParser()
- config.read('config.txt')
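
Since generate_answer below pulls every key from an [API] section via configparser, config.txt is expected to be INI-formatted. A minimal sketch of its shape (section and key names are taken from the lookups in the code; all values are placeholders):

[API]
openai_api_key = sk-...
anthropic_api_key = your-anthropic-key
cohere_api_key = your-cohere-key
local_llm_path = /path/to/local/model
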
-
- def generate_answer(api_choice: str, context: str, query: str) -> str:
-     prompt = f"Context: {context}\n\nQuestion: {query}"
-     if api_choice == "OpenAI":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_openai
-         return summarize_with_openai(config['API']['openai_api_key'], prompt, "")
-     elif api_choice == "Anthropic":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_anthropic
-         return summarize_with_anthropic(config['API']['anthropic_api_key'], prompt, "")
-     elif api_choice == "Cohere":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_cohere
-         return summarize_with_cohere(config['API']['cohere_api_key'], prompt, "")
-     elif api_choice == "Groq":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_groq
-         return summarize_with_groq(config['API']['groq_api_key'], prompt, "")
-     elif api_choice == "OpenRouter":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_openrouter
-         return summarize_with_openrouter(config['API']['openrouter_api_key'], prompt, "")
-     elif api_choice == "HuggingFace":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_huggingface
-         return summarize_with_huggingface(config['API']['huggingface_api_key'], prompt, "")
-     elif api_choice == "DeepSeek":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_deepseek
-         return summarize_with_deepseek(config['API']['deepseek_api_key'], prompt, "")
-     elif api_choice == "Mistral":
-         from App_Function_Libraries.Summarization_General_Lib import summarize_with_mistral
-         return summarize_with_mistral(config['API']['mistral_api_key'], prompt, "")
-     elif api_choice == "Local-LLM":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_local_llm
-         return summarize_with_local_llm(config['API']['local_llm_path'], prompt, "")
-     elif api_choice == "Llama.cpp":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_llama
-         return summarize_with_llama(config['API']['llama_api_key'], prompt, "")
-     elif api_choice == "Kobold":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_kobold
-         return summarize_with_kobold(config['API']['kobold_api_key'], prompt, "")
-     elif api_choice == "Ooba":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_oobabooga
-         return summarize_with_oobabooga(config['API']['ooba_api_key'], prompt, "")
-     elif api_choice == "TabbyAPI":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_tabbyapi
-         return summarize_with_tabbyapi(config['API']['tabby_api_key'], prompt, "")
-     elif api_choice == "vLLM":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_vllm
-         return summarize_with_vllm(config['API']['vllm_api_key'], prompt, "")
-     elif api_choice == "ollama":
-         from App_Function_Libraries.Local_Summarization_Lib import summarize_with_ollama
-         return summarize_with_ollama(config['API']['ollama_api_key'], prompt, "")
-     else:
-         raise ValueError(f"Unsupported API choice: {api_choice}")
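
The chain above repeats one pattern per provider: import a summarize_with_* function and call it with the matching config key. The same dispatch can be expressed as a lookup table; a sketch of that refactor (hypothetical, not part of the original file; only three providers shown):

import importlib

# (module, function name, config key) per provider, mirroring the if/elif chain above
PROVIDERS = {
    "OpenAI": ("App_Function_Libraries.Summarization_General_Lib", "summarize_with_openai", "openai_api_key"),
    "Anthropic": ("App_Function_Libraries.Summarization_General_Lib", "summarize_with_anthropic", "anthropic_api_key"),
    "Llama.cpp": ("App_Function_Libraries.Local_Summarization_Lib", "summarize_with_llama", "llama_api_key"),
    # ...the remaining providers follow the same (module, function, key) pattern
}

def generate_answer_table(api_choice: str, context: str, query: str) -> str:
    prompt = f"Context: {context}\n\nQuestion: {query}"
    if api_choice not in PROVIDERS:
        raise ValueError(f"Unsupported API choice: {api_choice}")
    module_name, func_name, key = PROVIDERS[api_choice]
    # Import lazily, as the original does, so unused backends are never loaded
    summarize = getattr(importlib.import_module(module_name), func_name)
    return summarize(config['API'][key], prompt, "")
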
-
- # Function to preprocess and store all existing content in the database
- #def preprocess_all_content():
- #    with db.get_connection() as conn:
- #        cursor = conn.cursor()
- #        cursor.execute("SELECT id, content FROM Media")
- #        for row in cursor.fetchall():
- #            process_and_store_content(row[1], f"media_{row[0]}")
-
-
- # Function to perform RAG search across all stored content
- def rag_search(query: str, api_choice: str) -> Dict[str, Any]:
-     # Perform vector search across all collections
-     # all_collections = chroma_client.list_collections()
-     # vector_results = []
-     # for collection in all_collections:
-     #     vector_results.extend(vector_search(collection.name, query, k=2))
-
-     # Perform FTS search
-     # fts_results = search_db(query, ["content"], "", page=1, results_per_page=10)
-
-     # Combine results
-     # all_results = vector_results + [result['content'] for result in fts_results]
-     # context = "\n".join(all_results[:10])  # Limit to top 10 results
-
-     # Generate answer using the selected API
-     # answer = generate_answer(api_choice, context, query)
-
-     # return {
-     #     "answer": answer,
-     #     "context": context
-     # }
-     pass
-
- # Example usage:
- # 1. Initialize the system:
- # create_tables(db)  # Ensure FTS tables are set up
- # preprocess_all_content()  # Process and store all existing content
-
- # 2. Perform RAG on a specific URL:
- # result = rag_pipeline("https://example.com/article", "What is the main topic of this article?")
- # print(result['answer'])
-
- # 3. Perform RAG search across all content (rag_search also requires an API choice, e.g. "OpenAI"):
- # result = rag_search("What are the key points about climate change?", "OpenAI")
- # print(result['answer'])
-
-
-
-
- ##################################################################################################################
- # RAG Pipeline 1
- # 0.62 0.61 0.75 63402.0
- # from langchain_openai import ChatOpenAI
- #
- # from langchain_community.document_loaders import WebBaseLoader
- # from langchain_openai import OpenAIEmbeddings
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- # from langchain_chroma import Chroma
- #
- # from langchain_community.retrievers import BM25Retriever
- # from langchain.retrievers import ParentDocumentRetriever
- # from langchain.storage import InMemoryStore
- # import os
- # from operator import itemgetter
- # from langchain import hub
- # from langchain_core.output_parsers import StrOutputParser
- # from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
- # from langchain.retrievers import MergerRetriever
- # from langchain.retrievers.document_compressors import DocumentCompressorPipeline
-
-
- # def rag_pipeline():
- #     try:
- #         def format_docs(docs):
- #             return "\n".join(doc.page_content for doc in docs)
- #
- #         llm = ChatOpenAI(model='gpt-4o-mini')
- #
- #         loader = WebBaseLoader('https://en.wikipedia.org/wiki/European_debt_crisis')
- #         docs = loader.load()
- #
- #         embedding = OpenAIEmbeddings(model='text-embedding-3-large')
- #
- #         splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200)
- #         splits = splitter.split_documents(docs)
- #         c = Chroma.from_documents(documents=splits, embedding=embedding,
- #                                   collection_name='testindex-ragbuilder-1724657573', )
- #         retrievers = []
- #         retriever = c.as_retriever(search_type='mmr', search_kwargs={'k': 10})
- #         retrievers.append(retriever)
- #         retriever = BM25Retriever.from_documents(docs)
- #         retrievers.append(retriever)
- #
- #         parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=600)
- #         splits = parent_splitter.split_documents(docs)
- #         store = InMemoryStore()
- #         retriever = ParentDocumentRetriever(vectorstore=c, docstore=store, child_splitter=splitter,
- #                                             parent_splitter=parent_splitter)
- #         retriever.add_documents(docs)
- #         retrievers.append(retriever)
- #         retriever = MergerRetriever(retrievers=retrievers)
- #         prompt = hub.pull("rlm/rag-prompt")
- #         rag_chain = (
- #             RunnableParallel(context=retriever, question=RunnablePassthrough())
- #             .assign(context=itemgetter("context") | RunnableLambda(format_docs))
- #             .assign(answer=prompt | llm | StrOutputParser())
- #             .pick(["answer", "context"]))
- #         return rag_chain
- #     except Exception as e:
- #         print(f"An error occurred: {e}")
-
-
- ## To get the answer and context, use the following code:
- # res = rag_pipeline().invoke("your prompt here")
- # print(res["answer"])
- # print(res["context"])
-
- ############################################################################################################
-
-
-
- ############################################################################################################
- # RAG Pipeline 2
-
- # 0.6 0.73 0.68 3125.0
- # from langchain_openai import ChatOpenAI
- #
- # from langchain_community.document_loaders import WebBaseLoader
- # from langchain_openai import OpenAIEmbeddings
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- # from langchain_chroma import Chroma
- # from langchain.retrievers.multi_query import MultiQueryRetriever
- # from langchain.retrievers import ParentDocumentRetriever
- # from langchain.storage import InMemoryStore
- # from langchain_community.document_transformers import EmbeddingsRedundantFilter
- # from langchain.retrievers.document_compressors import LLMChainFilter
- # from langchain.retrievers.document_compressors import EmbeddingsFilter
- # from langchain.retrievers import ContextualCompressionRetriever
- # import os
- # from operator import itemgetter
- # from langchain import hub
- # from langchain_core.output_parsers import StrOutputParser
- # from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
- # from langchain.retrievers import MergerRetriever
- # from langchain.retrievers.document_compressors import DocumentCompressorPipeline
-
-
- # def rag_pipeline():
- #     try:
- #         def format_docs(docs):
- #             return "\n".join(doc.page_content for doc in docs)
- #
- #         llm = ChatOpenAI(model='gpt-4o-mini')
- #
- #         loader = WebBaseLoader('https://en.wikipedia.org/wiki/European_debt_crisis')
- #         docs = loader.load()
- #
- #         embedding = OpenAIEmbeddings(model='text-embedding-3-large')
- #
- #         splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200)
- #         splits = splitter.split_documents(docs)
- #         c = Chroma.from_documents(documents=splits, embedding=embedding,
- #                                   collection_name='testindex-ragbuilder-1724650962', )
- #         retrievers = []
- #         retriever = MultiQueryRetriever.from_llm(c.as_retriever(search_type='similarity', search_kwargs={'k': 10}),
- #                                                  llm=llm)
- #         retrievers.append(retriever)
- #
- #         parent_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=600)
- #         splits = parent_splitter.split_documents(docs)
- #         store = InMemoryStore()
- #         retriever = ParentDocumentRetriever(vectorstore=c, docstore=store, child_splitter=splitter,
- #                                             parent_splitter=parent_splitter)
- #         retriever.add_documents(docs)
- #         retrievers.append(retriever)
- #         retriever = MergerRetriever(retrievers=retrievers)
- #         arr_comp = []
- #         arr_comp.append(EmbeddingsRedundantFilter(embeddings=embedding))
- #         arr_comp.append(LLMChainFilter.from_llm(llm))
- #         pipeline_compressor = DocumentCompressorPipeline(transformers=arr_comp)
- #         retriever = ContextualCompressionRetriever(base_retriever=retriever, base_compressor=pipeline_compressor)
- #         prompt = hub.pull("rlm/rag-prompt")
- #         rag_chain = (
- #             RunnableParallel(context=retriever, question=RunnablePassthrough())
- #             .assign(context=itemgetter("context") | RunnableLambda(format_docs))
- #             .assign(answer=prompt | llm | StrOutputParser())
- #             .pick(["answer", "context"]))
- #         return rag_chain
- #     except Exception as e:
- #         print(f"An error occurred: {e}")
-
-
- ## To get the answer and context, use the following code:
- # res = rag_pipeline().invoke("your prompt here")
- # print(res["answer"])
- # print(res["context"])
-
-
-
-
-
-
-
- ############################################################################################################
- # Plain bm25 retriever
- # class BM25Retriever(BaseRetriever):
- #     """`BM25` retriever without Elasticsearch."""
- #
- #     vectorizer: Any
- #     """ BM25 vectorizer."""
- #     docs: List[Document] = Field(repr=False)
- #     """ List of documents."""
- #     k: int = 4
- #     """ Number of documents to return."""
- #     preprocess_func: Callable[[str], List[str]] = default_preprocessing_func
- #     """ Preprocessing function to use on the text before BM25 vectorization."""
- #
- #     class Config:
- #         arbitrary_types_allowed = True
- #
- #     @classmethod
- #     def from_texts(
- #         cls,
- #         texts: Iterable[str],
- #         metadatas: Optional[Iterable[dict]] = None,
- #         bm25_params: Optional[Dict[str, Any]] = None,
- #         preprocess_func: Callable[[str], List[str]] = default_preprocessing_func,
- #         **kwargs: Any,
- #     ) -> BM25Retriever:
- #         """
- #         Create a BM25Retriever from a list of texts.
- #         Args:
- #             texts: A list of texts to vectorize.
- #             metadatas: A list of metadata dicts to associate with each text.
- #             bm25_params: Parameters to pass to the BM25 vectorizer.
- #             preprocess_func: A function to preprocess each text before vectorization.
- #             **kwargs: Any other arguments to pass to the retriever.
- #
- #         Returns:
- #             A BM25Retriever instance.
- #         """
- #         try:
- #             from rank_bm25 import BM25Okapi
- #         except ImportError:
- #             raise ImportError(
- #                 "Could not import rank_bm25, please install with `pip install "
- #                 "rank_bm25`."
- #             )
- #
- #         texts_processed = [preprocess_func(t) for t in texts]
- #         bm25_params = bm25_params or {}
- #         vectorizer = BM25Okapi(texts_processed, **bm25_params)
- #         metadatas = metadatas or ({} for _ in texts)
- #         docs = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadatas)]
- #         return cls(
- #             vectorizer=vectorizer, docs=docs, preprocess_func=preprocess_func, **kwargs
- #         )
- #
- #     @classmethod
- #     def from_documents(
- #         cls,
- #         documents: Iterable[Document],
- #         *,
- #         bm25_params: Optional[Dict[str, Any]] = None,
- #         preprocess_func: Callable[[str], List[str]] = default_preprocessing_func,
- #         **kwargs: Any,
- #     ) -> BM25Retriever:
- #         """
- #         Create a BM25Retriever from a list of Documents.
- #         Args:
- #             documents: A list of Documents to vectorize.
- #             bm25_params: Parameters to pass to the BM25 vectorizer.
- #             preprocess_func: A function to preprocess each text before vectorization.
- #             **kwargs: Any other arguments to pass to the retriever.
- #
- #         Returns:
- #             A BM25Retriever instance.
- #         """
- #         texts, metadatas = zip(*((d.page_content, d.metadata) for d in documents))
- #         return cls.from_texts(
- #             texts=texts,
- #             bm25_params=bm25_params,
- #             metadatas=metadatas,
- #             preprocess_func=preprocess_func,
- #             **kwargs,
- #         )
- #
- #     def _get_relevant_documents(
- #         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
- #     ) -> List[Document]:
- #         processed_query = self.preprocess_func(query)
- #         return_docs = self.vectorizer.get_top_n(processed_query, self.docs, n=self.k)
- #         return return_docs
- ############################################################################################################
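
The class above is a thin wrapper over the rank_bm25 package. A minimal standalone sketch of that underlying library (the corpus, query, and whitespace tokenization are made-up stand-ins for preprocess_func):

from rank_bm25 import BM25Okapi

corpus = ["the euro area debt crisis", "bm25 is a ranking function", "retrieval augmented generation"]
tokenized_corpus = [doc.split() for doc in corpus]   # plays the role of preprocess_func
bm25 = BM25Okapi(tokenized_corpus)                   # builds the BM25 index

query = "debt crisis".split()
print(bm25.get_scores(query))                        # one BM25 score per document
print(bm25.get_top_n(query, corpus, n=2))            # top-2 docs, as in _get_relevant_documents
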
-
- ############################################################################################################
- # ElasticSearch BM25 Retriever
- # class ElasticSearchBM25Retriever(BaseRetriever):
- #     """`Elasticsearch` retriever that uses `BM25`.
- #
- #     To connect to an Elasticsearch instance that requires login credentials,
- #     including Elastic Cloud, use the Elasticsearch URL format
- #     https://username:password@es_host:9243. For example, to connect to Elastic
- #     Cloud, create the Elasticsearch URL with the required authentication details and
- #     pass it to the ElasticVectorSearch constructor as the named parameter
- #     elasticsearch_url.
- #
- #     You can obtain your Elastic Cloud URL and login credentials by logging in to the
- #     Elastic Cloud console at https://cloud.elastic.co, selecting your deployment, and
- #     navigating to the "Deployments" page.
- #
- #     To obtain your Elastic Cloud password for the default "elastic" user:
- #
- #     1. Log in to the Elastic Cloud console at https://cloud.elastic.co
- #     2. Go to "Security" > "Users"
- #     3. Locate the "elastic" user and click "Edit"
- #     4. Click "Reset password"
- #     5. Follow the prompts to reset the password
- #
- #     The format for Elastic Cloud URLs is
- #     https://username:password@cluster_id.region_id.gcp.cloud.es.io:9243.
- #     """
- #
- #     client: Any
- #     """Elasticsearch client."""
- #     index_name: str
- #     """Name of the index to use in Elasticsearch."""
- #
- #     @classmethod
- #     def create(
- #         cls, elasticsearch_url: str, index_name: str, k1: float = 2.0, b: float = 0.75
- #     ) -> ElasticSearchBM25Retriever:
- #         """
- #         Create an ElasticSearchBM25Retriever from a list of texts.
- #
- #         Args:
- #             elasticsearch_url: URL of the Elasticsearch instance to connect to.
- #             index_name: Name of the index to use in Elasticsearch.
- #             k1: BM25 parameter k1.
- #             b: BM25 parameter b.
- #
- #         Returns:
- #             An ElasticSearchBM25Retriever instance.
- #         """
- #         from elasticsearch import Elasticsearch
- #
- #         # Create an Elasticsearch client instance
- #         es = Elasticsearch(elasticsearch_url)
- #
- #         # Define the index settings and mappings
- #         settings = {
- #             "analysis": {"analyzer": {"default": {"type": "standard"}}},
- #             "similarity": {
- #                 "custom_bm25": {
- #                     "type": "BM25",
- #                     "k1": k1,
- #                     "b": b,
- #                 }
- #             },
- #         }
- #         mappings = {
- #             "properties": {
- #                 "content": {
- #                     "type": "text",
- #                     "similarity": "custom_bm25",  # Use the custom BM25 similarity
- #                 }
- #             }
- #         }
- #
- #         # Create the index with the specified settings and mappings
- #         es.indices.create(index=index_name, mappings=mappings, settings=settings)
- #         return cls(client=es, index_name=index_name)
- #
- #     def add_texts(
- #         self,
- #         texts: Iterable[str],
- #         refresh_indices: bool = True,
- #     ) -> List[str]:
- #         """Run more texts through the embeddings and add to the retriever.
- #
- #         Args:
- #             texts: Iterable of strings to add to the retriever.
- #             refresh_indices: bool to refresh ElasticSearch indices
- #
- #         Returns:
- #             List of ids from adding the texts into the retriever.
- #         """
- #         try:
- #             from elasticsearch.helpers import bulk
- #         except ImportError:
- #             raise ImportError(
- #                 "Could not import elasticsearch python package. "
- #                 "Please install it with `pip install elasticsearch`."
- #             )
- #         requests = []
- #         ids = []
- #         for i, text in enumerate(texts):
- #             _id = str(uuid.uuid4())
- #             request = {
- #                 "_op_type": "index",
- #                 "_index": self.index_name,
- #                 "content": text,
- #                 "_id": _id,
- #             }
- #             ids.append(_id)
- #             requests.append(request)
- #         bulk(self.client, requests)
- #
- #         if refresh_indices:
- #             self.client.indices.refresh(index=self.index_name)
- #         return ids
- #
- #     def _get_relevant_documents(
- #         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
- #     ) -> List[Document]:
- #         query_dict = {"query": {"match": {"content": query}}}
- #         res = self.client.search(index=self.index_name, body=query_dict)
- #
- #         docs = []
- #         for r in res["hits"]["hits"]:
- #             docs.append(Document(page_content=r["_source"]["content"]))
- #         return docs
- ############################################################################################################
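
A sketch of how the class above would be used, assuming a reachable Elasticsearch instance; the URL and index name are placeholders:

# Hypothetical usage of the commented-out retriever above.
retriever = ElasticSearchBM25Retriever.create(
    elasticsearch_url="http://localhost:9200",   # placeholder instance
    index_name="bm25-demo",                      # placeholder index name
    k1=2.0,
    b=0.75,
)
retriever.add_texts(["first document", "second document"])
docs = retriever.invoke("first")   # BaseRetriever entry point; delegates to _get_relevant_documents
print([d.page_content for d in docs])
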
-
-
- ############################################################################################################
- # Multi Query Retriever
- # class MultiQueryRetriever(BaseRetriever):
- #     """Given a query, use an LLM to write a set of queries.
- #
- #     Retrieve docs for each query. Return the unique union of all retrieved docs.
- #     """
- #
- #     retriever: BaseRetriever
- #     llm_chain: Runnable
- #     verbose: bool = True
- #     parser_key: str = "lines"
- #     """DEPRECATED. parser_key is no longer used and should not be specified."""
- #     include_original: bool = False
- #     """Whether to include the original query in the list of generated queries."""
- #
- #     @classmethod
- #     def from_llm(
- #         cls,
- #         retriever: BaseRetriever,
- #         llm: BaseLanguageModel,
- #         prompt: BasePromptTemplate = DEFAULT_QUERY_PROMPT,
- #         parser_key: Optional[str] = None,
- #         include_original: bool = False,
- #     ) -> "MultiQueryRetriever":
- #         """Initialize from llm using default template.
- #
- #         Args:
- #             retriever: retriever to query documents from
- #             llm: llm for query generation using DEFAULT_QUERY_PROMPT
- #             prompt: The prompt which aims to generate several different versions
- #                 of the given user query
- #             include_original: Whether to include the original query in the list of
- #                 generated queries.
- #
- #         Returns:
- #             MultiQueryRetriever
- #         """
- #         output_parser = LineListOutputParser()
- #         llm_chain = prompt | llm | output_parser
- #         return cls(
- #             retriever=retriever,
- #             llm_chain=llm_chain,
- #             include_original=include_original,
- #         )
- #
- #     async def _aget_relevant_documents(
- #         self,
- #         query: str,
- #         *,
- #         run_manager: AsyncCallbackManagerForRetrieverRun,
- #     ) -> List[Document]:
- #         """Get relevant documents given a user query.
- #
- #         Args:
- #             query: user query
- #
- #         Returns:
- #             Unique union of relevant documents from all generated queries
- #         """
- #         queries = await self.agenerate_queries(query, run_manager)
- #         if self.include_original:
- #             queries.append(query)
- #         documents = await self.aretrieve_documents(queries, run_manager)
- #         return self.unique_union(documents)
- #
- #     async def agenerate_queries(
- #         self, question: str, run_manager: AsyncCallbackManagerForRetrieverRun
- #     ) -> List[str]:
- #         """Generate queries based upon user input.
- #
- #         Args:
- #             question: user query
- #
- #         Returns:
- #             List of LLM generated queries that are similar to the user input
- #         """
- #         response = await self.llm_chain.ainvoke(
- #             {"question": question}, config={"callbacks": run_manager.get_child()}
- #         )
- #         if isinstance(self.llm_chain, LLMChain):
- #             lines = response["text"]
- #         else:
- #             lines = response
- #         if self.verbose:
- #             logger.info(f"Generated queries: {lines}")
- #         return lines
- #
- #     async def aretrieve_documents(
- #         self, queries: List[str], run_manager: AsyncCallbackManagerForRetrieverRun
- #     ) -> List[Document]:
- #         """Run all LLM generated queries.
- #
- #         Args:
- #             queries: query list
- #
- #         Returns:
- #             List of retrieved Documents
- #         """
- #         document_lists = await asyncio.gather(
- #             *(
- #                 self.retriever.ainvoke(
- #                     query, config={"callbacks": run_manager.get_child()}
- #                 )
- #                 for query in queries
- #             )
- #         )
- #         return [doc for docs in document_lists for doc in docs]
- #
- #     def _get_relevant_documents(
- #         self,
- #         query: str,
- #         *,
- #         run_manager: CallbackManagerForRetrieverRun,
- #     ) -> List[Document]:
- #         """Get relevant documents given a user query.
- #
- #         Args:
- #             query: user query
- #
- #         Returns:
- #             Unique union of relevant documents from all generated queries
- #         """
- #         queries = self.generate_queries(query, run_manager)
- #         if self.include_original:
- #             queries.append(query)
- #         documents = self.retrieve_documents(queries, run_manager)
- #         return self.unique_union(documents)
- #
- #     def generate_queries(
- #         self, question: str, run_manager: CallbackManagerForRetrieverRun
- #     ) -> List[str]:
- #         """Generate queries based upon user input.
- #
- #         Args:
- #             question: user query
- #
- #         Returns:
- #             List of LLM generated queries that are similar to the user input
- #         """
- #         response = self.llm_chain.invoke(
- #             {"question": question}, config={"callbacks": run_manager.get_child()}
- #         )
- #         if isinstance(self.llm_chain, LLMChain):
- #             lines = response["text"]
- #         else:
- #             lines = response
- #         if self.verbose:
- #             logger.info(f"Generated queries: {lines}")
- #         return lines
- #
- #     def retrieve_documents(
- #         self, queries: List[str], run_manager: CallbackManagerForRetrieverRun
- #     ) -> List[Document]:
- #         """Run all LLM generated queries.
- #
- #         Args:
- #             queries: query list
- #
- #         Returns:
- #             List of retrieved Documents
- #         """
- #         documents = []
- #         for query in queries:
- #             docs = self.retriever.invoke(
- #                 query, config={"callbacks": run_manager.get_child()}
- #             )
- #             documents.extend(docs)
- #         return documents
- #
- #     def unique_union(self, documents: List[Document]) -> List[Document]:
- #         """Get unique Documents.
- #
- #         Args:
- #             documents: List of retrieved Documents
- #
- #         Returns:
- #             List of unique retrieved Documents
- #         """
- #         return _unique_documents(documents)
- ############################################################################################################
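
The class above is excerpted from LangChain; its public entry point is the from_llm classmethod. A short usage sketch (the vector store and model are assumed to match the RAG Pipeline 2 snippet earlier in this file):

# Hedged usage sketch for the Multi Query Retriever shown above.
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

mq_retriever = MultiQueryRetriever.from_llm(
    retriever=c.as_retriever(search_kwargs={"k": 10}),  # 'c' is the Chroma store from Pipeline 2
    llm=ChatOpenAI(model="gpt-4o-mini"),
    include_original=True,   # also run the user's original query
)
docs = mq_retriever.invoke("What caused the European debt crisis?")
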
-
-
-
-
-
-
-
-
- ############################################################################################################
- # ElasticSearch Retriever
-
- # https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-elasticsearch
- #
- # https://github.com/langchain-ai/langchain/tree/44e3e2391c48bfd0a8e6a20adde0b6567f4f43c3/templates/rag-self-query
-
-
-
-
-
-
-
-