import json
import os
import tempfile

import pandas as pd
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import MessagesPlaceholder
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatZhipuAI
from langchain_community.chat_models.tongyi import ChatTongyi
from langchain_community.document_loaders import CSVLoader, JSONLoader, PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter, RecursiveJsonSplitter

# Restrict the process to the second GPU before any CUDA context is created.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
device = "cuda"
memory = 6


def DataAnalysis(data):
    """Placeholder for LLM-driven analysis of a tabular (CSV) dataset.

    Args:
        data: pandas.DataFrame loaded from the uploaded CSV file.

    Currently a no-op — TODO(review): implement the analysis step.
    """
    return


def chroma_save_upload(path):
    """Load an uploaded file, split it into chunks and persist them to Chroma.

    Supported extensions: .txt, .pdf and .json are chunked, embedded and
    stored in the ``chromadb`` persistent vector store; .csv is handed to
    :func:`DataAnalysis` instead of being indexed.

    Args:
        path: Filesystem path of the uploaded file (a local temp file
            written by ``configure_retriever``).
    """
    try:
        # splitext handles filenames that contain extra dots; the original
        # `basename(path).split('.')[1]` broke on names like "a.b.txt".
        file_type = os.path.splitext(path)[1].lstrip(".").lower()

        doc = None
        if file_type == "txt":
            doc = TextLoader(path, encoding="utf-8").load()
        elif file_type == "pdf":
            doc = PyPDFLoader(path).load()
        elif file_type == "csv":
            # CSV files go to the data-analysis path and are not indexed.
            # The original fell through to `loader.load()` with loader=None,
            # raising AttributeError that the broad except silently swallowed.
            DataAnalysis(pd.read_csv(path, encoding="utf-8"))
            return
        elif file_type == "json":
            # `path` is a local file, so read it from disk; the original
            # called requests.get(path), which cannot fetch a filesystem path.
            with open(path, encoding="utf-8") as f:
                json_data = json.load(f)
            splitter = RecursiveJsonSplitter(max_chunk_size=300)
            doc = splitter.create_documents(texts=[json_data])
        else:
            # Unknown extensions previously crashed on `None.load()`.
            print(f"Unsupported file type: {file_type} ({path})")
            return

        # Split the document content into overlapping chunks.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(doc)

        # Embed and persist the chunks. Chroma.from_documents already stores
        # `splits`; the original additionally called add_documents with the
        # same chunks, inserting every entry into the store twice.
        embedding = HuggingFaceEmbeddings(
            model_name="/home/zhangxj/models/acge_text_embedding",
            model_kwargs={"device": device},
        )
        Chroma.from_documents(documents=splits, embedding=embedding, persist_directory="chromadb")
        print("Upload Files saved: " + str(path))
    except Exception as e:
        print(f"Error in chroma_save_upload: {e}")
@st.cache_resource(ttl="1h")
def configure_retriever(uploaded_files):
    """Persist uploaded files to a scratch directory and index them in Chroma.

    Args:
        uploaded_files: Streamlit ``UploadedFile`` objects from a file uploader.
    """
    try:
        # The context manager guarantees the temp files are removed once
        # indexing is done; the original kept the TemporaryDirectory in a
        # plain local and relied on GC timing for cleanup.
        with tempfile.TemporaryDirectory(dir="/home/zhangxj/WorkFile/LCA-GPT/tmp") as temp_dir:
            for file in uploaded_files:
                temp_filepath = os.path.join(temp_dir, file.name)
                print("文档路径:", temp_filepath)
                with open(temp_filepath, "wb") as f:
                    f.write(file.getvalue())
                chroma_save_upload(path=temp_filepath)
    except Exception as e:
        print(f"Error in configure_retriever: {e}")


def init_chain():
    """Build the RAG chain: Chroma retriever + Tongyi chat model.

    Returns:
        The retrieval chain produced by ``create_retrieval_chain``, or
        ``None`` if initialization failed.
    """
    try:
        # Load environment variables and initialize the model.
        load_dotenv(".env")
        # SECURITY NOTE(review): the API key is hard-coded in source; it
        # should live in the .env file loaded above and be read from the
        # environment instead of being committed to the repository.
        os.environ["DASHSCOPE_API_KEY"] = 'sk-c5f441f863f44094b0ddb96c831b5002'
        llm = ChatTongyi(
            streaming=True,
            model='qwen1.5-72b-chat'
        )
        # llm = ChatZhipuAI(
        #     streaming=True,
        #     api_key="434790cf952335f18b6347e7b6de9777.V50p55zfk8Ye4ojV",  # pass the key here
        #     model="cogview-3",
        # )
        embedding = HuggingFaceEmbeddings(
            model_name="/home/zhangxj/models/acge_text_embedding",
            model_kwargs={"device": device},
        )
        retriever = Chroma(persist_directory="chromadb", embedding_function=embedding)
        retriever = retriever.as_retriever()

        # Fixed typo: "生命后期评价" -> "生命周期评价" (life-cycle assessment),
        # consistent with "生命周期领域" in the first prompt line.
        instruct_system_prompt = (
            "你是生命周期领域富有经验的专家。"
            "你要利用检索到的上下文来回答问题。如果上下文没有足够的信息,请说明。"
            "如果你有不明白的地方,请向用户询问。"
            "涉及生命周期评价领域的问题,你应该完整地引用文献资料。\n\n"
            "{context}"
        )
        instruct_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", instruct_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        qa_chain = create_stuff_documents_chain(llm, instruct_prompt)
        rag_chain = create_retrieval_chain(retriever, qa_chain)
        return rag_chain
    except Exception as e:
        print(f"Error in init_chain: {e}")
        return None


def user_in(uin, rag_chain, history):
    """Run one user turn through the RAG chain.

    Args:
        uin: The user's input question.
        rag_chain: Chain returned by :func:`init_chain`.
        history: Prior chat messages filling the ``chat_history`` placeholder.

    Returns:
        The chain's answer string, or a fallback message on error.
    """
    try:
        result = rag_chain.invoke({"input": uin, "chat_history": history})["answer"]
        return result
    except Exception as e:
        print(f"Error in user_in: {e}")
        return "An error occurred while processing your request."