-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathsave_model.py
40 lines (31 loc) · 1.09 KB
/
save_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import sys
from pathlib import Path
import faiss
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
# pip install python-dotenv
from dotenv import load_dotenv
# Runs at import time, before train_and_save() is called below.
# NOTE(review): presumably this loads a local .env file so that the OpenAI
# credentials needed by OpenAIEmbeddings are in the environment - confirm.
load_dotenv()
def train_and_save(
    facts_dir="training/facts/",
    index_path="training.index",
    store_path="faiss.pkl",
    chunk_size=2000,
):
    """Build a FAISS vector store from a folder of text files and save it.

    Every regular file under ``facts_dir`` (searched recursively) whose name
    contains a dot is read as UTF-8 text, split on newlines with
    ``CharacterTextSplitter`` and embedded with ``OpenAIEmbeddings``. The
    resulting FAISS index is written to ``index_path`` and the store object,
    with its index detached, is pickled to ``store_path``.

    Called with no arguments, the function uses the same folder, output
    files and chunk size as before these became parameters.

    Args:
        facts_dir: Folder containing the training documents.
        index_path: Destination file for the raw FAISS index.
        store_path: Destination file for the pickled store.
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.

    Returns:
        None. When there is nothing to index, a message is printed to
        stderr and no output file is written.
    """
    facts_path = Path(facts_dir)
    folder_label = facts_path.as_posix()

    # "**/*.*" also matches directories whose name contains a dot; keep only
    # regular files, and sort so the index is built in a reproducible order.
    training_files = sorted(
        path for path in facts_path.glob("**/*.*") if path.is_file()
    )
    if not training_files:
        print(
            f"The folder {folder_label} should be populated with at least "
            "one .txt or .md file.",
            file=sys.stderr,
        )
        return

    texts = []
    for path in training_files:
        try:
            # Explicit encoding: the platform default is not UTF-8 everywhere.
            with open(path, encoding="utf-8") as f:
                text = f.read()
        except UnicodeDecodeError:
            # The glob accepts any extension, so binary files (for example
            # .DS_Store) can show up; skip them instead of aborting the run.
            print(f"Skip {path}: not valid UTF-8 text", file=sys.stderr)
            continue
        print(f"Add {path} to dataset")
        texts.append(text)

    # Stop before calling the embedding API when there is no text at all.
    if not any(text.strip() for text in texts):
        print(
            f"No readable text found in {folder_label}; nothing to index.",
            file=sys.stderr,
        )
        return

    text_splitter = CharacterTextSplitter(chunk_size=chunk_size, separator="\n")
    docs = []
    for text in texts:
        docs.extend(text_splitter.split_text(text))

    store = FAISS.from_texts(docs, OpenAIEmbeddings())

    # The index is saved on its own with faiss.write_index and detached from
    # the store before pickling, so the pickle holds everything but the index.
    # NOTE(review): presumably because the FAISS index object cannot be
    # pickled directly - confirm. Loaders must re-attach the index themselves.
    faiss.write_index(store.index, str(index_path))
    store.index = None
    with open(store_path, "wb") as f:
        pickle.dump(store, f)
    print('done!')
# Script entry point: build and save the vector store with the default paths.
# There is no `if __name__ == "__main__"` guard, so this also runs whenever
# the module is imported.
train_and_save()