forked from e-johnstonn/GPT-Doc-Summarizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app_utils.py
128 lines (90 loc) · 3.3 KB
/
streamlit_app_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import tempfile
import PyPDF2
from io import StringIO
from langchain.chat_models import ChatOpenAI
from utils import doc_to_text, token_counter
def pdf_to_text(pdf_file):
"""
Convert a PDF file to a string of text.
:param pdf_file: The PDF file to convert.
:return: A string of text.
"""
pdf_reader = PyPDF2.PdfReader(pdf_file)
text = StringIO()
for i in range(len(pdf_reader.pages)):
p = pdf_reader.pages[i]
text.write(p.extract_text())
return text.getvalue().encode('utf-8')
def check_gpt_4(api_key):
"""
Check if the user has access to GPT-4.
:param api_key: The user's OpenAI API key.
:return: True if the user has access to GPT-4, False otherwise.
"""
try:
ChatOpenAI(openai_api_key=api_key, model_name='gpt-4').call_as_llm('Hi')
return True
except Exception as e:
return False
def token_limit(doc, maximum=200000):
"""
Check if a document has more tokens than a specified maximum.
:param doc: The langchain Document object to check.
:param maximum: The maximum number of tokens allowed.
:return: True if the document has less than the maximum number of tokens, False otherwise.
"""
text = doc_to_text(doc)
count = token_counter(text)
print(count)
if count > maximum:
return False
return True
def token_minimum(doc, minimum=2000):
"""
Check if a document has more tokens than a specified minimum.
:param doc: The langchain Document object to check.
:param minimum: The minimum number of tokens allowed.
:return: True if the document has more than the minimum number of tokens, False otherwise.
"""
text = doc_to_text(doc)
count = token_counter(text)
if count < minimum:
return False
return True
def check_key_validity(api_key):
"""
Check if an OpenAI API key is valid.
:param api_key: The OpenAI API key to check.
:return: True if the API key is valid, False otherwise.
"""
try:
ChatOpenAI(openai_api_key=api_key).call_as_llm('Hi')
print('API Key is valid')
return True
except Exception as e:
print('API key is invalid or OpenAI is having issues.')
print(e)
return False
def create_temp_file(uploaded_file):
"""
Create a temporary file from an uploaded file.
:param uploaded_file: The uploaded file to create a temporary file from.
:return: The path to the temporary file.
"""
with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
if uploaded_file.type == 'application/pdf':
temp_file.write(pdf_to_text(uploaded_file))
else:
temp_file.write(uploaded_file.getvalue())
return temp_file.name
def create_chat_model(api_key, use_gpt_4):
"""
Create a chat model ensuring that the token limit of the overall summary is not exceeded - GPT-4 has a higher token limit.
:param api_key: The OpenAI API key to use for the chat model.
:param use_gpt_4: Whether to use GPT-4 or not.
:return: A chat model.
"""
if use_gpt_4:
return ChatOpenAI(openai_api_key=api_key, temperature=0, max_tokens=500, model_name='gpt-3.5-turbo')
else:
return ChatOpenAI(openai_api_key=api_key, temperature=0, max_tokens=250, model_name='gpt-3.5-turbo')