-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanual_news_entry.py
130 lines (112 loc) · 4.55 KB
/
manual_news_entry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import logging
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from components.utils import get_db_connection, save_to_csv
from datetime import datetime
import re
import os
# Configure root logging at INFO level; each record is rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Load the BERT tokenizer and sequence-classification model once, at import
# time, so analyze_sentiment() can reuse the same objects on every call.
# NOTE(review): from_pretrained presumably fetches the weights on first use,
# so importing this module may need network access -- confirm.
tokenizer = BertTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = BertForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
# Destination CSV files. main() appends every entry to MAIN_CSV_PATH and
# additionally appends entries whose headline contains "dao" to DAO_CSV_PATH.
# NOTE(review): these are absolute paths, presumably specific to the author's
# machine -- adjust before running elsewhere.
MAIN_CSV_PATH = '/media/boilerrat/Bobby/CryptoData/BlockScent/csv/crypto_news_sentiment.csv'
DAO_CSV_PATH = '/media/boilerrat/Bobby/CryptoData/BlockScent/csv/crypto_news_sentiment_DAO.csv'
def analyze_sentiment(content):
    """Classify the sentiment of a text with the module-level BERT model.

    The text is tokenized with truncation enabled, run through ``model``, and
    the logits are converted to class probabilities with a softmax. Class
    index ``i`` is reported as ``i + 1`` stars.

    Args:
        content: The text to analyze.

    Returns:
        A ``(sentiment, sentiment_score, label)`` tuple:

        * ``sentiment`` -- ``'Positive'`` or ``'Negative'``, derived from the
          predicted star class (not from the confidence value).
        * ``sentiment_score`` -- probability of the predicted star class,
          rounded to 4 decimals.
        * ``label`` -- the predicted class as text, e.g. ``"4 stars"``.
    """
    inputs = tokenizer(content, return_tensors='pt', truncation=True, padding=True)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1).detach().numpy()[0]

    star_rating = int(probabilities.argmax()) + 1
    # Confidence of the winning class. This says how sure the model is, not
    # whether the text is positive, so it must not drive the polarity.
    sentiment_score = round(float(probabilities[star_rating - 1]), 4)

    # Polarity follows the predicted star class.
    # Assumes the model's five classes map to 1-5 stars (see its model card).
    if star_rating >= 4:
        sentiment = 'Positive'
    elif star_rating <= 2:
        sentiment = 'Negative'
    else:
        # 3 stars is the midpoint. Keep the two-valued contract by leaning
        # toward whichever side carries more probability mass.
        positive_mass = float(probabilities[3:].sum())
        negative_mass = float(probabilities[:2].sum())
        sentiment = 'Positive' if positive_mass > negative_mass else 'Negative'

    return sentiment, sentiment_score, f"{star_rating} stars"
def get_manual_entry():
    """Prompt the user for a news story and build its sentiment record.

    The publication date is re-requested until it is a real calendar date
    written as zero-padded ``YYYY-MM-DD``. The story content is used only to
    compute the sentiment; it is not stored in the returned record.

    Returns:
        A dict with the keys ``'Date'`` (a ``datetime.date``), ``'Source'``,
        ``'Headline'``, ``'Sentiment'``, ``'Sentiment Score'``, ``'Label'``
        and ``'Link'``.
    """
    publication_date = None
    while publication_date is None:
        date_input = input("Enter the publication date (YYYY-MM-DD): ").strip()
        # fullmatch rejects trailing junk ("2024-01-011") that a start-anchored
        # match would let through to strptime.
        if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_input):
            try:
                publication_date = datetime.strptime(date_input, "%Y-%m-%d").date()
            except ValueError:
                # Well-formed but impossible date (e.g. 2024-13-45): ask again
                # instead of crashing.
                pass
        if publication_date is None:
            print("Invalid date format. Please enter in YYYY-MM-DD format.")

    source = input("Enter the news source (e.g., CoinDesk): ")
    headline = input("Enter the headline: ")
    content = input("Enter the content of the news story: ")
    link = input("Enter the URL link to the story: ")

    sentiment, sentiment_score, label = analyze_sentiment(content)

    return {
        'Date': publication_date,
        'Source': source,
        'Headline': headline,
        'Sentiment': sentiment,
        'Sentiment Score': sentiment_score,
        'Label': label,
        'Link': link,
    }
def save_manual_entry_to_database(entry):
    """Insert a manual news entry into the ``crypto_news`` table.

    The table is created if it does not exist. Rows that collide with an
    existing ``(date, source, headline, link)`` combination are skipped by
    the ``ON CONFLICT ... DO NOTHING`` clause. Errors are logged rather than
    raised, and the cursor and connection are closed on every path.

    Args:
        entry: Dict with the keys ``'Date'``, ``'Source'``, ``'Headline'``,
            ``'Sentiment'``, ``'Sentiment Score'``, ``'Label'`` and ``'Link'``.

    Returns:
        None. Returns early without doing anything when no connection could
        be obtained.
    """
    conn = None
    cur = None
    try:
        conn = get_db_connection()
        if conn is None:
            return
        cur = conn.cursor()

        # Ensure the table exists with proper schema
        create_table_query = """
            CREATE TABLE IF NOT EXISTS crypto_news (
                date DATE,
                source VARCHAR(255),
                headline TEXT,
                sentiment VARCHAR(50),
                sentiment_score NUMERIC,
                label VARCHAR(50),
                link TEXT,
                UNIQUE (date, source, headline, link)
            );
        """
        cur.execute(create_table_query)
        conn.commit()

        # Insert data into the table; values are bound as parameters.
        insert_query = """
            INSERT INTO crypto_news (date, source, headline, sentiment, sentiment_score, label, link)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (date, source, headline, link) DO NOTHING;
        """
        cur.execute(insert_query, (
            entry['Date'],
            entry['Source'],
            entry['Headline'],
            entry['Sentiment'],
            entry['Sentiment Score'],
            entry['Label'],
            entry['Link'],
        ))
        conn.commit()
        logging.info("Manual entry saved to the database.")
    except Exception as e:
        logging.error(f"Error saving manual entry to database: {e}")
    finally:
        # Release the cursor and connection even when a statement failed.
        # A failing close() is logged so this function still never raises.
        for resource in (cur, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as close_error:
                logging.error(f"Error closing database resource: {close_error}")
def append_entry_to_csv(entry, csv_path):
    """Append a single entry as one row to a CSV file.

    The column header is written only when the file does not exist yet or
    exists but is empty, so a pre-created empty file still ends up with a
    header row. Errors are logged rather than raised.

    Args:
        entry: Dict mapping column names to values for the new row.
        csv_path: Path of the CSV file to append to.
    """
    try:
        # A missing or zero-byte file has no header row yet.
        needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
        # Append mode creates the file when it is missing, so one call covers
        # both the "new file" and "existing file" cases.
        pd.DataFrame([entry]).to_csv(csv_path, mode='a', header=needs_header, index=False)
        logging.info(f"Manual entry appended to {csv_path}.")
    except Exception as e:
        logging.error(f"Error appending manual entry to CSV: {e}")
def main():
    """Run one manual news entry: prompt, store in the database, write CSVs."""
    record = get_manual_entry()
    save_manual_entry_to_database(record)

    # Every entry goes to the main CSV file.
    append_entry_to_csv(record, MAIN_CSV_PATH)

    # Only the headline is inspected for "dao" (case-insensitive substring);
    # entries without it are not written to the DAO-specific CSV.
    headline_lowered = record['Headline'].lower()
    if 'dao' not in headline_lowered:
        return
    append_entry_to_csv(record, DAO_CSV_PATH)


if __name__ == "__main__":
    main()