This repository has been archived by the owner on Jul 21, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathget_course_info.py
108 lines (82 loc) · 4.09 KB
/
get_course_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
OLD - USE DB_CORE.PY
Given a string of a department and course number, pulls information about the course from
https://pisa.ucsc.edu/cs9/prd/sr9_2013/index.php.
"""
raise DeprecationWarning("You probably don't want to actually run this. Use db_core.py to retrive course information.")
import requests # html requests
from bs4 import BeautifulSoup # html parser
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/
class Course:
    """
    Simple record describing one course.
    The constructor stores its five arguments unchanged on the instance as
    ``name``, ``description``, ``dept``, ``number`` and ``link``.
    """

    def __init__(self, name, description, dept, number, link):
        """
        :param name: course name; this is also what ``str(course)`` returns
        :param description: course description
        :param dept: department the course belongs to
        :param number: course number
        :param link: link associated with the course
        """
        self.name, self.description = name, description
        self.dept, self.number = dept, number
        self.link = link

    def __str__(self):
        """Return the stored ``name`` attribute as the string form of the course."""
        return self.name
def _raise_unless_ok(response, page_description):
    """
    Raise if an HTTP response did not come back with status 200.
    :param response: object exposing a ``status_code`` attribute (a requests response here)
    :param page_description: short name of the requested page, used in the error message
    :raises Exception: when the status code is anything other than 200
    """
    status_code = response.status_code
    if status_code != 200:
        # The status code has to be formatted into the message: concatenating it to a
        # str with "+" raises TypeError and hides the real failure.
        raise Exception(f"Request for {page_description} returned {status_code}")


def get_course_object_bad(course_dept_number_string, term='2160', timeout=30):
    """
    Given string of department and number, returns a Course object with information about it.

    Makes two HTTP requests: a POST to the class search form, then a GET of the detail
    page linked from the first search result. Prints two debug lines along the way.

    :param course_dept_number_string: string like "cmps 1"; department and number must be
        separated by whitespace
    :param term: value sent as the 'binds[:term]' form field; '2160' is the value that was
        previously hard-coded
    :param timeout: timeout in seconds handed to both requests calls; pass None to wait
        indefinitely, which is what the code did before this parameter existed
    :return: a Course object
    :raises ValueError: if the input does not contain both a department and a number
    :raises Exception: if either request does not return HTTP 200, or if the expected
        elements cannot be found in the returned pages
    """
    print(f'running on "{course_dept_number_string}"')

    # split() with no argument tolerates leading, trailing and repeated whitespace.
    parts = course_dept_number_string.split()
    if len(parts) < 2:
        raise ValueError(
            f'Expected "<department> <number>" such as "cmps 1", got {course_dept_number_string!r}'
        )
    course_department = parts[0].upper()  # server needs department to be all caps
    course_number = parts[1]

    # Short department aliases are expanded to the subject codes sent to the server.
    department_aliases = {'CS': 'CMPS', 'CE': 'CMPE'}
    course_department = department_aliases.get(course_department, course_department)

    # Taken from chrome inspector. Go to Network tab before clicking Search on class search form.
    # Click Search, then click on index.php in the Network tab.
    # In the Headers tab, scroll down to Form Data. This is the HTML POST data.
    payload = {
        'action': 'results',
        'binds[:term]': term,
        'binds[:reg_status]': 'all',
        'binds[:subject]': course_department,
        'binds[:catalog_nbr_op]': '=',
        'binds[:catalog_nbr]': course_number,
    }

    # do the request for the search results page
    request_result = requests.post('https://pisa.ucsc.edu/class_search/', data=payload, timeout=timeout)
    _raise_unless_ok(request_result, "search results page")

    # feed search results page to parser
    soup = BeautifulSoup(request_result.text, 'html.parser')

    # We want class_data from the link to a class: select the <a> elements in the third
    # cell of the result rows (the Class Title link) and use the first one.
    title_links = soup.select('tbody td:nth-of-type(3) a')
    print("a element is", title_links)
    if not title_links:
        raise Exception("Couldn't find link in search results page")

    # The href is split on '=' and the piece at index 2 (the third piece) is used.
    # NOTE(review): presumably that piece is the class_data value; if the value itself can
    # contain '=', this drops everything after it - confirm against a live href.
    href_pieces = title_links[0].get('href', '').split('=')
    if len(href_pieces) < 3:
        raise Exception("Couldn't find link in search results page")
    encoded_course = href_pieces[2]

    # stick class_data to the end of this thing. you'll get a page with course information.
    result_url = 'https://pisa.ucsc.edu/class_search/index.php?action=detail&class_data=' + encoded_course

    # loads the course info page
    request_result = requests.get(result_url, timeout=timeout)
    _raise_unless_ok(request_result, "course detail page")

    # feed course info page to parser
    soup = BeautifulSoup(request_result.text, 'html.parser')

    # NOTE(review): the comment that used to sit here read "this does now work"; it is
    # unclear whether it meant the selector works or does not - verify on a live page.
    name_cells = soup.select("table.PALEVEL0SECONDARY tr:nth-of-type(2) td")
    if not name_cells:
        raise Exception("Couldn't find course name")
    # http://stackoverflow.com/questions/10993612/python-removing-xa0-from-string
    course_name = name_cells[0].text.replace(u'\xa0', u' ')
    # NOTE(review): as written this turns every single space into ': '. The separator
    # literal may originally have been a longer run of spaces - confirm before relying on it.
    course_name = course_name.replace(' ', ': ')

    # The description is read from the first <td> of the second table.detail_table.
    detail_tables = soup.select("table.detail_table")
    description_cells = detail_tables[1].select("td") if len(detail_tables) > 1 else []
    if not description_cells:
        raise Exception("Couldn't find course description")
    course_description = description_cells[0].text

    return Course(course_name, course_description, course_department, course_number, result_url)
# Example usage, left disabled (the function defined in this file is get_course_object_bad):
# thing = get_course_object_bad('CMPS 5j')
# print(thing)