-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Log in to Astra
* Attempt to call backend Astra API
* Uncomment cookie code
* Successful scrape! TODOs: sorting; scrape each day; look into the login sometimes entering the user/pass incorrectly
* Request in loop
* Scrape until 90 days of fewer than 10 events. After this semester and the next, it seems there are only ever 2 events: one in FO 3.616 with no time (?), and one with no location that always shows up after the current semester and says either the holiday or "Events for Future Terms", as well as "No Events Allowed". This just scrapes 90 days into that and stops at about a year and 2 months out.
* Sort by start time
* Check if max events exceeded
* Run close commands when not needed (closes chromedp when it is no longer necessary)
* Start on previous day
- Loading branch information
Showing
6 changed files
with
205 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/* | ||
This file contains the code for the Astra scraper. | ||
*/ | ||
|
||
package scrapers | ||
|
||
import ( | ||
"fmt" | ||
"io" | ||
"log" | ||
"net/http" | ||
"os" | ||
"time" | ||
|
||
"github.com/UTDNebula/api-tools/utils" | ||
"github.com/joho/godotenv" | ||
"github.com/valyala/fastjson" | ||
) | ||
|
||
var MAX_EVENTS_PER_DAY = 5000 | ||
|
||
func ScrapeAstra(outDir string) { | ||
|
||
// Load env vars | ||
if err := godotenv.Load(); err != nil { | ||
log.Panic("Error loading .env file") | ||
} | ||
|
||
// Start chromedp | ||
chromedpCtx, cancel := utils.InitChromeDp() | ||
|
||
// Make output folder | ||
err := os.MkdirAll(outDir, 0777) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
days := "{" // String JSON for storing results by day | ||
firstLoop := true // To avoid adding a comma to the JSON on the first loop | ||
|
||
// Init http client | ||
tr := &http.Transport{ | ||
MaxIdleConns: 10, | ||
IdleConnTimeout: 30 * time.Second, | ||
DisableCompression: true, | ||
} | ||
cli := &http.Client{Transport: tr} | ||
|
||
// Get cookies for auth | ||
astraHeaders := utils.RefreshAstraToken(chromedpCtx) | ||
time.Sleep(500 * time.Millisecond) | ||
cancel() // Don't need chromedp anymore | ||
|
||
// Starting date | ||
date := time.Now() | ||
// Start on previous date to make sure we have today's data, regardless of what timezone the scraper is in | ||
date = date.Add(time.Hour * -24) | ||
|
||
// Stop condition | ||
lt10EventsCount := 0 | ||
|
||
// Run until 90 days of no events | ||
for lt10EventsCount < 90 { | ||
formattedDate := date.Format("2006-01-02") | ||
log.Printf("Scraping %s...", formattedDate) | ||
|
||
// Request daily events | ||
url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=%d&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), MAX_EVENTS_PER_DAY, formattedDate, formattedDate) | ||
req, err := http.NewRequest("GET", url, nil) | ||
if err != nil { | ||
panic(err) | ||
} | ||
req.Header = astraHeaders | ||
res, err := cli.Do(req) | ||
if err != nil { | ||
panic(err) | ||
} | ||
if res.StatusCode != 200 { | ||
log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status) | ||
} | ||
body, err := io.ReadAll(res.Body) | ||
if err != nil { | ||
panic(err) | ||
} | ||
res.Body.Close() | ||
stringBody := string(body) | ||
|
||
// Check for no events | ||
numEvents := fastjson.GetInt(body, "totalRecords") | ||
if numEvents >= MAX_EVENTS_PER_DAY { | ||
log.Panic("ERROR: Max events per day exceeded!") | ||
} | ||
if numEvents < 10 { | ||
lt10EventsCount += 1 | ||
if lt10EventsCount > 30 { | ||
log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount) | ||
} | ||
} else { | ||
lt10EventsCount = 0 | ||
} | ||
|
||
// Add to record | ||
comma := "," | ||
if firstLoop { | ||
comma = "" | ||
firstLoop = false | ||
} | ||
days = fmt.Sprintf("%s%s\"%s\":%s", days, comma, formattedDate, stringBody) | ||
date = date.Add(time.Hour * 24) | ||
} | ||
|
||
// Write event data to output file | ||
days = fmt.Sprintf("%s}", days) | ||
fptr, err := os.Create(fmt.Sprintf("%s/reservations.json", outDir)) | ||
if err != nil { | ||
panic(err) | ||
} | ||
_, err = fptr.Write([]byte(days)) | ||
if err != nil { | ||
panic(err) | ||
} | ||
fptr.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters