diff --git a/.env.template b/.env.template
index 2ead8de..a560670 100644
--- a/.env.template
+++ b/.env.template
@@ -1,6 +1,8 @@
 #Scrapers
 LOGIN_NETID=
 LOGIN_PASSWORD=
+LOGIN_ASTRA_USERNAME=
+LOGIN_ASTRA_PASSWORD=
 HEADLESS_MODE=false
 
 #Uploader
diff --git a/go.mod b/go.mod
index d6acab6..e7e353d 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,7 @@ require (
 	github.com/chromedp/cdproto v0.0.0-20240801214329-3f85d328b335
 	github.com/chromedp/chromedp v0.10.0
 	github.com/joho/godotenv v1.5.1
+	github.com/valyala/fastjson v1.6.4
 	go.mongodb.org/mongo-driver v1.15.0
 )
 
diff --git a/go.sum b/go.sum
index 8fbfdd0..bab34d8 100644
--- a/go.sum
+++ b/go.sum
@@ -100,6 +100,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
 github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXVQ=
+github.com/valyala/fastjson v1.6.4/go.mod h1:CLCAqky6SMuOcxStkYQvblddUtoRxhYMGLrsQns1aXY=
 github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
 github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
 github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
diff --git a/main.go b/main.go
index 04e4271..ff46c56 100644
--- a/main.go
+++ b/main.go
@@ -36,6 +36,8 @@ func main() {
 	scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.")
 	// Flag for event scraping
 	scrapeEvents := flag.Bool("events", false, "Alongside -scrape, signifies that events should be scraped.")
+	// Flag for astra scraping
+	scrapeAstra := flag.Bool("astra", false, "Alongside -scrape, signifies that Astra should be scraped.")
 
 	// Flags for parsing
 	parse := flag.Bool("parse", false, "Puts the tool into parsing mode.")
@@ -92,6 +94,8 @@ func main() {
 			scrapers.ScrapeOrganizations(*outDir)
 		case *scrapeEvents:
 			scrapers.ScrapeEvents(*outDir)
+		case *scrapeAstra:
+			scrapers.ScrapeAstra(*outDir)
 		default:
 			log.Panic("You must specify which type of scraping you would like to perform with one of the scraping flags!")
 		}
diff --git a/scrapers/astra.go b/scrapers/astra.go
new file mode 100644
index 0000000..d2cf5b1
--- /dev/null
+++ b/scrapers/astra.go
@@ -0,0 +1,136 @@
+/*
+	This file contains the code for the Astra scraper.
+*/
+
+package scrapers
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/UTDNebula/api-tools/utils"
+	"github.com/joho/godotenv"
+	"github.com/valyala/fastjson"
+)
+
+// MAX_EVENTS_PER_DAY is the page size requested from Astra for a single day.
+// A day reporting this many events or more may be truncated, so the scraper
+// panics rather than saving incomplete data.
+var MAX_EVENTS_PER_DAY = 5000
+
+// ScrapeAstra signs into Astra, downloads the raw calendar JSON for each day
+// starting from yesterday, and writes one JSON object keyed by date
+// (YYYY-MM-DD) to reservations.json inside outDir. It stops after 90 days in
+// a row with fewer than 10 events each, and panics on any failure.
+func ScrapeAstra(outDir string) {
+
+	// Load env vars
+	if err := godotenv.Load(); err != nil {
+		log.Panic("Error loading .env file")
+	}
+
+	// Make output folder
+	if err := os.MkdirAll(outDir, 0777); err != nil {
+		panic(err)
+	}
+
+	// Get cookies for auth. The browser is only needed for signing in, so it
+	// is started right before and shut down right after.
+	chromedpCtx, cancel := utils.InitChromeDp()
+	astraHeaders := utils.RefreshAstraToken(chromedpCtx)
+	time.Sleep(500 * time.Millisecond)
+	cancel() // Don't need chromedp anymore
+
+	// Init http client; the timeout keeps a stalled request from hanging forever
+	tr := &http.Transport{
+		MaxIdleConns:       10,
+		IdleConnTimeout:    30 * time.Second,
+		DisableCompression: true,
+	}
+	cli := &http.Client{Transport: tr, Timeout: 2 * time.Minute}
+
+	// JSON object mapping each scraped date to that day's raw response body
+	var days bytes.Buffer
+	days.WriteByte('{')
+	firstLoop := true // To avoid adding a comma to the JSON on the first loop
+
+	// Start on previous date to make sure we have today's data, regardless of what timezone the scraper is in
+	date := time.Now().AddDate(0, 0, -1)
+
+	// Stop condition: number of days in a row with fewer than 10 events
+	lt10EventsCount := 0
+
+	// Run until there have been 90 days in a row with fewer than 10 events
+	for lt10EventsCount < 90 {
+		formattedDate := date.Format("2006-01-02")
+		log.Printf("Scraping %s...", formattedDate)
+
+		// Request daily events
+		url := fmt.Sprintf("https://www.aaiscloud.com/UTXDallas/~api/calendar/CalendarWeekGrid?_dc=%d&action=GET&start=0&limit=%d&isForWeekView=false&fields=ActivityId,ActivityPk,ActivityName,ParentActivityId,ParentActivityName,MeetingType,Description,StartDate,EndDate,DayOfWeek,StartMinute,EndMinute,ActivityTypeCode,ResourceId,CampusName,BuildingCode,RoomNumber,RoomName,LocationName,InstitutionId,SectionId,SectionPk,IsExam,IsCrosslist,IsAllDay,IsPrivate,EventId,EventPk,CurrentState,NotAllowedUsageMask,UsageColor,UsageColorIsPrimary,EventTypeColor,MaxAttendance,ActualAttendance,Capacity&filter=(StartDate%%3C%%3D%%22%sT23%%3A00%%3A00%%22)%%26%%26(EndDate%%3E%%3D%%22%sT00%%3A00%%3A00%%22)&page=1&sortOrder=%%2BStartDate,%%2BStartMinute", time.Now().UnixMilli(), MAX_EVENTS_PER_DAY, formattedDate, formattedDate)
+		req, err := http.NewRequest("GET", url, nil)
+		if err != nil {
+			panic(err)
+		}
+		req.Header = astraHeaders
+		res, err := cli.Do(req)
+		if err != nil {
+			panic(err)
+		}
+		if res.StatusCode != 200 {
+			res.Body.Close()
+			log.Panicf("ERROR: Status was: %s\nIf the status is 404, you've likely been IP ratelimited!", res.Status)
+		}
+		body, err := io.ReadAll(res.Body)
+		res.Body.Close() // Close before checking err so the body is released either way
+		if err != nil {
+			panic(err)
+		}
+
+		// The body is copied into the output verbatim, so refuse anything that
+		// does not parse as JSON (previously this was silently counted as 0 events)
+		parsed, err := fastjson.ParseBytes(body)
+		if err != nil {
+			log.Panicf("ERROR: Response for %s is not valid JSON: %v", formattedDate, err)
+		}
+
+		// Check for no events
+		numEvents := parsed.GetInt("totalRecords")
+		if numEvents >= MAX_EVENTS_PER_DAY {
+			log.Panic("ERROR: Max events per day exceeded!")
+		}
+		if numEvents < 10 {
+			lt10EventsCount += 1
+			if lt10EventsCount > 30 {
+				log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount)
+			}
+		} else {
+			lt10EventsCount = 0
+		}
+
+		// Add to record
+		if firstLoop {
+			firstLoop = false
+		} else {
+			days.WriteByte(',')
+		}
+		fmt.Fprintf(&days, "\"%s\":", formattedDate)
+		days.Write(body)
+
+		// Step by calendar day: adding 24 hours can repeat or skip a date when
+		// a daylight saving change falls in between
+		date = date.AddDate(0, 0, 1)
+	}
+
+	// Write event data to output file
+	days.WriteByte('}')
+	outPath := filepath.Join(outDir, "reservations.json")
+	if err := os.WriteFile(outPath, days.Bytes(), 0666); err != nil {
+		panic(err)
+	}
+}
diff --git a/utils/methods.go b/utils/methods.go
index 1105a7a..a6e42e3 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -101,6 +101,87 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string {
 	}
 }
 
+// RefreshAstraToken signs into Astra with the LOGIN_ASTRA_USERNAME and
+// LOGIN_ASTRA_PASSWORD environment variables and returns request headers
+// carrying the resulting session cookies. It panics if either variable is
+// unset or if signing in fails.
+func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
+	// Get username and password
+	username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME")
+	if !present {
+		log.Panic("LOGIN_ASTRA_USERNAME is missing from .env!")
+	}
+	password, present := os.LookupEnv("LOGIN_ASTRA_PASSWORD")
+	if !present {
+		log.Panic("LOGIN_ASTRA_PASSWORD is missing from .env!")
+	}
+
+	// Sign in
+	VPrintf("Signing in...")
+	_, err := chromedp.RunResponse(chromedpCtx,
+		chromedp.ActionFunc(func(ctx context.Context) error {
+			return network.ClearBrowserCookies().Do(ctx)
+		}),
+		chromedp.Navigate(`https://www.aaiscloud.com/UTXDallas/logon.aspx?ReturnUrl=%2futxdallas%2fcalendars%2fdailygridcalendar.aspx`),
+		chromedp.WaitVisible(`input#userNameField-inputEl`),
+		chromedp.SendKeys(`input#userNameField-inputEl`, username),
+		chromedp.SendKeys(`input#textfield-1029-inputEl`, password),
+		chromedp.WaitVisible(`a#logonButton`),
+		chromedp.Click(`a#logonButton`),
+		chromedp.WaitVisible(`body`, chromedp.ByQuery),
+	)
+	if err != nil {
+		panic(err)
+	}
+
+	// Save all cookies to string
+	cookieStr := ""
+	_, err = chromedp.RunResponse(chromedpCtx,
+		chromedp.WaitVisible(`body`, chromedp.ByQuery),
+		chromedp.ActionFunc(func(ctx context.Context) error {
+			cookies, err := network.GetCookies().Do(ctx)
+			if err != nil {
+				// Report the real failure instead of masking it as a missing token
+				return err
+			}
+			gotToken := false
+			for _, cookie := range cookies {
+				cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value)
+				if cookie.Name == "UTXDallas.ASPXFORMSAUTH" {
+					VPrintf("Got new token: %s = %s", cookie.Name, cookie.Value)
+					gotToken = true
+				}
+			}
+			if !gotToken {
+				return errors.New("failed to get a new token")
+			}
+			return nil
+		}),
+	)
+	if err != nil {
+		panic(err)
+	}
+
+	// Return headers, copied from a request the actual site made
+	// NOTE(review): net/http does not decompress replies when Accept-Encoding is
+	// set by hand - confirm the server sends these responses uncompressed.
+	return map[string][]string{
+		"Host":                      {"www.aaiscloud.com"},
+		"User-Agent":                {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"},
+		"Accept":                    {"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"},
+		"Accept-Language":           {"en-US,en;q=0.5"},
+		"Accept-Encoding":           {"gzip, deflate, br, zstd"},
+		"Connection":                {"keep-alive"},
+		"Cookie":                    {cookieStr},
+		"Upgrade-Insecure-Requests": {"1"},
+		"Sec-Fetch-Dest":            {"document"},
+		"Sec-Fetch-Mode":            {"navigate"},
+		"Sec-Fetch-Site":            {"none"},
+		"Sec-Fetch-User":            {"?1"},
+		"Priority":                  {"u=0, i"},
+	}
+}
+
 // Encodes and writes the given data as tab-indented JSON to the given filepath.
 func WriteJSON(filepath string, data interface{}) error {
 	fptr, err := os.Create(filepath)