diff --git a/scrapers/coursebook.go b/scrapers/coursebook.go index 73a46bb..ac26918 100644 --- a/scrapers/coursebook.go +++ b/scrapers/coursebook.go @@ -6,8 +6,6 @@ package scrapers import ( "bytes" - "context" - "errors" "fmt" "log" "net/http" @@ -16,75 +14,9 @@ import ( "time" "github.com/UTDNebula/api-tools/utils" - "github.com/chromedp/cdproto/network" - "github.com/chromedp/chromedp" "github.com/joho/godotenv" ) -// This function generates a fresh auth token and returns the new headers -func refreshToken(chromedpCtx context.Context) map[string][]string { - netID, present := os.LookupEnv("LOGIN_NETID") - if !present { - log.Panic("LOGIN_NETID is missing from .env!") - } - password, present := os.LookupEnv("LOGIN_PASSWORD") - if !present { - log.Panic("LOGIN_PASSWORD is missing from .env!") - } - - utils.VPrintf("Getting new token...") - _, err := chromedp.RunResponse(chromedpCtx, - chromedp.ActionFunc(func(ctx context.Context) error { - err := network.ClearBrowserCookies().Do(ctx) - return err - }), - chromedp.Navigate(`https://wat.utdallas.edu/login`), - chromedp.WaitVisible(`form#login-form`), - chromedp.SendKeys(`input#netid`, netID), - chromedp.SendKeys(`input#password`, password), - chromedp.WaitVisible(`input#login-button`), - chromedp.Click(`input#login-button`), - //chromedp.WaitVisible(`body`), - ) - if err != nil { - panic(err) - } - - var cookieStrs []string - _, err = chromedp.RunResponse(chromedpCtx, - chromedp.Navigate(`https://coursebook.utdallas.edu/`), - chromedp.ActionFunc(func(ctx context.Context) error { - cookies, err := network.GetCookies().Do(ctx) - cookieStrs = make([]string, len(cookies)) - gotToken := false - for i, cookie := range cookies { - cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value) - if cookie.Name == "PTGSESSID" { - utils.VPrintf("Got new token: PTGSESSID = %s", cookie.Value) - gotToken = true - } - } - if !gotToken { - return errors.New("failed to get a new token") - } - return err - }), - ) - if err != nil { - panic(err) - } - - return map[string][]string{ - "Host": {"coursebook.utdallas.edu"}, - "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, - "Accept": {"text/html"}, - "Accept-Language": {"en-US"}, - "Content-Type": {"application/x-www-form-urlencoded"}, - "Cookie": cookieStrs, - "Connection": {"keep-alive"}, - } -} - func ScrapeCoursebook(term string, startPrefix string, outDir string) { // Load env vars @@ -141,7 +73,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) { panic(err) } // Get a fresh token at the start of each new prefix because we can lol - coursebookHeaders := refreshToken(chromedpCtx) + coursebookHeaders := utils.RefreshToken(chromedpCtx) // Give coursebook some time to recognize the new token time.Sleep(500 * time.Millisecond) // String builder to store accumulated course HTML data for both class levels @@ -211,7 +143,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) { utils.VPrintf("Got section: %s", id) if sectionIndex%30 == 0 && sectionIndex != 0 { // Ratelimit? What ratelimit? - coursebookHeaders = refreshToken(chromedpCtx) + coursebookHeaders = utils.RefreshToken(chromedpCtx) // Give coursebook some time to recognize the new token time.Sleep(500 * time.Millisecond) } diff --git a/utils/methods.go b/utils/methods.go index 56ac62d..b2ee223 100644 --- a/utils/methods.go +++ b/utils/methods.go @@ -7,6 +7,7 @@ package utils import ( "context" "encoding/json" + "errors" "fmt" "io/fs" "log" @@ -16,6 +17,7 @@ import ( "strconv" "strings" + "github.com/chromedp/cdproto/network" "github.com/chromedp/chromedp" ) @@ -34,6 +36,70 @@ func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc) return } +// This function generates a fresh auth token and returns the new headers +func RefreshToken(chromedpCtx context.Context) map[string][]string { + netID, present := os.LookupEnv("LOGIN_NETID") + if !present { + log.Panic("LOGIN_NETID is missing from .env!") + } + password, present := os.LookupEnv("LOGIN_PASSWORD") + if !present { + log.Panic("LOGIN_PASSWORD is missing from .env!") + } + + VPrintf("Getting new token...") + _, err := chromedp.RunResponse(chromedpCtx, + chromedp.ActionFunc(func(ctx context.Context) error { + err := network.ClearBrowserCookies().Do(ctx) + return err + }), + chromedp.Navigate(`https://wat.utdallas.edu/login`), + chromedp.WaitVisible(`form#login-form`), + chromedp.SendKeys(`input#netid`, netID), + chromedp.SendKeys(`input#password`, password), + chromedp.WaitVisible(`input#login-button`), + chromedp.Click(`input#login-button`), + //chromedp.WaitVisible(`body`), + ) + if err != nil { + panic(err) + } + + var cookieStrs []string + _, err = chromedp.RunResponse(chromedpCtx, + chromedp.Navigate(`https://coursebook.utdallas.edu/`), + chromedp.ActionFunc(func(ctx context.Context) error { + cookies, err := network.GetCookies().Do(ctx) + cookieStrs = make([]string, len(cookies)) + gotToken := false + for i, cookie := range cookies { + cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value) + if cookie.Name == "PTGSESSID" { + VPrintf("Got new token: PTGSESSID = %s", cookie.Value) + gotToken = true + } + } + if !gotToken { + return errors.New("failed to get a new token") + } + return err + }), + ) + if err != nil { + panic(err) + } + + return map[string][]string{ + "Host": {"coursebook.utdallas.edu"}, + "User-Agent": {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"}, + "Accept": {"text/html"}, + "Accept-Language": {"en-US"}, + "Content-Type": {"application/x-www-form-urlencoded"}, + "Cookie": cookieStrs, + "Connection": {"keep-alive"}, + } +} + // Encodes and writes the given data as tab-indented JSON to the given filepath. func WriteJSON(filepath string, data interface{}) error { fptr, err := os.Create(filepath)