Skip to content

Commit

Permalink
Move RefreshToken
Browse files Browse the repository at this point in the history
  • Loading branch information
democat3457 committed Sep 20, 2024
1 parent cc54883 commit 7b62fd8
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 70 deletions.
72 changes: 2 additions & 70 deletions scrapers/coursebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ package scrapers

import (
"bytes"
"context"
"errors"
"fmt"
"log"
"net/http"
Expand All @@ -16,75 +14,9 @@ import (
"time"

"github.com/UTDNebula/api-tools/utils"
"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
"github.com/joho/godotenv"
)

// refreshToken logs in through the UTD login page using the supplied chromedp
// browser context and returns a fresh set of HTTP headers (including the
// session cookies) for requests to coursebook.utdallas.edu.
//
// Credentials are read from the LOGIN_NETID and LOGIN_PASSWORD environment
// variables; the function panics if either is missing, if any browser action
// fails, or if no PTGSESSID cookie is present after navigating to Coursebook.
func refreshToken(chromedpCtx context.Context) map[string][]string {
	netID, present := os.LookupEnv("LOGIN_NETID")
	if !present {
		log.Panic("LOGIN_NETID is missing from .env!")
	}
	password, present := os.LookupEnv("LOGIN_PASSWORD")
	if !present {
		log.Panic("LOGIN_PASSWORD is missing from .env!")
	}

	utils.VPrintf("Getting new token...")

	// Step 1: start from a clean cookie jar and submit the login form.
	_, err := chromedp.RunResponse(chromedpCtx,
		chromedp.ActionFunc(func(ctx context.Context) error {
			return network.ClearBrowserCookies().Do(ctx)
		}),
		chromedp.Navigate(`https://wat.utdallas.edu/login`),
		chromedp.WaitVisible(`form#login-form`),
		chromedp.SendKeys(`input#netid`, netID),
		chromedp.SendKeys(`input#password`, password),
		chromedp.WaitVisible(`input#login-button`),
		chromedp.Click(`input#login-button`),
		//chromedp.WaitVisible(`body`),
	)
	if err != nil {
		panic(err)
	}

	// Step 2: visit Coursebook and collect the cookies set for the session.
	var cookieStrs []string
	_, err = chromedp.RunResponse(chromedpCtx,
		chromedp.Navigate(`https://coursebook.utdallas.edu/`),
		chromedp.ActionFunc(func(ctx context.Context) error {
			cookies, err := network.GetCookies().Do(ctx)
			if err != nil {
				// Surface the real failure instead of letting it be masked by
				// the generic "no token" error below.
				return fmt.Errorf("getting browser cookies: %w", err)
			}
			cookieStrs = make([]string, len(cookies))
			gotToken := false
			for i, cookie := range cookies {
				cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
				if cookie.Name == "PTGSESSID" {
					utils.VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
					gotToken = true
				}
			}
			if !gotToken {
				return errors.New("failed to get a new token")
			}
			return nil
		}),
	)
	if err != nil {
		panic(err)
	}

	return map[string][]string{
		"Host":            {"coursebook.utdallas.edu"},
		"User-Agent":      {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"},
		"Accept":          {"text/html"},
		"Accept-Language": {"en-US"},
		"Content-Type":    {"application/x-www-form-urlencoded"},
		"Cookie":          cookieStrs,
		"Connection":      {"keep-alive"},
	}
}

func ScrapeCoursebook(term string, startPrefix string, outDir string) {

// Load env vars
Expand Down Expand Up @@ -141,7 +73,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
panic(err)
}
// Get a fresh token at the start of each new prefix because we can lol
coursebookHeaders := refreshToken(chromedpCtx)
coursebookHeaders := utils.RefreshToken(chromedpCtx)
// Give coursebook some time to recognize the new token
time.Sleep(500 * time.Millisecond)
// String builder to store accumulated course HTML data for both class levels
Expand Down Expand Up @@ -211,7 +143,7 @@ func ScrapeCoursebook(term string, startPrefix string, outDir string) {
utils.VPrintf("Got section: %s", id)
if sectionIndex%30 == 0 && sectionIndex != 0 {
// Ratelimit? What ratelimit?
coursebookHeaders = refreshToken(chromedpCtx)
coursebookHeaders = utils.RefreshToken(chromedpCtx)
// Give coursebook some time to recognize the new token
time.Sleep(500 * time.Millisecond)
}
Expand Down
66 changes: 66 additions & 0 deletions utils/methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package utils
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
Expand All @@ -16,6 +17,7 @@ import (
"strconv"
"strings"

"github.com/chromedp/cdproto/network"
"github.com/chromedp/chromedp"
)

Expand All @@ -34,6 +36,70 @@ func InitChromeDp() (chromedpCtx context.Context, cancelFnc context.CancelFunc)
return
}

// RefreshToken logs in through the UTD login page using the supplied chromedp
// browser context and returns a fresh set of HTTP headers (including the
// session cookies) for requests to coursebook.utdallas.edu.
//
// Credentials are read from the LOGIN_NETID and LOGIN_PASSWORD environment
// variables; the function panics if either is missing, if any browser action
// fails, or if no PTGSESSID cookie is present after navigating to Coursebook.
func RefreshToken(chromedpCtx context.Context) map[string][]string {
	netID, present := os.LookupEnv("LOGIN_NETID")
	if !present {
		log.Panic("LOGIN_NETID is missing from .env!")
	}
	password, present := os.LookupEnv("LOGIN_PASSWORD")
	if !present {
		log.Panic("LOGIN_PASSWORD is missing from .env!")
	}

	VPrintf("Getting new token...")

	// Step 1: start from a clean cookie jar and submit the login form.
	_, err := chromedp.RunResponse(chromedpCtx,
		chromedp.ActionFunc(func(ctx context.Context) error {
			return network.ClearBrowserCookies().Do(ctx)
		}),
		chromedp.Navigate(`https://wat.utdallas.edu/login`),
		chromedp.WaitVisible(`form#login-form`),
		chromedp.SendKeys(`input#netid`, netID),
		chromedp.SendKeys(`input#password`, password),
		chromedp.WaitVisible(`input#login-button`),
		chromedp.Click(`input#login-button`),
		//chromedp.WaitVisible(`body`),
	)
	if err != nil {
		panic(err)
	}

	// Step 2: visit Coursebook and collect the cookies set for the session.
	var cookieStrs []string
	_, err = chromedp.RunResponse(chromedpCtx,
		chromedp.Navigate(`https://coursebook.utdallas.edu/`),
		chromedp.ActionFunc(func(ctx context.Context) error {
			cookies, err := network.GetCookies().Do(ctx)
			if err != nil {
				// Surface the real failure instead of letting it be masked by
				// the generic "no token" error below.
				return fmt.Errorf("getting browser cookies: %w", err)
			}
			cookieStrs = make([]string, len(cookies))
			gotToken := false
			for i, cookie := range cookies {
				cookieStrs[i] = fmt.Sprintf("%s=%s", cookie.Name, cookie.Value)
				if cookie.Name == "PTGSESSID" {
					VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
					gotToken = true
				}
			}
			if !gotToken {
				return errors.New("failed to get a new token")
			}
			return nil
		}),
	)
	if err != nil {
		panic(err)
	}

	return map[string][]string{
		"Host":            {"coursebook.utdallas.edu"},
		"User-Agent":      {"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0"},
		"Accept":          {"text/html"},
		"Accept-Language": {"en-US"},
		"Content-Type":    {"application/x-www-form-urlencoded"},
		"Cookie":          cookieStrs,
		"Connection":      {"keep-alive"},
	}
}

// Encodes and writes the given data as tab-indented JSON to the given filepath.
func WriteJSON(filepath string, data interface{}) error {
fptr, err := os.Create(filepath)
Expand Down

0 comments on commit 7b62fd8

Please sign in to comment.