diff --git a/slides.py b/slides.py index 32d19df..d738e30 100644 --- a/slides.py +++ b/slides.py @@ -39,13 +39,14 @@ import io +import json import os from google.auth.transport.requests import Request from google.oauth2.credentials import Credentials from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.discovery import build -from googleapiclient.http import MediaIoBaseDownload +from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload def get_credentials(): @@ -73,25 +74,22 @@ def get_credentials(): return creds -def download_as_pdf(service, file_id, output_filename): +def download_as_pdf(service, file_id, output_file_path): request = service.files().export_media(fileId=file_id, mimeType="application/pdf") fh = io.BytesIO() downloader = MediaIoBaseDownload(fh, request) done = False + + print(f"Download PDF to {output_file_path}...") while done is False: status, done = downloader.next_chunk() print("Download %d%%." % int(status.progress() * 100)) - with open(output_filename, "wb") as f: + with open(output_file_path, "wb") as f: f.write(fh.getbuffer()) -# Load credentials -creds = Credentials.from_authorized_user_file("secrets/token.json") -service = build("drive", "v3", credentials=creds) - - -def find_file_id_by_path(service, path): +def find_item_id_by_path(service, path): folder_id = "root" # start from the root for name in path.split("/"): if not name: @@ -108,7 +106,8 @@ def find_file_id_by_path(service, path): files = response.get("files", []) if not files: - raise Exception(f"No such file/dir named {name} in path {path}") + print(f"No such file/dir named {name} in path {path}") + return None # Assuming the first found file/folder is the correct one folder_id = files[0]["id"] @@ -116,11 +115,174 @@ def find_file_id_by_path(service, path): return folder_id -if __name__ == "__main__": - path = "CV Course/slides/Intro to Computer Vision" +def generate_current_slides_info(service, folder_path): + """ + Retrieves all Google Slides files along with their last modified times within a specified directory on Google Drive. + + :param service: Authorized Google Drive service instance. + :param folder_path: The path of the directory to search in, e.g., "CV Course/slides". + :return: A list of dictionaries containing file IDs, names, and last modified times of the Google Slides files. + """ + folder_id = find_item_id_by_path(service, folder_path) + slides_mime_type = "application/vnd.google-apps.presentation" + query = f"'{folder_id}' in parents and mimeType='{slides_mime_type}'" + response = ( + service.files() + .list( + q=query, + spaces="drive", + fields="files(id, name, modifiedTime)", + pageSize=100, # Adjust pageSize as needed + ) + .execute() + ) + + slides_info = response.get("files", []) + slides_info_dict = {item["id"]: item for item in slides_info} + + return slides_info_dict + + +def write_slides_info_to_drive(service, folder_path, slides_files, json_filename): + """ + Writes data about Google Slides files to a JSON file and updates or uploads it to a specified directory on Google Drive. + """ + # Write the data to a JSON file locally + with open(json_filename, "w") as json_file: + json.dump(slides_files, json_file, indent=4) + + # Check if the JSON file already exists in the specified folder + drive_json_id = find_item_id_by_path( + service, os.path.join(folder_path, json_filename) + ) + + file_metadata = { + "name": json_filename, + "parents": [find_item_id_by_path(service, folder_path)], + } + media = MediaFileUpload(json_filename, mimetype="application/json") + + if drive_json_id is None: + # File doesn't exist, create a new one + file_metadata = { + "name": json_filename, + "parents": [find_item_id_by_path(service, folder_path)], + } + created_file = ( + service.files() + .create(body=file_metadata, media_body=media, fields="id") + .execute() + ) + print( + f"Uploaded {json_filename} to Google Drive with ID: {created_file.get('id')}" + ) + else: + # File exists, update it + # Note: Removed 'parents' from file_metadata as it's not needed for update + file_metadata = { + "name": json_filename, + # Do not include 'parents' here for the update operation + } + updated_file = ( + service.files() + .update( + fileId=drive_json_id, body=file_metadata, media_body=media, fields="id" + ) + .execute() + ) + print( + f"Updated {json_filename} in Google Drive with ID: {updated_file.get('id')}" + ) + + # Clean up the local file + os.remove(json_filename) + + +def read_json_from_drive(service, file_id): + """ + Reads data from a JSON file stored on Google Drive. + :param service: Authorized Google Drive service instance. + :param file_id: The ID of the JSON file to read data from. + :return: The data read from the JSON file. + """ + # Step 1: Download the file + request = service.files().get_media(fileId=file_id) + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request) + done = False + while not done: + _, done = downloader.next_chunk() + + fh.seek(0) + + # Step 2: Read the JSON data from the file + json_data = json.load(fh) + + return json_data + + +def has_file_changed(old_info, new_info): + """ + Check if the file's modified time in the new info is more recent than in the old info. + """ + old_modified_time = old_info.get("modifiedTime") + new_modified_time = new_info.get("modifiedTime") + return new_modified_time > old_modified_time + + +def get_old_slides_info(folder_path, json_filename, service): + drive_json_id = find_item_id_by_path( + service, os.path.join(folder_path, json_filename) + ) + if drive_json_id is None: + return {} + + slides_info_dict = read_json_from_drive(service, drive_json_id) + + # Convert json_data into a dict for easier comparison + return slides_info_dict + + +def generate_changed_slides_as_pdfs( + service, last_slides_info, slides_info, out_folder_path +): + for slide_id, slide_data in slides_info.items(): + slides_info[slide_id] + if ( + slide_id in last_slides_info + and has_file_changed(last_slides_info[slide_id], slide_data) + ) or (slide_id not in last_slides_info): + output_filename = f"{slide_data['name']}.pdf" + download_as_pdf( + service, slide_id, os.path.join(out_folder_path, output_filename) + ) + print(f"Updated slide downloaded as PDF: {output_filename}") + + +def connect_to_google_drive_service(): creds = get_credentials() service = build("drive", "v3", credentials=creds) + return service + + +if __name__ == "__main__": + GOOGLE_DRIVE_SLIDES_FOLDER_PATH = "CV Course/slides" + JSON_FILENAME = "last_slides_info.json" + PDF_OUT_FOLDER_PATH = "/home/yoni/Desktop/AI_is_Math/lectures" + + service = connect_to_google_drive_service() + + last_slides_info = get_old_slides_info( + GOOGLE_DRIVE_SLIDES_FOLDER_PATH, JSON_FILENAME, service + ) + + slides_info = generate_current_slides_info(service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH) + + generate_changed_slides_as_pdfs( + service, last_slides_info, slides_info, PDF_OUT_FOLDER_PATH + ) - file_id = find_file_id_by_path(service, path) - download_as_pdf(service, file_id, "output2.pdf") + write_slides_info_to_drive( + service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH, slides_info, JSON_FILENAME + )