Skip to content

Commit

Permalink
google slides to pdf
Browse files Browse the repository at this point in the history
  • Loading branch information
YoniChechik committed Feb 1, 2024
1 parent 7979659 commit cf2b6be
Showing 1 changed file with 176 additions and 14 deletions.
190 changes: 176 additions & 14 deletions slides.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@


import io
import json
import os

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaFileUpload, MediaIoBaseDownload


def get_credentials():
Expand Down Expand Up @@ -73,25 +74,22 @@ def get_credentials():
return creds


def download_as_pdf(service, file_id, output_file_path):
    """
    Export a Google Drive file as a PDF and save it to a local path.

    :param service: Google Drive service object; ``service.files().export_media`` is called on it.
    :param file_id: ID of the Drive file to export.
    :param output_file_path: Local path the PDF bytes are written to.
    """
    request = service.files().export_media(fileId=file_id, mimeType="application/pdf")
    # The export is buffered in memory, so the output file is only opened
    # once every chunk has been downloaded.
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)

    print(f"Download PDF to {output_file_path}...")
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print(f"Download {int(status.progress() * 100)}%.")

    with open(output_file_path, "wb") as f:
        f.write(fh.getbuffer())


# Load credentials
creds = Credentials.from_authorized_user_file("secrets/token.json")
service = build("drive", "v3", credentials=creds)


def find_file_id_by_path(service, path):
def find_item_id_by_path(service, path):
folder_id = "root" # start from the root
for name in path.split("/"):
if not name:
Expand All @@ -108,19 +106,183 @@ def find_file_id_by_path(service, path):
files = response.get("files", [])

if not files:
raise Exception(f"No such file/dir named {name} in path {path}")
print(f"No such file/dir named {name} in path {path}")
return None

# Assuming the first found file/folder is the correct one
folder_id = files[0]["id"]

return folder_id


if __name__ == "__main__":
path = "CV Course/slides/Intro to Computer Vision"
def generate_current_slides_info(service, folder_path):
    """
    Collect metadata for every Google Slides file directly inside a Drive folder.

    :param service: Authorized Google Drive service instance.
    :param folder_path: The path of the directory to search in, e.g., "CV Course/slides".
    :return: A dict mapping each presentation's file ID to its metadata dict
        (the requested fields are ``id``, ``name`` and ``modifiedTime``).
    :raises FileNotFoundError: If ``folder_path`` cannot be resolved to a Drive item.
    """
    folder_id = find_item_id_by_path(service, folder_path)
    if folder_id is None:
        # Without this guard the query below would search for a parent called 'None'.
        raise FileNotFoundError(f"Google Drive folder not found: {folder_path}")

    slides_mime_type = "application/vnd.google-apps.presentation"
    query = (
        f"'{folder_id}' in parents and mimeType='{slides_mime_type}' "
        "and trashed = false"
    )

    slides_info_dict = {}
    page_token = None
    # Follow nextPageToken so folders with more than one page of results are
    # not silently truncated.
    while True:
        response = (
            service.files()
            .list(
                q=query,
                spaces="drive",
                fields="nextPageToken, files(id, name, modifiedTime)",
                pageSize=100,  # Adjust pageSize as needed
                pageToken=page_token,
            )
            .execute()
        )
        for item in response.get("files", []):
            slides_info_dict[item["id"]] = item

        page_token = response.get("nextPageToken")
        if not page_token:
            break

    return slides_info_dict


def write_slides_info_to_drive(service, folder_path, slides_files, json_filename):
    """
    Save slides metadata as JSON and create or update that file in a Drive folder.

    The data is first written to a local file named ``json_filename``, uploaded
    from there, and the local copy is removed afterwards (also when the upload
    fails).

    :param service: Authorized Google Drive service instance.
    :param folder_path: Drive path of the folder holding the JSON file, e.g., "CV Course/slides".
    :param slides_files: JSON-serializable data to store.
    :param json_filename: Name of the JSON file, used both locally and on Drive.
    :raises FileNotFoundError: If a new file has to be created but ``folder_path``
        cannot be resolved to a Drive item.
    """
    try:
        # Write the data to a JSON file locally
        with open(json_filename, "w") as json_file:
            json.dump(slides_files, json_file, indent=4)

        # Check if the JSON file already exists in the specified folder.
        # Drive paths are split on "/", so they must not be built with os.path.join.
        drive_json_id = find_item_id_by_path(
            service, f"{folder_path}/{json_filename}"
        )
        media = MediaFileUpload(json_filename, mimetype="application/json")

        if drive_json_id is None:
            # File doesn't exist, create a new one inside the target folder
            folder_id = find_item_id_by_path(service, folder_path)
            if folder_id is None:
                raise FileNotFoundError(
                    f"Google Drive folder not found: {folder_path}"
                )
            file_metadata = {"name": json_filename, "parents": [folder_id]}
            created_file = (
                service.files()
                .create(body=file_metadata, media_body=media, fields="id")
                .execute()
            )
            print(
                f"Uploaded {json_filename} to Google Drive with ID: {created_file.get('id')}"
            )
        else:
            # File exists, update it; 'parents' is not needed for an update
            file_metadata = {"name": json_filename}
            updated_file = (
                service.files()
                .update(
                    fileId=drive_json_id,
                    body=file_metadata,
                    media_body=media,
                    fields="id",
                )
                .execute()
            )
            print(
                f"Updated {json_filename} in Google Drive with ID: {updated_file.get('id')}"
            )
    finally:
        # Clean up the local file, whether or not the upload succeeded
        if os.path.exists(json_filename):
            os.remove(json_filename)


def read_json_from_drive(service, file_id):
    """
    Download a JSON file from Google Drive and return its parsed content.

    :param service: Authorized Google Drive service instance.
    :param file_id: The ID of the JSON file to read data from.
    :return: The data decoded from the file's JSON content.
    """
    buffer = io.BytesIO()
    chunk_downloader = MediaIoBaseDownload(
        buffer, service.files().get_media(fileId=file_id)
    )

    # Keep pulling chunks until the downloader reports completion.
    while True:
        _, finished = chunk_downloader.next_chunk()
        if finished:
            break

    # Decode everything that was buffered in one go.
    return json.loads(buffer.getvalue())


def has_file_changed(old_info, new_info):
    """
    Check if the file's modified time in the new info is more recent than in the old info.

    :param old_info: Mapping describing the previously recorded file state.
    :param new_info: Mapping describing the current file state.
    :return: True when ``new_info`` is newer, or when either mapping has no
        ``modifiedTime`` (the file is then treated as changed so it gets refreshed).
    """
    old_modified_time = old_info.get("modifiedTime")
    new_modified_time = new_info.get("modifiedTime")

    # A missing timestamp cannot be compared; treating the file as changed is
    # the safe choice and avoids a TypeError on None.
    if old_modified_time is None or new_modified_time is None:
        return True

    # NOTE(review): values are compared as-is; this assumes both timestamps use
    # the same sortable format (presumably RFC 3339 UTC strings) — confirm.
    return new_modified_time > old_modified_time


def get_old_slides_info(folder_path, json_filename, service):
    """
    Load the previously saved slides metadata from a JSON file on Google Drive.

    :param folder_path: Drive path of the folder containing the JSON file.
    :param json_filename: Name of the JSON file inside that folder.
    :param service: Authorized Google Drive service instance.
    :return: The parsed JSON content, or an empty dict when the file is not found.
    """
    # Drive paths are split on "/", so they must not be built with os.path.join
    # (which would insert a backslash on Windows).
    drive_json_id = find_item_id_by_path(service, f"{folder_path}/{json_filename}")
    if drive_json_id is None:
        return {}

    return read_json_from_drive(service, drive_json_id)


def generate_changed_slides_as_pdfs(
    service, last_slides_info, slides_info, out_folder_path
):
    """
    Download as PDF every presentation that is new or has changed.

    :param service: Authorized Google Drive service instance.
    :param last_slides_info: Dict of previously recorded slide metadata, keyed by file ID.
    :param slides_info: Dict of current slide metadata, keyed by file ID; each
        value must provide a ``name`` entry.
    :param out_folder_path: Local directory that receives the PDF files; it is
        created if it does not exist yet.
    """
    if out_folder_path:
        # Make sure the target directory exists before any PDF is written.
        os.makedirs(out_folder_path, exist_ok=True)

    for slide_id, slide_data in slides_info.items():
        is_new = slide_id not in last_slides_info
        if is_new or has_file_changed(last_slides_info[slide_id], slide_data):
            output_filename = f"{slide_data['name']}.pdf"
            download_as_pdf(
                service, slide_id, os.path.join(out_folder_path, output_filename)
            )
            print(f"Updated slide downloaded as PDF: {output_filename}")


def connect_to_google_drive_service():
    """
    Build a Google Drive v3 API client from the credentials returned by get_credentials().

    :return: The service object created by ``build``.
    """
    return build("drive", "v3", credentials=get_credentials())


if __name__ == "__main__":
    # Drive folder that holds the presentations and the saved metadata JSON.
    GOOGLE_DRIVE_SLIDES_FOLDER_PATH = "CV Course/slides"
    JSON_FILENAME = "last_slides_info.json"
    # Local directory that receives the exported PDFs.
    PDF_OUT_FOLDER_PATH = "/home/yoni/Desktop/AI_is_Math/lectures"

    service = connect_to_google_drive_service()

    # Snapshot recorded by the previous run (empty when none exists yet).
    last_slides_info = get_old_slides_info(
        GOOGLE_DRIVE_SLIDES_FOLDER_PATH, JSON_FILENAME, service
    )

    # Snapshot of the presentations as they are right now.
    slides_info = generate_current_slides_info(service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH)

    generate_changed_slides_as_pdfs(
        service, last_slides_info, slides_info, PDF_OUT_FOLDER_PATH
    )

    # The new snapshot is stored last, so it is only recorded once the
    # export step above has finished without raising.
    write_slides_info_to_drive(
        service, GOOGLE_DRIVE_SLIDES_FOLDER_PATH, slides_info, JSON_FILENAME
    )

0 comments on commit cf2b6be

Please sign in to comment.