From 31453eff7bccffbf75b2f2f9f290bb261cfd1d1f Mon Sep 17 00:00:00 2001
From: David Li <davidli012345@gmail.com>
Date: Thu, 6 Oct 2022 04:39:47 +0000
Subject: [PATCH] feat: closes #34, fixes ffmpeg usage

---
 .coveragerc                            | 19 +++++++++++++++-
 .github/workflows/transcribe_video.yml |  1 +
 processing.py                          | 31 ++++++++++++++++++++++++--
 transcript_manager.py                  |  6 ++---
 4 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 8e829eb..2564848 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,2 +1,19 @@
 [run]
-omit = tests
\ No newline at end of file
+omit = tests
+
+[report]
+# Regexes for lines to exclude from consideration
+exclude_lines =
+    # Have to re-enable the standard pragma
+    pragma: no cover
+    # Don't complain about missing debug-only code:
+    def __repr__
+    if self\.debug
+
+    # Don't complain if tests don't hit defensive assertion code:
+    raise AssertionError
+    raise NotImplementedError
+
+    # Don't complain if non-runnable code isn't run:
+    if 0:
+    if __name__ == .__main__.:
\ No newline at end of file
diff --git a/.github/workflows/transcribe_video.yml b/.github/workflows/transcribe_video.yml
index d9ec80d..8c290ae 100644
--- a/.github/workflows/transcribe_video.yml
+++ b/.github/workflows/transcribe_video.yml
@@ -67,6 +67,7 @@ jobs:
         run: |
           echo "ITERATION=${{ github.event.inputs.iteration }}" >> $GITHUB_ENV
           echo "YOUTUBE_URL=${{ github.event.inputs.youtube_url}}" >> $GITHUB_ENV
+          echo "${{ github.event.inputs.youtube_url }}"
       
       - name: set table name if available
         if: "${{ github.event.inputs.table_name != '' }}"
diff --git a/processing.py b/processing.py
index 61d9a6f..18ca404 100644
--- a/processing.py
+++ b/processing.py
@@ -25,7 +25,7 @@ def get_video_length(video_path: str):
         return None
 
 
-def get_video_from_start(url: str, config: dict):
+def get_video_from_start_legacy(url: str, config: dict):
     """
     Get video from start time.
     """
@@ -50,9 +50,22 @@ def get_video_from_start(url: str, config: dict):
     ic(result)
     return result.stdout.decode("utf-8")
 
+def get_video_from_start(url: str, config: dict):
+    """
+    Save the opening segment of the video at `url` to disk via ffmpeg-python.
+
+    `config["end"]` (default "00:00:10") limits how much of the input is read;
+    `config["filename"]` (default "livestream01.mp4") is the output path.
+    """
+    output_path = config.get("filename", "livestream01.mp4")
+    duration = config.get("end", "00:00:10")
+    # t=... on the input is passed to ffmpeg as an input option (-t <duration>).
+    stream = ffmpeg.input(url, t=duration).output(output_path)
+    stream.run()
+
 # wit ai process integration
 
-def convert_mp4_to_mp3(filename: str):
+def convert_mp4_to_mp3_legacy(filename: str):
     """
     Convert mp4 to mp3 using ffmpeg
     """
@@ -65,6 +78,18 @@ def convert_mp4_to_mp3(filename: str):
     ic(result)
     return result
 
+def convert_mp4_to_mp3(filename: str):
+    """
+    Convert mp4 to mp3 using ffmpeg.
+
+    The mp3 is written to `filename` with ".mp4" replaced by ".mp3".
+    """
+    ic("Converting mp4 to mp3")
+    mp3_filename = filename.replace(".mp4", ".mp3")
+    # vn=None is emitted as a bare -vn flag (no value), which tells ffmpeg
+    # to leave the video stream out so only audio is converted.
+    stream = ffmpeg.input(filename, vn=None)
+    stream.output(mp3_filename).run()
 
 # parse all the partial json responses and attempt to find the last one
 
@@ -230,6 +255,8 @@ def split_vid_into_chunks(filename: str, is_livestream: bool = False, chunk_size
             ic("No chunks to process for video")
                     # convert_mp4_to_mp3(filename)
         else:
+            # Is this conversion even needed for Whisper? The library may be able to do it itself.
+            # TODO: figure this out later.
             convert_mp4_to_mp3(filename)
             yield filename
         t2_start = time.perf_counter()
diff --git a/transcript_manager.py b/transcript_manager.py
index e416919..c70f525 100644
--- a/transcript_manager.py
+++ b/transcript_manager.py
@@ -296,9 +296,9 @@ def main(params: dict):
     args = parser.parse_args()
     # ensure WIT_AI_TOKEN is set
     ic("Running main")
-    if os.environ.get("WIT_AI_TOKEN") is None:
-        print("WIT_AI_TOKEN is not set")
-        exit(1)
+    # if os.environ.get("WIT_AI_TOKEN") is None:
+    #     print("WIT_AI_TOKEN is not set")
+    #     exit(1)
     dict_args = {
         "url": args.url,
         "exit_on_video": args.exit_for_videos,