Merge branch 'main' into docs-to-mkdocs-material
audreyfeldroy authored Oct 24, 2023
2 parents 1516422 + 9a3d389 commit b5427a6
Showing 10 changed files with 132 additions and 97 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/update_contributors.yaml
@@ -0,0 +1,14 @@
on:
  push:
    branches:
      - main

jobs:
  contrib-readme-job:
    runs-on: ubuntu-latest
    name: A job to automate contrib in readme
    steps:
      - name: Contribute List
        uses: akhilmhdh/[email protected]
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3 changes: 3 additions & 0 deletions Makefile
@@ -0,0 +1,3 @@
lint:
	black .
	ruff check . --fix
5 changes: 5 additions & 0 deletions README.md
@@ -64,3 +64,8 @@ First install the project using the installation instructions in docs/source/get
```
hist --help
```

## Contributors

<!-- readme: contributors -start -->
<!-- readme: contributors -end -->
53 changes: 29 additions & 24 deletions pyproject.toml
@@ -33,7 +33,6 @@ dependencies = [
dev = [
"black", # code auto-formatting
"coverage==7.3.2", # testing
"isort", # code auto-formatting
"mypy", # linting
"pytest", # testing
"ruff==0.0.292", # linting
@@ -59,30 +58,36 @@ package-dir = {"" = "src"}
[project.scripts]
hist = "interviewkit.cli:app"


# Isort
# -----

[tool.isort]
line_length = 99
profile = "black"
default_section = "THIRDPARTY"
lines_after_imports = 2


# Mypy
# Ruff
# ----

[tool.mypy]
files = "."
[tool.ruff]
select = [
    "E", # pycodestyle
    "F", # pyflakes
    "I", # isort
]
ignore = [
    "E501", # line too long - black takes care of this for us
]

# Use strict defaults
strict = true
warn_unreachable = true
warn_no_return = true
[tool.ruff.per-file-ignores]
# Allow unused imports in __init__ files, as these are convenience imports
"**/__init__.py" = [ "F401" ]

[tool.ruff.isort]
lines-after-imports = 2
section-order = [
"future",
"standard-library",
"third-party",
"first-party",
"project",
"local-folder",
]

[[tool.mypy.overrides]]
# Don't require test functions to include types
module = "tests.*"
allow_untyped_defs = true
disable_error_code = "attr-defined"
[tool.ruff.isort.sections]
"project" = [
"src",
"tests",
]
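
For illustration only (not part of this commit), the following sketch shows how a module's imports would be grouped under the `[tool.ruff.isort]` settings above; the module and function names are hypothetical, and `typer` stands in for any third-party dependency:

```
from __future__ import annotations  # "future" section

import sys  # "standard-library" section
from pathlib import Path

import typer  # "third-party" section

# Imports from the custom "project" section (top-level packages src and tests)
# and then "local-folder" relative imports would follow here, each as its own group.


def main() -> None:
    """Two blank lines separate imports from code, per lines-after-imports = 2."""
    typer.echo(str(Path(sys.argv[0])))


if __name__ == "__main__":
    main()
```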
24 changes: 21 additions & 3 deletions src/interviewkit/cli.py
@@ -1,11 +1,10 @@
import sys
import typer

from pathlib import Path
from typing_extensions import Annotated

import typer
from slicer import audio_slicing
from transcript import transcribe_from_paths
from typing_extensions import Annotated


__version__ = '0.0.1'
@@ -41,6 +40,25 @@ def slice(
"""Slices an audio file into smaller audio files."""
audio_slicing(source, start, duration)

@app.command()
def generate_questions(source: Annotated[
        Path,
        typer.Argument(
            exists=True,
            file_okay=True,
            dir_okay=False,
            readable=True,
            resolve_path=True,
            help="Source transcript file",
        ),
    ],
    target: Path
):
    """Generates questions from a transcript."""

    questions = generate_questions_from_transcript(source.read_text())
    target.write_text(questions)


@app.command()
def transcribe(
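As a hedged sketch only (not part of this commit), the new command could be exercised with Typer's test runner; it assumes `src/interviewkit` is on `sys.path` so `cli.py`'s flat imports resolve, that `generate_questions_from_transcript` is importable from `cli`, and that `transcript.txt` exists:

```
from typer.testing import CliRunner

from cli import app  # assumes src/interviewkit is on sys.path

runner = CliRunner()
# Typer derives the CLI name "generate-questions" from the function name by default.
result = runner.invoke(app, ["generate-questions", "transcript.txt", "questions.txt"])
print(result.exit_code, result.output)
```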
4 changes: 2 additions & 2 deletions src/interviewkit/interview.py
@@ -1,6 +1,6 @@
from random import choice
from interviewee import Interviewee
from enum import StrEnum, auto

from interviewee import Interviewee
from transcript import Transcript


50 changes: 50 additions & 0 deletions src/interviewkit/questions.py
@@ -0,0 +1,50 @@
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

# # Securely get your credentials
# TODO: Pass in arguments or use env vars
CLARIFAI_PAT = ''
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
CLARIFAI_USER_ID = 'meta'
CLARIFAI_APP_ID = 'Llama-2'
# Change these to whatever model and text URL you want to use
CLARIFAI_MODEL_ID = 'llama2-70b-chat'
CLARIFAI_MODEL_VERSION_ID = 'acba9c1995f8462390d7cb77d482810b'


def generate_questions_from_transcript(transcript: str):

    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + CLARIFAI_PAT),)
    userDataObject = resources_pb2.UserAppIDSet(
        user_id=CLARIFAI_USER_ID, app_id=CLARIFAI_APP_ID)

    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,
            model_id=CLARIFAI_MODEL_ID,
            version_id=CLARIFAI_MODEL_VERSION_ID,
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=transcript
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )

    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        status = post_model_outputs_response.status.description
        raise Exception(f"Post model outputs failed, status: {status}")

    output = post_model_outputs_response.outputs[0]
    return output.data.text.raw
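
A usage sketch only (not in the commit): per the TODO above, the PAT can come from an environment variable instead of the hard-coded empty string; the transcript path here is hypothetical:

```
import os
from pathlib import Path

import questions  # the module added above, assuming src/interviewkit is on sys.path

# Avoid hard-coding credentials: read the Personal Access Token from the environment.
questions.CLARIFAI_PAT = os.environ["CLARIFAI_PAT"]

transcript_text = Path("data/interview_transcript.txt").read_text()  # hypothetical path
print(questions.generate_questions_from_transcript(transcript_text))
```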
3 changes: 2 additions & 1 deletion src/interviewkit/slicer.py
@@ -27,9 +27,10 @@
python interviewkit/slicer.py data/Martine+Barrat_FINAL.mp3 80:30 90:40
"""
import shutil
import sys
from pathlib import Path
import shutil


try:
    import pydub
64 changes: 0 additions & 64 deletions src/interviewkit/transcript.py
@@ -1,9 +1,6 @@
from pathlib import Path
from rich.console import Console
import sys
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2

try:
    import whisper
@@ -20,53 +17,6 @@ class Transcript(BaseModel):
"""The Transcript entity represents the transcript of an interview."""
content: str

# # Securely get your credentials
# PAT = os.getenv('CLARIFAI_PAT')
# USER_ID = os.getenv('CLARIFAI_USER_ID')
# APP_ID = os.getenv('CLARIFAI_APP_ID')
# MODEL_ID = os.getenv('CLARIFAI_MODEL_ID')
# MODEL_VERSION_ID = os.getenv('CLARIFAI_MODEL_VERSION_ID')
PAT = ''
# Specify the correct user_id/app_id pairings
# Since you're making inferences outside your app's scope
USER_ID = 'meta'
APP_ID = 'Llama-2'
# Change these to whatever model and text URL you want to use
MODEL_ID = 'llama2-70b-chat'
MODEL_VERSION_ID = 'acba9c1995f8462390d7cb77d482810b'

def generate_questions(transcript_chunk):
    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)

    metadata = (('authorization', 'Key ' + PAT),)
    userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

    post_model_outputs_response = stub.PostModelOutputs(
        service_pb2.PostModelOutputsRequest(
            user_app_id=userDataObject,
            model_id=MODEL_ID,
            version_id=MODEL_VERSION_ID,
            inputs=[
                resources_pb2.Input(
                    data=resources_pb2.Data(
                        text=resources_pb2.Text(
                            raw=transcript_chunk
                        )
                    )
                )
            ]
        ),
        metadata=metadata
    )

    if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
        print(post_model_outputs_response.status)
        raise Exception(f"Post model outputs failed, status: {post_model_outputs_response.status.description}")

    output = post_model_outputs_response.outputs[0]
    return output.data.text.raw

def transcribe_from_paths(source: Path, target: Path) -> None:
    console.print("Loading whisper base model...")
    model = whisper.load_model("base")
@@ -89,20 +39,6 @@ def transcribe_from_paths(source: Path, target: Path) -> None:
console.print("Transcript saved to:")
console.print(f" [green bold]{target / source.name}.txt[/green bold]")

    # Generate questions from the transcript
    transcript_chunk = result['text'] # Assuming 'result' contains the transcribed text
    # Debug: Print type and value of transcript_chunk
    print(f"Type of transcript_chunk: {type(transcript_chunk)}")
    print(f"Value of transcript_chunk: {transcript_chunk}")

    # Ensure transcript_chunk is a string
    if not isinstance(transcript_chunk, str):
        print("Warning: transcript_chunk is not a string. Trying to convert...")
        transcript_chunk = str(transcript_chunk)

    questions = generate_questions(transcript_chunk)
    console.print("Generated Questions:\n", questions)

if __name__ == "__main__":
    source = Path(sys.argv[1])
    target = Path(sys.argv[2])
9 changes: 6 additions & 3 deletions src/interviewkit/transcript_using_m5.py
@@ -1,16 +1,19 @@
import sys
from pathlib import Path

from rich.console import Console
import sys
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import T5ForConditionalGeneration, T5Tokenizer


try:
    import whisper
except ImportError:
    print("Please install Whisper: pip install openai-whisper")
    exit(1)

from whisper.utils import get_writer
from pydantic import BaseModel
from whisper.utils import get_writer


console = Console()

