From dd8f1e0aaca864836f935c6f50bbd306688b1700 Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Mon, 13 Jan 2025 05:46:29 +0100 Subject: [PATCH] feat: enhance parser domain-agnostic support (#117) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: make parser domain-agnostic to support multiple Git hosts - added list of known domains/Git hosts in `query_parser.py` - fixed bug from [#115](https://github.com/cyclotruc/gitingest/pull/115): corrected case handling for URL components—scheme, domain, username, and repository are case-insensitive, but paths beyond (e.g., file names, branches) are case-sensitive - implemented `try_domains_for_user_and_repo` in `query_parser.py` to iteratively guess the correct domain until success or supported hosts are exhausted - added helper functions `_get_user_and_repo_from_path`, `_validate_host`, and `_validate_scheme` in `query_parser.py` - extended `_parse_repo_source` in `query_parser.py` to be Git host agnostic by using `try_domains_for_user_and_repo` - added tests `test_parse_url_unsupported_host` and `test_parse_query_with_branch` in `test_query_parser.py` - created new file `test_git_host_agnostic.py` to verify domain/Git host agnostic behavior --- Dockerfile | 2 +- README.md | 30 +-- src/gitingest/__init__.py | 2 +- src/gitingest/cli.py | 4 +- src/gitingest/exceptions.py | 4 +- src/gitingest/query_ingestion.py | 4 +- src/gitingest/query_parser.py | 221 +++++++++++++----- src/gitingest/repository_clone.py | 56 ++++- src/gitingest/repository_ingest.py | 6 +- src/main.py | 2 +- src/query_processor.py | 10 +- src/routers/dynamic.py | 16 +- src/routers/index.py | 4 +- src/templates/api.jinja | 2 +- src/templates/base.jinja | 4 +- src/templates/components/footer.jinja | 2 +- .../{github_form.jinja => git_form.jinja} | 2 +- src/templates/{github.jinja => git.jinja} | 2 +- src/templates/index.jinja | 4 +- tests/query_parser/test_git_host_agnostic.py | 81 +++++++ tests/{ => query_parser}/test_query_parser.py | 116 +++++---- tests/test_repository_clone.py | 22 +- 22 files changed, 429 insertions(+), 167 deletions(-) rename src/templates/components/{github_form.jinja => git_form.jinja} (98%) rename src/templates/{github.jinja => git.jinja} (97%) create mode 100644 tests/query_parser/test_git_host_agnostic.py rename tests/{ => query_parser}/test_query_parser.py (62%) diff --git a/Dockerfile b/Dockerfile index 564a5ab..cb0eab8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ FROM python:3.12-slim ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 -# Install git +# Install Git RUN apt-get update \ && apt-get install -y --no-install-recommends git curl\ && rm -rf /var/lib/apt/lists/* diff --git a/README.md b/README.md index d5fe307..049f640 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ Turn any Git repository into a prompt-friendly text ingest for LLMs. -You can also replace `hub` with `ingest` in any GitHub URL to access the coresponding digest +You can also replace `hub` with `ingest` in any GitHub URL to access the coresponding digest. -[gitingest.com](https://gitingest.com/) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest/) +[gitingest.com](https://gitingest.com) · [Chrome Extension](https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood) · [Firefox Add-on](https://addons.mozilla.org/firefox/addon/gitingest) ## 🚀 Features -- **Easy code context**: Get a text digest from a git repository URL or a directory +- **Easy code context**: Get a text digest from a Git repository URL or a directory - **Smart Formatting**: Optimized output format for LLM prompts - **Statistics about**: - File and directory structure @@ -36,11 +36,12 @@ pip install gitingest Available in the Chrome Web Store -Get The Add-on for Firefox +Get The Add-on for Firefox Get from the Edge Add-ons The extension is open source at [lcandy2/gitingest-extension](https://github.com/lcandy2/gitingest-extension). + Issues and feature requests are welcome to the repo. ## 💡 Command line usage @@ -71,7 +72,7 @@ summary, tree, content = ingest("path/to/directory") summary, tree, content = ingest("https://github.com/cyclotruc/gitingest") ``` -By default, this won't write a file but can be enabled with the `output` argument +By default, this won't write a file but can be enabled with the `output` argument. ## 🌐 Self-host @@ -87,31 +88,30 @@ By default, this won't write a file but can be enabled with the `output` argumen docker run -d --name gitingest -p 8000:8000 gitingest ``` -The application will be available at `http://localhost:8000` +The application will be available at `http://localhost:8000`. If you are hosting it on a domain, you can specify the allowed hostnames via env variable `ALLOWED_HOSTS`. ```bash - #Default: "gitingest.com,*.gitingest.com,localhost, 127.0.0.1". + # Default: "gitingest.com, *.gitingest.com, localhost, 127.0.0.1". ALLOWED_HOSTS="example.com, localhost, 127.0.0.1" ``` ## 🛠️ Stack -- [Tailwind CSS](https://tailwindcss.com/) - Frontend +- [Tailwind CSS](https://tailwindcss.com) - Frontend - [FastAPI](https://github.com/fastapi/fastapi) - Backend framework -- [Jinja2](https://jinja.palletsprojects.com/) - HTML templating +- [Jinja2](https://jinja.palletsprojects.com) - HTML templating - [tiktoken](https://github.com/openai/tiktoken) - Token estimation -- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics +- [apianalytics.dev](https://www.apianalytics.dev) - Simple Analytics -### Looking for a javascript/node package? +### Looking for a JavaScript/Node package? Check out the NPM alternative 📦 Repomix: ## ✔️ Contributing to Gitingest -Gitingest aims to be friendly for first time contributors, with a simple python and html codebase. - If you need any help while working with the code, reach out to us on [discord](https://discord.com/invite/zerRaGK9EC) +Gitingest aims to be friendly for first time contributors, with a simple python and html codebase. If you need any help while working with the code, reach out to us on [Discord](https://discord.com/invite/zerRaGK9EC). ### Ways to help (non-technical) @@ -125,7 +125,7 @@ Gitingest aims to be friendly for first time contributors, with a simple python 2. Setup the dev environment (see Development section bellow) 3. Run unit tests with `pytest` 4. Commit your changes and run `pre-commit` -5. Open a pull request on Github for review and feedback +5. Open a pull request on GitHub for review and feedback 6. (Optionnal) Invite project maintainer to your branch for easier collaboration ## 🔧 Development @@ -161,7 +161,7 @@ Gitingest aims to be friendly for first time contributors, with a simple python pytest ``` -The application should be available at `http://localhost:8000` +The application should be available at `http://localhost:8000`. ### Working on the CLI diff --git a/src/gitingest/__init__.py b/src/gitingest/__init__.py index c592350..692de60 100644 --- a/src/gitingest/__init__.py +++ b/src/gitingest/__init__.py @@ -1,4 +1,4 @@ -""" Gitingest: A package for ingesting data from git repositories. """ +""" Gitingest: A package for ingesting data from Git repositories. """ from gitingest.query_ingestion import run_ingest_query from gitingest.query_parser import parse_query diff --git a/src/gitingest/cli.py b/src/gitingest/cli.py index ada231a..371263a 100644 --- a/src/gitingest/cli.py +++ b/src/gitingest/cli.py @@ -14,7 +14,7 @@ @click.option("--max-size", "-s", default=MAX_FILE_SIZE, help="Maximum file size to process in bytes") @click.option("--exclude-pattern", "-e", multiple=True, help="Patterns to exclude") @click.option("--include-pattern", "-i", multiple=True, help="Patterns to include") -def main( +async def main( source: str, output: str | None, max_size: int, @@ -54,7 +54,7 @@ def main( if not output: output = "digest.txt" - summary, _, _ = ingest(source, max_size, include_patterns, exclude_patterns, output=output) + summary, _, _ = await ingest(source, max_size, include_patterns, exclude_patterns, output=output) click.echo(f"Analysis complete! Output written to: {output}") click.echo("\nSummary:") diff --git a/src/gitingest/exceptions.py b/src/gitingest/exceptions.py index bfb3888..8808cf7 100644 --- a/src/gitingest/exceptions.py +++ b/src/gitingest/exceptions.py @@ -23,7 +23,7 @@ def __init__(self, pattern: str) -> None: class AsyncTimeoutError(Exception): """ - Raised when an async operation exceeds its timeout limit. + Exception raised when an async operation exceeds its timeout limit. This exception is used by the `async_timeout` decorator to signal that the wrapped asynchronous function has exceeded the specified time limit for execution. @@ -38,7 +38,7 @@ def __init__(self, max_files: int) -> None: class MaxFileSizeReachedError(Exception): - """Raised when the maximum file size is reached.""" + """Exception raised when the maximum file size is reached.""" def __init__(self, max_size: int): super().__init__(f"Maximum file size limit ({max_size/1024/1024:.1f}MB) reached.") diff --git a/src/gitingest/query_ingestion.py b/src/gitingest/query_ingestion.py index c58ea81..3396ca6 100644 --- a/src/gitingest/query_ingestion.py +++ b/src/gitingest/query_ingestion.py @@ -170,7 +170,9 @@ def _read_file_content(file_path: Path) -> str: def _sort_children(children: list[dict[str, Any]]) -> list[dict[str, Any]]: """ - Sort children nodes with: + Sort the children nodes of a directory according to a specific order. + + Order of sorting: 1. README.md first 2. Regular files (not starting with dot) 3. Hidden files (starting with dot) diff --git a/src/gitingest/query_parser.py b/src/gitingest/query_parser.py index f232e63..78dd6cf 100644 --- a/src/gitingest/query_parser.py +++ b/src/gitingest/query_parser.py @@ -11,11 +11,20 @@ from config import TMP_BASE_PATH from gitingest.exceptions import InvalidPatternError from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS +from gitingest.repository_clone import _check_repo_exists -HEX_DIGITS = set(string.hexdigits) +HEX_DIGITS: set[str] = set(string.hexdigits) +KNOWN_GIT_HOSTS: list[str] = [ + "github.com", + "gitlab.com", + "bitbucket.org", + "gitea.com", + "codeberg.org", +] -def parse_query( + +async def parse_query( source: str, max_file_size: int, from_web: bool, @@ -48,16 +57,16 @@ def parse_query( A dictionary containing the parsed query parameters, including 'max_file_size', 'ignore_patterns', and 'include_patterns'. """ - # Normalize and clean up the source string to make it case-insensitive - source = source.lower().strip() # Determine the parsing method based on the source type - if from_web or source.startswith("https://") or "github.com" in source: - query = _parse_url(source) + if from_web or urlparse(source).scheme in ("https", "http") or any(h in source for h in KNOWN_GIT_HOSTS): + # We either have a full URL or a domain-less slug + query = await _parse_repo_source(source) else: + # Local path scenario query = _parse_path(source) - # Process ignore patterns + # Combine ignore patterns ignore_patterns_list = DEFAULT_IGNORE_PATTERNS.copy() if ignore_patterns: ignore_patterns_list += _parse_patterns(ignore_patterns) @@ -69,7 +78,6 @@ def parse_query( else: parsed_include = None - # Update the query dictionary with max_file_size and processed patterns query.update( { "max_file_size": max_file_size, @@ -80,52 +88,54 @@ def parse_query( return query -def _parse_url(url: str) -> dict[str, Any]: +async def _parse_repo_source(source: str) -> dict[str, Any]: """ - Parse a GitHub repository URL into a structured query dictionary. + Parse a repository URL into a structured query dictionary. - This function extracts relevant information from a GitHub URL, such as the username, - repository name, commit, branch, and subpath, and returns them in a structured format. + If source is: + - A fully qualified URL (https://gitlab.com/...), parse & verify that domain + - A URL missing 'https://' (gitlab.com/...), add 'https://' and parse + - A 'slug' (like 'pandas-dev/pandas'), attempt known domains until we find one that exists. Parameters ---------- - url : str - The GitHub URL to parse. + source : str + The URL or domain-less slug to parse. Returns ------- dict[str, Any] - A dictionary containing the parsed details of the GitHub repository, including - the username, repository name, commit, branch, and other relevant information. - - Raises - ------ - ValueError - If the URL is invalid or does not correspond to a valid Git repository. + A dictionary containing the parsed details of the repository, including the username, + repository name, commit, branch, and other relevant information. """ - # Clean up the URL - url = url.split(" ")[0] # remove trailing text - url = unquote(url) # decode URL-encoded characters + source = unquote(source) - if not url.startswith(("https://", "http://")): - url = "https://" + url + # Attempt to parse + parsed_url = urlparse(source) - # Parse URL and reconstruct it without query parameters and fragments - parsed_url = urlparse(url) - url = f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}" + if parsed_url.scheme: + _validate_scheme(parsed_url.scheme) + _validate_host(parsed_url.netloc.lower()) - # Extract domain and path - url_parts = url.split("/") - domain = url_parts[2] - path_parts = url_parts[3:] + else: # Will be of the form 'host/user/repo' or 'user/repo' + tmp_host = source.split("/")[0].lower() + if "." in tmp_host: + _validate_host(tmp_host) + else: + # No scheme, no domain => user typed "user/repo", so we'll guess the domain. + host = await try_domains_for_user_and_repo(*_get_user_and_repo_from_path(source)) + source = f"{host}/{source}" - if len(path_parts) < 2: - raise ValueError("Invalid repository URL. Please provide a valid Git repository URL.") + source = "https://" + source + parsed_url = urlparse(source) + + host = parsed_url.netloc.lower() + user_name, repo_name = _get_user_and_repo_from_path(parsed_url.path) - user_name = path_parts[0] - repo_name = path_parts[1] _id = str(uuid.uuid4()) slug = f"{user_name}-{repo_name}" + local_path = Path(TMP_BASE_PATH) / _id / slug + url = f"https://{host}/{user_name}/{repo_name}" parsed = { "user_name": user_name, @@ -134,38 +144,46 @@ def _parse_url(url: str) -> dict[str, Any]: "branch": None, "commit": None, "subpath": "/", - "local_path": Path(TMP_BASE_PATH) / _id / slug, - "url": f"https://{domain}/{user_name}/{repo_name}", - "slug": slug, + "local_path": local_path, + "url": url, + "slug": slug, # e.g. "pandas-dev-pandas" "id": _id, } - # If this is an issues page or pull requests, return early without processing subpath - if len(path_parts) > 2 and (path_parts[2] == "issues" or path_parts[2] == "pull"): + remaining_parts = parsed_url.path.strip("/").split("/")[2:] + + if not remaining_parts: return parsed + possible_type = remaining_parts.pop(0) # e.g. 'issues', 'pull', 'tree', 'blob' + # If no extra path parts, just return - if len(path_parts) < 4: + if not remaining_parts: + return parsed + + # If this is an issues page or pull requests, return early without processing subpath + if remaining_parts and possible_type in ("issues", "pull"): return parsed - parsed["type"] = path_parts[2] # Usually 'tree' or 'blob' - commit = path_parts[3] + parsed["type"] = possible_type - if _is_valid_git_commit_hash(commit): - parsed["commit"] = commit - if len(path_parts) > 4: - parsed["subpath"] += "/".join(path_parts[4:]) + # Commit or branch + commit_or_branch = remaining_parts.pop(0) + if _is_valid_git_commit_hash(commit_or_branch): + parsed["commit"] = commit_or_branch else: - parsed["branch"] = commit - if len(path_parts) > 4: - parsed["subpath"] += "/".join(path_parts[4:]) + parsed["branch"] = commit_or_branch + + # Subpath if anything left + if remaining_parts: + parsed["subpath"] += "/".join(remaining_parts) return parsed def _is_valid_git_commit_hash(commit: str) -> bool: """ - Validates if the provided string is a valid Git commit hash. + Validate if the provided string is a valid Git commit hash. This function checks if the commit hash is a 40-character string consisting only of hexadecimal digits, which is the standard format for Git commit hashes. @@ -185,7 +203,7 @@ def _is_valid_git_commit_hash(commit: str) -> bool: def _normalize_pattern(pattern: str) -> str: """ - Normalizes the given pattern by removing leading separators and appending a wildcard. + Normalize the given pattern by removing leading separators and appending a wildcard. This function processes the pattern string by stripping leading directory separators and appending a wildcard (`*`) if the pattern ends with a separator. @@ -249,7 +267,7 @@ def _parse_patterns(pattern: list[str] | str) -> list[str]: def _override_ignore_patterns(ignore_patterns: list[str], include_patterns: list[str]) -> list[str]: """ - Removes patterns from ignore_patterns that are present in include_patterns using set difference. + Remove patterns from ignore_patterns that are present in include_patterns using set difference. Parameters ---------- @@ -268,7 +286,7 @@ def _override_ignore_patterns(ignore_patterns: list[str], include_patterns: list def _parse_path(path_str: str) -> dict[str, Any]: """ - Parses a file path into a structured query dictionary. + Parse a file path into a structured query dictionary. This function takes a file path and constructs a query dictionary that includes relevant details such as the absolute path and the slug (a combination of the @@ -297,7 +315,7 @@ def _parse_path(path_str: str) -> dict[str, Any]: def _is_valid_pattern(pattern: str) -> bool: """ - Validates if the given pattern contains only valid characters. + Validate if the given pattern contains only valid characters. This function checks if the pattern contains only alphanumeric characters or one of the following allowed characters: dash (`-`), underscore (`_`), dot (`.`), @@ -314,3 +332,92 @@ def _is_valid_pattern(pattern: str) -> bool: True if the pattern is valid, otherwise False. """ return all(c.isalnum() or c in "-_./+*" for c in pattern) + + +async def try_domains_for_user_and_repo(user_name: str, repo_name: str) -> str: + """ + Attempt to find a valid repository host for the given user_name and repo_name. + + Parameters + ---------- + user_name : str + The username or owner of the repository. + repo_name : str + The name of the repository. + + Returns + ------- + str + The domain of the valid repository host. + + Raises + ------ + ValueError + If no valid repository host is found for the given user_name and repo_name. + """ + for domain in KNOWN_GIT_HOSTS: + candidate = f"https://{domain}/{user_name}/{repo_name}" + if await _check_repo_exists(candidate): + return domain + raise ValueError(f"Could not find a valid repository host for '{user_name}/{repo_name}'.") + + +def _get_user_and_repo_from_path(path: str) -> tuple[str, str]: + """ + Extract the user and repository names from a given path. + + Parameters + ---------- + path : str + The path to extract the user and repository names from. + + Returns + ------- + tuple[str, str] + A tuple containing the user and repository names. + + Raises + ------ + ValueError + If the path does not contain at least two parts. + """ + path_parts = path.lower().strip("/").split("/") + if len(path_parts) < 2: + raise ValueError(f"Invalid repository URL '{path}'") + return path_parts[0], path_parts[1] + + +def _validate_host(host: str) -> None: + """ + Validate the given host against the known Git hosts. + + Parameters + ---------- + host : str + The host to validate. + + Raises + ------ + ValueError + If the host is not a known Git host. + """ + if host not in KNOWN_GIT_HOSTS: + raise ValueError(f"Unknown domain '{host}' in URL") + + +def _validate_scheme(scheme: str) -> None: + """ + Validate the given scheme against the known schemes. + + Parameters + ---------- + scheme : str + The scheme to validate. + + Raises + ------ + ValueError + If the scheme is not 'http' or 'https'. + """ + if scheme not in ("https", "http"): + raise ValueError(f"Invalid URL scheme '{scheme}' in URL") diff --git a/src/gitingest/repository_clone.py b/src/gitingest/repository_clone.py index 01ba387..d251a6f 100644 --- a/src/gitingest/repository_clone.py +++ b/src/gitingest/repository_clone.py @@ -37,7 +37,7 @@ class CloneConfig: @async_timeout(CLONE_TIMEOUT) async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: """ - Clones a repository to a local path based on the provided configuration. + Clone a repository to a local path based on the provided configuration. This function handles the process of cloning a Git repository to the local file system. It can clone a specific branch or commit if provided, and it raises exceptions if @@ -55,7 +55,7 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: Returns ------- tuple[bytes, bytes] - A tuple containing the stdout and stderr of the git commands executed. + A tuple containing the stdout and stderr of the Git commands executed. Raises ------ @@ -101,17 +101,21 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]: async def _check_repo_exists(url: str) -> bool: """ - Check if a repository exists at the given URL using an HTTP HEAD request. + Check if a Git repository exists at the provided URL. Parameters ---------- url : str - The URL of the repository. - + The URL of the Git repository to check. Returns ------- bool True if the repository exists, False otherwise. + + Raises + ------ + RuntimeError + If the curl command returns an unexpected status code. """ proc = await asyncio.create_subprocess_exec( "curl", @@ -121,31 +125,40 @@ async def _check_repo_exists(url: str) -> bool: stderr=asyncio.subprocess.PIPE, ) stdout, _ = await proc.communicate() + if proc.returncode != 0: return False - # Check if stdout contains "404" status code - stdout_str = stdout.decode() - return "HTTP/1.1 404" not in stdout_str and "HTTP/2 404" not in stdout_str + + response = stdout.decode() + status_code = _get_status_code(response) + + if status_code in (200, 301): + return True + + if status_code in (404, 302): + return False + + raise RuntimeError(f"Unexpected status code: {status_code}") async def _run_git_command(*args: str) -> tuple[bytes, bytes]: """ - Executes a git command asynchronously and captures its output. + Execute a Git command asynchronously and captures its output. Parameters ---------- *args : str - The git command and its arguments to execute. + The Git command and its arguments to execute. Returns ------- tuple[bytes, bytes] - A tuple containing the stdout and stderr of the git command. + A tuple containing the stdout and stderr of the Git command. Raises ------ RuntimeError - If the git command exits with a non-zero status. + If the Git command exits with a non-zero status. """ proc = await asyncio.create_subprocess_exec( *args, @@ -158,3 +171,22 @@ async def _run_git_command(*args: str) -> tuple[bytes, bytes]: raise RuntimeError(f"Git command failed: {' '.join(args)}\nError: {error_message}") return stdout, stderr + + +def _get_status_code(response: str) -> int: + """ + Extract the status code from an HTTP response. + + Parameters + ---------- + response : str + The HTTP response string. + + Returns + ------- + int + The status code of the response + """ + status_line = response.splitlines()[0].strip() + status_code = int(status_line.split(" ", 2)[1]) + return status_code diff --git a/src/gitingest/repository_ingest.py b/src/gitingest/repository_ingest.py index e2cecaa..c7efa94 100644 --- a/src/gitingest/repository_ingest.py +++ b/src/gitingest/repository_ingest.py @@ -10,7 +10,7 @@ from gitingest.repository_clone import CloneConfig, clone_repo -def ingest( +async def ingest( source: str, max_file_size: int = 10 * 1024 * 1024, # 10 MB include_patterns: list[str] | str | None = None, @@ -27,7 +27,7 @@ def ingest( Parameters ---------- source : str - The source to analyze, which can be a URL (for a GitHub repository) or a local directory path. + The source to analyze, which can be a URL (for a Git repository) or a local directory path. max_file_size : int Maximum allowed file size for file ingestion. Files larger than this size are ignored, by default 10*1024*1024 (10 MB). @@ -52,7 +52,7 @@ def ingest( If `clone_repo` does not return a coroutine, or if the `source` is of an unsupported type. """ try: - query = parse_query( + query = await parse_query( source=source, max_file_size=max_file_size, from_web=False, diff --git a/src/main.py b/src/main.py index 7ba36a8..f2b63fd 100644 --- a/src/main.py +++ b/src/main.py @@ -78,7 +78,7 @@ async def process_folder(folder: Path) -> None: # Extract owner and repository name from the filename if txt_files and "-" in (filename := txt_files[0].stem): owner, repo = filename.split("-", 1) - repo_url = f"https://github.com/{owner}/{repo}" + repo_url = f"{owner}/{repo}" with open("history.txt", mode="a", encoding="utf-8") as history: history.write(f"{repo_url}\n") diff --git a/src/query_processor.py b/src/query_processor.py index f6c7df8..a66bdd3 100644 --- a/src/query_processor.py +++ b/src/query_processor.py @@ -26,7 +26,7 @@ async def process_query( """ Process a query by parsing input, cloning a repository, and generating a summary. - Handle user input, process GitHub repository data, and prepare + Handle user input, process Git repository data, and prepare a response for rendering a template with the processed results or an error message. Parameters @@ -34,7 +34,7 @@ async def process_query( request : Request The HTTP request object. input_text : str - Input text provided by the user, typically a GitHub repository URL or slug. + Input text provided by the user, typically a Git repository URL or slug. slider_position : int Position of the slider, representing the maximum file size in the query. pattern_type : str @@ -63,13 +63,13 @@ async def process_query( else: raise ValueError(f"Invalid pattern type: {pattern_type}") - template = "index.jinja" if is_index else "github.jinja" + template = "index.jinja" if is_index else "git.jinja" template_response = partial(templates.TemplateResponse, name=template) max_file_size = log_slider_to_size(slider_position) context = { "request": request, - "github_url": input_text, + "repo_url": input_text, "examples": EXAMPLE_REPOS if is_index else [], "default_file_size": slider_position, "pattern_type": pattern_type, @@ -77,7 +77,7 @@ async def process_query( } try: - query = parse_query( + query = await parse_query( source=input_text, max_file_size=max_file_size, from_web=True, diff --git a/src/routers/dynamic.py b/src/routers/dynamic.py index add89c4..0787fbf 100644 --- a/src/routers/dynamic.py +++ b/src/routers/dynamic.py @@ -14,29 +14,29 @@ @router.get("/{full_path:path}") async def catch_all(request: Request, full_path: str) -> HTMLResponse: """ - Renders a page with a GitHub URL based on the provided path. + Render a page with a Git URL based on the provided path. - This endpoint catches all GET requests with a dynamic path, constructs a GitHub URL - using the `full_path` parameter, and renders the `github.jinja` template with that URL. + This endpoint catches all GET requests with a dynamic path, constructs a Git URL + using the `full_path` parameter, and renders the `git.jinja` template with that URL. Parameters ---------- request : Request The incoming request object, which provides context for rendering the response. full_path : str - The full path extracted from the URL, which is used to build the GitHub URL. + The full path extracted from the URL, which is used to build the Git URL. Returns ------- HTMLResponse - An HTML response containing the rendered template, with the GitHub URL + An HTML response containing the rendered template, with the Git URL and other default parameters such as loading state and file size. """ return templates.TemplateResponse( - "github.jinja", + "git.jinja", { "request": request, - "github_url": f"https://github.com/{full_path}", + "repo_url": full_path, "loading": True, "default_file_size": 243, }, @@ -53,7 +53,7 @@ async def process_catch_all( pattern: str = Form(...), ) -> HTMLResponse: """ - Processes the form submission with user input for query parameters. + Process the form submission with user input for query parameters. This endpoint handles POST requests, processes the input parameters (e.g., text, file size, pattern), and calls the `process_query` function to handle the query logic, returning the result as an HTML response. diff --git a/src/routers/index.py b/src/routers/index.py index 70a3f6d..b338c30 100644 --- a/src/routers/index.py +++ b/src/routers/index.py @@ -15,7 +15,7 @@ @router.get("/", response_class=HTMLResponse) async def home(request: Request) -> HTMLResponse: """ - Renders the home page with example repositories and default parameters. + Render the home page with example repositories and default parameters. This endpoint serves the home page of the application, rendering the `index.jinja` template and providing it with a list of example repositories and default file size values. @@ -51,7 +51,7 @@ async def index_post( pattern: str = Form(...), ) -> HTMLResponse: """ - Processes the form submission with user input for query parameters. + Process the form submission with user input for query parameters. This endpoint handles POST requests from the home page form. It processes the user-submitted input (e.g., text, file size, pattern type) and invokes the `process_query` function to handle diff --git a/src/templates/api.jinja b/src/templates/api.jinja index 85fa0c3..9bad379 100644 --- a/src/templates/api.jinja +++ b/src/templates/api.jinja @@ -26,7 +26,7 @@ open an issue on github + class="text-[#6e5000] hover:underline">Open an issue on GitHub to suggest features.

diff --git a/src/templates/base.jinja b/src/templates/base.jinja index 7c8359c..a6e30bf 100644 --- a/src/templates/base.jinja +++ b/src/templates/base.jinja @@ -6,7 +6,7 @@ + content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> @@ -28,7 +28,7 @@ + content="Replace 'hub' with 'ingest' in any GitHub URL for a prompt-friendly text."> diff --git a/src/templates/components/footer.jinja b/src/templates/components/footer.jinja index 61fadb2..1a8f3e6 100644 --- a/src/templates/components/footer.jinja +++ b/src/templates/components/footer.jinja @@ -1,7 +1,7 @@