From 3326beb9e3faae30222488250edbf960e6af6eb7 Mon Sep 17 00:00:00 2001 From: David Souther Date: Fri, 20 Sep 2024 09:52:52 -0400 Subject: [PATCH] Docs: Revamp developer docs. * Consolidate old ARCHITECTURE into DESIGN. * Add new ARCHITECTURE with overview of packages, folders, and most useful APIs. --- ARCHITECTURE.md | 126 ++++---------------------- DESIGN.md | 129 +++++++++++++++++++++++++++ README.md | 5 +- cli/src/index.ts | 5 +- core/src/actions/generate_manager.ts | 1 + core/src/content/content.ts | 2 +- core/src/engine/openai.ts | 9 +- package.json | 2 +- 8 files changed, 165 insertions(+), 114 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 7d4a341..7a33d62 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,125 +1,37 @@ -# AIlly, an AI writing assistant by David Souther +# Architecture -My authoring approach is text-based. -My writing style builds from bullet points. -I want to write prose that is minimally annotated, yet expressive across media. -I can use generative AI tooling to augment my writing style. +Ailly's codebase has a roughly hexagonal shaped architecture. `@ailly/core` provides the domain logic, as well as an extension mechanism for defining engines and plugins. CLI, Web, and Extension expose that core to several surfaces. -This authoring tool will allow me trivial static content publishing (including interactive materials). -Content will be version controlled. -It will use the content to tune generative AI on my voice, and I will edit generative AI's suggestions to expand content. -It can be forked and run by other writers with basic programming skills. -It can be augmented and developed by anyone with JavaScript programming experience. +## Core -This project will showcase "good", and it is "cheap" in that I'll do it myself on my own time. -The codebase aims for usable for myself, with the deployed site being broadly accessible. 
-This project will be extensible, and I intend to release the generative AI portion as a standalone utility at a future date.
-The website is static content, and will be deployed to a scalable provider.
+`@ailly/core` provides the domain logic for Ailly. It loads a file system for `Content` (./core/src/content/content.ts), orchestrates that through the chosen `Engine` (./core/src/engine/engine.ts), and saves the updated Content back to disk. The two main functions to interact with Content are `loadContent` and `writeContent`, both in ./core/src/content/content.ts. There's also a utility, `makeCLIContent`, that serves as a way to make an ad-hoc one-off piece of Content.
-I am the author & driver, but the content is globally available.
-Readers of this site will see it as a reflection of myself.
-I may seek help for UX design, but I will retain overall architecture and software design.
-I am building it on my own.
-If colleagues want to contribute to portions beyond ideas and encouragement, they can fork and run their own.
-I will consider pull requests that add features or fix bugs.
-I am paying hosting and related costs myself.
+Engines are wrappers around SDK and API calls, with a consistent interface to take a Content and stream its generation. Current Engines are OpenAI and Bedrock, though only Bedrock has been used extensively. To get an engine, call `getEngine` from ./core/src/engine/index.ts passing either a name of a known engine (see same file), or a path starting with `file://` that will be imported as a JavaScript file whose default export must align to the `Engine` interface.
-## Constraints
+Plugins are under development at this time, but generally follow the same loading process as custom engines. Call `getPlugin` from ./core/src/plugin/index.ts with a `file://` path to the JavaScript file whose default export implements the `Plugin` interface.
-I have no deadline for this project.
-I will contribute to this as a lower priority for my time.
-For the forseeable timeframe, the code will be run only via next.js. I may add a CLI in the future, and any folder structure details will be specified in a way that is amenable to a Python or other CLI implementation. +With content and an engine, a `GenerateManager` is responsible for running, retrying, etc all pieces of the pipeline. To get a `GenerateManager`, call the static method `GenerateManager.from`. `from` takes a list of which items to generate, a record of all content in the context, and additional `PipelineSettings`. The list of items must come from the keys of the context. (So, build a record of all context, and then choose the keys to generate for.) The easiest way to get a `PipelineSettings` is the `makePipelineSettings` function in ./core/src/index.ts. -## Context & Scope +After creating a `GenerateManager`, call `start` on it. While each thread is individually awaitable, the easiest approach is to just call `await manager.allSettled()`, which resolves with a PromiseSettledResult for each content in the list passed to `GenerateManager.from`. The original content in `context` will also have been updated at this point. -Adam Owada's Prompt Engineering site is an inspiration, and I'm sure we'll remain in contact. -I'll probably ask Gabo for help & feedback on design issues. -Several colleagues will provide additional feedback on the software and content, as requested. -At this time I will use OpenAI's GPT family of models for generative ai features. I may try to move this to a local llama based model at some point. -This starts with no dependents, though I do anticipate releasing the tooling around content generation and voice learning. +The File System is an abstraction provided by [`JEFRi Jiffies`](https://github.com/jefri/jiffies/blob/main/src/fs.ts), with in-memory & NodeJS backed implementations. -## Solution Strategy +## Cli -The content will be plaintext markdown files in a version controlled folder structure. 
-Authoring tooling runs as an npm cli command. -Options are available as cli arguments, plugin extension points, and as per-file settings. -Tooling for the authoring experience will be dev only, will load content files as necessary, and will use node or NextJS server side execution environments to call LLM APIs. -Engines will be available at least for AWS Bedrock and OpenAI API. -Publishing will use NextJS static site generation utilities to build & export files appropriate for deploying using GitHub Pages. +The CLI does the above operations guided by user-provided flags, with some additional flourish for streaming, logging, and debugging. -## Containers +## Web -### Authoring +The Web interface at https://ailly.dev provides a guided approach to learning prompt engineering. For developers, the `generateOne` function in ./web/src/app/ailly.ts is an accessible minimal function to make an ad-hoc API call. Note that at this point, the Content was created by the calling app, and no FileSystem is involved. -The NextJS dev environment will have protected pages that are only exposed during development. -These routes will have access to the project's local file system. -It will read and write files directly, and will have some git awareness to block operations when the repo is dirty. -Authoring proper will happen using text editors of choice. +## Extension -Generative AI features will use engines to allow multiple LLM backends. -The project will use a base model, and may fine tune that model using version controlled prompts and responses. -The responses are the markdown content that will get published to the site. -The prompts are stored adjacent their generated content, and can be edited same as content. -Fine-tuning will happen when requested, and be tied to a specific git commit of the project's content. -A section of content will get generated from appropriate prompt text & that file written to the local disk. 
-It can then be edited by the author, before making a git commit with that content. +The VSCode extension is similar to the CLI, but uses VSCode extension APIs to load and write content. Explore the [VSCode Extension API Docs](https://code.visualstudio.com/api) for more on what might be possible here. -### Publishing +## Integ -Static routes for all content will be registered & generated via NextJS. -Content will be read as markdown files, processed with off-the-shelf and custom plugins, and saved in a self-contained directory with all files necessary for static HTTP publishing. -Markdown and plugins will run during build time, however, some injected content like source code and diagrams wil be sent as plain text and evaluated for highlighting and SVG generation in the readers' browser -Site style, themes, and look & feel are by David. +integ is a series of shell and batch scripts to widely exercise @ailly/core and @ailly/cli. -## Runtime +## Content -### Authoring - -- David writes and edits prompt and content files. -- ~Ailly reads git statuses, to prevent OpenAI operations while the working tree is dirty.~ -- ~Ailly reads prompt and content files to prepare model fine-tuning runs.~ -- Ailly reads prompt files to request generated content, and writes that content to disk. - -### Publishing - -- NextJS has routes for content. -- NextJS reads content files & renders them as markdown. -- Markdown-AND plugin adds support for `&[ref];` as a way to inject arbitrary file and url contents as plain text. -- In the browser, javascript sends analytics data to Google Analytics. -- In the browser, javascript highlights source code blocks & generates SVG images for diagrams. - -## Deployment - -- NextJS writes static website files. -- GitHub Actions publishes static website files. - -## Architectural Decisions - -NextJS is React framework with leading (circa mid-2023) support for React Server Components. -This makes it a good choice for this style of edge computing. 
-The published end result is static HTML, but NextJS can expose dev systems and UIs when run locally. -While the generative AI portions could be run as a CLI, writing that in Node would add unrelated overhead to the project. -Such a CLI could also be written in Python, and perhaps a Python implementation of the resulting spec will be a useful tool, but for now I'm starting from NextJS so let's see where NextJS takes me. - -OpenAI's GPT models are comfortable and familiar, and the APIs "just work" (so far). -AWS -Learning to deploy and run a llama model would add significant overhead to getting started on the interesting parts, the authoring experience. -Perhaps when the Python CLI version comes, it'll have a module that allows switching between OpenAI API and a local llama. - -## Quality Commitments - -~The only person with a face to get an egg is me, so, that's my own quality bar.~ -As Ailly begins to hit wider usage, unit and integration tests cases are becoming increasingly important. Need to integration test on both *nix and Windows. - -## Risks and Technical Debt - -I'm doing this as a hobby project, which has a long history of starting strong and losing steam. -Let's see how much I like using this tool, how much people want me to be writing, and how much feedback I get on wanting to write with it. - -### Glossary - -| Term | Definition | -| ----------- | --------------------------------------------------- | -| NextJS | https://nextjs.org | -| OpenAI API | https://platform.openai.com/docs/guides/gpt | -| Fine Tuning | https://platform.openai.com/docs/guides/fine-tuning | +Sample content to show various cookbook approaches. \ No newline at end of file diff --git a/DESIGN.md b/DESIGN.md index 8cf0205..5e29430 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -1,5 +1,134 @@ +# AIlly, an AI writing assistant by David Souther + +My authoring approach is text-based. +My writing style builds from bullet points. 
+I want to write prose that is minimally annotated, yet expressive across media.
+I can use generative AI tooling to augment my writing style.
+
+This authoring tool will allow me trivial static content publishing (including interactive materials).
+Content will be version controlled.
+It will use the content to tune generative AI on my voice, and I will edit generative AI's suggestions to expand content.
+It can be forked and run by other writers with basic programming skills.
+It can be augmented and developed by anyone with JavaScript programming experience.
+
+This project will showcase "good", and it is "cheap" in that I'll do it myself on my own time.
+The codebase aims for usable for myself, with the deployed site being broadly accessible.
+This project will be extensible, and I intend to release the generative AI portion as a standalone utility at a future date.
+The website is static content, and will be deployed to a scalable provider.
+
+I am the author & driver, but the content is globally available.
+Readers of this site will see it as a reflection of myself.
+I may seek help for UX design, but I will retain overall architecture and software design.
+I am building it on my own.
+If colleagues want to contribute to portions beyond ideas and encouragement, they can fork and run their own.
+I will consider pull requests that add features or fix bugs.
+I am paying hosting and related costs myself.
+
+## Constraints
+
+I have no deadline for this project.
+I will contribute to this as a lower priority for my time.
+For the foreseeable timeframe, the code will be run only via next.js. I may add a CLI in the future, and any folder structure details will be specified in a way that is amenable to a Python or other CLI implementation.
+
+## Context & Scope
+
+Adam Owada's Prompt Engineering site is an inspiration, and I'm sure we'll remain in contact.
+I'll probably ask Gabo for help & feedback on design issues.
+Several colleagues will provide additional feedback on the software and content, as requested. +At this time I will use OpenAI's GPT family of models for generative ai features. I may try to move this to a local llama based model at some point. +This starts with no dependents, though I do anticipate releasing the tooling around content generation and voice learning. + +## Solution Strategy + +The content will be plaintext markdown files in a version controlled folder structure. +Authoring tooling runs as an npm cli command. +Options are available as cli arguments, plugin extension points, and as per-file settings. +Tooling for the authoring experience will be dev only, will load content files as necessary, and will use node or NextJS server side execution environments to call LLM APIs. +Engines will be available at least for AWS Bedrock and OpenAI API. +Publishing will use NextJS static site generation utilities to build & export files appropriate for deploying using GitHub Pages. + +## Containers + +### Authoring + +The NextJS dev environment will have protected pages that are only exposed during development. +These routes will have access to the project's local file system. +It will read and write files directly, and will have some git awareness to block operations when the repo is dirty. +Authoring proper will happen using text editors of choice. + +Generative AI features will use engines to allow multiple LLM backends. +The project will use a base model, and may fine tune that model using version controlled prompts and responses. +The responses are the markdown content that will get published to the site. +The prompts are stored adjacent their generated content, and can be edited same as content. +Fine-tuning will happen when requested, and be tied to a specific git commit of the project's content. +A section of content will get generated from appropriate prompt text & that file written to the local disk. 
+It can then be edited by the author, before making a git commit with that content.
+
+### Publishing
+
+Static routes for all content will be registered & generated via NextJS.
+Content will be read as markdown files, processed with off-the-shelf and custom plugins, and saved in a self-contained directory with all files necessary for static HTTP publishing.
+Markdown and plugins will run during build time, however, some injected content like source code and diagrams will be sent as plain text and evaluated for highlighting and SVG generation in the readers' browser.
+Site style, themes, and look & feel are by David.
+
+## Runtime
+
+### Authoring
+
+- David writes and edits prompt and content files.
+- ~Ailly reads git statuses, to prevent OpenAI operations while the working tree is dirty.~
+- ~Ailly reads prompt and content files to prepare model fine-tuning runs.~
+- Ailly reads prompt files to request generated content, and writes that content to disk.
+
+### Publishing
+
+- NextJS has routes for content.
+- NextJS reads content files & renders them as markdown.
+- Markdown-AND plugin adds support for `&[ref];` as a way to inject arbitrary file and url contents as plain text.
+- In the browser, javascript sends analytics data to Google Analytics.
+- In the browser, javascript highlights source code blocks & generates SVG images for diagrams.
+
+## Deployment
+
+- NextJS writes static website files.
+- GitHub Actions publishes static website files.
+
+## Architectural Decisions
+
+NextJS is a React framework with leading (circa mid-2023) support for React Server Components.
+This makes it a good choice for this style of edge computing.
+The published end result is static HTML, but NextJS can expose dev systems and UIs when run locally.
+While the generative AI portions could be run as a CLI, writing that in Node would add unrelated overhead to the project.
+Such a CLI could also be written in Python, and perhaps a Python implementation of the resulting spec will be a useful tool, but for now I'm starting from NextJS so let's see where NextJS takes me. + +OpenAI's GPT models are comfortable and familiar, and the APIs "just work" (so far). +AWS +Learning to deploy and run a llama model would add significant overhead to getting started on the interesting parts, the authoring experience. +Perhaps when the Python CLI version comes, it'll have a module that allows switching between OpenAI API and a local llama. + +## Quality Commitments + +~The only person with a face to get an egg is me, so, that's my own quality bar.~ +As Ailly begins to hit wider usage, unit and integration tests cases are becoming increasingly important. Need to integration test on both *nix and Windows. + +## Risks and Technical Debt + +I'm doing this as a hobby project, which has a long history of starting strong and losing steam. +Let's see how much I like using this tool, how much people want me to be writing, and how much feedback I get on wanting to write with it. + +### Glossary + +| Term | Definition | +| ----------- | --------------------------------------------------- | +| NextJS | https://nextjs.org | +| OpenAI API | https://platform.openai.com/docs/guides/gpt | +| Fine Tuning | https://platform.openai.com/docs/guides/fine-tuning | + + # Design Decisions +The following are some design decisions that were documented at the time. + ## Base Platform NextJS is React framework with leading (circa mid-2023) support for React Server Components. diff --git a/README.md b/README.md index 7559a60..0a8093f 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,10 @@ To choose an engine, export `AILLY_ENGINE=[bedrock|openai]` or provide `ailly -- ### Developing -See [DEVELOPING.md](./DEVELOPING.md) for details on how to run and debug various Ailly components. 
+* See [ARCHITECTURE.md](./ARCHITECTURE.md) for an overview of the packages and components in Ailly. +* See [DEVELOPING.md](./DEVELOPING.md) for details on how to run and debug various Ailly components. +* See [CONTRIBUTING.md](./CONTRIBUTING.md) for instructions on making a pull request. (There are no special instructions at this time.) +* See [DESIGN.md](./DESIGN.md) for historical notes on why some decisions were made. (Not exhaustive, but hopefully interesting.) ## Ailly plugins diff --git a/cli/src/index.ts b/cli/src/index.ts index c5d0301..d387032 100755 --- a/cli/src/index.ts +++ b/cli/src/index.ts @@ -75,7 +75,10 @@ export async function main() { if (!edit) { const stream = await assertExists(prompt.responseStream).promise; for await (const word of stream) { - process.stdout.write(word); + if (word) { + // ChatGPT sends a final `undefined` + process.stdout.write(word); + } } process.stdout.write("\n"); } diff --git a/core/src/actions/generate_manager.ts b/core/src/actions/generate_manager.ts index 6a27809..5c3752e 100644 --- a/core/src/actions/generate_manager.ts +++ b/core/src/actions/generate_manager.ts @@ -48,6 +48,7 @@ export class GenerateManager { this.threads = partitionPrompts(content, context); LOGGER.debug(`Ready to generate ${this.threads.length} messages`); } + start() { this.started = true; this.threadRunners = this.threads.map((t) => diff --git a/core/src/content/content.ts b/core/src/content/content.ts index 62b74c7..886a4fb 100644 --- a/core/src/content/content.ts +++ b/core/src/content/content.ts @@ -26,7 +26,7 @@ const END_REGEX = new RegExp(EXTENSION + "$"); type GrayMatterData = GrayMatterFile["data"]; -// Content is ordered on the file system using NN_name folders and nnp_name.md files. +// Content is ordered on the file system using NN_name folders and nn_name.md[.ailly.md] files. // The Content needs to keep track of where in the file system it is, so that a Prompt can write a Response. 
// It also needs the predecessor Content at the same level of the file system to build the larger context of its message pairs. export interface Content { diff --git a/core/src/engine/openai.ts b/core/src/engine/openai.ts index 66ce799..9e217ad 100644 --- a/core/src/engine/openai.ts +++ b/core/src/engine/openai.ts @@ -12,7 +12,8 @@ export const name = "openai"; // const MODEL = "gpt-3.5-turbo-0613"; // const FT_MODEL = process.env["OPENAI_FT_MODEL"]; -const MODEL = "gpt-4-0613"; +// const MODEL = "gpt-4-0613"; +const MODEL = "gpt-4o"; // const MODEL = `ft:${BASE_MODEL}:personal::${FT_MODEL}`; // const MODEL = "gpt-3.5-turbo-16k-0613"; const EMBEDDING_MODEL = "text-embedding-ada-002"; @@ -70,8 +71,10 @@ export const generate: EngineGenerate = ( const writer = stream.writable.getWriter(); await writer.ready; const chunk = block.choices[0]?.delta.content; - message += chunk; - await writer.write(chunk); + if (chunk) { + message += chunk; + await writer.write(chunk); + } writer.releaseLock(); } diff --git a/package.json b/package.json index 88549e6..a4c3e17 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "pretest": "npm run build", "test:cli": "npm run ci -w core && npm run ci -w cli && ./integ/integ-noop.sh", "test": "npm run test:cli && npm run ci -w web && npm run ci -w extension", - "prepackage": "npm run build-core", + "prepackage": "npm run build", "package": "npm run --w extension package" }, "workspaces": [