From: Stefan Gasser
Date: Thu, 8 Jan 2026 10:14:12 +0000 (+0100)
Subject: Initial release
X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=3d1310a05e0ade251cc3859bcc11ed84d5e84269;p=sgasser-llm-shield.git

Initial release

OpenAI-compatible privacy proxy with two modes:
- Mask: Replace PII with placeholders before upstream, unmask in response
- Route: Send PII-containing requests to local LLM

Features:
- PII detection in 24 languages
- Real-time streaming with unmasking
- Dashboard for monitoring
- Microsoft Presidio integration
---

3d1310a05e0ade251cc3859bcc11ed84d5e84269
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..921a10f
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,41 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    name: Test & Lint
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile
+
+      - name: Type check
+        run: bun run typecheck
+
+      - name: Lint & format check
+        run: bun run check
+
+      - name: Run tests
+        run: bun test
+
+  docker-build:
+    name: Docker Build Test
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Test Docker build
+        run: docker build -t llm-shield:test .
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7c2007f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,40 @@
+# Dependencies
+node_modules/
+
+# Build output
+dist/
+
+# Environment
+.env
+.env.local
+.env.*.local
+
+# Config (user-specific)
+config.yaml
+config.yml
+
+# Database
+*.db
+*.sqlite
+*.sqlite3
+data/
+
+# Logs
+logs/
+*.log
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+.DS_Store
+
+# Bun
+bun.lockb
+
+# Test
+coverage/
+test-*.ts
+test-*.sh
+test-*.js
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..b881b4e
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,86 @@
+# LLM-Shield
+
+OpenAI-compatible proxy with two privacy modes: route to local LLM or mask PII for upstream.
+
+## Tech Stack
+
+- Runtime: Bun
+- Framework: Hono (with JSX for dashboard)
+- Validation: Zod
+- Styling: Tailwind CSS v4
+- Database: SQLite (`data/llm-shield.db`)
+- PII Detection: Microsoft Presidio (Docker)
+- Code Style: Biome (see @biome.json)
+
+## Architecture
+
+```
+src/
+├── index.ts                  # Hono server entry
+├── config.ts                 # YAML config + Zod validation
+├── schemas/
+│   └── chat.ts               # Request/response schemas
+├── routes/                   # All route handlers
+│   ├── chat.ts               # POST /openai/v1/chat/completions
+│   ├── dashboard.tsx         # Dashboard routes + API
+│   ├── health.ts             # GET /health
+│   └── info.ts               # GET /info
+├── views/                    # JSX components
+│   └── dashboard/
+│       └── page.tsx          # Dashboard UI
+└── services/
+    ├── decision.ts           # Route/mask logic
+    ├── pii-detector.ts       # Presidio client
+    ├── llm-client.ts         # OpenAI/Ollama client
+    ├── masking.ts            # PII mask/unmask
+    ├── stream-transformer.ts # SSE unmask for streaming
+    ├── language-detector.ts  # Auto language detection
+    └── logger.ts             # SQLite logging
+```
+
+Tests are colocated (`*.test.ts`).
+
+## Modes
+
+Two modes configured in `config.yaml`:
+
+- **Route**: Routes PII-containing requests to local LLM (requires `local` provider + `routing` config)
+- **Mask**: Masks PII before upstream, unmasks response (no local provider needed)
+
+See @config.example.yaml for full configuration.
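+
+For a quick end-to-end check of mask mode, a minimal client sketch (the model name and API key handling are assumptions, not fixed by this repo):
+
+```ts
+// Send a request containing PII through the proxy and check whether it was masked.
+// Assumes LLM-Shield runs in mask mode on localhost:3000 (default config).
+const res = await fetch("http://localhost:3000/openai/v1/chat/completions", {
+  method: "POST",
+  headers: {
+    "Content-Type": "application/json",
+    // Forwarded as-is to the upstream provider
+    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
+  },
+  body: JSON.stringify({
+    model: "gpt-4o-mini", // hypothetical model choice
+    messages: [{ role: "user", content: "Email john@acme.com about the meeting" }],
+  }),
+});
+
+console.log(res.headers.get("X-LLM-Shield-PII-Masked")); // "true" when PII was replaced
+const data = await res.json();
+console.log(data.choices[0].message.content); // response with PII restored
+```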
+
+## Commands
+
+- `bun run dev` - Development (hot reload)
+- `bun run start` - Production
+- `bun run build` - Build to dist/
+- `bun test` - Run tests
+- `bun run typecheck` - Type check
+- `bun run lint` - Lint only
+- `bun run check` - Lint + format check
+- `bun run format` - Format code
+
+## Setup
+
+**Production:** `docker compose up -d`
+
+**Development:**
+```bash
+cp config.example.yaml config.yaml
+docker compose up presidio-analyzer -d
+bun install && bun run dev
+```
+
+**Dependencies:**
+- Presidio (port 5002) - required
+- Ollama (port 11434) - route mode only
+
+**Multi-language PII:** Build with `LANGUAGES=en,de,fr docker compose build`. See @presidio/languages.yaml for 24 available languages.
+
+## Testing
+
+- `GET /health` - Health check
+- `GET /info` - Mode info
+- `POST /openai/v1/chat/completions` - Main endpoint
+
+Response header `X-LLM-Shield-PII-Masked: true` indicates PII was masked.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..808761f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,48 @@
+# Contributing to LLM-Shield
+
+Thank you for considering contributing to LLM-Shield!
+
+## Development Setup
+
+1. Fork and clone the repository
+2. Install dependencies: `bun install`
+3. Copy config: `cp config.example.yaml config.yaml`
+4. Start Presidio: `docker compose up presidio-analyzer -d`
+5. Run dev server: `bun run dev`
+
+## Code Quality
+
+Before submitting a PR, ensure:
+
+```bash
+# Type checking passes
+bun run typecheck
+
+# Linting and formatting pass
+bun run check
+
+# Format code if needed
+bun run format
+```
+
+## Pull Request Process
+
+1. Create a feature branch from `main`
+2. Make your changes
+3. Ensure all checks pass
+4. Submit a PR with a clear description
+
+## Code Style
+
+- Use TypeScript strict mode
+- Follow existing code patterns
+- Keep functions focused and small
+- Add JSDoc comments for public APIs
+
+## Reporting Issues
+
+When reporting issues, please include:
+- Steps to reproduce
+- Expected behavior
+- Actual behavior
+- Environment (Bun version, OS)
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3e2741e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,15 @@
+FROM oven/bun:1-slim
+
+WORKDIR /app
+
+# Install dependencies
+COPY package.json bun.lock ./
+RUN bun install --frozen-lockfile --production
+
+# Copy source
+COPY src ./src
+COPY tsconfig.json ./
+
+EXPOSE 3000
+
+CMD ["bun", "run", "src/index.ts"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..65ce82e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,190 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2026 Stefan Gasser
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1bd2793
--- /dev/null
+++ b/README.md
@@ -0,0 +1,234 @@
+# 🛡️ LLM-Shield
+
+[![CI](https://github.com/sgasser/llm-shield/actions/workflows/ci.yml/badge.svg)](https://github.com/sgasser/llm-shield/actions/workflows/ci.yml)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
+
+Privacy proxy for LLMs. Masks personal data before sending to your provider (OpenAI, Azure, etc.), or routes sensitive requests to a local LLM.
+
+![LLM-Shield Dashboard](docs/dashboard.png)
+
+## Mask Mode (Default)
+
+Replaces personal data with placeholders before sending to the LLM. Unmasks the response automatically.
+
+```
+You send: "Email john@acme.com about the meeting with Sarah Miller"
+OpenAI receives: "Email <EMAIL_ADDRESS_1> about the meeting with <PERSON_1>"
+OpenAI responds: "I'll contact <EMAIL_ADDRESS_1> to schedule with <PERSON_1>..."
+You receive: "I'll contact john@acme.com to schedule with Sarah Miller..."
+``` + +- No local GPU needed +- Supports streaming with real-time unmasking + +## Route Mode + +Requests with personal data go to local LLM. Everything else goes to your provider. + +``` +"Help with code review" → OpenAI (best quality) +"Email john@acme.com about..." → Ollama (stays on your network) +``` + +- Requires local LLM (Ollama, vLLM, LocalAI) +- Full data isolation - personal data never leaves your network + +## What It Detects + +| Type | Examples | +|------|----------| +| Names | John Smith, Sarah Miller | +| Emails | john@acme.com | +| Phone numbers | +1 555 123 4567 | +| Credit cards | 4111-1111-1111-1111 | +| IBANs | DE89 3704 0044 0532 0130 00 | +| IP addresses | 192.168.1.1 | +| Locations | New York, Berlin | + +Additional entity types can be enabled: `US_SSN`, `US_PASSPORT`, `CRYPTO`, `NRP`, `MEDICAL_LICENSE`, `URL`. + +**Languages**: 24 languages supported (configurable at build time). Auto-detected per request. + +Powered by [Microsoft Presidio](https://microsoft.github.io/presidio/). + +## Quick Start + +### Docker (recommended) + +```bash +git clone https://github.com/sgasser/llm-shield.git +cd llm-shield +cp config.example.yaml config.yaml + +# Option 1: English only (default, ~1.5GB) +docker compose up -d + +# Option 2: Multiple languages (~2.5GB) +# Edit config.yaml to add languages, then: +LANGUAGES=en,de,fr,es,it docker compose up -d +``` + +### Local Development + +```bash +git clone https://github.com/sgasser/llm-shield.git +cd llm-shield +bun install +cp config.example.yaml config.yaml + +# Option 1: English only (default) +docker compose up presidio-analyzer -d + +# Option 2: Multiple languages +# Edit config.yaml to add languages, then: +LANGUAGES=en,de,fr,es,it docker compose build presidio-analyzer +docker compose up presidio-analyzer -d + +bun run dev +``` + +Dashboard: http://localhost:3000/dashboard + +**Usage:** Point your app to `http://localhost:3000/openai/v1` instead of `https://api.openai.com/v1`. + +## Language Configuration + +By default, only English is installed to minimize image size. Add more languages at build time: + +```bash +# English only (default, smallest image ~1.5GB) +docker compose build + +# English + German +LANGUAGES=en,de docker compose build + +# Multiple languages +LANGUAGES=en,de,fr,it,es docker compose build +``` + +**Available languages (24):** +`ca`, `zh`, `hr`, `da`, `nl`, `en`, `fi`, `fr`, `de`, `el`, `it`, `ja`, `ko`, `lt`, `mk`, `nb`, `pl`, `pt`, `ro`, `ru`, `sl`, `es`, `sv`, `uk` + +**Language Fallback Behavior:** +- Text language is auto-detected for each request +- If detected language is not installed, falls back to `fallback_language` (default: `en`) +- Dashboard shows fallback as `FR→EN` when French text is detected but only English is installed +- Response header `X-LLM-Shield-Language-Fallback: true` indicates fallback was used + +Update `config.yaml` to match your installed languages: + +```yaml +pii_detection: + languages: + - en + - de +``` + +See [presidio/languages.yaml](presidio/languages.yaml) for full details including context words. 
+
+## Configuration
+
+**Mask mode:**
+
+```yaml
+mode: mask
+providers:
+  upstream:
+    type: openai
+    base_url: https://api.openai.com/v1
+masking:
+  placeholder_format: "<{TYPE}_{N}>" # Format for masked values
+  show_markers: false # Add visual markers to unmasked values
+```
+
+**Route mode:**
+
+```yaml
+mode: route
+providers:
+  upstream:
+    type: openai
+    base_url: https://api.openai.com/v1
+  local:
+    type: ollama
+    base_url: http://localhost:11434
+    model: llama3.2 # Model for all local requests
+routing:
+  default: upstream
+  on_pii_detected: local
+```
+
+**Customize detection:**
+
+```yaml
+pii_detection:
+  score_threshold: 0.7 # Confidence (0.0 - 1.0)
+  entities: # What to detect
+    - PERSON
+    - EMAIL_ADDRESS
+    - PHONE_NUMBER
+    - CREDIT_CARD
+    - IBAN_CODE
+```
+
+**Logging options:**
+
+```yaml
+logging:
+  database: ./data/llm-shield.db
+  retention_days: 30 # 0 = keep forever
+  log_content: false # Log full request/response
+  log_masked_content: true # Log masked content for dashboard
+```
+
+**Dashboard authentication:**
+
+```yaml
+dashboard:
+  auth:
+    username: admin
+    password: ${DASHBOARD_PASSWORD}
+```
+
+**Environment variables:** Config values support `${VAR}` and `${VAR:-default}` substitution.
+
+See [config.example.yaml](config.example.yaml) for all options.
+
+## API Reference
+
+**Endpoints:**
+
+| Endpoint | Description |
+|----------|-------------|
+| `POST /openai/v1/chat/completions` | Chat API (OpenAI-compatible) |
+| `GET /openai/v1/models` | List models |
+| `GET /dashboard` | Monitoring UI |
+| `GET /dashboard/api/logs` | Request logs (JSON) |
+| `GET /dashboard/api/stats` | Statistics (JSON) |
+| `GET /health` | Health check |
+| `GET /info` | Current configuration |
+
+**Response headers:**
+
+| Header | Value |
+|--------|-------|
+| `X-Request-ID` | Request identifier (forwarded or generated) |
+| `X-LLM-Shield-Mode` | `route` / `mask` |
+| `X-LLM-Shield-PII-Detected` | `true` / `false` |
+| `X-LLM-Shield-PII-Masked` | `true` / `false` (mask mode) |
+| `X-LLM-Shield-Provider` | `upstream` / `local` |
+| `X-LLM-Shield-Language` | Detected language code |
+| `X-LLM-Shield-Language-Fallback` | `true` if fallback was used |
+
+## Development
+
+```bash
+docker compose up presidio-analyzer -d # Start detection service
+bun run dev # Dev server with hot reload
+bun test # Run tests
+bun run check # Lint & format
+```
+
+## License
+
+[Apache 2.0](LICENSE)
diff --git a/biome.json b/biome.json
new file mode 100644
index 0000000..530e667
--- /dev/null
+++ b/biome.json
@@ -0,0 +1,37 @@
+{
+  "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
+  "vcs": {
+    "enabled": true,
+    "clientKind": "git",
+    "useIgnoreFile": true
+  },
+  "files": {
+    "ignoreUnknown": false,
+    "includes": ["src/**/*.ts"]
+  },
+  "formatter": {
+    "enabled": true,
+    "indentStyle": "space",
+    "indentWidth": 2,
+    "lineWidth": 100
+  },
+  "linter": {
+    "enabled": true,
+    "rules": {
+      "recommended": true,
+      "complexity": {
+        "noForEach": "off"
+      },
+      "style": {
+        "noNonNullAssertion": "off"
+      }
+    }
+  },
+  "javascript": {
+    "formatter": {
+      "quoteStyle": "double",
+      "semicolons": "always",
+      "trailingCommas": "all"
+    }
+  }
+}
diff --git a/bun.lock b/bun.lock
new file mode 100644
index 0000000..3a80089
--- /dev/null
+++ b/bun.lock
@@ -0,0 +1,164 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "llm-shield",
+      "dependencies": {
+        "@hono/zod-validator": "^0.7.6",
+        "eld": "^2.0.1",
+        "hono": "^4.11.0",
+        "hono-tailwind": "^2.2.0",
+        "tailwindcss": "^4.1.18",
"yaml": "^2.7.0", + "zod": "^3.24.0", + }, + "devDependencies": { + "@biomejs/biome": "^2.3.11", + "@types/bun": "latest", + "typescript": "^5.7.0", + }, + }, + }, + "packages": { + "@alloc/quick-lru": ["@alloc/quick-lru@5.2.0", "", {}, "sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw=="], + + "@biomejs/biome": ["@biomejs/biome@2.3.11", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.3.11", "@biomejs/cli-darwin-x64": "2.3.11", "@biomejs/cli-linux-arm64": "2.3.11", "@biomejs/cli-linux-arm64-musl": "2.3.11", "@biomejs/cli-linux-x64": "2.3.11", "@biomejs/cli-linux-x64-musl": "2.3.11", "@biomejs/cli-win32-arm64": "2.3.11", "@biomejs/cli-win32-x64": "2.3.11" }, "bin": { "biome": "bin/biome" } }, "sha512-/zt+6qazBWguPG6+eWmiELqO+9jRsMZ/DBU3lfuU2ngtIQYzymocHhKiZRyrbra4aCOoyTg/BmY+6WH5mv9xmQ=="], + + "@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@2.3.11", "", { "os": "darwin", "cpu": "arm64" }, "sha512-/uXXkBcPKVQY7rc9Ys2CrlirBJYbpESEDme7RKiBD6MmqR2w3j0+ZZXRIL2xiaNPsIMMNhP1YnA+jRRxoOAFrA=="], + + "@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@2.3.11", "", { "os": "darwin", "cpu": "x64" }, "sha512-fh7nnvbweDPm2xEmFjfmq7zSUiox88plgdHF9OIW4i99WnXrAC3o2P3ag9judoUMv8FCSUnlwJCM1B64nO5Fbg=="], + + "@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@2.3.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-l4xkGa9E7Uc0/05qU2lMYfN1H+fzzkHgaJoy98wO+b/7Gl78srbCRRgwYSW+BTLixTBrM6Ede5NSBwt7rd/i6g=="], + + "@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@2.3.11", "", { "os": "linux", "cpu": "arm64" }, "sha512-XPSQ+XIPZMLaZ6zveQdwNjbX+QdROEd1zPgMwD47zvHV+tCGB88VH+aynyGxAHdzL+Tm/+DtKST5SECs4iwCLg=="], + + "@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@2.3.11", "", { "os": "linux", "cpu": "x64" }, "sha512-/1s9V/H3cSe0r0Mv/Z8JryF5x9ywRxywomqZVLHAoa/uN0eY7F8gEngWKNS5vbbN/BsfpCG5yeBT5ENh50Frxg=="], + + "@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@2.3.11", "", { "os": "linux", "cpu": "x64" }, "sha512-vU7a8wLs5C9yJ4CB8a44r12aXYb8yYgBn+WeyzbMjaCMklzCv1oXr8x+VEyWodgJt9bDmhiaW/I0RHbn7rsNmw=="], + + "@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@2.3.11", "", { "os": "win32", "cpu": "arm64" }, "sha512-PZQ6ElCOnkYapSsysiTy0+fYX+agXPlWugh6+eQ6uPKI3vKAqNp6TnMhoM3oY2NltSB89hz59o8xIfOdyhi9Iw=="], + + "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.3.11", "", { "os": "win32", "cpu": "x64" }, "sha512-43VrG813EW+b5+YbDbz31uUsheX+qFKCpXeY9kfdAx+ww3naKxeVkTD9zLIWxUPfJquANMHrmW3wbe/037G0Qg=="], + + "@hono/zod-validator": ["@hono/zod-validator@0.7.6", "", { "peerDependencies": { "hono": ">=3.9.0", "zod": "^3.25.0 || ^4.0.0" } }, "sha512-Io1B6d011Gj1KknV4rXYz4le5+5EubcWEU/speUjuw9XMMIaP3n78yXLhjd2A3PXaXaUwEAluOiAyLqhBEJgsw=="], + + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], + + "@jridgewell/remapping": ["@jridgewell/remapping@2.3.5", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ=="], + + "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.2", "", {}, "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw=="], + + "@jridgewell/sourcemap-codec": ["@jridgewell/sourcemap-codec@1.5.5", "", {}, 
"sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="], + + "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], + + "@tailwindcss/node": ["@tailwindcss/node@4.1.18", "", { "dependencies": { "@jridgewell/remapping": "^2.3.4", "enhanced-resolve": "^5.18.3", "jiti": "^2.6.1", "lightningcss": "1.30.2", "magic-string": "^0.30.21", "source-map-js": "^1.2.1", "tailwindcss": "4.1.18" } }, "sha512-DoR7U1P7iYhw16qJ49fgXUlry1t4CpXeErJHnQ44JgTSKMaZUdf17cfn5mHchfJ4KRBZRFA/Coo+MUF5+gOaCQ=="], + + "@tailwindcss/oxide": ["@tailwindcss/oxide@4.1.18", "", { "optionalDependencies": { "@tailwindcss/oxide-android-arm64": "4.1.18", "@tailwindcss/oxide-darwin-arm64": "4.1.18", "@tailwindcss/oxide-darwin-x64": "4.1.18", "@tailwindcss/oxide-freebsd-x64": "4.1.18", "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.18", "@tailwindcss/oxide-linux-arm64-gnu": "4.1.18", "@tailwindcss/oxide-linux-arm64-musl": "4.1.18", "@tailwindcss/oxide-linux-x64-gnu": "4.1.18", "@tailwindcss/oxide-linux-x64-musl": "4.1.18", "@tailwindcss/oxide-wasm32-wasi": "4.1.18", "@tailwindcss/oxide-win32-arm64-msvc": "4.1.18", "@tailwindcss/oxide-win32-x64-msvc": "4.1.18" } }, "sha512-EgCR5tTS5bUSKQgzeMClT6iCY3ToqE1y+ZB0AKldj809QXk1Y+3jB0upOYZrn9aGIzPtUsP7sX4QQ4XtjBB95A=="], + + "@tailwindcss/oxide-android-arm64": ["@tailwindcss/oxide-android-arm64@4.1.18", "", { "os": "android", "cpu": "arm64" }, "sha512-dJHz7+Ugr9U/diKJA0W6N/6/cjI+ZTAoxPf9Iz9BFRF2GzEX8IvXxFIi/dZBloVJX/MZGvRuFA9rqwdiIEZQ0Q=="], + + "@tailwindcss/oxide-darwin-arm64": ["@tailwindcss/oxide-darwin-arm64@4.1.18", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Gc2q4Qhs660bhjyBSKgq6BYvwDz4G+BuyJ5H1xfhmDR3D8HnHCmT/BSkvSL0vQLy/nkMLY20PQ2OoYMO15Jd0A=="], + + "@tailwindcss/oxide-darwin-x64": ["@tailwindcss/oxide-darwin-x64@4.1.18", "", { "os": "darwin", "cpu": "x64" }, "sha512-FL5oxr2xQsFrc3X9o1fjHKBYBMD1QZNyc1Xzw/h5Qu4XnEBi3dZn96HcHm41c/euGV+GRiXFfh2hUCyKi/e+yw=="], + + "@tailwindcss/oxide-freebsd-x64": ["@tailwindcss/oxide-freebsd-x64@4.1.18", "", { "os": "freebsd", "cpu": "x64" }, "sha512-Fj+RHgu5bDodmV1dM9yAxlfJwkkWvLiRjbhuO2LEtwtlYlBgiAT4x/j5wQr1tC3SANAgD+0YcmWVrj8R9trVMA=="], + + "@tailwindcss/oxide-linux-arm-gnueabihf": ["@tailwindcss/oxide-linux-arm-gnueabihf@4.1.18", "", { "os": "linux", "cpu": "arm" }, "sha512-Fp+Wzk/Ws4dZn+LV2Nqx3IilnhH51YZoRaYHQsVq3RQvEl+71VGKFpkfHrLM/Li+kt5c0DJe/bHXK1eHgDmdiA=="], + + "@tailwindcss/oxide-linux-arm64-gnu": ["@tailwindcss/oxide-linux-arm64-gnu@4.1.18", "", { "os": "linux", "cpu": "arm64" }, "sha512-S0n3jboLysNbh55Vrt7pk9wgpyTTPD0fdQeh7wQfMqLPM/Hrxi+dVsLsPrycQjGKEQk85Kgbx+6+QnYNiHalnw=="], + + "@tailwindcss/oxide-linux-arm64-musl": ["@tailwindcss/oxide-linux-arm64-musl@4.1.18", "", { "os": "linux", "cpu": "arm64" }, "sha512-1px92582HkPQlaaCkdRcio71p8bc8i/ap5807tPRDK/uw953cauQBT8c5tVGkOwrHMfc2Yh6UuxaH4vtTjGvHg=="], + + "@tailwindcss/oxide-linux-x64-gnu": ["@tailwindcss/oxide-linux-x64-gnu@4.1.18", "", { "os": "linux", "cpu": "x64" }, "sha512-v3gyT0ivkfBLoZGF9LyHmts0Isc8jHZyVcbzio6Wpzifg/+5ZJpDiRiUhDLkcr7f/r38SWNe7ucxmGW3j3Kb/g=="], + + "@tailwindcss/oxide-linux-x64-musl": ["@tailwindcss/oxide-linux-x64-musl@4.1.18", "", { "os": "linux", "cpu": "x64" }, "sha512-bhJ2y2OQNlcRwwgOAGMY0xTFStt4/wyU6pvI6LSuZpRgKQwxTec0/3Scu91O8ir7qCR3AuepQKLU/kX99FouqQ=="], + + "@tailwindcss/oxide-wasm32-wasi": 
["@tailwindcss/oxide-wasm32-wasi@4.1.18", "", { "dependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1", "@emnapi/wasi-threads": "^1.1.0", "@napi-rs/wasm-runtime": "^1.1.0", "@tybys/wasm-util": "^0.10.1", "tslib": "^2.4.0" }, "cpu": "none" }, "sha512-LffYTvPjODiP6PT16oNeUQJzNVyJl1cjIebq/rWWBF+3eDst5JGEFSc5cWxyRCJ0Mxl+KyIkqRxk1XPEs9x8TA=="], + + "@tailwindcss/oxide-win32-arm64-msvc": ["@tailwindcss/oxide-win32-arm64-msvc@4.1.18", "", { "os": "win32", "cpu": "arm64" }, "sha512-HjSA7mr9HmC8fu6bdsZvZ+dhjyGCLdotjVOgLA2vEqxEBZaQo9YTX4kwgEvPCpRh8o4uWc4J/wEoFzhEmjvPbA=="], + + "@tailwindcss/oxide-win32-x64-msvc": ["@tailwindcss/oxide-win32-x64-msvc@4.1.18", "", { "os": "win32", "cpu": "x64" }, "sha512-bJWbyYpUlqamC8dpR7pfjA0I7vdF6t5VpUGMWRkXVE3AXgIZjYUYAK7II1GNaxR8J1SSrSrppRar8G++JekE3Q=="], + + "@tailwindcss/postcss": ["@tailwindcss/postcss@4.1.18", "", { "dependencies": { "@alloc/quick-lru": "^5.2.0", "@tailwindcss/node": "4.1.18", "@tailwindcss/oxide": "4.1.18", "postcss": "^8.4.41", "tailwindcss": "4.1.18" } }, "sha512-Ce0GFnzAOuPyfV5SxjXGn0CubwGcuDB0zcdaPuCSzAa/2vII24JTkH+I6jcbXLb1ctjZMZZI6OjDaLPJQL1S0g=="], + + "@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="], + + "@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="], + + "bun-types": ["bun-types@1.3.5", "", { "dependencies": { "@types/node": "*" } }, "sha512-inmAYe2PFLs0SUbFOWSVD24sg1jFlMPxOjOSSCYqUgn4Hsc3rDc7dFvfVYjFPNHtov6kgUeulV4SxbuIV/stPw=="], + + "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + + "eld": ["eld@2.0.1", "", {}, "sha512-Lo+M5M7IL/N3MSXMbnfBrdsn+qu0rScPyOA/POvxKU7HsLEOfFOJuEBC96vmYxMJShxXtH+wnWVOhgu+rf7u9A=="], + + "enhanced-resolve": ["enhanced-resolve@5.18.4", "", { "dependencies": { "graceful-fs": "^4.2.4", "tapable": "^2.2.0" } }, "sha512-LgQMM4WXU3QI+SYgEc2liRgznaD5ojbmY3sb8LxyguVkIg5FxdpTkvk72te2R38/TGKxH634oLxXRGY6d7AP+Q=="], + + "graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="], + + "hono": ["hono@4.11.3", "", {}, "sha512-PmQi306+M/ct/m5s66Hrg+adPnkD5jiO6IjA7WhWw0gSBSo1EcRegwuI1deZ+wd5pzCGynCcn2DprnE4/yEV4w=="], + + "hono-tailwind": ["hono-tailwind@2.2.0", "", { "dependencies": { "@tailwindcss/postcss": "^4.1.6", "postcss": "^8.5.3" }, "peerDependencies": { "hono": "^4.0.0", "tailwindcss": "^4.1.6" } }, "sha512-orA97f08l2nsKU4tu4EAa4/F5KIIscdvsFQkFi07U0WYCpCVEy+Pw4qAY+z4jYgHf7OSnXo7IzLwFxTq4tRH9Q=="], + + "jiti": ["jiti@2.6.1", "", { "bin": { "jiti": "lib/jiti-cli.mjs" } }, "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ=="], + + "lightningcss": ["lightningcss@1.30.2", "", { "dependencies": { "detect-libc": "^2.0.3" }, "optionalDependencies": { "lightningcss-android-arm64": "1.30.2", "lightningcss-darwin-arm64": "1.30.2", "lightningcss-darwin-x64": "1.30.2", "lightningcss-freebsd-x64": "1.30.2", "lightningcss-linux-arm-gnueabihf": "1.30.2", "lightningcss-linux-arm64-gnu": "1.30.2", "lightningcss-linux-arm64-musl": "1.30.2", "lightningcss-linux-x64-gnu": "1.30.2", "lightningcss-linux-x64-musl": "1.30.2", "lightningcss-win32-arm64-msvc": "1.30.2", "lightningcss-win32-x64-msvc": "1.30.2" } }, 
"sha512-utfs7Pr5uJyyvDETitgsaqSyjCb2qNRAtuqUeWIAKztsOYdcACf2KtARYXg2pSvhkt+9NfoaNY7fxjl6nuMjIQ=="], + + "lightningcss-android-arm64": ["lightningcss-android-arm64@1.30.2", "", { "os": "android", "cpu": "arm64" }, "sha512-BH9sEdOCahSgmkVhBLeU7Hc9DWeZ1Eb6wNS6Da8igvUwAe0sqROHddIlvU06q3WyXVEOYDZ6ykBZQnjTbmo4+A=="], + + "lightningcss-darwin-arm64": ["lightningcss-darwin-arm64@1.30.2", "", { "os": "darwin", "cpu": "arm64" }, "sha512-ylTcDJBN3Hp21TdhRT5zBOIi73P6/W0qwvlFEk22fkdXchtNTOU4Qc37SkzV+EKYxLouZ6M4LG9NfZ1qkhhBWA=="], + + "lightningcss-darwin-x64": ["lightningcss-darwin-x64@1.30.2", "", { "os": "darwin", "cpu": "x64" }, "sha512-oBZgKchomuDYxr7ilwLcyms6BCyLn0z8J0+ZZmfpjwg9fRVZIR5/GMXd7r9RH94iDhld3UmSjBM6nXWM2TfZTQ=="], + + "lightningcss-freebsd-x64": ["lightningcss-freebsd-x64@1.30.2", "", { "os": "freebsd", "cpu": "x64" }, "sha512-c2bH6xTrf4BDpK8MoGG4Bd6zAMZDAXS569UxCAGcA7IKbHNMlhGQ89eRmvpIUGfKWNVdbhSbkQaWhEoMGmGslA=="], + + "lightningcss-linux-arm-gnueabihf": ["lightningcss-linux-arm-gnueabihf@1.30.2", "", { "os": "linux", "cpu": "arm" }, "sha512-eVdpxh4wYcm0PofJIZVuYuLiqBIakQ9uFZmipf6LF/HRj5Bgm0eb3qL/mr1smyXIS1twwOxNWndd8z0E374hiA=="], + + "lightningcss-linux-arm64-gnu": ["lightningcss-linux-arm64-gnu@1.30.2", "", { "os": "linux", "cpu": "arm64" }, "sha512-UK65WJAbwIJbiBFXpxrbTNArtfuznvxAJw4Q2ZGlU8kPeDIWEX1dg3rn2veBVUylA2Ezg89ktszWbaQnxD/e3A=="], + + "lightningcss-linux-arm64-musl": ["lightningcss-linux-arm64-musl@1.30.2", "", { "os": "linux", "cpu": "arm64" }, "sha512-5Vh9dGeblpTxWHpOx8iauV02popZDsCYMPIgiuw97OJ5uaDsL86cnqSFs5LZkG3ghHoX5isLgWzMs+eD1YzrnA=="], + + "lightningcss-linux-x64-gnu": ["lightningcss-linux-x64-gnu@1.30.2", "", { "os": "linux", "cpu": "x64" }, "sha512-Cfd46gdmj1vQ+lR6VRTTadNHu6ALuw2pKR9lYq4FnhvgBc4zWY1EtZcAc6EffShbb1MFrIPfLDXD6Xprbnni4w=="], + + "lightningcss-linux-x64-musl": ["lightningcss-linux-x64-musl@1.30.2", "", { "os": "linux", "cpu": "x64" }, "sha512-XJaLUUFXb6/QG2lGIW6aIk6jKdtjtcffUT0NKvIqhSBY3hh9Ch+1LCeH80dR9q9LBjG3ewbDjnumefsLsP6aiA=="], + + "lightningcss-win32-arm64-msvc": ["lightningcss-win32-arm64-msvc@1.30.2", "", { "os": "win32", "cpu": "arm64" }, "sha512-FZn+vaj7zLv//D/192WFFVA0RgHawIcHqLX9xuWiQt7P0PtdFEVaxgF9rjM/IRYHQXNnk61/H/gb2Ei+kUQ4xQ=="], + + "lightningcss-win32-x64-msvc": ["lightningcss-win32-x64-msvc@1.30.2", "", { "os": "win32", "cpu": "x64" }, "sha512-5g1yc73p+iAkid5phb4oVFMB45417DkRevRbt/El/gKXJk4jid+vPFF/AXbxn05Aky8PapwzZrdJShv5C0avjw=="], + + "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], + + "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], + + "picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="], + + "postcss": ["postcss@8.5.6", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg=="], + + "source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="], + + "tailwindcss": ["tailwindcss@4.1.18", "", {}, "sha512-4+Z+0yiYyEtUVCScyfHCxOYP06L5Ne+JiHhY2IjR2KWMIWhJOYZKLSGZaP5HkZ8+bY0cxfzwDE5uOmzFXyIwxw=="], + + "tapable": ["tapable@2.3.0", "", {}, 
"sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "yaml": ["yaml@2.8.2", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A=="], + + "zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + + "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.8.1", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" }, "bundled": true }, "sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg=="], + + "@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.8.1", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg=="], + + "@tailwindcss/oxide-wasm32-wasi/@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.1.0", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-WI0DdZ8xFSbgMjR1sFsKABJ/C5OnRrjT06JXbZKexJGrDuPTzZdDYfFlsgcCXCyf+suG5QU2e/y1Wo2V/OapLQ=="], + + "@tailwindcss/oxide-wasm32-wasi/@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.1", "", { "dependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1", "@tybys/wasm-util": "^0.10.1" }, "bundled": true }, "sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A=="], + + "@tailwindcss/oxide-wasm32-wasi/@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="], + + "@tailwindcss/oxide-wasm32-wasi/tslib": ["tslib@2.8.1", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + } +} diff --git a/config.example.yaml b/config.example.yaml new file mode 100644 index 0000000..98fb32b --- /dev/null +++ b/config.example.yaml @@ -0,0 +1,115 @@ +# LLM-Shield Configuration +# Copy this file to config.yaml and adjust the values + +# Privacy mode: "mask" or "route" +# +# mask: Masks PII before sending to upstream, unmasks in response (no local LLM needed) +# route: Routes requests to local LLM when PII detected (requires local provider) +mode: mask + +# Server settings +server: + port: 3000 + host: "0.0.0.0" + +# LLM Provider configuration +providers: + # Upstream provider (required for both modes) + # The proxy forwards your client's Authorization header to the upstream provider + # You can optionally set api_key here as a fallback + upstream: + type: openai + base_url: https://api.openai.com/v1 + # api_key: ${OPENAI_API_KEY} # Optional fallback if client doesn't send auth header + + # Local provider (only for route mode - can be removed if using mask mode) + # Supports: ollama, openai (for OpenAI-compatible servers like LocalAI, LM Studio) + local: + type: ollama # or "openai" for OpenAI-compatible servers + base_url: http://localhost:11434 + model: llama3.2 # All PII requests use this model + # api_key: ${LOCAL_API_KEY} # Only needed 
+    # api_key: ${LOCAL_API_KEY} # Only needed for OpenAI-compatible servers
+
+# Routing rules (only for route mode - can be removed if using mask mode)
+routing:
+  # Default provider when no PII is detected
+  default: upstream
+
+  # Provider to use when PII is detected
+  on_pii_detected: local
+
+# Masking settings (only for mask mode - can be removed if using route mode)
+masking:
+  # Add visual markers to unmasked values in response (for debugging/demos)
+  # Interferes with copy/paste, so disabled by default
+  show_markers: false
+  marker_text: "[protected]"
+
+# PII Detection settings (Microsoft Presidio)
+pii_detection:
+  presidio_url: ${PRESIDIO_URL:-http://localhost:5002}
+
+  # Supported languages for PII detection
+  # Auto-detects language from input text and uses appropriate model
+  # If only one language is specified, language detection is skipped
+  #
+  # Languages must match what was installed during docker build:
+  #   LANGUAGES=en,de docker-compose build
+  #
+  # Available (24 languages): ca, zh, hr, da, nl, en, fi, fr, de, el,
+  # it, ja, ko, lt, mk, nb, pl, pt, ro, ru, sl, es, sv, uk
+  # See presidio/languages.yaml for full list with details
+  languages:
+    - en
+    # Add more languages to match your Docker build:
+    # - de
+    # - fr
+    # - es
+    # - it
+
+  # Fallback language if detected language is not in the list above
+  fallback_language: en
+
+  score_threshold: 0.7 # Minimum confidence score (0.0 - 1.0)
+
+  # Entity types to detect
+  # See: https://microsoft.github.io/presidio/supported_entities/
+  entities:
+    - PERSON
+    - EMAIL_ADDRESS
+    - PHONE_NUMBER
+    - CREDIT_CARD
+    - IBAN_CODE
+    - IP_ADDRESS
+    - LOCATION
+    # - US_SSN
+    # - US_PASSPORT
+    # - CRYPTO
+    # - NRP # Nationality, religious or political group
+    # - MEDICAL_LICENSE
+    # - URL
+
+# Logging settings
+logging:
+  # SQLite database for request logs
+  database: ./data/llm-shield.db
+
+  # Log retention in days (0 = keep forever)
+  retention_days: 30
+
+  # Log request/response content (may contain sensitive data!)
+  log_content: false
+
+  # Log masked content for dashboard preview (default: true)
+  # Shows what was actually sent to upstream LLM with PII replaced by tokens
+  # Disable if you don't want any content stored, even masked
+  log_masked_content: true
+
+# Dashboard settings
+dashboard:
+  enabled: true
+
+  # Basic auth for dashboard (optional)
+  # auth:
+  #   username: admin
+  #   password: ${DASHBOARD_PASSWORD}
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..8fceec9
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,34 @@
+services:
+  llm-shield:
+    build: .
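+    # Proxy image is built from the repo Dockerfile (Bun runtime)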
+    ports:
+      - "3000:3000"
+    environment:
+      - PRESIDIO_URL=http://presidio-analyzer:3000
+    volumes:
+      - ./config.yaml:/app/config.yaml:ro
+      - ./data:/app/data
+    depends_on:
+      presidio-analyzer:
+        condition: service_healthy
+    restart: unless-stopped
+
+  presidio-analyzer:
+    build:
+      context: ./presidio
+      args:
+        # Languages to install for PII detection
+        # Available: ca, zh, hr, da, nl, en, fi, fr, de, el, it, ja, ko,
+        # lt, mk, nb, pl, pt, ro, ru, sl, es, sv, uk
+        # See presidio/languages.yaml for full list
+        # Example: LANGUAGES=en,de,fr docker-compose build
+        LANGUAGES: ${LANGUAGES:-en}
+    ports:
+      - "5002:3000"
+    healthcheck:
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3000/health')"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    restart: unless-stopped
diff --git a/docs/dashboard.png b/docs/dashboard.png
new file mode 100644
index 0000000..24eca40
Binary files /dev/null and b/docs/dashboard.png differ
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..4921794
--- /dev/null
+++ b/package.json
@@ -0,0 +1,46 @@
+{
+  "name": "llm-shield",
+  "version": "0.1.0",
+  "description": "Intelligent privacy-aware routing for LLMs - OpenAI-compatible proxy that routes requests based on PII detection",
+  "type": "module",
+  "main": "src/index.ts",
+  "scripts": {
+    "dev": "bun run --hot src/index.ts",
+    "start": "bun run src/index.ts",
+    "build": "bun build src/index.ts --outdir dist --target bun --external lightningcss",
+    "test": "bun test",
+    "typecheck": "tsc --noEmit",
+    "lint": "biome lint src",
+    "format": "biome format src --write",
+    "check": "biome check src"
+  },
+  "dependencies": {
+    "@hono/zod-validator": "^0.7.6",
+    "eld": "^2.0.1",
+    "hono": "^4.11.0",
+    "hono-tailwind": "^2.2.0",
+    "tailwindcss": "^4.1.18",
+    "yaml": "^2.7.0",
+    "zod": "^3.24.0"
+  },
+  "devDependencies": {
+    "@biomejs/biome": "^2.3.11",
+    "@types/bun": "latest",
+    "typescript": "^5.7.0"
+  },
+  "keywords": [
+    "llm",
+    "privacy",
+    "pii",
+    "openai",
+    "proxy",
+    "gdpr",
+    "routing"
+  ],
+  "author": "Stefan Gasser",
+  "license": "Apache-2.0",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/sgasser/llm-shield"
+  }
+}
diff --git a/presidio/Dockerfile b/presidio/Dockerfile
new file mode 100644
index 0000000..f3799c9
--- /dev/null
+++ b/presidio/Dockerfile
@@ -0,0 +1,49 @@
+# LLM-Shield Presidio Analyzer
+# Multi-language PII detection with configurable language support
+#
+# Build with specific languages:
+#   docker build --build-arg LANGUAGES=en,de,fr -t presidio-analyzer .
+#
+# Or via docker-compose:
+#   LANGUAGES=en,de docker-compose build presidio-analyzer
+
+ARG LANGUAGES="en"
+
+# =============================================================================
+# Stage 1: Generate configuration files from language selection
+# =============================================================================
+FROM python:3.11-slim AS generator
+
+WORKDIR /build
+
+# Install PyYAML for config generation
+RUN pip install --no-cache-dir pyyaml
+
+# Copy registry and generator script
+COPY languages.yaml /build/
+COPY scripts/generate-configs.py /build/
+
+# Generate configs for selected languages
+ARG LANGUAGES
+RUN python generate-configs.py \
+    --languages="${LANGUAGES}" \
+    --registry=/build/languages.yaml \
+    --output=/output
+
+# =============================================================================
+# Stage 2: Final Presidio Analyzer image
+# =============================================================================
+FROM mcr.microsoft.com/presidio-analyzer:latest
+
+# Copy generated configuration files
+COPY --from=generator /output/nlp-config.yaml /usr/bin/presidio_analyzer/conf/default.yaml
+COPY --from=generator /output/recognizers-config.yaml /usr/bin/presidio_analyzer/conf/default_recognizers.yaml
+COPY --from=generator /output/analyzer-config.yaml /usr/bin/presidio_analyzer/conf/default_analyzer.yaml
+
+# Copy and run model installation script
+COPY --from=generator /output/install-models.sh /tmp/
+RUN chmod +x /tmp/install-models.sh && /tmp/install-models.sh && rm /tmp/install-models.sh
+
+# Use --preload to load models once in master process (shared via copy-on-write)
+# Timeout 300s for initial model loading, workers start fast after preload
+CMD ["/bin/sh", "-c", "poetry run gunicorn -w $WORKERS -b 0.0.0.0:$PORT --timeout 300 --preload 'app:create_app()'"]
diff --git a/presidio/languages.yaml b/presidio/languages.yaml
new file mode 100644
index 0000000..266aa36
--- /dev/null
+++ b/presidio/languages.yaml
@@ -0,0 +1,223 @@
+# LLM-Shield Language Registry
+# All 24 spaCy languages with trained pipelines
+#
+# Usage: Set LANGUAGES build arg to select which to install
+#   LANGUAGES=en,de docker-compose build
+#
+# To add a custom language, add an entry here with model name
+
+spacy_version: "3.8.0"
+
+languages:
+  # Catalan
+  ca:
+    name: Catalan
+    model: ca_core_news_md
+
+  # Chinese
+  zh:
+    name: Chinese
+    model: zh_core_web_md
+
+  # Croatian
+  hr:
+    name: Croatian
+    model: hr_core_news_md
+
+  # Danish
+  da:
+    name: Danish
+    model: da_core_news_md
+
+  # Dutch
+  nl:
+    name: Dutch
+    model: nl_core_news_md
+    phone_context:
+      - telefoon
+      - telefoonnummer
+      - mobiel
+      - bellen
+      - fax
+
+  # English
+  en:
+    name: English
+    model: en_core_web_lg
+    phone_context:
+      - phone
+      - telephone
+      - cell
+      - mobile
+      - call
+      - fax
+
+  # Finnish
+  fi:
+    name: Finnish
+    model: fi_core_news_md
+
+  # French
+  fr:
+    name: French
+    model: fr_core_news_md
+    phone_context:
+      - téléphone
+      - portable
+      - mobile
+      - numéro
+      - appeler
+      - fax
+
+  # German
+  de:
+    name: German
+    model: de_core_news_md
+    phone_context:
+      - telefon
+      - telefonnummer
+      - handy
+      - mobil
+      - mobilnummer
+      - fax
+      - anrufen
+
+  # Greek
+  el:
+    name: Greek
+    model: el_core_news_md
+    phone_context:
+      - τηλέφωνο
+      - κινητό
+      - φαξ
+
+  # Italian
+  it:
+    name: Italian
+    model: it_core_news_md
+    phone_context:
+      - telefono
+      - cellulare
+      - mobile
+      - numero
+      - chiamare
+      - fax
+
+  # Japanese
+  ja:
+    name: Japanese
+    model: ja_core_news_md
+    phone_context:
+      - 電話
+      - 携帯
+      - モバイル
+      - ファックス
+
+  # Korean
+  ko:
+    name: Korean
+    model: ko_core_news_md
+    phone_context:
+      - 전화
+      - 휴대폰
+      - 모바일
+      - 팩스
+
+  # Lithuanian
+  lt:
+    name: Lithuanian
+    model: lt_core_news_md
+
+  # Macedonian
+  mk:
+    name: Macedonian
+    model: mk_core_news_md
+
+  # Norwegian Bokmål
+  nb:
+    name: Norwegian
+    model: nb_core_news_md
+    phone_context:
+      - telefon
+      - mobil
+      - ringe
+      - faks
+
+  # Polish
+  pl:
+    name: Polish
+    model: pl_core_news_md
+    phone_context:
+      - telefon
+      - komórka
+      - dzwonić
+      - faks
+
+  # Portuguese
+  pt:
+    name: Portuguese
+    model: pt_core_news_md
+    phone_context:
+      - telefone
+      - celular
+      - móvel
+      - ligar
+      - fax
+
+  # Romanian
+  ro:
+    name: Romanian
+    model: ro_core_news_md
+    phone_context:
+      - telefon
+      - mobil
+      - apel
+      - fax
+
+  # Russian
+  ru:
+    name: Russian
+    model: ru_core_news_md
+    phone_context:
+      - телефон
+      - мобильный
+      - звонить
+      - факс
+
+  # Slovenian
+  sl:
+    name: Slovenian
+    model: sl_core_news_md
+
+  # Spanish
+  es:
+    name: Spanish
+    model: es_core_news_md
+    phone_context:
+      - teléfono
+      - móvil
+      - celular
+      - número
+      - llamar
+      - fax
+
+  # Swedish
+  sv:
+    name: Swedish
+    model: sv_core_news_md
+    phone_context:
+      - telefon
+      - mobil
+      - ringa
+      - fax
+
+  # Ukrainian
+  uk:
+    name: Ukrainian
+    model: uk_core_news_md
+    phone_context:
+      - телефон
+      - мобільний
+      - дзвонити
+      - факс
diff --git a/presidio/scripts/generate-configs.py b/presidio/scripts/generate-configs.py
new file mode 100644
index 0000000..02be7b4
--- /dev/null
+++ b/presidio/scripts/generate-configs.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+Generate Presidio configuration files from selected languages.
+
+Usage:
+    python generate-configs.py --languages=en,de --output=/output
+
+Reads from languages.yaml and generates:
+    - nlp-config.yaml
+    - recognizers-config.yaml
+    - analyzer-config.yaml
+    - install-models.sh
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+import yaml
+
+
+def load_registry(registry_path: Path) -> dict:
+    """Load the language registry."""
+    with open(registry_path) as f:
+        return yaml.safe_load(f)
+
+
+def validate_languages(languages: list[str], registry: dict) -> list[str]:
+    """Validate requested languages exist in registry."""
+    available = set(registry["languages"].keys())
+    valid = []
+    invalid = []
+
+    for lang in languages:
+        if lang in available:
+            valid.append(lang)
+        else:
+            invalid.append(lang)
+
+    if invalid:
+        print(f"Error: Unknown language(s): {', '.join(invalid)}", file=sys.stderr)
+        print(f"Available: {', '.join(sorted(available))}", file=sys.stderr)
+        sys.exit(1)
+
+    return valid
+
+
+def generate_nlp_config(languages: list[str], registry: dict) -> dict:
+    """Generate nlp-config.yaml content."""
+    models = []
+    for lang in languages:
+        lang_config = registry["languages"][lang]
+        models.append({"lang_code": lang, "model_name": lang_config["model"]})
+
+    return {
+        "nlp_engine_name": "spacy",
+        "models": models,
+        "ner_model_configuration": {
+            "model_to_presidio_entity_mapping": {
+                "PER": "PERSON",
+                "PERSON": "PERSON",
+                "LOC": "LOCATION",
+                "GPE": "LOCATION",
+                "ORG": "ORGANIZATION",
+            },
+            "low_confidence_score_multiplier": 0.4,
+            "low_score_entity_names": ["ORG"],
+            "labels_to_ignore": [
+                "O",
+                "CARDINAL",
+                "EVENT",
+                "LANGUAGE",
+                "LAW",
+                "MONEY",
+                "ORDINAL",
+                "PERCENT",
+                "PRODUCT",
+                "QUANTITY",
+                "WORK_OF_ART",
+            ],
+        },
+    }
+
+
+def generate_analyzer_config(languages: list[str]) -> dict:
+    """Generate analyzer-config.yaml content."""
+    return {"supported_languages": languages, "default_score_threshold": 0}
+
+
+def generate_recognizers_config(languages: list[str], registry: dict) -> dict:
+    """Generate recognizers-config.yaml content."""
+    # Build language entries for each recognizer type
+    spacy_langs = [{"language": lang} for lang in languages]
+
+    # Phone recognizer needs context words per language
+    phone_langs = []
+    for lang in languages:
+        lang_config = registry["languages"][lang]
+        entry = {"language": lang}
+        if "phone_context" in lang_config:
+            entry["context"] = lang_config["phone_context"]
+        phone_langs.append(entry)
+
+    return {
+        "supported_languages": languages,
+        "global_regex_flags": 26,
+        "recognizers": [
+            {
+                "name": "SpacyRecognizer",
+                "supported_languages": spacy_langs,
+                "type": "predefined",
+            },
+            {
+                "name": "EmailRecognizer",
+                "supported_languages": spacy_langs,
+                "type": "predefined",
+            },
+            {
+                "name": "PhoneRecognizer",
+                "supported_languages": phone_langs,
+                "type": "predefined",
+            },
+            {
+                "name": "CreditCardRecognizer",
+                "supported_languages": spacy_langs,
+                "type": "predefined",
+            },
+            {
+                "name": "IbanRecognizer",
+                "supported_languages": spacy_langs,
+                "type": "predefined",
+            },
+            {
+                "name": "IpRecognizer",
+                "supported_languages": spacy_langs,
+                "type": "predefined",
+            },
+        ],
+    }
+
+
+def generate_install_script(languages: list[str], registry: dict) -> str:
+    """Generate shell script to install spaCy models."""
+    version = registry["spacy_version"]
+    lines = ["#!/bin/sh", "set -e", ""]
+
+    for lang in languages:
+        model = registry["languages"][lang]["model"]
+        url = f"https://github.com/explosion/spacy-models/releases/download/{model}-{version}/{model}-{version}-py3-none-any.whl"
+        lines.append(f'echo "Installing {model} for {lang}..."')
+        lines.append(f"pip install --no-cache-dir {url}")
+        lines.append("")
+
+    lines.append('echo "All models installed successfully"')
+    return "\n".join(lines)
+
+
+def write_yaml(data: dict, path: Path) -> None:
+    """Write data to YAML file."""
+    with open(path, "w") as f:
+        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate Presidio configs")
+    parser.add_argument(
+        "--languages",
+        required=True,
+        help="Comma-separated list of language codes (e.g., en,de,fr)",
+    )
+    parser.add_argument(
+        "--registry",
+        default="/build/languages.yaml",
+        help="Path to languages.yaml registry",
+    )
+    parser.add_argument(
+        "--output", default="/output", help="Output directory for generated files"
+    )
+    args = parser.parse_args()
+
+    # Parse languages
+    languages = [lang.strip() for lang in args.languages.split(",") if lang.strip()]
+    if not languages:
+        print("Error: No languages specified", file=sys.stderr)
+        sys.exit(1)
+
+    # Load registry
+    registry_path = Path(args.registry)
+    if not registry_path.exists():
+        print(f"Error: Registry not found: {registry_path}", file=sys.stderr)
+        sys.exit(1)
+
+    registry = load_registry(registry_path)
+
+    # Validate languages
+    languages = validate_languages(languages, registry)
+
+    # Create output directory
+    output_dir = Path(args.output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Generate configs
+    print(f"Generating configs for: {', '.join(languages)}")
+
+    nlp_config = generate_nlp_config(languages, registry)
+    write_yaml(nlp_config, output_dir / "nlp-config.yaml")
+    print("  - nlp-config.yaml")
+
+    analyzer_config = generate_analyzer_config(languages)
+    write_yaml(analyzer_config, output_dir / "analyzer-config.yaml")
+    print("  - analyzer-config.yaml")
+
generate_recognizers_config(languages, registry) + write_yaml(recognizers_config, output_dir / "recognizers-config.yaml") + print(f" - recognizers-config.yaml") + + install_script = generate_install_script(languages, registry) + install_path = output_dir / "install-models.sh" + with open(install_path, "w") as f: + f.write(install_script) + install_path.chmod(0o755) + print(f" - install-models.sh") + + print("Done!") + + +if __name__ == "__main__": + main() diff --git a/src/config.ts b/src/config.ts new file mode 100644 index 0000000..65f3578 --- /dev/null +++ b/src/config.ts @@ -0,0 +1,219 @@ +import { existsSync, readFileSync } from "node:fs"; +import { parse as parseYaml } from "yaml"; +import { z } from "zod"; + +// Schema definitions + +const LocalProviderSchema = z.object({ + type: z.enum(["openai", "ollama"]), + api_key: z.string().optional(), + base_url: z.string().url(), + model: z.string(), // Required: maps incoming model to local model +}); + +const MaskingSchema = z.object({ + show_markers: z.boolean().default(false), + marker_text: z.string().default("[protected]"), +}); + +const RoutingSchema = z.object({ + default: z.enum(["upstream", "local"]), + on_pii_detected: z.enum(["upstream", "local"]), +}); + +// All 25 spaCy languages with trained pipelines +// See presidio/languages.yaml for full list +const SupportedLanguages = [ + "ca", // Catalan + "zh", // Chinese + "hr", // Croatian + "da", // Danish + "nl", // Dutch + "en", // English + "fi", // Finnish + "fr", // French + "de", // German + "el", // Greek + "it", // Italian + "ja", // Japanese + "ko", // Korean + "lt", // Lithuanian + "mk", // Macedonian + "nb", // Norwegian + "pl", // Polish + "pt", // Portuguese + "ro", // Romanian + "ru", // Russian + "sl", // Slovenian + "es", // Spanish + "sv", // Swedish + "uk", // Ukrainian +] as const; + +const LanguageEnum = z.enum(SupportedLanguages); + +const PIIDetectionSchema = z.object({ + presidio_url: z.string().url(), + languages: z.array(LanguageEnum).default(["en"]), + fallback_language: LanguageEnum.default("en"), + score_threshold: z.number().min(0).max(1).default(0.7), + entities: z + .array(z.string()) + .default([ + "PERSON", + "EMAIL_ADDRESS", + "PHONE_NUMBER", + "CREDIT_CARD", + "IBAN_CODE", + "IP_ADDRESS", + "LOCATION", + ]), +}); + +const ServerSchema = z.object({ + port: z.number().default(3000), + host: z.string().default("0.0.0.0"), +}); + +const LoggingSchema = z.object({ + database: z.string().default("./data/llm-shield.db"), + retention_days: z.number().default(30), + log_content: z.boolean().default(false), + log_masked_content: z.boolean().default(true), +}); + +const DashboardAuthSchema = z.object({ + username: z.string(), + password: z.string(), +}); + +const DashboardSchema = z.object({ + enabled: z.boolean().default(true), + auth: DashboardAuthSchema.optional(), +}); + +const UpstreamProviderSchema = z.object({ + type: z.enum(["openai"]), + api_key: z.string().optional(), + base_url: z.string().url(), +}); + +const ConfigSchema = z + .object({ + mode: z.enum(["route", "mask"]).default("route"), + server: ServerSchema.default({}), + providers: z.object({ + upstream: UpstreamProviderSchema, + local: LocalProviderSchema.optional(), + }), + routing: RoutingSchema.optional(), + masking: MaskingSchema.default({}), + pii_detection: PIIDetectionSchema, + logging: LoggingSchema.default({}), + dashboard: DashboardSchema.default({}), + }) + .refine( + (config) => { + // Route mode requires local provider and routing config + if (config.mode === "route") { + 
return config.providers.local !== undefined && config.routing !== undefined;
      }
      return true;
    },
    {
      message: "Route mode requires 'providers.local' and 'routing' configuration",
    },
  );

export type Config = z.infer<typeof ConfigSchema>;
export type UpstreamProvider = z.infer<typeof UpstreamProviderSchema>;
export type LocalProvider = z.infer<typeof LocalProviderSchema>;
export type MaskingConfig = z.infer<typeof MaskingSchema>;

/**
 * Replaces ${VAR} and ${VAR:-default} patterns with environment variable values
 */
function substituteEnvVars(value: string): string {
  return value.replace(/\$\{([^}]+)\}/g, (_, expr) => {
    // Support ${VAR:-default} syntax
    const [varName, defaultValue] = expr.split(":-");
    const envValue = process.env[varName];
    if (envValue) {
      return envValue;
    }
    if (defaultValue !== undefined) {
      return defaultValue;
    }
    console.warn(`Warning: Environment variable ${varName} is not set`);
    return "";
  });
}

/**
 * Recursively substitutes environment variables in an object
 */
function substituteEnvVarsInObject(obj: unknown): unknown {
  if (typeof obj === "string") {
    return substituteEnvVars(obj);
  }
  if (Array.isArray(obj)) {
    return obj.map(substituteEnvVarsInObject);
  }
  if (obj !== null && typeof obj === "object") {
    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(obj)) {
      result[key] = substituteEnvVarsInObject(value);
    }
    return result;
  }
  return obj;
}

/**
 * Loads configuration from YAML file with environment variable substitution
 */
export function loadConfig(configPath?: string): Config {
  const paths = configPath
    ? [configPath]
    : ["./config.yaml", "./config.yml", "./config.example.yaml"];

  let configFile: string | null = null;

  for (const path of paths) {
    if (existsSync(path)) {
      configFile = readFileSync(path, "utf-8");
      break;
    }
  }

  if (!configFile) {
    throw new Error(
      `No config file found. 
Tried: ${paths.join(", ")}\nCreate a config.yaml file or copy config.example.yaml`, + ); + } + + const rawConfig = parseYaml(configFile); + const configWithEnv = substituteEnvVarsInObject(rawConfig); + + const result = ConfigSchema.safeParse(configWithEnv); + + if (!result.success) { + console.error("Config validation errors:"); + for (const error of result.error.errors) { + console.error(` - ${error.path.join(".")}: ${error.message}`); + } + throw new Error("Invalid configuration"); + } + + return result.data; +} + +// Singleton config instance +let configInstance: Config | null = null; + +export function getConfig(): Config { + if (!configInstance) { + configInstance = loadConfig(); + } + return configInstance; +} diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..4768477 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,235 @@ +import { Hono } from "hono"; +import { cors } from "hono/cors"; +import { createMiddleware } from "hono/factory"; +import { HTTPException } from "hono/http-exception"; +import { logger } from "hono/logger"; +import { getConfig } from "./config"; +import { chatRoutes } from "./routes/chat"; +import { dashboardRoutes } from "./routes/dashboard"; +import { healthRoutes } from "./routes/health"; +import { infoRoutes } from "./routes/info"; +import { getLogger } from "./services/logger"; +import { getPIIDetector } from "./services/pii-detector"; + +type Variables = { + requestId: string; +}; + +const config = getConfig(); +const app = new Hono<{ Variables: Variables }>(); + +// Request ID middleware +const requestIdMiddleware = createMiddleware<{ Variables: Variables }>(async (c, next) => { + const requestId = c.req.header("x-request-id") || crypto.randomUUID(); + c.set("requestId", requestId); + c.header("X-Request-ID", requestId); + await next(); +}); + +// Middleware +app.use("*", requestIdMiddleware); +app.use("*", cors()); +app.use("*", logger()); + +app.route("/", healthRoutes); +app.route("/", infoRoutes); +app.route("/openai/v1", chatRoutes); + +if (config.dashboard.enabled) { + app.route("/dashboard", dashboardRoutes); +} + +app.notFound((c) => { + return c.json( + { + error: { + message: `Route not found: ${c.req.method} ${c.req.path}`, + type: "not_found", + }, + }, + 404, + ); +}); + +app.onError((err, c) => { + if (err instanceof HTTPException) { + return c.json( + { + error: { + message: err.message, + type: err.status >= 500 ? 
"server_error" : "client_error", + }, + }, + err.status, + ); + } + + console.error("Unhandled error:", err); + return c.json( + { + error: { + message: "Internal server error", + type: "internal_error", + }, + }, + 500, + ); +}); + +const port = config.server.port; +const host = config.server.host; + +export default { + port, + hostname: host, + fetch: app.fetch, +}; + +// Startup validation +validateStartup().then(() => { + printStartupBanner(config, host, port); + const stopCleanup = startCleanupScheduler(config); + setupGracefulShutdown(stopCleanup); +}); + +async function validateStartup() { + const detector = getPIIDetector(); + + // Wait for Presidio to be ready + console.log("[STARTUP] Connecting to Presidio..."); + const ready = await detector.waitForReady(30, 1000); + + if (!ready) { + console.error( + `[STARTUP] ✗ Could not connect to Presidio at ${config.pii_detection.presidio_url}`, + ); + console.error( + " Make sure Presidio is running: docker compose up presidio-analyzer -d", + ); + process.exit(1); + } + + console.log("[STARTUP] ✓ Presidio connected"); + + // Validate configured languages + console.log(`[STARTUP] Validating languages: ${config.pii_detection.languages.join(", ")}`); + const validation = await detector.validateLanguages(config.pii_detection.languages); + + if (validation.missing.length > 0) { + console.error("\n❌ Language mismatch detected!\n"); + console.error(` Configured: ${config.pii_detection.languages.join(", ")}`); + console.error( + ` Available: ${validation.available.length > 0 ? validation.available.join(", ") : "(none)"}`, + ); + console.error(` Missing: ${validation.missing.join(", ")}\n`); + console.error(" To fix, either:"); + console.error( + ` 1. Rebuild: LANGUAGES=${config.pii_detection.languages.join(",")} docker compose build presidio-analyzer`, + ); + console.error(` 2. Update config.yaml languages to: [${validation.available.join(", ")}]\n`); + console.error("[STARTUP] ✗ Language configuration mismatch. Exiting for safety."); + process.exit(1); + } else { + console.log("[STARTUP] ✓ All configured languages available"); + } +} + +function printStartupBanner(config: ReturnType, host: string, port: number) { + const modeInfo = + config.mode === "route" + ? ` +Routing: + Default: ${config.routing?.default || "upstream"} + On PII: ${config.routing?.on_pii_detected || "local"} + +Providers: + Upstream: ${config.providers.upstream.type} + Local: ${config.providers.local?.type || "not configured"} → ${config.providers.local?.model || "n/a"}` + : ` +Masking: + Markers: ${config.masking.show_markers ? 
"enabled" : "disabled"} + +Provider: + Upstream: ${config.providers.upstream.type}`; + + console.log(` +╔═══════════════════════════════════════════════════════════╗ +║ LLM-Shield ║ +║ Intelligent privacy-aware LLM proxy ║ +╚═══════════════════════════════════════════════════════════╝ + +Server: http://${host}:${port} +API: http://${host}:${port}/openai/v1/chat/completions +Health: http://${host}:${port}/health +Info: http://${host}:${port}/info +Dashboard: http://${host}:${port}/dashboard + +Mode: ${config.mode.toUpperCase()} +${modeInfo} + +PII Detection: + Languages: ${config.pii_detection.languages.join(", ")} + Fallback: ${config.pii_detection.fallback_language} + Threshold: ${config.pii_detection.score_threshold} + Entities: ${config.pii_detection.entities.join(", ")} +`); +} + +function startCleanupScheduler(config: ReturnType): () => void { + let cleanupInterval: ReturnType | null = null; + + if (config.logging.retention_days > 0) { + const logger = getLogger(); + + // Run cleanup on startup + try { + const deleted = logger.cleanup(); + if (deleted > 0) { + console.log( + `Log cleanup: removed ${deleted} entries older than ${config.logging.retention_days} days`, + ); + } + } catch (error) { + console.error("Log cleanup failed:", error); + } + + // Schedule daily cleanup + cleanupInterval = setInterval( + () => { + try { + const count = logger.cleanup(); + if (count > 0) { + console.log( + `Log cleanup: removed ${count} entries older than ${config.logging.retention_days} days`, + ); + } + } catch (error) { + console.error("Log cleanup failed:", error); + } + }, + 24 * 60 * 60 * 1000, + ); + } + + return () => { + if (cleanupInterval) { + clearInterval(cleanupInterval); + } + }; +} + +function setupGracefulShutdown(stopCleanup: () => void) { + function shutdown() { + console.log("\nShutting down..."); + stopCleanup(); + try { + getLogger().close(); + } catch { + // Logger might not be initialized + } + process.exit(0); + } + + process.on("SIGTERM", shutdown); + process.on("SIGINT", shutdown); +} diff --git a/src/routes/chat.test.ts b/src/routes/chat.test.ts new file mode 100644 index 0000000..99b0e45 --- /dev/null +++ b/src/routes/chat.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, test } from "bun:test"; +import { Hono } from "hono"; +import { chatRoutes } from "./chat"; + +const app = new Hono(); +app.route("/openai/v1", chatRoutes); + +describe("POST /openai/v1/chat/completions", () => { + test("returns 400 for missing messages", async () => { + const res = await app.request("/openai/v1/chat/completions", { + method: "POST", + body: JSON.stringify({}), + headers: { "Content-Type": "application/json" }, + }); + + expect(res.status).toBe(400); + const body = (await res.json()) as { error: { type: string } }; + expect(body.error.type).toBe("invalid_request_error"); + }); + + test("returns 400 for invalid message format", async () => { + const res = await app.request("/openai/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ + messages: [{ invalid: "format" }], + }), + headers: { "Content-Type": "application/json" }, + }); + + expect(res.status).toBe(400); + }); + + test("returns 400 for invalid role", async () => { + const res = await app.request("/openai/v1/chat/completions", { + method: "POST", + body: JSON.stringify({ + messages: [{ role: "invalid", content: "test" }], + }), + headers: { "Content-Type": "application/json" }, + }); + + expect(res.status).toBe(400); + }); +}); + +describe("GET /openai/v1/models", () => { + test("forwards to upstream (returns 
error without auth)", async () => { + const res = await app.request("/openai/v1/models"); + // Without auth, upstream returns 401 + expect([200, 401, 500, 502]).toContain(res.status); + }); +}); diff --git a/src/routes/chat.ts b/src/routes/chat.ts new file mode 100644 index 0000000..4cfcad6 --- /dev/null +++ b/src/routes/chat.ts @@ -0,0 +1,232 @@ +import { zValidator } from "@hono/zod-validator"; +import type { Context } from "hono"; +import { Hono } from "hono"; +import { HTTPException } from "hono/http-exception"; +import { proxy } from "hono/proxy"; +import { z } from "zod"; +import type { MaskingConfig } from "../config"; +import { getRouter, type MaskDecision, type RoutingDecision } from "../services/decision"; +import type { + ChatCompletionRequest, + ChatCompletionResponse, + ChatMessage, + LLMResult, +} from "../services/llm-client"; +import { logRequest, type RequestLogData } from "../services/logger"; +import { unmaskResponse } from "../services/masking"; +import { createUnmaskingStream } from "../services/stream-transformer"; + +// Request validation schema +const ChatCompletionSchema = z + .object({ + messages: z + .array( + z.object({ + role: z.enum(["system", "user", "assistant"]), + content: z.string(), + }), + ) + .min(1, "At least one message is required"), + }) + .passthrough(); + +export const chatRoutes = new Hono(); + +/** + * Type guard for MaskDecision + */ +function isMaskDecision(decision: RoutingDecision): decision is MaskDecision { + return decision.mode === "mask"; +} + +chatRoutes.get("/models", (c) => { + const { upstream } = getRouter().getProvidersInfo(); + + return proxy(`${upstream.baseUrl}/models`, { + headers: { + Authorization: c.req.header("Authorization"), + }, + }); +}); + +/** + * POST /v1/chat/completions - OpenAI-compatible chat completion endpoint + */ +chatRoutes.post( + "/chat/completions", + zValidator("json", ChatCompletionSchema, (result, c) => { + if (!result.success) { + return c.json( + { + error: { + message: "Invalid request body", + type: "invalid_request_error", + details: result.error.errors, + }, + }, + 400, + ); + } + }), + async (c) => { + const startTime = Date.now(); + const body = c.req.valid("json") as ChatCompletionRequest; + const router = getRouter(); + + let decision: RoutingDecision; + try { + decision = await router.decide(body.messages); + } catch (error) { + console.error("PII detection error:", error); + throw new HTTPException(503, { message: "PII detection service unavailable" }); + } + + return handleCompletion(c, body, decision, startTime, router); + }, +); + +/** + * Handle chat completion for both route and mask modes + */ +async function handleCompletion( + c: Context, + body: ChatCompletionRequest, + decision: RoutingDecision, + startTime: number, + router: ReturnType, +) { + const client = router.getClient(decision.provider); + const maskingConfig = router.getMaskingConfig(); + const authHeader = decision.provider === "upstream" ? 
c.req.header("Authorization") : undefined; + + // Prepare request and masked content for logging + let request: ChatCompletionRequest = body; + let maskedContent: string | undefined; + + if (isMaskDecision(decision)) { + request = { ...body, messages: decision.maskedMessages }; + maskedContent = formatMessagesForLog(decision.maskedMessages); + } + + try { + const result = await client.chatCompletion(request, authHeader); + + setShieldHeaders(c, decision); + + if (result.isStreaming) { + return handleStreamingResponse(c, result, decision, startTime, maskedContent, maskingConfig); + } + + return handleJsonResponse(c, result, decision, startTime, maskedContent, maskingConfig); + } catch (error) { + console.error("LLM request error:", error); + const message = error instanceof Error ? error.message : "Unknown error"; + throw new HTTPException(502, { message: `LLM provider error: ${message}` }); + } +} + +/** + * Set X-LLM-Shield response headers + */ +function setShieldHeaders(c: Context, decision: RoutingDecision) { + c.header("X-LLM-Shield-Mode", decision.mode); + c.header("X-LLM-Shield-Provider", decision.provider); + c.header("X-LLM-Shield-PII-Detected", decision.piiResult.hasPII.toString()); + c.header("X-LLM-Shield-Language", decision.piiResult.language); + if (decision.piiResult.languageFallback) { + c.header("X-LLM-Shield-Language-Fallback", "true"); + } + if (decision.mode === "mask") { + c.header("X-LLM-Shield-PII-Masked", decision.piiResult.hasPII.toString()); + } +} + +/** + * Handle streaming response + */ +function handleStreamingResponse( + c: Context, + result: LLMResult & { isStreaming: true }, + decision: RoutingDecision, + startTime: number, + maskedContent: string | undefined, + maskingConfig: MaskingConfig, +) { + logRequest( + createLogData(decision, result, startTime, undefined, maskedContent), + c.req.header("User-Agent") || null, + ); + + c.header("Content-Type", "text/event-stream"); + c.header("Cache-Control", "no-cache"); + c.header("Connection", "keep-alive"); + + if (isMaskDecision(decision)) { + const unmaskingStream = createUnmaskingStream( + result.response, + decision.maskingContext, + maskingConfig, + ); + return c.body(unmaskingStream); + } + + return c.body(result.response); +} + +/** + * Handle JSON response + */ +function handleJsonResponse( + c: Context, + result: LLMResult & { isStreaming: false }, + decision: RoutingDecision, + startTime: number, + maskedContent: string | undefined, + maskingConfig: MaskingConfig, +) { + logRequest( + createLogData(decision, result, startTime, result.response, maskedContent), + c.req.header("User-Agent") || null, + ); + + if (isMaskDecision(decision)) { + return c.json(unmaskResponse(result.response, decision.maskingContext, maskingConfig)); + } + + return c.json(result.response); +} + +/** + * Create log data from decision and result + */ +function createLogData( + decision: RoutingDecision, + result: LLMResult, + startTime: number, + response?: ChatCompletionResponse, + maskedContent?: string, +): RequestLogData { + return { + timestamp: new Date().toISOString(), + mode: decision.mode, + provider: decision.provider, + model: result.model, + piiDetected: decision.piiResult.hasPII, + entities: [...new Set(decision.piiResult.newEntities.map((e) => e.entity_type))], + latencyMs: Date.now() - startTime, + scanTimeMs: decision.piiResult.scanTimeMs, + promptTokens: response?.usage?.prompt_tokens, + completionTokens: response?.usage?.completion_tokens, + language: decision.piiResult.language, + languageFallback: 
decision.piiResult.languageFallback,
    detectedLanguage: decision.piiResult.detectedLanguage,
    maskedContent,
  };
}

/**
 * Format messages for logging
 */
function formatMessagesForLog(messages: ChatMessage[]): string {
  return messages.map((m) => `[${m.role}] ${m.content}`).join("\n");
}
diff --git a/src/routes/dashboard.tsx b/src/routes/dashboard.tsx
new file mode 100644
index 0000000..d7eccc0
--- /dev/null
+++ b/src/routes/dashboard.tsx
@@ -0,0 +1,72 @@
import { zValidator } from "@hono/zod-validator";
import { Hono } from "hono";
import { basicAuth } from "hono/basic-auth";
import { tailwind } from "hono-tailwind";
import { z } from "zod";
import { getConfig } from "../config";
import { getLogger } from "../services/logger";
import DashboardPage from "../views/dashboard/page";

const LogsQuerySchema = z.object({
  limit: z.coerce.number().min(1).max(1000).default(100),
  offset: z.coerce.number().min(0).default(0),
});

const config = getConfig();

export const dashboardRoutes = new Hono();

dashboardRoutes.use("/tailwind.css", tailwind());

if (config.dashboard.auth) {
  dashboardRoutes.use(
    "*",
    basicAuth({
      username: config.dashboard.auth.username,
      password: config.dashboard.auth.password,
      realm: "LLM-Shield Dashboard",
    }),
  );
}

/**
 * GET /api/logs - Get recent request logs
 */
dashboardRoutes.get("/api/logs", zValidator("query", LogsQuerySchema), (c) => {
  const { limit, offset } = c.req.valid("query");

  const logger = getLogger();
  const logs = logger.getLogs(limit, offset);

  return c.json({
    logs,
    pagination: {
      limit,
      offset,
      count: logs.length,
    },
  });
});

/**
 * GET /api/stats - Get statistics
 */
dashboardRoutes.get("/api/stats", (c) => {
  const config = getConfig();
  const logger = getLogger();
  const stats = logger.getStats();
  const entityStats = logger.getEntityStats();

  return c.json({
    ...stats,
    entity_breakdown: entityStats,
    mode: config.mode,
  });
});

/**
 * GET /dashboard - Dashboard HTML UI
 */
dashboardRoutes.get("/", (c) => {
  return c.html(<DashboardPage />);
});
diff --git a/src/routes/health.test.ts b/src/routes/health.test.ts
new file mode 100644
index 0000000..5bd3c27
--- /dev/null
+++ b/src/routes/health.test.ts
@@ -0,0 +1,20 @@
import { describe, expect, test } from "bun:test";
import { Hono } from "hono";
import { healthRoutes } from "./health";

const app = new Hono();
app.route("/", healthRoutes);

describe("GET /health", () => {
  test("returns health status", async () => {
    const res = await app.request("/health");

    // May be 200 (healthy) or 503 (degraded) depending on Presidio
    expect([200, 503]).toContain(res.status);

    const body = (await res.json()) as Record<string, unknown>;
    expect(body.status).toMatch(/healthy|degraded/);
    expect(body.services).toBeDefined();
    expect(body.timestamp).toBeDefined();
  });
});
diff --git a/src/routes/health.ts b/src/routes/health.ts
new file mode 100644
index 0000000..3486e6d
--- /dev/null
+++ b/src/routes/health.ts
@@ -0,0 +1,29 @@
import { Hono } from "hono";
import { getConfig } from "../config";
import { getRouter } from "../services/decision";

export const healthRoutes = new Hono();

healthRoutes.get("/health", async (c) => {
  const config = getConfig();
  const router = getRouter();
  const health = await router.healthCheck();
  const isHealthy = health.presidio;

  const services: Record<string, string> = {
    presidio: health.presidio ? "up" : "down",
  };
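  // NOTE: Presidio is the only hard dependency in either mode, so overall
  // health tracks it alone; the local LLM is added to the service map below
  // only in route mode, where it actually serves traffic.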
"up" : "down"; + } + + return c.json( + { + status: isHealthy ? "healthy" : "degraded", + services, + timestamp: new Date().toISOString(), + }, + isHealthy ? 200 : 503, + ); +}); diff --git a/src/routes/info.test.ts b/src/routes/info.test.ts new file mode 100644 index 0000000..f7ef900 --- /dev/null +++ b/src/routes/info.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, test } from "bun:test"; +import { Hono } from "hono"; +import { infoRoutes } from "./info"; + +const app = new Hono(); +app.route("/", infoRoutes); + +describe("GET /info", () => { + test("returns 200 with app info", async () => { + const res = await app.request("/info"); + + expect(res.status).toBe(200); + + const body = (await res.json()) as Record; + expect(body.name).toBe("LLM-Shield"); + expect(body.version).toBe("0.1.0"); + expect(body.mode).toBeDefined(); + expect(body.providers).toBeDefined(); + expect(body.pii_detection).toBeDefined(); + }); + + test("returns correct content-type", async () => { + const res = await app.request("/info"); + + expect(res.headers.get("content-type")).toContain("application/json"); + }); +}); diff --git a/src/routes/info.ts b/src/routes/info.ts new file mode 100644 index 0000000..009041d --- /dev/null +++ b/src/routes/info.ts @@ -0,0 +1,59 @@ +import { Hono } from "hono"; +import pkg from "../../package.json"; +import { getConfig } from "../config"; +import { getRouter } from "../services/decision"; +import { getPIIDetector } from "../services/pii-detector"; + +export const infoRoutes = new Hono(); + +infoRoutes.get("/info", (c) => { + const config = getConfig(); + const router = getRouter(); + const providers = router.getProvidersInfo(); + const detector = getPIIDetector(); + const languageValidation = detector.getLanguageValidation(); + + const info: Record = { + name: "LLM-Shield", + version: pkg.version, + description: "Intelligent privacy-aware LLM proxy", + mode: config.mode, + providers: { + upstream: { + type: providers.upstream.type, + }, + }, + pii_detection: { + languages: languageValidation + ? 
{ + configured: config.pii_detection.languages, + available: languageValidation.available, + missing: languageValidation.missing, + } + : config.pii_detection.languages, + fallback_language: config.pii_detection.fallback_language, + score_threshold: config.pii_detection.score_threshold, + entities: config.pii_detection.entities, + }, + }; + + if (config.mode === "route" && config.routing) { + info.routing = { + default: config.routing.default, + on_pii_detected: config.routing.on_pii_detected, + }; + if (providers.local) { + (info.providers as Record).local = { + type: providers.local.type, + }; + } + } + + if (config.mode === "mask") { + info.masking = { + show_markers: config.masking.show_markers, + }; + } + + return c.json(info); +}); diff --git a/src/services/decision.test.ts b/src/services/decision.test.ts new file mode 100644 index 0000000..f01236c --- /dev/null +++ b/src/services/decision.test.ts @@ -0,0 +1,131 @@ +import { describe, expect, test } from "bun:test"; +import type { PIIDetectionResult } from "./pii-detector"; + +/** + * Pure routing logic extracted for testing + * This mirrors the logic in Router.decideRoute() + */ +function decideRoute( + piiResult: PIIDetectionResult, + routing: { default: "upstream" | "local"; on_pii_detected: "upstream" | "local" }, +): { provider: "upstream" | "local"; reason: string } { + if (piiResult.hasPII) { + const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))]; + return { + provider: routing.on_pii_detected, + reason: `PII detected: ${entityTypes.join(", ")}`, + }; + } + + return { + provider: routing.default, + reason: "No PII detected", + }; +} + +/** + * Helper to create a mock PIIDetectionResult + */ +function createPIIResult( + hasPII: boolean, + entities: Array<{ entity_type: string }> = [], +): PIIDetectionResult { + const newEntities = entities.map((e) => ({ + entity_type: e.entity_type, + start: 0, + end: 10, + score: 0.9, + })); + + return { + hasPII, + newEntities, + entitiesByMessage: [newEntities], + language: "en", + languageFallback: false, + scanTimeMs: 50, + }; +} + +describe("decideRoute", () => { + describe("with default=upstream, on_pii_detected=local", () => { + const routing = { default: "upstream" as const, on_pii_detected: "local" as const }; + + test("routes to upstream when no PII detected", () => { + const result = decideRoute(createPIIResult(false), routing); + + expect(result.provider).toBe("upstream"); + expect(result.reason).toBe("No PII detected"); + }); + + test("routes to local when PII detected", () => { + const result = decideRoute(createPIIResult(true, [{ entity_type: "PERSON" }]), routing); + + expect(result.provider).toBe("local"); + expect(result.reason).toContain("PII detected"); + expect(result.reason).toContain("PERSON"); + }); + + test("includes all entity types in reason", () => { + const result = decideRoute( + createPIIResult(true, [ + { entity_type: "PERSON" }, + { entity_type: "EMAIL_ADDRESS" }, + { entity_type: "PHONE_NUMBER" }, + ]), + routing, + ); + + expect(result.reason).toContain("PERSON"); + expect(result.reason).toContain("EMAIL_ADDRESS"); + expect(result.reason).toContain("PHONE_NUMBER"); + }); + + test("deduplicates entity types in reason", () => { + const result = decideRoute( + createPIIResult(true, [ + { entity_type: "PERSON" }, + { entity_type: "PERSON" }, + { entity_type: "PERSON" }, + ]), + routing, + ); + + // Should only contain PERSON once + const matches = result.reason.match(/PERSON/g); + expect(matches?.length).toBe(1); + }); + }); + + 
describe("with default=local, on_pii_detected=upstream", () => { + const routing = { default: "local" as const, on_pii_detected: "upstream" as const }; + + test("routes to local when no PII detected", () => { + const result = decideRoute(createPIIResult(false), routing); + + expect(result.provider).toBe("local"); + expect(result.reason).toBe("No PII detected"); + }); + + test("routes to upstream when PII detected", () => { + const result = decideRoute( + createPIIResult(true, [{ entity_type: "EMAIL_ADDRESS" }]), + routing, + ); + + expect(result.provider).toBe("upstream"); + expect(result.reason).toContain("PII detected"); + }); + }); + + describe("with same provider for both cases", () => { + const routing = { default: "upstream" as const, on_pii_detected: "upstream" as const }; + + test("always routes to upstream regardless of PII", () => { + expect(decideRoute(createPIIResult(false), routing).provider).toBe("upstream"); + expect( + decideRoute(createPIIResult(true, [{ entity_type: "PERSON" }]), routing).provider, + ).toBe("upstream"); + }); + }); +}); diff --git a/src/services/decision.ts b/src/services/decision.ts new file mode 100644 index 0000000..6cc12e4 --- /dev/null +++ b/src/services/decision.ts @@ -0,0 +1,187 @@ +import { type Config, getConfig } from "../config"; +import { type ChatMessage, LLMClient } from "../services/llm-client"; +import { createMaskingContext, type MaskingContext, maskMessages } from "../services/masking"; +import { getPIIDetector, type PIIDetectionResult } from "../services/pii-detector"; + +/** + * Routing decision result for route mode + */ +export interface RouteDecision { + mode: "route"; + provider: "upstream" | "local"; + reason: string; + piiResult: PIIDetectionResult; +} + +/** + * Masking decision result for mask mode + */ +export interface MaskDecision { + mode: "mask"; + provider: "upstream"; + reason: string; + piiResult: PIIDetectionResult; + maskedMessages: ChatMessage[]; + maskingContext: MaskingContext; +} + +export type RoutingDecision = RouteDecision | MaskDecision; + +/** + * Router that decides how to handle requests based on PII detection + * Supports two modes: route (to local LLM) or mask (anonymize for upstream) + */ +export class Router { + private upstreamClient: LLMClient; + private localClient: LLMClient | null; + private config: Config; + + constructor() { + this.config = getConfig(); + + this.upstreamClient = new LLMClient(this.config.providers.upstream, "upstream"); + this.localClient = this.config.providers.local + ? 
new LLMClient(this.config.providers.local, "local", this.config.providers.local.model)
      : null;
  }

  /**
   * Returns the current mode
   */
  getMode(): "route" | "mask" {
    return this.config.mode;
  }

  /**
   * Decides how to handle messages based on mode and PII detection
   */
  async decide(messages: ChatMessage[]): Promise<RoutingDecision> {
    const detector = getPIIDetector();
    const piiResult = await detector.analyzeMessages(messages);

    if (this.config.mode === "mask") {
      return await this.decideMask(messages, piiResult);
    }

    return this.decideRoute(piiResult);
  }

  /**
   * Route mode: decides which provider to use
   */
  private decideRoute(piiResult: PIIDetectionResult): RouteDecision {
    const routing = this.config.routing;
    if (!routing) {
      throw new Error("Route mode requires routing configuration");
    }

    // Route based on PII detection
    if (piiResult.hasPII) {
      const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
      return {
        mode: "route",
        provider: routing.on_pii_detected,
        reason: `PII detected: ${entityTypes.join(", ")}`,
        piiResult,
      };
    }

    // No PII detected, use default provider
    return {
      mode: "route",
      provider: routing.default,
      reason: "No PII detected",
      piiResult,
    };
  }

  private async decideMask(
    messages: ChatMessage[],
    piiResult: PIIDetectionResult,
  ): Promise<MaskDecision> {
    if (!piiResult.hasPII) {
      return {
        mode: "mask",
        provider: "upstream",
        reason: "No PII detected",
        piiResult,
        maskedMessages: messages,
        maskingContext: createMaskingContext(),
      };
    }

    const detector = getPIIDetector();
    const fullScan = await detector.analyzeAllMessages(messages, {
      language: piiResult.language,
      usedFallback: piiResult.languageFallback,
    });

    const { masked, context } = maskMessages(messages, fullScan.entitiesByMessage);

    const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];

    return {
      mode: "mask",
      provider: "upstream",
      reason: `PII masked: ${entityTypes.join(", ")}`,
      piiResult,
      maskedMessages: masked,
      maskingContext: context,
    };
  }

  getClient(provider: "upstream" | "local"): LLMClient {
    if (provider === "local") {
      if (!this.localClient) {
        throw new Error("Local provider not configured");
      }
      return this.localClient;
    }
    return this.upstreamClient;
  }

  /**
   * Gets masking config
   */
  getMaskingConfig() {
    return this.config.masking;
  }

  /**
   * Checks health of services (Presidio required, local LLM only in route mode)
   */
  async healthCheck(): Promise<{
    local: boolean;
    presidio: boolean;
  }> {
    const detector = getPIIDetector();

    const [presidioHealth, localHealth] = await Promise.all([
      detector.healthCheck(),
      this.localClient?.healthCheck() ?? Promise.resolve(true),
    ]);

    return {
      local: localHealth,
      presidio: presidioHealth,
    };
  }

  getProvidersInfo() {
    return {
      mode: this.config.mode,
      upstream: this.upstreamClient.getInfo(),
      local: this.localClient?.getInfo() ?? 
null, + }; + } +} + +// Singleton instance +let routerInstance: Router | null = null; + +export function getRouter(): Router { + if (!routerInstance) { + routerInstance = new Router(); + } + return routerInstance; +} diff --git a/src/services/language-detector.ts b/src/services/language-detector.ts new file mode 100644 index 0000000..5991538 --- /dev/null +++ b/src/services/language-detector.ts @@ -0,0 +1,94 @@ +import eld from "eld/small"; +import { getConfig } from "../config"; + +// All 24 spaCy languages with trained pipelines +export type SupportedLanguage = + | "ca" + | "zh" + | "hr" + | "da" + | "nl" + | "en" + | "fi" + | "fr" + | "de" + | "el" + | "it" + | "ja" + | "ko" + | "lt" + | "mk" + | "nb" + | "pl" + | "pt" + | "ro" + | "ru" + | "sl" + | "es" + | "sv" + | "uk"; + +export interface LanguageDetectionResult { + language: SupportedLanguage; + usedFallback: boolean; + detectedLanguage?: string; + confidence?: number; +} + +// Special case mapping: Norwegian detected as "no" but Presidio expects "nb" +const ISO_TO_PRESIDIO_OVERRIDES: Record = { + no: "nb", // Norwegian (generic) → Norwegian Bokmål +}; + +export class LanguageDetector { + private configuredLanguages: SupportedLanguage[]; + private fallbackLanguage: SupportedLanguage; + + constructor() { + const config = getConfig(); + this.configuredLanguages = config.pii_detection.languages; + this.fallbackLanguage = config.pii_detection.fallback_language; + } + + detect(text: string): LanguageDetectionResult { + if (this.configuredLanguages.length === 1) { + return { + language: this.configuredLanguages[0], + usedFallback: false, + }; + } + + const result = eld.detect(text); + const detectedIso = result.language; + const scores = result.getScores(); + const confidence = scores[detectedIso] ?? 
0; + // Use override if exists, otherwise use the detected code as-is (most are 1:1) + const presidioLang = (ISO_TO_PRESIDIO_OVERRIDES[detectedIso] || + detectedIso) as SupportedLanguage; + + if (presidioLang && this.configuredLanguages.includes(presidioLang)) { + return { + language: presidioLang, + usedFallback: false, + detectedLanguage: detectedIso, + confidence, + }; + } + + return { + language: this.fallbackLanguage, + usedFallback: true, + detectedLanguage: detectedIso, + confidence, + }; + } +} + +let detectorInstance: LanguageDetector | null = null; + +export function getLanguageDetector(): LanguageDetector { + if (!detectorInstance) { + detectorInstance = new LanguageDetector(); + } + return detectorInstance; +} diff --git a/src/services/llm-client.ts b/src/services/llm-client.ts new file mode 100644 index 0000000..8745899 --- /dev/null +++ b/src/services/llm-client.ts @@ -0,0 +1,182 @@ +import type { LocalProvider, UpstreamProvider } from "../config"; + +/** + * OpenAI-compatible message format + */ +export interface ChatMessage { + role: "system" | "user" | "assistant"; + content: string; +} + +/** + * OpenAI-compatible chat completion request + * Only required field is messages - all other params pass through to provider + */ +export interface ChatCompletionRequest { + messages: ChatMessage[]; + model?: string; + stream?: boolean; + [key: string]: unknown; +} + +/** + * OpenAI-compatible chat completion response + */ +export interface ChatCompletionResponse { + id: string; + object: "chat.completion"; + created: number; + model: string; + choices: Array<{ + index: number; + message: ChatMessage; + finish_reason: "stop" | "length" | "content_filter" | null; + }>; + usage?: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +} + +/** + * Result from LLM client including metadata (Discriminated Union) + */ +export type LLMResult = + | { + isStreaming: true; + response: ReadableStream; + model: string; + provider: "upstream" | "local"; + } + | { + isStreaming: false; + response: ChatCompletionResponse; + model: string; + provider: "upstream" | "local"; + }; + +/** + * LLM Client for OpenAI-compatible APIs (OpenAI, Ollama, etc.) + */ +export class LLMClient { + private baseUrl: string; + private apiKey?: string; + private providerType: "openai" | "ollama"; + private providerName: "upstream" | "local"; + private defaultModel?: string; + + constructor( + provider: UpstreamProvider | LocalProvider, + providerName: "upstream" | "local", + defaultModel?: string, + ) { + this.baseUrl = provider.base_url.replace(/\/$/, ""); + this.apiKey = provider.api_key; + this.providerType = provider.type; + this.providerName = providerName; + this.defaultModel = defaultModel; + } + + /** + * Sends a chat completion request + * @param request The chat completion request + * @param authHeader Optional Authorization header from client (forwarded for upstream) + */ + async chatCompletion(request: ChatCompletionRequest, authHeader?: string): Promise { + // Local uses configured model, upstream uses request model + const model = this.defaultModel || request.model; + const isStreaming = request.stream ?? false; + + if (!model) { + throw new Error("Model is required in request"); + } + + // Build the endpoint URL + const endpoint = + this.providerType === "ollama" + ? 
`${this.baseUrl}/v1/chat/completions`
        : `${this.baseUrl}/chat/completions`;

    // Build headers
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };

    // Use client's auth header if provided, otherwise fall back to config
    if (authHeader) {
      headers.Authorization = authHeader;
    } else if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }

    // Build request body - convert max_tokens to max_completion_tokens for OpenAI
    const body: Record<string, unknown> = {
      ...request,
      model,
      stream: isStreaming,
    };

    // OpenAI newer models use max_completion_tokens instead of max_tokens
    if (this.providerType === "openai" && body.max_tokens) {
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
    }

    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(120_000), // 2 minute timeout for LLM requests
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`LLM API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    if (isStreaming) {
      if (!response.body) {
        throw new Error("No response body for streaming request");
      }

      return {
        response: response.body,
        isStreaming: true,
        model,
        provider: this.providerName,
      };
    }

    const data = (await response.json()) as ChatCompletionResponse;
    return {
      response: data,
      isStreaming: false,
      model,
      provider: this.providerName,
    };
  }

  /**
   * Checks if the local LLM service is healthy (Ollama)
   */
  async healthCheck(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`, {
        method: "GET",
        signal: AbortSignal.timeout(5000),
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  getInfo(): { name: "upstream" | "local"; type: "openai" | "ollama"; baseUrl: string } {
    return {
      name: this.providerName,
      type: this.providerType,
      baseUrl: this.baseUrl,
    };
  }
}
diff --git a/src/services/logger.ts b/src/services/logger.ts
new file mode 100644
index 0000000..b79ad8b
--- /dev/null
+++ b/src/services/logger.ts
@@ -0,0 +1,299 @@
import { Database } from "bun:sqlite";
import { mkdirSync } from "node:fs";
import { getConfig } from "../config";

export interface RequestLog {
  id?: number;
  timestamp: string;
  mode: "route" | "mask";
  provider: "upstream" | "local";
  model: string;
  pii_detected: boolean;
  entities: string;
  latency_ms: number;
  scan_time_ms: number;
  prompt_tokens: number | null;
  completion_tokens: number | null;
  user_agent: string | null;
  language: string;
  language_fallback: boolean;
  detected_language: string | null;
  masked_content: string | null;
}

/**
 * Statistics summary
 */
export interface Stats {
  total_requests: number;
  pii_requests: number;
  pii_percentage: number;
  upstream_requests: number;
  local_requests: number;
  avg_scan_time_ms: number;
  total_tokens: number;
  requests_last_hour: number;
}

/**
 * SQLite-based logger for request tracking
 */
export class Logger {
  private db: Database;
  private retentionDays: number;

  constructor() {
    const config = getConfig();
    this.retentionDays = config.logging.retention_days;

    // Ensure data directory exists
    const dbPath = config.logging.database;
    const dir = dbPath.substring(0, dbPath.lastIndexOf("/"));
    if (dir) {
      mkdirSync(dir, { recursive: true });
    }

    this.db = new Database(dbPath);
    this.initializeDatabase();
  }

  private initializeDatabase(): void {
this.db.run(` + CREATE TABLE IF NOT EXISTS request_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp TEXT NOT NULL, + mode TEXT NOT NULL DEFAULT 'route', + provider TEXT NOT NULL, + model TEXT NOT NULL, + pii_detected INTEGER NOT NULL DEFAULT 0, + entities TEXT, + latency_ms INTEGER NOT NULL, + scan_time_ms INTEGER NOT NULL DEFAULT 0, + prompt_tokens INTEGER, + completion_tokens INTEGER, + user_agent TEXT, + language TEXT NOT NULL DEFAULT 'en', + language_fallback INTEGER NOT NULL DEFAULT 0, + detected_language TEXT, + masked_content TEXT, + created_at TEXT DEFAULT CURRENT_TIMESTAMP + ) + `); + + // Create indexes for performance + this.db.run(` + CREATE INDEX IF NOT EXISTS idx_timestamp ON request_logs(timestamp) + `); + this.db.run(` + CREATE INDEX IF NOT EXISTS idx_provider ON request_logs(provider) + `); + this.db.run(` + CREATE INDEX IF NOT EXISTS idx_pii_detected ON request_logs(pii_detected) + `); + } + + log(entry: Omit): void { + const stmt = this.db.prepare(` + INSERT INTO request_logs + (timestamp, mode, provider, model, pii_detected, entities, latency_ms, scan_time_ms, prompt_tokens, completion_tokens, user_agent, language, language_fallback, detected_language, masked_content) + VALUES + (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `); + + stmt.run( + entry.timestamp, + entry.mode, + entry.provider, + entry.model, + entry.pii_detected ? 1 : 0, + entry.entities, + entry.latency_ms, + entry.scan_time_ms, + entry.prompt_tokens, + entry.completion_tokens, + entry.user_agent, + entry.language, + entry.language_fallback ? 1 : 0, + entry.detected_language, + entry.masked_content, + ); + } + + /** + * Gets recent logs + */ + getLogs(limit: number = 100, offset: number = 0): RequestLog[] { + const stmt = this.db.prepare(` + SELECT * FROM request_logs + ORDER BY timestamp DESC + LIMIT ? OFFSET ? + `); + + return stmt.all(limit, offset) as RequestLog[]; + } + + /** + * Gets statistics + */ + getStats(): Stats { + // Total requests + const totalResult = this.db.prepare(`SELECT COUNT(*) as count FROM request_logs`).get() as { + count: number; + }; + + // PII requests + const piiResult = this.db + .prepare(`SELECT COUNT(*) as count FROM request_logs WHERE pii_detected = 1`) + .get() as { count: number }; + + // Upstream vs Local + const upstreamResult = this.db + .prepare(`SELECT COUNT(*) as count FROM request_logs WHERE provider = 'upstream'`) + .get() as { count: number }; + const localResult = this.db + .prepare(`SELECT COUNT(*) as count FROM request_logs WHERE provider = 'local'`) + .get() as { count: number }; + + // Average scan time + const scanTimeResult = this.db + .prepare(`SELECT AVG(scan_time_ms) as avg FROM request_logs`) + .get() as { avg: number | null }; + + // Total tokens + const tokensResult = this.db + .prepare(` + SELECT COALESCE(SUM(COALESCE(prompt_tokens, 0) + COALESCE(completion_tokens, 0)), 0) as total + FROM request_logs + `) + .get() as { total: number }; + + // Requests last hour + const oneHourAgo = new Date(Date.now() - 60 * 60 * 1000).toISOString(); + const hourResult = this.db + .prepare(` + SELECT COUNT(*) as count FROM request_logs + WHERE timestamp >= ? + `) + .get(oneHourAgo) as { count: number }; + + const total = totalResult.count; + const pii = piiResult.count; + + return { + total_requests: total, + pii_requests: pii, + pii_percentage: total > 0 ? 
Math.round((pii / total) * 100 * 10) / 10 : 0, + upstream_requests: upstreamResult.count, + local_requests: localResult.count, + avg_scan_time_ms: Math.round(scanTimeResult.avg || 0), + total_tokens: tokensResult.total, + requests_last_hour: hourResult.count, + }; + } + + /** + * Gets entity breakdown + */ + getEntityStats(): Array<{ entity: string; count: number }> { + const logs = this.db + .prepare(` + SELECT entities FROM request_logs WHERE entities IS NOT NULL AND entities != '' + `) + .all() as Array<{ entities: string }>; + + const entityCounts = new Map(); + + for (const log of logs) { + const entities = log.entities + .split(",") + .map((e) => e.trim()) + .filter(Boolean); + for (const entity of entities) { + entityCounts.set(entity, (entityCounts.get(entity) || 0) + 1); + } + } + + return Array.from(entityCounts.entries()) + .map(([entity, count]) => ({ entity, count })) + .sort((a, b) => b.count - a.count); + } + + /** + * Cleans up old logs based on retention policy + */ + cleanup(): number { + if (this.retentionDays <= 0) { + return 0; // Keep forever + } + + const cutoffDate = new Date(); + cutoffDate.setDate(cutoffDate.getDate() - this.retentionDays); + + const result = this.db + .prepare(` + DELETE FROM request_logs WHERE timestamp < ? + `) + .run(cutoffDate.toISOString()); + + return result.changes; + } + + /** + * Closes database connection + */ + close(): void { + this.db.close(); + } +} + +// Singleton instance +let loggerInstance: Logger | null = null; + +export function getLogger(): Logger { + if (!loggerInstance) { + loggerInstance = new Logger(); + } + return loggerInstance; +} + +export interface RequestLogData { + timestamp: string; + mode: "route" | "mask"; + provider: "upstream" | "local"; + model: string; + piiDetected: boolean; + entities: string[]; + latencyMs: number; + scanTimeMs: number; + promptTokens?: number; + completionTokens?: number; + language: string; + languageFallback: boolean; + detectedLanguage?: string; + maskedContent?: string; +} + +export function logRequest(data: RequestLogData, userAgent: string | null): void { + try { + const logger = getLogger(); + logger.log({ + timestamp: data.timestamp, + mode: data.mode, + provider: data.provider, + model: data.model, + pii_detected: data.piiDetected, + entities: data.entities.join(","), + latency_ms: data.latencyMs, + scan_time_ms: data.scanTimeMs, + prompt_tokens: data.promptTokens ?? null, + completion_tokens: data.completionTokens ?? null, + user_agent: userAgent, + language: data.language, + language_fallback: data.languageFallback, + detected_language: data.detectedLanguage ?? null, + masked_content: data.maskedContent ?? 
null,
    });
  } catch (error) {
    console.error("Failed to log request:", error);
  }
}
diff --git a/src/services/masking.test.ts b/src/services/masking.test.ts
new file mode 100644
index 0000000..590b509
--- /dev/null
+++ b/src/services/masking.test.ts
@@ -0,0 +1,433 @@
import { describe, expect, test } from "bun:test";
import type { MaskingConfig } from "../config";
import type { ChatMessage } from "./llm-client";
import {
  createMaskingContext,
  flushStreamBuffer,
  mask,
  maskMessages,
  unmask,
  unmaskResponse,
  unmaskStreamChunk,
} from "./masking";
import type { PIIEntity } from "./pii-detector";

const defaultConfig: MaskingConfig = {
  show_markers: false,
  marker_text: "[protected]",
};

const configWithMarkers: MaskingConfig = {
  show_markers: true,
  marker_text: "[protected]",
};

describe("mask", () => {
  test("returns original text when no entities", () => {
    const result = mask("Hello world", []);
    expect(result.masked).toBe("Hello world");
    expect(Object.keys(result.context.mapping)).toHaveLength(0);
  });

  test("masks single email entity", () => {
    // "Contact: john@example.com please"
    //           ^9             ^25
    const entities: PIIEntity[] = [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 25, score: 1.0 }];

    const result = mask("Contact: john@example.com please", entities);

    expect(result.masked).toBe("Contact: <EMAIL_ADDRESS_1> please");
    expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("john@example.com");
  });

  test("masks multiple entities of same type", () => {
    const text = "Emails: a@b.com and c@d.com";
    const entities: PIIEntity[] = [
      { entity_type: "EMAIL_ADDRESS", start: 8, end: 15, score: 1.0 },
      { entity_type: "EMAIL_ADDRESS", start: 20, end: 27, score: 1.0 },
    ];

    const result = mask(text, entities);

    expect(result.masked).toBe("Emails: <EMAIL_ADDRESS_1> and <EMAIL_ADDRESS_2>");
    expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("a@b.com");
    expect(result.context.mapping["<EMAIL_ADDRESS_2>"]).toBe("c@d.com");
  });

  test("masks multiple entity types", () => {
    const text = "Hans Müller: hans@firma.de";
    const entities: PIIEntity[] = [
      { entity_type: "PERSON", start: 0, end: 11, score: 0.9 },
      { entity_type: "EMAIL_ADDRESS", start: 13, end: 26, score: 1.0 },
    ];

    const result = mask(text, entities);

    expect(result.masked).toBe("<PERSON_1>: <EMAIL_ADDRESS_1>");
    expect(result.context.mapping["<PERSON_1>"]).toBe("Hans Müller");
    expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("hans@firma.de");
  });

  test("reuses placeholder for duplicate values", () => {
    const text = "a@b.com and again a@b.com";
    const entities: PIIEntity[] = [
      { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 },
      { entity_type: "EMAIL_ADDRESS", start: 18, end: 25, score: 1.0 },
    ];

    const result = mask(text, entities);

    // Same value should get same placeholder
    expect(result.masked).toBe("<EMAIL_ADDRESS_1> and again <EMAIL_ADDRESS_1>");
    expect(Object.keys(result.context.mapping)).toHaveLength(1);
  });

  test("handles adjacent entities", () => {
    const text = "HansMüller";
    const entities: PIIEntity[] = [
      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 },
      { entity_type: "PERSON", start: 4, end: 10, score: 0.9 },
    ];

    const result = mask(text, entities);

    expect(result.masked).toBe("<PERSON_1><PERSON_2>");
  });
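  // Illustrative case (not from the original suite): counters are tracked per
  // entity type, so the first PERSON and the first EMAIL_ADDRESS both receive
  // the _1 suffix. Offsets below are hand-checked against the literal text.
  test("numbers placeholders independently per entity type (illustrative)", () => {
    const text = "Ann ann@x.com";
    const entities: PIIEntity[] = [
      { entity_type: "PERSON", start: 0, end: 3, score: 0.9 },
      { entity_type: "EMAIL_ADDRESS", start: 4, end: 13, score: 1.0 },
    ];

    const result = mask(text, entities);

    expect(result.masked).toBe("<PERSON_1> <EMAIL_ADDRESS_1>");
    expect(result.context.mapping["<PERSON_1>"]).toBe("Ann");
    expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("ann@x.com");
  });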
  test("preserves context across calls", () => {
    const context = createMaskingContext();

    const result1 = mask(
      "Email: a@b.com",
      [{ entity_type: "EMAIL_ADDRESS", start: 7, end: 14, score: 1.0 }],
      context,
    );

    expect(result1.masked).toBe("Email: <EMAIL_ADDRESS_1>");

    const result2 = mask(
      "Another: c@d.com",
      [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 16, score: 1.0 }],
      context,
    );

    // Should continue numbering
    expect(result2.masked).toBe("Another: <EMAIL_ADDRESS_2>");
    expect(context.mapping["<EMAIL_ADDRESS_1>"]).toBe("a@b.com");
    expect(context.mapping["<EMAIL_ADDRESS_2>"]).toBe("c@d.com");
  });
});

describe("unmask", () => {
  test("returns original text when no mappings", () => {
    const context = createMaskingContext();
    const result = unmask("Hello world", context, defaultConfig);
    expect(result).toBe("Hello world");
  });

  test("restores single placeholder", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "john@example.com";

    const result = unmask("Reply to <EMAIL_ADDRESS_1>", context, defaultConfig);
    expect(result).toBe("Reply to john@example.com");
  });

  test("restores multiple placeholders", () => {
    const context = createMaskingContext();
    context.mapping["<PERSON_1>"] = "Hans Müller";
    context.mapping["<EMAIL_ADDRESS_1>"] = "hans@firma.de";

    const result = unmask(
      "Hello <PERSON_1>, your email <EMAIL_ADDRESS_1> is confirmed",
      context,
      defaultConfig,
    );
    expect(result).toBe("Hello Hans Müller, your email hans@firma.de is confirmed");
  });

  test("restores repeated placeholders", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";

    const result = unmask("<EMAIL_ADDRESS_1> and <EMAIL_ADDRESS_1>", context, defaultConfig);
    expect(result).toBe("test@test.com and test@test.com");
  });

  test("adds markers when configured", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "john@example.com";

    const result = unmask("Email: <EMAIL_ADDRESS_1>", context, configWithMarkers);
    expect(result).toBe("Email: [protected]john@example.com");
  });

  test("handles partial placeholder (no match)", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";

    const result = unmask("Text with <EMAIL_ADDRESS_99>", context, defaultConfig);
    expect(result).toBe("Text with <EMAIL_ADDRESS_99>"); // No match, unchanged
  });
});

describe("mask -> unmask roundtrip", () => {
  test("preserves original data through roundtrip", () => {
    const originalText = "Contact Hans Müller at hans@firma.de or call +49123456789";
    const entities: PIIEntity[] = [
      { entity_type: "PERSON", start: 8, end: 19, score: 0.9 },
      { entity_type: "EMAIL_ADDRESS", start: 23, end: 36, score: 1.0 },
      { entity_type: "PHONE_NUMBER", start: 45, end: 57, score: 0.95 },
    ];

    const { masked, context } = mask(originalText, entities);

    // Verify masking worked
    expect(masked).not.toContain("Hans Müller");
    expect(masked).not.toContain("hans@firma.de");
    expect(masked).not.toContain("+49123456789");

    // Simulate LLM response that echoes placeholders
    const llmResponse = `I see your contact info: ${masked.match(/<PERSON_\d+>/)?.[0]}, email ${masked.match(/<EMAIL_ADDRESS_\d+>/)?.[0]}`;

    const unmasked = unmask(llmResponse, context, defaultConfig);

    expect(unmasked).toContain("Hans Müller");
    expect(unmasked).toContain("hans@firma.de");
  });

  test("handles empty entities array", () => {
    const text = "No PII here";
    const { masked, context } = mask(text, []);
    const unmasked = unmask(masked, context, defaultConfig);

    expect(unmasked).toBe(text);
  });
});

describe("maskMessages", () => {
  test("masks multiple messages", () => {
    const messages: ChatMessage[] = [
      { role: "user", content: "My email is test@example.com" },
      { role: "assistant", content: "Got it" },
      { role: "user", content: "Also john@test.com" },
    ];

    const entitiesByMessage: PIIEntity[][] = [
      [{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 1.0 }],
      [],
      [{ entity_type: "EMAIL_ADDRESS", start: 5, end: 18, score: 1.0 }],
    ];

    const { masked, context } = maskMessages(messages, entitiesByMessage);

    expect(masked[0].content).toBe("My email is <EMAIL_ADDRESS_1>");
    expect(masked[1].content).toBe("Got it");
    expect(masked[2].content).toBe("Also <EMAIL_ADDRESS_2>");

    expect(context.mapping["<EMAIL_ADDRESS_1>"]).toBe("test@example.com");
    expect(context.mapping["<EMAIL_ADDRESS_2>"]).toBe("john@test.com");
  });

  test("preserves message roles", () => {
    const messages: ChatMessage[] = [
      { role: "system", content: "You are helpful" },
      { role: "user", content: "Hi" },
    ];

    const { masked } = maskMessages(messages, [[], []]);

    expect(masked[0].role).toBe("system");
    expect(masked[1].role).toBe("user");
  });
});

describe("streaming unmask", () => {
  test("unmasks complete placeholder in chunk", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";

    const { output, remainingBuffer } = unmaskStreamChunk(
      "",
      "Hello <EMAIL_ADDRESS_1>!",
      context,
      defaultConfig,
    );

    expect(output).toBe("Hello test@test.com!");
    expect(remainingBuffer).toBe("");
  });

  test("buffers partial placeholder", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";

    const { output, remainingBuffer } = unmaskStreamChunk(
      "",
      "Hello <EMAIL_",
      context,
      defaultConfig,
    );

    expect(output).toBe("Hello ");
    expect(remainingBuffer).toBe("<EMAIL_");
  });

  test("handles placeholder split across chunks", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";

    const { output, remainingBuffer } = unmaskStreamChunk(
      "<EMAIL_ADDRESS",
      "_1> there",
      context,
      defaultConfig,
    );

    expect(output).toBe("test@test.com there");
    expect(remainingBuffer).toBe("");
  });
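  // Illustrative case (not from the original suite): wherever the split falls,
  // emitted output plus the carried buffer must reproduce the original value
  // once the closing ">" arrives. Relies only on the chunk/buffer contract
  // exercised by the two tests above.
  test("reassembles a value across an arbitrary split (illustrative)", () => {
    const context = createMaskingContext();
    context.mapping["<EMAIL_ADDRESS_1>"] = "a@b.com";

    const first = unmaskStreamChunk("", "Hi <EMAIL_ADDR", context, defaultConfig);
    const second = unmaskStreamChunk(first.remainingBuffer, "ESS_1>!", context, defaultConfig);

    expect(first.output + second.output).toBe("Hi a@b.com!");
  });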
1.0 }], + ]; + + const { masked, context } = maskMessages(messages, entitiesByMessage); + + expect(masked[0].content).toBe("My email is "); + expect(masked[1].content).toBe("Got it"); + expect(masked[2].content).toBe("Also "); + + expect(context.mapping[""]).toBe("test@example.com"); + expect(context.mapping[""]).toBe("john@test.com"); + }); + + test("preserves message roles", () => { + const messages: ChatMessage[] = [ + { role: "system", content: "You are helpful" }, + { role: "user", content: "Hi" }, + ]; + + const { masked } = maskMessages(messages, [[], []]); + + expect(masked[0].role).toBe("system"); + expect(masked[1].role).toBe("user"); + }); +}); + +describe("streaming unmask", () => { + test("unmasks complete placeholder in chunk", () => { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + + const { output, remainingBuffer } = unmaskStreamChunk( + "", + "Hello !", + context, + defaultConfig, + ); + + expect(output).toBe("Hello test@test.com!"); + expect(remainingBuffer).toBe(""); + }); + + test("buffers partial placeholder", () => { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + + const { output, remainingBuffer } = unmaskStreamChunk( + "", + "Hello { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + + const { output, remainingBuffer } = unmaskStreamChunk( + " there", + context, + defaultConfig, + ); + + expect(output).toBe("test@test.com there"); + expect(remainingBuffer).toBe(""); + }); + + test("handles text without placeholders", () => { + const context = createMaskingContext(); + + const { output, remainingBuffer } = unmaskStreamChunk( + "", + "Just normal text", + context, + defaultConfig, + ); + + expect(output).toBe("Just normal text"); + expect(remainingBuffer).toBe(""); + }); + + test("flushes remaining buffer", () => { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + + // Partial that never completes + const flushed = flushStreamBuffer(" { + test("unmasks all choices in response", () => { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + context.mapping[""] = "John Doe"; + + const response = { + id: "chatcmpl-123", + object: "chat.completion" as const, + created: 1234567890, + model: "gpt-4", + choices: [ + { + index: 0, + message: { + role: "assistant" as const, + content: "Contact at ", + }, + finish_reason: "stop" as const, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 20, + total_tokens: 30, + }, + }; + + const result = unmaskResponse(response, context, defaultConfig); + + expect(result.choices[0].message.content).toBe("Contact John Doe at test@test.com"); + expect(result.id).toBe("chatcmpl-123"); + expect(result.model).toBe("gpt-4"); + }); + + test("handles multiple choices", () => { + const context = createMaskingContext(); + context.mapping[""] = "a@b.com"; + + const response = { + id: "chatcmpl-456", + object: "chat.completion" as const, + created: 1234567890, + model: "gpt-4", + choices: [ + { + index: 0, + message: { role: "assistant" as const, content: "First: " }, + finish_reason: "stop" as const, + }, + { + index: 1, + message: { role: "assistant" as const, content: "Second: " }, + finish_reason: "stop" as const, + }, + ], + }; + + const result = unmaskResponse(response, context, defaultConfig); + + expect(result.choices[0].message.content).toBe("First: a@b.com"); + expect(result.choices[1].message.content).toBe("Second: a@b.com"); + }); + + test("preserves 
response structure", () => { + const context = createMaskingContext(); + const response = { + id: "test-id", + object: "chat.completion" as const, + created: 999, + model: "test-model", + choices: [ + { + index: 0, + message: { role: "assistant" as const, content: "No placeholders" }, + finish_reason: null, + }, + ], + usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 }, + }; + + const result = unmaskResponse(response, context, defaultConfig); + + expect(result.id).toBe("test-id"); + expect(result.object).toBe("chat.completion"); + expect(result.created).toBe(999); + expect(result.model).toBe("test-model"); + expect(result.usage).toEqual({ prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 }); + }); +}); + +describe("edge cases", () => { + test("handles unicode in masked text", () => { + const text = "Kontakt: François Müller"; + const entities: PIIEntity[] = [{ entity_type: "PERSON", start: 9, end: 24, score: 0.9 }]; + + const { masked, context } = mask(text, entities); + expect(masked).toBe("Kontakt: "); + + const unmasked = unmask(masked, context, defaultConfig); + expect(unmasked).toBe("Kontakt: François Müller"); + }); + + test("handles empty text", () => { + const { masked, context } = mask("", []); + expect(masked).toBe(""); + expect(unmask("", context, defaultConfig)).toBe(""); + }); + + test("handles placeholder-like text that is not a real placeholder", () => { + const context = createMaskingContext(); + context.mapping[""] = "test@test.com"; + + const result = unmask("Use format", context, defaultConfig); + expect(result).toBe("Use format"); + }); +}); diff --git a/src/services/masking.ts b/src/services/masking.ts new file mode 100644 index 0000000..fa5da21 --- /dev/null +++ b/src/services/masking.ts @@ -0,0 +1,204 @@ +import type { MaskingConfig } from "../config"; +import type { ChatCompletionResponse, ChatMessage } from "./llm-client"; +import type { PIIEntity } from "./pii-detector"; + +export interface MaskingContext { + mapping: Record; + reverseMapping: Record; + counters: Record; +} + +export interface MaskResult { + masked: string; + context: MaskingContext; +} + +/** + * Creates a new masking context for a request + */ +export function createMaskingContext(): MaskingContext { + return { + mapping: {}, + reverseMapping: {}, + counters: {}, + }; +} + +const PLACEHOLDER_FORMAT = "<{TYPE}_{N}>"; + +/** + * Generates a placeholder for a PII entity type + */ +function generatePlaceholder(entityType: string, context: MaskingContext): string { + const count = (context.counters[entityType] || 0) + 1; + context.counters[entityType] = count; + + return PLACEHOLDER_FORMAT.replace("{TYPE}", entityType).replace("{N}", String(count)); +} + +/** + * Masks PII entities in text, replacing them with placeholders + * + * First assigns placeholders in order of appearance (start position ascending), + * then replaces from end to start to maintain correct string positions + */ +export function mask(text: string, entities: PIIEntity[], context?: MaskingContext): MaskResult { + const ctx = context || createMaskingContext(); + + if (entities.length === 0) { + return { masked: text, context: ctx }; + } + + // First pass: sort by start position ascending to assign placeholders in order + const sortedByStart = [...entities].sort((a, b) => a.start - b.start); + + // Assign placeholders in order of appearance + const entityPlaceholders = new Map(); + for (const entity of sortedByStart) { + const originalValue = text.slice(entity.start, entity.end); + + // Check if we already 
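The suite above pins down the whole placeholder lifecycle. For reference, a minimal standalone sketch of that mask/unmask flow — the text, entity offsets, and config literal are illustrative; the imports assume the src/services layout introduced in this commit:

```ts
import type { MaskingConfig } from "./src/config";
import { createMaskingContext, mask, unmask } from "./src/services/masking";
import type { PIIEntity } from "./src/services/pii-detector";

// "alice@example.com" occupies indices 5..22 of the text below.
const text = "Mail alice@example.com today";
const entities: PIIEntity[] = [
  { entity_type: "EMAIL_ADDRESS", start: 5, end: 22, score: 1.0 },
];

const { masked, context } = mask(text, entities);
// masked === "Mail <EMAIL_ADDRESS_1> today"

const config: MaskingConfig = { show_markers: false, marker_text: "[protected]" };
const restored = unmask(masked, context, config);
// restored === "Mail alice@example.com today"
```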
diff --git a/src/services/masking.ts b/src/services/masking.ts
new file mode 100644
index 0000000..fa5da21
--- /dev/null
+++ b/src/services/masking.ts
@@ -0,0 +1,204 @@
+import type { MaskingConfig } from "../config";
+import type { ChatCompletionResponse, ChatMessage } from "./llm-client";
+import type { PIIEntity } from "./pii-detector";
+
+export interface MaskingContext {
+  mapping: Record<string, string>;
+  reverseMapping: Record<string, string>;
+  counters: Record<string, number>;
+}
+
+export interface MaskResult {
+  masked: string;
+  context: MaskingContext;
+}
+
+/**
+ * Creates a new masking context for a request
+ */
+export function createMaskingContext(): MaskingContext {
+  return {
+    mapping: {},
+    reverseMapping: {},
+    counters: {},
+  };
+}
+
+const PLACEHOLDER_FORMAT = "<{TYPE}_{N}>";
+
+/**
+ * Generates a placeholder for a PII entity type
+ */
+function generatePlaceholder(entityType: string, context: MaskingContext): string {
+  const count = (context.counters[entityType] || 0) + 1;
+  context.counters[entityType] = count;
+
+  return PLACEHOLDER_FORMAT.replace("{TYPE}", entityType).replace("{N}", String(count));
+}
+
+/**
+ * Masks PII entities in text, replacing them with placeholders
+ *
+ * First assigns placeholders in order of appearance (start position ascending),
+ * then replaces from end to start to maintain correct string positions
+ */
+export function mask(text: string, entities: PIIEntity[], context?: MaskingContext): MaskResult {
+  const ctx = context || createMaskingContext();
+
+  if (entities.length === 0) {
+    return { masked: text, context: ctx };
+  }
+
+  // First pass: sort by start position ascending to assign placeholders in order
+  const sortedByStart = [...entities].sort((a, b) => a.start - b.start);
+
+  // Assign placeholders in order of appearance
+  const entityPlaceholders = new Map<PIIEntity, string>();
+  for (const entity of sortedByStart) {
+    const originalValue = text.slice(entity.start, entity.end);
+
+    // Check if we already have a placeholder for this exact value
+    let placeholder = ctx.reverseMapping[originalValue];
+
+    if (!placeholder) {
+      placeholder = generatePlaceholder(entity.entity_type, ctx);
+      ctx.mapping[placeholder] = originalValue;
+      ctx.reverseMapping[originalValue] = placeholder;
+    }
+
+    entityPlaceholders.set(entity, placeholder);
+  }
+
+  // Second pass: sort by start position descending for replacement
+  // This ensures string indices remain valid as we replace
+  const sortedByStartDesc = [...entities].sort((a, b) => b.start - a.start);
+
+  let result = text;
+  for (const entity of sortedByStartDesc) {
+    const placeholder = entityPlaceholders.get(entity)!;
+    result = result.slice(0, entity.start) + placeholder + result.slice(entity.end);
+  }
+
+  return { masked: result, context: ctx };
+}
+
+/**
+ * Unmasks text by replacing placeholders with original values
+ *
+ * Optionally adds markers to indicate protected content
+ */
+export function unmask(text: string, context: MaskingContext, config: MaskingConfig): string {
+  let result = text;
+
+  // Sort placeholders by length descending to avoid partial replacements
+  const placeholders = Object.keys(context.mapping).sort((a, b) => b.length - a.length);
+
+  for (const placeholder of placeholders) {
+    const originalValue = context.mapping[placeholder];
+    const replacement = config.show_markers
+      ? `${config.marker_text}${originalValue}`
+      : originalValue;
+
+    // Replace all occurrences of the placeholder
+    result = result.split(placeholder).join(replacement);
+  }
+
+  return result;
+}
+
+/**
+ * Masks multiple messages (for chat completions)
+ */
+export function maskMessages(
+  messages: ChatMessage[],
+  entitiesByMessage: PIIEntity[][],
+): { masked: ChatMessage[]; context: MaskingContext } {
+  const context = createMaskingContext();
+
+  const masked = messages.map((msg, i) => {
+    const entities = entitiesByMessage[i] || [];
+    const { masked: maskedContent } = mask(msg.content, entities, context);
+    return { ...msg, content: maskedContent };
+  });
+
+  return { masked, context };
+}
+
+/**
+ * Streaming unmask helper - processes chunks and unmasks when complete placeholders are found
+ *
+ * Returns the unmasked portion and any remaining buffer that might contain partial placeholders
+ */
+export function unmaskStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: MaskingContext,
+  config: MaskingConfig,
+): { output: string; remainingBuffer: string } {
+  const combined = buffer + newChunk;
+
+  // Find the last safe position to unmask (before any potential partial placeholder)
+  // Look for the start of any potential placeholder pattern
+  const placeholderStart = combined.lastIndexOf("<");
+
+  if (placeholderStart === -1) {
+    // No potential placeholder, safe to unmask everything
+    return {
+      output: unmask(combined, context, config),
+      remainingBuffer: "",
+    };
+  }
+
+  // Check if there's a complete placeholder after the last <
+  const afterStart = combined.slice(placeholderStart);
+  const hasCompletePlaceholder = afterStart.includes(">");
+
+  if (hasCompletePlaceholder) {
+    // The placeholder is complete, safe to unmask everything
+    return {
+      output: unmask(combined, context, config),
+      remainingBuffer: "",
+    };
+  }
+
+  // Partial placeholder detected, buffer it
+  const safeToProcess = combined.slice(0, placeholderStart);
+  const toBuffer = combined.slice(placeholderStart);
+
+  return {
+    output: unmask(safeToProcess, context, config),
+    remainingBuffer: toBuffer,
+  };
+}
+
+/**
+ * Flushes remaining buffer at end of stream
+ */
+export function flushStreamBuffer(
+  buffer: string,
+  context: MaskingContext,
+  config: MaskingConfig,
+): string {
+  if (!buffer) return "";
+  return unmask(buffer, context, config);
+}
+
+/**
+ * Unmasks a chat completion response by replacing placeholders in all choices
+ */
+export function unmaskResponse(
+  response: ChatCompletionResponse,
+  context: MaskingContext,
+  config: MaskingConfig,
+): ChatCompletionResponse {
+  return {
+    ...response,
+    choices: response.choices.map((choice) => ({
+      ...choice,
+      message: {
+        ...choice.message,
+        content: unmask(choice.message.content, context, config),
+      },
+    })),
+  };
+}
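The streaming helpers at the end of this file are driven chunk by chunk: feed each delta plus the carried buffer through `unmaskStreamChunk`, then flush once the stream ends. A short sketch of the intended loop, with an illustrative placeholder split mid-token (the chunk boundaries and values are assumptions, not taken from this commit):

```ts
import type { MaskingConfig } from "./src/config";
import {
  createMaskingContext,
  flushStreamBuffer,
  unmaskStreamChunk,
} from "./src/services/masking";

const context = createMaskingContext();
context.mapping["<EMAIL_ADDRESS_1>"] = "alice@example.com";

const config: MaskingConfig = { show_markers: false, marker_text: "[protected]" };
const chunks = ["Reply to <EMAIL_", "ADDRESS_1> soon"]; // placeholder split mid-token

let buffer = "";
let out = "";
for (const chunk of chunks) {
  const { output, remainingBuffer } = unmaskStreamChunk(buffer, chunk, context, config);
  out += output; // safe, fully unmasked text
  buffer = remainingBuffer; // possible partial placeholder carried forward
}
out += flushStreamBuffer(buffer, context, config);
// out === "Reply to alice@example.com soon"
```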
diff --git a/src/services/pii-detector.ts b/src/services/pii-detector.ts
new file mode 100644
index 0000000..cff9772
--- /dev/null
+++ b/src/services/pii-detector.ts
@@ -0,0 +1,247 @@
+import { getConfig } from "../config";
+import {
+  getLanguageDetector,
+  type LanguageDetectionResult,
+  type SupportedLanguage,
+} from "./language-detector";
+
+export interface PIIEntity {
+  entity_type: string;
+  start: number;
+  end: number;
+  score: number;
+}
+
+interface AnalyzeRequest {
+  text: string;
+  language: string;
+  entities?: string[];
+  score_threshold?: number;
+}
+
+export interface PIIDetectionResult {
+  hasPII: boolean;
+  entitiesByMessage: PIIEntity[][];
+  newEntities: PIIEntity[];
+  scanTimeMs: number;
+  language: SupportedLanguage;
+  languageFallback: boolean;
+  detectedLanguage?: string;
+}
+
+export class PIIDetector {
+  private presidioUrl: string;
+  private scoreThreshold: number;
+  private entityTypes: string[];
+  private languageValidation?: { available: string[]; missing: string[] };
+
+  constructor() {
+    const config = getConfig();
+    this.presidioUrl = config.pii_detection.presidio_url;
+    this.scoreThreshold = config.pii_detection.score_threshold;
+    this.entityTypes = config.pii_detection.entities;
+  }
+
+  async detectPII(text: string, language: SupportedLanguage): Promise<PIIEntity[]> {
+    const analyzeEndpoint = `${this.presidioUrl}/analyze`;
+
+    const request: AnalyzeRequest = {
+      text,
+      language,
+      entities: this.entityTypes,
+      score_threshold: this.scoreThreshold,
+    };
+
+    try {
+      const response = await fetch(analyzeEndpoint, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify(request),
+        signal: AbortSignal.timeout(30_000),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        throw new Error(
+          `Presidio API error: ${response.status} ${response.statusText} - ${errorText}`,
+        );
+      }
+
+      return (await response.json()) as PIIEntity[];
+    } catch (error) {
+      if (error instanceof Error) {
+        if (error.message.includes("fetch")) {
+          throw new Error(`Failed to connect to Presidio at ${this.presidioUrl}: ${error.message}`);
+        }
+        throw error;
+      }
+      throw new Error(`Unknown error during PII detection: ${error}`);
+    }
+  }
+
+  async analyzeMessages(
+    messages: Array<{ role: string; content: string }>,
+  ): Promise<PIIDetectionResult> {
+    const startTime = Date.now();
+
+    const lastUserIndex = messages.findLastIndex((m) => m.role === "user");
+
+    if (lastUserIndex === -1 || !messages[lastUserIndex].content) {
+      const config = getConfig();
+      return {
+        hasPII: false,
+        entitiesByMessage: messages.map(() => []),
+        newEntities: [],
+        scanTimeMs: Date.now() - startTime,
+        language: config.pii_detection.fallback_language,
+        languageFallback: false,
+      };
+    }
+
+    const text = messages[lastUserIndex].content;
+    const langResult = getLanguageDetector().detect(text);
+    const newEntities = await this.detectPII(text, langResult.language);
+
+    const entitiesByMessage = messages.map((_, i) => (i === lastUserIndex ? newEntities : []));
+
+    return {
+      hasPII: newEntities.length > 0,
+      entitiesByMessage,
+      newEntities,
+      scanTimeMs: Date.now() - startTime,
+      language: langResult.language,
+      languageFallback: langResult.usedFallback,
+      detectedLanguage: langResult.detectedLanguage,
+    };
+  }
+
+  async analyzeAllMessages(
+    messages: Array<{ role: string; content: string }>,
+    langResult: LanguageDetectionResult,
+  ): Promise<PIIDetectionResult> {
+    const startTime = Date.now();
+
+    const entitiesByMessage = await Promise.all(
+      messages.map((message) =>
+        message.content && (message.role === "user" || message.role === "assistant")
+          ? this.detectPII(message.content, langResult.language)
+          : Promise.resolve([]),
+      ),
+    );
+
+    return {
+      hasPII: entitiesByMessage.some((e) => e.length > 0),
+      entitiesByMessage,
+      newEntities: [],
+      scanTimeMs: Date.now() - startTime,
+      language: langResult.language,
+      languageFallback: langResult.usedFallback,
+      detectedLanguage: langResult.detectedLanguage,
+    };
+  }
+
+  async healthCheck(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.presidioUrl}/health`, {
+        method: "GET",
+        signal: AbortSignal.timeout(5000),
+      });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Wait for Presidio to be ready (for docker-compose startup order)
+   */
+  async waitForReady(maxRetries = 30, delayMs = 1000): Promise<boolean> {
+    for (let i = 1; i <= maxRetries; i++) {
+      if (await this.healthCheck()) {
+        return true;
+      }
+      if (i < maxRetries) {
+        // Show initial message, then a progress dot every 5 attempts
+        if (i === 1) {
+          process.stdout.write("[STARTUP] Waiting for Presidio");
+        } else if (i % 5 === 0) {
+          process.stdout.write(".");
+        }
+        await new Promise((resolve) => setTimeout(resolve, delayMs));
+      }
+    }
+    process.stdout.write("\n");
+    return false;
+  }
+
+  /**
+   * Test if a language is supported by trying to analyze with it
+   */
+  async isLanguageSupported(language: string): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.presidioUrl}/analyze`, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          text: "test",
+          language,
+          entities: ["PERSON"],
+        }),
+        signal: AbortSignal.timeout(5000),
+      });
+
+      // If we get a response (even an empty array), the language is supported.
+      // An error like "No matching recognizers" means it is not.
+      if (response.ok) {
+        return true;
+      }
+
+      const errorText = await response.text();
+      return !errorText.includes("No matching recognizers");
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Validate multiple languages, return available/missing
+   */
+  async validateLanguages(languages: string[]): Promise<{
+    available: string[];
+    missing: string[];
+  }> {
+    const results = await Promise.all(
+      languages.map(async (lang) => ({
+        lang,
+        supported: await this.isLanguageSupported(lang),
+      })),
+    );
+
+    this.languageValidation = {
+      available: results.filter((r) => r.supported).map((r) => r.lang),
+      missing: results.filter((r) => !r.supported).map((r) => r.lang),
+    };
+
+    return this.languageValidation;
+  }
+
+  /**
+   * Get the cached language validation result
+   */
+  getLanguageValidation(): { available: string[]; missing: string[] } | undefined {
+    return this.languageValidation;
+  }
+}
+
+let detectorInstance: PIIDetector | null = null;
+
+export function getPIIDetector(): PIIDetector {
+  if (!detectorInstance) {
+    detectorInstance = new PIIDetector();
+  }
+  return detectorInstance;
+}
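Detection and masking are meant to compose: scan the messages, then feed `entitiesByMessage` into `maskMessages`. A hedged sketch of that handoff, assuming config.yaml has been loaded and a Presidio analyzer is reachable at the configured URL (the message content is illustrative):

```ts
import { maskMessages } from "./src/services/masking";
import { getPIIDetector } from "./src/services/pii-detector";

// Singleton detector; reads presidio_url, score_threshold, entities from config.
const detector = getPIIDetector();

const messages = [{ role: "user" as const, content: "My email is alice@example.com" }];
const scan = await detector.analyzeMessages(messages);

if (scan.hasPII) {
  // Masked messages go upstream; the context stays local to unmask the reply.
  const { masked, context } = maskMessages(messages, scan.entitiesByMessage);
  console.log(masked[0].content, Object.keys(context.mapping), scan.scanTimeMs);
}
```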
diff --git a/src/services/stream-transformer.test.ts b/src/services/stream-transformer.test.ts
new file mode 100644
index 0000000..0e43ef2
--- /dev/null
+++ b/src/services/stream-transformer.test.ts
@@ -0,0 +1,153 @@
+import { describe, expect, test } from "bun:test";
+import type { MaskingConfig } from "../config";
+import { createMaskingContext } from "./masking";
+import { createUnmaskingStream } from "./stream-transformer";
+
+const defaultConfig: MaskingConfig = {
+  show_markers: false,
+  marker_text: "[protected]",
+};
+
+/**
+ * Helper to create a ReadableStream from SSE data
+ */
+function createSSEStream(chunks: string[]): ReadableStream<Uint8Array> {
+  const encoder = new TextEncoder();
+  let index = 0;
+
+  return new ReadableStream({
+    pull(controller) {
+      if (index < chunks.length) {
+        controller.enqueue(encoder.encode(chunks[index]));
+        index++;
+      } else {
+        controller.close();
+      }
+    },
+  });
+}
+
+/**
+ * Helper to consume a stream and return all chunks as string
+ */
+async function consumeStream(stream: ReadableStream<Uint8Array>): Promise<string> {
+  const reader = stream.getReader();
+  const decoder = new TextDecoder();
+  let result = "";
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break;
+    result += decoder.decode(value, { stream: true });
+  }
+
+  return result;
+}
+
+describe("createUnmaskingStream", () => {
+  test("unmasks complete placeholder in single chunk", async () => {
+    const context = createMaskingContext();
+    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+
+    const sseData = `data: {"choices":[{"delta":{"content":"Hello <EMAIL_ADDRESS_1>!"}}]}\n\n`;
+    const source = createSSEStream([sseData]);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain("Hello test@test.com!");
+  });
+
+  test("handles [DONE] message", async () => {
+    const context = createMaskingContext();
+
+    const chunks = [`data: {"choices":[{"delta":{"content":"Hi"}}]}\n\n`, `data: [DONE]\n\n`];
+    const source = createSSEStream(chunks);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain("data: [DONE]");
+  });
+
+  test("passes through non-content events", async () => {
+    const context = createMaskingContext();
+
+    const sseData = `data: {"choices":[{"delta":{}}]}\n\n`;
+    const source = createSSEStream([sseData]);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain(`{"choices":[{"delta":{}}]}`);
+  });
+
+  test("buffers partial placeholder across chunks", async () => {
+    const context = createMaskingContext();
+    context.mapping["<EMAIL_ADDRESS_1>"] = "a@b.com";
+
+    // Split placeholder across chunks
+    const chunks = [
+      `data: {"choices":[{"delta":{"content":"Hello <EMAIL_ADD"}}]}\n\n`,
+      `data: {"choices":[{"delta":{"content":"RESS_1> world"}}]}\n\n`,
+    ];
+    const source = createSSEStream(chunks);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    // Should eventually contain the unmasked email
+    expect(result).toContain("a@b.com");
+  });
+
+  test("flushes remaining buffer on stream end", async () => {
+    const context = createMaskingContext();
+    context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+
+    // Placeholder arrives whole in the final content chunk
+    const chunks = [`data: {"choices":[{"delta":{"content":"Contact <EMAIL_ADDRESS_1>"}}]}\n\n`];
+    const source = createSSEStream(chunks);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain("test@test.com");
+  });
+
+  test("handles multiple placeholders in stream", async () => {
+    const context = createMaskingContext();
+    context.mapping["<PERSON_1>"] = "John";
+    context.mapping["<EMAIL_ADDRESS_1>"] = "john@test.com";
+
+    const sseData = `data: {"choices":[{"delta":{"content":"<PERSON_1>: <EMAIL_ADDRESS_1>"}}]}\n\n`;
+    const source = createSSEStream([sseData]);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain("John");
+    expect(result).toContain("john@test.com");
+  });
+
+  test("handles empty stream", async () => {
+    const context = createMaskingContext();
+    const source = createSSEStream([]);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toBe("");
+  });
+
+  test("passes through malformed data", async () => {
+    const context = createMaskingContext();
+
+    const chunks = [`data: not-json\n\n`];
+    const source = createSSEStream(chunks);
+
+    const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
+    const result = await consumeStream(unmaskedStream);
+
+    expect(result).toContain("not-json");
+  });
+});
diff --git a/src/services/stream-transformer.ts b/src/services/stream-transformer.ts
new file mode 100644
index 0000000..6405ee8
--- /dev/null
+++ b/src/services/stream-transformer.ts
@@ -0,0 +1,102 @@
+import type { MaskingConfig } from "../config";
+import { flushStreamBuffer, type MaskingContext, unmaskStreamChunk } from "./masking";
+
+/**
+ * Creates a transform stream that unmasks SSE content
+ *
+ * Processes Server-Sent Events (SSE) chunks, buffering partial placeholders
+ * and unmasking complete ones before forwarding to the client.
+ */
+export function createUnmaskingStream(
+  source: ReadableStream<Uint8Array>,
+  context: MaskingContext,
+  config: MaskingConfig,
+): ReadableStream<Uint8Array> {
+  const decoder = new TextDecoder();
+  const encoder = new TextEncoder();
+  let contentBuffer = "";
+
+  return new ReadableStream({
+    async start(controller) {
+      const reader = source.getReader();
+
+      try {
+        while (true) {
+          const { done, value } = await reader.read();
+
+          if (done) {
+            // Flush remaining buffer content before closing
+            if (contentBuffer) {
+              const flushed = flushStreamBuffer(contentBuffer, context, config);
+              if (flushed) {
+                const finalEvent = {
+                  id: `flush-${Date.now()}`,
+                  object: "chat.completion.chunk",
+                  created: Math.floor(Date.now() / 1000),
+                  choices: [
+                    {
+                      index: 0,
+                      delta: { content: flushed },
+                      finish_reason: null,
+                    },
+                  ],
+                };
+                controller.enqueue(encoder.encode(`data: ${JSON.stringify(finalEvent)}\n\n`));
+              }
+            }
+            controller.close();
+            break;
+          }
+
+          const chunk = decoder.decode(value, { stream: true });
+          const lines = chunk.split("\n");
+
+          for (const line of lines) {
+            if (line.startsWith("data: ")) {
+              const data = line.slice(6);
+
+              if (data === "[DONE]") {
+                controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+                continue;
+              }
+
+              try {
+                const parsed = JSON.parse(data);
+                const content = parsed.choices?.[0]?.delta?.content || "";
+
+                if (content) {
+                  // Use streaming unmask
+                  const { output, remainingBuffer } = unmaskStreamChunk(
+                    contentBuffer,
+                    content,
+                    context,
+                    config,
+                  );
+                  contentBuffer = remainingBuffer;
+
+                  if (output) {
+                    // Update the parsed object with unmasked content
+                    parsed.choices[0].delta.content = output;
+                    controller.enqueue(encoder.encode(`data: ${JSON.stringify(parsed)}\n\n`));
+                  }
+                } else {
+                  // Pass through non-content events
+                  controller.enqueue(encoder.encode(`data: ${data}\n\n`));
+                }
+              } catch {
+                // Pass through unparseable data
+                controller.enqueue(encoder.encode(`${line}\n`));
+              }
+            } else if (line.trim()) {
+              controller.enqueue(encoder.encode(`${line}\n`));
+            }
+          }
+        }
+      } catch (error) {
+        controller.error(error);
+      } finally {
+        reader.releaseLock();
+      }
+    },
+  });
+}
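To place the transformer in the request path: the chat route can pipe an upstream SSE body through `createUnmaskingStream` before returning it. A rough sketch under stated assumptions — `proxyStreamingResponse`, its header set, and the error message are illustrative, not part of this commit:

```ts
import type { MaskingConfig } from "./src/config";
import type { MaskingContext } from "./src/services/masking";
import { createUnmaskingStream } from "./src/services/stream-transformer";

// Hypothetical handler: `upstream` is the provider's streaming response,
// `context` is the mapping built when the request was masked.
function proxyStreamingResponse(
  upstream: Response,
  context: MaskingContext,
  config: MaskingConfig,
): Response {
  if (!upstream.body) {
    throw new Error("Upstream response has no body to stream");
  }
  const unmasked = createUnmaskingStream(upstream.body, context, config);
  return new Response(unmasked, {
    status: upstream.status,
    headers: { "Content-Type": "text/event-stream" },
  });
}
```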
diff --git a/src/views/dashboard/page.tsx b/src/views/dashboard/page.tsx
new file mode 100644
index 0000000..1156628
--- /dev/null
+++ b/src/views/dashboard/page.tsx
@@ -0,0 +1,508 @@
+import type { FC } from "hono/jsx";
+
+const DashboardPage: FC = () => {
+  return (
+    <html>
+      <head>
+        <meta charset="utf-8" />
+        <title>LLM-Shield Dashboard</title>