Manual Keyword Mapping: Admin defines which keywords trigger which APIs:
// Basic API configuration for an admin-registered external API.
// Endpoints are matched to user queries via per-endpoint trigger keywords.
public class CustomApi
{
    public int Id { get; set; }

    // Display name shown to admins (e.g. "Inventory API").
    public string Name { get; set; }

    // Root URL; endpoint paths are appended when calling out.
    public string BaseUrl { get; set; }

    public string AuthType { get; set; } // "api_key" or "bearer"
    public string ApiKey { get; set; } // encrypted

    // Initialized so callers can Add() without a null check.
    public List<ApiEndpoint> Endpoints { get; set; } = new List<ApiEndpoint>();
}
// A single callable endpoint of a CustomApi, matched to user queries by
// case-insensitive substring search over TriggerKeywords.
public class ApiEndpoint
{
    public int Id { get; set; }

    // Path relative to the owning API's BaseUrl.
    public string EndpointPath { get; set; }

    // HTTP verb; only GET is exercised by the visible client code.
    public string Method { get; set; } = "GET";

    public string Description { get; set; }

    // Initialized to empty so keyword matching never hits a null array.
    public string[] TriggerKeywords { get; set; } = Array.Empty<string>();
}
// Simple usage example
query = "What's the current inventory for Product X?"
// System detects "current inventory" keywords
// Calls: GET {BaseUrl}/inventory?product=X
// Adds result to context before LLM generation

// chat.component.ts
// Root chat UI: renders the message list and input box, wires outgoing
// messages to the HTTP ChatService and incoming pushes to SignalR.
@Component({
selector: 'app-chat',
template: `
<div class="chat-container">
<app-message-list [messages]="messages"></app-message-list>
<app-message-input
(messageSent)="sendMessage($event)"
[isLoading]="isLoading">
</app-message-input>
</div>
`
})
export class ChatComponent implements OnInit {
// Full conversation rendered in the template (user + server messages).
messages: ChatMessage[] = [];
// Disables the input while a request is in flight.
isLoading = false;
constructor(
private chatService: ChatService,
private signalRService: SignalRService
) {}
ngOnInit() {
// Open the realtime connection and append server-pushed messages.
// NOTE(review): the listener is never removed and there is no ngOnDestroy;
// confirm SignalRService handles teardown, or add unsubscription.
this.signalRService.startConnection();
this.signalRService.addMessageListener((message) => {
this.messages.push(message);
});
}
async sendMessage(content: string) {
this.isLoading = true;
try {
// NOTE(review): ChatService.sendMessage returns an Observable, which
// `await` does not subscribe to — verify this resolves as intended
// (e.g. convert with firstValueFrom) before relying on `response`.
const response = await this.chatService.sendMessage(content);
this.messages.push(response);
} finally {
// Re-enable input even if the request throws.
this.isLoading = false;
}
}
}
// chat.service.ts
// Thin HTTP wrapper around the chat backend endpoints.
@Injectable({ providedIn: 'root' })
export class ChatService {
constructor(private http: HttpClient) {}
// POSTs the user's message and returns the server's generated reply.
// NOTE(review): the caller in ChatComponent `await`s this Observable
// directly — consider returning a Promise (firstValueFrom) instead.
sendMessage(content: string): Observable<ChatResponse> {
return this.http.post<ChatResponse>('/api/chat/message', { content });
}
// Uploads a file as multipart/form-data for document ingestion.
uploadDocument(file: File): Observable<DocumentUploadResponse> {
const formData = new FormData();
formData.append('file', file);
return this.http.post<DocumentUploadResponse>('/api/documents/upload', formData);
}
}@Component({
selector: 'app-document-manager',
template: `
<mat-table [dataSource]="documents">
<ng-container matColumnDef="name">
<mat-header-cell *matHeaderCellDef>Name</mat-header-cell>
<mat-cell *matCellDef="let doc">{{doc.name}}</mat-cell>
</ng-container>
<ng-container matColumnDef="expiryDate">
<mat-header-cell *matHeaderCellDef>Expires</mat-header-cell>
<mat-cell *matCellDef="let doc">
<span [class.expired]="isExpired(doc.expiryDate)">
{{doc.expiryDate | date}}
</span>
</mat-cell>
</ng-container>
<mat-header-row *matHeaderRowDef="displayedColumns"></mat-header-row>
<mat-row *matRowDef="let row; columns: displayedColumns;"></mat-row>
</mat-table>
`
})
// Admin table of ingested documents with expiry highlighting.
export class DocumentManagerComponent {
documents: Document[] = [];
// NOTE(review): 'actions' is listed here but has no matching matColumnDef
// in the template above — mat-table will throw at runtime; confirm.
displayedColumns = ['name', 'expiryDate', 'actions'];
constructor(private documentService: DocumentService) {}
// True when the expiry date is strictly in the past, compared against
// the client's local clock.
isExpired(date: Date): boolean {
return new Date(date) < new Date();
}
}// Program.cs
var builder = WebApplication.CreateBuilder(args);
// Add services
builder.Services.AddControllers();
builder.Services.AddSignalR();
// PostgreSQL via EF Core; connection string comes from configuration.
builder.Services.AddDbContext<RAGDbContext>(options =>
options.UseNpgsql(builder.Configuration.GetConnectionString("DefaultConnection")));
// Register custom services
builder.Services.AddScoped<ILLMProvider, LLMProvider>();
builder.Services.AddScoped<IDocumentService, DocumentService>();
builder.Services.AddScoped<IVectorSearchService, VectorSearchService>();
// Typed HttpClient for the separate LLM server (configured in a later snippet).
builder.Services.AddHttpClient<LLMClient>();
// Background jobs backed by PostgreSQL storage.
builder.Services.AddHangfire(config => config.UsePostgreSqlStorage());
// NOTE(review): CustomAPIClient depends on IMemoryCache and LLMProvider on
// IAPIIntegrationClient — neither registration is visible here; confirm they
// are registered in code not shown.
var app = builder.Build();
// Configure pipeline
app.UseRouting();
app.UseAuthentication();
app.UseAuthorization();
app.MapControllers();
app.MapHub<ChatHub>("/chatHub");
// NOTE(review): dashboard is mapped with no authorization filter — verify it
// is not exposed publicly.
app.UseHangfireDashboard();
app.Run();
// Controllers/ChatController.cs
// RAG chat endpoint: retrieves the top-5 relevant document chunks, joins
// them into a prompt context, and returns the LLM answer plus source titles.
[ApiController]
[Route("api/[controller]")]
public class ChatController : ControllerBase
{
private readonly ILLMProvider _llmProvider;
private readonly IVectorSearchService _vectorSearch;
public ChatController(ILLMProvider llmProvider, IVectorSearchService vectorSearch)
{
_llmProvider = llmProvider;
_vectorSearch = vectorSearch;
}
// POST api/chat/message
// NOTE(review): request.Content is used without null/empty validation.
[HttpPost("message")]
public async Task<ActionResult<ChatResponse>> SendMessage([FromBody] ChatRequest request)
{
// Search for relevant documents
var relevantDocs = await _vectorSearch.SearchAsync(request.Content, limit: 5);
// Build context from documents
var context = string.Join("\n", relevantDocs.Select(d => d.Content));
// Generate response
var response = await _llmProvider.GenerateAsync(request.Content, context);
return Ok(new ChatResponse
{
Content = response,
Sources = relevantDocs.Select(d => d.Title).ToList(),
Timestamp = DateTime.UtcNow
});
}
}public class RAGDbContext : DbContext
{
public DbSet<Document> Documents { get; set; }
public DbSet<DocumentEmbedding> DocumentEmbeddings { get; set; }
public DbSet<FAQ> FAQs { get; set; }
public DbSet<Conversation> Conversations { get; set; }
protected override void OnModelCreating(ModelBuilder modelBuilder)
{
// Configure pgvector support
// Registers the "vector" PostgreSQL extension so the embedding column
// below can use the pgvector type.
modelBuilder.HasPostgresExtension("vector");
// 1536 dimensions — matches the vector(1536) columns in the SQL schema.
modelBuilder.Entity<DocumentEmbedding>()
.Property(e => e.Embedding)
.HasColumnType("vector(1536)");
}
}
// EF Core entity mirroring the `documents` table: one row per ingested
// source file, with lifecycle fields (expiry, version, status).
public class Document
{
    public int Id { get; set; }
    public string Title { get; set; }
    public string Source { get; set; }
    public string ContentType { get; set; }
    public string FilePath { get; set; }

    // Null means the document never expires.
    public DateTime? ExpiryDate { get; set; }

    public int Version { get; set; } = 1;
    public string Status { get; set; } = "active";

    // Role-based access metadata (JSONB `access_roles` column).
    public JsonDocument AccessRoles { get; set; }

    public DateTime CreatedAt { get; set; } = DateTime.UtcNow;
    public DateTime UpdatedAt { get; set; } = DateTime.UtcNow;

    // Initialized so chunk embeddings can be added without a null check.
    public ICollection<DocumentEmbedding> Embeddings { get; set; } = new List<DocumentEmbedding>();
}

Project Goal: Build a fully private, GDPR-compliant RAG solution with swappable LLM models for a 100-person organization across EU and Asia.
Timeline: 3-4 weeks for Phase 1 & 2 (MVP - parallel development)
Budget: $2,254/month for dual GPU setup (recommended)
Team: 1-2 experienced developers + Xamun AI partnership
User → Chat Interface → Xamun AI (RBAC + orchestration) → RAG Service → LLM Server (GPU)
                                  ↓                            ↓
                   Web Server (PostgreSQL + APIs)        External APIs
                   (documents + embeddings)              (CRM, ERP, etc.)

Server 1: Web/Database Server
Server 2: LLM Server (GPU)
Your Team Builds:
Xamun AI Builds:
-- One row per ingested source document; per-chunk embeddings live in
-- document_embeddings and reference this table.
CREATE TABLE documents (
id SERIAL PRIMARY KEY,
title VARCHAR(500),
source VARCHAR(255),
content_type VARCHAR(100),
file_path TEXT,
-- NULL = never expires
expiry_date TIMESTAMP,
version INTEGER DEFAULT 1,
status VARCHAR(50) DEFAULT 'active',
-- role-based visibility metadata
access_roles JSONB,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Chunked document text with one pgvector embedding per chunk.
-- ON DELETE CASCADE keeps chunks from being orphaned when their parent
-- document row is removed.
CREATE TABLE document_embeddings (
    id SERIAL PRIMARY KEY,
    document_id INTEGER REFERENCES documents(id) ON DELETE CASCADE,
    chunk_text TEXT,
    embedding vector(1536), -- OpenAI embedding size
    metadata JSONB,
    chunk_index INTEGER,    -- position of the chunk within the document
    created_at TIMESTAMP DEFAULT NOW()
);

-- Approximate-nearest-neighbour index for cosine similarity search.
-- lists = 100 is the pgvector-recommended starting point (~rows/1000).
-- NOTE: ivfflat indexes build best after data is loaded — consider
-- creating this index post-ingestion.
CREATE INDEX idx_document_embeddings_embedding
    ON document_embeddings USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
CREATE TABLE faqs (
-- Curated Q&A pairs; question_embedding enables semantic FAQ matching
-- with the same 1536-dim vectors as document chunks.
id SERIAL PRIMARY KEY,
question TEXT,
answer TEXT,
question_embedding vector(1536),
category VARCHAR(100),
-- how often this FAQ has been served
usage_count INTEGER DEFAULT 0,
-- DECIMAL(3,2): two decimal places, max 9.99
effectiveness_score DECIMAL(3,2),
expiry_date TIMESTAMP,
access_roles JSONB,
created_at TIMESTAMP DEFAULT NOW()
);CREATE TABLE custom_apis (
-- Admin-registered external APIs (mirrors the CustomApi C# class).
id SERIAL PRIMARY KEY,
name VARCHAR(100),
base_url VARCHAR(500),
auth_type VARCHAR(50), -- 'api_key' or 'bearer'
api_key VARCHAR(500), -- encrypted
timeout_seconds INTEGER DEFAULT 30,
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP DEFAULT NOW()
);
-- Endpoints belonging to a custom API; matched to user queries by
-- case-insensitive keyword search (see CustomAPIClient).
-- NOTE(review): consider ON DELETE CASCADE on api_id so endpoints do not
-- outlive their parent API row.
CREATE TABLE api_endpoints (
id SERIAL PRIMARY KEY,
api_id INTEGER REFERENCES custom_apis(id),
endpoint_path VARCHAR(500),
method VARCHAR(10) DEFAULT 'GET',
description TEXT,
trigger_keywords TEXT[], -- array of keywords that trigger this endpoint
created_at TIMESTAMP DEFAULT NOW()
);CREATE TABLE conversations (
-- Audit log of every chat turn: query, answer, retrieval context,
-- external API calls, and optional user feedback.
id SERIAL PRIMARY KEY,
user_id VARCHAR(100),
session_id VARCHAR(100),
query TEXT,
response TEXT,
-- document chunks used as context for this answer
context_documents JSONB,
external_api_calls JSONB, -- Track which APIs were called
feedback_score INTEGER,
created_at TIMESTAMP DEFAULT NOW()
);

Server 1 Total: $1,190/month
Option 1: Dual GPU (Recommended)
Option 2: Single GPU (Budget)
Option 3: Premium Performance
Server 2 Total (Dual GPU): $1,710/month
Web Server: $1,190/month LLM Server (Dual GPU): $1,710/month Total Infrastructure: $2,900/month
// Typed HttpClient wrapper for the dedicated LLM inference server.
// BaseAddress and timeout are configured at registration (see the
// AddHttpClient snippet later in this document).
public class LLMClient
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<LLMClient> _logger;

    public LLMClient(HttpClient httpClient, ILogger<LLMClient> logger)
    {
        _httpClient = httpClient;
        _logger = logger;
    }

    /// <summary>
    /// POSTs the prompt (plus optional retrieval context) to the LLM
    /// server's /generate endpoint and returns the parsed response.
    /// </summary>
    /// <exception cref="HttpRequestException">Non-success HTTP status.</exception>
    /// <exception cref="InvalidOperationException">Empty or `null` response body.</exception>
    public async Task<LLMResponse> GenerateResponseAsync(
        string prompt,
        string context = null,
        int maxTokens = 512,
        CancellationToken cancellationToken = default)
    {
        var payload = new
        {
            prompt,
            context,
            max_tokens = maxTokens,
            temperature = 0.7 // fixed sampling temperature for this deployment
        };
        try
        {
            var response = await _httpClient.PostAsJsonAsync(
                "/generate",
                payload,
                cancellationToken);
            response.EnsureSuccessStatusCode();

            // ReadFromJsonAsync yields null for an empty/`null` body; fail
            // loudly rather than handing callers a null LLMResponse. Also
            // flow the cancellation token into deserialization.
            var result = await response.Content.ReadFromJsonAsync<LLMResponse>(
                cancellationToken: cancellationToken);
            return result ?? throw new InvalidOperationException(
                "LLM server returned an empty response body.");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to generate LLM response");
            throw; // rethrow preserving the original stack trace
        }
    }

    /// <summary>True when the LLM server's /health endpoint answers 2xx.</summary>
    public async Task<bool> HealthCheckAsync(CancellationToken cancellationToken = default)
    {
        try
        {
            var response = await _httpClient.GetAsync("/health", cancellationToken);
            return response.IsSuccessStatusCode;
        }
        catch
        {
            // Health probes must never throw — any failure means "unhealthy".
            return false;
        }
    }
}
// Register in Program.cs
// Typed-client configuration for LLMClient: internal-only base address,
// 30s timeout, wrapped in a retry policy.
// NOTE(review): GetRetryPolicy() is not defined in this excerpt — confirm
// it exists (e.g. a Polly policy) wherever this snippet is used.
builder.Services.AddHttpClient<LLMClient>(client =>
{
client.BaseAddress = new Uri("https://llm-server-internal.domain.com");
client.Timeout = TimeSpan.FromSeconds(30);
})
.AddPolicyHandler(GetRetryPolicy());public interface ILLMProvider
{
// Generates an answer for `prompt`, optionally grounded in `context`.
Task<string> GenerateAsync(string prompt, string context = null, int maxTokens = 512);
// Heuristic: does this prompt need fresh data from an external API?
Task<bool> RequiresApiDataAsync(string prompt);
// True when the backing LLM server is reachable and healthy.
Task<bool> HealthCheckAsync();
}
// Orchestrates answer generation: optionally augments the retrieval
// context with fresh external-API data, then calls the LLM server.
public class LLMProvider : ILLMProvider
{
    // Heuristic markers for queries that likely need real-time data.
    private static readonly string[] RealTimeKeywords =
        { "current", "latest", "status", "today", "now" };

    private readonly LLMClient _llmClient;
    private readonly IAPIIntegrationClient _apiClient;
    private readonly ILogger<LLMProvider> _logger;

    public LLMProvider(
        LLMClient llmClient,
        IAPIIntegrationClient apiClient,
        ILogger<LLMProvider> logger)
    {
        _llmClient = llmClient;
        _apiClient = apiClient;
        _logger = logger;
    }

    /// <summary>
    /// Generates a response, first merging external API data into the
    /// context when the prompt looks like it needs real-time information.
    /// </summary>
    public async Task<string> GenerateAsync(string prompt, string context = null, int maxTokens = 512)
    {
        // Check if query needs real-time data
        if (await RequiresApiDataAsync(prompt))
        {
            var apiData = await _apiClient.FetchRelevantDataAsync(prompt);
            context = MergeContext(context, apiData);
        }
        // Call separate LLM server
        var response = await _llmClient.GenerateResponseAsync(prompt, context, maxTokens);
        return response.Text;
    }

    /// <summary>
    /// Case-insensitive keyword check for prompts needing real-time data.
    /// The work is synchronous, so return a completed task instead of
    /// declaring `async` without `await` (compiler warning CS1998).
    /// </summary>
    public Task<bool> RequiresApiDataAsync(string prompt)
    {
        var needsData = RealTimeKeywords.Any(keyword =>
            prompt.Contains(keyword, StringComparison.OrdinalIgnoreCase));
        return Task.FromResult(needsData);
    }

    /// <summary>Delegates to the LLM server's /health probe.</summary>
    public async Task<bool> HealthCheckAsync()
    {
        return await _llmClient.HealthCheckAsync();
    }

    // Appends fresh API data to the document context, if any.
    private string MergeContext(string documentContext, string apiData)
    {
        if (string.IsNullOrEmpty(documentContext))
            return apiData;
        return $"{documentContext}\n\nCurrent Data:\n{apiData}";
    }
}
public class CustomAPIClient : ICustomAPIClient
{
    private readonly HttpClient _httpClient;
    private readonly IMemoryCache _cache;
    private readonly ILogger<CustomAPIClient> _logger;

    public CustomAPIClient(
        HttpClient httpClient,
        IMemoryCache cache,
        ILogger<CustomAPIClient> logger)
    {
        _httpClient = httpClient;
        _cache = cache;
        _logger = logger;
    }

    /// <summary>
    /// Finds the endpoint whose trigger keywords match the query, calls it,
    /// and returns the raw response body (cached for 5 minutes).
    /// Returns null when no endpoint matches or the call fails.
    /// </summary>
    public async Task<string> FetchDataAsync(string query)
    {
        // Simple keyword matching to find relevant API
        var endpoint = await FindRelevantEndpointAsync(query);
        if (endpoint == null) return null;

        // Check cache first.
        // NOTE(review): string.GetHashCode is randomized per process — fine
        // for an in-memory cache, but do not persist this key anywhere.
        var cacheKey = $"api_{endpoint.Id}_{query.GetHashCode()}";
        if (_cache.TryGetValue(cacheKey, out string cachedResult))
            return cachedResult;
        try
        {
            // Build request URL
            var url = $"{endpoint.Api.BaseUrl}/{endpoint.EndpointPath}";

            // Attach auth per request instead of mutating the shared
            // HttpClient.DefaultRequestHeaders, which is not thread-safe and
            // accumulates duplicate headers across successive calls.
            using var request = new HttpRequestMessage(HttpMethod.Get, url);
            if (endpoint.Api.AuthType == "api_key")
                request.Headers.Add("X-API-Key", endpoint.Api.ApiKey);
            else if (endpoint.Api.AuthType == "bearer")
                request.Headers.Authorization =
                    new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", endpoint.Api.ApiKey);

            // Make request (throws on non-success, like GetStringAsync did)
            using var response = await _httpClient.SendAsync(request);
            response.EnsureSuccessStatusCode();
            var body = await response.Content.ReadAsStringAsync();

            // Cache for 5 minutes
            _cache.Set(cacheKey, body, TimeSpan.FromMinutes(5));
            return body;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to call custom API {EndpointName}", endpoint.Description);
            return null; // Graceful fallback
        }
    }

    // Returns the first active endpoint with a trigger keyword contained
    // (case-insensitively) in the query, or null when none match.
    private async Task<ApiEndpoint> FindRelevantEndpointAsync(string query)
    {
        var endpoints = await GetActiveEndpointsAsync();
        return endpoints.FirstOrDefault(e =>
            e.TriggerKeywords.Any(keyword =>
                query.Contains(keyword, StringComparison.OrdinalIgnoreCase)));
    }
}

Document Version: 1.0
Last Updated: July 8, 2025
Prepared for: Internal Development Team