Following your hybrid approach: JSONB for flexibility + extracted key fields for querying
This design provides:
-- Content source types
CREATE TYPE source_type_enum AS ENUM (
'news_website', 'rss_feed', 'social_media', 'blog',
'press_release', 'user_submission', 'manual_entry'
);
-- Social media platforms
CREATE TYPE platform_enum AS ENUM (
'twitter', 'facebook', 'instagram', 'youtube', 'reddit',
'telegram', 'whatsapp', 'linkedin', 'tiktok'
);
-- Content types
CREATE TYPE content_type_enum AS ENUM (
'article', 'social_post', 'video', 'image', 'comment',
'reply', 'share', 'story', 'reel'
);
-- AI Classification enums
CREATE TYPE topic_enum AS ENUM (
'politics', 'economy', 'technology', 'health', 'sports',
'entertainment', 'science', 'environment', 'crime',
'international', 'education', 'business', 'social', 'other'
);
CREATE TYPE content_classification_enum AS ENUM (
'fact', 'fake', 'distorted', 'opinion', 'satire', 'uncertain'
);
CREATE TYPE sentiment_enum AS ENUM ('positive', 'negative', 'neutral');
CREATE TYPE urgency_enum AS ENUM ('red', 'orange', 'yellow', 'none');
CREATE TYPE bias_enum AS ENUM ('left', 'right', 'neutral', 'unknown');
CREATE TYPE language_enum AS ENUM ('english', 'bangla', 'hindi', 'urdu', 'mixed', 'other');
CREATE TYPE processing_status_enum AS ENUM (
'pending', 'processing', 'classified', 'verified',
'reviewed', 'published', 'rejected', 'failed'
);
-- Workflow stages
CREATE TYPE workflow_stage_enum AS ENUM ('draft', 'review', 'approved', 'published', 'rejected');CREATE TABLE sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name VARCHAR(255) NOT NULL,
source_type source_type_enum NOT NULL,
platform platform_enum, -- NULL for news websites
-- Key extracted fields for querying
url TEXT,
domain VARCHAR(255),
country VARCHAR(2), -- ISO country code
language language_enum DEFAULT 'english',
is_verified BOOLEAN DEFAULT FALSE,
is_active BOOLEAN DEFAULT TRUE,
-- Credibility & bias (updated by AI)
credibility_score DECIMAL(3,2), -- 0.00 to 10.00
bias_rating bias_enum DEFAULT 'unknown',
bias_confidence DECIMAL(3,2),
-- Complete source data in JSONB
metadata JSONB NOT NULL DEFAULT '{}',
-- Audit fields
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
created_by UUID,
CONSTRAINT valid_credibility CHECK (credibility_score >= 0 AND credibility_score <= 10)
);
-- Indexes
CREATE INDEX idx_sources_type_platform ON sources(source_type, platform);
CREATE INDEX idx_sources_domain ON sources(domain);
CREATE INDEX idx_sources_credibility ON sources(credibility_score DESC);
CREATE INDEX idx_sources_metadata ON sources USING GIN(metadata);
-- Example metadata structure for news source:
-- {
-- "rss_feed_url": "https://example.com/rss",
-- "scraping_config": {
-- "title_selector": ".article-title",
-- "content_selector": ".article-body",
-- "author_selector": ".author-name"
-- },
-- "contact_info": {
-- "email": "editor@example.com",
-- "twitter": "@examplenews"
-- },
-- "editorial_policy": "...",
-- "fact_check_history": [...]
-- }CREATE TABLE content_items (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Source relationship
source_id UUID NOT NULL REFERENCES sources(id),
external_id VARCHAR(500), -- Source's internal ID for deduplication
parent_id UUID REFERENCES content_items(id), -- For replies/shares
-- Key extracted fields for fast querying
title TEXT,
content_text TEXT, -- Main text content
url TEXT, -- Original URL
author_name VARCHAR(255),
author_handle VARCHAR(255), -- @username for social media
-- Temporal data
published_at TIMESTAMPTZ,
ingested_at TIMESTAMPTZ DEFAULT NOW(),
last_updated TIMESTAMPTZ DEFAULT NOW(),
-- Content classification
content_type content_type_enum NOT NULL,
language language_enum DEFAULT 'english',
-- Processing status
processing_status processing_status_enum DEFAULT 'pending',
needs_manual_review BOOLEAN DEFAULT FALSE,
-- Engagement metrics (for social media)
likes_count INTEGER DEFAULT 0,
shares_count INTEGER DEFAULT 0,
comments_count INTEGER DEFAULT 0,
views_count BIGINT DEFAULT 0,
-- Text analysis
word_count INTEGER,
character_count INTEGER,
-- Media flags
has_images BOOLEAN DEFAULT FALSE,
has_videos BOOLEAN DEFAULT FALSE,
has_audio BOOLEAN DEFAULT FALSE,
-- Complete raw data in JSONB (following your principle)
raw_data JSONB NOT NULL DEFAULT '{}',
-- Search vector for full-text search
search_vector tsvector,
-- Audit
created_by UUID,
version INTEGER DEFAULT 1,
CONSTRAINT unique_source_external UNIQUE(source_id, external_id),
CONSTRAINT valid_counts CHECK (
likes_count >= 0 AND shares_count >= 0 AND
comments_count >= 0 AND views_count >= 0
)
);
-- Indexes for performance
CREATE INDEX idx_content_source_published ON content_items(source_id, published_at DESC);
CREATE INDEX idx_content_status_language ON content_items(processing_status, language);
CREATE INDEX idx_content_type_ingested ON content_items(content_type, ingested_at DESC);
CREATE INDEX idx_content_engagement ON content_items(likes_count DESC, shares_count DESC);
CREATE INDEX idx_content_search ON content_items USING GIN(search_vector);
CREATE INDEX idx_content_raw_data ON content_items USING GIN(raw_data);
CREATE INDEX idx_content_author ON content_items(author_name, author_handle);
-- Partial indexes for active content
CREATE INDEX idx_content_active ON content_items(published_at DESC)
WHERE processing_status IN ('classified', 'verified', 'published');
-- Example raw_data structure for news article:
-- {
-- "headline": "...",
-- "body_html": "...",
-- "reporter": {
-- "name": "John Doe",
-- "email": "john@example.com",
-- "bio": "..."
-- },
-- "metadata": {
-- "section": "politics",
-- "tags": ["election", "bangladesh"],
-- "reading_time": 5
-- },
-- "scraping_info": {
-- "scraped_at": "2025-07-07T10:00:00Z",
-- "scraper_version": "1.2.3",
-- "source_html_hash": "abc123..."
-- }
-- }
-- Example raw_data for social media post:
-- {
-- "post_text": "...",
-- "user_info": {
-- "id": "12345",
-- "username": "example_user",
-- "display_name": "Example User",
-- "follower_count": 10000,
-- "verified": true
-- },
-- "engagement": {
-- "likes": 150,
-- "retweets": 25,
-- "replies": 10,
-- "impressions": 5000
-- },
-- "media_attachments": [
-- {"type": "image", "url": "...", "alt_text": "..."}
-- ],
-- "api_metadata": {
-- "tweet_id": "123456789",
-- "conversation_id": "987654321",
-- "api_version": "v2"
-- }
-- }CREATE TABLE content_classifications (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content_id UUID NOT NULL REFERENCES content_items(id) ON DELETE CASCADE,
-- AI Model info
model_name VARCHAR(100) NOT NULL, -- 'groq_llama', 'openai_gpt4', etc.
model_version VARCHAR(50),
classified_at TIMESTAMPTZ DEFAULT NOW(),
-- Key classification results (extracted for querying)
primary_topic topic_enum,
topic_confidence DECIMAL(3,2),
content_classification content_classification_enum,
classification_confidence DECIMAL(3,2),
sentiment sentiment_enum,
sentiment_score DECIMAL(4,2), -- -1.00 to 1.00
sentiment_confidence DECIMAL(3,2),
urgency_level urgency_enum,
urgency_confidence DECIMAL(3,2),
political_bias bias_enum,
bias_confidence DECIMAL(3,2),
overall_confidence DECIMAL(3,2), -- Average of all confidences
-- Complete AI response in JSONB
ai_response JSONB NOT NULL DEFAULT '{}',
-- Manual review tracking
manually_reviewed BOOLEAN DEFAULT FALSE,
reviewed_by UUID,
reviewed_at TIMESTAMPTZ,
review_notes TEXT,
-- Original classification before manual edits
original_classification JSONB,
CONSTRAINT unique_content_classification UNIQUE(content_id, model_name),
CONSTRAINT valid_confidence_scores CHECK (
topic_confidence BETWEEN 0 AND 1 AND
classification_confidence BETWEEN 0 AND 1 AND
sentiment_confidence BETWEEN 0 AND 1 AND
urgency_confidence BETWEEN 0 AND 1 AND
bias_confidence BETWEEN 0 AND 1 AND
overall_confidence BETWEEN 0 AND 1
),
CONSTRAINT valid_sentiment_score CHECK (sentiment_score BETWEEN -1 AND 1)
);
-- Indexes
CREATE INDEX idx_classifications_topic ON content_classifications(primary_topic, topic_confidence DESC);
CREATE INDEX idx_classifications_type ON content_classifications(content_classification);
CREATE INDEX idx_classifications_sentiment ON content_classifications(sentiment, sentiment_score);
CREATE INDEX idx_classifications_confidence ON content_classifications(overall_confidence DESC);
CREATE INDEX idx_classifications_model ON content_classifications(model_name, classified_at DESC);
CREATE INDEX idx_classifications_ai_response ON content_classifications USING GIN(ai_response);
-- Example ai_response structure:
-- {
-- "topics": [
-- {"name": "politics", "confidence": 0.95, "reasoning": "..."},
-- {"name": "economy", "confidence": 0.65, "reasoning": "..."}
-- ],
-- "content_type": {
-- "classification": "fact",
-- "confidence": 0.88,
-- "reasoning": "Article cites verifiable sources and official statements",
-- "supporting_evidence": ["source1", "source2"]
-- },
-- "sentiment_analysis": {
-- "sentiment": "negative",
-- "score": -0.3,
-- "confidence": 0.75,
-- "key_phrases": ["concerning", "problematic", "crisis"]
-- },
-- "urgency_assessment": {
-- "level": "orange",
-- "confidence": 0.70,
-- "reasoning": "Developing situation requiring attention"
-- },
-- "bias_analysis": {
-- "bias": "neutral",
-- "confidence": 0.80,
-- "indicators": ["balanced sources", "multiple perspectives"]
-- },
-- "keywords": ["election", "bangladesh", "voting", "democracy"],
-- "summary": "Article discusses upcoming election procedures...",
-- "fact_check_suggestions": [
-- "Verify voting date with election commission",
-- "Cross-reference candidate eligibility"
-- ]
-- }CREATE TABLE verification_results (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content_id UUID NOT NULL REFERENCES content_items(id) ON DELETE CASCADE,
-- Key verification results (extracted for querying)
overall_credibility_score DECIMAL(3,2), -- 0.00 to 10.00
is_fact_checked BOOLEAN DEFAULT FALSE,
fact_check_status VARCHAR(50), -- 'verified', 'false', 'misleading', 'unverified'
-- Source verification
source_credibility_score DECIMAL(3,2),
author_credibility_score DECIMAL(3,2),
-- Media verification flags
images_verified BOOLEAN DEFAULT FALSE,
videos_verified BOOLEAN DEFAULT FALSE,
-- Cross-reference results
similar_content_found BOOLEAN DEFAULT FALSE,
cross_reference_count INTEGER DEFAULT 0,
-- Complete verification data in JSONB
verification_data JSONB NOT NULL DEFAULT '{}',
-- Processing metadata
verified_at TIMESTAMPTZ DEFAULT NOW(),
verification_engine VARCHAR(100), -- 'google_fact_check', 'manual', etc.
verification_version VARCHAR(50),
CONSTRAINT unique_content_verification UNIQUE(content_id),
CONSTRAINT valid_credibility_scores CHECK (
overall_credibility_score BETWEEN 0 AND 10 AND
(source_credibility_score IS NULL OR source_credibility_score BETWEEN 0 AND 10) AND
(author_credibility_score IS NULL OR author_credibility_score BETWEEN 0 AND 10)
)
);
-- Indexes
CREATE INDEX idx_verification_credibility ON verification_results(overall_credibility_score DESC);
CREATE INDEX idx_verification_fact_check ON verification_results(is_fact_checked, fact_check_status);
CREATE INDEX idx_verification_cross_ref ON verification_results(similar_content_found, cross_reference_count DESC);
CREATE INDEX idx_verification_data ON verification_results USING GIN(verification_data);
-- Example verification_data structure:
-- {
-- "fact_check_results": [
-- {
-- "source": "Google Fact Check API",
-- "claim": "...",
-- "rating": "FALSE",
-- "explanation": "...",
-- "fact_checker": "AFP Fact Check",
-- "date": "2025-07-01"
-- }
-- ],
-- "cross_references": [
-- {
-- "url": "https://example.com/similar-article",
-- "title": "...",
-- "similarity_score": 0.85,
-- "source_credibility": 8.5
-- }
-- ],
-- "media_verification": {
-- "images": [
-- {
-- "url": "...",
-- "reverse_search_results": [...],
-- "metadata_analysis": {...},
-- "authenticity_score": 0.9
-- }
-- ],
-- "videos": [...]
-- },
-- "source_analysis": {
-- "domain_reputation": 7.5,
-- "historical_accuracy": 0.85,
-- "bias_indicators": [...],
-- "fact_check_history": [...]
-- },
-- "author_analysis": {
-- "expertise_score": 8.0,
-- "publication_history": [...],
-- "credibility_indicators": [...]
-- }
-- }CREATE TABLE narratives (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Key extracted fields for querying
title VARCHAR(500) NOT NULL,
primary_topic topic_enum,
-- Origin tracking
origin_content_id UUID REFERENCES content_items(id),
origin_author VARCHAR(255),
origin_source_id UUID REFERENCES sources(id),
first_seen_at TIMESTAMPTZ,
-- Narrative status
is_active BOOLEAN DEFAULT TRUE,
is_misinformation BOOLEAN DEFAULT FALSE,
confidence_score DECIMAL(3,2),
-- Geographic data
primary_geography VARCHAR(100), -- Country/region
geographic_reach TEXT[], -- Array of regions/countries
-- Engagement tracking
total_mentions INTEGER DEFAULT 0,
total_reach BIGINT DEFAULT 0,
peak_engagement_at TIMESTAMPTZ,
-- Complete narrative data in JSONB
narrative_data JSONB NOT NULL DEFAULT '{}',
-- Audit
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
created_by UUID,
CONSTRAINT valid_confidence CHECK (confidence_score BETWEEN 0 AND 1)
);
-- Indexes
CREATE INDEX idx_narratives_topic_active ON narratives(primary_topic, is_active);
CREATE INDEX idx_narratives_origin ON narratives(origin_content_id, first_seen_at);
CREATE INDEX idx_narratives_geography ON narratives USING GIN(geographic_reach);
CREATE INDEX idx_narratives_engagement ON narratives(total_mentions DESC, total_reach DESC);
CREATE INDEX idx_narratives_data ON narratives USING GIN(narrative_data);
-- Example narrative_data structure:
-- {
-- "keywords": ["election", "voting", "fraud", "democracy"],
-- "evolution_timeline": [
-- {
-- "date": "2025-07-01",
-- "event": "First appearance",
-- "content_ids": ["uuid1", "uuid2"],
-- "engagement": 1000
-- }
-- ],
-- "key_amplifiers": [
-- {
-- "author": "influencer_name",
-- "platform": "twitter",
-- "reach": 50000,
-- "content_id": "uuid3"
-- }
-- ],
-- "fact_check_status": "disputed",
-- "related_narratives": ["uuid4", "uuid5"],
-- "linguistic_variants": [
-- {"language": "bangla", "keywords": ["নির্বাচন", "ভোট"]},
-- {"language": "hindi", "keywords": ["चुनाव", "वोट"]}
-- ],
-- "sentiment_trend": [
-- {"date": "2025-07-01", "sentiment": "negative", "score": -0.3}
-- ]
-- }CREATE TABLE content_narratives (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content_id UUID NOT NULL REFERENCES content_items(id) ON DELETE CASCADE,
narrative_id UUID NOT NULL REFERENCES narratives(id) ON DELETE CASCADE,
-- Relationship strength
relevance_score DECIMAL(3,2) NOT NULL, -- How relevant is this content to the narrative
similarity_score DECIMAL(3,2), -- Semantic similarity to origin content
-- Role in narrative
role_type VARCHAR(50), -- 'origin', 'amplifier', 'supporter', 'contradiction'
influence_weight DECIMAL(3,2), -- How much this content influenced the narrative
-- Timing
associated_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT unique_content_narrative UNIQUE(content_id, narrative_id),
CONSTRAINT valid_scores CHECK (
relevance_score BETWEEN 0 AND 1 AND
(similarity_score IS NULL OR similarity_score BETWEEN 0 AND 1) AND
(influence_weight IS NULL OR influence_weight BETWEEN 0 AND 1)
)
);
-- Indexes
CREATE INDEX idx_content_narratives_content ON content_narratives(content_id);
CREATE INDEX idx_content_narratives_narrative ON content_narratives(narrative_id, relevance_score DESC);
CREATE INDEX idx_content_narratives_role ON content_narratives(role_type, influence_weight DESC);CREATE TABLE hashtags (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
tag VARCHAR(255) NOT NULL UNIQUE,
-- Current metrics (updated in real-time)
current_mentions_24h INTEGER DEFAULT 0,
current_engagement_24h BIGINT DEFAULT 0,
trend_score DECIMAL(5,2) DEFAULT 0,
-- Classification
primary_topic topic_enum,
sentiment_trend sentiment_enum,
-- Geographic data
top_regions TEXT[],
-- Status
is_trending BOOLEAN DEFAULT FALSE,
is_monitored BOOLEAN DEFAULT FALSE,
-- Complete hashtag data in JSONB
hashtag_data JSONB NOT NULL DEFAULT '{}',
-- Audit
first_seen TIMESTAMPTZ DEFAULT NOW(),
last_updated TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX idx_hashtags_trending ON hashtags(is_trending, trend_score DESC);
CREATE INDEX idx_hashtags_engagement ON hashtags(current_engagement_24h DESC);
CREATE INDEX idx_hashtags_topic ON hashtags(primary_topic, sentiment_trend);
CREATE INDEX idx_hashtags_data ON hashtags USING GIN(hashtag_data);
-- Example hashtag_data structure:
-- {
-- "engagement_history": [
-- {"hour": "2025-07-07T10:00:00Z", "mentions": 150, "engagement": 5000}
-- ],
-- "top_posts": [
-- {"content_id": "uuid1", "engagement": 1000, "reach": 50000}
-- ],
-- "geographic_distribution": {
-- "bangladesh": 0.6,
-- "india": 0.3,
-- "global": 0.1
-- },
-- "related_hashtags": ["#election2025", "#democracy"],
-- "influencer_usage": [
-- {"author": "politician_name", "reach": 100000, "posts": 3}
-- ]
-- }CREATE TABLE hashtag_content (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
hashtag_id UUID NOT NULL REFERENCES hashtags(id) ON DELETE CASCADE,
content_id UUID NOT NULL REFERENCES content_items(id) ON DELETE CASCADE,
-- Engagement at time of association
engagement_snapshot JSONB,
associated_at TIMESTAMPTZ DEFAULT NOW(),
CONSTRAINT unique_hashtag_content UNIQUE(hashtag_id, content_id)
);
CREATE INDEX idx_hashtag_content_hashtag ON hashtag_content(hashtag_id, associated_at DESC);
CREATE INDEX idx_hashtag_content_content ON hashtag_content(content_id);CREATE TABLE editorial_workflows (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content_id UUID NOT NULL REFERENCES content_items(id),
-- Workflow status
current_stage workflow_stage_enum DEFAULT 'draft',
assigned_to UUID, -- References users table
priority_level INTEGER DEFAULT 3, -- 1 (high) to 5 (low)
-- Due dates
assigned_at TIMESTAMPTZ,
due_date TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
-- Workflow data
workflow_data JSONB NOT NULL DEFAULT '{}',
-- Audit
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
created_by UUID
);
-- Indexes
CREATE INDEX idx_workflow_stage_assigned ON editorial_workflows(current_stage, assigned_to);
CREATE INDEX idx_workflow_priority_due ON editorial_workflows(priority_level, due_date);
CREATE INDEX idx_workflow_data ON editorial_workflows USING GIN(workflow_data);
-- Example workflow_data structure:
-- {
-- "stage_history": [
-- {
-- "stage": "draft",
-- "entered_at": "2025-07-07T10:00:00Z",
-- "user_id": "uuid1",
-- "notes": "Initial review required"
-- }
-- ],
-- "review_notes": [
-- {
-- "reviewer": "editor_name",
-- "timestamp": "2025-07-07T11:00:00Z",
-- "note": "Please verify the statistics in paragraph 3",
-- "resolved": false
-- }
-- ],
-- "approval_criteria": {
-- "fact_check_required": true,
-- "bias_review_required": false,
-- "legal_review_required": false
-- },
-- "publication_settings": {
-- "publish_to": ["website", "social_media"],
-- "scheduled_time": "2025-07-07T15:00:00Z"
-- }
-- }CREATE TABLE users (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Authentication
email VARCHAR(320) NOT NULL UNIQUE,
username VARCHAR(100) UNIQUE,
password_hash VARCHAR(255), -- For local auth
-- Profile
full_name VARCHAR(255),
role VARCHAR(50) NOT NULL, -- 'admin', 'editor', 'analyst', 'viewer'
department VARCHAR(100),
-- Permissions
permissions TEXT[], -- Array of permission strings
-- Account status
is_active BOOLEAN DEFAULT TRUE,
email_verified BOOLEAN DEFAULT FALSE,
last_login TIMESTAMPTZ,
-- User preferences and data in JSONB
user_data JSONB NOT NULL DEFAULT '{}',
-- Audit
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
created_by UUID
);
-- Indexes
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_role_active ON users(role, is_active);
CREATE INDEX idx_users_permissions ON users USING GIN(permissions);
CREATE INDEX idx_users_data ON users USING GIN(user_data);CREATE TABLE content_analytics (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content_id UUID NOT NULL REFERENCES content_items(id) ON DELETE CASCADE,
-- Time period for this analytic
period_start TIMESTAMPTZ NOT NULL,
period_end TIMESTAMPTZ NOT NULL,
period_type VARCHAR(20) NOT NULL, -- 'hourly', 'daily', 'weekly'
-- Engagement metrics
engagement_score DECIMAL(10,2) DEFAULT 0,
reach_estimate BIGINT DEFAULT 0,
viral_coefficient DECIMAL(5,2) DEFAULT 0,
-- Share across platforms
platform_distribution JSONB,
-- Geographic distribution
geographic_distribution JSONB,
-- Sentiment tracking over time
sentiment_evolution JSONB,
-- Complete analytics data
analytics_data JSONB NOT NULL DEFAULT '{}',
-- Computation metadata
computed_at TIMESTAMPTZ DEFAULT NOW(),
computation_version VARCHAR(50),
CONSTRAINT unique_content_period UNIQUE(content_id, period_start, period_type)
);
-- Indexes
CREATE INDEX idx_analytics_content_period ON content_analytics(content_id, period_start DESC);
CREATE INDEX idx_analytics_engagement ON content_analytics(engagement_score DESC);
CREATE INDEX idx_analytics_reach ON content_analytics(reach_estimate DESC);
CREATE INDEX idx_analytics_period_type ON content_analytics(period_type, period_start DESC);CREATE TABLE system_config (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
config_key VARCHAR(255) NOT NULL UNIQUE,
config_value JSONB NOT NULL,
description TEXT,
is_sensitive BOOLEAN DEFAULT FALSE, -- For secrets/credentials
-- Version control
version INTEGER DEFAULT 1,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
updated_by UUID REFERENCES users(id)
);
-- Examples of config entries:
-- config_key: 'ai_models.groq_llama.endpoint'
-- config_value: {"url": "https://api.groq.com/...", "version": "2.0"}
--
-- config_key: 'scraping.rate_limits'
-- config_value: {"times_of_india": {"requests_per_second": 1}, "news18": {"requests_per_second": 2}}
--
-- config_key: 'classification.confidence_thresholds'
-- config_value: {"manual_review_threshold": 0.7, "auto_publish_threshold": 0.9}CREATE TABLE audit_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- What happened
action VARCHAR(100) NOT NULL, -- 'create', 'update', 'delete', 'classify', 'verify'
entity_type VARCHAR(100) NOT NULL, -- 'content_item', 'classification', etc.
entity_id UUID NOT NULL,
-- Who did it
user_id UUID REFERENCES users(id),
system_component VARCHAR(100), -- 'ai_classifier', 'web_scraper', etc.
-- When and where
timestamp TIMESTAMPTZ DEFAULT NOW(),
ip_address INET,
user_agent TEXT,
-- Change details
changes JSONB, -- Before/after values
-- Context
session_id UUID,
request_id UUID, -- For distributed tracing
correlation_id UUID -- For grouping related operations
);
-- Indexes
CREATE INDEX idx_audit_entity ON audit_log(entity_type, entity_id, timestamp DESC);
CREATE INDEX idx_audit_user ON audit_log(user_id, timestamp DESC);
CREATE INDEX idx_audit_action ON audit_log(action, timestamp DESC);
CREATE INDEX idx_audit_timestamp ON audit_log(timestamp DESC);
CREATE INDEX idx_audit_changes ON audit_log USING GIN(changes);
-- Partitioning by month for performance
CREATE TABLE audit_log_y2025m07 PARTITION OF audit_log
FOR VALUES FROM ('2025-07-01') TO ('2025-08-01');
-- Continue for other months...-- Real-time content summary
CREATE MATERIALIZED VIEW content_summary_24h AS
SELECT
c.source_id,
s.name as source_name,
COUNT(*) as total_articles,
COUNT(*) FILTER (WHERE cl.content_classification = 'fake') as fake_count,
COUNT(*) FILTER (WHERE cl.sentiment = 'negative') as negative_count,
AVG(v.overall_credibility_score) as avg_credibility,
MAX(c.published_at) as latest_article
FROM content_items c
JOIN sources s ON c.source_id = s.id
LEFT JOIN content_classifications cl ON c.id = cl.content_id
LEFT JOIN verification_results v ON c.id = v.content_id
WHERE c.published_at >= NOW() - INTERVAL '24 hours'
GROUP BY c.source_id, s.name;
-- Refresh every 5 minutes
CREATE INDEX idx_content_summary_24h_source ON content_summary_24h(source_id);
-- Topic trending view
CREATE MATERIALIZED VIEW topic_trends_24h AS
SELECT
cl.primary_topic,
COUNT(*) as article_count,
AVG(CASE WHEN cl.sentiment = 'positive' THEN 1
WHEN cl.sentiment = 'negative' THEN -1
ELSE 0 END) as sentiment_avg,
SUM(c.likes_count + c.shares_count) as total_engagement
FROM content_classifications cl
JOIN content_items c ON cl.content_id = c.id
WHERE c.published_at >= NOW() - INTERVAL '24 hours'
GROUP BY cl.primary_topic
ORDER BY total_engagement DESC;-- Auto-update search vector on content changes
CREATE OR REPLACE FUNCTION update_content_search_vector()
RETURNS TRIGGER AS $$
BEGIN
NEW.search_vector := to_tsvector('english',
COALESCE(NEW.title, '') || ' ' ||
COALESCE(NEW.content_text, '') || ' ' ||
COALESCE(NEW.author_name, '')
);
NEW.last_updated := NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER content_search_vector_update
BEFORE INSERT OR UPDATE ON content_items
FOR EACH ROW EXECUTE FUNCTION update_content_search_vector();
-- Auto-update narrative metrics
CREATE OR REPLACE FUNCTION update_narrative_metrics()
RETURNS TRIGGER AS $$
BEGIN
UPDATE narratives
SET
total_mentions = (
SELECT COUNT(*) FROM content_narratives
WHERE narrative_id = NEW.narrative_id
),
updated_at = NOW()
WHERE id = NEW.narrative_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER narrative_metrics_update
AFTER INSERT OR DELETE ON content_narratives
FOR EACH ROW EXECUTE FUNCTION update_narrative_metrics();-- Version tracking for schema migrations
CREATE TABLE schema_migrations (
version VARCHAR(50) PRIMARY KEY,
description TEXT,
applied_at TIMESTAMPTZ DEFAULT NOW(),
checksum VARCHAR(64) -- MD5 of migration script
);
-- Insert initial version
INSERT INTO schema_migrations (version, description)
VALUES ('v1.0.0', 'Initial Shottify schema with hybrid JSONB design');This schema can handle millions of content items while maintaining sub-second query performance for your analytics dashboard and real-time monitoring requirements.