Skip to content

HeliosDB-Lite SQL Reference (v3.4)

Complete reference for SQL syntax supported by HeliosDB-Lite. Compatible with PostgreSQL syntax.

Table of Contents


Data Definition Language (DDL)

CREATE TABLE

CREATE TABLE [IF NOT EXISTS] table_name (
    column_name data_type [constraints],
    ...
    [PRIMARY KEY (column_name, ...)]
);

Data Types: - INTEGER, INT, BIGINT, SMALLINT - FLOAT, DOUBLE, REAL, DECIMAL(p,s), NUMERIC(p,s) - TEXT, VARCHAR(n), CHAR(n) - BOOLEAN, BOOL - TIMESTAMP, DATE, TIME - BLOB, BYTEA - JSON, JSONB - VECTOR(dimensions) - Vector type for similarity search

Constraints: - PRIMARY KEY - NOT NULL - UNIQUE - DEFAULT value - CHECK (expression)

Examples:

-- Basic table
CREATE TABLE users (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT UNIQUE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Table with vector column
CREATE TABLE items (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    embedding VECTOR(384)
);

-- Table with tenant column (for multi-tenancy)
CREATE TABLE orders (
    id INTEGER PRIMARY KEY,
    tenant_id INTEGER NOT NULL,
    total DECIMAL(10,2),
    created_at TIMESTAMP
);

DROP TABLE

DROP TABLE [IF EXISTS] table_name;

TRUNCATE TABLE

TRUNCATE TABLE table_name;

CREATE INDEX

CREATE INDEX [IF NOT EXISTS] index_name
ON table_name (column_name)
[USING index_type]
[WITH (option = value, ...)];

Index Types: - btree (default) - B-tree index for equality/range queries - hnsw - HNSW vector index for similarity search - ivf - IVF vector index for large-scale search

Vector Index Options:

-- HNSW index with options
CREATE INDEX items_embedding_idx ON items
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 200);

-- IVF index with product quantization
CREATE INDEX items_embedding_idx ON items
USING ivf (embedding)
WITH (lists = 100, quantization = 'pq', pq_subquantizers = 8);

ALTER TABLE

-- Set compression
ALTER TABLE table_name SET COMPRESSION codec;

-- Set column compression
ALTER TABLE table_name ALTER COLUMN column_name SET COMPRESSION codec;

-- Enable row-level security
ALTER TABLE table_name ENABLE ROW LEVEL SECURITY;

-- Add column (limited support)
ALTER TABLE table_name ADD COLUMN column_name data_type;

Compression Codecs: - none - No compression - zstd - Zstandard (default, good compression ratio) - lz4 - LZ4 (fast compression)


Data Manipulation Language (DML)

INSERT

INSERT INTO table_name [(column_list)]
VALUES (value_list), ...
[RETURNING column_list | *];

Examples:

-- Basic insert
INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com');

-- Multiple rows
INSERT INTO users (name, email) VALUES
    ('Bob', 'bob@example.com'),
    ('Carol', 'carol@example.com');

-- Insert with RETURNING
INSERT INTO users (name, email)
VALUES ('Dave', 'dave@example.com')
RETURNING id, created_at;

-- Insert vector
INSERT INTO items (name, embedding)
VALUES ('product', '[0.1, 0.2, 0.3, ...]');

-- INSERT ... SELECT (insert from query results)
INSERT INTO archive_orders (id, customer_id, total, created_at)
SELECT id, customer_id, total, created_at
FROM orders
WHERE created_at < '2024-01-01';

-- INSERT ... SELECT with transformation
INSERT INTO order_summary (customer_id, order_count, total_amount)
SELECT customer_id, COUNT(*), SUM(total)
FROM orders
GROUP BY customer_id;

UPDATE

UPDATE table_name
SET column = value, ...
[WHERE condition]
[RETURNING column_list | *];

Examples:

UPDATE users SET name = 'Alice Smith' WHERE id = 1;

UPDATE orders SET status = 'shipped' WHERE status = 'pending'
RETURNING id, status;

DELETE

DELETE FROM table_name
[WHERE condition]
[RETURNING column_list | *];

Examples:

DELETE FROM users WHERE id = 1;

DELETE FROM orders WHERE created_at < '2024-01-01'
RETURNING id;

Query Syntax

SELECT

SELECT [DISTINCT] expression_list
FROM table_reference
[WHERE condition]
[GROUP BY expression_list]
[HAVING condition]
[ORDER BY expression [ASC|DESC], ...]
[LIMIT count]
[OFFSET skip];

WITH (Common Table Expressions)

WITH cte_name AS (
    SELECT ...
)
SELECT ... FROM cte_name ...;

Examples:

-- Basic CTE
WITH active_users AS (
    SELECT * FROM users WHERE status = 'active'
)
SELECT * FROM active_users WHERE created_at > '2024-01-01';

-- Multiple CTEs
WITH
    recent_orders AS (SELECT * FROM orders WHERE created_at > '2024-01-01'),
    high_value AS (SELECT * FROM recent_orders WHERE total > 100)
SELECT * FROM high_value;

JOIN

SELECT ...
FROM table1
[INNER | LEFT | RIGHT | FULL | CROSS] JOIN table2
ON condition;

Examples:

-- Inner join
SELECT o.*, u.name
FROM orders o
JOIN users u ON o.user_id = u.id;

-- Left join
SELECT u.*, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id;

DISTINCT

SELECT DISTINCT column_list FROM table_name;

Subqueries

-- Scalar subquery
SELECT name, (SELECT MAX(total) FROM orders WHERE user_id = u.id)
FROM users u;

-- IN subquery
SELECT * FROM users WHERE id IN (SELECT user_id FROM orders WHERE total > 100);

-- EXISTS subquery
SELECT * FROM users u WHERE EXISTS (SELECT 1 FROM orders WHERE user_id = u.id);

Set Operations

Combine results from multiple queries using set operations.

UNION

Combines results from two queries, removing duplicates by default.

SELECT column_list FROM table1
UNION [ALL]
SELECT column_list FROM table2;
  • UNION - Returns distinct rows from both queries
  • UNION ALL - Returns all rows including duplicates (faster)

Examples:

-- Combine customers from different regions
SELECT name, email FROM customers_us
UNION
SELECT name, email FROM customers_eu;

-- Include duplicates
SELECT product_id FROM orders_2024
UNION ALL
SELECT product_id FROM orders_2025;

INTERSECT

Returns only rows that appear in both query results.

SELECT column_list FROM table1
INTERSECT [ALL]
SELECT column_list FROM table2;

Example:

-- Find customers who ordered in both years
SELECT customer_id FROM orders_2024
INTERSECT
SELECT customer_id FROM orders_2025;

EXCEPT

Returns rows from the first query that don't appear in the second.

SELECT column_list FROM table1
EXCEPT [ALL]
SELECT column_list FROM table2;

Example:

-- Find customers from 2024 who didn't order in 2025
SELECT customer_id FROM orders_2024
EXCEPT
SELECT customer_id FROM orders_2025;

Combining Set Operations

-- Multiple set operations with parentheses for precedence
(SELECT id FROM table1 UNION SELECT id FROM table2)
INTERSECT
SELECT id FROM table3;

Operators and Expressions

Comparison Operators

Operator Description
= Equal
<>, != Not equal
<, > Less than, Greater than
<=, >= Less/Greater than or equal
BETWEEN x AND y Range inclusive
IN (list) In list
LIKE pattern Pattern match
ILIKE pattern Case-insensitive pattern
IS NULL Is null
IS NOT NULL Is not null

Logical Operators

Operator Description
AND Logical AND
OR Logical OR
NOT Logical NOT

Arithmetic Operators

Operator Description
+ Addition
- Subtraction
* Multiplication
/ Division
% Modulo

Vector Operators

Operator Description
<-> Euclidean distance
<#> Negative inner product (for cosine)
<=> Cosine distance

CASE Expression

CASE
    WHEN condition1 THEN result1
    WHEN condition2 THEN result2
    ELSE default_result
END

-- Simple CASE
CASE expression
    WHEN value1 THEN result1
    WHEN value2 THEN result2
    ELSE default_result
END

CAST Expression

CAST(expression AS data_type)
expression::data_type

Functions

Aggregate Functions

Function Description
COUNT(*) Count all rows
COUNT(column) Count non-null values
COUNT(DISTINCT column) Count distinct values
SUM(column) Sum of values
AVG(column) Average of values
MIN(column) Minimum value
MAX(column) Maximum value

String Functions

Function Description
LENGTH(s) String length
UPPER(s) Uppercase
LOWER(s) Lowercase
TRIM(s) Remove whitespace
SUBSTR(s, start, len) Substring
CONCAT(s1, s2, ...) Concatenate
REPLACE(s, from, to) Replace substring

Date/Time Functions

Function Description
CURRENT_TIMESTAMP Current timestamp
CURRENT_DATE Current date
CURRENT_TIME Current time
NOW() Current timestamp
EXTRACT(field FROM ts) Extract component
DATE_TRUNC(field, ts) Truncate to precision

Math Functions

Function Description
ABS(x) Absolute value
CEIL(x) Ceiling
FLOOR(x) Floor
ROUND(x, d) Round to d decimals
SQRT(x) Square root
POWER(x, y) x to power y

JSON Functions

Function Description
json_extract(json, path) Extract value
json_array_length(json) Array length
json_type(json) Value type

Tenant Functions

Function Description
current_tenant_id() Current tenant ID
current_tenant_name() Current tenant name

Data Types

Numeric Types

Type Storage Range
SMALLINT 2 bytes -32768 to 32767
INTEGER 4 bytes -2^31 to 2^31-1
BIGINT 8 bytes -2^63 to 2^63-1
REAL 4 bytes 6 decimal precision
DOUBLE PRECISION 8 bytes 15 decimal precision
DECIMAL(p,s) Variable User-specified precision

Text Types

Type Description
TEXT Variable-length string
VARCHAR(n) Variable up to n characters
CHAR(n) Fixed n characters

Date/Time Types

Type Description
TIMESTAMP Date and time
DATE Date only
TIME Time only

Other Types

Type Description
BOOLEAN true/false
BYTEA Binary data
JSON JSON document
VECTOR(n) n-dimensional vector

Database Branching

CREATE BRANCH

CREATE DATABASE BRANCH branch_name
FROM parent_branch
[AS OF TIMESTAMP 'timestamp' | AS OF TRANSACTION txn_id | AS OF NOW];

Examples:

-- Branch from main at current point
CREATE DATABASE BRANCH dev FROM main;

-- Branch at specific timestamp
CREATE DATABASE BRANCH hotfix FROM main AS OF TIMESTAMP '2025-01-01 00:00:00';

-- Branch at specific transaction
CREATE DATABASE BRANCH feature FROM main AS OF TRANSACTION 12345;

DROP BRANCH

DROP DATABASE BRANCH [IF EXISTS] branch_name;

MERGE BRANCH

MERGE DATABASE BRANCH source INTO target
[WITH (strategy = 'ours' | 'theirs' | 'fail')];

USE BRANCH

USE BRANCH branch_name;

SHOW BRANCHES

SHOW BRANCHES;
-- Or use system view
SELECT * FROM pg_database_branches();

Time-Travel Queries

Query historical data at specific points in time.

-- By timestamp
SELECT * FROM table_name AS OF TIMESTAMP 'YYYY-MM-DD HH:MM:SS';

-- By transaction ID
SELECT * FROM table_name AS OF TRANSACTION txn_id;

-- By system change number (SCN)
SELECT * FROM table_name AS OF SCN scn_value;

Examples:

-- Query orders from yesterday
SELECT * FROM orders AS OF TIMESTAMP '2025-01-15 00:00:00';

-- Query at specific transaction
SELECT * FROM users AS OF TRANSACTION 10000;

-- Compare current vs historical
SELECT
    c.balance as current_balance,
    h.balance as historical_balance
FROM accounts c
JOIN accounts AS OF TIMESTAMP '2025-01-01' h ON c.id = h.id;

Materialized Views

CREATE MATERIALIZED VIEW

CREATE MATERIALIZED VIEW [IF NOT EXISTS] view_name
AS query
[WITH (option = value, ...)];

Options: - auto_refresh - Enable automatic refresh (default: false) - max_cpu_percent - Max CPU for refresh (default: 15) - threshold_table_size - Table size threshold for incremental refresh (e.g., '1GB') - threshold_dml_rate - DML rate threshold for incremental refresh - lazy_update - Enable lazy/deferred updates (default: false) - lazy_catchup_window - Time window for lazy catchup (e.g., '1 hour') - distribution - Distribution strategy for partitioned MVs - replication_factor - Replication factor for distributed MVs

Example:

CREATE MATERIALIZED VIEW monthly_sales AS
SELECT
    DATE_TRUNC('month', created_at) as month,
    COUNT(*) as order_count,
    SUM(total) as revenue
FROM orders
GROUP BY DATE_TRUNC('month', created_at)
WITH (auto_refresh = true, max_cpu_percent = 25);

REFRESH MATERIALIZED VIEW

REFRESH MATERIALIZED VIEW [CONCURRENTLY] view_name [INCREMENTALLY];

Modes: - Default: Full refresh - recomputes entire view from base tables - CONCURRENTLY: Non-blocking refresh (allows reads during refresh) - INCREMENTALLY: Delta-based refresh - only processes changes since last refresh

Examples:

-- Full refresh (blocks reads)
REFRESH MATERIALIZED VIEW monthly_sales;

-- Concurrent refresh (non-blocking)
REFRESH MATERIALIZED VIEW CONCURRENTLY monthly_sales;

-- Incremental refresh (delta-based, most efficient for small changes)
REFRESH MATERIALIZED VIEW monthly_sales INCREMENTALLY;

ALTER MATERIALIZED VIEW

Modify materialized view options after creation.

ALTER MATERIALIZED VIEW view_name SET (option = value, ...);

Options: - staleness_threshold - Staleness threshold in seconds (numeric) - max_cpu_percent - Maximum CPU percentage for refresh operations (numeric) - priority - Refresh priority: 0=low, 1=medium, 2=high, 3=critical (numeric) - refresh_strategy - Refresh strategy: 'manual', 'auto', 'incremental' - incremental_enabled - Enable/disable incremental refresh (boolean)

Examples:

-- Set staleness threshold to 30 minutes
ALTER MATERIALIZED VIEW monthly_sales SET (staleness_threshold = 1800);

-- Set high priority and limit CPU usage
ALTER MATERIALIZED VIEW monthly_sales SET (priority = 2, max_cpu_percent = 25);

-- Enable incremental refresh strategy
ALTER MATERIALIZED VIEW monthly_sales SET (refresh_strategy = 'incremental');

-- Configure multiple options
ALTER MATERIALIZED VIEW monthly_sales SET (
    staleness_threshold = 3600,
    max_cpu_percent = 15,
    incremental_enabled = true
);

DROP MATERIALIZED VIEW

DROP MATERIALIZED VIEW [IF EXISTS] view_name;

Vector Operations

Vector Column

-- Create table with vector column
CREATE TABLE items (
    id INTEGER PRIMARY KEY,
    embedding VECTOR(384)
);

-- Insert vector
INSERT INTO items (id, embedding) VALUES (1, '[0.1, 0.2, 0.3, ...]');
-- Nearest neighbor search (Euclidean distance)
SELECT * FROM items
ORDER BY embedding <-> '[0.1, 0.2, ...]'
LIMIT 10;

-- Cosine similarity search
SELECT * FROM items
ORDER BY embedding <=> '[0.1, 0.2, ...]'
LIMIT 10;

-- Hybrid search (vector + filter)
SELECT * FROM items
WHERE category = 'electronics'
ORDER BY embedding <-> '[0.1, 0.2, ...]'
LIMIT 10;

Vector Index

CREATE INDEX items_embedding_idx ON items
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 200);

Triggers

CREATE TRIGGER

CREATE TRIGGER trigger_name
{BEFORE | AFTER | INSTEAD OF}
{INSERT | UPDATE | DELETE} [OR ...]
ON table_name
[FOR EACH {ROW | STATEMENT}]
[WHEN (condition)]
EXECUTE body;

DROP TRIGGER

DROP TRIGGER [IF EXISTS] trigger_name ON table_name;

Row-Level Security

Row-Level Security (RLS) enables fine-grained access control at the row level, essential for multi-tenant applications.

Enable RLS on a Table

ALTER TABLE table_name ENABLE ROW LEVEL SECURITY;

CREATE POLICY

CREATE POLICY policy_name
ON table_name
[AS { PERMISSIVE | RESTRICTIVE }]
[FOR { ALL | SELECT | INSERT | UPDATE | DELETE }]
[TO { role_name | PUBLIC | CURRENT_USER }]
USING (condition)
[WITH CHECK (condition)];

Parameters: - AS PERMISSIVE (default): Multiple permissive policies are OR'd together - AS RESTRICTIVE: Multiple restrictive policies are AND'd together - FOR ALL: Policy applies to all operations - USING: Filter condition for SELECT/UPDATE/DELETE - WITH CHECK: Validation condition for INSERT/UPDATE

Examples:

-- Basic tenant isolation policy
CREATE POLICY tenant_isolation ON orders
FOR ALL
USING (tenant_id = current_tenant_id());

-- Read-only policy for public role
CREATE POLICY public_read ON products
FOR SELECT
TO PUBLIC
USING (published = true);

-- Insert policy with validation
CREATE POLICY insert_own_data ON user_content
FOR INSERT
WITH CHECK (user_id = current_user_id());

-- Combined read and write policy
CREATE POLICY own_data_access ON documents
FOR ALL
USING (owner_id = current_user_id())
WITH CHECK (owner_id = current_user_id());

-- Restrictive policy (must also pass other policies)
CREATE POLICY active_only ON accounts
AS RESTRICTIVE
FOR SELECT
USING (status = 'active');

ALTER POLICY

ALTER POLICY policy_name ON table_name
[RENAME TO new_name]
[TO { role_name | PUBLIC }]
[USING (condition)]
[WITH CHECK (condition)];

DROP POLICY

DROP POLICY [IF EXISTS] policy_name ON table_name;

Disable RLS

ALTER TABLE table_name DISABLE ROW LEVEL SECURITY;

RLS System Functions

Function Description
current_tenant_id() Returns the current tenant ID
current_tenant_name() Returns the current tenant name
current_user_id() Returns the current user ID

View Policies

-- List all RLS policies
SELECT * FROM pg_rls_policies();

-- Check if RLS is enabled on a table
SELECT relname, relrowsecurity
FROM pg_class
WHERE relname = 'table_name';

System Views

Query system metadata using these views:

Schema Information

View Description
pg_tables List all tables
pg_columns Column metadata for all tables
pg_indexes Index information
pg_class All relations (tables, indexes, views)
pg_attribute Column attributes
pg_type Data types

Branching and Time-Travel

View Description
pg_database_branches() List all branches
pg_branch_commits() Branch commit history
pg_snapshots() Available time-travel snapshots

Materialized Views

View Description
pg_mv_staleness() Materialized view freshness
pg_matviews Materialized view definitions
pg_mv_refresh_history() Refresh operation history

Vector Database

View Description
pg_vector_index_stats() Vector index statistics
pg_vector_columns() Vector column information

Multi-Tenancy and Security

View Description
pg_tenant_usage() Tenant resource usage
pg_rls_policies() Row-level security policies
pg_tenants() Tenant information

CDC and Monitoring

View Description
pg_cdc_events() Change data capture events
pg_stat_activity Current session activity
pg_stat_user_tables Table statistics

Storage and Performance (SMFI)

View Description
pg_smfi_status() Self-Maintaining Filter Index system status
pg_smfi_table_stats() Per-table SMFI statistics
pg_speculative_filters() Auto-created speculative filters
pg_storage_stats() Storage usage statistics

Examples:

-- List all tables
SELECT tablename, tableowner FROM pg_tables;

-- Check branch status
SELECT * FROM pg_database_branches();

-- Find stale materialized views
SELECT * FROM pg_mv_staleness() WHERE is_stale = true;

-- Vector index performance
SELECT * FROM pg_vector_index_stats();

-- Tenant resource usage
SELECT * FROM pg_tenant_usage() ORDER BY storage_bytes DESC;

-- SMFI filter effectiveness
SELECT table_name, filter_hit_rate, rows_filtered
FROM pg_smfi_table_stats();

Session Variables

SHOW

SHOW ALL;
SHOW variable_name;

SET

SET variable_name = value;
SET variable_name TO value;

Query Execution Settings

Variable Default Description
statement_timeout 0 Query timeout in ms (0 = unlimited)
optimizer on Enable/disable query optimizer
enable_seqscan on Allow sequential scans
enable_indexscan on Allow index scans
enable_hashjoin on Allow hash joins
enable_mergejoin on Allow merge joins
enable_nestloop on Allow nested loop joins
work_mem '4MB' Memory for sort/hash operations

Vector Search Settings

Variable Default Description
hnsw.ef_search 40 HNSW search beam width (higher = more accurate, slower)
ivf.probes 10 Number of IVF lists to probe (higher = more accurate, slower)

SMFI (Self-Maintaining Filter Index) Settings

Variable Default Description
smfi_enabled on Enable SMFI filter optimization
smfi_speculative_enabled on Enable speculative filter creation
smfi_min_table_rows 10000 Minimum rows before SMFI activates
smfi_filter_threshold 0.1 Selectivity threshold for filter creation
smfi_max_filters_per_table 10 Maximum filters per table
smfi_bloom_fpr 0.01 Bloom filter false positive rate

Tenant Context

Variable Description
tenant.id Current tenant ID (read-only via current_tenant_id())
tenant.name Current tenant name (read-only via current_tenant_name())

Debug and Logging

Variable Default Description
log_statement 'none' Log SQL statements: none, ddl, mod, all
client_min_messages 'notice' Minimum message level: debug, log, notice, warning, error

Examples:

-- Set query timeout to 30 seconds
SET statement_timeout = 30000;

-- Increase vector search accuracy
SET hnsw.ef_search = 100;

-- Disable sequential scans (force index usage)
SET enable_seqscan = off;

-- Configure SMFI for aggressive filtering
SET smfi_filter_threshold = 0.05;
SET smfi_speculative_enabled = on;

-- Check current settings
SHOW statement_timeout;
SHOW ALL;

See Also