Interview Question: "How do you find all tables with a specific column name? Explain the difference between Information Schema and system catalogs. How would you track data lineage?" — Asked at Oracle, IBM, SAP for Data Governance roles
ℹ️
Difficulty: Advanced | Companies: Oracle, IBM, SAP, Informatica, Collibra | Time: 45-60 minutes
Information Schema
Standard SQL metadata interface:
-- List all tables in current schema
SELECT
table_schema,
table_name,
table_type,
pg_size_pretty(pg_total_relation_size(table_schema || '.' || table_name)) AS size
FROM information_schema.tables
WHERE table_schema = 'public'
ORDER BY pg_total_relation_size(table_schema || '.' || table_name) DESC;
-- Find all columns with specific type
SELECT
table_name,
column_name,
data_type,
character_maximum_length,
is_nullable,
column_default
FROM information_schema.columns
WHERE table_schema = 'public'
AND data_type IN ('varchar', 'text')
ORDER BY table_name, ordinal_position;
-- Find all foreign keys
SELECT
tc.table_name AS source_table,
kcu.column_name AS source_column,
ccu.table_name AS target_table,
ccu.column_name AS target_column,
tc.constraint_name
FROM information_schema.table_constraints tc
JOIN information_schema.key_column_usage kcu
ON tc.constraint_name = kcu.constraint_name
JOIN information_schema.constraint_column_usage ccu
ON ccu.constraint_name = tc.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY'
ORDER BY tc.table_name;
PostgreSQL System Catalogs
-- List all schemas
SELECT
nspname AS schema_name,
pg_get_userbyid(nspowner) AS owner,
pg_size_pretty(pg_catalog.pg_namespace_size(oid)) AS size
FROM pg_namespace
WHERE nspname NOT LIKE 'pg_%'
AND nspname != 'information_schema'
ORDER BY nspname;
-- List all tables with details
SELECT
c.relname AS table_name,
pg_get_userbyid(c.relowner) AS owner,
pg_size_pretty(pg_relation_size(c.oid)) AS table_size,
pg_size_pretty(pg_total_relation_size(c.oid)) AS total_size,
c.reltuples::bigint AS estimated_rows,
pg_stat_get_live_tuples(c.oid) AS live_rows,
pg_stat_get_dead_tuples(c.oid) AS dead_rows
FROM pg_class c
JOIN pg_namespace n ON c.relnamespace = n.oid
WHERE c.relkind = 'r'
AND n.nspname = 'public'
ORDER BY pg_total_relation_size(c.oid) DESC;
-- List all indexes
SELECT
indexname,
indexdef,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS index_size
FROM pg_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexname::regclass) DESC;
Column Lineage Tracking
-- Create metadata tracking tables
CREATE TABLE data_sources (
source_id SERIAL PRIMARY KEY,
source_name VARCHAR(100),
source_type VARCHAR(50),
connection_info JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE data_lineage (
lineage_id SERIAL PRIMARY KEY,
target_table VARCHAR(100),
target_column VARCHAR(100),
source_table VARCHAR(100),
source_column VARCHAR(100),
transformation TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_by VARCHAR(100)
);
-- Insert lineage metadata
INSERT INTO data_lineage (target_table, target_column, source_table, source_column, transformation)
VALUES
('sales_summary', 'total_revenue', 'orders', 'amount', 'SUM(amount) GROUP BY date'),
('sales_summary', 'order_count', 'orders', 'order_id', 'COUNT(order_id) GROUP BY date'),
('customer_360', 'lifetime_value', 'orders', 'amount', 'SUM(amount) WHERE status = ''completed''');
-- Query lineage
SELECT
target_table,
target_column,
source_table,
source_column,
transformation
FROM data_lineage
WHERE target_table = 'sales_summary'
ORDER BY target_column;
-- Find all upstream dependencies
WITH RECURSIVE lineage_tree AS (
SELECT
target_table,
target_column,
source_table,
source_column,
transformation,
1 AS depth
FROM data_lineage
WHERE target_table = 'sales_summary'
UNION ALL
SELECT
dl.target_table,
dl.target_column,
dl.source_table,
dl.source_column,
dl.transformation,
lt.depth + 1
FROM data_lineage dl
JOIN lineage_tree lt ON dl.target_table = lt.source_table
WHERE lt.depth < 10
)
SELECT * FROM lineage_tree
ORDER BY depth, source_table;
Table Statistics
-- Comprehensive table statistics
SELECT
schemaname,
relname AS table_name,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_live_tup,
n_dead_tup,
ROUND(n_dead_tup::DECIMAL / NULLIF(n_live_tup, 0) * 100, 2) AS dead_ratio_pct,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;
-- Index usage statistics
SELECT
schemaname,
tablename,
indexname,
idx_scan AS times_used,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS index_size,
ROUND(idx_scan::DECIMAL / NULLIF(seq_scan + idx_scan, 0) * 100, 2) AS index_usage_pct
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
Query Statistics
-- Top queries by total time
SELECT
query,
calls,
ROUND(total_time::NUMERIC, 2) AS total_time_ms,
ROUND(mean_time::NUMERIC, 2) AS avg_time_ms,
rows,
shared_hit AS cache_hits,
shared_read AS disk_reads
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 20;
-- Queries with high I/O
SELECT
query,
calls,
shared_hit,
shared_read,
ROUND(shared_read::DECIMAL / NULLIF(shared_hit + shared_read, 0) * 100, 2) AS io_ratio_pct
FROM pg_stat_statements
WHERE calls > 100
ORDER BY shared_read DESC
LIMIT 10;
Database Schema Comparison
-- Compare schemas between databases
CREATE OR REPLACE FUNCTION compare_schemas(
source_db TEXT,
target_db TEXT
) RETURNS TABLE(
object_type TEXT,
object_name TEXT,
source_exists BOOLEAN,
target_exists BOOLEAN
) AS $$
BEGIN
RETURN QUERY
SELECT
'TABLE'::TEXT AS object_type,
t.table_name,
TRUE AS source_exists,
EXISTS (
SELECT 1 FROM information_schema.tables
WHERE table_name = t.table_name
AND table_schema = 'public'
) AS target_exists
FROM information_schema.tables t
WHERE t.table_schema = 'public'
UNION ALL
SELECT
'VIEW'::TEXT,
v.table_name,
TRUE,
EXISTS (
SELECT 1 FROM information_schema.views
WHERE table_name = v.table_name
)
FROM information_schema.views v
WHERE v.table_schema = 'public';
END;
$$ LANGUAGE plpgsql;
Data Dictionary Views
-- Custom data dictionary view
CREATE OR REPLACE VIEW data_dictionary AS
SELECT
t.table_name,
t.table_type,
c.column_name,
c.ordinal_position,
c.data_type,
c.character_maximum_length,
c.is_nullable,
c.column_default,
pgd.description AS column_comment,
pt.description AS table_comment
FROM information_schema.tables t
JOIN information_schema.columns c
ON t.table_name = c.table_name
AND t.table_schema = c.table_schema
LEFT JOIN pg_catalog.pg_statio_all_tables st
ON t.table_name = st.relname
LEFT JOIN pg_catalog.pg_description pgd
ON pgd.objoid = st.relid
AND pgd.objsubid = c.ordinal_position
LEFT JOIN pg_catalog.pg_description pt
ON pt.objoid = st.relid
AND pt.objsubid = 0
WHERE t.table_schema = 'public'
ORDER BY t.table_name, c.ordinal_position;
-- Query the data dictionary
SELECT * FROM data_dictionary
WHERE table_name = 'orders'
ORDER BY ordinal_position;
Metadata Export
-- Export schema as DDL
SELECT
'CREATE TABLE ' || table_name || ' (' || E'\n' ||
STRING_AGG(
' ' || column_name || ' ' ||
data_type ||
CASE WHEN character_maximum_length IS NOT NULL
THEN '(' || character_maximum_length || ')'
ELSE ''
END ||
CASE WHEN is_nullable = 'NO' THEN ' NOT NULL' ELSE '' END ||
CASE WHEN column_default IS NOT NULL
THEN ' DEFAULT ' || column_default
ELSE ''
END,
',' || E'\n'
ORDER BY ordinal_position
) || E'\n);' AS ddl
FROM information_schema.columns
WHERE table_schema = 'public'
AND table_name = 'orders'
GROUP BY table_name;
Mathematical Formulas
Table bloat estimation:
Index usage efficiency:
ℹ️
Pro Tip: Use pg_stat_statements extension to track query performance over time. It's essential for identifying slow queries.
Common Metadata Queries
-- Find tables with specific column name
SELECT
table_name,
column_name,
data_type
FROM information_schema.columns
WHERE column_name LIKE '%email%'
AND table_schema = 'public';
-- Find tables without primary keys
SELECT
t.table_name
FROM information_schema.tables t
LEFT JOIN information_schema.table_constraints tc
ON t.table_name = tc.table_name
AND tc.constraint_type = 'PRIMARY KEY'
WHERE t.table_schema = 'public'
AND tc.constraint_name IS NULL;
-- Find unused indexes
SELECT
indexname,
idx_scan,
pg_size_pretty(pg_relation_size(indexname::regclass)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexname NOT LIKE '%_pkey'
ORDER BY pg_relation_size(indexname::regclass) DESC;
⚠️
Security Note: Restrict access to system catalogs. Use views to provide controlled access to metadata.