Text search converts large volumes of text into tokens (followed by stemming and stop-word removal), which are then indexed. Most databases provide the ability to perform full-text search(FTS).
--Install SQLite 3 brew install sqlite --Start SQLite3 sqlite3 --See the configuration sqlite> .dbconfig defensive off dqs_ddl on dqs_dml on enable_fkey off enable_qpsg off enable_trigger on enable_view on fts3_tokenizer off legacy_alter_table on legacy_file_format off load_extension off no_ckpt_on_close off reset_database off reverse_scanorder off stmt_scanstatus off trigger_eqp off trusted_schema off writable_schema off --See the compile options sqlite> PRAGMA COMPILE_OPTIONS; ATOMIC_INTRINSICS=1 BUG_COMPATIBLE_20160819 CCCRYPT256 COMPILER=clang-17.0.0 DEFAULT_AUTOVACUUM DEFAULT_CACHE_SIZE=2000 DEFAULT_CKPTFULLFSYNC DEFAULT_FILE_FORMAT=4 DEFAULT_JOURNAL_SIZE_LIMIT=32768 DEFAULT_LOOKASIDE=1200,102 DEFAULT_MEMSTATUS=0 DEFAULT_MMAP_SIZE=0 DEFAULT_PAGE_SIZE=4096 DEFAULT_PCACHE_INITSZ=20 DEFAULT_RECURSIVE_TRIGGERS DEFAULT_SECTOR_SIZE=4096 DEFAULT_SYNCHRONOUS=2 DEFAULT_WAL_AUTOCHECKPOINT=1000 DEFAULT_WAL_SYNCHRONOUS=1 DEFAULT_WORKER_THREADS=0 DQS=3 ENABLE_API_ARMOR ENABLE_BYTECODE_VTAB ENABLE_COLUMN_METADATA ENABLE_DBPAGE_VTAB ENABLE_DBSTAT_VTAB ENABLE_EXPLAIN_COMMENTS ENABLE_FTS3 ENABLE_FTS3_PARENTHESIS ENABLE_FTS3_TOKENIZER ENABLE_FTS4 ENABLE_FTS5 ENABLE_LOCKING_STYLE=1 ENABLE_MATH_FUNCTIONS ENABLE_NORMALIZE ENABLE_PREUPDATE_HOOK ENABLE_RTREE ENABLE_SESSION ENABLE_SNAPSHOT ENABLE_SQLLOG ENABLE_STMT_SCANSTATUS ENABLE_UNKNOWN_SQL_FUNCTION ENABLE_UPDATE_DELETE_LIMIT HAS_CODEC_RESTRICTED HAVE_ISNAN MALLOC_SOFT_LIMIT=1024 MAX_ATTACHED=10 MAX_COLUMN=2000 MAX_COMPOUND_SELECT=500 MAX_DEFAULT_PAGE_SIZE=8192 MAX_EXPR_DEPTH=1000 MAX_FUNCTION_ARG=127 MAX_LENGTH=2147483645 MAX_LIKE_PATTERN_LENGTH=50000 MAX_MMAP_SIZE=1073741824 MAX_PAGE_COUNT=1073741823 MAX_PAGE_SIZE=65536 MAX_SQL_LENGTH=1000000000 MAX_TRIGGER_DEPTH=1000 MAX_VARIABLE_NUMBER=500000 MAX_VDBE_OP=250000000 MAX_WORKER_THREADS=8 MUTEX_UNFAIR OMIT_AUTORESET OMIT_LOAD_EXTENSION STMTJRNL_SPILL=131072 SYSTEM_MALLOC TEMP_STORE=1 THREADSAFE=2 USE_URI
Create an FTS table for storing data, and insert some data:
CREATE VIRTUAL TABLE knowledgebase
USING FTS5(title, body);
--Pass as the last parameter: TOKENIZE=icu en_AU
INSERT INTO knowledgebase(title, body)
VALUES
('How to move mount Fuji?', 'Use drills to cut it into rock-cubes and transport it by rail, assuming it is not still an active volcano'),
('What will you binge-watch?', 'Outer Range on Prime looks like a good series to watch over a weekend.');
SELECT * FROM posts('fuji') ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'fuji' ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'mount+Fuji' ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'fuj*' ORDER BY RANK;
SELECT * FROM posts WHERE body MATCH 'to NOT by' ORDER BY RANK; --"NOT" is case-sensitive; OR, AND and brackets can be used.
SELECT * FROM posts WHERE title='How to move mount Fuji?'; --Slow query; linear scan
SELECT
HIGHLIGHT(posts, 0, '-=', '=-') title,
HIGHLIGHT(posts, 1, '-=', '=-') body
FROM posts('fuji');
SELECT
SNIPPET(posts, 0, '-=', '=-', '...', 2) title,
SNIPPET(posts, 1, '-=', '=-', '...', 2) body
FROM posts('the');
--Also see: BM25(), MATCHINFO(), OFFSETS()
More info about the FTS options at:
https://www.sqlite.org/fts3.html
Keeping the virtual table in sync with a trigger:
https://forum.xojo.com/t/sqlite-fts5-how-to-stay-in-sync-virtual-table-with-the-database/55938/8