Go Back   |   Next Page

Text Search

SQLite

Text search converts large volumes of text into tokens (followed by stemming and stop-word removal), which are then indexed. Most databases provide the ability to perform full-text search(FTS).

--Install SQLite 3
brew install sqlite

--Start SQLite3
sqlite3

--See the configuration
sqlite> .dbconfig
          defensive off
            dqs_ddl on
            dqs_dml on
        enable_fkey off
        enable_qpsg off
     enable_trigger on
        enable_view on
     fts3_tokenizer off
 legacy_alter_table on
 legacy_file_format off
     load_extension off
   no_ckpt_on_close off
     reset_database off
  reverse_scanorder off
    stmt_scanstatus off
        trigger_eqp off
     trusted_schema off
    writable_schema off

--See the compile options
sqlite> PRAGMA COMPILE_OPTIONS;
ATOMIC_INTRINSICS=1
BUG_COMPATIBLE_20160819
CCCRYPT256
COMPILER=clang-17.0.0
DEFAULT_AUTOVACUUM
DEFAULT_CACHE_SIZE=2000
DEFAULT_CKPTFULLFSYNC
DEFAULT_FILE_FORMAT=4
DEFAULT_JOURNAL_SIZE_LIMIT=32768
DEFAULT_LOOKASIDE=1200,102
DEFAULT_MEMSTATUS=0
DEFAULT_MMAP_SIZE=0
DEFAULT_PAGE_SIZE=4096
DEFAULT_PCACHE_INITSZ=20
DEFAULT_RECURSIVE_TRIGGERS
DEFAULT_SECTOR_SIZE=4096
DEFAULT_SYNCHRONOUS=2
DEFAULT_WAL_AUTOCHECKPOINT=1000
DEFAULT_WAL_SYNCHRONOUS=1
DEFAULT_WORKER_THREADS=0
DQS=3
ENABLE_API_ARMOR
ENABLE_BYTECODE_VTAB
ENABLE_COLUMN_METADATA
ENABLE_DBPAGE_VTAB
ENABLE_DBSTAT_VTAB
ENABLE_EXPLAIN_COMMENTS
ENABLE_FTS3
ENABLE_FTS3_PARENTHESIS
ENABLE_FTS3_TOKENIZER
ENABLE_FTS4
ENABLE_FTS5
ENABLE_LOCKING_STYLE=1
ENABLE_MATH_FUNCTIONS
ENABLE_NORMALIZE
ENABLE_PREUPDATE_HOOK
ENABLE_RTREE
ENABLE_SESSION
ENABLE_SNAPSHOT
ENABLE_SQLLOG
ENABLE_STMT_SCANSTATUS
ENABLE_UNKNOWN_SQL_FUNCTION
ENABLE_UPDATE_DELETE_LIMIT
HAS_CODEC_RESTRICTED
HAVE_ISNAN
MALLOC_SOFT_LIMIT=1024
MAX_ATTACHED=10
MAX_COLUMN=2000
MAX_COMPOUND_SELECT=500
MAX_DEFAULT_PAGE_SIZE=8192
MAX_EXPR_DEPTH=1000
MAX_FUNCTION_ARG=127
MAX_LENGTH=2147483645
MAX_LIKE_PATTERN_LENGTH=50000
MAX_MMAP_SIZE=1073741824
MAX_PAGE_COUNT=1073741823
MAX_PAGE_SIZE=65536
MAX_SQL_LENGTH=1000000000
MAX_TRIGGER_DEPTH=1000
MAX_VARIABLE_NUMBER=500000
MAX_VDBE_OP=250000000
MAX_WORKER_THREADS=8
MUTEX_UNFAIR
OMIT_AUTORESET
OMIT_LOAD_EXTENSION
STMTJRNL_SPILL=131072
SYSTEM_MALLOC
TEMP_STORE=1
THREADSAFE=2
USE_URI

Create an FTS table for storing data, and insert some data:

CREATE VIRTUAL TABLE knowledgebase
USING FTS5(title, body);
--Pass as the last parameter: TOKENIZE=icu en_AU

INSERT INTO knowledgebase(title, body)
VALUES
('How to move mount Fuji?', 'Use drills to cut it into rock-cubes and transport it by rail, assuming it is not still an active volcano'),
('What will you binge-watch?', 'Outer Range on Prime looks like a good series to watch over a weekend.');

SELECT * FROM posts('fuji') ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'fuji' ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'mount+Fuji' ORDER BY RANK;
SELECT * FROM posts WHERE title MATCH 'fuj*' ORDER BY RANK;
SELECT * FROM posts WHERE body MATCH 'to NOT by' ORDER BY RANK; --"NOT" is case-sensitive; OR, AND and brackets can be used.
SELECT * FROM posts WHERE title='How to move mount Fuji?'; --Slow query; linear scan

SELECT
HIGHLIGHT(posts, 0, '-=', '=-') title,
HIGHLIGHT(posts, 1, '-=', '=-') body
FROM posts('fuji');

SELECT 
SNIPPET(posts, 0, '-=', '=-', '...', 2) title,
SNIPPET(posts, 1, '-=', '=-', '...', 2) body
FROM posts('the');

--Also see: BM25(), MATCHINFO(), OFFSETS()

More info about the FTS options at:
https://www.sqlite.org/fts3.html

Keeping the virtual table in sync with a trigger:
https://forum.xojo.com/t/sqlite-fts5-how-to-stay-in-sync-virtual-table-with-the-database/55938/8

Go Back   |   Next Page