--- pandia/base.sql 2025/06/25 03:10:20 1.1 +++ pandia/base.sql 2025/06/28 00:33:32 1.3 @@ -1,8 +1,16 @@ -USE pandia; +DROP TABLE IF EXISTS url_domains; +CREATE TABLE url_domains + (url_domain VARCHAR(255) NOT NULL, + PRIMARY KEY(url_domain)) + ENGINE=InnoDB DEFAULT CHARSET=utf8; DROP TABLE IF EXISTS crawl_queue; CREATE TABLE crawl_queue (url VARCHAR(255) NOT NULL, + parent_url VARCHAR(255), + url_domain VARCHAR(255) NOT NULL, + scheme VARCHAR(40) NOT NULL DEFAULT "http", + analyzed TINYINT NOT NULL DEFAULT 0, create_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(url)) ENGINE=InnoDB DEFAULT CHARSET=utf8; @@ -13,3 +21,24 @@ CREATE TABLE blacklist PRIMARY KEY(url_domain)) ENGINE=InnoDB DEFAULT CHARSET=utf8; +DROP TABLE IF EXISTS keywords; +CREATE TABLE keywords + (id BIGINT(20) UNSIGNED NOT NULL AUTO_INCREMENT, + word VARCHAR(255) NOT NULL, + url VARCHAR(255) NOT NULL, + url_domain VARCHAR(255) NOT NULL, + word_count SMALLINT UNSIGNED NOT NULL, + PRIMARY KEY(id)) + ENGINE=InnoDB DEFAULT CHARSET=utf8; + +DROP TABLE IF EXISTS url_fulltext; +CREATE TABLE url_fulltext + (url VARCHAR(255) NOT NULL, + body MEDIUMTEXT NOT NULL, + page_title TEXT, + PRIMARY KEY(url)) + ENGINE=InnoDB DEFAULT CHARSET=utf8; + +CREATE INDEX idx_keyword_search_word ON keywords(word); +CREATE INDEX idx_keyword_search_url ON keywords(url); +CREATE INDEX idx_keyword_search_domain ON keywords(url_domain);