Unified diff of pandia/base.sql, revision 1.1 -> 1.5.

Summary of the change:
  * removes the leading `USE pandia;` statement;
  * adds the url_domains table (primary key url_domain);
  * adds parent_url, url_domain, scheme and analyzed columns to crawl_queue;
  * adds the url_fulltext table with a FULLTEXT index on (page_title, body).

NOTE(review): the line breaks below were restored from a copy that had been
collapsed onto a single line. The leading whitespace inside hunk lines is
reconstructed, not original; confirm it against base.sql, or apply with
`patch -l` (ignore whitespace) if the context fails to match.
NOTE(review): `DEFAULT "http"` uses a double-quoted string; MySQL reads that
as an identifier when the ANSI_QUOTES SQL mode is on. Consider 'http' in a
follow-up revision.

--- pandia/base.sql 2025/06/25 03:10:20 1.1
+++ pandia/base.sql 2025/06/30 02:18:44 1.5
@@ -1,8 +1,16 @@
-USE pandia;
+DROP TABLE IF EXISTS url_domains;
+CREATE TABLE url_domains
+    (url_domain VARCHAR(255) NOT NULL,
+    PRIMARY KEY(url_domain))
+    ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
 DROP TABLE IF EXISTS crawl_queue;
 CREATE TABLE crawl_queue
     (url VARCHAR(255) NOT NULL,
+    parent_url VARCHAR(255),
+    url_domain VARCHAR(255) NOT NULL,
+    scheme VARCHAR(40) NOT NULL DEFAULT "http",
+    analyzed TINYINT NOT NULL DEFAULT 0,
     create_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY(url))
     ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -13,3 +21,15 @@ CREATE TABLE blacklist
     PRIMARY KEY(url_domain))
     ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
+
+DROP TABLE IF EXISTS url_fulltext;
+CREATE TABLE url_fulltext
+    (url VARCHAR(255) NOT NULL,
+    url_domain VARCHAR(255) NOT NULL,
+    body MEDIUMTEXT NOT NULL,
+    body_html MEDIUMTEXT NOT NULL,
+    page_title TEXT,
+    last_indexed_dt DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    FULLTEXT(page_title,body),
+    PRIMARY KEY(url))
+    ENGINE=InnoDB DEFAULT CHARSET=utf8;