# Patch for pandia/base.sql, revision 1.1 -> 1.4 (revision numbers taken from the header below).
#
# Summary of what the hunks do:
#   * remove the leading `USE pandia;` statement
#   * add table url_domains (single column url_domain, which is also the primary key)
#   * add columns parent_url, url_domain, scheme and analyzed to crawl_queue
#   * add table url_fulltext (url, url_domain, body, body_html, page_title), keyed on url
#
# NOTE(review): this patch was previously stored collapsed onto a single line. Record
# boundaries were restored from the hunk line counts; the indentation inside each record
# is a reconstruction. If context lines fail to match the target file, apply with
# whitespace-insensitive matching (e.g. `patch -l`).
# NOTE(review): `DEFAULT "http"` is a double-quoted string literal; it would be read as an
# identifier if the server runs with sql_mode ANSI_QUOTES. Confirm before relying on it.
# NOTE(review): the url_domain columns in crawl_queue and url_fulltext carry no FOREIGN KEY
# to url_domains in this patch; confirm whether that is intentional.
--- pandia/base.sql 2025/06/25 03:10:20 1.1
+++ pandia/base.sql 2025/06/28 23:54:11 1.4
@@ -1,8 +1,16 @@
-USE pandia;
+DROP TABLE IF EXISTS url_domains;
+CREATE TABLE url_domains
+    (url_domain VARCHAR(255) NOT NULL,
+    PRIMARY KEY(url_domain))
+    ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
 DROP TABLE IF EXISTS crawl_queue;
 CREATE TABLE crawl_queue
     (url VARCHAR(255) NOT NULL,
+    parent_url VARCHAR(255),
+    url_domain VARCHAR(255) NOT NULL,
+    scheme VARCHAR(40) NOT NULL DEFAULT "http",
+    analyzed TINYINT NOT NULL DEFAULT 0,
     create_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
     PRIMARY KEY(url))
     ENGINE=InnoDB DEFAULT CHARSET=utf8;
@@ -13,3 +21,13 @@ CREATE TABLE blacklist
     PRIMARY KEY(url_domain))
     ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
+
+DROP TABLE IF EXISTS url_fulltext;
+CREATE TABLE url_fulltext
+    (url VARCHAR(255) NOT NULL,
+    url_domain VARCHAR(255) NOT NULL,
+    body MEDIUMTEXT NOT NULL,
+    body_html MEDIUMTEXT NOT NULL,
+    page_title TEXT,
+    PRIMARY KEY(url))
+    ENGINE=InnoDB DEFAULT CHARSET=utf8;