-- Revision comparison: version 1.1 (2025/06/25 03:10:20) against version 1.3 (2025/06/28 00:33:32).
-- Both sides of the comparison start at line 1.
-- All objects below are created in the `pandia` schema.
USE pandia;

-- url_domains: one row per domain name.
-- The domain string itself is the primary key, so each domain can be stored
-- only once. The script drops the table first so it can be re-run from scratch
-- (any existing rows are lost).
DROP TABLE IF EXISTS url_domains;

CREATE TABLE url_domains (
    url_domain VARCHAR(255) NOT NULL,
    PRIMARY KEY (url_domain)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- crawl_queue: one row per URL (url is the primary key).
-- Dropped and recreated on every run of this script, so existing rows are lost.
--
-- Columns:
--   url        - the URL; primary key, limited to 255 characters.
--   parent_url - nullable. NOTE(review): presumably the page on which this URL
--                was discovered - confirm against the code that inserts rows.
--   url_domain - domain of the URL. No foreign key to url_domains is declared.
--   scheme     - defaults to 'http' when the writer does not supply one.
--   analyzed   - defaults to 0. NOTE(review): looks like a 0/1 "already
--                processed" flag - confirm.
--   create_ts  - set to the insertion time by default.
DROP TABLE IF EXISTS crawl_queue;

CREATE TABLE crawl_queue (
    url        VARCHAR(255) NOT NULL,
    parent_url VARCHAR(255),
    url_domain VARCHAR(255) NOT NULL,
    -- Single quotes: a double-quoted "http" is parsed as an identifier when the
    -- server runs with sql_mode=ANSI_QUOTES.
    scheme     VARCHAR(40)  NOT NULL DEFAULT 'http',
    analyzed   TINYINT      NOT NULL DEFAULT 0,
    create_ts  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (url)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

Line 13 CREATE TABLE blacklist
|
Line 21 CREATE TABLE blacklist
|
PRIMARY KEY(url_domain)) |
PRIMARY KEY(url_domain)) |
ENGINE=InnoDB DEFAULT CHARSET=utf8; |
ENGINE=InnoDB DEFAULT CHARSET=utf8; |
|
|
|
-- keywords: one row per stored (word, url) entry, identified by a surrogate
-- auto-increment id. No uniqueness is enforced on (word, url), so the same
-- pair can appear more than once.
-- Dropped and recreated on every run of this script, so existing rows are lost.
--
-- Columns:
--   id         - auto-generated surrogate key.
--   word       - the keyword text.
--   url        - URL the row refers to. No foreign key is declared.
--   url_domain - domain of that URL. No foreign key is declared.
--   word_count - unsigned 16-bit counter (0..65535). NOTE(review): presumably
--                the number of occurrences of `word` on `url` - confirm.
DROP TABLE IF EXISTS keywords;

CREATE TABLE keywords (
    -- Integer display width (the former "(20)") is deprecated and has no
    -- effect on storage or range, so it is omitted.
    id         BIGINT UNSIGNED   NOT NULL AUTO_INCREMENT,
    word       VARCHAR(255)      NOT NULL,
    url        VARCHAR(255)      NOT NULL,
    url_domain VARCHAR(255)      NOT NULL,
    word_count SMALLINT UNSIGNED NOT NULL,
    PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- url_fulltext: one row per URL (url is the primary key) holding the page text.
-- Dropped and recreated on every run of this script, so existing rows are lost.
--
-- Columns:
--   url        - the URL; primary key, limited to 255 characters.
--   body       - required text content (MEDIUMTEXT).
--   page_title - optional title; NULL is allowed.
DROP TABLE IF EXISTS url_fulltext;

CREATE TABLE url_fulltext (
    url        VARCHAR(255) NOT NULL,
    body       MEDIUMTEXT   NOT NULL,
    page_title TEXT,
    PRIMARY KEY (url)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Secondary single-column indexes on keywords, one for each of the columns
-- word, url and url_domain. These statements rely on the keywords table having
-- just been recreated by this script; run against a table that already has
-- these indexes, they fail with a duplicate-key-name error.
CREATE INDEX idx_keyword_search_word ON keywords (word);

CREATE INDEX idx_keyword_search_url ON keywords (url);

CREATE INDEX idx_keyword_search_domain ON keywords (url_domain);