version 1.1, 2025/06/25 03:10:20
|
version 1.5, 2025/06/30 02:18:44
|
Line 1
|
Line 1
|
-- Make `pandia` the default database for the statements that follow.
USE pandia;

-- url_domains: one row per distinct domain string; the string itself is the key.
-- The table is dropped and recreated, so re-running this script discards any
-- rows it previously held.
-- NOTE(review): in MySQL, CHARSET=utf8 is the 3-byte utf8mb3 set. Moving to
-- utf8mb4 may be desirable, but check InnoDB index-length limits on the
-- VARCHAR(255) key before changing it.
DROP TABLE IF EXISTS url_domains;

CREATE TABLE url_domains (
    url_domain VARCHAR(255) NOT NULL,
    PRIMARY KEY (url_domain)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
|
-- crawl_queue: one row per URL, keyed by the URL string.
-- The table is dropped and recreated, so re-running this script discards any
-- rows it previously held.
--
-- Columns:
--   url        the queued URL (primary key).
--   parent_url nullable; NOTE(review): presumably the page on which `url`
--              was discovered - confirm against the code that inserts rows.
--   url_domain domain part of `url`; no foreign key to url_domains is declared.
--   scheme     defaults to 'http' when not supplied.
--   analyzed   defaults to 0; NOTE(review): presumably 0 = not yet analyzed,
--              non-zero = analyzed - confirm against the code that reads it.
--   create_ts  set to the insertion time when not supplied.
DROP TABLE IF EXISTS crawl_queue;

CREATE TABLE crawl_queue (
    url        VARCHAR(255) NOT NULL,
    parent_url VARCHAR(255),
    url_domain VARCHAR(255) NOT NULL,
    -- Single-quoted literal: double quotes denote identifiers under ANSI_QUOTES.
    scheme     VARCHAR(40)  NOT NULL DEFAULT 'http',
    analyzed   TINYINT      NOT NULL DEFAULT 0,
    create_ts  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (url)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;
Line 13 CREATE TABLE blacklist
|
Line 21 CREATE TABLE blacklist
|
PRIMARY KEY(url_domain)) |
PRIMARY KEY(url_domain)) |
ENGINE=InnoDB DEFAULT CHARSET=utf8; |
ENGINE=InnoDB DEFAULT CHARSET=utf8; |
|
|
|
|
|
-- url_fulltext: one row of page content per URL, keyed by the URL string.
-- The table is dropped and recreated, so re-running this script discards any
-- rows it previously held.
--
-- Columns:
--   url             the page URL (primary key).
--   url_domain      domain part of `url`; no foreign key is declared.
--   body            required text; covered by the full-text index.
--   body_html       required text; NOT covered by the full-text index.
--                   NOTE(review): presumably `body` is the extracted plain
--                   text and `body_html` the raw markup - confirm.
--   page_title      nullable; covered by the full-text index.
--   last_indexed_dt set to the insertion time when not supplied.
DROP TABLE IF EXISTS url_fulltext;

CREATE TABLE url_fulltext (
    url             VARCHAR(255) NOT NULL,
    url_domain      VARCHAR(255) NOT NULL,
    body            MEDIUMTEXT   NOT NULL,
    body_html       MEDIUMTEXT   NOT NULL,
    page_title      TEXT,
    last_indexed_dt DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- One combined full-text index over title and body; MATCH() must name
    -- exactly these two columns to use it.
    FULLTEXT (page_title, body),
    PRIMARY KEY (url)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;