package talkcrawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import tool.http.Post;
/**
 * Downloads one daily index page and, for every article entry found in it,
 * loads the article and posts it as JSON.
 *
 * <p>The page URL is built as {@code <MONTHLY_INDEX>/<dir>/<file>}. The page is
 * first read as UTF-8; when its header declares {@code CHARSET=EUC-JP} it is
 * downloaded a second time and decoded as EUC-JP.
 */
public class DailyIndex {
    private static final Logger LOGGER = Logger.getLogger(DailyIndex.class.getName());

    /**
     * Line prefix that announces the article count. Matched case-insensitively;
     * once it has been seen, the next {@code </UL>} ends the scan.
     */
    private static final String ARTICLE_COUNT_MARKER = "<b>記事数:</b>";

    /** Number of leading characters dropped from an anchor token (length of {@code <A HREF="}). */
    private static final int HREF_PREFIX_LENGTH = 9;

    /**
     * Number of trailing characters dropped from an anchor token.
     * NOTE(review): presumably the length of {@code .html"} - confirm against real index pages.
     */
    private static final int HREF_SUFFIX_LENGTH = 6;

    String path;
    String dir;
    String file;
    TalkCrawlerProperties prop;

    /**
     * Creates an index reader.
     *
     * @param prop properties; the value of {@code "MONTHLY_INDEX"} is used as the base URL
     * @param dir  directory component appended to the base URL
     * @param file file name of the index page inside {@code dir}
     * @throws java.io.IOException declared for caller compatibility; not thrown by this constructor itself
     */
    public DailyIndex(TalkCrawlerProperties prop, String dir, String file) throws IOException {
        this.prop = prop;
        this.path = prop.getProperty("MONTHLY_INDEX");
        this.dir = dir;
        this.file = file;
    }

    /**
     * Fetches the index page and processes its article list.
     *
     * <p>Any failure is logged at {@link Level#SEVERE} and ends processing of this
     * index; no exception reaches the caller.
     */
    @SuppressWarnings("UseSpecificCatch")
    public void load() {
        String address = String.format("%s/%s/%s", path, dir, file);

        boolean euc = false;
        try (BufferedReader rd = openReader(address, "UTF-8")) {
            euc = declaresEucJp(rd);
            if (!euc) {
                // The header scan stopped at <BODY> (or end of stream); keep reading from there.
                getLi(rd);
            }
        }
        catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to load daily index " + address, ex);
            return;
        }

        if (!euc) {
            return;
        }
        // The page declared EUC-JP, so the UTF-8 pass decoded it wrongly: fetch it again.
        try (BufferedReader rd = openReader(address, "EUC-JP")) {
            getLi(rd);
        }
        catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Failed to load daily index " + address + " as EUC-JP", ex);
        }
    }

    /** Opens a GET connection to {@code address} and returns a reader decoding the body with {@code charsetName}. */
    private static BufferedReader openReader(String address, String charsetName) throws IOException {
        HttpURLConnection http = (HttpURLConnection) new URL(address).openConnection();
        http.setRequestMethod("GET");
        http.connect();
        return new BufferedReader(new InputStreamReader(http.getInputStream(), charsetName));
    }

    /**
     * Consumes header lines up to and including the first line starting with {@code <BODY}
     * and reports whether a {@code <META ...>} line containing {@code CHARSET=EUC-JP} was seen first.
     */
    private static boolean declaresEucJp(BufferedReader rd) throws IOException {
        String line;
        while ((line = rd.readLine()) != null) {
            // Locale.ROOT keeps tag matching independent of the JVM default locale.
            String upper = line.trim().toUpperCase(Locale.ROOT);
            if (upper.startsWith("<META ") && upper.contains("CHARSET=EUC-JP")) {
                return true;
            }
            if (upper.startsWith("<BODY")) {
                return false;
            }
        }
        return false;
    }

    /** Locale-independent, case-insensitive {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Scans the index body line by line.
     *
     * <p>A line starting with {@code <LI>} supplies the current article id (see
     * {@link #getId(String)}). A line consisting solely of {@code </I>} closes an entry:
     * the article with the current id is loaded and posted. After the article-count
     * marker line has been seen, the first line starting with {@code </UL>} ends the scan,
     * so markup following the article list is not processed.
     *
     * @param reader reader positioned inside the page body
     * @throws IOException if reading fails, or if loading or posting an article fails with one
     */
    void getLi(BufferedReader reader) throws IOException {
        String id = "";
        boolean inArticleList = false;
        String line;
        while ((line = reader.readLine()) != null) {
            String str = line.trim();
            if (startsWithIgnoreCase(str, ARTICLE_COUNT_MARKER)) {
                inArticleList = true;
            }
            if (startsWithIgnoreCase(str, "<LI>")) {
                id = getId(str.substring(4)); // 4 <-- length('<LI>')
            }
            if (str.equalsIgnoreCase("</I>")) {
                System.out.println("-----");
                Article article = new Article(path, dir, id);
                article.load();
                new Post(prop).post(Article.toJsonArray(article.toJsonObject()));
            }
            if (inArticleList && startsWithIgnoreCase(str, "</UL>")) {
                break;
            }
        }
    }

    /**
     * Returns the second {@code '>'}-delimited token of {@code str}, trimmed.
     *
     * <p>For {@code <A HREF="x.html">Title</A>} this is {@code Title</A}: the text up to the
     * next {@code '>'}, so the start of the closing tag is still attached.
     *
     * @param str entry text following {@code <LI>}
     * @return the second token, or an empty string when there are fewer than two tokens
     * @throws IOException never thrown here; kept in the signature for callers
     */
    String getTitle(String str) throws IOException {
        StringTokenizer st = new StringTokenizer(str, ">");
        if (st.hasMoreTokens()) {
            st.nextToken(); // skip the opening anchor tag
        }
        return st.hasMoreTokens() ? st.nextToken().trim() : "";
    }

    /**
     * Extracts the article id from the anchor that opens an entry.
     *
     * <p>The first {@code '>'}-delimited token must start with {@code <A HREF=}
     * (case-insensitive). Its first 9 and last 6 characters are dropped and the
     * remainder is returned with its original case.
     *
     * @param str entry text following {@code <LI>}
     * @return the id, or an empty string when there is no anchor or the anchor is too short
     * @throws IOException never thrown here; kept in the signature for callers
     */
    String getId(String str) throws IOException {
        StringTokenizer st = new StringTokenizer(str, ">");
        if (!st.hasMoreTokens()) {
            return "";
        }
        String anchor = st.nextToken().trim();
        if (!startsWithIgnoreCase(anchor, "<A HREF=")) {
            return "";
        }
        int end = anchor.length() - HREF_SUFFIX_LENGTH;
        if (end < HREF_PREFIX_LENGTH) {
            // Malformed or unexpectedly short anchor: report "no id" instead of throwing.
            return "";
        }
        return anchor.substring(HREF_PREFIX_LENGTH, end);
    }
}