package talkcrawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
/**
 * Downloads the monthly index page named by the "MONTHLY_INDEX" property and,
 * for every "[ 日付 ]" ("date") link found on it, creates a {@link DailyIndex}
 * and calls its {@code load()} method.
 *
 * <p>The page is scanned line by line, not parsed as HTML: each tag of
 * interest has to start its own (trimmed) line for it to be recognised.
 */
public class MonthlyIndex {
    /** Start of an anchor line, up to and including the opening quote of the link target. */
    private static final String ANCHOR_PREFIX = "<A HREF=\"";

    /** End of a "[ 日付 ]" anchor line, starting at the closing quote of the link target. */
    private static final String ANCHOR_SUFFIX = "\">[ 日付 ]</A>";

    /** Crawler settings; supplies "MONTHLY_INDEX" and is passed on to every DailyIndex. */
    TalkCrawlerProperties prop;

    /** JSON array builder created by the constructor; nothing in this class adds to it or builds it. */
    JsonArrayBuilder arryBuild;

    /** URL of the monthly index page, e.g. https://lists.openstreetmap.org/pipermail/talk-ja */
    URL monthlyIndexUrl;

    /**
     * Creates the crawler entry point for the page named by the "MONTHLY_INDEX" property.
     *
     * @param prop crawler settings; must provide the "MONTHLY_INDEX" property
     * @throws java.io.IOException if the property value cannot be turned into a URL
     *         (java.net.MalformedURLException is an IOException)
     */
    public MonthlyIndex(TalkCrawlerProperties prop) throws IOException {
        this.prop = prop;
        this.arryBuild = Json.createArrayBuilder();
        this.monthlyIndexUrl = new URL(prop.getProperty("MONTHLY_INDEX"));
    }

    /**
     * Fetches the monthly index page with an HTTP GET and scans it via {@link #getHtml}.
     *
     * <p>This method never throws: any I/O or runtime failure is logged at SEVERE
     * level and the method returns. The connection is disconnected on every path.
     */
    public void load() {
        HttpURLConnection http = null;
        try {
            http = (HttpURLConnection) monthlyIndexUrl.openConnection();
            http.setRequestMethod("GET");
            http.connect();
            // Decode with the charset the server declares so that matching the
            // Japanese link label does not depend on the JVM's default encoding.
            Charset charset = responseCharset(http.getContentType());
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(http.getInputStream(), charset))) {
                getHtml(reader);
            }
        }
        catch (IOException | RuntimeException ex) {
            Logger.getLogger(MonthlyIndex.class.getName()).log(
                    Level.SEVERE, "Failed to load monthly index " + monthlyIndexUrl, ex);
        }
        finally {
            if (http != null) {
                http.disconnect();
            }
        }
    }

    /**
     * Skips lines until one starts with {@code <HTML>} (any case), then hands the
     * rest of the reader to {@link #getBody}. Does nothing if no such line exists.
     *
     * @param reader source of the page text; read to its end when the tag is found
     * @throws IOException if reading fails
     */
    void getHtml(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<HTML>")) {
            getBody(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <BODY} (any case), then hands the
     * rest of the reader to {@link #getTable}.
     *
     * @param reader source of the page text
     * @throws IOException if reading fails
     */
    void getBody(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<BODY")) {
            getTable(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <TABLE} (any case), then hands the
     * rest of the reader to {@link #getTr}.
     *
     * @param reader source of the page text
     * @throws IOException if reading fails
     */
    void getTable(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<TABLE")) {
            getTr(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <TR} (any case), then hands the
     * rest of the reader to {@link #getA}.
     *
     * @param reader source of the page text
     * @throws IOException if reading fails
     */
    void getTr(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<TR")) {
            getA(reader);
        }
    }

    /**
     * Reads every remaining line and, for each line of the form
     * {@code <A HREF="dir/name">[ 日付 ]</A>} (tag case ignored), prints
     * {@code ./dir/name} to standard output and loads a {@link DailyIndex}
     * built from {@code dir} and {@code name}.
     *
     * <p>{@code dir} and {@code name} are the first two non-empty "/"-separated
     * parts of the link target; a missing part is passed as an empty string.
     *
     * @param reader source of the page text; read to its end
     * @throws IOException if reading fails
     */
    void getA(BufferedReader reader) throws IOException {
        // The prefix ends and the suffix begins with a quote, so a short line such as
        // <A HREF=">[ 日付 ]</A> satisfies both checks by overlapping; requiring the
        // combined length keeps the substring bounds below valid.
        final int minLength = ANCHOR_PREFIX.length() + ANCHOR_SUFFIX.length();
        String line;
        while ((line = reader.readLine()) != null) {
            String str = line.trim();
            if (str.length() < minLength
                    || !startsWithIgnoreCase(str, ANCHOR_PREFIX)
                    || !endsWithIgnoreCase(str, ANCHOR_SUFFIX)) {
                continue;
            }
            String path = str.substring(ANCHOR_PREFIX.length(), str.length() - ANCHOR_SUFFIX.length());
            String dir = "";
            String name = "";
            StringTokenizer st = new StringTokenizer(path, "/");
            if (st.hasMoreTokens()) {
                dir = st.nextToken().trim();
            }
            if (st.hasMoreTokens()) {
                name = st.nextToken().trim();
            }
            System.out.println(String.format("./%s/%s", dir, name));
            DailyIndex daily = new DailyIndex(prop, dir, name);
            daily.load();
        }
        // Disabled in the original source: new Post().post(arryBuild.build());
    }

    /**
     * Consumes lines up to and including the first whose trimmed text starts with
     * {@code tag}, ignoring case.
     *
     * @return {@code true} if such a line was found, {@code false} if the reader ended first
     */
    private static boolean skipToLineStartingWith(BufferedReader reader, String tag) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (startsWithIgnoreCase(line.trim(), tag)) {
                return true;
            }
        }
        return false;
    }

    /** Case-insensitive {@code startsWith} that compares in place, without building an upper-cased copy. */
    private static boolean startsWithIgnoreCase(String s, String prefix) {
        return s.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Case-insensitive {@code endsWith}; returns false when {@code s} is shorter than {@code suffix}. */
    private static boolean endsWithIgnoreCase(String s, String suffix) {
        return s.regionMatches(true, s.length() - suffix.length(), suffix, 0, suffix.length());
    }

    /**
     * Picks the charset for decoding the response body.
     *
     * @param contentType value of the Content-Type header, may be {@code null}
     * @return the charset named by the header's {@code charset=} parameter, or the
     *         platform default when the header is absent, has no such parameter,
     *         or names a charset this JVM does not know
     */
    private static Charset responseCharset(String contentType) {
        final String key = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!startsWithIgnoreCase(param, key)) {
                    continue;
                }
                String value = param.substring(key.length()).trim();
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                try {
                    return Charset.forName(value);
                }
                catch (IllegalArgumentException ignored) {
                    // Malformed or unsupported charset name: fall back to the default below.
                    break;
                }
            }
        }
        return Charset.defaultCharset();
    }
}