package talkcrawler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;

/**
 * Reads the top-level archive index page at {@link #MONTHLY_INDEX} and, for
 * every "[ 日付 ]" (Japanese for "date") link found on it, hands the link's
 * directory and file name to a {@code DailyIndex} and loads it.
 *
 * <p>The page is scanned line by line; it is not parsed as HTML. A tag is only
 * recognised when it is the first thing on a (trimmed) line.
 */
public class MonthlyIndex {
    final static String MONTHLY_INDEX = "https://lists.openstreetmap.org/pipermail/talk-ja";

    /** Upper-cased start of an anchor line, up to and including the opening quote. */
    private static final String LINK_PREFIX = "<A HREF=\"";

    /** Upper-cased end of a "by date" anchor line, from the closing quote onwards. */
    private static final String LINK_SUFFIX = "\">[ 日付 ]</A>";

    /** Prefix of the Content-Type parameter that names the response charset. */
    private static final String CHARSET_PARAM = "charset=";

    // NOTE(review): nothing in this class adds elements to this builder before
    // getA() builds and posts it — confirm whether another class fills it.
    JsonArrayBuilder arryBuild;

    /**
     * Creates the index reader with an empty JSON array builder.
     *
     * @throws java.io.IOException never thrown by this constructor itself; the
     *         clause is kept so that existing callers continue to compile
     */
    public MonthlyIndex() throws IOException {
        this.arryBuild = Json.createArrayBuilder();
    }

    /**
     * Downloads {@link #MONTHLY_INDEX} with an HTTP GET and scans it via
     * {@link #getHtml(BufferedReader)}.
     *
     * <p>The body is decoded with the charset named in the response's
     * {@code Content-Type} header; if none is declared (or it is not supported
     * by this JVM) the platform default charset is used. Any exception raised
     * while connecting, reading or processing is logged at {@code SEVERE} and
     * not rethrown.
     */
    @SuppressWarnings("UseSpecificCatch")
    public void load() {
        try {
            URL url = new URL(MONTHLY_INDEX);
            HttpURLConnection http = (HttpURLConnection) url.openConnection();
            http.setRequestMethod("GET");
            http.connect();
            // Open the stream first so a failed request surfaces as the same
            // exception as before; only then look at the response headers.
            try (InputStream in = http.getInputStream();
                 BufferedReader reader = new BufferedReader(
                         new InputStreamReader(in, charsetOf(http.getContentType())))) {
                getHtml(reader);
            }
        } catch (Exception ex) {
            // Top-level boundary: log and give up on this run.
            Logger.getLogger(MonthlyIndex.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Skips lines until one starts with {@code <HTML>} (case-insensitive), then
     * continues with {@link #getBody(BufferedReader)}. Does nothing further if
     * no such line exists.
     *
     * @param reader source of the page text; read to its end when the tag is found
     * @throws IOException if reading from {@code reader} fails
     */
    void getHtml(BufferedReader reader) throws IOException {
        // The nested call always reads to end of input, so there is nothing
        // left to scan (and no closing tag to look for) once it returns.
        if (skipToLineStartingWith(reader, "<HTML>")) {
            getBody(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <BODY} (case-insensitive), then
     * continues with {@link #getTable(BufferedReader)}.
     *
     * @param reader source of the page text
     * @throws IOException if reading from {@code reader} fails
     */
    void getBody(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<BODY")) {
            getTable(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <TABLE} (case-insensitive), then
     * continues with {@link #getTr(BufferedReader)}.
     *
     * @param reader source of the page text
     * @throws IOException if reading from {@code reader} fails
     */
    void getTable(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<TABLE")) {
            getTr(reader);
        }
    }

    /**
     * Skips lines until one starts with {@code <TR} (case-insensitive), then
     * continues with {@link #getA(BufferedReader)}.
     *
     * @param reader source of the page text
     * @throws IOException if reading from {@code reader} fails
     */
    void getTr(BufferedReader reader) throws IOException {
        if (skipToLineStartingWith(reader, "<TR")) {
            getA(reader);
        }
    }

    /**
     * Reads every remaining line and processes those of the form
     * {@code <A HREF="dir/name">[ 日付 ]</A>} (tag letters case-insensitive).
     *
     * <p>For each such line the quoted path is split on {@code '/'}; the first
     * two non-empty segments become {@code dir} and {@code name} (empty string
     * when missing). The resulting URL is printed to standard output and a
     * {@code DailyIndex} for it is loaded. After the last line,
     * {@code arryBuild} is built and passed to {@code Article.post}.
     *
     * @param reader source of the page text; read to its end
     * @throws IOException if reading from {@code reader} fails
     */
    void getA(BufferedReader reader) throws IOException {
        // Prefix and suffix each contain one of the two quotes, so a real link
        // line is at least this long; shorter "matches" share a single quote.
        final int minLength = LINK_PREFIX.length() + LINK_SUFFIX.length();

        String line;
        while ((line = reader.readLine()) != null) {
            String str = line.trim();
            if (str.length() < minLength) {
                continue;
            }
            String upper = str.toUpperCase(Locale.ROOT);
            if (!upper.startsWith(LINK_PREFIX) || !upper.endsWith(LINK_SUFFIX)) {
                continue;
            }

            // Cut the path out of the original (not upper-cased) text.
            String path = str.substring(LINK_PREFIX.length(), str.length() - LINK_SUFFIX.length());
            String dir = "";
            String name = "";
            StringTokenizer st = new StringTokenizer(path, "/");
            if (st.hasMoreTokens()) {
                dir = st.nextToken().trim();
            }
            if (st.hasMoreTokens()) {
                name = st.nextToken().trim();
            }

            System.out.println(String.format("%s/%s/%s", MONTHLY_INDEX, dir, name));
            DailyIndex daily = new DailyIndex(MONTHLY_INDEX, dir, name);
            daily.load();
        }
        Article.post(arryBuild.build());
    }

    /**
     * Consumes lines from {@code reader} up to and including the first one
     * whose trimmed, upper-cased text starts with {@code upperPrefix}.
     *
     * @param reader      source of lines
     * @param upperPrefix prefix to look for, already in upper case
     * @return {@code true} if such a line was found, {@code false} if the end
     *         of input was reached first
     * @throws IOException if reading from {@code reader} fails
     */
    private static boolean skipToLineStartingWith(BufferedReader reader, String upperPrefix)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.trim().toUpperCase(Locale.ROOT).startsWith(upperPrefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Picks the charset for decoding a response from its Content-Type value.
     *
     * @param contentType value of the {@code Content-Type} header, may be {@code null}
     * @return the charset named by the header's {@code charset} parameter, or
     *         the platform default charset when the header is absent, has no
     *         such parameter, or names a charset this JVM does not support
     */
    private static Charset charsetOf(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (!param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                    continue;
                }
                String charsetName = param.substring(CHARSET_PARAM.length()).trim();
                // The value may be a quoted string.
                if (charsetName.length() >= 2
                        && charsetName.startsWith("\"") && charsetName.endsWith("\"")) {
                    charsetName = charsetName.substring(1, charsetName.length() - 1);
                }
                try {
                    return Charset.forName(charsetName);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported name: decode as before, with the default.
                    break;
                }
            }
        }
        return Charset.defaultCharset();
    }
}