package talkcrawler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Fetches one index page located at {@code path/dir/file}, scans it for
 * {@code <LI>} entries and loads the {@link Article} referenced by each entry.
 *
 * <p>The page is first read as UTF-8. If a {@code <META>} line announcing
 * {@code charset=EUC-JP} is seen before {@code <BODY>}, the page is fetched a
 * second time and decoded as EUC-JP instead.
 */
public class DailyIndex {

    private static final Logger LOGGER = Logger.getLogger(DailyIndex.class.getName());

    /** Charset assumed for the first pass over the page. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Charset used for the second pass when the page declares it in a META tag. */
    private static final String EUC_JP_CHARSET = "EUC-JP";

    /** Marker that starts an index entry line. */
    private static final String LI_TAG = "<LI>";

    /** Length of the leading {@code <A HREF="} that {@link #getId} strips. */
    private static final int HREF_PREFIX_LENGTH = 9;

    /**
     * Number of trailing characters {@link #getId} strips from the anchor token.
     * Presumably this is the {@code .html"} tail of the link - confirm against
     * the crawled pages.
     */
    private static final int HREF_SUFFIX_LENGTH = 6;

    String path;
    String dir;
    String file;

    /**
     * Stores the three URL components; no I/O is performed here.
     *
     * @param path first URL component (everything before {@code dir})
     * @param dir  second URL component; also passed on to every {@link Article}
     * @param file last URL component, the index page itself
     * @throws java.io.IOException never thrown by this implementation; the
     *         clause is kept so that existing callers keep compiling
     */
    public DailyIndex(String path, String dir, String file) throws IOException {
        this.path = path;
        this.dir = dir;
        this.file = file;
    }

    /**
     * Downloads the index page and processes its entries via {@link #getLi}.
     *
     * <p>Any {@link IOException} or {@link RuntimeException} raised while
     * fetching or parsing is logged at {@code SEVERE} and ends the load; nothing
     * is propagated to the caller.
     */
    public void load() {
        final String address = String.format("%s/%s/%s", path, dir, file);
        try {
            boolean euc;
            try (BufferedReader rd = openReader(address, DEFAULT_CHARSET)) {
                euc = declaresEucJp(rd);
                if (!euc) {
                    // The header has been consumed; the same reader continues with the body.
                    getLi(rd);
                }
            }
            if (euc) {
                // The UTF-8 decoding was wrong for this page: fetch it again as EUC-JP.
                try (BufferedReader rd = openReader(address, EUC_JP_CHARSET)) {
                    getLi(rd);
                }
            }
        } catch (IOException | RuntimeException ex) {
            LOGGER.log(Level.SEVERE, "Failed to load index " + address, ex);
        }
    }

    /**
     * Opens a GET connection to {@code address} and wraps the response body in
     * a reader using the given charset.
     *
     * @param address     absolute URL to fetch
     * @param charsetName name of the charset used to decode the response
     * @return a reader over the response body; the caller must close it
     * @throws IOException if the URL is malformed, the request fails or the
     *         charset is not supported
     */
    private static BufferedReader openReader(String address, String charsetName) throws IOException {
        HttpURLConnection http = (HttpURLConnection) new URL(address).openConnection();
        http.setRequestMethod("GET");
        http.connect();
        InputStream in = http.getInputStream();
        try {
            return new BufferedReader(new InputStreamReader(in, charsetName));
        } catch (IOException | RuntimeException ex) {
            // Do not leave the response stream open when the decoder cannot be created.
            in.close();
            throw ex;
        }
    }

    /**
     * Consumes lines up to and including the first one that starts with
     * {@code <BODY} (or up to the EUC-JP declaration, or end of input).
     *
     * @param reader reader positioned at the start of the page
     * @return {@code true} if a line starting with {@code <META } and containing
     *         {@code CHARSET=EUC-JP} (case-insensitive) was found first
     * @throws IOException if reading fails
     */
    private static boolean declaresEucJp(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            // Locale.ROOT keeps tag matching independent of the default locale
            // (a Turkish locale would upper-case 'i' to a dotted capital I).
            String upper = line.trim().toUpperCase(Locale.ROOT);
            if (upper.startsWith("<META ") && upper.contains("CHARSET=EUC-JP")) {
                return true;
            }
            if (upper.startsWith("<BODY")) {
                return false;
            }
        }
        return false;
    }

    /**
     * Walks the remaining lines of an index page. A line starting with
     * {@code <LI>} updates the current article id; a line consisting of
     * {@code </I>} prints a separator and loads the {@link Article} for the
     * current id. All tags are matched case-insensitively.
     *
     * @param reader reader over the index page; it is not closed here
     * @throws IOException if reading the page or loading an article fails
     */
    void getLi(BufferedReader reader) throws IOException {
        String id = "";
        String line;
        while ((line = reader.readLine()) != null) {
            String str = line.trim();
            if (str.toUpperCase(Locale.ROOT).startsWith(LI_TAG)) {
                id = getId(str.substring(LI_TAG.length()));
            }
            if (str.equalsIgnoreCase("</I>")) {
                System.out.println("-----");
                new Article(MonthlyIndex.MONTHLY_INDEX, dir, id).load();
            }
        }
    }

    /**
     * Returns the second {@code '>'}-delimited token of {@code str}, trimmed.
     * For an entry such as {@code <A HREF="0001.html">Subject</A>} that is the
     * text following the opening anchor tag, including whatever precedes the
     * next {@code '>'}.
     *
     * @param str entry text following the {@code <LI>} marker
     * @return the trimmed second token, or an empty string if there is none
     * @throws IOException never thrown by this implementation; kept for
     *         signature compatibility
     */
    String getTitle(String str) throws IOException {
        StringTokenizer st = new StringTokenizer(str, ">");
        if (st.hasMoreTokens()) {
            st.nextToken(); // skip the opening anchor tag
        }
        return st.hasMoreTokens() ? st.nextToken().trim() : "";
    }

    /**
     * Extracts the article id from the opening anchor tag of an entry by
     * removing the first {@value #HREF_PREFIX_LENGTH} and the last
     * {@value #HREF_SUFFIX_LENGTH} characters of the first
     * {@code '>'}-delimited token.
     *
     * @param str entry text following the {@code <LI>} marker
     * @return the id, or an empty string when the entry does not start with
     *         {@code <A HREF=} or the tag is too short to contain an id
     * @throws IOException never thrown by this implementation; kept for
     *         signature compatibility
     */
    String getId(String str) throws IOException {
        StringTokenizer st = new StringTokenizer(str, ">");
        if (!st.hasMoreTokens()) {
            return "";
        }
        String anchor = st.nextToken().trim();
        if (!anchor.toUpperCase(Locale.ROOT).startsWith("<A HREF=")) {
            return "";
        }
        int end = anchor.length() - HREF_SUFFIX_LENGTH;
        if (end < HREF_PREFIX_LENGTH) {
            // Malformed or unexpectedly short link: report "no id" instead of
            // failing with StringIndexOutOfBoundsException.
            return "";
        }
        return anchor.substring(HREF_PREFIX_LENGTH, end);
    }
}