diff --git a/src/talkcrawler/Article.java b/src/talkcrawler/Article.java index 14b2302..e03ae67 100644 --- a/src/talkcrawler/Article.java +++ b/src/talkcrawler/Article.java @@ -133,7 +133,7 @@ try { if (euc) { - URL url = new URL(String.format("%s/%s/%s", path, dir, id)); + URL url = new URL(String.format("%s/%s/%s.html", path, dir, id)); HttpURLConnection http = (HttpURLConnection)url.openConnection(); http.setRequestMethod("GET"); http.connect(); @@ -158,6 +158,7 @@ title = str.substring(13, str.length() - 5); // 13 <-- length('

[OSM-ja] ') } if (str.startsWith("") && str.endsWith("")) { + System.out.println(this.id); String dateStr = str.substring(3, str.length() - 4); StringTokenizer st = new StringTokenizer(str, "()"); @@ -169,7 +170,14 @@ date = sdf1.parse(date1 + date3); } catch(ParseException e) { - e.printStackTrace(); + try { + SimpleDateFormat sdf2 = new SimpleDateFormat("''yyyy'年 'M'月 'd'日'HH:mm:ss' GMT'"); + date = sdf2.parse(date1 + date3); + } + catch (ParseException e2) { + e2.printStackTrace(); + throw new IOException(e2); + } } } } diff --git a/src/talkcrawler/DailyIndex.java b/src/talkcrawler/DailyIndex.java index 5bc0a56..ab722a4 100644 --- a/src/talkcrawler/DailyIndex.java +++ b/src/talkcrawler/DailyIndex.java @@ -59,7 +59,6 @@ getLi(rd); } } - } catch (Exception ex) { Logger.getLogger(DailyIndex.class.getName()).log(Level.SEVERE, null, ex); @@ -88,8 +87,12 @@ String title = ""; String id = ""; String name = ""; + boolean datain = false; while((line = reader.readLine()) != null) { String str = line.trim(); + if (str.toUpperCase().startsWith("記事数:")) { + datain = true; + } if (str.toUpperCase().startsWith("
  • ")) { String str1 = str.substring(4); // 4 <-- length('
  • ') title = getTitle(str1); @@ -104,6 +107,9 @@ article.load(); Post.post(Article.toJsonArray(article.toJsonObject())); } + if (datain && str.toUpperCase().startsWith("")) { + break; + } } } diff --git a/src/tool/http/Post.java b/src/tool/http/Post.java index c16b729..a32fd90 100644 --- a/src/tool/http/Post.java +++ b/src/tool/http/Post.java @@ -23,7 +23,7 @@ String jsonText = array.toString(); HttpURLConnection con = null; try { - URL url = new URL("http://172.17.0.1:8983/solr/talkja/update/json?commit=true"); + URL url = new URL("http://surveyor.mydns.jp/solr/talkja/update/json?commit=true"); con = (HttpURLConnection) url.openConnection(); con.setConnectTimeout(60000); con.setReadTimeout(60000); diff --git a/test/talkcrawler/ArticleTest.java b/test/talkcrawler/ArticleTest.java new file mode 100644 index 0000000..9d1489c --- /dev/null +++ b/test/talkcrawler/ArticleTest.java @@ -0,0 +1,83 @@ +package talkcrawler; + +import static junit.framework.TestCase.fail; +import org.junit.Test; +import tool.http.Post; + +public class ArticleTest { + /** + * + */ + @Test + public void test2019_01_010424() { + try { + Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2019-January", "010424"); + ins.load(); + Post.post(Article.toJsonArray(ins.toJsonObject())); + } + catch (Exception e) { + fail(); + } + } + + /** + * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008473.html + */ + @Test + public void test2014_08_008473() { + try { + Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008473"); + ins.load(); + Post.post(Article.toJsonArray(ins.toJsonObject())); + } + catch (Exception e) { + fail(); + } + } + + /** + * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008479.html + */ + @Test + public void test2014_08_008479() { + try { + Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008479"); + ins.load(); + Post.post(Article.toJsonArray(ins.toJsonObject())); + } + catch (Exception e) { + fail(); + } + } + + /** + * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008480.html + * + */ + @Test + public void test2014_08_008480() { + try { + Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008480"); + ins.load(); + Post.post(Article.toJsonArray(ins.toJsonObject())); + } + catch (Exception e) { + fail(); + } + } + + /** + * https://lists.openstreetmap.org/pipermail/talk-ja/2008-March/date.html + */ + @Test + public void test2008_03_000000() { + try { + Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2008-March", "000000"); + ins.load(); + Post.post(Article.toJsonArray(ins.toJsonObject())); + } + catch (Exception e) { + fail(); + } + } +} diff --git a/test/talkcrawler/DailyIndexTest.java b/test/talkcrawler/DailyIndexTest.java new file mode 100644 index 0000000..89dcb23 --- /dev/null +++ b/test/talkcrawler/DailyIndexTest.java @@ -0,0 +1,36 @@ +package talkcrawler; + +import static junit.framework.TestCase.fail; +import org.junit.Test; + +public class DailyIndexTest { + + /** + * Test of load method, of class MonthlyIndex. + * 2014-August + */ + @Test + @SuppressWarnings("UseSpecificCatch") + public void load_201903() { + try { + DailyIndex ins = new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2019-March", "date.html"); + ins.load(); + } + catch(Exception e) { + fail("The test case is a prototype."); + } + } + + @Test + @SuppressWarnings("UseSpecificCatch") + public void load_201408() { + try { + DailyIndex ins = new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2014-August", "date.html"); + ins.load(); + } + catch(Exception e) { + fail("The test case is a prototype."); + } + } + +}