diff --git a/src/talkcrawler/Article.java b/src/talkcrawler/Article.java
index 14b2302..e03ae67 100644
--- a/src/talkcrawler/Article.java
+++ b/src/talkcrawler/Article.java
@@ -133,7 +133,7 @@
try {
if (euc) {
- URL url = new URL(String.format("%s/%s/%s", path, dir, id));
+ URL url = new URL(String.format("%s/%s/%s.html", path, dir, id));
HttpURLConnection http = (HttpURLConnection)url.openConnection();
http.setRequestMethod("GET");
http.connect();
@@ -158,6 +158,7 @@
title = str.substring(13, str.length() - 5); // 13 <-- length('<H1>[OSM-ja] ')
}
if (str.startsWith("") && str.endsWith("")) {
+ System.out.println(this.id);
String dateStr = str.substring(3, str.length() - 4);
StringTokenizer st = new StringTokenizer(str, "()");
@@ -169,7 +170,14 @@
date = sdf1.parse(date1 + date3);
}
catch(ParseException e) {
- e.printStackTrace();
+ try {
+ SimpleDateFormat sdf2 = new SimpleDateFormat("''yyyy'年 'M'月 'd'日'HH:mm:ss' GMT'");
+ date = sdf2.parse(date1 + date3);
+ }
+ catch (ParseException e2) {
+ e2.printStackTrace();
+ throw new IOException(e2);
+ }
}
}
}
diff --git a/src/talkcrawler/DailyIndex.java b/src/talkcrawler/DailyIndex.java
index 5bc0a56..ab722a4 100644
--- a/src/talkcrawler/DailyIndex.java
+++ b/src/talkcrawler/DailyIndex.java
@@ -59,7 +59,6 @@
getLi(rd);
}
}
-
}
catch (Exception ex) {
Logger.getLogger(DailyIndex.class.getName()).log(Level.SEVERE, null, ex);
@@ -88,8 +87,12 @@
String title = "";
String id = "";
String name = "";
+ boolean datain = false;
while((line = reader.readLine()) != null) {
String str = line.trim();
+ if (str.toUpperCase().startsWith("記事数:")) {
+ datain = true;
+ }
if (str.toUpperCase().startsWith("
")) {
String str1 = str.substring(4); // 4 <-- length('<LI>')
title = getTitle(str1);
@@ -104,6 +107,9 @@
article.load();
Post.post(Article.toJsonArray(article.toJsonObject()));
}
+ if (datain && str.toUpperCase().startsWith("")) {
+ break;
+ }
}
}
diff --git a/src/tool/http/Post.java b/src/tool/http/Post.java
index c16b729..a32fd90 100644
--- a/src/tool/http/Post.java
+++ b/src/tool/http/Post.java
@@ -23,7 +23,7 @@
String jsonText = array.toString();
HttpURLConnection con = null;
try {
- URL url = new URL("http://172.17.0.1:8983/solr/talkja/update/json?commit=true");
+ URL url = new URL("http://surveyor.mydns.jp/solr/talkja/update/json?commit=true");
con = (HttpURLConnection) url.openConnection();
con.setConnectTimeout(60000);
con.setReadTimeout(60000);
diff --git a/test/talkcrawler/ArticleTest.java b/test/talkcrawler/ArticleTest.java
new file mode 100644
index 0000000..9d1489c
--- /dev/null
+++ b/test/talkcrawler/ArticleTest.java
@@ -0,0 +1,83 @@
+package talkcrawler;
+
+import static junit.framework.TestCase.fail;
+import org.junit.Test;
+import tool.http.Post;
+
+public class ArticleTest {
+    /**
+     * Loads article 010424 of 2019-January and posts its JSON form via Post.post.
+     */
+    @Test
+    public void test2019_01_010424() {
+        try {
+            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2019-January", "010424");
+            ins.load();
+            Post.post(Article.toJsonArray(ins.toJsonObject()));
+        }
+        catch (Exception e) {
+            throw new AssertionError("Article 2019-January/010424 failed to load or post", e); // keep the cause visible
+        }
+    }
+
+    /**
+     * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008473.html
+     */
+    @Test
+    public void test2014_08_008473() {
+        try {
+            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008473");
+            ins.load();
+            Post.post(Article.toJsonArray(ins.toJsonObject()));
+        }
+        catch (Exception e) {
+            throw new AssertionError("Article 2014-August/008473 failed to load or post", e); // keep the cause visible
+        }
+    }
+
+    /**
+     * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008479.html
+     */
+    @Test
+    public void test2014_08_008479() {
+        try {
+            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008479");
+            ins.load();
+            Post.post(Article.toJsonArray(ins.toJsonObject()));
+        }
+        catch (Exception e) {
+            throw new AssertionError("Article 2014-August/008479 failed to load or post", e); // keep the cause visible
+        }
+    }
+
+    /**
+     * https://lists.openstreetmap.org/pipermail/talk-ja/2014-August/008480.html
+     * Loads the article and posts its JSON form via Post.post; any exception fails the test.
+     */
+    @Test
+    public void test2014_08_008480() {
+        try {
+            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2014-August", "008480");
+            ins.load();
+            Post.post(Article.toJsonArray(ins.toJsonObject()));
+        }
+        catch (Exception e) {
+            throw new AssertionError("Article 2014-August/008480 failed to load or post", e); // keep the cause visible
+        }
+    }
+
+    /**
+     * https://lists.openstreetmap.org/pipermail/talk-ja/2008-March/date.html
+     */
+    @Test
+    public void test2008_03_000000() {
+        try {
+            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2008-March", "000000");
+            ins.load();
+            Post.post(Article.toJsonArray(ins.toJsonObject()));
+        }
+        catch (Exception e) {
+            throw new AssertionError("Article 2008-March/000000 failed to load or post", e); // keep the cause visible
+        }
+    }
+}
diff --git a/test/talkcrawler/DailyIndexTest.java b/test/talkcrawler/DailyIndexTest.java
new file mode 100644
index 0000000..89dcb23
--- /dev/null
+++ b/test/talkcrawler/DailyIndexTest.java
@@ -0,0 +1,36 @@
+package talkcrawler;
+
+import static junit.framework.TestCase.fail;
+import org.junit.Test;
+
+public class DailyIndexTest {
+
+    /**
+     * Test of load method, of class DailyIndex.
+     * 2019-March; any exception thrown by load() fails the test with its cause attached.
+     */
+    @Test
+    @SuppressWarnings("UseSpecificCatch")
+    public void load_201903() {
+        try {
+            DailyIndex ins = new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2019-March", "date.html");
+            ins.load();
+        }
+        catch(Exception e) {
+            throw new AssertionError("DailyIndex.load() failed for 2019-March/date.html", e);
+        }
+    }
+
+    @Test
+    @SuppressWarnings("UseSpecificCatch")
+    public void load_201408() { // same check against the 2014-August index
+        try {
+            DailyIndex ins = new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2014-August", "date.html");
+            ins.load();
+        }
+        catch(Exception e) {
+            throw new AssertionError("DailyIndex.load() failed for 2014-August/date.html", e);
+        }
+    }
+
+}