diff --git a/doc/TalkCrawler.class.violet.html b/doc/TalkCrawler.class.violet.html
new file mode 100644
index 0000000..b6c9d5b
--- /dev/null
+++ b/doc/TalkCrawler.class.violet.html
@@ -0,0 +1,350 @@
+
+
+
+
+
+
+
+
+ This file was generated with Violet UML Editor 2.1.0.
+ ( View Source / Download Violet )
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/doc/TalkCrawler.class.violet.png b/doc/TalkCrawler.class.violet.png
new file mode 100644
index 0000000..d00d985
--- /dev/null
+++ b/doc/TalkCrawler.class.violet.png
Binary files differ
diff --git a/doc/newjson.json b/doc/newjson.json
new file mode 100644
index 0000000..e3516d8
--- /dev/null
+++ b/doc/newjson.json
@@ -0,0 +1,10 @@
+[{
+ "path": "https://lists.openstreetmap.org/pipermail/talk-ja",
+ "dir": "2019-January",
+ "url": "https://lists.openstreetmap.org/pipermail/talk-ja/2019-January/010424.html",
+ "title": "JOSM14620 日本語設定への韓国語混入について",
+ "id": "010424",
+ "name": "ribbon",
+ "date": "2019-01-01 00:20:39 UTC",
+ "contents": ["On Tue, Jan 01, 2019 at 09:15:52AM +0900, Hokko-sha wrote:", "", "> また、更新したJOSMを旧バージョンに戻す方法はありますでしょうか?", "> PCは2台ともUbuntu18.04で、更新していない方のPCからファイルを持ってくることは可能です。", "", "https://josm.openstreetmap.de/download/", "", "に古い版があります。", "", "ribbon", ""]
+}]
\ No newline at end of file
diff --git a/doc/talkCrawler.md b/doc/talkCrawler.md
index 0486b6e..e86d222 100644
--- a/doc/talkCrawler.md
+++ b/doc/talkCrawler.md
@@ -23,7 +23,7 @@
### Development environment
-![MachineStructure](machinestructure.png)
+![TalkCrawler.class](TalkCrawler.class.violet.png)
1. curl[monthly index](https://lists.openstreetmap.org/pipermail/talk-ja/) を取得する
2.
diff --git a/nbbuild.xml b/nbbuild.xml
new file mode 100644
index 0000000..665d866
--- /dev/null
+++ b/nbbuild.xml
@@ -0,0 +1,73 @@
+
+
+
+
+
+
+
+
+
+
+ Builds, tests, and runs the project talkCrawler.
+
+
+
diff --git a/src/talkcrawler/Article.java b/src/talkcrawler/Article.java
index 736184c..252c9ef 100644
--- a/src/talkcrawler/Article.java
+++ b/src/talkcrawler/Article.java
@@ -2,26 +2,44 @@
import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Date;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonArrayBuilder;
+import javax.json.JsonObject;
+import javax.json.JsonObjectBuilder;
+import tool.json.JsonTool;
public class Article {
+ String url;
String path;
String dir;
String id;
+ String title = null;
+ Date date = null;
+ String name = null;
+ ArrayList contents = new ArrayList<>();
+
+ SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss' UTC'");
@SuppressWarnings({"UseSpecificCatch", "CallToPrintStackTrace"})
public static void main(String[] args) {
try {
Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2019-January", "010424");
ins.load();
+ Article.post(Article.toJsonArray(ins.toJsonObject()));
}
catch (Exception e) {
e.printStackTrace();
@@ -39,8 +57,102 @@
this.path = path;
this.dir = dir;
this.id = id;
+ this.url = String.format("%s/%s/%s.html", path, dir, id);
}
+ public static void post(JsonArray array) {
+ String jsonText = array.toString();
+ HttpURLConnection con = null;
+ try {
+ URL url = new URL("http://172.17.0.1:8983/solr/talkja/update/json?commit=true");
+ con = (HttpURLConnection) url.openConnection();
+ con.setConnectTimeout(60000);
+ con.setReadTimeout(60000);
+ con.addRequestProperty("Content-Type", "application/json; charset=UTF-8");
+ con.setRequestMethod("POST");
+ con.setDoOutput(true);
+ con.setDoInput(true);
+ con.connect();
+ try (OutputStream outputStream = con.getOutputStream()) {
+ try (PrintStream ps = new PrintStream(outputStream)) {
+ ps.print(jsonText);
+ }
+ }
+
+ // recv response
+ int statusCode = con.getResponseCode();
+ String responseData = "";
+ StringBuilder sb = new StringBuilder();
+ try (InputStream stream = con.getInputStream()) {
+ String line = "";
+ BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ while ((line = br.readLine()) != null) {
+ sb.append(line);
+ }
+ }
+ responseData = sb.toString();
+ JsonObject res = JsonTool.parse(responseData);
+ if (res != null) {
+ JsonObject header = res.getJsonObject("responseHeader");
+ if (header != null) {
+ int status = header.getInt("status");
+ if (status == 0) {
+ System.out.println(" +++++ "+ status);
+ }
+ else {
+ System.out.println(" xxx "+ status);
+ }
+ }
+ }
+ }
+ catch (Exception ex) {
+ Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ finally {
+ if (con != null) {
+ con.disconnect();
+ }
+ }
+ }
+
+    /**
+     * Builds the JSON document for this article.
+     * Scalar fields that are still null are left out; "contents" is always added.
+     *
+     * @return object with the keys path, dir, url, title, id, name, date, contents
+     */
+    JsonObject toJsonObject() {
+        JsonObjectBuilder doc = Json.createObjectBuilder();
+        // Scalar fields, added in a fixed sequence; each one is skipped while unset.
+        if (path != null) { doc.add("path", path); }
+        if (dir != null) { doc.add("dir", dir); }
+        if (url != null) { doc.add("url", url); }
+        if (title != null) { doc.add("title", title); }
+        if (id != null) { doc.add("id", id); }
+        if (name != null) { doc.add("name", name); }
+        if (date != null) {
+            // The date is emitted as text produced by the instance formatter sdf2.
+            String stamp = sdf2.format(date);
+            doc.add("date", stamp);
+        }
+
+        // The collected body strings go out as a JSON array, in list order.
+        JsonArrayBuilder lines = Json.createArrayBuilder();
+        for (String text : this.contents) {
+            lines.add(text);
+        }
+        doc.add("contents", lines);
+
+        JsonObject built = doc.build();
+        return built;
+    }
+
+    /** Wraps a single JSON object in a one-element JSON array. */
+    static JsonArray toJsonArray(JsonObject obj) {
+        // post(JsonArray) takes an array, so a lone document has to be wrapped first.
+        return Json.createArrayBuilder().add(obj).build();
+    }
+
@SuppressWarnings({"CallToPrintStackTrace", "SleepWhileInLoop", "UseSpecificCatch"})
public void load() {
boolean euc = false;
@@ -76,10 +188,10 @@
Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex);
return;
}
-
+
try {
if (euc) {
- URL url = new URL(String.format("%s/%s/%s.html", path, dir, id));
+ URL url = new URL(String.format("%s/%s/%s", path, dir, id));
HttpURLConnection http = (HttpURLConnection)url.openConnection();
http.setRequestMethod("GET");
http.connect();
@@ -89,18 +201,15 @@
}
}
catch (Exception ex) {
- Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex);
+ Logger.getLogger(DailyIndex.class.getName()).log(Level.SEVERE, null, ex);
+ return;
}
}
void getLi(BufferedReader reader) throws IOException {
SimpleDateFormat sdf1 = new SimpleDateFormat("''yyyy'年 'M'月 'd'日'HH:mm:ss' UTC'");
- SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss' UTC'");
String line;
- String title = "";
- Date date = null;
- String name = "";
while((line = reader.readLine()) != null) {
String str = line.trim();
if (str.startsWith("[OSM-ja] ")) {
@@ -128,21 +237,17 @@
}
if (str.equals("")) {
getArticle(reader);
-
- System.out.println("{");
- System.out.println(title);
- System.out.println(id);
- System.out.println(sdf2.format(date));
- System.out.println(name);
- System.out.println("}");
- title = "";
- id = "";
- name = "";
- date = null;
}
}
}
+ /*
+ public void post(JsonObject obj) {
+ JsonArray arry = Json.createArrayBuilder().add(obj).build();
+ System.out.println(arry.toString());
+ }
+ */
+
void getArticle(BufferedReader reader) throws IOException {
String line;
boolean top = true;
@@ -158,8 +263,7 @@
str = line.substring(5);
top = false;
}
-
- System.out.println(str);
+ this.contents.add(str);
}
}
}
diff --git a/src/talkcrawler/DailyIndex.java b/src/talkcrawler/DailyIndex.java
index b45a59d..93a50a5 100644
--- a/src/talkcrawler/DailyIndex.java
+++ b/src/talkcrawler/DailyIndex.java
@@ -2,7 +2,6 @@
import java.io.BufferedReader;
import java.io.IOException;
-import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
@@ -14,6 +13,11 @@
String path;
String dir;
String file;
+
+    /** Manual entry point: loads the "date.html" index of the 2019-March directory. */
+    public static void main(String[] args) throws IOException {
+        new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2019-March", "date.html").load();
+    }
/**
*
@@ -95,7 +99,9 @@
}
if (str.equals("")) {
System.out.println("-----");
- (new Article(MonthlyIndex.MONTHLY_INDEX, dir, id)).load();
+ Article article = new Article(MonthlyIndex.MONTHLY_INDEX, dir, id);
+ article.load();
+ Article.post(Article.toJsonArray(article.toJsonObject()));
}
}
}
diff --git a/src/talkcrawler/MonthlyIndex.java b/src/talkcrawler/MonthlyIndex.java
index 6cfe2ba..c7b5b5a 100644
--- a/src/talkcrawler/MonthlyIndex.java
+++ b/src/talkcrawler/MonthlyIndex.java
@@ -8,15 +8,20 @@
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonArrayBuilder;
public class MonthlyIndex {
- final static String MONTHLY_INDEX = "https://lists.openstreetmap.org/pipermail/talk-ja/";
+ final static String MONTHLY_INDEX = "https://lists.openstreetmap.org/pipermail/talk-ja";
+ JsonArrayBuilder arryBuild;
/**
*
* @throws java.io.IOException
*/
public MonthlyIndex() throws IOException {
+ this.arryBuild = Json.createArrayBuilder(); // built and passed to Article.post(...) later in this class. NOTE(review): no add() on it is visible in this diff - confirm it is populated
}
@SuppressWarnings({"CallToPrintStackTrace", "SleepWhileInLoop", "UseSpecificCatch"})
@@ -125,5 +130,6 @@
daily.load();
}
}
+ Article.post(arryBuild.build());
}
}
diff --git a/src/tool/http/Post.java b/src/tool/http/Post.java
new file mode 100644
index 0000000..78855ea
--- /dev/null
+++ b/src/tool/http/Post.java
@@ -0,0 +1,9 @@
+package tool.http;
+
+/**
+ * Empty class; presumably a placeholder for HTTP POST helpers (TODO confirm).
+ * @author yuu
+ */
+public class Post {
+
+}
diff --git a/src/tool/json/JsonTool.java b/src/tool/json/JsonTool.java
new file mode 100644
index 0000000..efbc77d
--- /dev/null
+++ b/src/tool/json/JsonTool.java
@@ -0,0 +1,16 @@
+package tool.json;
+
+import java.io.StringReader;
+import javax.json.Json;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+
+/** Small JSON helper functions built on javax.json. */
+public class JsonTool {
+    /** Parses text holding a single JSON object; the reader is closed before returning. */
+    static public JsonObject parse(String jsonObjectString) {
+        try (JsonReader jsonReader = Json.createReader(new StringReader(jsonObjectString))) {
+            return jsonReader.readObject();
+        }
+    }
+}