diff --git a/doc/TalkCrawler.class.violet.html b/doc/TalkCrawler.class.violet.html new file mode 100644 index 0000000..b6c9d5b --- /dev/null +++ b/doc/TalkCrawler.class.violet.html @@ -0,0 +1,350 @@ + + + + + + + + + This file was generated with Violet UML Editor 2.1.0. +   ( View Source / Download Violet ) +
+
+ +
+
+ embedded diagram image + + \ No newline at end of file diff --git a/doc/TalkCrawler.class.violet.png b/doc/TalkCrawler.class.violet.png new file mode 100644 index 0000000..d00d985 --- /dev/null +++ b/doc/TalkCrawler.class.violet.png Binary files differ diff --git a/doc/newjson.json b/doc/newjson.json new file mode 100644 index 0000000..e3516d8 --- /dev/null +++ b/doc/newjson.json @@ -0,0 +1,10 @@ +[{ + "path": "https://lists.openstreetmap.org/pipermail/talk-ja", + "dir": "2019-January", + "url": "https://lists.openstreetmap.org/pipermail/talk-ja/2019-January/010424.html", + "title": "JOSM14620 日本語設定への韓国語混入について", + "id": "010424", + "name": "ribbon", + "date": "2019-01-01 00:20:39 UTC", + "contents": ["On Tue, Jan 01, 2019 at 09:15:52AM +0900, Hokko-sha wrote:", "", "> また、更新したJOSMを旧バージョンに戻す方法はありますでしょうか?", "> PCは2台ともUbuntu18.04で、更新していない方のPCからファイルを持ってくることは可能です。", "", "https://josm.openstreetmap.de/download/", "", "に古い版があります。", "", "ribbon", ""] +}] \ No newline at end of file diff --git a/doc/talkCrawler.md b/doc/talkCrawler.md index 0486b6e..e86d222 100644 --- a/doc/talkCrawler.md +++ b/doc/talkCrawler.md @@ -23,7 +23,7 @@ ### Development environment -![MachineStructure](machinestructure.png) +![TalkCrawler.class](TalkCrawler.class.violet.png) 1. curl[monthly index](https://lists.openstreetmap.org/pipermail/talk-ja/) を取得する 2. diff --git a/nbbuild.xml b/nbbuild.xml new file mode 100644 index 0000000..665d866 --- /dev/null +++ b/nbbuild.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + Builds, tests, and runs the project talkCrawler. + + + diff --git a/src/talkcrawler/Article.java b/src/talkcrawler/Article.java index 736184c..252c9ef 100644 --- a/src/talkcrawler/Article.java +++ b/src/talkcrawler/Article.java @@ -2,26 +2,44 @@ import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.PrintStream; import java.net.HttpURLConnection; import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.StringTokenizer; import java.util.logging.Level; import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import tool.json.JsonTool; public class Article { + String url; String path; String dir; String id; + String title = null; + Date date = null; + String name = null; + ArrayList contents = new ArrayList<>(); + + SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss' UTC'"); @SuppressWarnings({"UseSpecificCatch", "CallToPrintStackTrace"}) public static void main(String[] args) { try { Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2019-January", "010424"); ins.load(); + Article.post(Article.toJsonArray(ins.toJsonObject())); } catch (Exception e) { e.printStackTrace(); @@ -39,8 +57,102 @@ this.path = path; this.dir = dir; this.id = id; + this.url = String.format("%s/%s/%s.html", path, dir, id); } + public static void post(JsonArray array) { + String jsonText = array.toString(); + HttpURLConnection con = null; + try { + URL url = new URL("http://172.17.0.1:8983/solr/talkja/update/json?commit=true"); + con = (HttpURLConnection) url.openConnection(); + con.setConnectTimeout(60000); + con.setReadTimeout(60000); + con.addRequestProperty("Content-Type", "application/json; charset=UTF-8"); + con.setRequestMethod("POST"); + con.setDoOutput(true); + con.setDoInput(true); + con.connect(); + try (OutputStream outputStream = con.getOutputStream()) { + try (PrintStream ps = new PrintStream(outputStream)) { + ps.print(jsonText); + } + } + + // recv response + int statusCode = con.getResponseCode(); + String responseData = ""; + StringBuilder sb = new StringBuilder(); + try (InputStream stream = con.getInputStream()) { + String line = ""; + BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8")); + while ((line = br.readLine()) != null) { + sb.append(line); + } + } + responseData = sb.toString(); + JsonObject res = JsonTool.parse(responseData); + if (res != null) { + JsonObject header = res.getJsonObject("responseHeader"); + if (header != null) { + int status = header.getInt("status"); + if (status == 0) { + System.out.println(" +++++ "+ status); + } + else { + System.out.println(" xxx "+ status); + } + } + } + } + catch (Exception ex) { + Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex); + } + finally { + if (con != null) { + con.disconnect(); + } + } + } + + JsonObject toJsonObject() { + JsonObjectBuilder json = Json.createObjectBuilder(); + if (this.path != null) { + json.add("path", path); + } + if (this.dir != null) { + json.add("dir", dir); + } + if (this.url != null) { + json.add("url", url); + } + + if (this.title != null) { + json.add("title", title); + } + if (this.id != null) { + json.add("id", id); + } + if (this.name != null) { + json.add("name", name); + } + if (this.date != null) { + json.add("date", sdf2.format(date)); + } + JsonArrayBuilder arryBuild = Json.createArrayBuilder(); + for (String content : this.contents) { + arryBuild.add(content); + } + json.add("contents", arryBuild); + return json.build(); + } + + static JsonArray toJsonArray(JsonObject obj) { + JsonArrayBuilder arryBuild = Json.createArrayBuilder(); + arryBuild.add(obj); + return arryBuild.build(); + } + @SuppressWarnings({"CallToPrintStackTrace", "SleepWhileInLoop", "UseSpecificCatch"}) public void load() { boolean euc = false; @@ -76,10 +188,10 @@ Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex); return; } - + try { if (euc) { - URL url = new URL(String.format("%s/%s/%s.html", path, dir, id)); + URL url = new URL(String.format("%s/%s/%s", path, dir, id)); HttpURLConnection http = (HttpURLConnection)url.openConnection(); http.setRequestMethod("GET"); http.connect(); @@ -89,18 +201,15 @@ } } catch (Exception ex) { - Logger.getLogger(Article.class.getName()).log(Level.SEVERE, null, ex); + Logger.getLogger(DailyIndex.class.getName()).log(Level.SEVERE, null, ex); + return; } } void getLi(BufferedReader reader) throws IOException { SimpleDateFormat sdf1 = new SimpleDateFormat("''yyyy'年 'M'月 'd'日'HH:mm:ss' UTC'"); - SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss' UTC'"); String line; - String title = ""; - Date date = null; - String name = ""; while((line = reader.readLine()) != null) { String str = line.trim(); if (str.startsWith("

[OSM-ja] ")) { @@ -128,21 +237,17 @@ } if (str.equals("")) { getArticle(reader); - - System.out.println("{"); - System.out.println(title); - System.out.println(id); - System.out.println(sdf2.format(date)); - System.out.println(name); - System.out.println("}"); - title = ""; - id = ""; - name = ""; - date = null; } } } + /* + public void post(JsonObject obj) { + JsonArray arry = Json.createArrayBuilder().add(obj).build(); + System.out.println(arry.toString()); + } + */ + void getArticle(BufferedReader reader) throws IOException { String line; boolean top = true; @@ -158,8 +263,7 @@ str = line.substring(5); top = false; } - - System.out.println(str); + this.contents.add(str); } } } diff --git a/src/talkcrawler/DailyIndex.java b/src/talkcrawler/DailyIndex.java index b45a59d..93a50a5 100644 --- a/src/talkcrawler/DailyIndex.java +++ b/src/talkcrawler/DailyIndex.java @@ -2,7 +2,6 @@ import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; @@ -14,6 +13,11 @@ String path; String dir; String file; + + public static void main(String[] args) throws IOException { + DailyIndex ins = new DailyIndex(MonthlyIndex.MONTHLY_INDEX, "2019-March", "date.html"); + ins.load(); + } /** * @@ -95,7 +99,9 @@ } if (str.equals("")) { System.out.println("-----"); - (new Article(MonthlyIndex.MONTHLY_INDEX, dir, id)).load(); + Article article = new Article(MonthlyIndex.MONTHLY_INDEX, dir, id); + article.load(); + Article.post(Article.toJsonArray(article.toJsonObject())); } } } diff --git a/src/talkcrawler/MonthlyIndex.java b/src/talkcrawler/MonthlyIndex.java index 6cfe2ba..c7b5b5a 100644 --- a/src/talkcrawler/MonthlyIndex.java +++ b/src/talkcrawler/MonthlyIndex.java @@ -8,15 +8,20 @@ import java.util.StringTokenizer; import java.util.logging.Level; import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonArrayBuilder; public class MonthlyIndex { - final static String MONTHLY_INDEX = "https://lists.openstreetmap.org/pipermail/talk-ja/"; + final static String MONTHLY_INDEX = "https://lists.openstreetmap.org/pipermail/talk-ja"; + JsonArrayBuilder arryBuild; /** * * @throws java.io.IOException */ public MonthlyIndex() throws IOException { + this.arryBuild = Json.createArrayBuilder(); } @SuppressWarnings({"CallToPrintStackTrace", "SleepWhileInLoop", "UseSpecificCatch"}) @@ -125,5 +130,6 @@ daily.load(); } } + Article.post(arryBuild.build()); } } diff --git a/src/tool/http/Post.java b/src/tool/http/Post.java new file mode 100644 index 0000000..78855ea --- /dev/null +++ b/src/tool/http/Post.java @@ -0,0 +1,9 @@ +package tool.http; + +/** + * + * @author yuu + */ +public class Post { + +} diff --git a/src/tool/json/JsonTool.java b/src/tool/json/JsonTool.java new file mode 100644 index 0000000..efbc77d --- /dev/null +++ b/src/tool/json/JsonTool.java @@ -0,0 +1,16 @@ +package tool.json; + +import java.io.StringReader; +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonReader; + +public class JsonTool { + + static public JsonObject parse(String jsonObjectString) { + JsonReader jsonReader = Json.createReader(new StringReader(jsonObjectString)); + JsonObject jsonObject = jsonReader.readObject(); + return jsonObject; + } + +}