package talkcrawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import tool.json.JsonTool;
/**
 * One mailing-list archive article.
 *
 * <p>An instance is identified by {@code path}/{@code dir}/{@code id}. {@link #load()} downloads
 * the article's HTML page and extracts title, date, author name and body lines;
 * {@link #toJsonObject()} turns the result into a JSON document and {@link #post(JsonArray)}
 * sends such documents to the Solr update endpoint.
 */
public class Article {
    private static final Logger LOGGER = Logger.getLogger(Article.class.getName());

    /** Connect/read timeout, in milliseconds, used for every HTTP request made by this class. */
    private static final int TIMEOUT_MILLIS = 60000;

    /** Prefix of the heading line that carries the article title. */
    private static final String TITLE_PREFIX = "<H1>[OSM-ja] ";

    /** Closing tag expected at the end of the heading line. */
    private static final String TITLE_SUFFIX = "</H1>";

    /** Full article URL: {@code path/dir/id.html}. */
    String url;
    /** Base URL of the archive. */
    String path;
    /** Directory component of the article URL (e.g. {@code "2019-January"}). */
    String dir;
    /** Article identifier; file name of the page without the {@code .html} extension. */
    String id;
    /** Title taken from the {@code <H1>} line, or {@code null} if none was found. */
    String title = null;
    /** Date parsed from the {@code <I>...</I>} line, or {@code null} if none was parsed. */
    Date date = null;
    /** Text of the {@code <B>...</B>} line, or {@code null} if none was found. */
    String name = null;
    /** Body lines collected between the article markers, in document order. */
    ArrayList<String> contents = new ArrayList<>();
    /** Output format used for the {@code date} field of the JSON document. */
    SimpleDateFormat sdf2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss' UTC'");

    /**
     * Loads one fixed article and posts it; any failure is printed to standard error.
     *
     * @param args unused
     */
    @SuppressWarnings({"UseSpecificCatch", "CallToPrintStackTrace"})
    public static void main(String[] args) {
        try {
            Article ins = new Article(MonthlyIndex.MONTHLY_INDEX, "2019-January", "010424");
            ins.load();
            Article.post(Article.toJsonArray(ins.toJsonObject()));
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an article descriptor; nothing is downloaded until {@link #load()} is called.
     *
     * @param path base URL of the archive
     * @param dir directory component below {@code path}
     * @param id article identifier (page file name without {@code .html})
     * @throws java.io.IOException declared for source compatibility with existing callers;
     *         this constructor performs no I/O
     */
    public Article(String path, String dir, String id) throws IOException {
        this.path = path;
        this.dir = dir;
        this.id = id;
        this.url = String.format("%s/%s/%s.html", path, dir, id);
    }

    /**
     * Posts the given JSON array to the Solr JSON update endpoint and prints the {@code status}
     * value of the response's {@code responseHeader} ({@code " +++++ "} prefix for 0,
     * {@code " xxx "} otherwise).
     *
     * <p>Failures are logged rather than thrown.
     *
     * @param array documents to send
     */
    public static void post(JsonArray array) {
        String jsonText = array.toString();
        HttpURLConnection con = null;
        try {
            URL url = new URL("http://172.17.0.1:8983/solr/talkja/update/json?commit=true");
            con = (HttpURLConnection) url.openConnection();
            con.setConnectTimeout(TIMEOUT_MILLIS);
            con.setReadTimeout(TIMEOUT_MILLIS);
            con.addRequestProperty("Content-Type", "application/json; charset=UTF-8");
            con.setRequestMethod("POST");
            con.setDoOutput(true);
            con.setDoInput(true);
            con.connect();

            // Encode explicitly as UTF-8 so the bytes match the declared Content-Type regardless
            // of the platform default charset; writing directly also surfaces I/O errors.
            try (OutputStream outputStream = con.getOutputStream()) {
                outputStream.write(jsonText.getBytes("UTF-8"));
            }

            // On an HTTP error getInputStream() would throw and discard the body, so read the
            // error stream instead and report what the server answered.
            int statusCode = con.getResponseCode();
            boolean httpError = statusCode >= HttpURLConnection.HTTP_BAD_REQUEST;
            InputStream stream = httpError ? con.getErrorStream() : con.getInputStream();
            if (stream == null) {
                LOGGER.log(Level.WARNING, "Update request returned HTTP {0} without a body", statusCode);
                return;
            }

            StringBuilder sb = new StringBuilder();
            try (BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
            }
            String responseData = sb.toString();
            if (httpError) {
                LOGGER.log(Level.WARNING, "Update request returned HTTP {0}: {1}",
                        new Object[] {statusCode, responseData});
            }

            JsonObject res = JsonTool.parse(responseData);
            if (res != null) {
                JsonObject header = res.getJsonObject("responseHeader");
                if (header != null) {
                    int status = header.getInt("status");
                    if (status == 0) {
                        System.out.println(" +++++ "+ status);
                    }
                    else {
                        System.out.println(" xxx "+ status);
                    }
                }
            }
        }
        catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        finally {
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Builds the JSON document for this article.
     *
     * <p>{@code path}, {@code dir}, {@code url}, {@code title}, {@code id}, {@code name} and
     * {@code date} are included only when non-null; {@code contents} is always present as an
     * array (possibly empty).
     *
     * @return the JSON representation of this article
     */
    JsonObject toJsonObject() {
        JsonObjectBuilder json = Json.createObjectBuilder();
        if (this.path != null) {
            json.add("path", path);
        }
        if (this.dir != null) {
            json.add("dir", dir);
        }
        if (this.url != null) {
            json.add("url", url);
        }
        if (this.title != null) {
            json.add("title", title);
        }
        if (this.id != null) {
            json.add("id", id);
        }
        if (this.name != null) {
            json.add("name", name);
        }
        if (this.date != null) {
            json.add("date", sdf2.format(date));
        }
        JsonArrayBuilder arryBuild = Json.createArrayBuilder();
        for (String content : this.contents) {
            arryBuild.add(content);
        }
        json.add("contents", arryBuild);
        return json.build();
    }

    /**
     * Wraps a single JSON object in a one-element JSON array.
     *
     * @param obj the object to wrap
     * @return an array containing only {@code obj}
     */
    static JsonArray toJsonArray(JsonObject obj) {
        JsonArrayBuilder arryBuild = Json.createArrayBuilder();
        arryBuild.add(obj);
        return arryBuild.build();
    }

    /**
     * Downloads the article page and fills in title, date, name and contents.
     *
     * <p>The page is first read as UTF-8. If a {@code <META>} line seen before {@code <BODY>}
     * declares {@code charset=EUC-JP}, nothing is parsed from that first read and the same page
     * is fetched again and parsed as EUC-JP. Failures are logged rather than thrown.
     */
    @SuppressWarnings({"UseSpecificCatch"})
    public void load() {
        boolean euc = false;
        HttpURLConnection http = null;
        try {
            http = openGet();
            try (BufferedReader rd = new BufferedReader(new InputStreamReader(http.getInputStream(), "UTF-8"))) {
                euc = declaresEucJp(rd);
                if (!euc) {
                    // Keep parsing from where the header scan stopped.
                    getLi(rd);
                }
            }
        }
        catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return;
        }
        finally {
            if (http != null) {
                http.disconnect();
            }
        }

        if (!euc) {
            return;
        }

        // Second pass: same page (including the ".html" suffix), decoded as EUC-JP this time.
        http = null;
        try {
            http = openGet();
            try (BufferedReader rd = new BufferedReader(new InputStreamReader(http.getInputStream(), "EUC-JP"))) {
                getLi(rd);
            }
        }
        catch (Exception ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
        finally {
            if (http != null) {
                http.disconnect();
            }
        }
    }

    /** Opens a connected GET request for this article's page ({@code path/dir/id.html}). */
    private HttpURLConnection openGet() throws IOException {
        URL pageUrl = new URL(String.format("%s/%s/%s.html", path, dir, id));
        HttpURLConnection http = (HttpURLConnection) pageUrl.openConnection();
        http.setConnectTimeout(TIMEOUT_MILLIS);
        http.setReadTimeout(TIMEOUT_MILLIS);
        http.setRequestMethod("GET");
        http.connect();
        return http;
    }

    /**
     * Scans header lines until a {@code <META>} charset declaration (EUC-JP or UTF-8) or the
     * {@code <BODY>} line is reached; returns whether EUC-JP was declared.
     */
    private static boolean declaresEucJp(BufferedReader rd) throws IOException {
        String line;
        while ((line = rd.readLine()) != null) {
            String str = line.trim().toUpperCase();
            if (str.startsWith("<META ")) {
                if (str.contains("CHARSET=EUC-JP")) {
                    return true;
                }
                if (str.contains("CHARSET=UTF-8")) {
                    return false;
                }
            }
            if (str.startsWith("<BODY")) {
                return false;
            }
        }
        return false;
    }

    /**
     * Reads the remaining lines of the page and extracts the article fields.
     *
     * <ul>
     * <li>a line starting with {@code <H1>[OSM-ja] } sets {@code title};</li>
     * <li>a {@code <I>...</I>} line made of a date part, a parenthesised part and a time part
     *     sets {@code date}; lines of any other shape are ignored;</li>
     * <li>a {@code <B>...</B>} line sets {@code name};</li>
     * <li>a {@code <!--beginarticle-->} line hands the reader to
     *     {@link #getArticle(BufferedReader)}.</li>
     * </ul>
     *
     * @param reader source of the page text
     * @throws IOException if reading fails
     */
    void getLi(BufferedReader reader) throws IOException {
        SimpleDateFormat sdf1 = new SimpleDateFormat("'<I>'yyyy'年 'M'月 'd'日'HH:mm:ss' UTC</I>'");
        String line;
        while ((line = reader.readLine()) != null) {
            String str = line.trim();
            if (str.startsWith(TITLE_PREFIX)) {
                // Strip the closing tag only when it is really there; otherwise keep the rest of
                // the line instead of chopping title characters or indexing out of range.
                title = str.endsWith(TITLE_SUFFIX)
                        ? str.substring(TITLE_PREFIX.length(), str.length() - TITLE_SUFFIX.length())
                        : str.substring(TITLE_PREFIX.length());
            }
            if (str.startsWith("<I>") && str.endsWith("</I>")) {
                // Splitting on the parentheses drops the middle token; the first and third
                // tokens are rejoined to match the sdf1 pattern.
                StringTokenizer st = new StringTokenizer(str, "()");
                if (st.countTokens() >= 3) {
                    String date1 = st.nextToken().trim();
                    st.nextToken();
                    String date3 = st.nextToken().trim();
                    try {
                        date = sdf1.parse(date1 + date3);
                    }
                    catch (ParseException e) {
                        LOGGER.log(Level.WARNING, "Unparseable article date: " + str, e);
                    }
                }
            }
            if (str.startsWith("<B>") && str.endsWith("</B>")) {
                name = str.substring(3, str.length() - 4);
            }
            if (str.equals("<!--beginarticle-->")) {
                getArticle(reader);
            }
        }
    }

    /**
     * Appends body lines to {@code contents} until {@code <!--endarticle-->}, a line starting
     * with {@code </PRE>}, or end of input. The {@code <PRE>} prefix of the first line that
     * starts with it is removed.
     *
     * @param reader source positioned just after the begin-article marker
     * @throws IOException if reading fails
     */
    void getArticle(BufferedReader reader) throws IOException {
        String line;
        boolean top = true;
        while ((line = reader.readLine()) != null) {
            if (line.equals("<!--endarticle-->")) {
                return;
            }
            if (line.startsWith("</PRE>")) {
                return;
            }
            String str = line;
            if (line.startsWith("<PRE>") && top) {
                str = line.substring(5);
                top = false;
            }
            this.contents.add(str);
        }
    }
}