forked from nsquare-jdzone/java-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathIndexCSVFilters.java
More file actions
131 lines (118 loc) · 5.39 KB
/
IndexCSVFilters.java
File metadata and controls
131 lines (118 loc) · 5.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
package com.solr.index;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.TimeZone;
/**
 * Indexes delimiter-separated text files into a Solr core.
 *
 * <p>The first line of a file is treated as the header. Every other non-blank line becomes one
 * Solr document. The layout the code relies on is the one shown by this sample:
 *
 * <pre>
 * DataSetName,TimeStamp,First,Max,Min,Last,ValB,ValA,FilterS,FilterE,FilterT,ValS,ValC
 * E8374H231J#Type0,2017-01-09 09:31:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.42,0
 * E8374H231J#Type0,2017-01-09 09:32:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.57,0
 * </pre>
 *
 * <p>Column 1 (the timestamp) and column 9 (a date) get special treatment, and columns 0, 1, 8,
 * 9 and 10 are combined into the document id, so a file needs at least 11 columns.
 *
 * <p>Instances hold a {@link SolrClient}; call {@link #close()} when done with the instance.
 */
public class IndexCSVFilters implements AutoCloseable {
    private static final Logger logger = LoggerFactory.getLogger(IndexCSVFilters.class);
    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
    private static final String TIME_STAMP = "T00:00:00Z";
    private static final String DATE_STAMP = "1970-01-01T";
    private static final String DATE_TIME_FIELD = "DateTime";
    private static final String TIME_FIELD = "Time";
    private static final String ID_FIELD = "id";
    private static final String DATE_PART_Z = "Z";

    /** Index of the "date time" column, e.g. {@code 2017-01-09 09:31:00}. */
    private static final int TIMESTAMP_COLUMN = 1;
    /** Index of the date-only column, e.g. {@code 2017-01-13}. */
    private static final int DATE_COLUMN = 9;
    /** Highest column index read explicitly is 10, so at least 11 columns are required. */
    private static final int MIN_COLUMNS = 11;
    /** Number of buffered documents that triggers an add + commit round trip. */
    private static final int BATCH_SIZE = 100_000;

    private final SolrClient solrClient;
    // The two formatters are configured here but not used by any method of this class.
    // SimpleDateFormat is not thread-safe, so they must not be shared across threads if
    // they are ever put to use.
    private final SimpleDateFormat solrCompatibleSdf;
    private final SimpleDateFormat csvCompatibleSdf;

    /**
     * Creates an indexer that talks to {@code solrUrl + "/" + coreName}.
     *
     * @param solrUrl  base URL of the Solr server, without a trailing slash
     * @param coreName name of the core (or collection) to index into
     */
    public IndexCSVFilters(String solrUrl, String coreName) {
        String urlString = solrUrl + "/" + coreName;
        // NOTE(review): 0 is passed for both timeouts, presumably to disable them so that
        // very large batches are not cut off -- confirm this is intended.
        solrClient = new HttpSolrClient.Builder(urlString)
                .withSocketTimeout(0)
                .withConnectionTimeout(0)
                .build();
        // "yyyy" is the calendar year; the previous "YYYY" is the week-based year and
        // yields the wrong year for dates around New Year.
        solrCompatibleSdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        solrCompatibleSdf.setTimeZone(UTC);
        csvCompatibleSdf = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss aa");
        csvCompatibleSdf.setTimeZone(UTC);
    }

    /**
     * Reads {@code file} as UTF-8 and indexes every well-formed data row.
     *
     * <p>The separator is {@code ","} when the header line contains a comma and {@code ";"}
     * otherwise. Blank lines are ignored; rows whose column count differs from the header are
     * logged and skipped. Documents are sent to Solr and committed in batches of
     * {@link #BATCH_SIZE}, plus one final batch for the remainder.
     *
     * @param file the file to index
     * @return {@code true} when the file was read to the end and all batches were sent;
     *         {@code false} when the file is empty, its header has fewer than 11 columns, or an
     *         I/O or Solr error interrupted indexing (batches committed before the error stay
     *         committed)
     */
    public boolean indexCSVFile(File file) {
        logger.info("Indexing Started for file : {}", file.getName());
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String header = reader.readLine();
            if (header == null) {
                // readLine() returns null at end of stream, i.e. the file has no header at all.
                logger.warn("Nothing to index, file is empty : {}", file.getName());
                return false;
            }
            String sep = header.contains(",") ? "," : ";";
            String[] fieldNames = header.split(sep);
            if (fieldNames.length < MIN_COLUMNS) {
                logger.error("Cannot index file {} : header has {} columns but at least {} are required",
                        file.getName(), fieldNames.length, MIN_COLUMNS);
                return false;
            }

            Collection<SolrInputDocument> docList = new ArrayList<>();
            int lineCnt = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineCnt++;
                String row = line.trim();
                if (row.isEmpty()) {
                    continue;
                }
                String[] data = row.split(sep);
                if (data.length != fieldNames.length) {
                    logger.warn("Skipping line {} of file {} : expected {} columns but found {} : {}",
                            lineCnt, file.getName(), fieldNames.length, data.length, line);
                    continue;
                }
                docList.add(buildDocument(fieldNames, data));
                if (docList.size() >= BATCH_SIZE) {
                    flush(docList);
                }
            }
            flush(docList);
        } catch (IOException | SolrServerException e) {
            logger.error("Error in Indexing file : {}", file.getName(), e);
            return false;
        }
        logger.info("Indexing Completed for file : {}", file.getName());
        return true;
    }

    /**
     * Commits pending changes and then asks Solr to optimize the index. Failures are logged
     * and not rethrown.
     */
    public void optimizeIndex() {
        try {
            logger.info("Index Optimization Process Start");
            solrClient.commit();
            solrClient.optimize();
            logger.info("Index Optimization Process Completed");
        } catch (SolrServerException | IOException e) {
            logger.error("Error in Index optimization process", e);
        }
    }

    /**
     * Closes the underlying Solr client.
     *
     * @throws IOException if the client fails to close
     */
    @Override
    public void close() throws IOException {
        solrClient.close();
    }

    /** Converts one data row into a Solr document; {@code data} has one value per header name. */
    private static SolrInputDocument buildDocument(String[] fieldNames, String[] data) {
        SolrInputDocument doc = new SolrInputDocument();
        for (int index = 0; index < fieldNames.length; index++) {
            // The timestamp and date columns are rewritten below instead of being copied as-is.
            if (index == TIMESTAMP_COLUMN || index == DATE_COLUMN) {
                continue;
            }
            doc.addField(fieldNames[index], data[index]);
        }
        String timestamp = data[TIMESTAMP_COLUMN];
        // Everything after the first space is the time-of-day part; it is stored on a fixed
        // 1970-01-01 date. When there is no space the whole value is used.
        String time = timestamp.substring(timestamp.indexOf(" ") + 1);
        doc.addField(TIME_FIELD, DATE_STAMP + time + DATE_PART_Z);
        doc.addField(DATE_TIME_FIELD, timestamp.replace(" ", "T") + DATE_PART_Z);
        doc.addField(fieldNames[DATE_COLUMN], data[DATE_COLUMN] + TIME_STAMP);
        doc.addField(ID_FIELD, timestamp.replace(" ", "_") + "_" + data[0] + "_" + data[8]
                + "_" + data[DATE_COLUMN] + "_" + data[10]);
        return doc;
    }

    /** Sends the buffered documents to Solr, commits, and empties the buffer; no-op when empty. */
    private void flush(Collection<SolrInputDocument> docList) throws SolrServerException, IOException {
        if (docList.isEmpty()) {
            return;
        }
        solrClient.add(docList);
        solrClient.commit();
        docList.clear();
    }
}