/* * @author: Bansi Kotecha and Chee Yap * @date: March 2003 * @synopsis: * The main application of this reader is to transform * TIGER datafiles into a Postgres Table. But TIGER datafiles can * be replaced by other fixed-field text files. * * The format and method of conversion of the data is determined by * a format file (in ascii). This is the structure of the format file: * * (1) The rest of a line is discarded after the comment character '#' * (2) The first non-blank line has a integer N = Number of Attributes * (3) The second non-blank line has a integer M = * Maximum Number of Fields per attribute. M is at least 4. * (4) The next N lines will contain per attribute information * (5) Each per-attribute line has up to M fields with these meaning: * AttributeName (text) * AttributeType (char) * BeginPosition (int) * EndPosition (int) * ... * Note that the first 4 fields have a fixed meaning. * Depending on the AttributeType, we can have more fields. * (6) The following attribute types are currently recognized: * A = alphanumeric string * N = number (non-negative only?) * L = point but assuming the Lon/Lat convention of TIGER * In this case, M = 6. * NOT YET IMPLEMENTED: * C = single character * P = polygon (or path?) * B = box (i.e., pair of points). In this case, M = 10. */ import java.io.*; import java.util.*; import java.sql.*; public class Reader extends RawDataReader { private final int fileExtLength = 3; private String frmtFile; // e.g., RT1.fmt private String dataFile; // e.g., TGR34017.RT1 private String tableName; // Extracted from dataFile, e.g., RT1 private String[][] frmtdata; private int noOfAttributes; private int fieldsPerAttribute; // This is a slight misnomer, as // it is the Maximum Number of fields per attribute. private int countRows = 0; private Statement stmt; private char option; public Reader(String[] args) { getArguments(args); tableName = getTableName(dataFile); readFrmtFile(); try { stmt = establishConnection().createStatement(); } catch(SQLException se) { se.printStackTrace(); } if((option == 'c') || (option == 'C')) createTable(); readRawDataFile(); } public void getArguments(String args[]){ if(args[0].startsWith("-")) { // do something to get the option // this is temporary. There can me more than one options. option = args[0].charAt(1); this.frmtFile = args[1]; this.dataFile = args[2]; } else { this.frmtFile = args[0]; this.dataFile = args[1]; } } private String getTableName(String filename) { String ext = filename.substring(filename.length() - fileExtLength); return ext; } private void readFrmtFile() { int count = 0; BufferedReader br = null; try { br = new BufferedReader(new FileReader(frmtFile)); String line; int i = 0; while((line = br.readLine()) != null){ line = line.trim(); int x = line.indexOf("#"); if (x >= 0) line = line.substring(0,x).trim(); if (line.length() == 0) continue; count++; if(count == 1) { noOfAttributes = Integer.parseInt(line); } else if(count == 2) { fieldsPerAttribute = Integer.parseInt(line); frmtdata = new String[noOfAttributes][fieldsPerAttribute]; } else { if (i< frmtdata.length) { parseFrmtdata(line, i); i++; } // excess lines are discarded } } }catch(Exception e){ e.printStackTrace(); } } private void parseFrmtdata(String line, int i) { StringTokenizer st = new StringTokenizer(line, " \t"); int j = 0; while (st.hasMoreTokens()) { frmtdata[i][j] = st.nextToken(); j++; } } private void createTable() { try { String query = "create table " + tableName + "("; for(int i = 0; i < frmtdata.length; i++) { query += frmtdata[i][0] + " " + getDataTypeAsString(frmtdata[i][1]); if(i == (frmtdata.length - 1)) query += ")"; else query += ", "; } //print("create query is : " + query); int i = stmt.executeUpdate(query); if(i == 1) print("Table " + tableName + " created successfully"); else print("Error: Could not create table " + tableName); } catch(SQLException se) { se.printStackTrace(); } } private String getDataTypeAsString(String given){ if(given.equals("N")) return "integer"; if(given.equals("A")) // Also need "C" for single character. return "text"; if(given.equals("L")) return "polygon"; // Kludge: cannot have rtree index // on point data! Only polygondata. return ""; // should be error } private int stringToInt(String value) { if(value.equals("")) { return -1; } else { return Integer.parseInt(value); } } /****************************************************************** readRawDataFile is the main reading function -- it reads each line of dataFile and inserts into database table. ******************************************************************/ private void readRawDataFile() { //print("readRawDataFile() called"); try { BufferedReader br = new BufferedReader( new FileReader(dataFile)); String record, query; String[] attributes; while((record = br.readLine()) != null) { attributes = parseRecord(record); query = createQuery(attributes); insertIntoDatabase(query); } commit(); print("\n" + countRows + " rows inserted from file " + dataFile); }catch (Exception ie) { ie.printStackTrace(); } } private String[] parseRecord(String record) { int len = frmtdata.length; // First find out how many values are needed per record: // it is at least "frmtdata.length" but can be more // e.g., each POINT (or Lon/Lat pairs) adds one more for(int i = 0; i < frmtdata.length; i++) if (frmtdata[i][1].equals("L")) len++; // Now form the array: String[] arrayOfAttributes = new String[len]; for(int i = 0; i < frmtdata.length; i++) { arrayOfAttributes[i] = record.substring(Integer.parseInt(frmtdata[i][2]) - 1, Integer.parseInt(frmtdata[i][3])).trim(); if (frmtdata[i][1].equals("L")) { // get second attribute! arrayOfAttributes[i+1] = record.substring(Integer.parseInt(frmtdata[i][4]) - 1, Integer.parseInt(frmtdata[i][5])).trim(); i++; } } return arrayOfAttributes; } /****************************************************************** Quoting all roque characters in the raw input files: \' ******************************************************************/ private String quoteCharacters(String value) { StringBuffer sb = new StringBuffer(value); for(int j = 0; j < sb.length(); j++) { if(sb.charAt(j) == '\'') { sb.insert(j-1, '\''); } } return sb.toString(); } private String createQuery(String[] attributes) { String query = ""; String t; query += "insert into " + tableName + " values ("; for(int i = 0; i < frmtdata.length; i++) { t = frmtdata[i][1]; if (t.equals("N")) { query += Integer.parseInt(attributes[i]); } else if (t.equals("A")) { //attributes[i] = quoteCharacters(attributes[i]); query += "'" + attributes[i] + "'"; } else if (t.equals("L")) { // assume equals("L"), lat/lon query += "'(" + removeLeadingPlus(attributes[i]) + ", " + removeLeadingPlus(attributes[i+1]) + ")'"; i++; // force increment of i } // else error if (i != frmtdata.length -1) query += ", "; } query += ")"; return query; } private String removeLeadingPlus(String ll) { if (ll.charAt(0) == '+') return ll.substring(1,ll.length()); return ll; } public void insertIntoDatabase(String query) { try { int i = stmt.executeUpdate(query); if(i == 1) { countRows++; if ((countRows % 300) == 0) print("Row " + countRows + " successfully inserted"); } else print("could not complete insertion"); }catch(Exception se) { se.printStackTrace(); } } /****************************************************************** The rest of the code are for testing ******************************************************************/ private void printMatrix() { for (int i = 0; i < frmtdata.length; i++){ for (int j = 0; j < frmtdata[0].length; j++){ print("frmtdata[" + i + "][" + j + "] : " +frmtdata[i][j]); } } } private void print(String s) { System.out.println(s); System.out.flush(); } //debugging constructor public Reader(String[] args, boolean b) { getArguments(args); tableName = getTableName(dataFile); readFrmtFile(); // printMatrix(); readRawDataFile(true); } //debugging readRawDataFile (do not actually insert) private void readRawDataFile(boolean b) { try { BufferedReader br = new BufferedReader(new FileReader(dataFile)); String record, query; String[] attributes; while((record = br.readLine()) != null) { attributes = parseRecord(record); query = createQuery(attributes); print("Query: " + query); if (countRows++ > 20) break; } print("\n" + countRows + " rows inserted from file " + dataFile); }catch (Exception ie) { ie.printStackTrace(); } } /****************************************************************** The main program ******************************************************************/ public static void main(String args[]){ Reader r = new Reader(args); // Reader r = new Reader(args, true); // debug version } }