CsvReader.java 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768
  1. /**
  2. *
  3. * Copyright (c) behosoft Co.,Ltd.
  4. * All Rights Reserved.
  5. *
  6. * This software is the confidential and proprietary information of behosoft.
  7. * (Social Security Department). You shall not disclose such
  8. * Confidential Information and shall use it only in accordance with
  9. * the terms of the license agreement you entered into with behosoft.
  10. *
  11. * Distributable under GNU LGPL license by gnu.org
  12. */
  13. package com.behosoft.util;
  14. import java.io.BufferedReader;
  15. import java.io.File;
  16. import java.io.FileInputStream;
  17. import java.io.FileNotFoundException;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.io.InputStreamReader;
  21. import java.io.Reader;
  22. import java.io.StringReader;
  23. import java.nio.charset.Charset;
  24. import java.text.NumberFormat;
  25. import java.util.HashMap;
  26. /**
  27. * A stream based parser for parsing delimited text data from a file or a
  28. * stream.
  29. */
  30. public class CsvReader {
  // Character source being parsed; assigned directly by the Reader-based
  // constructors and left null by the file-name constructors.
  private Reader inputStream = null;
  // Path of the data file; only assigned by the file-name constructors.
  private String fileName = null;
  // this holds all the values for switches that the user is allowed to set
  private UserSettings userSettings = new UserSettings();
  // Charset supplied to the file-name constructors.
  private Charset charset = null;
  // Set to true by setRecordDelimiter(); when false, readRecord() treats
  // CR and LF as record terminators instead of userSettings.RecordDelimiter.
  private boolean useCustomRecordDelimiter = false;
  // this will be our working buffer to hold data chunks
  // read in from the data file
  private DataBuffer dataBuffer = new DataBuffer();
  private ColumnBuffer columnBuffer = new ColumnBuffer();
  private RawRecordBuffer rawBuffer = new RawRecordBuffer();
  // Sized to values.length by the constructors.
  // NOTE(review): presumably one flag per column telling whether it was
  // text qualified -- confirm against the code that writes it.
  private boolean[] isQualified = null;
  // Value handed back by getRawRecord().
  private String rawRecord = "";
  // Header names, their count and the name-to-index lookup.
  private HeadersHolder headersHolder = new HeadersHolder();
  // these are all more or less global loop variables
  // to keep from needing to pass them all into various
  // methods during parsing
  private boolean startedColumn = false;
  private boolean startedWithQualifier = false;
  private boolean hasMoreData = true;
  private char lastLetter = '\0';
  private boolean hasReadNextLine = false;
  // Number of columns in the current record; reset to 0 at the start of
  // every readRecord() call.
  private int columnsCount = 0;
  // Record counter; getCurrentRecord() reports this value minus one.
  private long currentRecord = 0;
  // Column values of the current record; may be longer than columnsCount.
  private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];
  // Set to true by the Reader-based constructors only.
  // NOTE(review): presumably the file-based source is opened lazily -- confirm.
  private boolean initialized = false;
  // NOTE(review): presumably the flag consulted by checkClosed() -- confirm.
  private boolean closed = false;
  /**
   * Double up the text qualifier to represent an occurrence of the text
   * qualifier.
   */
  public static final int ESCAPE_MODE_DOUBLED = 1;
  /**
   * Use a backslash character before the text qualifier to represent an
   * occurrence of the text qualifier.
   */
  public static final int ESCAPE_MODE_BACKSLASH = 2;
  68. /**
  69. * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
  70. * as the data source.
  71. *
  72. * @param fileName
  73. * The path to the file to use as the data source.
  74. * @param delimiter
  75. * The character to use as the column delimiter.
  76. * @param charset
  77. * The {@link java.nio.charset.Charset Charset} to use while
  78. * parsing the data.
  79. */
  80. public CsvReader(String fileName, char delimiter, Charset charset)
  81. throws FileNotFoundException {
  82. if (fileName == null) {
  83. throw new IllegalArgumentException(
  84. "Parameter fileName can not be null.");
  85. }
  86. if (charset == null) {
  87. throw new IllegalArgumentException(
  88. "Parameter charset can not be null.");
  89. }
  90. if (!new File(fileName).exists()) {
  91. throw new FileNotFoundException("File " + fileName
  92. + " does not exist.");
  93. }
  94. this.fileName = fileName;
  95. this.userSettings.Delimiter = delimiter;
  96. this.charset = charset;
  97. isQualified = new boolean[values.length];
  98. }
  /**
   * Builds a {@link CsvReader} that takes its data from a file on disk,
   * decoding it as ISO-8859-1.
   *
   * @param fileName
   *            Path of the file to read.
   * @param delimiter
   *            Character separating the columns.
   * @throws FileNotFoundException
   *             If no file exists at {@code fileName}.
   */
  public CsvReader(String fileName, char delimiter)
      throws FileNotFoundException {
    // delegate with the default charset
    this(
        fileName,
        delimiter,
        Charset.forName("ISO-8859-1"));
  }
  /**
   * Builds a {@link CsvReader} that takes its data from a file on disk,
   * splitting columns on a comma and decoding the file as ISO-8859-1.
   *
   * @param fileName
   *            Path of the file to read.
   * @throws FileNotFoundException
   *             If no file exists at {@code fileName}.
   */
  public CsvReader(String fileName) throws FileNotFoundException {
    // delegate with the default column delimiter
    this(
        fileName,
        Letters.COMMA);
  }
  /**
   * Builds a {@link CsvReader} on top of an existing
   * {@link java.io.Reader Reader}.
   *
   * @param inputStream
   *            Character stream supplying the data.
   * @param delimiter
   *            Character separating the columns.
   * @throws IllegalArgumentException
   *             If {@code inputStream} is {@code null}.
   */
  public CsvReader(Reader inputStream, char delimiter) {
    if (inputStream == null) {
      throw new IllegalArgumentException("Parameter inputStream can not be null.");
    }

    userSettings.Delimiter = delimiter;
    this.inputStream = inputStream;
    isQualified = new boolean[values.length];
    // the source is already usable, so flag the reader as initialized
    initialized = true;
  }
  /**
   * Builds a {@link CsvReader} on top of an existing
   * {@link java.io.Reader Reader}, splitting columns on a comma.
   *
   * @param inputStream
   *            Character stream supplying the data.
   */
  public CsvReader(Reader inputStream) {
    // delegate with the default column delimiter
    this(
        inputStream,
        Letters.COMMA);
  }
  /**
   * Builds a {@link CsvReader} on top of an
   * {@link java.io.InputStream InputStream}.
   *
   * @param inputStream
   *            Byte stream supplying the data.
   * @param delimiter
   *            Character separating the columns.
   * @param charset
   *            {@link java.nio.charset.Charset Charset} used to decode the
   *            bytes.
   */
  public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
    // wrap the byte stream in a decoding reader and delegate
    this(
        new InputStreamReader(inputStream, charset),
        delimiter);
  }
  /**
   * Builds a {@link CsvReader} on top of an
   * {@link java.io.InputStream InputStream}, splitting columns on a comma.
   *
   * @param inputStream
   *            Byte stream supplying the data.
   * @param charset
   *            {@link java.nio.charset.Charset Charset} used to decode the
   *            bytes.
   */
  public CsvReader(InputStream inputStream, Charset charset) {
    // wrap the byte stream in a decoding reader and delegate
    this(
        new InputStreamReader(inputStream, charset));
  }
  183. public boolean getCaptureRawRecord() {
  184. return userSettings.CaptureRawRecord;
  185. }
  186. public void setCaptureRawRecord(boolean captureRawRecord) {
  187. userSettings.CaptureRawRecord = captureRawRecord;
  188. }
  189. public String getRawRecord() {
  190. return rawRecord;
  191. }
  192. /**
  193. * Gets whether leading and trailing whitespace characters are being trimmed
  194. * from non-textqualified column data. Default is true.
  195. *
  196. * @return Whether leading and trailing whitespace characters are being
  197. * trimmed from non-textqualified column data.
  198. */
  199. public boolean getTrimWhitespace() {
  200. return userSettings.TrimWhitespace;
  201. }
  202. /**
  203. * Sets whether leading and trailing whitespace characters should be trimmed
  204. * from non-textqualified column data or not. Default is true.
  205. *
  206. * @param trimWhitespace
  207. * Whether leading and trailing whitespace characters should be
  208. * trimmed from non-textqualified column data or not.
  209. */
  210. public void setTrimWhitespace(boolean trimWhitespace) {
  211. userSettings.TrimWhitespace = trimWhitespace;
  212. }
  213. /**
  214. * Gets the character being used as the column delimiter. Default is comma,
  215. * ','.
  216. *
  217. * @return The character being used as the column delimiter.
  218. */
  219. public char getDelimiter() {
  220. return userSettings.Delimiter;
  221. }
  222. /**
  223. * Sets the character to use as the column delimiter. Default is comma, ','.
  224. *
  225. * @param delimiter
  226. * The character to use as the column delimiter.
  227. */
  228. public void setDelimiter(char delimiter) {
  229. userSettings.Delimiter = delimiter;
  230. }
  231. public char getRecordDelimiter() {
  232. return userSettings.RecordDelimiter;
  233. }
  234. /**
  235. * Sets the character to use as the record delimiter.
  236. *
  237. * @param recordDelimiter
  238. * The character to use as the record delimiter. Default is
  239. * combination of standard end of line characters for Windows,
  240. * Unix, or Mac.
  241. */
  242. public void setRecordDelimiter(char recordDelimiter) {
  243. useCustomRecordDelimiter = true;
  244. userSettings.RecordDelimiter = recordDelimiter;
  245. }
  246. /**
  247. * Gets the character to use as a text qualifier in the data.
  248. *
  249. * @return The character to use as a text qualifier in the data.
  250. */
  251. public char getTextQualifier() {
  252. return userSettings.TextQualifier;
  253. }
  254. /**
  255. * Sets the character to use as a text qualifier in the data.
  256. *
  257. * @param textQualifier
  258. * The character to use as a text qualifier in the data.
  259. */
  260. public void setTextQualifier(char textQualifier) {
  261. userSettings.TextQualifier = textQualifier;
  262. }
  263. /**
  264. * Whether text qualifiers will be used while parsing or not.
  265. *
  266. * @return Whether text qualifiers will be used while parsing or not.
  267. */
  268. public boolean getUseTextQualifier() {
  269. return userSettings.UseTextQualifier;
  270. }
  271. /**
  272. * Sets whether text qualifiers will be used while parsing or not.
  273. *
  274. * @param useTextQualifier
  275. * Whether to use a text qualifier while parsing or not.
  276. */
  277. public void setUseTextQualifier(boolean useTextQualifier) {
  278. userSettings.UseTextQualifier = useTextQualifier;
  279. }
  280. /**
  281. * Gets the character being used as a comment signal.
  282. *
  283. * @return The character being used as a comment signal.
  284. */
  285. public char getComment() {
  286. return userSettings.Comment;
  287. }
  288. /**
  289. * Sets the character to use as a comment signal.
  290. *
  291. * @param comment
  292. * The character to use as a comment signal.
  293. */
  294. public void setComment(char comment) {
  295. userSettings.Comment = comment;
  296. }
  297. /**
  298. * Gets whether comments are being looked for while parsing or not.
  299. *
  300. * @return Whether comments are being looked for while parsing or not.
  301. */
  302. public boolean getUseComments() {
  303. return userSettings.UseComments;
  304. }
  305. /**
  306. * Sets whether comments are being looked for while parsing or not.
  307. *
  308. * @param useComments
  309. * Whether comments are being looked for while parsing or not.
  310. */
  311. public void setUseComments(boolean useComments) {
  312. userSettings.UseComments = useComments;
  313. }
  314. /**
  315. * Gets the current way to escape an occurance of the text qualifier inside
  316. * qualified data.
  317. *
  318. * @return The current way to escape an occurance of the text qualifier
  319. * inside qualified data.
  320. */
  321. public int getEscapeMode() {
  322. return userSettings.EscapeMode;
  323. }
  324. /**
  325. * Sets the current way to escape an occurance of the text qualifier inside
  326. * qualified data.
  327. *
  328. * @param escapeMode
  329. * The way to escape an occurance of the text qualifier inside
  330. * qualified data.
  331. * @exception IllegalArgumentException
  332. * When an illegal value is specified for escapeMode.
  333. */
  334. public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
  335. if (escapeMode != ESCAPE_MODE_DOUBLED
  336. && escapeMode != ESCAPE_MODE_BACKSLASH) {
  337. throw new IllegalArgumentException(
  338. "Parameter escapeMode must be a valid value.");
  339. }
  340. userSettings.EscapeMode = escapeMode;
  341. }
  342. public boolean getSkipEmptyRecords() {
  343. return userSettings.SkipEmptyRecords;
  344. }
  345. public void setSkipEmptyRecords(boolean skipEmptyRecords) {
  346. userSettings.SkipEmptyRecords = skipEmptyRecords;
  347. }
  348. /**
  349. * Safety caution to prevent the parser from using large amounts of memory
  350. * in the case where parsing settings like file encodings don't end up
  351. * matching the actual format of a file. This switch can be turned off if
  352. * the file format is known and tested. With the switch off, the max column
  353. * lengths and max column count per record supported by the parser will
  354. * greatly increase. Default is true.
  355. *
  356. * @return The current setting of the safety switch.
  357. */
  358. public boolean getSafetySwitch() {
  359. return userSettings.SafetySwitch;
  360. }
  361. /**
  362. * Safety caution to prevent the parser from using large amounts of memory
  363. * in the case where parsing settings like file encodings don't end up
  364. * matching the actual format of a file. This switch can be turned off if
  365. * the file format is known and tested. With the switch off, the max column
  366. * lengths and max column count per record supported by the parser will
  367. * greatly increase. Default is true.
  368. *
  369. * @param safetySwitch
  370. */
  371. public void setSafetySwitch(boolean safetySwitch) {
  372. userSettings.SafetySwitch = safetySwitch;
  373. }
  374. /**
  375. * Gets the count of columns found in this record.
  376. *
  377. * @return The count of columns found in this record.
  378. */
  379. public int getColumnCount() {
  380. return columnsCount;
  381. }
  382. /**
  383. * Gets the index of the current record.
  384. *
  385. * @return The index of the current record.
  386. */
  387. public long getCurrentRecord() {
  388. return currentRecord - 1;
  389. }
  390. /**
  391. * Gets the count of headers read in by a previous call to
  392. * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
  393. *
  394. * @return The count of headers read in by a previous call to
  395. * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
  396. */
  397. public int getHeaderCount() {
  398. return headersHolder.Length;
  399. }
  400. /**
  401. * Returns the header values as a string array.
  402. *
  403. * @return The header values as a String array.
  404. * @exception IOException
  405. * Thrown if this object has already been closed.
  406. */
  407. public String[] getHeaders() throws IOException {
  408. checkClosed();
  409. if (headersHolder.Headers == null) {
  410. return null;
  411. } else {
  412. // use clone here to prevent the outside code from
  413. // setting values on the array directly, which would
  414. // throw off the index lookup based on header name
  415. String[] clone = new String[headersHolder.Length];
  416. System.arraycopy(headersHolder.Headers, 0, clone, 0,
  417. headersHolder.Length);
  418. return clone;
  419. }
  420. }
  421. public void setHeaders(String[] headers) {
  422. headersHolder.Headers = headers;
  423. headersHolder.IndexByName.clear();
  424. if (headers != null) {
  425. headersHolder.Length = headers.length;
  426. } else {
  427. headersHolder.Length = 0;
  428. }
  429. // use headersHolder.Length here in case headers is null
  430. for (int i = 0; i < headersHolder.Length; i++) {
  431. headersHolder.IndexByName.put(headers[i], new Integer(i));
  432. }
  433. }
  434. public String[] getValues() throws IOException {
  435. checkClosed();
  436. // need to return a clone, and can't use clone because values.Length
  437. // might be greater than columnsCount
  438. String[] clone = new String[columnsCount];
  439. System.arraycopy(values, 0, clone, 0, columnsCount);
  440. return clone;
  441. }
  442. /**
  443. * Returns the current column value for a given column index.
  444. *
  445. * @param columnIndex
  446. * The index of the column.
  447. * @return The current column value.
  448. * @exception IOException
  449. * Thrown if this object has already been closed.
  450. */
  451. public String get(int columnIndex) throws IOException {
  452. checkClosed();
  453. if (columnIndex > -1 && columnIndex < columnsCount) {
  454. return values[columnIndex];
  455. } else {
  456. return "";
  457. }
  458. }
  459. /**
  460. * Returns the current column value for a given column header name.
  461. *
  462. * @param headerName
  463. * The header name of the column.
  464. * @return The current column value.
  465. * @exception IOException
  466. * Thrown if this object has already been closed.
  467. */
  468. public String get(String headerName) throws IOException {
  469. checkClosed();
  470. return get(getIndex(headerName));
  471. }
  472. /**
  473. * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string
  474. * of data as the source.&nbsp;Uses ISO-8859-1 as the
  475. * {@link java.nio.charset.Charset Charset}.
  476. *
  477. * @param data
  478. * The String of data to use as the source.
  479. * @return A {@link com.csvreader.CsvReader CsvReader} object using the
  480. * String of data as the source.
  481. */
  482. public static CsvReader parse(String data) {
  483. if (data == null) {
  484. throw new IllegalArgumentException(
  485. "Parameter data can not be null.");
  486. }
  487. return new CsvReader(new StringReader(data));
  488. }
  489. /**
  490. * Reads another record.
  491. *
  492. * @return Whether another record was successfully read or not.
  493. * @exception IOException
  494. * Thrown if an error occurs while reading data from the
  495. * source stream.
  496. */
  497. public boolean readRecord() throws IOException {
  498. checkClosed();
  499. columnsCount = 0;
  500. rawBuffer.Position = 0;
  501. dataBuffer.LineStart = dataBuffer.Position;
  502. hasReadNextLine = false;
  503. // check to see if we've already found the end of data
  504. if (hasMoreData) {
  505. // loop over the data stream until the end of data is found
  506. // or the end of the record is found
  507. do {
  508. if (dataBuffer.Position == dataBuffer.Count) {
  509. checkDataLength();
  510. } else {
  511. startedWithQualifier = false;
  512. // grab the current letter as a char
  513. char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
  514. if (userSettings.UseTextQualifier
  515. && currentLetter == userSettings.TextQualifier) {
  516. // this will be a text qualified column, so
  517. // we need to set startedWithQualifier to make it
  518. // enter the seperate branch to handle text
  519. // qualified columns
  520. lastLetter = currentLetter;
  521. // read qualified
  522. startedColumn = true;
  523. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  524. startedWithQualifier = true;
  525. boolean lastLetterWasQualifier = false;
  526. char escapeChar = userSettings.TextQualifier;
  527. if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
  528. escapeChar = Letters.BACKSLASH;
  529. }
  530. boolean eatingTrailingJunk = false;
  531. boolean lastLetterWasEscape = false;
  532. boolean readingComplexEscape = false;
  533. int escape = ComplexEscape.UNICODE;
  534. int escapeLength = 0;
  535. char escapeValue = (char) 0;
  536. dataBuffer.Position++;
  537. do {
  538. if (dataBuffer.Position == dataBuffer.Count) {
  539. checkDataLength();
  540. } else {
  541. // grab the current letter as a char
  542. currentLetter = dataBuffer.Buffer[dataBuffer.Position];
  543. if (eatingTrailingJunk) {
  544. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  545. if (currentLetter == userSettings.Delimiter) {
  546. endColumn();
  547. } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
  548. || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
  549. endColumn();
  550. endRecord();
  551. }
  552. } else if (readingComplexEscape) {
  553. escapeLength++;
  554. switch (escape) {
  555. case ComplexEscape.UNICODE:
  556. escapeValue *= (char) 16;
  557. escapeValue += hexToDec(currentLetter);
  558. if (escapeLength == 4) {
  559. readingComplexEscape = false;
  560. }
  561. break;
  562. case ComplexEscape.OCTAL:
  563. escapeValue *= (char) 8;
  564. escapeValue += (char) (currentLetter - '0');
  565. if (escapeLength == 3) {
  566. readingComplexEscape = false;
  567. }
  568. break;
  569. case ComplexEscape.DECIMAL:
  570. escapeValue *= (char) 10;
  571. escapeValue += (char) (currentLetter - '0');
  572. if (escapeLength == 3) {
  573. readingComplexEscape = false;
  574. }
  575. break;
  576. case ComplexEscape.HEX:
  577. escapeValue *= (char) 16;
  578. escapeValue += hexToDec(currentLetter);
  579. if (escapeLength == 2) {
  580. readingComplexEscape = false;
  581. }
  582. break;
  583. }
  584. if (!readingComplexEscape) {
  585. appendLetter(escapeValue);
  586. } else {
  587. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  588. }
  589. } else if (currentLetter == userSettings.TextQualifier) {
  590. if (lastLetterWasEscape) {
  591. lastLetterWasEscape = false;
  592. lastLetterWasQualifier = false;
  593. } else {
  594. updateCurrentValue();
  595. if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
  596. lastLetterWasEscape = true;
  597. }
  598. lastLetterWasQualifier = true;
  599. }
  600. } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
  601. && lastLetterWasEscape) {
  602. switch (currentLetter) {
  603. case 'n':
  604. appendLetter(Letters.LF);
  605. break;
  606. case 'r':
  607. appendLetter(Letters.CR);
  608. break;
  609. case 't':
  610. appendLetter(Letters.TAB);
  611. break;
  612. case 'b':
  613. appendLetter(Letters.BACKSPACE);
  614. break;
  615. case 'f':
  616. appendLetter(Letters.FORM_FEED);
  617. break;
  618. case 'e':
  619. appendLetter(Letters.ESCAPE);
  620. break;
  621. case 'v':
  622. appendLetter(Letters.VERTICAL_TAB);
  623. break;
  624. case 'a':
  625. appendLetter(Letters.ALERT);
  626. break;
  627. case '0':
  628. case '1':
  629. case '2':
  630. case '3':
  631. case '4':
  632. case '5':
  633. case '6':
  634. case '7':
  635. escape = ComplexEscape.OCTAL;
  636. readingComplexEscape = true;
  637. escapeLength = 1;
  638. escapeValue = (char) (currentLetter - '0');
  639. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  640. break;
  641. case 'u':
  642. case 'x':
  643. case 'o':
  644. case 'd':
  645. case 'U':
  646. case 'X':
  647. case 'O':
  648. case 'D':
  649. switch (currentLetter) {
  650. case 'u':
  651. case 'U':
  652. escape = ComplexEscape.UNICODE;
  653. break;
  654. case 'x':
  655. case 'X':
  656. escape = ComplexEscape.HEX;
  657. break;
  658. case 'o':
  659. case 'O':
  660. escape = ComplexEscape.OCTAL;
  661. break;
  662. case 'd':
  663. case 'D':
  664. escape = ComplexEscape.DECIMAL;
  665. break;
  666. }
  667. readingComplexEscape = true;
  668. escapeLength = 0;
  669. escapeValue = (char) 0;
  670. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  671. break;
  672. default:
  673. break;
  674. }
  675. lastLetterWasEscape = false;
  676. // can only happen for ESCAPE_MODE_BACKSLASH
  677. } else if (currentLetter == escapeChar) {
  678. updateCurrentValue();
  679. lastLetterWasEscape = true;
  680. } else {
  681. if (lastLetterWasQualifier) {
  682. if (currentLetter == userSettings.Delimiter) {
  683. endColumn();
  684. } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
  685. || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
  686. endColumn();
  687. endRecord();
  688. } else {
  689. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  690. eatingTrailingJunk = true;
  691. }
  692. // make sure to clear the flag for next
  693. // run of the loop
  694. lastLetterWasQualifier = false;
  695. }
  696. }
  697. // keep track of the last letter because we need
  698. // it for several key decisions
  699. lastLetter = currentLetter;
  700. if (startedColumn) {
  701. dataBuffer.Position++;
  702. if (userSettings.SafetySwitch
  703. && dataBuffer.Position
  704. - dataBuffer.ColumnStart
  705. + columnBuffer.Position > 100000) {
  706. close();
  707. throw new IOException(
  708. "Maximum column length of 100,000 exceeded in column "
  709. + NumberFormat
  710. .getIntegerInstance()
  711. .format(
  712. columnsCount)
  713. + " in record "
  714. + NumberFormat
  715. .getIntegerInstance()
  716. .format(
  717. currentRecord)
  718. + ". Set the SafetySwitch property to false"
  719. + " if you're expecting column lengths greater than 100,000 characters to"
  720. + " avoid this error.");
  721. }
  722. }
  723. } // end else
  724. } while (hasMoreData && startedColumn);
  725. } else if (currentLetter == userSettings.Delimiter) {
  726. // we encountered a column with no data, so
  727. // just send the end column
  728. lastLetter = currentLetter;
  729. endColumn();
  730. } else if (useCustomRecordDelimiter
  731. && currentLetter == userSettings.RecordDelimiter) {
  732. // this will skip blank lines
  733. if (startedColumn || columnsCount > 0
  734. || !userSettings.SkipEmptyRecords) {
  735. endColumn();
  736. endRecord();
  737. } else {
  738. dataBuffer.LineStart = dataBuffer.Position + 1;
  739. }
  740. lastLetter = currentLetter;
  741. } else if (!useCustomRecordDelimiter
  742. && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
  743. // this will skip blank lines
  744. if (startedColumn
  745. || columnsCount > 0
  746. || (!userSettings.SkipEmptyRecords && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
  747. endColumn();
  748. endRecord();
  749. } else {
  750. dataBuffer.LineStart = dataBuffer.Position + 1;
  751. }
  752. lastLetter = currentLetter;
  753. } else if (userSettings.UseComments && columnsCount == 0
  754. && currentLetter == userSettings.Comment) {
  755. // encountered a comment character at the beginning of
  756. // the line so just ignore the rest of the line
  757. lastLetter = currentLetter;
  758. skipLine();
  759. } else if (userSettings.TrimWhitespace
  760. && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
  761. // do nothing, this will trim leading whitespace
  762. // for both text qualified columns and non
  763. startedColumn = true;
  764. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  765. } else {
  766. // since the letter wasn't a special letter, this
  767. // will be the first letter of our current column
  768. startedColumn = true;
  769. dataBuffer.ColumnStart = dataBuffer.Position;
  770. boolean lastLetterWasBackslash = false;
  771. boolean readingComplexEscape = false;
  772. int escape = ComplexEscape.UNICODE;
  773. int escapeLength = 0;
  774. char escapeValue = (char) 0;
  775. boolean firstLoop = true;
  776. do {
  777. if (!firstLoop
  778. && dataBuffer.Position == dataBuffer.Count) {
  779. checkDataLength();
  780. } else {
  781. if (!firstLoop) {
  782. // grab the current letter as a char
  783. currentLetter = dataBuffer.Buffer[dataBuffer.Position];
  784. }
  785. if (!userSettings.UseTextQualifier
  786. && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
  787. && currentLetter == Letters.BACKSLASH) {
  788. if (lastLetterWasBackslash) {
  789. lastLetterWasBackslash = false;
  790. } else {
  791. updateCurrentValue();
  792. lastLetterWasBackslash = true;
  793. }
  794. } else if (readingComplexEscape) {
  795. escapeLength++;
  796. switch (escape) {
  797. case ComplexEscape.UNICODE:
  798. escapeValue *= (char) 16;
  799. escapeValue += hexToDec(currentLetter);
  800. if (escapeLength == 4) {
  801. readingComplexEscape = false;
  802. }
  803. break;
  804. case ComplexEscape.OCTAL:
  805. escapeValue *= (char) 8;
  806. escapeValue += (char) (currentLetter - '0');
  807. if (escapeLength == 3) {
  808. readingComplexEscape = false;
  809. }
  810. break;
  811. case ComplexEscape.DECIMAL:
  812. escapeValue *= (char) 10;
  813. escapeValue += (char) (currentLetter - '0');
  814. if (escapeLength == 3) {
  815. readingComplexEscape = false;
  816. }
  817. break;
  818. case ComplexEscape.HEX:
  819. escapeValue *= (char) 16;
  820. escapeValue += hexToDec(currentLetter);
  821. if (escapeLength == 2) {
  822. readingComplexEscape = false;
  823. }
  824. break;
  825. }
  826. if (!readingComplexEscape) {
  827. appendLetter(escapeValue);
  828. } else {
  829. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  830. }
  831. } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
  832. && lastLetterWasBackslash) {
  833. switch (currentLetter) {
  834. case 'n':
  835. appendLetter(Letters.LF);
  836. break;
  837. case 'r':
  838. appendLetter(Letters.CR);
  839. break;
  840. case 't':
  841. appendLetter(Letters.TAB);
  842. break;
  843. case 'b':
  844. appendLetter(Letters.BACKSPACE);
  845. break;
  846. case 'f':
  847. appendLetter(Letters.FORM_FEED);
  848. break;
  849. case 'e':
  850. appendLetter(Letters.ESCAPE);
  851. break;
  852. case 'v':
  853. appendLetter(Letters.VERTICAL_TAB);
  854. break;
  855. case 'a':
  856. appendLetter(Letters.ALERT);
  857. break;
  858. case '0':
  859. case '1':
  860. case '2':
  861. case '3':
  862. case '4':
  863. case '5':
  864. case '6':
  865. case '7':
  866. escape = ComplexEscape.OCTAL;
  867. readingComplexEscape = true;
  868. escapeLength = 1;
  869. escapeValue = (char) (currentLetter - '0');
  870. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  871. break;
  872. case 'u':
  873. case 'x':
  874. case 'o':
  875. case 'd':
  876. case 'U':
  877. case 'X':
  878. case 'O':
  879. case 'D':
  880. switch (currentLetter) {
  881. case 'u':
  882. case 'U':
  883. escape = ComplexEscape.UNICODE;
  884. break;
  885. case 'x':
  886. case 'X':
  887. escape = ComplexEscape.HEX;
  888. break;
  889. case 'o':
  890. case 'O':
  891. escape = ComplexEscape.OCTAL;
  892. break;
  893. case 'd':
  894. case 'D':
  895. escape = ComplexEscape.DECIMAL;
  896. break;
  897. }
  898. readingComplexEscape = true;
  899. escapeLength = 0;
  900. escapeValue = (char) 0;
  901. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  902. break;
  903. default:
  904. break;
  905. }
  906. lastLetterWasBackslash = false;
  907. } else {
  908. if (currentLetter == userSettings.Delimiter) {
  909. endColumn();
  910. } else if ((!useCustomRecordDelimiter && (currentLetter == Letters.CR || currentLetter == Letters.LF))
  911. || (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter)) {
  912. endColumn();
  913. endRecord();
  914. }
  915. }
  916. // keep track of the last letter because we need
  917. // it for several key decisions
  918. lastLetter = currentLetter;
  919. firstLoop = false;
  920. if (startedColumn) {
  921. dataBuffer.Position++;
  922. if (userSettings.SafetySwitch
  923. && dataBuffer.Position
  924. - dataBuffer.ColumnStart
  925. + columnBuffer.Position > 100000) {
  926. close();
  927. throw new IOException(
  928. "Maximum column length of 100,000 exceeded in column "
  929. + NumberFormat
  930. .getIntegerInstance()
  931. .format(
  932. columnsCount)
  933. + " in record "
  934. + NumberFormat
  935. .getIntegerInstance()
  936. .format(
  937. currentRecord)
  938. + ". Set the SafetySwitch property to false"
  939. + " if you're expecting column lengths greater than 100,000 characters to"
  940. + " avoid this error.");
  941. }
  942. }
  943. } // end else
  944. } while (hasMoreData && startedColumn);
  945. }
  946. if (hasMoreData) {
  947. dataBuffer.Position++;
  948. }
  949. } // end else
  950. } while (hasMoreData && !hasReadNextLine);
  951. // check to see if we hit the end of the file
  952. // without processing the current record
  953. if (startedColumn || lastLetter == userSettings.Delimiter) {
  954. endColumn();
  955. endRecord();
  956. }
  957. }
  958. if (userSettings.CaptureRawRecord) {
  959. if (hasMoreData) {
  960. if (rawBuffer.Position == 0) {
  961. rawRecord = new String(dataBuffer.Buffer,
  962. dataBuffer.LineStart, dataBuffer.Position
  963. - dataBuffer.LineStart - 1);
  964. } else {
  965. rawRecord = new String(rawBuffer.Buffer, 0,
  966. rawBuffer.Position)
  967. + new String(dataBuffer.Buffer,
  968. dataBuffer.LineStart, dataBuffer.Position
  969. - dataBuffer.LineStart - 1);
  970. }
  971. } else {
  972. // for hasMoreData to ever be false, all data would have had to
  973. // have been
  974. // copied to the raw buffer
  975. rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);
  976. }
  977. } else {
  978. rawRecord = "";
  979. }
  980. return hasReadNextLine;
  981. }
  982. /**
  983. * @exception IOException
  984. * Thrown if an error occurs while reading data from the
  985. * source stream.
  986. */
  987. private void checkDataLength() throws IOException {
  988. if (!initialized) {
  989. if (fileName != null) {
  990. inputStream = new BufferedReader(new InputStreamReader(
  991. new FileInputStream(fileName), charset),
  992. StaticSettings.MAX_FILE_BUFFER_SIZE);
  993. }
  994. charset = null;
  995. initialized = true;
  996. }
  997. updateCurrentValue();
  998. if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
  999. if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count
  1000. - dataBuffer.LineStart) {
  1001. int newLength = rawBuffer.Buffer.length
  1002. + Math.max(dataBuffer.Count - dataBuffer.LineStart,
  1003. rawBuffer.Buffer.length);
  1004. char[] holder = new char[newLength];
  1005. System.arraycopy(rawBuffer.Buffer, 0, holder, 0,
  1006. rawBuffer.Position);
  1007. rawBuffer.Buffer = holder;
  1008. }
  1009. System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart,
  1010. rawBuffer.Buffer, rawBuffer.Position, dataBuffer.Count
  1011. - dataBuffer.LineStart);
  1012. rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart;
  1013. }
  1014. try {
  1015. dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0,
  1016. dataBuffer.Buffer.length);
  1017. } catch (IOException ex) {
  1018. close();
  1019. throw ex;
  1020. }
  1021. // if no more data could be found, set flag stating that
  1022. // the end of the data was found
  1023. if (dataBuffer.Count == -1) {
  1024. hasMoreData = false;
  1025. }
  1026. dataBuffer.Position = 0;
  1027. dataBuffer.LineStart = 0;
  1028. dataBuffer.ColumnStart = 0;
  1029. }
  1030. /**
  1031. * Read the first record of data as column headers.
  1032. *
  1033. * @return Whether the header record was successfully read or not.
  1034. * @exception IOException
  1035. * Thrown if an error occurs while reading data from the
  1036. * source stream.
  1037. */
  1038. public boolean readHeaders() throws IOException {
  1039. boolean result = readRecord();
  1040. // copy the header data from the column array
  1041. // to the header string array
  1042. headersHolder.Length = columnsCount;
  1043. headersHolder.Headers = new String[columnsCount];
  1044. for (int i = 0; i < headersHolder.Length; i++) {
  1045. String columnValue = get(i);
  1046. headersHolder.Headers[i] = columnValue;
  1047. // if there are duplicate header names, we will save the last one
  1048. headersHolder.IndexByName.put(columnValue, new Integer(i));
  1049. }
  1050. if (result) {
  1051. currentRecord--;
  1052. }
  1053. columnsCount = 0;
  1054. return result;
  1055. }
  1056. /**
  1057. * Returns the column header value for a given column index.
  1058. *
  1059. * @param columnIndex
  1060. * The index of the header column being requested.
  1061. * @return The value of the column header at the given column index.
  1062. * @exception IOException
  1063. * Thrown if this object has already been closed.
  1064. */
  1065. public String getHeader(int columnIndex) throws IOException {
  1066. checkClosed();
  1067. // check to see if we have read the header record yet
  1068. // check to see if the column index is within the bounds
  1069. // of our header array
  1070. if (columnIndex > -1 && columnIndex < headersHolder.Length) {
  1071. // return the processed header data for this column
  1072. return headersHolder.Headers[columnIndex];
  1073. } else {
  1074. return "";
  1075. }
  1076. }
  1077. public boolean isQualified(int columnIndex) throws IOException {
  1078. checkClosed();
  1079. if (columnIndex < columnsCount && columnIndex > -1) {
  1080. return isQualified[columnIndex];
  1081. } else {
  1082. return false;
  1083. }
  1084. }
  1085. /**
  1086. * @exception IOException
  1087. * Thrown if a very rare extreme exception occurs during
  1088. * parsing, normally resulting from improper data format.
  1089. */
  1090. private void endColumn() throws IOException {
  1091. String currentValue = "";
  1092. // must be called before setting startedColumn = false
  1093. if (startedColumn) {
  1094. if (columnBuffer.Position == 0) {
  1095. if (dataBuffer.ColumnStart < dataBuffer.Position) {
  1096. int lastLetter = dataBuffer.Position - 1;
  1097. if (userSettings.TrimWhitespace && !startedWithQualifier) {
  1098. while (lastLetter >= dataBuffer.ColumnStart
  1099. && (dataBuffer.Buffer[lastLetter] == Letters.SPACE || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
  1100. lastLetter--;
  1101. }
  1102. }
  1103. currentValue = new String(dataBuffer.Buffer,
  1104. dataBuffer.ColumnStart, lastLetter
  1105. - dataBuffer.ColumnStart + 1);
  1106. }
  1107. } else {
  1108. updateCurrentValue();
  1109. int lastLetter = columnBuffer.Position - 1;
  1110. if (userSettings.TrimWhitespace && !startedWithQualifier) {
  1111. while (lastLetter >= 0
  1112. && (columnBuffer.Buffer[lastLetter] == Letters.SPACE || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
  1113. lastLetter--;
  1114. }
  1115. }
  1116. currentValue = new String(columnBuffer.Buffer, 0,
  1117. lastLetter + 1);
  1118. }
  1119. }
  1120. columnBuffer.Position = 0;
  1121. startedColumn = false;
  1122. if (columnsCount >= 100000 && userSettings.SafetySwitch) {
  1123. close();
  1124. throw new IOException(
  1125. "Maximum column count of 100,000 exceeded in record "
  1126. + NumberFormat.getIntegerInstance().format(
  1127. currentRecord)
  1128. + ". Set the SafetySwitch property to false"
  1129. + " if you're expecting more than 100,000 columns per record to"
  1130. + " avoid this error.");
  1131. }
  1132. // check to see if our current holder array for
  1133. // column chunks is still big enough to handle another
  1134. // column chunk
  1135. if (columnsCount == values.length) {
  1136. // holder array needs to grow to be able to hold another column
  1137. int newLength = values.length * 2;
  1138. String[] holder = new String[newLength];
  1139. System.arraycopy(values, 0, holder, 0, values.length);
  1140. values = holder;
  1141. boolean[] qualifiedHolder = new boolean[newLength];
  1142. System.arraycopy(isQualified, 0, qualifiedHolder, 0,
  1143. isQualified.length);
  1144. isQualified = qualifiedHolder;
  1145. }
  1146. values[columnsCount] = currentValue;
  1147. isQualified[columnsCount] = startedWithQualifier;
  1148. currentValue = "";
  1149. columnsCount++;
  1150. }
  1151. private void appendLetter(char letter) {
  1152. if (columnBuffer.Position == columnBuffer.Buffer.length) {
  1153. int newLength = columnBuffer.Buffer.length * 2;
  1154. char[] holder = new char[newLength];
  1155. System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
  1156. columnBuffer.Position);
  1157. columnBuffer.Buffer = holder;
  1158. }
  1159. columnBuffer.Buffer[columnBuffer.Position++] = letter;
  1160. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  1161. }
  1162. private void updateCurrentValue() {
  1163. if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) {
  1164. if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position
  1165. - dataBuffer.ColumnStart) {
  1166. int newLength = columnBuffer.Buffer.length
  1167. + Math.max(
  1168. dataBuffer.Position - dataBuffer.ColumnStart,
  1169. columnBuffer.Buffer.length);
  1170. char[] holder = new char[newLength];
  1171. System.arraycopy(columnBuffer.Buffer, 0, holder, 0,
  1172. columnBuffer.Position);
  1173. columnBuffer.Buffer = holder;
  1174. }
  1175. System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart,
  1176. columnBuffer.Buffer, columnBuffer.Position,
  1177. dataBuffer.Position - dataBuffer.ColumnStart);
  1178. columnBuffer.Position += dataBuffer.Position
  1179. - dataBuffer.ColumnStart;
  1180. }
  1181. dataBuffer.ColumnStart = dataBuffer.Position + 1;
  1182. }
  1183. /**
  1184. * @exception IOException
  1185. * Thrown if an error occurs while reading data from the
  1186. * source stream.
  1187. */
  1188. private void endRecord() throws IOException {
  1189. // this flag is used as a loop exit condition
  1190. // during parsing
  1191. hasReadNextLine = true;
  1192. currentRecord++;
  1193. }
  1194. /**
  1195. * Gets the corresponding column index for a given column header name.
  1196. *
  1197. * @param headerName
  1198. * The header name of the column.
  1199. * @return The column index for the given column header name.&nbsp;Returns
  1200. * -1 if not found.
  1201. * @exception IOException
  1202. * Thrown if this object has already been closed.
  1203. */
  1204. public int getIndex(String headerName) throws IOException {
  1205. checkClosed();
  1206. Object indexValue = headersHolder.IndexByName.get(headerName);
  1207. if (indexValue != null) {
  1208. return ((Integer) indexValue).intValue();
  1209. } else {
  1210. return -1;
  1211. }
  1212. }
  1213. /**
  1214. * Skips the next record of data by parsing each column.&nbsp;Does not
  1215. * increment
  1216. * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.
  1217. *
  1218. * @return Whether another record was successfully skipped or not.
  1219. * @exception IOException
  1220. * Thrown if an error occurs while reading data from the
  1221. * source stream.
  1222. */
  1223. public boolean skipRecord() throws IOException {
  1224. checkClosed();
  1225. boolean recordRead = false;
  1226. if (hasMoreData) {
  1227. recordRead = readRecord();
  1228. if (recordRead) {
  1229. currentRecord--;
  1230. }
  1231. }
  1232. return recordRead;
  1233. }
  1234. /**
  1235. * Skips the next line of data using the standard end of line characters and
  1236. * does not do any column delimited parsing.
  1237. *
  1238. * @return Whether a line was successfully skipped or not.
  1239. * @exception IOException
  1240. * Thrown if an error occurs while reading data from the
  1241. * source stream.
  1242. */
  1243. public boolean skipLine() throws IOException {
  1244. checkClosed();
  1245. // clear public column values for current line
  1246. columnsCount = 0;
  1247. boolean skippedLine = false;
  1248. if (hasMoreData) {
  1249. boolean foundEol = false;
  1250. do {
  1251. if (dataBuffer.Position == dataBuffer.Count) {
  1252. checkDataLength();
  1253. } else {
  1254. skippedLine = true;
  1255. // grab the current letter as a char
  1256. char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
  1257. if (currentLetter == Letters.CR
  1258. || currentLetter == Letters.LF) {
  1259. foundEol = true;
  1260. }
  1261. // keep track of the last letter because we need
  1262. // it for several key decisions
  1263. lastLetter = currentLetter;
  1264. if (!foundEol) {
  1265. dataBuffer.Position++;
  1266. }
  1267. } // end else
  1268. } while (hasMoreData && !foundEol);
  1269. columnBuffer.Position = 0;
  1270. dataBuffer.LineStart = dataBuffer.Position + 1;
  1271. }
  1272. rawBuffer.Position = 0;
  1273. rawRecord = "";
  1274. return skippedLine;
  1275. }
  1276. /**
  1277. * Closes and releases all related resources.
  1278. */
  1279. public void close() {
  1280. if (!closed) {
  1281. close(true);
  1282. closed = true;
  1283. }
  1284. }
  1285. /**
  1286. *
  1287. */
  1288. private void close(boolean closing) {
  1289. if (!closed) {
  1290. if (closing) {
  1291. charset = null;
  1292. headersHolder.Headers = null;
  1293. headersHolder.IndexByName = null;
  1294. dataBuffer.Buffer = null;
  1295. columnBuffer.Buffer = null;
  1296. rawBuffer.Buffer = null;
  1297. }
  1298. try {
  1299. if (initialized) {
  1300. inputStream.close();
  1301. }
  1302. } catch (Exception e) {
  1303. // just eat the exception
  1304. }
  1305. inputStream = null;
  1306. closed = true;
  1307. }
  1308. }
  1309. /**
  1310. * @exception IOException
  1311. * Thrown if this object has already been closed.
  1312. */
  1313. private void checkClosed() throws IOException {
  1314. if (closed) {
  1315. throw new IOException(
  1316. "This instance of the CsvReader class has already been closed.");
  1317. }
  1318. }
  1319. /**
  1320. *
  1321. */
  1322. protected void finalize() {
  1323. close(false);
  1324. }
  1325. private class ComplexEscape {
  1326. private static final int UNICODE = 1;
  1327. private static final int OCTAL = 2;
  1328. private static final int DECIMAL = 3;
  1329. private static final int HEX = 4;
  1330. }
  1331. private static char hexToDec(char hex) {
  1332. char result;
  1333. if (hex >= 'a') {
  1334. result = (char) (hex - 'a' + 10);
  1335. } else if (hex >= 'A') {
  1336. result = (char) (hex - 'A' + 10);
  1337. } else {
  1338. result = (char) (hex - '0');
  1339. }
  1340. return result;
  1341. }
  1342. private class DataBuffer {
  1343. public char[] Buffer;
  1344. public int Position;
  1345. // / <summary>
  1346. // / How much usable data has been read into the stream,
  1347. // / which will not always be as long as Buffer.Length.
  1348. // / </summary>
  1349. public int Count;
  1350. // / <summary>
  1351. // / The position of the cursor in the buffer when the
  1352. // / current column was started or the last time data
  1353. // / was moved out to the column buffer.
  1354. // / </summary>
  1355. public int ColumnStart;
  1356. public int LineStart;
  1357. public DataBuffer() {
  1358. Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
  1359. Position = 0;
  1360. Count = 0;
  1361. ColumnStart = 0;
  1362. LineStart = 0;
  1363. }
  1364. }
  1365. private class ColumnBuffer {
  1366. public char[] Buffer;
  1367. public int Position;
  1368. public ColumnBuffer() {
  1369. Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
  1370. Position = 0;
  1371. }
  1372. }
  1373. private class RawRecordBuffer {
  1374. public char[] Buffer;
  1375. public int Position;
  1376. public RawRecordBuffer() {
  1377. Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
  1378. * StaticSettings.INITIAL_COLUMN_COUNT];
  1379. Position = 0;
  1380. }
  1381. }
  1382. private class Letters {
  1383. public static final char LF = '\n';
  1384. public static final char CR = '\r';
  1385. public static final char QUOTE = '"';
  1386. public static final char COMMA = ',';
  1387. public static final char SPACE = ' ';
  1388. public static final char TAB = '\t';
  1389. public static final char POUND = '#';
  1390. public static final char BACKSLASH = '\\';
  1391. public static final char NULL = '\0';
  1392. public static final char BACKSPACE = '\b';
  1393. public static final char FORM_FEED = '\f';
  1394. public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
  1395. public static final char VERTICAL_TAB = '\u000B';
  1396. public static final char ALERT = '\u0007';
  1397. }
  1398. private class UserSettings {
  1399. // having these as publicly accessible members will prevent
  1400. // the overhead of the method call that exists on properties
  1401. public boolean CaseSensitive;
  1402. public char TextQualifier;
  1403. public boolean TrimWhitespace;
  1404. public boolean UseTextQualifier;
  1405. public char Delimiter;
  1406. public char RecordDelimiter;
  1407. public char Comment;
  1408. public boolean UseComments;
  1409. public int EscapeMode;
  1410. public boolean SafetySwitch;
  1411. public boolean SkipEmptyRecords;
  1412. public boolean CaptureRawRecord;
  1413. public UserSettings() {
  1414. CaseSensitive = true;
  1415. TextQualifier = Letters.QUOTE;
  1416. TrimWhitespace = true;
  1417. UseTextQualifier = true;
  1418. Delimiter = Letters.COMMA;
  1419. RecordDelimiter = Letters.NULL;
  1420. Comment = Letters.POUND;
  1421. UseComments = false;
  1422. EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
  1423. SafetySwitch = true;
  1424. SkipEmptyRecords = true;
  1425. CaptureRawRecord = true;
  1426. }
  1427. }
  1428. private class HeadersHolder {
  1429. public String[] Headers;
  1430. public int Length;
  1431. public HashMap IndexByName;
  1432. public HeadersHolder() {
  1433. Headers = null;
  1434. Length = 0;
  1435. IndexByName = new HashMap();
  1436. }
  1437. }
  1438. private class StaticSettings {
  1439. // these are static instead of final so they can be changed in unit test
  1440. // isn't visible outside this class and is only accessed once during
  1441. // CsvReader construction
  1442. public static final int MAX_BUFFER_SIZE = 1024;
  1443. public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
  1444. public static final int INITIAL_COLUMN_COUNT = 10;
  1445. public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
  1446. }
  1447. }