The Anomaly detection notebook is unable to handle data containing the string "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)":
ValueError Traceback (most recent call last)
pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric()
ValueError: Unable to parse string "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)"
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
in
1 # Create a Pandas dataframe from the Zeek HTTP log
2 log_to_df = LogToDataFrame()
----> 3 bro_df = log_to_df.create_dataframe('../data/http2.log')
4 print('Read in {:d} Rows...'.format(len(bro_df)))
5 bro_df.head()
/usr/local/lib/python3.6/dist-packages/zat/log_to_dataframe.py in create_dataframe(self, log_filename, ts_index, aggressive_category, usecols)
63
64 # Now actually read the Zeek Log using Pandas read CSV
---> 65 self._df = pd.read_csv(log_filename, sep='\t', names=header_names, usecols=usecols, dtype=pandas_types, comment="#", na_values='-')
66
67 # Now we convert 'time' and 'interval' fields to datetime and timedelta respectively
~/.local/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
~/.local/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
461
462 try:
--> 463 data = parser.read(nrows)
464 finally:
465 parser.close()
~/.local/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
1152 def read(self, nrows=None):
1153 nrows = _validate_integer("nrows", nrows)
-> 1154 ret = self._engine.read(nrows)
1155
1156 # May alter columns / col_dict
~/.local/lib/python3.6/site-packages/pandas/io/parsers.py in read(self, nrows)
2057 def read(self, nrows=None):
2058 try:
-> 2059 data = self._reader.read(nrows)
2060 except StopIteration:
2061 if self._first_chunk:
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_column_data()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_tokens()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._convert_with_dtype()
~/.local/lib/python3.6/site-packages/pandas/core/arrays/integer.py in _from_sequence_of_strings(cls, strings, dtype, copy)
325 @classmethod
326 def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
--> 327 scalars = to_numeric(strings, errors="raise")
328 return cls._from_sequence(scalars, dtype, copy)
329
~/.local/lib/python3.6/site-packages/pandas/core/tools/numeric.py in to_numeric(arg, errors, downcast)
149 coerce_numeric = errors not in ("ignore", "raise")
150 values = lib.maybe_convert_numeric(
--> 151 values, set(), coerce_numeric=coerce_numeric
152 )
153
pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric()
ValueError: Unable to parse string "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)" at position 30242