Downloading and preparing dataset slither-audited-smart-contracts/big-multilabel (download: 1.63 GiB, generated: 4.39 GiB, post-processed: Unknown size, total: 6.01 GiB) to C:/Users/username/.cache/huggingface/datasets/mwritescode___slither-audited-smart-contracts/big-multilabel/1.1.0/4cf503b59ce9d3157914e47f6253de773b7ab828f46642685d4b470b88ca1f13...
Downloading data files: 100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 498.91it/s]
Extracting data files: 100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 248.87it/s]
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\datasets\builder.py:1597, in GeneratorBasedBuilder._prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)
1596 try:
-> 1597 writer = writer_class(
1598 features=self.info.features,
1599 path=fpath.replace("SSSSS", f"{shard_id:05d}").replace("JJJJJ", f"{job_id:05d}"),
1600 writer_batch_size=self._writer_batch_size,
1601 hash_salt=split_info.name,
1602 check_duplicates=check_duplicate_keys,
1603 storage_options=self._fs.storage_options,
1604 embed_local_files=embed_local_files,
1605 )
1606 try:
File ~\anaconda3\lib\site-packages\datasets\arrow_writer.py:334, in ArrowWriter.__init__(self, schema, features, path, stream, fingerprint, writer_batch_size, hash_salt, check_duplicates, disable_nullable, update_features, with_metadata, unit, embed_local_files, storage_options)
329 self._path = (
330 fs_token_paths[2][0]
331 if not is_remote_filesystem(self._fs)
332 else self._fs.unstrip_protocol(fs_token_paths[2][0])
333 )
--> 334 self.stream = self._fs.open(fs_token_paths[2][0], "wb")
335 self._closable_stream = True
File ~\anaconda3\lib\site-packages\fsspec\spec.py:1199, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1198 ac = kwargs.pop("autocommit", not self._intrans)
-> 1199 f = self._open(
1200 path,
1201 mode=mode,
1202 block_size=block_size,
1203 autocommit=ac,
1204 cache_options=cache_options,
1205 **kwargs,
1206 )
1207 if compression is not None:
File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:183, in LocalFileSystem._open(self, path, mode, block_size, **kwargs)
182 self.makedirs(self._parent(path), exist_ok=True)
--> 183 return LocalFileOpener(path, mode, fs=self, **kwargs)
File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:314, in LocalFileOpener.__init__(self, path, mode, autocommit, fs, compression, **kwargs)
313 self.blocksize = io.DEFAULT_BUFFER_SIZE
--> 314 self._open()
File ~\anaconda3\lib\site-packages\fsspec\implementations\local.py:319, in LocalFileOpener._open(self)
318 if self.autocommit or "w" not in self.mode:
--> 319 self.f = open(self.path, mode=self.mode)
320 if self.compression:
FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/username/.cache/huggingface/datasets/mwritescode___slither-audited-smart-contracts/big-multilabel/1.1.0/4cf503b59ce9d3157914e47f6253de773b7ab828f46642685d4b470b88ca1f13.incomplete/slither-audited-smart-contracts-validation-00000-00000-of-NNNNN.arrow'
The above exception was the direct cause of the following exception:
DatasetGenerationError Traceback (most recent call last)
Cell In[2], line 11
6 from datasets import load_dataset
8 # Due to a bug in the HuggingFace dataset, two of the file checksums currently do not match
9 # what is in the dataset metadata, so we have to load the data splits with the flag
10 # ignore_verifications set to True
---> 11 train_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='train', ignore_verifications=True)
12 test_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='test', ignore_verifications=True)
13 val_set = load_dataset("mwritescode/slither-audited-smart-contracts", 'big-multilabel', split='validation', ignore_verifications=True)
File ~\anaconda3\lib\site-packages\datasets\load.py:1797, in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
1794 try_from_hf_gcs = path not in _PACKAGED_DATASETS_MODULES
1796 # Download and prepare data
-> 1797 builder_instance.download_and_prepare(
1798 download_config=download_config,
1799 download_mode=download_mode,
1800 verification_mode=verification_mode,
1801 try_from_hf_gcs=try_from_hf_gcs,
1802 num_proc=num_proc,
1803 storage_options=storage_options,
1804 )
1806 # Build dataset for splits
1807 keep_in_memory = (
1808 keep_in_memory if keep_in_memory is not None else is_small_dataset(builder_instance.info.dataset_size)
1809 )
File ~\anaconda3\lib\site-packages\datasets\builder.py:890, in DatasetBuilder.download_and_prepare(self, output_dir, download_config, download_mode, verification_mode, ignore_verifications, try_from_hf_gcs, dl_manager, base_path, use_auth_token, file_format, max_shard_size, num_proc, storage_options, **download_and_prepare_kwargs)
888 if num_proc is not None:
889 prepare_split_kwargs["num_proc"] = num_proc
--> 890 self._download_and_prepare(
891 dl_manager=dl_manager,
892 verification_mode=verification_mode,
893 **prepare_split_kwargs,
894 **download_and_prepare_kwargs,
895 )
896 # Sync info
897 self.info.dataset_size = sum(split.num_bytes for split in self.info.splits.values())
File ~\anaconda3\lib\site-packages\datasets\builder.py:1649, in GeneratorBasedBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_splits_kwargs)
1648 def _download_and_prepare(self, dl_manager, verification_mode, **prepare_splits_kwargs):
-> 1649 super()._download_and_prepare(
1650 dl_manager,
1651 verification_mode,
1652 check_duplicate_keys=verification_mode == VerificationMode.BASIC_CHECKS
1653 or verification_mode == VerificationMode.ALL_CHECKS,
1654 **prepare_splits_kwargs,
1655 )
File ~\anaconda3\lib\site-packages\datasets\builder.py:985, in DatasetBuilder._download_and_prepare(self, dl_manager, verification_mode, **prepare_split_kwargs)
981 split_dict.add(split_generator.split_info)
983 try:
984 # Prepare split will record examples associated to the split
--> 985 self._prepare_split(split_generator, **prepare_split_kwargs)
986 except OSError as e:
987 raise OSError(
988 "Cannot find data file. "
989 + (self.manual_download_instructions or "")
990 + "\nOriginal error:\n"
991 + str(e)
992 ) from None
File ~\anaconda3\lib\site-packages\datasets\builder.py:1487, in GeneratorBasedBuilder._prepare_split(self, split_generator, check_duplicate_keys, file_format, num_proc, max_shard_size)
1485 job_id = 0
1486 with pbar:
-> 1487 for job_id, done, content in self._prepare_split_single(
1488 gen_kwargs=gen_kwargs, job_id=job_id, **_prepare_split_args
1489 ):
1490 if done:
1491 result = content
File ~\anaconda3\lib\site-packages\datasets\builder.py:1644, in GeneratorBasedBuilder._prepare_split_single(self, gen_kwargs, fpath, file_format, max_shard_size, split_info, check_duplicate_keys, job_id)
1642 if isinstance(e, SchemaInferenceError) and e.__context__ is not None:
1643 e = e.__context__
-> 1644 raise DatasetGenerationError("An error occurred while generating the dataset") from e
1646 yield job_id, True, (total_num_examples, total_num_bytes, writer._features, num_shards, shard_lengths)
DatasetGenerationError: An error occurred while generating the dataset
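
Note that the FileNotFoundError is raised while opening the quoted .incomplete shard path, which is roughly 260 characters long, right at Windows' default MAX_PATH limit (and likely over it once a real user name replaces "username"). A plausible cause is therefore the path length rather than the dataset itself. The sketch below works under that assumption; "C:/hf_cache" is an illustrative directory name, not a requirement, and ignore_verifications=True is carried over from the original cell (newer datasets releases spell it verification_mode="no_checks").

# Workaround sketch, assuming the failure is Windows' 260-character path limit:
# point the cache at a short root so the generated .arrow paths stay well
# under the limit. "C:/hf_cache" is an illustrative name.
from datasets import load_dataset

train_set = load_dataset(
    "mwritescode/slither-audited-smart-contracts",
    "big-multilabel",
    split="train",
    cache_dir="C:/hf_cache",     # short cache root instead of the default under C:/Users/...
    ignore_verifications=True,   # checksum workaround from the original cell
)
# The test and validation splits would take the same cache_dir argument.

If changing the code is not an option, setting the HF_DATASETS_CACHE environment variable to a short directory before importing datasets, or enabling long paths system-wide via the Windows LongPathsEnabled registry key, should have the same effect.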