Thank you Mark for making Obsidian more accessible to Python users!! :-)
I tried it with 40,076 files (incl. attachments). (Most of the MD files are generated and do not yet contain many links or much metadata.)
The method "gather" ran successfully in about 3 minutes! :-)
However, df = vault.get_all_file_metadata()
raised an error.
I'm not sure whether the following traceback helps to locate the issue.
ValueError Traceback (most recent call last)
Input In [12], in <cell line: 1>()
----> 1 df = vault.get_all_file_metadata()
File C:...\obsidiantools\api.py:1345, in Vault.get_all_file_metadata(self)
1343 warnings.warn('Only notes (md files) were used to build the graph. Set attachments=True in the connect method to show all file metadata.')
1344 else:
-> 1345 df_media = self.get_media_file_metadata()
1346 df_media['graph_category'] = np.where(
1347 df_media['file_exists'], 'attachment', 'nonexistent')
1348 df_canvas = self.get_canvas_file_metadata()
File C:...\obsidiantools\api.py:1234, in Vault.get_media_file_metadata(self)
1232 return df
1233 else:
-> 1234 df = df.pipe(self._create_media_file_metadata_columns)
1235 return df
File C:...\pandas\core\generic.py:5512, in NDFrame.pipe(self, func, *args, **kwargs)
5454 @Final
5455 @doc(klass=_shared_doc_kwargs["klass"])
5456 def pipe(
(...)
5460 **kwargs,
5461 ) -> T:
5462 r"""
5463 Apply chainable functions that expect Series or DataFrames.
5464
(...)
5510 ... ) # doctest: +SKIP
5511 """
-> 5512 return com.pipe(self, func, *args, **kwargs)
File C:...\pandas\core\common.py:497, in pipe(obj, func, *args, **kwargs)
495 return func(*args, **kwargs)
496 else:
--> 497 return func(obj, *args, **kwargs)
File C:...\obsidiantools\api.py:1249, in Vault._create_media_file_metadata_columns(self, df)
1242 df['abs_filepath'] = np.where(df['rel_filepath'].notna(),
1243 [self._dirpath / str(f)
1244 for f in df['rel_filepath'].tolist()],
1245 np.NaN)
1246 df['file_exists'] = pd.Series(
1247 np.logical_not(df.index.isin(self._nonexistent_media_files)),
1248 index=df.index)
-> 1249 df['n_backlinks'] = self._get_backlink_counts_for_media_files_only()
1250 df['modified_time'] = pd.to_datetime(
1251 [f.lstat().st_mtime if not pd.isna(f)
1252 else pd.NaT
1253 for f in df['abs_filepath'].tolist()],
1254 unit='s')
1255 return df
File C:...\pandas\core\frame.py:3655, in DataFrame.__setitem__(self, key, value)
3652 self._setitem_array([key], value)
3653 else:
3654 # set column
-> 3655 self._set_item(key, value)
File C:...\pandas\core\frame.py:3832, in DataFrame._set_item(self, key, value)
3822 def _set_item(self, key, value) -> None:
3823 """
3824 Add series to DataFrame in specified column.
3825
(...)
3830 ensure homogeneity.
3831 """
-> 3832 value = self._sanitize_column(value)
3834 if (
3835 key in self.columns
3836 and value.ndim == 1
3837 and not is_extension_array_dtype(value)
3838 ):
3839 # broadcast across multiple columns if necessary
3840 if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
File C:...\pandas\core\frame.py:4538, in DataFrame._sanitize_column(self, value)
4535 return _reindex_for_setitem(value, self.index)
4537 if is_list_like(value):
-> 4538 com.require_length_match(value, self.index)
4539 return sanitize_array(value, self.index, copy=True, allow_2d=True)
File C:...\pandas\core\common.py:557, in require_length_match(data, index)
553 """
554 Check the length of data matches the length of the index.
555 """
556 if len(data) != len(index):
--> 557 raise ValueError(
558 "Length of values "
559 f"({len(data)}) "
560 "does not match length of index "
561 f"({len(index)})"
562 )
ValueError: Length of values (38135) does not match length of index (4216)