Add attachment file paths as a "files" column in posts.csv

This commit is contained in:
2026-03-06 03:04:16 +00:00
parent ea7dc75c10
commit 4b8cf3fafb

View File

@@ -99,13 +99,13 @@ def fetch_notes(user_id, until_id=None):
return res.json() return res.json()
def export_notes(user_id, base_path): def export_notes(user_id, base_path, file_id_to_path):
"""Exports user notes, replacing IDs with handles.""" """Exports user notes, replacing IDs with handles."""
print(f" Exporting notes for {get_handle(user_id)}...") print(f" Exporting notes for {get_handle(user_id)}...")
with open(base_path / "posts.csv", "w", newline="", encoding="utf-8") as f: with open(base_path / "posts.csv", "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow( writer.writerow(
["id", "createdAt", "text", "visibility", "renote", "reply", "cw"] ["id", "createdAt", "text", "visibility", "renote", "reply", "cw", "files"]
) )
until_id = None until_id = None
@@ -130,6 +130,13 @@ def export_notes(user_id, base_path):
elif note.get("replyId"): elif note.get("replyId"):
reply_handle = note["replyId"] reply_handle = note["replyId"]
# Resolve file paths
file_paths = []
for file in note.get("files", []):
f_id = file.get("id")
if f_id in file_id_to_path:
file_paths.append(f"files/{file_id_to_path[f_id]}")
writer.writerow( writer.writerow(
[ [
note["id"], note["id"],
@@ -139,6 +146,7 @@ def export_notes(user_id, base_path):
renote_handle, renote_handle,
reply_handle, reply_handle,
note.get("cw"), note.get("cw"),
", ".join(file_paths),
] ]
) )
@@ -184,6 +192,10 @@ def download_file(file_info, folder_path):
counter = 1 counter = 1
original_dest_path = dest_path original_dest_path = dest_path
while dest_path.exists(): while dest_path.exists():
# Heuristic: If it exists and matches size, assume it's already downloaded
if dest_path.stat().st_size == file_info.get("size"):
return dest_path.name
dest_path = original_dest_path.with_name( dest_path = original_dest_path.with_name(
f"{original_dest_path.stem}_{counter}{original_dest_path.suffix}" f"{original_dest_path.stem}_{counter}{original_dest_path.suffix}"
) )
@@ -195,8 +207,10 @@ def download_file(file_info, folder_path):
with open(dest_path, "wb") as f: with open(dest_path, "wb") as f:
for chunk in res.iter_content(chunk_size=8192): for chunk in res.iter_content(chunk_size=8192):
f.write(chunk) f.write(chunk)
return dest_path.name
except Exception as e: except Exception as e:
print(f" Failed to download {file_name}: {e}") print(f" Failed to download {file_name}: {e}")
return None
def get_folder_path(folder_id, folder_map): def get_folder_path(folder_id, folder_map):
@@ -234,13 +248,17 @@ def export_drive_admin(user_id, base_path):
if len(files) < 100: if len(files) < 100:
break break
until_id = files[-1]["id"] until_id = files[-1]["id"]
file_id_to_path = {}
for file in all_files: for file in all_files:
folder_id = file.get("folderId") folder_id = file.get("folderId")
rel_folder_path = get_folder_path(folder_id, folder_map) rel_folder_path = get_folder_path(folder_id, folder_map)
full_folder_path = files_base_path / rel_folder_path full_folder_path = files_base_path / rel_folder_path
if not full_folder_path.exists(): if not full_folder_path.exists():
full_folder_path.mkdir(parents=True, exist_ok=True) full_folder_path.mkdir(parents=True, exist_ok=True)
download_file(file, full_folder_path) final_name = download_file(file, full_folder_path)
if final_name:
file_id_to_path[file["id"]] = str(rel_folder_path / final_name)
return file_id_to_path
def export_user_data(user, base_path): def export_user_data(user, base_path):
@@ -353,8 +371,8 @@ def main():
user_dir.mkdir(parents=True) user_dir.mkdir(parents=True)
export_user_data(user, user_dir) export_user_data(user, user_dir)
export_notes(user_id, user_dir) file_id_to_path = export_drive_admin(user_id, user_dir)
export_drive_admin(user_id, user_dir) export_notes(user_id, user_dir, file_id_to_path)
export_lists_db(user_id, user_dir, db_conn) export_lists_db(user_id, user_dir, db_conn)
export_antennas_db(user_id, user_dir, db_conn) export_antennas_db(user_id, user_dir, db_conn)