storage
Storage layer — SQLite database and notes.
1"""Storage layer — SQLite database and notes.""" 2 3from .db import ( 4 DB_PATH, 5 _connect, 6 init_db, 7 init_table, 8 parse_entry_id, 9 save_paper, 10 save_papers, 11 save_paper_metadata, 12 save_papers_metadata, 13 get_paper, 14 get_paper_by_id, 15 get_paper_by_source_fk, 16 get_papers_by_source_fks, 17 get_paper_root, 18 get_all_versions, 19 soft_delete_paper, 20 restore_paper, 21 hard_delete_paper, 22 list_deleted_papers, 23 is_paper_deleted, 24 list_papers, 25 get_categories, 26 get_tags, 27 get_graph_data, 28 set_has_pdf, 29 set_pdf_path, 30 set_full_text, 31 search_full_text, 32) 33from .projects import ( 34 Q, 35 Status, 36 Project, 37 ensure_projects_db, 38 get_project, 39 filter_projects, 40 color_to_hex, 41 color_from_hex, 42) 43from .notes import ( 44 Note, 45 ensure_notes_db, 46 get_note, 47 get_notes, 48 get_project_notes, 49 count_project_notes, 50 count_paper_notes, 51 note_counts_by_paper_for_project, 52 search_notes_source_fks, 53) 54 55__all__ = [ 56 # db 57 "DB_PATH", "_connect", "init_db", "init_table", "parse_entry_id", 58 "save_paper", "save_papers", "save_paper_metadata", "save_papers_metadata", 59 "get_paper", "get_paper_by_id", "get_paper_by_source_fk", "get_papers_by_source_fks", 60 "get_paper_root", 61 "get_all_versions", 62 "soft_delete_paper", "restore_paper", "hard_delete_paper", 63 "list_deleted_papers", "is_paper_deleted", 64 "list_papers", 65 "get_categories", "get_tags", "get_graph_data", 66 "set_has_pdf", "set_pdf_path", "set_full_text", "search_full_text", 67 # projects 68 "Q", "Status", "Project", "ensure_projects_db", 69 "get_project", "filter_projects", "color_to_hex", "color_from_hex", 70 # notes 71 "Note", "ensure_notes_db", 72 "get_note", "get_notes", "get_project_notes", 73 "count_project_notes", "count_paper_notes", 74 "note_counts_by_paper_for_project", 75 "search_notes_source_fks", 76]
244def init_db() -> None: 245 with _connect() as conn: 246 apply_sql_schema(conn) 247 248 if old_pdf_dir().is_dir(): 249 wrong_path_rows = _get_deprecated_path_rows() 250 if wrong_path_rows: 251 for rows in wrong_path_rows: 252 try: 253 curr_path = rows["PDF_PATH"] 254 if Path(curr_path).is_file() and Path(curr_path).rename(curr_path.replace(str(old_pdf_dir()), str(pdf_dir()))).exists(): 255 set_pdf_path(rows["source_id"], curr_path.replace(str(old_pdf_dir()), str(pdf_dir()))) 256 print(f"File [ {curr_path} ] moved and verified!") 257 else: 258 print(f"File [ {curr_path} ] could not be moved") 259 except Exception as e: 260 print(f"An error occured while trying to parse file {rows['PDF_PATH']}:\n{e}") 261 _remove_gui_pdf_dir(old_pdf_dir())
53def init_table( 54 table_name: str, 55 columns: list[tuple], 56 primary_key: list[str] | None = None, 57 db_path: str = DB_PATH, 58) -> None: 59 """Create a table if it doesn't exist. 60 61 Each column is a tuple: (name, python_type[, constraints]) 62 where constraints is an optional SQL string appended after the type. 63 """ 64 col_defs: list[str] = [] 65 for col in columns: 66 name = col[0] 67 sql_t = _PY_TO_SQL.get(col[1], "TEXT") 68 extra = col[2] if len(col) > 2 else "" 69 col_defs.append(f" {name} {sql_t} {extra}".rstrip()) 70 if primary_key: 71 col_defs.append(f" PRIMARY KEY ({', '.join(primary_key)})") 72 ddl = ( 73 f"CREATE TABLE IF NOT EXISTS {table_name} (\n" 74 + ",\n".join(col_defs) 75 + "\n);" 76 ) 77 with _connect(db_path) as conn: 78 conn.execute(ddl)
Create a table if it doesn't exist.
Each column is a tuple: (name, python_type[, constraints]) where constraints is an optional SQL string appended after the type.
277def parse_entry_id(entry_id: str) -> tuple[str, int]: 278 """EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4).""" 279 raw = entry_id.split('/')[-1] 280 match = re.match(r'^(.+?)(?:v(\d+))?$', raw) 281 assert match 282 source_id = match.group(1) 283 version = int(match.group(2)) if match.group(2) else 1 284 return source_id, version
EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4).
344def save_paper(paper: arxiv.Result, tags: list[str] | None = None) -> tuple[str, int]: 345 """Insert a single arxiv paper. Returns (source_id, version).""" 346 with _connect() as conn: 347 return _insert_arxiv(conn, paper, tags)
Insert a single arxiv paper. Returns (source_id, version).
350def save_papers(papers: list[arxiv.Result], tags: list[str] | None = None) -> list[tuple[str, int]]: 351 """Batch insert arxiv papers in a single transaction. Returns list of (source_id, version).""" 352 with _connect() as conn: 353 return [_insert_arxiv(conn, paper, tags) for paper in papers]
Batch insert arxiv papers in a single transaction. Returns list of (source_id, version).
356def save_paper_metadata(meta: PaperMetadata, tags: list[str] | None = None) -> tuple[str, int]: 357 """Insert a paper from any source via PaperMetadata. Returns (source_id, version).""" 358 with _connect() as conn: 359 return _insert_metadata(conn, meta, tags)
Insert a paper from any source via PaperMetadata. Returns (source_id, version).
362def save_papers_metadata(papers: list[PaperMetadata], tags: list[str] | None = None) -> list[tuple[str, int]]: 363 """Batch insert papers from any source. Returns list of (source_id, version).""" 364 with _connect() as conn: 365 return [_insert_metadata(conn, meta, tags) for meta in papers]
Batch insert papers from any source. Returns list of (source_id, version).
626def get_paper(source_id: str, version: Optional[int] = None) -> Optional[sqlite3.Row]: 627 """Fetch a specific version, or the latest if version is None.""" 628 with _connect() as conn: 629 if version: 630 return conn.execute( 631 "SELECT * FROM papers WHERE source_id = ? AND version = ?", 632 (source_id, version), 633 ).fetchone() 634 return conn.execute( 635 "SELECT * FROM latest_papers WHERE source_id = ?", 636 (source_id,), 637 ).fetchone()
Fetch a specific version, or the latest if version is None.
640def get_paper_by_id(paper_id: int) -> Optional[sqlite3.Row]: 641 """Fetch a paper version by its PAPER primary key.""" 642 with _connect() as conn: 643 return conn.execute( 644 "SELECT * FROM papers WHERE paper_id = ?", 645 (paper_id,), 646 ).fetchone()
Fetch a paper version by its PAPER primary key.
649def get_paper_by_source_fk(source_fk: int) -> Optional[sqlite3.Row]: 650 """Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK.""" 651 with _connect() as conn: 652 return conn.execute( 653 "SELECT * FROM latest_papers WHERE source_id = (" 654 "SELECT SOURCE_ID FROM PAPER_ROOTS WHERE SOURCE_FK = ?)", 655 (source_fk,), 656 ).fetchone()
Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK.
659def get_papers_by_source_fks(source_fks: list[int]) -> list[sqlite3.Row]: 660 """Batch-fetch latest paper versions for a list of SOURCE_FKs in one query. 661 662 The empty-list guard is intentional: _queries_list_papers(source_fks=None) 663 returns the full library, which is the wrong behaviour here. 664 """ 665 if not source_fks: 666 return [] 667 return _queries_list_papers(source_fks=source_fks)
Batch-fetch latest paper versions for a list of SOURCE_FKs in one query.
The empty-list guard is intentional: _queries_list_papers(source_fks=None) returns the full library, which is the wrong behaviour here.
671def get_paper_root(source_id: str) -> Optional[sqlite3.Row]: 672 """Return the PAPER_ROOTS row for a given source_id.""" 673 with _connect() as conn: 674 return conn.execute( 675 "SELECT * FROM PAPER_ROOTS WHERE SOURCE_ID = ?", 676 (source_id,), 677 ).fetchone()
Return the PAPER_ROOTS row for a given source_id.
704def get_all_versions(source_id: str) -> list[sqlite3.Row]: 705 """Fetch all stored versions of a paper, ordered oldest to newest.""" 706 with _connect() as conn: 707 return conn.execute( 708 "SELECT * FROM papers WHERE source_id = ? ORDER BY version ASC", 709 (source_id,), 710 ).fetchall()
Fetch all stored versions of a paper, ordered oldest to newest.
510def soft_delete_paper(source_id: str) -> str | None: 511 """Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present. 512 513 Returns the pdf_path that was stored (for caller reference), or None. 514 """ 515 stored_path: str | None = None 516 with _connect() as conn: 517 row = conn.execute( 518 "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN " 519 "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)", 520 (source_id,), 521 ).fetchone() 522 if row: 523 stored_path = row["PDF_PATH"] 524 525 if conn.execute( 526 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'" 527 ).fetchone(): 528 conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,)) 529 530 conn.execute( 531 "UPDATE PAPER_ROOTS SET STATUS = 'deleted', DELETED_AT = ?, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?", 532 (datetime.datetime.now(), source_id), 533 ) 534 535 if stored_path: 536 p = Path(stored_path) 537 try: 538 linxiv_dir = pdf_dir() 539 if p.is_file() and p.is_relative_to(linxiv_dir): 540 p.unlink() 541 with _connect() as conn: 542 conn.execute( 543 "UPDATE PAPER SET HAS_PDF = 0 WHERE SOURCE_ID = ?", 544 (source_id,), 545 ) 546 except Exception as e: 547 print(f"[db] Could not remove PDF for {source_id}: {e}") 548 549 return stored_path
Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present.
Returns the pdf_path that was stored (for caller reference), or None.
552def restore_paper(source_id: str) -> str | None: 553 """Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist).""" 554 stored_path: str | None = None 555 full_text: str | None = None 556 with _connect() as conn: 557 row = conn.execute( 558 "SELECT PDF_PATH, FULL_TEXT FROM PAPER_META WHERE PAPER_ID IN " 559 "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)", 560 (source_id,), 561 ).fetchone() 562 if row: 563 stored_path = row["PDF_PATH"] 564 full_text = row["FULL_TEXT"] 565 566 conn.execute( 567 "UPDATE PAPER_ROOTS SET STATUS = 'active', DELETED_AT = NULL, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?", 568 (source_id,), 569 ) 570 571 if row and row["FULL_TEXT"]: 572 if conn.execute( 573 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'" 574 ).fetchone(): 575 conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,)) 576 conn.execute( 577 "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)", 578 (source_id, full_text), 579 ) 580 581 return stored_path
Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist).
584def hard_delete_paper(source_id: str) -> None: 585 """Permanently delete a paper and all associated data.""" 586 with _connect() as conn: 587 row = conn.execute( 588 "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN " 589 "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)", 590 (source_id,), 591 ).fetchone() 592 if conn.execute( 593 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'" 594 ).fetchone(): 595 conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,)) 596 conn.execute("DELETE FROM PAPER_ROOTS WHERE SOURCE_ID = ?", (source_id,)) 597 598 if row and row["PDF_PATH"]: 599 p = Path(row["PDF_PATH"]) 600 try: 601 linxiv_dir = pdf_dir() 602 if p.is_file() and p.is_relative_to(linxiv_dir): 603 p.unlink() 604 except Exception as e: 605 print(f"[db] Could not remove PDF for {source_id} during hard delete: {e}")
Permanently delete a paper and all associated data.
608def list_deleted_papers() -> list[sqlite3.Row]: 609 """Return all soft-deleted papers from the deleted_papers view.""" 610 with _connect() as conn: 611 return conn.execute( 612 "SELECT * FROM deleted_papers ORDER BY deleted_at DESC" 613 ).fetchall()
Return all soft-deleted papers from the deleted_papers view.
616def is_paper_deleted(source_id: str) -> bool: 617 """Return True if a PAPER_ROOTS row exists with STATUS='deleted'.""" 618 with _connect() as conn: 619 row = conn.execute( 620 "SELECT 1 FROM PAPER_ROOTS WHERE SOURCE_ID = ? AND STATUS = 'deleted'", 621 (source_id,), 622 ).fetchone() 623 return row is not None
Return True if a PAPER_ROOTS row exists with STATUS='deleted'.
785def list_papers( 786 latest_only: bool = True, 787 limit: int | None = None, 788 offset: int = 0, 789 category: str | None = None, 790) -> list[sqlite3.Row]: 791 """List all stored papers (latest version per paper by default). 792 793 Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset 794 apply to the filtered result. 795 """ 796 with _connect() as conn: 797 sql = _LIST_PAPERS_LATEST_SQL if latest_only else _LIST_PAPERS_ALL_SQL 798 params: list[int | str] = [] 799 if category is not None: 800 sql += " WHERE category = ?" 801 params.append(category) 802 sql += " ORDER BY published DESC" 803 if limit is not None: 804 sql += " LIMIT ? OFFSET ?" 805 params += [limit, offset] 806 elif offset: 807 sql += " LIMIT -1 OFFSET ?" 808 params.append(offset) 809 return conn.execute(sql, params).fetchall()
List all stored papers (latest version per paper by default).
Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset apply to the filtered result.
824def get_categories() -> list[str]: 825 """Return a sorted list of all distinct primary categories in the DB.""" 826 with _connect() as conn: 827 rows = conn.execute( 828 "SELECT DISTINCT category FROM latest_papers WHERE category IS NOT NULL ORDER BY category" 829 ).fetchall() 830 return [row["category"] for row in rows]
Return a sorted list of all distinct primary categories in the DB.
713def get_graph_data(exclude_single_authors: bool = False) -> tuple[list[dict], list[dict]]: 714 """Returns (nodes, edges) ready to pass to the graph view. 715 716 With ``exclude_single_authors`` True, an author node is dropped unless some 717 AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers. 718 """ 719 with _connect() as conn: 720 paper_nodes = [ 721 { 722 "id": row["source_fk"], 723 "source_id": row["source_id"], 724 "label": row["title"], 725 "type": "paper", 726 "category": row["category"], 727 "tags": row["tags"] if row["tags"] else [], 728 "has_pdf": bool(row["has_pdf"]), 729 "published": row["published"].isoformat() if row["published"] else None, 730 "url": row["url"], 731 "doi": row["doi"], 732 "summary": row["summary"], 733 } 734 for row in conn.execute(""" 735 SELECT r.SOURCE_FK AS source_fk, r.SOURCE_ID AS source_id, 736 p.TITLE AS title, p.CATEGORY AS category, 737 m.TAGS AS tags, p.HAS_PDF AS has_pdf, m.PUBLISHED AS published, 738 m.URL AS url, m.DOI AS doi, m.SUMMARY AS summary 739 FROM PAPER_ROOTS r 740 JOIN PAPER p ON p.SOURCE_FK = r.SOURCE_FK 741 JOIN PAPER_META m ON m.PAPER_ID = p.PAPER_ID 742 WHERE p.VERSION = (SELECT MAX(VERSION) FROM PAPER WHERE SOURCE_FK = r.SOURCE_FK) 743 AND r.STATUS = 'active' 744 """) 745 ] 746 # _GRAPH_AUTHORS_WITH_COUNT_SQL adds the per-name paper_count column 747 # (see queries.py); the default constant omits it and scans unchanged. 748 sql = ( 749 _GRAPH_AUTHORS_WITH_COUNT_SQL 750 if exclude_single_authors 751 else _GRAPH_AUTHORS_SQL 752 ) 753 author_rows = conn.execute(sql).fetchall() 754 755 seen_authors: set[str] = set() 756 author_nodes: list[dict] = [] 757 edges: list[dict] = [] 758 for row in author_rows: 759 name = row["author_name"] 760 # Drop an author when its paper_count is below two. A NULL count (no 761 # matching AUTHOR row) fails the `is not None` guard, so it is kept. 762 if ( 763 exclude_single_authors 764 and row["paper_count"] is not None 765 and row["paper_count"] < 2 766 ): 767 continue 768 node_id = f"author::{name}" 769 if node_id not in seen_authors: 770 node: dict[str, object] = {"id": node_id, "label": name, "type": "author"} 771 # author_fk is NULL when the JSON name matches no AUTHOR row. 772 if row["author_fk"] is not None: 773 node["author_id"] = row["author_fk"] 774 author_nodes.append(node) 775 seen_authors.add(node_id) 776 edges.append({"source": row["source_fk"], "target": node_id}) 777 778 return paper_nodes + author_nodes, edges
Returns (nodes, edges) ready to pass to the graph view.
With exclude_single_authors True, an author node is dropped unless some
AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers.
445def set_has_pdf(source_id: str, version: int, has: bool) -> None: 446 """Set the has_pdf flag for a specific paper version.""" 447 with _connect() as conn: 448 conn.execute( 449 "UPDATE PAPER SET HAS_PDF = ? WHERE SOURCE_ID = ? AND VERSION = ?", 450 (has, source_id, version), 451 )
Set the has_pdf flag for a specific paper version.
461def set_pdf_path(source_id: str, path: str, version: int | None = None) -> None: 462 """Set the pdf_path for a paper (all versions).""" 463 with _connect() as conn: 464 if version: 465 conn.execute( 466 "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN " 467 "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?)", 468 (path, source_id, version), 469 ) 470 else: 471 lastrow = conn.execute( 472 "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN " 473 "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ?)", 474 (path, source_id), 475 ) 476 if lastrow: 477 print("Warning: you are updating the paths of all pdfs associated with this paper. This will cause the pdfs of other version to be deleted, those other version.") 478 print("This may incorrectly update the database, consider using different methodology")
Set the pdf_path for a paper (all versions).
908def set_full_text(full_text: str|None, paper_id: int|None, source_id: str|None, version: int|None) -> None: 909 """Store extracted TeX full text and update the FTS index.""" 910 with _connect() as conn: 911 row = conn.execute( 912 "SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?", 913 (source_id, version), 914 ).fetchone() 915 if row is None: 916 return 917 paper_id = int(row["PAPER_ID"]) 918 conn.execute( 919 "UPDATE PAPER_META SET FULL_TEXT = ?, DOWNLOADED_SOURCE = 1 WHERE PAPER_ID = ?", 920 (full_text, paper_id), 921 ) 922 conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,)) 923 conn.execute( 924 "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)", 925 (source_id, full_text), 926 )
Store extracted TeX full text and update the FTS index.
929def search_full_text(query: str, limit: int = 20) -> list[sqlite3.Row]: 930 """Full-text search over TeX source content. Returns matching papers.""" 931 with _connect() as conn: 932 if not conn.execute( 933 "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'" 934 ).fetchone(): 935 return [] 936 return conn.execute(""" 937 SELECT p.* FROM latest_papers p 938 JOIN papers_fts fts ON p.source_id = fts.paper_id 939 WHERE papers_fts MATCH ? 940 ORDER BY rank 941 LIMIT ? 942 """, (query, limit)).fetchall()
Full-text search over TeX source content. Returns matching papers.
35class Q: 36 """Composable SQL WHERE predicate. Combine with &, |, ~.""" 37 def __init__(self, sql: str, *params) -> None: 38 self.sql = sql 39 self.params = params 40 41 def __and__(self, other: Q) -> Q: 42 return Q(f"({self.sql} AND {other.sql})", *self.params, *other.params) 43 44 def __or__(self, other: Q) -> Q: 45 return Q(f"({self.sql} OR {other.sql})", *self.params, *other.params) 46 47 def __invert__(self) -> Q: 48 return Q(f"(NOT {self.sql})", *self.params)
Composable SQL WHERE predicate. Combine with &, |, ~.
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
80@dataclass 81class Project: 82 name: str 83 description: str = "" 84 color: Optional[int] = None 85 project_tags: list[str] = field(default_factory=list) 86 source_fks: list[int] = field(default_factory=list) 87 status: Status = Status.ACTIVE 88 id: Optional[int] = None 89 created_at: Optional[datetime.datetime] = None 90 updated_at: Optional[datetime.datetime] = None 91 archived_at: Optional[datetime.datetime] = None 92 _sources_loaded: bool = field(default=True, repr=False, compare=False) 93 94 @classmethod 95 def from_row(cls, row, load_sources: bool = True) -> Project: 96 proj_fk = row["PROJECT_FK"] 97 source_fks = _load_source_fks(proj_fk) if (proj_fk and load_sources) else [] 98 return cls( 99 id = proj_fk, 100 name = row["NAME"], 101 description = row["DESCRIPTION"] or "", 102 color = int(row["COLOR"]) if row["COLOR"] is not None else None, 103 source_fks = source_fks, 104 status = Status(row["STATUS"]), 105 created_at = row["CREATED_AT"], 106 updated_at = row["UPDATED_AT"], 107 archived_at = row["ARCHIVED_AT"], 108 _sources_loaded = load_sources, 109 ) 110 111 def save(self) -> None: 112 now = datetime.datetime.now() 113 self.updated_at = now 114 if self.id is None: 115 self.created_at = now 116 with _connect() as conn: 117 cur = conn.execute( 118 """ 119 INSERT INTO PROJECT 120 (NAME, DESCRIPTION, COLOR, STATUS, 121 CREATED_AT, UPDATED_AT, ARCHIVED_AT) 122 VALUES (?, ?, ?, ?, ?, ?, ?) 123 """, 124 (self.name, self.description, self.color, self.status, 125 self.created_at, self.updated_at, self.archived_at), 126 ) 127 self.id = cur.lastrowid 128 assert self.id 129 _save_source_fks(conn, self.id, self.source_fks) 130 else: 131 # Fields only. Membership is written by add_papers/remove_papers/ 132 # replace_papers — rewriting it here from this instance's 133 # (possibly stale) snapshot would discard rows written by other 134 # requests since the snapshot was loaded. 135 with _connect() as conn: 136 conn.execute( 137 """ 138 UPDATE PROJECT 139 SET NAME = ?, DESCRIPTION = ?, COLOR = ?, STATUS = ?, 140 UPDATED_AT = ?, ARCHIVED_AT = ? 141 WHERE PROJECT_FK = ? 142 """, 143 (self.name, self.description, self.color, self.status, 144 self.updated_at, self.archived_at, self.id), 145 ) 146 147 def delete(self) -> None: 148 self.status = Status.DELETED 149 self.archived_at = datetime.datetime.now() 150 self.save() 151 152 def archive(self) -> None: 153 self.status = Status.ARCHIVED 154 self.archived_at = datetime.datetime.now() 155 self.save() 156 157 def restore(self) -> None: 158 self.status = Status.ACTIVE 159 self.archived_at = None 160 self.save() 161 162 def _refresh_source_fks(self) -> None: 163 """Reload membership through the usual read path after a write, so the 164 in-memory list matches what any fresh load would see (active papers, 165 PROJECT_TO_PAPER_FK order) rather than this instance's snapshot.""" 166 assert self.id is not None 167 self.source_fks = _load_source_fks(self.id) 168 self._sources_loaded = True 169 170 def add_papers(self, source_fks: list[int]) -> None: 171 """Add many papers; duplicates and existing members are skipped.""" 172 if self.id is None: 173 raise ValueError("Project must be saved before papers can be added.") 174 if not source_fks: 175 return 176 # No membership pre-check against self.source_fks (it may be stale); 177 # the insert is OR IGNORE. 178 with _connect() as conn: 179 conn.executemany( 180 _INSERT_MEMBERSHIP_SQL, 181 [(self.id, sfk) for sfk in source_fks], 182 ) 183 self._refresh_source_fks() 184 185 def remove_papers(self, source_fks: list[int]) -> None: 186 """Remove many papers; non-members are skipped.""" 187 if self.id is None: 188 return 189 if not source_fks: 190 return 191 with _connect() as conn: 192 conn.executemany( 193 "DELETE FROM PROJECT_TO_PAPER WHERE PROJECT_FK = ? AND SOURCE_FK = ?", 194 [(self.id, sfk) for sfk in source_fks], 195 ) 196 self._refresh_source_fks() 197 198 def replace_papers(self, source_fks: list[int]) -> None: 199 """Set the membership to exactly source_fks (full replace, in order). 200 201 Any membership rows written since the caller loaded its snapshot are 202 discarded. Use add/remove for incremental changes. 203 """ 204 if self.id is None: 205 raise ValueError("Project must be saved before papers can be replaced.") 206 seen: set[int] = set() 207 deduped: list[int] = [] 208 for sfk in source_fks: 209 if sfk not in seen: 210 seen.add(sfk) 211 deduped.append(sfk) 212 with _connect() as conn: 213 _save_source_fks(conn, self.id, deduped) 214 self._refresh_source_fks() 215 216 def load_papers(self) -> list[int]: 217 return self.source_fks 218 219 @property 220 def paper_count(self) -> int: 221 # Returns 0 when built via from_row(load_sources=False); check 222 # _sources_loaded before trusting this value for display. 223 return len(self.source_fks) 224 225 def __repr__(self) -> str: 226 papers = len(self.source_fks) if self._sources_loaded else "?" 227 return f"<Project id={self.id!r} name={self.name!r} status={self.status!r} papers={papers}>"
94 @classmethod 95 def from_row(cls, row, load_sources: bool = True) -> Project: 96 proj_fk = row["PROJECT_FK"] 97 source_fks = _load_source_fks(proj_fk) if (proj_fk and load_sources) else [] 98 return cls( 99 id = proj_fk, 100 name = row["NAME"], 101 description = row["DESCRIPTION"] or "", 102 color = int(row["COLOR"]) if row["COLOR"] is not None else None, 103 source_fks = source_fks, 104 status = Status(row["STATUS"]), 105 created_at = row["CREATED_AT"], 106 updated_at = row["UPDATED_AT"], 107 archived_at = row["ARCHIVED_AT"], 108 _sources_loaded = load_sources, 109 )
111 def save(self) -> None: 112 now = datetime.datetime.now() 113 self.updated_at = now 114 if self.id is None: 115 self.created_at = now 116 with _connect() as conn: 117 cur = conn.execute( 118 """ 119 INSERT INTO PROJECT 120 (NAME, DESCRIPTION, COLOR, STATUS, 121 CREATED_AT, UPDATED_AT, ARCHIVED_AT) 122 VALUES (?, ?, ?, ?, ?, ?, ?) 123 """, 124 (self.name, self.description, self.color, self.status, 125 self.created_at, self.updated_at, self.archived_at), 126 ) 127 self.id = cur.lastrowid 128 assert self.id 129 _save_source_fks(conn, self.id, self.source_fks) 130 else: 131 # Fields only. Membership is written by add_papers/remove_papers/ 132 # replace_papers — rewriting it here from this instance's 133 # (possibly stale) snapshot would discard rows written by other 134 # requests since the snapshot was loaded. 135 with _connect() as conn: 136 conn.execute( 137 """ 138 UPDATE PROJECT 139 SET NAME = ?, DESCRIPTION = ?, COLOR = ?, STATUS = ?, 140 UPDATED_AT = ?, ARCHIVED_AT = ? 141 WHERE PROJECT_FK = ? 142 """, 143 (self.name, self.description, self.color, self.status, 144 self.updated_at, self.archived_at, self.id), 145 )
170 def add_papers(self, source_fks: list[int]) -> None: 171 """Add many papers; duplicates and existing members are skipped.""" 172 if self.id is None: 173 raise ValueError("Project must be saved before papers can be added.") 174 if not source_fks: 175 return 176 # No membership pre-check against self.source_fks (it may be stale); 177 # the insert is OR IGNORE. 178 with _connect() as conn: 179 conn.executemany( 180 _INSERT_MEMBERSHIP_SQL, 181 [(self.id, sfk) for sfk in source_fks], 182 ) 183 self._refresh_source_fks()
Add many papers; duplicates and existing members are skipped.
185 def remove_papers(self, source_fks: list[int]) -> None: 186 """Remove many papers; non-members are skipped.""" 187 if self.id is None: 188 return 189 if not source_fks: 190 return 191 with _connect() as conn: 192 conn.executemany( 193 "DELETE FROM PROJECT_TO_PAPER WHERE PROJECT_FK = ? AND SOURCE_FK = ?", 194 [(self.id, sfk) for sfk in source_fks], 195 ) 196 self._refresh_source_fks()
Remove many papers; non-members are skipped.
198 def replace_papers(self, source_fks: list[int]) -> None: 199 """Set the membership to exactly source_fks (full replace, in order). 200 201 Any membership rows written since the caller loaded its snapshot are 202 discarded. Use add/remove for incremental changes. 203 """ 204 if self.id is None: 205 raise ValueError("Project must be saved before papers can be replaced.") 206 seen: set[int] = set() 207 deduped: list[int] = [] 208 for sfk in source_fks: 209 if sfk not in seen: 210 seen.add(sfk) 211 deduped.append(sfk) 212 with _connect() as conn: 213 _save_source_fks(conn, self.id, deduped) 214 self._refresh_source_fks()
Set the membership to exactly source_fks (full replace, in order).
Any membership rows written since the caller loaded its snapshot are discarded. Use add/remove for incremental changes.
240def filter_projects(condition: Q | None = None, load_sources: bool = True) -> list[Project]: 241 if condition is None: 242 sql, params = "SELECT * FROM PROJECT", () 243 else: 244 sql = f"SELECT * FROM PROJECT WHERE {condition.sql}" 245 params = condition.params 246 with _connect() as conn: 247 rows = conn.execute(sql, params).fetchall() 248 return [Project.from_row(row, load_sources=load_sources) for row in rows]
35@dataclass 36class Note: 37 source_fk: int 38 project_id: Optional[int] = None 39 paper_id_fk: Optional[int] = None # pins note to a specific paper version 40 title: str = "" 41 content: str = "" 42 id: Optional[int] = None 43 created_at: Optional[datetime.datetime] = None 44 updated_at: Optional[datetime.datetime] = None 45 46 @classmethod 47 def from_row(cls, row) -> Note: 48 return cls( 49 id = row["NOTE_SK"], 50 source_fk = row["SOURCE_FK"], 51 paper_id_fk = row["PAPER_ID_FK"], 52 project_id = row["PROJECT_FK"], 53 title = row["TITLE"] or "", 54 content = row["NOTE"] or "", 55 created_at = row["CREATED_AT"], 56 updated_at = row["UPDATED_AT"], 57 ) 58 59 def to_details(self) -> NoteDetails: 60 return NoteDetails( 61 note_id = self.id, 62 source_fk = self.source_fk, 63 paper_id_fk = self.paper_id_fk, 64 project_id = self.project_id, 65 title = self.title, 66 content = self.content, 67 created_at = self.created_at, 68 updated_at = self.updated_at, 69 ) 70 71 def save(self) -> None: 72 now = datetime.datetime.now(datetime.timezone.utc) 73 if self.id is None: 74 with _connect() as conn: 75 cur = conn.execute( 76 """ 77 INSERT INTO NOTE 78 (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT) 79 VALUES (?, ?, ?, ?, ?, ?, ?) 80 """, 81 (self.source_fk, self.paper_id_fk, self.project_id, 82 self.title, self.content, now, now), 83 ) 84 self.id = cur.lastrowid 85 self.created_at = now 86 self.updated_at = now 87 else: 88 with _connect() as conn: 89 cur = conn.execute( 90 "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?", 91 (self.title, self.content, now, self.id), 92 ) 93 if cur.rowcount == 0: 94 raise ValueError(f"Note with id={self.id} does not exist") 95 self.updated_at = now 96 97 def delete(self) -> None: 98 if self.id is None: 99 return 100 if delete_note(self.id): 101 self.id = None
46 @classmethod 47 def from_row(cls, row) -> Note: 48 return cls( 49 id = row["NOTE_SK"], 50 source_fk = row["SOURCE_FK"], 51 paper_id_fk = row["PAPER_ID_FK"], 52 project_id = row["PROJECT_FK"], 53 title = row["TITLE"] or "", 54 content = row["NOTE"] or "", 55 created_at = row["CREATED_AT"], 56 updated_at = row["UPDATED_AT"], 57 )
59 def to_details(self) -> NoteDetails: 60 return NoteDetails( 61 note_id = self.id, 62 source_fk = self.source_fk, 63 paper_id_fk = self.paper_id_fk, 64 project_id = self.project_id, 65 title = self.title, 66 content = self.content, 67 created_at = self.created_at, 68 updated_at = self.updated_at, 69 )
71 def save(self) -> None: 72 now = datetime.datetime.now(datetime.timezone.utc) 73 if self.id is None: 74 with _connect() as conn: 75 cur = conn.execute( 76 """ 77 INSERT INTO NOTE 78 (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT) 79 VALUES (?, ?, ?, ?, ?, ?, ?) 80 """, 81 (self.source_fk, self.paper_id_fk, self.project_id, 82 self.title, self.content, now, now), 83 ) 84 self.id = cur.lastrowid 85 self.created_at = now 86 self.updated_at = now 87 else: 88 with _connect() as conn: 89 cur = conn.execute( 90 "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?", 91 (self.title, self.content, now, self.id), 92 ) 93 if cur.rowcount == 0: 94 raise ValueError(f"Note with id={self.id} does not exist") 95 self.updated_at = now
155def get_notes( 156 source_fk: int, 157 project_id: Optional[int] = None, 158 *, 159 all_projects: bool = False, 160) -> list[NoteDetails]: 161 if all_projects: 162 rows = _list_notes(source_fk=source_fk) 163 elif project_id is None: 164 rows = _list_notes(source_fk=source_fk, project_unscoped=True) 165 else: 166 rows = _list_notes(source_fk=source_fk, project_fk=project_id) 167 return [Note.from_row(row).to_details() for row in rows]
170def get_project_notes(project_id: int) -> list[NoteDetails]: 171 with _connect() as conn: 172 rows = conn.execute( 173 "SELECT * FROM NOTE WHERE PROJECT_FK = ? ORDER BY SOURCE_FK ASC, CREATED_AT ASC", 174 (project_id,), 175 ).fetchall() 176 return [Note.from_row(row).to_details() for row in rows]
200def note_counts_by_paper_for_project(project_id: int) -> dict[int, int]: 201 """ 202 Return note counts keyed by SOURCE_FK for each paper in the project. 203 Papers with no notes appear with count 0. Missing project_id yields empty dict. 204 """ 205 with _connect() as conn: 206 rows = conn.execute( 207 """ 208 SELECT pp.SOURCE_FK AS source_fk, COALESCE(n.cnt, 0) AS note_count 209 FROM PROJECT_TO_PAPER pp 210 JOIN PAPER_ROOTS r ON r.SOURCE_FK = pp.SOURCE_FK 211 LEFT JOIN ( 212 SELECT SOURCE_FK, COUNT(*) AS cnt 213 FROM NOTE 214 WHERE PROJECT_FK = ? 215 GROUP BY SOURCE_FK 216 ) AS n ON n.SOURCE_FK = pp.SOURCE_FK 217 WHERE pp.PROJECT_FK = ? AND r.STATUS = 'active' 218 ORDER BY pp.PROJECT_TO_PAPER_FK 219 """, 220 (project_id, project_id), 221 ).fetchall() 222 return {int(row["source_fk"]): int(row["note_count"]) for row in rows}
Return note counts keyed by SOURCE_FK for each paper in the project. Papers with no notes appear with count 0. Missing project_id yields empty dict.
229def search_notes_source_fks(query: str, limit: int = 50) -> list[int]: 230 """Return distinct SOURCE_FKs of active papers whose note title or content contains query.""" 231 pattern = f"%{_escape_like(query)}%" 232 with _connect() as conn: 233 rows = conn.execute( 234 """ 235 SELECT n.SOURCE_FK 236 FROM NOTE n 237 JOIN PAPER_ROOTS r ON r.SOURCE_FK = n.SOURCE_FK 238 WHERE r.STATUS = 'active' 239 AND (n.TITLE LIKE ? ESCAPE '\\' OR n.NOTE LIKE ? ESCAPE '\\') 240 GROUP BY n.SOURCE_FK 241 ORDER BY MAX(n.UPDATED_AT) DESC 242 LIMIT ? 243 """, 244 (pattern, pattern, limit), 245 ).fetchall() 246 return [int(row["SOURCE_FK"]) for row in rows]
Return distinct SOURCE_FKs of active papers whose note title or content contains query.