storage

Storage layer — SQLite database and notes.

 1"""Storage layer — SQLite database and notes."""
 2
 3from .db import (
 4    DB_PATH,
 5    _connect,
 6    init_db,
 7    init_table,
 8    parse_entry_id,
 9    save_paper,
10    save_papers,
11    save_paper_metadata,
12    save_papers_metadata,
13    get_paper,
14    get_paper_by_id,
15    get_paper_by_source_fk,
16    get_papers_by_source_fks,
17    get_paper_root,
18    get_all_versions,
19    soft_delete_paper,
20    restore_paper,
21    hard_delete_paper,
22    list_deleted_papers,
23    is_paper_deleted,
24    list_papers,
25    get_categories,
26    get_tags,
27    get_graph_data,
28    set_has_pdf,
29    set_pdf_path,
30    set_full_text,
31    search_full_text,
32)
33from .projects import (
34    Q,
35    Status,
36    Project,
37    ensure_projects_db,
38    get_project,
39    filter_projects,
40    color_to_hex,
41    color_from_hex,
42)
43from .notes import (
44    Note,
45    ensure_notes_db,
46    get_note,
47    get_notes,
48    get_project_notes,
49    count_project_notes,
50    count_paper_notes,
51    note_counts_by_paper_for_project,
52    search_notes_source_fks,
53)
54
55__all__ = [
56    # db
57    "DB_PATH", "_connect", "init_db", "init_table", "parse_entry_id",
58    "save_paper", "save_papers", "save_paper_metadata", "save_papers_metadata",
59    "get_paper", "get_paper_by_id", "get_paper_by_source_fk", "get_papers_by_source_fks",
60    "get_paper_root",
61    "get_all_versions",
62    "soft_delete_paper", "restore_paper", "hard_delete_paper",
63    "list_deleted_papers", "is_paper_deleted",
64    "list_papers",
65    "get_categories", "get_tags", "get_graph_data",
66    "set_has_pdf", "set_pdf_path", "set_full_text", "search_full_text",
67    # projects
68    "Q", "Status", "Project", "ensure_projects_db",
69    "get_project", "filter_projects", "color_to_hex", "color_from_hex",
70    # notes
71    "Note", "ensure_notes_db",
72    "get_note", "get_notes", "get_project_notes",
73    "count_project_notes", "count_paper_notes",
74    "note_counts_by_paper_for_project",
75    "search_notes_source_fks",
76]
DB_PATH = '/home/uribe/.local/share/com.linxiv.app/papers.db'
def _connect( db_path: str = '/home/uribe/.local/share/com.linxiv.app/papers.db') -> sqlite3.Connection:
81def _connect(db_path: str = DB_PATH) -> sqlite3.Connection:
82    conn = sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES)
83    conn.row_factory = sqlite3.Row
84    conn.execute("PRAGMA foreign_keys = ON")
85    return conn
def init_db() -> None:
244def init_db() -> None:
245    with _connect() as conn:
246        apply_sql_schema(conn)
247
248    if old_pdf_dir().is_dir():
249        wrong_path_rows = _get_deprecated_path_rows()
250        if wrong_path_rows:
251            for rows in wrong_path_rows:
252                try:
253                    curr_path = rows["PDF_PATH"]
254                    if Path(curr_path).is_file() and Path(curr_path).rename(curr_path.replace(str(old_pdf_dir()), str(pdf_dir()))).exists():
255                        set_pdf_path(rows["source_id"], curr_path.replace(str(old_pdf_dir()), str(pdf_dir())))
256                        print(f"File [ {curr_path} ] moved and verified!")
257                    else:
258                        print(f"File [ {curr_path} ] could not be moved")
259                except Exception as e:
260                    print(f"An error occured while trying to parse file {rows['PDF_PATH']}:\n{e}")
261        _remove_gui_pdf_dir(old_pdf_dir())
def init_table( table_name: str, columns: list[tuple], primary_key: list[str] | None = None, db_path: str = '/home/uribe/.local/share/com.linxiv.app/papers.db') -> None:
53def init_table(
54    table_name: str,
55    columns: list[tuple],
56    primary_key: list[str] | None = None,
57    db_path: str = DB_PATH,
58) -> None:
59    """Create a table if it doesn't exist.
60
61    Each column is a tuple: (name, python_type[, constraints])
62    where constraints is an optional SQL string appended after the type.
63    """
64    col_defs: list[str] = []
65    for col in columns:
66        name  = col[0]
67        sql_t = _PY_TO_SQL.get(col[1], "TEXT")
68        extra = col[2] if len(col) > 2 else ""
69        col_defs.append(f"    {name} {sql_t} {extra}".rstrip())
70    if primary_key:
71        col_defs.append(f"    PRIMARY KEY ({', '.join(primary_key)})")
72    ddl = (
73        f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
74        + ",\n".join(col_defs)
75        + "\n);"
76    )
77    with _connect(db_path) as conn:
78        conn.execute(ddl)

Create a table if it doesn't exist.

Each column is a tuple: (name, python_type[, constraints]) where constraints is an optional SQL string appended after the type.

def parse_entry_id(entry_id: str) -> tuple[str, int]:
277def parse_entry_id(entry_id: str) -> tuple[str, int]:
278    """EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4)."""
279    raw = entry_id.split('/')[-1]
280    match = re.match(r'^(.+?)(?:v(\d+))?$', raw)
281    assert match
282    source_id = match.group(1)
283    version = int(match.group(2)) if match.group(2) else 1
284    return source_id, version

EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4).

def save_paper(paper: arxiv.Result, tags: list[str] | None = None) -> tuple[str, int]:
344def save_paper(paper: arxiv.Result, tags: list[str] | None = None) -> tuple[str, int]:
345    """Insert a single arxiv paper. Returns (source_id, version)."""
346    with _connect() as conn:
347        return _insert_arxiv(conn, paper, tags)

Insert a single arxiv paper. Returns (source_id, version).

def save_papers( papers: list[arxiv.Result], tags: list[str] | None = None) -> list[tuple[str, int]]:
350def save_papers(papers: list[arxiv.Result], tags: list[str] | None = None) -> list[tuple[str, int]]:
351    """Batch insert arxiv papers in a single transaction. Returns list of (source_id, version)."""
352    with _connect() as conn:
353        return [_insert_arxiv(conn, paper, tags) for paper in papers]

Batch insert arxiv papers in a single transaction. Returns list of (source_id, version).

def save_paper_metadata( meta: sources.PaperMetadata, tags: list[str] | None = None) -> tuple[str, int]:
356def save_paper_metadata(meta: PaperMetadata, tags: list[str] | None = None) -> tuple[str, int]:
357    """Insert a paper from any source via PaperMetadata. Returns (source_id, version)."""
358    with _connect() as conn:
359        return _insert_metadata(conn, meta, tags)

Insert a paper from any source via PaperMetadata. Returns (source_id, version).

def save_papers_metadata( papers: list[sources.PaperMetadata], tags: list[str] | None = None) -> list[tuple[str, int]]:
362def save_papers_metadata(papers: list[PaperMetadata], tags: list[str] | None = None) -> list[tuple[str, int]]:
363    """Batch insert papers from any source. Returns list of (source_id, version)."""
364    with _connect() as conn:
365        return [_insert_metadata(conn, meta, tags) for meta in papers]

Batch insert papers from any source. Returns list of (source_id, version).

def get_paper(source_id: str, version: int | None = None) -> sqlite3.Row | None:
626def get_paper(source_id: str, version: Optional[int] = None) -> Optional[sqlite3.Row]:
627    """Fetch a specific version, or the latest if version is None."""
628    with _connect() as conn:
629        if version:
630            return conn.execute(
631                "SELECT * FROM papers WHERE source_id = ? AND version = ?",
632                (source_id, version),
633            ).fetchone()
634        return conn.execute(
635            "SELECT * FROM latest_papers WHERE source_id = ?",
636            (source_id,),
637        ).fetchone()

Fetch a specific version, or the latest if version is None.

def get_paper_by_id(paper_id: int) -> sqlite3.Row | None:
640def get_paper_by_id(paper_id: int) -> Optional[sqlite3.Row]:
641    """Fetch a paper version by its PAPER primary key."""
642    with _connect() as conn:
643        return conn.execute(
644            "SELECT * FROM papers WHERE paper_id = ?",
645            (paper_id,),
646        ).fetchone()

Fetch a paper version by its PAPER primary key.

def get_paper_by_source_fk(source_fk: int) -> sqlite3.Row | None:
649def get_paper_by_source_fk(source_fk: int) -> Optional[sqlite3.Row]:
650    """Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK."""
651    with _connect() as conn:
652        return conn.execute(
653            "SELECT * FROM latest_papers WHERE source_id = ("
654            "SELECT SOURCE_ID FROM PAPER_ROOTS WHERE SOURCE_FK = ?)",
655            (source_fk,),
656        ).fetchone()

Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK.

def get_papers_by_source_fks(source_fks: list[int]) -> list[sqlite3.Row]:
659def get_papers_by_source_fks(source_fks: list[int]) -> list[sqlite3.Row]:
660    """Batch-fetch latest paper versions for a list of SOURCE_FKs in one query.
661
662    The empty-list guard is intentional: _queries_list_papers(source_fks=None)
663    returns the full library, which is the wrong behaviour here.
664    """
665    if not source_fks:
666        return []
667    return _queries_list_papers(source_fks=source_fks)

Batch-fetch latest paper versions for a list of SOURCE_FKs in one query.

The empty-list guard is intentional: _queries_list_papers(source_fks=None) returns the full library, which is the wrong behaviour here.

def get_paper_root(source_id: str) -> sqlite3.Row | None:
671def get_paper_root(source_id: str) -> Optional[sqlite3.Row]:
672    """Return the PAPER_ROOTS row for a given source_id."""
673    with _connect() as conn:
674        return conn.execute(
675            "SELECT * FROM PAPER_ROOTS WHERE SOURCE_ID = ?",
676            (source_id,),
677        ).fetchone()

Return the PAPER_ROOTS row for a given source_id.

def get_all_versions(source_id: str) -> list[sqlite3.Row]:
704def get_all_versions(source_id: str) -> list[sqlite3.Row]:
705    """Fetch all stored versions of a paper, ordered oldest to newest."""
706    with _connect() as conn:
707        return conn.execute(
708            "SELECT * FROM papers WHERE source_id = ? ORDER BY version ASC",
709            (source_id,),
710        ).fetchall()

Fetch all stored versions of a paper, ordered oldest to newest.

def soft_delete_paper(source_id: str) -> str | None:
510def soft_delete_paper(source_id: str) -> str | None:
511    """Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present.
512
513    Returns the pdf_path that was stored (for caller reference), or None.
514    """
515    stored_path: str | None = None
516    with _connect() as conn:
517        row = conn.execute(
518            "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN "
519            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
520            (source_id,),
521        ).fetchone()
522        if row:
523            stored_path = row["PDF_PATH"]
524
525        if conn.execute(
526            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
527        ).fetchone():
528            conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
529
530        conn.execute(
531            "UPDATE PAPER_ROOTS SET STATUS = 'deleted', DELETED_AT = ?, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?",
532            (datetime.datetime.now(), source_id),
533        )
534
535    if stored_path:
536        p = Path(stored_path)
537        try:
538            linxiv_dir = pdf_dir()
539            if p.is_file() and p.is_relative_to(linxiv_dir):
540                p.unlink()
541                with _connect() as conn:
542                    conn.execute(
543                        "UPDATE PAPER SET HAS_PDF = 0 WHERE SOURCE_ID = ?",
544                        (source_id,),
545                    )
546        except Exception as e:
547            print(f"[db] Could not remove PDF for {source_id}: {e}")
548
549    return stored_path

Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present.

Returns the pdf_path that was stored (for caller reference), or None.

def restore_paper(source_id: str) -> str | None:
552def restore_paper(source_id: str) -> str | None:
553    """Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist)."""
554    stored_path: str | None = None
555    full_text: str | None = None
556    with _connect() as conn:
557        row = conn.execute(
558            "SELECT PDF_PATH, FULL_TEXT FROM PAPER_META WHERE PAPER_ID IN "
559            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
560            (source_id,),
561        ).fetchone()
562        if row:
563            stored_path = row["PDF_PATH"]
564            full_text = row["FULL_TEXT"]
565
566        conn.execute(
567            "UPDATE PAPER_ROOTS SET STATUS = 'active', DELETED_AT = NULL, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?",
568            (source_id,),
569        )
570
571        if row and row["FULL_TEXT"]:
572            if conn.execute(
573                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
574            ).fetchone():
575                conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
576                conn.execute(
577                    "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)",
578                    (source_id, full_text),
579                )
580
581    return stored_path

Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist).

def hard_delete_paper(source_id: str) -> None:
584def hard_delete_paper(source_id: str) -> None:
585    """Permanently delete a paper and all associated data."""
586    with _connect() as conn:
587        row = conn.execute(
588            "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN "
589            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
590            (source_id,),
591        ).fetchone()
592        if conn.execute(
593            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
594        ).fetchone():
595            conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
596        conn.execute("DELETE FROM PAPER_ROOTS WHERE SOURCE_ID = ?", (source_id,))
597
598    if row and row["PDF_PATH"]:
599        p = Path(row["PDF_PATH"])
600        try:
601            linxiv_dir = pdf_dir()
602            if p.is_file() and p.is_relative_to(linxiv_dir):
603                p.unlink()
604        except Exception as e:
605            print(f"[db] Could not remove PDF for {source_id} during hard delete: {e}")

Permanently delete a paper and all associated data.

def list_deleted_papers() -> list[sqlite3.Row]:
608def list_deleted_papers() -> list[sqlite3.Row]:
609    """Return all soft-deleted papers from the deleted_papers view."""
610    with _connect() as conn:
611        return conn.execute(
612            "SELECT * FROM deleted_papers ORDER BY deleted_at DESC"
613        ).fetchall()

Return all soft-deleted papers from the deleted_papers view.

def is_paper_deleted(source_id: str) -> bool:
616def is_paper_deleted(source_id: str) -> bool:
617    """Return True if a PAPER_ROOTS row exists with STATUS='deleted'."""
618    with _connect() as conn:
619        row = conn.execute(
620            "SELECT 1 FROM PAPER_ROOTS WHERE SOURCE_ID = ? AND STATUS = 'deleted'",
621            (source_id,),
622        ).fetchone()
623    return row is not None

Return True if a PAPER_ROOTS row exists with STATUS='deleted'.

def list_papers( latest_only: bool = True, limit: int | None = None, offset: int = 0, category: str | None = None) -> list[sqlite3.Row]:
785def list_papers(
786    latest_only: bool = True,
787    limit: int | None = None,
788    offset: int = 0,
789    category: str | None = None,
790) -> list[sqlite3.Row]:
791    """List all stored papers (latest version per paper by default).
792
793    Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset
794    apply to the filtered result.
795    """
796    with _connect() as conn:
797        sql = _LIST_PAPERS_LATEST_SQL if latest_only else _LIST_PAPERS_ALL_SQL
798        params: list[int | str] = []
799        if category is not None:
800            sql += " WHERE category = ?"
801            params.append(category)
802        sql += " ORDER BY published DESC"
803        if limit is not None:
804            sql += " LIMIT ? OFFSET ?"
805            params += [limit, offset]
806        elif offset:
807            sql += " LIMIT -1 OFFSET ?"
808            params.append(offset)
809        return conn.execute(sql, params).fetchall()

List all stored papers (latest version per paper by default).

Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset apply to the filtered result.

def get_categories() -> list[str]:
824def get_categories() -> list[str]:
825    """Return a sorted list of all distinct primary categories in the DB."""
826    with _connect() as conn:
827        rows = conn.execute(
828            "SELECT DISTINCT category FROM latest_papers WHERE category IS NOT NULL ORDER BY category"
829        ).fetchall()
830    return [row["category"] for row in rows]

Return a sorted list of all distinct primary categories in the DB.

def get_tags() -> list[str]:
833def get_tags() -> list[str]:
834    """Return a sorted list of all distinct tag labels across papers and projects."""
835    with _connect() as conn:
836        rows = conn.execute("""
837            SELECT DISTINCT LOWER(je.value) AS tag
838            FROM latest_papers p, json_each(p.tags) je
839            WHERE p.tags IS NOT NULL
840            UNION
841            SELECT DISTINCT LOWER(t.TAG) AS tag
842            FROM TAG t
843            JOIN PROJECT_TO_TAG ptt ON ptt.TAG_FK = t.TAG_FK
844            JOIN PROJECT pr ON pr.PROJECT_FK = ptt.PROJECT_FK
845            WHERE pr.STATUS = 'active'
846            ORDER BY tag
847        """).fetchall()
848    return [row["tag"] for row in rows]

Return a sorted list of all distinct tag labels across papers and projects.

def get_graph_data(exclude_single_authors: bool = False) -> tuple[list[dict], list[dict]]:
713def get_graph_data(exclude_single_authors: bool = False) -> tuple[list[dict], list[dict]]:
714    """Returns (nodes, edges) ready to pass to the graph view.
715
716    With ``exclude_single_authors`` True, an author node is dropped unless some
717    AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers.
718    """
719    with _connect() as conn:
720        paper_nodes = [
721            {
722                "id":        row["source_fk"],
723                "source_id": row["source_id"],
724                "label":     row["title"],
725                "type":      "paper",
726                "category":  row["category"],
727                "tags":      row["tags"] if row["tags"] else [],
728                "has_pdf":   bool(row["has_pdf"]),
729                "published": row["published"].isoformat() if row["published"] else None,
730                "url":       row["url"],
731                "doi":       row["doi"],
732                "summary":   row["summary"],
733            }
734            for row in conn.execute("""
735                SELECT r.SOURCE_FK AS source_fk, r.SOURCE_ID AS source_id,
736                       p.TITLE AS title, p.CATEGORY AS category,
737                       m.TAGS AS tags, p.HAS_PDF AS has_pdf, m.PUBLISHED AS published,
738                       m.URL AS url, m.DOI AS doi, m.SUMMARY AS summary
739                FROM PAPER_ROOTS r
740                JOIN PAPER p ON p.SOURCE_FK = r.SOURCE_FK
741                JOIN PAPER_META m ON m.PAPER_ID = p.PAPER_ID
742                WHERE p.VERSION = (SELECT MAX(VERSION) FROM PAPER WHERE SOURCE_FK = r.SOURCE_FK)
743                  AND r.STATUS = 'active'
744            """)
745        ]
746        # _GRAPH_AUTHORS_WITH_COUNT_SQL adds the per-name paper_count column
747        # (see queries.py); the default constant omits it and scans unchanged.
748        sql = (
749            _GRAPH_AUTHORS_WITH_COUNT_SQL
750            if exclude_single_authors
751            else _GRAPH_AUTHORS_SQL
752        )
753        author_rows = conn.execute(sql).fetchall()
754
755    seen_authors: set[str] = set()
756    author_nodes: list[dict] = []
757    edges: list[dict] = []
758    for row in author_rows:
759        name = row["author_name"]
760        # Drop an author when its paper_count is below two. A NULL count (no
761        # matching AUTHOR row) fails the `is not None` guard, so it is kept.
762        if (
763            exclude_single_authors
764            and row["paper_count"] is not None
765            and row["paper_count"] < 2
766        ):
767            continue
768        node_id = f"author::{name}"
769        if node_id not in seen_authors:
770            node: dict[str, object] = {"id": node_id, "label": name, "type": "author"}
771            # author_fk is NULL when the JSON name matches no AUTHOR row.
772            if row["author_fk"] is not None:
773                node["author_id"] = row["author_fk"]
774            author_nodes.append(node)
775            seen_authors.add(node_id)
776        edges.append({"source": row["source_fk"], "target": node_id})
777
778    return paper_nodes + author_nodes, edges

Returns (nodes, edges) ready to pass to the graph view.

With exclude_single_authors True, an author node is dropped unless some AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers.

def set_has_pdf(source_id: str, version: int, has: bool) -> None:
445def set_has_pdf(source_id: str, version: int, has: bool) -> None:
446    """Set the has_pdf flag for a specific paper version."""
447    with _connect() as conn:
448        conn.execute(
449            "UPDATE PAPER SET HAS_PDF = ? WHERE SOURCE_ID = ? AND VERSION = ?",
450            (has, source_id, version),
451        )

Set the has_pdf flag for a specific paper version.

def set_pdf_path(source_id: str, path: str, version: int | None = None) -> None:
461def set_pdf_path(source_id: str, path: str, version: int | None = None) -> None:
462    """Set the pdf_path for a paper (all versions)."""
463    with _connect() as conn:
464        if version:
465            conn.execute(
466                "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN "
467                "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?)",
468                (path, source_id, version),
469            )
470        else:
471            lastrow = conn.execute(
472                "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN "
473                "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ?)",
474                (path, source_id),
475            )
476            if lastrow:
477                print("Warning: you are updating the paths of all pdfs associated with this paper. This will cause the pdfs of other version to be deleted, those other version.")
478                print("This may incorrectly update the database, consider using different methodology")

Set the pdf_path for a paper (all versions).

def set_full_text( full_text: str | None, paper_id: int | None, source_id: str | None, version: int | None) -> None:
908def set_full_text(full_text: str|None, paper_id: int|None, source_id: str|None, version: int|None) -> None:
909    """Store extracted TeX full text and update the FTS index."""
910    with _connect() as conn:
911        row = conn.execute(
912            "SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?",
913            (source_id, version),
914        ).fetchone()
915        if row is None:
916            return
917        paper_id = int(row["PAPER_ID"])
918        conn.execute(
919            "UPDATE PAPER_META SET FULL_TEXT = ?, DOWNLOADED_SOURCE = 1 WHERE PAPER_ID = ?",
920            (full_text, paper_id),
921        )
922        conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
923        conn.execute(
924            "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)",
925            (source_id, full_text),
926        )

Store extracted TeX full text and update the FTS index.

def search_full_text(query: str, limit: int = 20) -> list[sqlite3.Row]:
929def search_full_text(query: str, limit: int = 20) -> list[sqlite3.Row]:
930    """Full-text search over TeX source content. Returns matching papers."""
931    with _connect() as conn:
932        if not conn.execute(
933            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
934        ).fetchone():
935            return []
936        return conn.execute("""
937            SELECT p.* FROM latest_papers p
938            JOIN papers_fts fts ON p.source_id = fts.paper_id
939            WHERE papers_fts MATCH ?
940            ORDER BY rank
941            LIMIT ?
942        """, (query, limit)).fetchall()

Full-text search over TeX source content. Returns matching papers.

class Q:
35class Q:
36    """Composable SQL WHERE predicate. Combine with &, |, ~."""
37    def __init__(self, sql: str, *params) -> None:
38        self.sql    = sql
39        self.params = params
40
41    def __and__(self, other: Q) -> Q:
42        return Q(f"({self.sql} AND {other.sql})", *self.params, *other.params)
43
44    def __or__(self, other: Q) -> Q:
45        return Q(f"({self.sql} OR {other.sql})", *self.params, *other.params)
46
47    def __invert__(self) -> Q:
48        return Q(f"(NOT {self.sql})", *self.params)

Composable SQL WHERE predicate. Combine with &, |, ~.

Q(sql: str, *params)
37    def __init__(self, sql: str, *params) -> None:
38        self.sql    = sql
39        self.params = params
sql
params
class Status(builtins.str, enum.Enum):
 8class Status(str, enum.Enum):
 9    ACTIVE   = "active"
10    ARCHIVED = "archived"
11    DELETED  = "deleted"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

ACTIVE = <Status.ACTIVE: 'active'>
ARCHIVED = <Status.ARCHIVED: 'archived'>
DELETED = <Status.DELETED: 'deleted'>
@dataclass
class Project:
 80@dataclass
 81class Project:
 82    name:            str
 83    description:     str                         = ""
 84    color:           Optional[int]               = None
 85    project_tags:    list[str]                   = field(default_factory=list)
 86    source_fks:      list[int]                   = field(default_factory=list)
 87    status:          Status                      = Status.ACTIVE
 88    id:              Optional[int]               = None
 89    created_at:      Optional[datetime.datetime] = None
 90    updated_at:      Optional[datetime.datetime] = None
 91    archived_at:     Optional[datetime.datetime] = None
 92    _sources_loaded: bool                        = field(default=True, repr=False, compare=False)
 93
 94    @classmethod
 95    def from_row(cls, row, load_sources: bool = True) -> Project:
 96        proj_fk = row["PROJECT_FK"]
 97        source_fks = _load_source_fks(proj_fk) if (proj_fk and load_sources) else []
 98        return cls(
 99            id              = proj_fk,
100            name            = row["NAME"],
101            description     = row["DESCRIPTION"] or "",
102            color           = int(row["COLOR"]) if row["COLOR"] is not None else None,
103            source_fks      = source_fks,
104            status          = Status(row["STATUS"]),
105            created_at      = row["CREATED_AT"],
106            updated_at      = row["UPDATED_AT"],
107            archived_at     = row["ARCHIVED_AT"],
108            _sources_loaded = load_sources,
109        )
110
111    def save(self) -> None:
112        now = datetime.datetime.now()
113        self.updated_at = now
114        if self.id is None:
115            self.created_at = now
116            with _connect() as conn:
117                cur = conn.execute(
118                    """
119                    INSERT INTO PROJECT
120                        (NAME, DESCRIPTION, COLOR, STATUS,
121                         CREATED_AT, UPDATED_AT, ARCHIVED_AT)
122                    VALUES (?, ?, ?, ?, ?, ?, ?)
123                    """,
124                    (self.name, self.description, self.color, self.status,
125                     self.created_at, self.updated_at, self.archived_at),
126                )
127                self.id = cur.lastrowid
128                assert self.id
129                _save_source_fks(conn, self.id, self.source_fks)
130        else:
131            # Fields only. Membership is written by add_papers/remove_papers/
132            # replace_papers — rewriting it here from this instance's
133            # (possibly stale) snapshot would discard rows written by other
134            # requests since the snapshot was loaded.
135            with _connect() as conn:
136                conn.execute(
137                    """
138                    UPDATE PROJECT
139                    SET NAME = ?, DESCRIPTION = ?, COLOR = ?, STATUS = ?,
140                        UPDATED_AT = ?, ARCHIVED_AT = ?
141                    WHERE PROJECT_FK = ?
142                    """,
143                    (self.name, self.description, self.color, self.status,
144                     self.updated_at, self.archived_at, self.id),
145                )
146
147    def delete(self) -> None:
148        self.status      = Status.DELETED
149        self.archived_at = datetime.datetime.now()
150        self.save()
151
152    def archive(self) -> None:
153        self.status      = Status.ARCHIVED
154        self.archived_at = datetime.datetime.now()
155        self.save()
156
157    def restore(self) -> None:
158        self.status      = Status.ACTIVE
159        self.archived_at = None
160        self.save()
161
162    def _refresh_source_fks(self) -> None:
163        """Reload membership through the usual read path after a write, so the
164        in-memory list matches what any fresh load would see (active papers,
165        PROJECT_TO_PAPER_FK order) rather than this instance's snapshot."""
166        assert self.id is not None
167        self.source_fks = _load_source_fks(self.id)
168        self._sources_loaded = True
169
170    def add_papers(self, source_fks: list[int]) -> None:
171        """Add many papers; duplicates and existing members are skipped."""
172        if self.id is None:
173            raise ValueError("Project must be saved before papers can be added.")
174        if not source_fks:
175            return
176        # No membership pre-check against self.source_fks (it may be stale);
177        # the insert is OR IGNORE.
178        with _connect() as conn:
179            conn.executemany(
180                _INSERT_MEMBERSHIP_SQL,
181                [(self.id, sfk) for sfk in source_fks],
182            )
183        self._refresh_source_fks()
184
185    def remove_papers(self, source_fks: list[int]) -> None:
186        """Remove many papers; non-members are skipped."""
187        if self.id is None:
188            return
189        if not source_fks:
190            return
191        with _connect() as conn:
192            conn.executemany(
193                "DELETE FROM PROJECT_TO_PAPER WHERE PROJECT_FK = ? AND SOURCE_FK = ?",
194                [(self.id, sfk) for sfk in source_fks],
195            )
196        self._refresh_source_fks()
197
198    def replace_papers(self, source_fks: list[int]) -> None:
199        """Set the membership to exactly source_fks (full replace, in order).
200
201        Any membership rows written since the caller loaded its snapshot are
202        discarded. Use add/remove for incremental changes.
203        """
204        if self.id is None:
205            raise ValueError("Project must be saved before papers can be replaced.")
206        seen: set[int] = set()
207        deduped: list[int] = []
208        for sfk in source_fks:
209            if sfk not in seen:
210                seen.add(sfk)
211                deduped.append(sfk)
212        with _connect() as conn:
213            _save_source_fks(conn, self.id, deduped)
214        self._refresh_source_fks()
215
216    def load_papers(self) -> list[int]:
217        return self.source_fks
218
219    @property
220    def paper_count(self) -> int:
221        # Returns 0 when built via from_row(load_sources=False); check
222        # _sources_loaded before trusting this value for display.
223        return len(self.source_fks)
224
225    def __repr__(self) -> str:
226        papers = len(self.source_fks) if self._sources_loaded else "?"
227        return f"<Project id={self.id!r} name={self.name!r} status={self.status!r} papers={papers}>"
Project( name: str, description: str = '', color: int | None = None, project_tags: list[str] = <factory>, source_fks: list[int] = <factory>, status: service.models.project.Status = <Status.ACTIVE: 'active'>, id: int | None = None, created_at: datetime.datetime | None = None, updated_at: datetime.datetime | None = None, archived_at: datetime.datetime | None = None, _sources_loaded: bool = True)
name: str
description: str = ''
color: int | None = None
project_tags: list[str]
source_fks: list[int]
status: service.models.project.Status = <Status.ACTIVE: 'active'>
id: int | None = None
created_at: datetime.datetime | None = None
updated_at: datetime.datetime | None = None
archived_at: datetime.datetime | None = None
@classmethod
def from_row(cls, row, load_sources: bool = True) -> Project:
 94    @classmethod
 95    def from_row(cls, row, load_sources: bool = True) -> Project:
 96        proj_fk = row["PROJECT_FK"]
 97        source_fks = _load_source_fks(proj_fk) if (proj_fk and load_sources) else []
 98        return cls(
 99            id              = proj_fk,
100            name            = row["NAME"],
101            description     = row["DESCRIPTION"] or "",
102            color           = int(row["COLOR"]) if row["COLOR"] is not None else None,
103            source_fks      = source_fks,
104            status          = Status(row["STATUS"]),
105            created_at      = row["CREATED_AT"],
106            updated_at      = row["UPDATED_AT"],
107            archived_at     = row["ARCHIVED_AT"],
108            _sources_loaded = load_sources,
109        )
def save(self) -> None:
111    def save(self) -> None:
112        now = datetime.datetime.now()
113        self.updated_at = now
114        if self.id is None:
115            self.created_at = now
116            with _connect() as conn:
117                cur = conn.execute(
118                    """
119                    INSERT INTO PROJECT
120                        (NAME, DESCRIPTION, COLOR, STATUS,
121                         CREATED_AT, UPDATED_AT, ARCHIVED_AT)
122                    VALUES (?, ?, ?, ?, ?, ?, ?)
123                    """,
124                    (self.name, self.description, self.color, self.status,
125                     self.created_at, self.updated_at, self.archived_at),
126                )
127                self.id = cur.lastrowid
128                assert self.id
129                _save_source_fks(conn, self.id, self.source_fks)
130        else:
131            # Fields only. Membership is written by add_papers/remove_papers/
132            # replace_papers — rewriting it here from this instance's
133            # (possibly stale) snapshot would discard rows written by other
134            # requests since the snapshot was loaded.
135            with _connect() as conn:
136                conn.execute(
137                    """
138                    UPDATE PROJECT
139                    SET NAME = ?, DESCRIPTION = ?, COLOR = ?, STATUS = ?,
140                        UPDATED_AT = ?, ARCHIVED_AT = ?
141                    WHERE PROJECT_FK = ?
142                    """,
143                    (self.name, self.description, self.color, self.status,
144                     self.updated_at, self.archived_at, self.id),
145                )
def delete(self) -> None:
147    def delete(self) -> None:
148        self.status      = Status.DELETED
149        self.archived_at = datetime.datetime.now()
150        self.save()
def archive(self) -> None:
152    def archive(self) -> None:
153        self.status      = Status.ARCHIVED
154        self.archived_at = datetime.datetime.now()
155        self.save()
def restore(self) -> None:
157    def restore(self) -> None:
158        self.status      = Status.ACTIVE
159        self.archived_at = None
160        self.save()
def add_papers(self, source_fks: list[int]) -> None:
170    def add_papers(self, source_fks: list[int]) -> None:
171        """Add many papers; duplicates and existing members are skipped."""
172        if self.id is None:
173            raise ValueError("Project must be saved before papers can be added.")
174        if not source_fks:
175            return
176        # No membership pre-check against self.source_fks (it may be stale);
177        # the insert is OR IGNORE.
178        with _connect() as conn:
179            conn.executemany(
180                _INSERT_MEMBERSHIP_SQL,
181                [(self.id, sfk) for sfk in source_fks],
182            )
183        self._refresh_source_fks()

Add many papers; duplicates and existing members are skipped.

def remove_papers(self, source_fks: list[int]) -> None:
185    def remove_papers(self, source_fks: list[int]) -> None:
186        """Remove many papers; non-members are skipped."""
187        if self.id is None:
188            return
189        if not source_fks:
190            return
191        with _connect() as conn:
192            conn.executemany(
193                "DELETE FROM PROJECT_TO_PAPER WHERE PROJECT_FK = ? AND SOURCE_FK = ?",
194                [(self.id, sfk) for sfk in source_fks],
195            )
196        self._refresh_source_fks()

Remove many papers; non-members are skipped.

def replace_papers(self, source_fks: list[int]) -> None:
198    def replace_papers(self, source_fks: list[int]) -> None:
199        """Set the membership to exactly source_fks (full replace, in order).
200
201        Any membership rows written since the caller loaded its snapshot are
202        discarded. Use add/remove for incremental changes.
203        """
204        if self.id is None:
205            raise ValueError("Project must be saved before papers can be replaced.")
206        seen: set[int] = set()
207        deduped: list[int] = []
208        for sfk in source_fks:
209            if sfk not in seen:
210                seen.add(sfk)
211                deduped.append(sfk)
212        with _connect() as conn:
213            _save_source_fks(conn, self.id, deduped)
214        self._refresh_source_fks()

Set the membership to exactly source_fks (full replace, in order).

Any membership rows written since the caller loaded its snapshot are discarded. Use add/remove for incremental changes.

def load_papers(self) -> list[int]:
216    def load_papers(self) -> list[int]:
217        return self.source_fks
paper_count: int
219    @property
220    def paper_count(self) -> int:
221        # Returns 0 when built via from_row(load_sources=False); check
222        # _sources_loaded before trusting this value for display.
223        return len(self.source_fks)
def ensure_projects_db() -> None:
15def ensure_projects_db() -> None:
16    with _connect() as conn:
17        row = conn.execute(
18            "SELECT name FROM sqlite_master WHERE type='table' AND name='PROJECT'"
19        ).fetchone()
20    if row is None:
21        raise RuntimeError("PROJECT table not found — run apply_sql_schema first")
def get_project( project_id: int, load_sources: bool = True) -> Project | None:
232def get_project(project_id: int, load_sources: bool = True) -> Optional[Project]:
233    with _connect() as conn:
234        row = conn.execute(
235            "SELECT * FROM PROJECT WHERE PROJECT_FK = ?", (project_id,)
236        ).fetchone()
237    return Project.from_row(row, load_sources=load_sources) if row else None
def filter_projects( condition: Q | None = None, load_sources: bool = True) -> list[Project]:
240def filter_projects(condition: Q | None = None, load_sources: bool = True) -> list[Project]:
241    if condition is None:
242        sql, params = "SELECT * FROM PROJECT", ()
243    else:
244        sql    = f"SELECT * FROM PROJECT WHERE {condition.sql}"
245        params = condition.params
246    with _connect() as conn:
247        rows = conn.execute(sql, params).fetchall()
248    return [Project.from_row(row, load_sources=load_sources) for row in rows]
def color_to_hex(color: int) -> str:
26def color_to_hex(color: int) -> str:
27    return f"#{color:06x}"
def color_from_hex(hex_str: str) -> int:
30def color_from_hex(hex_str: str) -> int:
31    return int(hex_str.lstrip("#"), 16)
@dataclass
class Note:
 35@dataclass
 36class Note:
 37    source_fk:   int
 38    project_id:  Optional[int]               = None
 39    paper_id_fk: Optional[int]               = None  # pins note to a specific paper version
 40    title:       str                          = ""
 41    content:     str                          = ""
 42    id:          Optional[int]               = None
 43    created_at:  Optional[datetime.datetime] = None
 44    updated_at:  Optional[datetime.datetime] = None
 45
 46    @classmethod
 47    def from_row(cls, row) -> Note:
 48        return cls(
 49            id          = row["NOTE_SK"],
 50            source_fk   = row["SOURCE_FK"],
 51            paper_id_fk = row["PAPER_ID_FK"],
 52            project_id  = row["PROJECT_FK"],
 53            title       = row["TITLE"] or "",
 54            content     = row["NOTE"] or "",
 55            created_at  = row["CREATED_AT"],
 56            updated_at  = row["UPDATED_AT"],
 57        )
 58
 59    def to_details(self) -> NoteDetails:
 60        return NoteDetails(
 61            note_id     = self.id,
 62            source_fk   = self.source_fk,
 63            paper_id_fk = self.paper_id_fk,
 64            project_id  = self.project_id,
 65            title       = self.title,
 66            content     = self.content,
 67            created_at  = self.created_at,
 68            updated_at  = self.updated_at,
 69        )
 70
 71    def save(self) -> None:
 72        now = datetime.datetime.now(datetime.timezone.utc)
 73        if self.id is None:
 74            with _connect() as conn:
 75                cur = conn.execute(
 76                    """
 77                    INSERT INTO NOTE
 78                        (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT)
 79                    VALUES (?, ?, ?, ?, ?, ?, ?)
 80                    """,
 81                    (self.source_fk, self.paper_id_fk, self.project_id,
 82                     self.title, self.content, now, now),
 83                )
 84                self.id = cur.lastrowid
 85            self.created_at = now
 86            self.updated_at = now
 87        else:
 88            with _connect() as conn:
 89                cur = conn.execute(
 90                    "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?",
 91                    (self.title, self.content, now, self.id),
 92                )
 93                if cur.rowcount == 0:
 94                    raise ValueError(f"Note with id={self.id} does not exist")
 95            self.updated_at = now
 96
 97    def delete(self) -> None:
 98        if self.id is None:
 99            return
100        if delete_note(self.id):
101            self.id = None
Note( source_fk: int, project_id: int | None = None, paper_id_fk: int | None = None, title: str = '', content: str = '', id: int | None = None, created_at: datetime.datetime | None = None, updated_at: datetime.datetime | None = None)
source_fk: int
project_id: int | None = None
paper_id_fk: int | None = None
title: str = ''
content: str = ''
id: int | None = None
created_at: datetime.datetime | None = None
updated_at: datetime.datetime | None = None
@classmethod
def from_row(cls, row) -> Note:
46    @classmethod
47    def from_row(cls, row) -> Note:
48        return cls(
49            id          = row["NOTE_SK"],
50            source_fk   = row["SOURCE_FK"],
51            paper_id_fk = row["PAPER_ID_FK"],
52            project_id  = row["PROJECT_FK"],
53            title       = row["TITLE"] or "",
54            content     = row["NOTE"] or "",
55            created_at  = row["CREATED_AT"],
56            updated_at  = row["UPDATED_AT"],
57        )
def to_details(self) -> service.models.note.NoteDetails:
59    def to_details(self) -> NoteDetails:
60        return NoteDetails(
61            note_id     = self.id,
62            source_fk   = self.source_fk,
63            paper_id_fk = self.paper_id_fk,
64            project_id  = self.project_id,
65            title       = self.title,
66            content     = self.content,
67            created_at  = self.created_at,
68            updated_at  = self.updated_at,
69        )
def save(self) -> None:
71    def save(self) -> None:
72        now = datetime.datetime.now(datetime.timezone.utc)
73        if self.id is None:
74            with _connect() as conn:
75                cur = conn.execute(
76                    """
77                    INSERT INTO NOTE
78                        (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT)
79                    VALUES (?, ?, ?, ?, ?, ?, ?)
80                    """,
81                    (self.source_fk, self.paper_id_fk, self.project_id,
82                     self.title, self.content, now, now),
83                )
84                self.id = cur.lastrowid
85            self.created_at = now
86            self.updated_at = now
87        else:
88            with _connect() as conn:
89                cur = conn.execute(
90                    "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?",
91                    (self.title, self.content, now, self.id),
92                )
93                if cur.rowcount == 0:
94                    raise ValueError(f"Note with id={self.id} does not exist")
95            self.updated_at = now
def delete(self) -> None:
 97    def delete(self) -> None:
 98        if self.id is None:
 99            return
100        if delete_note(self.id):
101            self.id = None
def ensure_notes_db() -> None:
28def ensure_notes_db() -> None:
29    if not _notes_table_exists():
30        raise RuntimeError("NOTE table not found — run apply_sql_schema first")
def get_note(note_id: int) -> service.models.note.NoteDetails | None:
106def get_note(note_id: int) -> Optional[NoteDetails]:
107    row = _get_note_row(note_id)
108    return Note.from_row(row).to_details() if row else None
def get_notes( source_fk: int, project_id: int | None = None, *, all_projects: bool = False) -> list[service.models.note.NoteDetails]:
155def get_notes(
156    source_fk: int,
157    project_id: Optional[int] = None,
158    *,
159    all_projects: bool = False,
160) -> list[NoteDetails]:
161    if all_projects:
162        rows = _list_notes(source_fk=source_fk)
163    elif project_id is None:
164        rows = _list_notes(source_fk=source_fk, project_unscoped=True)
165    else:
166        rows = _list_notes(source_fk=source_fk, project_fk=project_id)
167    return [Note.from_row(row).to_details() for row in rows]
def get_project_notes(project_id: int) -> list[service.models.note.NoteDetails]:
170def get_project_notes(project_id: int) -> list[NoteDetails]:
171    with _connect() as conn:
172        rows = conn.execute(
173            "SELECT * FROM NOTE WHERE PROJECT_FK = ? ORDER BY SOURCE_FK ASC, CREATED_AT ASC",
174            (project_id,),
175        ).fetchall()
176    return [Note.from_row(row).to_details() for row in rows]
def count_project_notes(project_id: int) -> int:
179def count_project_notes(project_id: int) -> int:
180    return _count_project_notes(project_id)
def count_paper_notes(source_fk: int, project_id: int | None = None) -> int:
183def count_paper_notes(source_fk: int, project_id: Optional[int] = None) -> int:
184    return _count_notes(source_fk, project_fk=project_id)
def note_counts_by_paper_for_project(project_id: int) -> dict[int, int]:
200def note_counts_by_paper_for_project(project_id: int) -> dict[int, int]:
201    """
202    Return note counts keyed by SOURCE_FK for each paper in the project.
203    Papers with no notes appear with count 0. Missing project_id yields empty dict.
204    """
205    with _connect() as conn:
206        rows = conn.execute(
207            """
208            SELECT pp.SOURCE_FK AS source_fk, COALESCE(n.cnt, 0) AS note_count
209            FROM PROJECT_TO_PAPER pp
210            JOIN PAPER_ROOTS r ON r.SOURCE_FK = pp.SOURCE_FK
211            LEFT JOIN (
212                SELECT SOURCE_FK, COUNT(*) AS cnt
213                FROM NOTE
214                WHERE PROJECT_FK = ?
215                GROUP BY SOURCE_FK
216            ) AS n ON n.SOURCE_FK = pp.SOURCE_FK
217            WHERE pp.PROJECT_FK = ? AND r.STATUS = 'active'
218            ORDER BY pp.PROJECT_TO_PAPER_FK
219            """,
220            (project_id, project_id),
221        ).fetchall()
222    return {int(row["source_fk"]): int(row["note_count"]) for row in rows}

Return note counts keyed by SOURCE_FK for each paper in the project. Papers with no notes appear with count 0. Missing project_id yields empty dict.

def search_notes_source_fks(query: str, limit: int = 50) -> list[int]:
229def search_notes_source_fks(query: str, limit: int = 50) -> list[int]:
230    """Return distinct SOURCE_FKs of active papers whose note title or content contains query."""
231    pattern = f"%{_escape_like(query)}%"
232    with _connect() as conn:
233        rows = conn.execute(
234            """
235            SELECT n.SOURCE_FK
236            FROM NOTE n
237            JOIN PAPER_ROOTS r ON r.SOURCE_FK = n.SOURCE_FK
238            WHERE r.STATUS = 'active'
239              AND (n.TITLE LIKE ? ESCAPE '\\' OR n.NOTE LIKE ? ESCAPE '\\')
240            GROUP BY n.SOURCE_FK
241            ORDER BY MAX(n.UPDATED_AT) DESC
242            LIMIT ?
243            """,
244            (pattern, pattern, limit),
245        ).fetchall()
246    return [int(row["SOURCE_FK"]) for row in rows]

Return distinct SOURCE_FKs of active papers whose note title or content contains query.