storage

Storage layer — SQLite database and notes.

View Source

 1"""Storage layer — SQLite database and notes."""
 2
 3from .db import (
 4    DB_PATH,
 5    _connect,
 6    init_db,
 7    init_table,
 8    parse_entry_id,
 9    save_paper,
10    save_papers,
11    save_paper_metadata,
12    save_papers_metadata,
13    get_paper,
14    get_paper_by_id,
15    get_paper_by_source_fk,
16    get_papers_by_source_fks,
17    get_paper_root,
18    get_all_versions,
19    soft_delete_paper,
20    restore_paper,
21    hard_delete_paper,
22    list_deleted_papers,
23    is_paper_deleted,
24    list_papers,
25    get_categories,
26    get_tags,
27    get_graph_data,
28    set_has_pdf,
29    set_pdf_path,
30    set_full_text,
31    search_full_text,
32)
33from .projects import (
34    Q,
35    Status,
36    Project,
37    ensure_projects_db,
38    get_project,
39    filter_projects,
40    color_to_hex,
41    color_from_hex,
42)
43from .notes import (
44    Note,
45    ensure_notes_db,
46    get_note,
47    get_notes,
48    get_project_notes,
49    count_project_notes,
50    count_paper_notes,
51    note_counts_by_paper_for_project,
52    search_notes_source_fks,
53)
54
55__all__ = [
56    # db
57    "DB_PATH", "_connect", "init_db", "init_table", "parse_entry_id",
58    "save_paper", "save_papers", "save_paper_metadata", "save_papers_metadata",
59    "get_paper", "get_paper_by_id", "get_paper_by_source_fk", "get_papers_by_source_fks",
60    "get_paper_root",
61    "get_all_versions",
62    "soft_delete_paper", "restore_paper", "hard_delete_paper",
63    "list_deleted_papers", "is_paper_deleted",
64    "list_papers",
65    "get_categories", "get_tags", "get_graph_data",
66    "set_has_pdf", "set_pdf_path", "set_full_text", "search_full_text",
67    # projects
68    "Q", "Status", "Project", "ensure_projects_db",
69    "get_project", "filter_projects", "color_to_hex", "color_from_hex",
70    # notes
71    "Note", "ensure_notes_db",
72    "get_note", "get_notes", "get_project_notes",
73    "count_project_notes", "count_paper_notes",
74    "note_counts_by_paper_for_project",
75    "search_notes_source_fks",
76]

DB_PATH = '/home/uribe/.local/share/com.linxiv.app/papers.db'

def _connect( db_path: str = '/home/uribe/.local/share/com.linxiv.app/papers.db') -> sqlite3.Connection: View Source

81def _connect(db_path: str = DB_PATH) -> sqlite3.Connection:
82    conn = sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES)
83    conn.row_factory = sqlite3.Row
84    conn.execute("PRAGMA foreign_keys = ON")
85    return conn

def init_db() -> None: View Source

244def init_db() -> None:
245    with _connect() as conn:
246        apply_sql_schema(conn)
247
248    if old_pdf_dir().is_dir():
249        wrong_path_rows = _get_deprecated_path_rows()
250        if wrong_path_rows:
251            for rows in wrong_path_rows:
252                try:
253                    curr_path = rows["PDF_PATH"]
254                    if Path(curr_path).is_file() and Path(curr_path).rename(curr_path.replace(str(old_pdf_dir()), str(pdf_dir()))).exists():
255                        set_pdf_path(rows["source_id"], curr_path.replace(str(old_pdf_dir()), str(pdf_dir())))
256                        print(f"File [ {curr_path} ] moved and verified!")
257                    else:
258                        print(f"File [ {curr_path} ] could not be moved")
259                except Exception as e:
260                    print(f"An error occured while trying to parse file {rows['PDF_PATH']}:\n{e}")
261        _remove_gui_pdf_dir(old_pdf_dir())

def init_table( table_name: str, columns: list[tuple], primary_key: list[str] | None = None, db_path: str = '/home/uribe/.local/share/com.linxiv.app/papers.db') -> None: View Source

53def init_table(
54    table_name: str,
55    columns: list[tuple],
56    primary_key: list[str] | None = None,
57    db_path: str = DB_PATH,
58) -> None:
59    """Create a table if it doesn't exist.
60
61    Each column is a tuple: (name, python_type[, constraints])
62    where constraints is an optional SQL string appended after the type.
63    """
64    col_defs: list[str] = []
65    for col in columns:
66        name  = col[0]
67        sql_t = _PY_TO_SQL.get(col[1], "TEXT")
68        extra = col[2] if len(col) > 2 else ""
69        col_defs.append(f"    {name} {sql_t} {extra}".rstrip())
70    if primary_key:
71        col_defs.append(f"    PRIMARY KEY ({', '.join(primary_key)})")
72    ddl = (
73        f"CREATE TABLE IF NOT EXISTS {table_name} (\n"
74        + ",\n".join(col_defs)
75        + "\n);"
76    )
77    with _connect(db_path) as conn:
78        conn.execute(ddl)

Create a table if it doesn't exist.

Each column is a tuple: (name, python_type[, constraints]) where constraints is an optional SQL string appended after the type.

def parse_entry_id(entry_id: str) -> tuple[str, int]: View Source

277def parse_entry_id(entry_id: str) -> tuple[str, int]:
278    """EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4)."""
279    raw = entry_id.split('/')[-1]
280    match = re.match(r'^(.+?)(?:v(\d+))?$', raw)
281    assert match
282    source_id = match.group(1)
283    version = int(match.group(2)) if match.group(2) else 1
284    return source_id, version

EX:Split 'http://arxiv.org/abs/2204.12985v4' into ('2204.12985', 4).

def save_paper(paper: arxiv.Result, tags: list[str] | None = None) -> tuple[str, int]: View Source

344def save_paper(paper: arxiv.Result, tags: list[str] | None = None) -> tuple[str, int]:
345    """Insert a single arxiv paper. Returns (source_id, version)."""
346    with _connect() as conn:
347        return _insert_arxiv(conn, paper, tags)

Insert a single arxiv paper. Returns (source_id, version).

def save_papers( papers: list[arxiv.Result], tags: list[str] | None = None) -> list[tuple[str, int]]: View Source

350def save_papers(papers: list[arxiv.Result], tags: list[str] | None = None) -> list[tuple[str, int]]:
351    """Batch insert arxiv papers in a single transaction. Returns list of (source_id, version)."""
352    with _connect() as conn:
353        return [_insert_arxiv(conn, paper, tags) for paper in papers]

Batch insert arxiv papers in a single transaction. Returns list of (source_id, version).

def save_paper_metadata( meta: sources.PaperMetadata, tags: list[str] | None = None) -> tuple[str, int]: View Source

356def save_paper_metadata(meta: PaperMetadata, tags: list[str] | None = None) -> tuple[str, int]:
357    """Insert a paper from any source via PaperMetadata. Returns (source_id, version)."""
358    with _connect() as conn:
359        return _insert_metadata(conn, meta, tags)

Insert a paper from any source via PaperMetadata. Returns (source_id, version).

def save_papers_metadata( papers: list[sources.PaperMetadata], tags: list[str] | None = None) -> list[tuple[str, int]]: View Source

362def save_papers_metadata(papers: list[PaperMetadata], tags: list[str] | None = None) -> list[tuple[str, int]]:
363    """Batch insert papers from any source. Returns list of (source_id, version)."""
364    with _connect() as conn:
365        return [_insert_metadata(conn, meta, tags) for meta in papers]

Batch insert papers from any source. Returns list of (source_id, version).

def get_paper(source_id: str, version: int | None = None) -> sqlite3.Row | None: View Source

626def get_paper(source_id: str, version: Optional[int] = None) -> Optional[sqlite3.Row]:
627    """Fetch a specific version, or the latest if version is None."""
628    with _connect() as conn:
629        if version:
630            return conn.execute(
631                "SELECT * FROM papers WHERE source_id = ? AND version = ?",
632                (source_id, version),
633            ).fetchone()
634        return conn.execute(
635            "SELECT * FROM latest_papers WHERE source_id = ?",
636            (source_id,),
637        ).fetchone()

Fetch a specific version, or the latest if version is None.

def get_paper_by_id(paper_id: int) -> sqlite3.Row | None: View Source

640def get_paper_by_id(paper_id: int) -> Optional[sqlite3.Row]:
641    """Fetch a paper version by its PAPER primary key."""
642    with _connect() as conn:
643        return conn.execute(
644            "SELECT * FROM papers WHERE paper_id = ?",
645            (paper_id,),
646        ).fetchone()

Fetch a paper version by its PAPER primary key.

def get_paper_by_source_fk(source_fk: int) -> sqlite3.Row | None: View Source

649def get_paper_by_source_fk(source_fk: int) -> Optional[sqlite3.Row]:
650    """Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK."""
651    with _connect() as conn:
652        return conn.execute(
653            "SELECT * FROM latest_papers WHERE source_id = ("
654            "SELECT SOURCE_ID FROM PAPER_ROOTS WHERE SOURCE_FK = ?)",
655            (source_fk,),
656        ).fetchone()

Fetch the latest version for a PAPER_ROOTS row by SOURCE_FK.

def get_papers_by_source_fks(source_fks: list[int]) -> list[sqlite3.Row]: View Source

659def get_papers_by_source_fks(source_fks: list[int]) -> list[sqlite3.Row]:
660    """Batch-fetch latest paper versions for a list of SOURCE_FKs in one query.
661
662    The empty-list guard is intentional: _queries_list_papers(source_fks=None)
663    returns the full library, which is the wrong behaviour here.
664    """
665    if not source_fks:
666        return []
667    return _queries_list_papers(source_fks=source_fks)

Batch-fetch latest paper versions for a list of SOURCE_FKs in one query.

The empty-list guard is intentional: _queries_list_papers(source_fks=None) returns the full library, which is the wrong behaviour here.

def get_paper_root(source_id: str) -> sqlite3.Row | None: View Source

671def get_paper_root(source_id: str) -> Optional[sqlite3.Row]:
672    """Return the PAPER_ROOTS row for a given source_id."""
673    with _connect() as conn:
674        return conn.execute(
675            "SELECT * FROM PAPER_ROOTS WHERE SOURCE_ID = ?",
676            (source_id,),
677        ).fetchone()

Return the PAPER_ROOTS row for a given source_id.

def get_all_versions(source_id: str) -> list[sqlite3.Row]: View Source

704def get_all_versions(source_id: str) -> list[sqlite3.Row]:
705    """Fetch all stored versions of a paper, ordered oldest to newest."""
706    with _connect() as conn:
707        return conn.execute(
708            "SELECT * FROM papers WHERE source_id = ? ORDER BY version ASC",
709            (source_id,),
710        ).fetchall()

Fetch all stored versions of a paper, ordered oldest to newest.

def soft_delete_paper(source_id: str) -> str | None: View Source

510def soft_delete_paper(source_id: str) -> str | None:
511    """Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present.
512
513    Returns the pdf_path that was stored (for caller reference), or None.
514    """
515    stored_path: str | None = None
516    with _connect() as conn:
517        row = conn.execute(
518            "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN "
519            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
520            (source_id,),
521        ).fetchone()
522        if row:
523            stored_path = row["PDF_PATH"]
524
525        if conn.execute(
526            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
527        ).fetchone():
528            conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
529
530        conn.execute(
531            "UPDATE PAPER_ROOTS SET STATUS = 'deleted', DELETED_AT = ?, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?",
532            (datetime.datetime.now(), source_id),
533        )
534
535    if stored_path:
536        p = Path(stored_path)
537        try:
538            linxiv_dir = pdf_dir()
539            if p.is_file() and p.is_relative_to(linxiv_dir):
540                p.unlink()
541                with _connect() as conn:
542                    conn.execute(
543                        "UPDATE PAPER SET HAS_PDF = 0 WHERE SOURCE_ID = ?",
544                        (source_id,),
545                    )
546        except Exception as e:
547            print(f"[db] Could not remove PDF for {source_id}: {e}")
548
549    return stored_path

Soft-delete a paper: set STATUS='deleted', remove PDF from linxiv dir if present.

Returns the pdf_path that was stored (for caller reference), or None.

def restore_paper(source_id: str) -> str | None: View Source

552def restore_paper(source_id: str) -> str | None:
553    """Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist)."""
554    stored_path: str | None = None
555    full_text: str | None = None
556    with _connect() as conn:
557        row = conn.execute(
558            "SELECT PDF_PATH, FULL_TEXT FROM PAPER_META WHERE PAPER_ID IN "
559            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
560            (source_id,),
561        ).fetchone()
562        if row:
563            stored_path = row["PDF_PATH"]
564            full_text = row["FULL_TEXT"]
565
566        conn.execute(
567            "UPDATE PAPER_ROOTS SET STATUS = 'active', DELETED_AT = NULL, UPDATED_AT = datetime('now') WHERE SOURCE_ID = ?",
568            (source_id,),
569        )
570
571        if row and row["FULL_TEXT"]:
572            if conn.execute(
573                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
574            ).fetchone():
575                conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
576                conn.execute(
577                    "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)",
578                    (source_id, full_text),
579                )
580
581    return stored_path

Restore a soft-deleted paper. Returns the stored pdf_path (may no longer exist).

def hard_delete_paper(source_id: str) -> None: View Source

584def hard_delete_paper(source_id: str) -> None:
585    """Permanently delete a paper and all associated data."""
586    with _connect() as conn:
587        row = conn.execute(
588            "SELECT PDF_PATH FROM PAPER_META WHERE PAPER_ID IN "
589            "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? ORDER BY VERSION DESC LIMIT 1)",
590            (source_id,),
591        ).fetchone()
592        if conn.execute(
593            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
594        ).fetchone():
595            conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
596        conn.execute("DELETE FROM PAPER_ROOTS WHERE SOURCE_ID = ?", (source_id,))
597
598    if row and row["PDF_PATH"]:
599        p = Path(row["PDF_PATH"])
600        try:
601            linxiv_dir = pdf_dir()
602            if p.is_file() and p.is_relative_to(linxiv_dir):
603                p.unlink()
604        except Exception as e:
605            print(f"[db] Could not remove PDF for {source_id} during hard delete: {e}")

Permanently delete a paper and all associated data.

def list_deleted_papers() -> list[sqlite3.Row]: View Source

608def list_deleted_papers() -> list[sqlite3.Row]:
609    """Return all soft-deleted papers from the deleted_papers view."""
610    with _connect() as conn:
611        return conn.execute(
612            "SELECT * FROM deleted_papers ORDER BY deleted_at DESC"
613        ).fetchall()

Return all soft-deleted papers from the deleted_papers view.

def is_paper_deleted(source_id: str) -> bool: View Source

616def is_paper_deleted(source_id: str) -> bool:
617    """Return True if a PAPER_ROOTS row exists with STATUS='deleted'."""
618    with _connect() as conn:
619        row = conn.execute(
620            "SELECT 1 FROM PAPER_ROOTS WHERE SOURCE_ID = ? AND STATUS = 'deleted'",
621            (source_id,),
622        ).fetchone()
623    return row is not None

Return True if a PAPER_ROOTS row exists with STATUS='deleted'.

def list_papers( latest_only: bool = True, limit: int | None = None, offset: int = 0, category: str | None = None) -> list[sqlite3.Row]: View Source

785def list_papers(
786    latest_only: bool = True,
787    limit: int | None = None,
788    offset: int = 0,
789    category: str | None = None,
790) -> list[sqlite3.Row]:
791    """List all stored papers (latest version per paper by default).
792
793    Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset
794    apply to the filtered result.
795    """
796    with _connect() as conn:
797        sql = _LIST_PAPERS_LATEST_SQL if latest_only else _LIST_PAPERS_ALL_SQL
798        params: list[int | str] = []
799        if category is not None:
800            sql += " WHERE category = ?"
801            params.append(category)
802        sql += " ORDER BY published DESC"
803        if limit is not None:
804            sql += " LIMIT ? OFFSET ?"
805            params += [limit, offset]
806        elif offset:
807            sql += " LIMIT -1 OFFSET ?"
808            params.append(offset)
809        return conn.execute(sql, params).fetchall()

List all stored papers (latest version per paper by default).

Optionally filter by exact primary category (e.g. "cs.LG"); limit/offset apply to the filtered result.

def get_categories() -> list[str]: View Source

824def get_categories() -> list[str]:
825    """Return a sorted list of all distinct primary categories in the DB."""
826    with _connect() as conn:
827        rows = conn.execute(
828            "SELECT DISTINCT category FROM latest_papers WHERE category IS NOT NULL ORDER BY category"
829        ).fetchall()
830    return [row["category"] for row in rows]

Return a sorted list of all distinct primary categories in the DB.

def get_tags() -> list[str]: View Source

833def get_tags() -> list[str]:
834    """Return a sorted list of all distinct tag labels across papers and projects."""
835    with _connect() as conn:
836        rows = conn.execute("""
837            SELECT DISTINCT LOWER(je.value) AS tag
838            FROM latest_papers p, json_each(p.tags) je
839            WHERE p.tags IS NOT NULL
840            UNION
841            SELECT DISTINCT LOWER(t.TAG) AS tag
842            FROM TAG t
843            JOIN PROJECT_TO_TAG ptt ON ptt.TAG_FK = t.TAG_FK
844            JOIN PROJECT pr ON pr.PROJECT_FK = ptt.PROJECT_FK
845            WHERE pr.STATUS = 'active'
846            ORDER BY tag
847        """).fetchall()
848    return [row["tag"] for row in rows]

Return a sorted list of all distinct tag labels across papers and projects.

def get_graph_data(exclude_single_authors: bool = False) -> tuple[list[dict], list[dict]]: View Source

713def get_graph_data(exclude_single_authors: bool = False) -> tuple[list[dict], list[dict]]:
714    """Returns (nodes, edges) ready to pass to the graph view.
715
716    With ``exclude_single_authors`` True, an author node is dropped unless some
717    AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers.
718    """
719    with _connect() as conn:
720        paper_nodes = [
721            {
722                "id":        row["source_fk"],
723                "source_id": row["source_id"],
724                "label":     row["title"],
725                "type":      "paper",
726                "category":  row["category"],
727                "tags":      row["tags"] if row["tags"] else [],
728                "has_pdf":   bool(row["has_pdf"]),
729                "published": row["published"].isoformat() if row["published"] else None,
730                "url":       row["url"],
731                "doi":       row["doi"],
732                "summary":   row["summary"],
733            }
734            for row in conn.execute("""
735                SELECT r.SOURCE_FK AS source_fk, r.SOURCE_ID AS source_id,
736                       p.TITLE AS title, p.CATEGORY AS category,
737                       m.TAGS AS tags, p.HAS_PDF AS has_pdf, m.PUBLISHED AS published,
738                       m.URL AS url, m.DOI AS doi, m.SUMMARY AS summary
739                FROM PAPER_ROOTS r
740                JOIN PAPER p ON p.SOURCE_FK = r.SOURCE_FK
741                JOIN PAPER_META m ON m.PAPER_ID = p.PAPER_ID
742                WHERE p.VERSION = (SELECT MAX(VERSION) FROM PAPER WHERE SOURCE_FK = r.SOURCE_FK)
743                  AND r.STATUS = 'active'
744            """)
745        ]
746        # _GRAPH_AUTHORS_WITH_COUNT_SQL adds the per-name paper_count column
747        # (see queries.py); the default constant omits it and scans unchanged.
748        sql = (
749            _GRAPH_AUTHORS_WITH_COUNT_SQL
750            if exclude_single_authors
751            else _GRAPH_AUTHORS_SQL
752        )
753        author_rows = conn.execute(sql).fetchall()
754
755    seen_authors: set[str] = set()
756    author_nodes: list[dict] = []
757    edges: list[dict] = []
758    for row in author_rows:
759        name = row["author_name"]
760        # Drop an author when its paper_count is below two. A NULL count (no
761        # matching AUTHOR row) fails the `is not None` guard, so it is kept.
762        if (
763            exclude_single_authors
764            and row["paper_count"] is not None
765            and row["paper_count"] < 2
766        ):
767            continue
768        node_id = f"author::{name}"
769        if node_id not in seen_authors:
770            node: dict[str, object] = {"id": node_id, "label": name, "type": "author"}
771            # author_fk is NULL when the JSON name matches no AUTHOR row.
772            if row["author_fk"] is not None:
773                node["author_id"] = row["author_fk"]
774            author_nodes.append(node)
775            seen_authors.add(node_id)
776        edges.append({"source": row["source_fk"], "target": node_id})
777
778    return paper_nodes + author_nodes, edges

Returns (nodes, edges) ready to pass to the graph view.

With exclude_single_authors True, an author node is dropped unless some AUTHOR_FK matching its name (COLLATE NOCASE) has two or more active papers.

def set_has_pdf(source_id: str, version: int, has: bool) -> None: View Source

445def set_has_pdf(source_id: str, version: int, has: bool) -> None:
446    """Set the has_pdf flag for a specific paper version."""
447    with _connect() as conn:
448        conn.execute(
449            "UPDATE PAPER SET HAS_PDF = ? WHERE SOURCE_ID = ? AND VERSION = ?",
450            (has, source_id, version),
451        )

Set the has_pdf flag for a specific paper version.

def set_pdf_path(source_id: str, path: str, version: int | None = None) -> None: View Source

461def set_pdf_path(source_id: str, path: str, version: int | None = None) -> None:
462    """Set the pdf_path for a paper (all versions)."""
463    with _connect() as conn:
464        if version:
465            conn.execute(
466                "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN "
467                "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?)",
468                (path, source_id, version),
469            )
470        else:
471            lastrow = conn.execute(
472                "UPDATE PAPER_META SET PDF_PATH = ? WHERE PAPER_ID IN "
473                "(SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ?)",
474                (path, source_id),
475            )
476            if lastrow:
477                print("Warning: you are updating the paths of all pdfs associated with this paper. This will cause the pdfs of other version to be deleted, those other version.")
478                print("This may incorrectly update the database, consider using different methodology")

Set the pdf_path for a paper (all versions).

def set_full_text( full_text: str | None, paper_id: int | None, source_id: str | None, version: int | None) -> None: View Source

908def set_full_text(full_text: str|None, paper_id: int|None, source_id: str|None, version: int|None) -> None:
909    """Store extracted TeX full text and update the FTS index."""
910    with _connect() as conn:
911        row = conn.execute(
912            "SELECT PAPER_ID FROM PAPER WHERE SOURCE_ID = ? AND VERSION = ?",
913            (source_id, version),
914        ).fetchone()
915        if row is None:
916            return
917        paper_id = int(row["PAPER_ID"])
918        conn.execute(
919            "UPDATE PAPER_META SET FULL_TEXT = ?, DOWNLOADED_SOURCE = 1 WHERE PAPER_ID = ?",
920            (full_text, paper_id),
921        )
922        conn.execute("DELETE FROM papers_fts WHERE paper_id = ?", (source_id,))
923        conn.execute(
924            "INSERT INTO papers_fts(paper_id, full_text) VALUES (?, ?)",
925            (source_id, full_text),
926        )

Store extracted TeX full text and update the FTS index.

def search_full_text(query: str, limit: int = 20) -> list[sqlite3.Row]: View Source

929def search_full_text(query: str, limit: int = 20) -> list[sqlite3.Row]:
930    """Full-text search over TeX source content. Returns matching papers."""
931    with _connect() as conn:
932        if not conn.execute(
933            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='papers_fts'"
934        ).fetchone():
935            return []
936        return conn.execute("""
937            SELECT p.* FROM latest_papers p
938            JOIN papers_fts fts ON p.source_id = fts.paper_id
939            WHERE papers_fts MATCH ?
940            ORDER BY rank
941            LIMIT ?
942        """, (query, limit)).fetchall()

Full-text search over TeX source content. Returns matching papers.

class Q: View Source

35class Q:
36    """Composable SQL WHERE predicate. Combine with &, |, ~."""
37    def __init__(self, sql: str, *params) -> None:
38        self.sql    = sql
39        self.params = params
40
41    def __and__(self, other: Q) -> Q:
42        return Q(f"({self.sql} AND {other.sql})", *self.params, *other.params)
43
44    def __or__(self, other: Q) -> Q:
45        return Q(f"({self.sql} OR {other.sql})", *self.params, *other.params)
46
47    def __invert__(self) -> Q:
48        return Q(f"(NOT {self.sql})", *self.params)

Composable SQL WHERE predicate. Combine with &, |, ~.

Q(sql: str, *params) View Source

37    def __init__(self, sql: str, *params) -> None:
38        self.sql    = sql
39        self.params = params

sql

params

class Status(builtins.str, enum.Enum): View Source

 8class Status(str, enum.Enum):
 9    ACTIVE   = "active"
10    ARCHIVED = "archived"
11    DELETED  = "deleted"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

ACTIVE = <Status.ACTIVE: 'active'>

ARCHIVED = <Status.ARCHIVED: 'archived'>

DELETED = <Status.DELETED: 'deleted'>

def ensure_projects_db() -> None: View Source

15def ensure_projects_db() -> None:
16    with _connect() as conn:
17        row = conn.execute(
18            "SELECT name FROM sqlite_master WHERE type='table' AND name='PROJECT'"
19        ).fetchone()
20    if row is None:
21        raise RuntimeError("PROJECT table not found — run apply_sql_schema first")

def get_project( project_id: int, load_sources: bool = True) -> Project | None: View Source

232def get_project(project_id: int, load_sources: bool = True) -> Optional[Project]:
233    with _connect() as conn:
234        row = conn.execute(
235            "SELECT * FROM PROJECT WHERE PROJECT_FK = ?", (project_id,)
236        ).fetchone()
237    return Project.from_row(row, load_sources=load_sources) if row else None

def filter_projects( condition: Q | None = None, load_sources: bool = True) -> list[Project]: View Source

240def filter_projects(condition: Q | None = None, load_sources: bool = True) -> list[Project]:
241    if condition is None:
242        sql, params = "SELECT * FROM PROJECT", ()
243    else:
244        sql    = f"SELECT * FROM PROJECT WHERE {condition.sql}"
245        params = condition.params
246    with _connect() as conn:
247        rows = conn.execute(sql, params).fetchall()
248    return [Project.from_row(row, load_sources=load_sources) for row in rows]

def color_to_hex(color: int) -> str: View Source

26def color_to_hex(color: int) -> str:
27    return f"#{color:06x}"

def color_from_hex(hex_str: str) -> int: View Source

30def color_from_hex(hex_str: str) -> int:
31    return int(hex_str.lstrip("#"), 16)

@dataclass

class Note: View Source

 35@dataclass
 36class Note:
 37    source_fk:   int
 38    project_id:  Optional[int]               = None
 39    paper_id_fk: Optional[int]               = None  # pins note to a specific paper version
 40    title:       str                          = ""
 41    content:     str                          = ""
 42    id:          Optional[int]               = None
 43    created_at:  Optional[datetime.datetime] = None
 44    updated_at:  Optional[datetime.datetime] = None
 45
 46    @classmethod
 47    def from_row(cls, row) -> Note:
 48        return cls(
 49            id          = row["NOTE_SK"],
 50            source_fk   = row["SOURCE_FK"],
 51            paper_id_fk = row["PAPER_ID_FK"],
 52            project_id  = row["PROJECT_FK"],
 53            title       = row["TITLE"] or "",
 54            content     = row["NOTE"] or "",
 55            created_at  = row["CREATED_AT"],
 56            updated_at  = row["UPDATED_AT"],
 57        )
 58
 59    def to_details(self) -> NoteDetails:
 60        return NoteDetails(
 61            note_id     = self.id,
 62            source_fk   = self.source_fk,
 63            paper_id_fk = self.paper_id_fk,
 64            project_id  = self.project_id,
 65            title       = self.title,
 66            content     = self.content,
 67            created_at  = self.created_at,
 68            updated_at  = self.updated_at,
 69        )
 70
 71    def save(self) -> None:
 72        now = datetime.datetime.now(datetime.timezone.utc)
 73        if self.id is None:
 74            with _connect() as conn:
 75                cur = conn.execute(
 76                    """
 77                    INSERT INTO NOTE
 78                        (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT)
 79                    VALUES (?, ?, ?, ?, ?, ?, ?)
 80                    """,
 81                    (self.source_fk, self.paper_id_fk, self.project_id,
 82                     self.title, self.content, now, now),
 83                )
 84                self.id = cur.lastrowid
 85            self.created_at = now
 86            self.updated_at = now
 87        else:
 88            with _connect() as conn:
 89                cur = conn.execute(
 90                    "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?",
 91                    (self.title, self.content, now, self.id),
 92                )
 93                if cur.rowcount == 0:
 94                    raise ValueError(f"Note with id={self.id} does not exist")
 95            self.updated_at = now
 96
 97    def delete(self) -> None:
 98        if self.id is None:
 99            return
100        if delete_note(self.id):
101            self.id = None

source_fk: int

project_id: int | None = None

paper_id_fk: int | None = None

title: str = ''

content: str = ''

id: int | None = None

created_at: datetime.datetime | None = None

updated_at: datetime.datetime | None = None

@classmethod

def from_row(cls, row) -> Note: View Source

46    @classmethod
47    def from_row(cls, row) -> Note:
48        return cls(
49            id          = row["NOTE_SK"],
50            source_fk   = row["SOURCE_FK"],
51            paper_id_fk = row["PAPER_ID_FK"],
52            project_id  = row["PROJECT_FK"],
53            title       = row["TITLE"] or "",
54            content     = row["NOTE"] or "",
55            created_at  = row["CREATED_AT"],
56            updated_at  = row["UPDATED_AT"],
57        )

def to_details(self) -> service.models.note.NoteDetails: View Source

59    def to_details(self) -> NoteDetails:
60        return NoteDetails(
61            note_id     = self.id,
62            source_fk   = self.source_fk,
63            paper_id_fk = self.paper_id_fk,
64            project_id  = self.project_id,
65            title       = self.title,
66            content     = self.content,
67            created_at  = self.created_at,
68            updated_at  = self.updated_at,
69        )

def save(self) -> None: View Source

71    def save(self) -> None:
72        now = datetime.datetime.now(datetime.timezone.utc)
73        if self.id is None:
74            with _connect() as conn:
75                cur = conn.execute(
76                    """
77                    INSERT INTO NOTE
78                        (SOURCE_FK, PAPER_ID_FK, PROJECT_FK, TITLE, NOTE, CREATED_AT, UPDATED_AT)
79                    VALUES (?, ?, ?, ?, ?, ?, ?)
80                    """,
81                    (self.source_fk, self.paper_id_fk, self.project_id,
82                     self.title, self.content, now, now),
83                )
84                self.id = cur.lastrowid
85            self.created_at = now
86            self.updated_at = now
87        else:
88            with _connect() as conn:
89                cur = conn.execute(
90                    "UPDATE NOTE SET TITLE = ?, NOTE = ?, UPDATED_AT = ? WHERE NOTE_SK = ?",
91                    (self.title, self.content, now, self.id),
92                )
93                if cur.rowcount == 0:
94                    raise ValueError(f"Note with id={self.id} does not exist")
95            self.updated_at = now

def delete(self) -> None: View Source

 97    def delete(self) -> None:
 98        if self.id is None:
 99            return
100        if delete_note(self.id):
101            self.id = None

def ensure_notes_db() -> None: View Source

28def ensure_notes_db() -> None:
29    if not _notes_table_exists():
30        raise RuntimeError("NOTE table not found — run apply_sql_schema first")

def get_note(note_id: int) -> service.models.note.NoteDetails | None: View Source

106def get_note(note_id: int) -> Optional[NoteDetails]:
107    row = _get_note_row(note_id)
108    return Note.from_row(row).to_details() if row else None

def get_notes( source_fk: int, project_id: int | None = None, *, all_projects: bool = False) -> list[service.models.note.NoteDetails]: View Source

155def get_notes(
156    source_fk: int,
157    project_id: Optional[int] = None,
158    *,
159    all_projects: bool = False,
160) -> list[NoteDetails]:
161    if all_projects:
162        rows = _list_notes(source_fk=source_fk)
163    elif project_id is None:
164        rows = _list_notes(source_fk=source_fk, project_unscoped=True)
165    else:
166        rows = _list_notes(source_fk=source_fk, project_fk=project_id)
167    return [Note.from_row(row).to_details() for row in rows]

def get_project_notes(project_id: int) -> list[service.models.note.NoteDetails]: View Source

170def get_project_notes(project_id: int) -> list[NoteDetails]:
171    with _connect() as conn:
172        rows = conn.execute(
173            "SELECT * FROM NOTE WHERE PROJECT_FK = ? ORDER BY SOURCE_FK ASC, CREATED_AT ASC",
174            (project_id,),
175        ).fetchall()
176    return [Note.from_row(row).to_details() for row in rows]

def count_project_notes(project_id: int) -> int: View Source

179def count_project_notes(project_id: int) -> int:
180    return _count_project_notes(project_id)

def count_paper_notes(source_fk: int, project_id: int | None = None) -> int: View Source

183def count_paper_notes(source_fk: int, project_id: Optional[int] = None) -> int:
184    return _count_notes(source_fk, project_fk=project_id)

def note_counts_by_paper_for_project(project_id: int) -> dict[int, int]: View Source

200def note_counts_by_paper_for_project(project_id: int) -> dict[int, int]:
201    """
202    Return note counts keyed by SOURCE_FK for each paper in the project.
203    Papers with no notes appear with count 0. Missing project_id yields empty dict.
204    """
205    with _connect() as conn:
206        rows = conn.execute(
207            """
208            SELECT pp.SOURCE_FK AS source_fk, COALESCE(n.cnt, 0) AS note_count
209            FROM PROJECT_TO_PAPER pp
210            JOIN PAPER_ROOTS r ON r.SOURCE_FK = pp.SOURCE_FK
211            LEFT JOIN (
212                SELECT SOURCE_FK, COUNT(*) AS cnt
213                FROM NOTE
214                WHERE PROJECT_FK = ?
215                GROUP BY SOURCE_FK
216            ) AS n ON n.SOURCE_FK = pp.SOURCE_FK
217            WHERE pp.PROJECT_FK = ? AND r.STATUS = 'active'
218            ORDER BY pp.PROJECT_TO_PAPER_FK
219            """,
220            (project_id, project_id),
221        ).fetchall()
222    return {int(row["source_fk"]): int(row["note_count"]) for row in rows}

Return note counts keyed by SOURCE_FK for each paper in the project. Papers with no notes appear with count 0. Missing project_id yields empty dict.

def search_notes_source_fks(query: str, limit: int = 50) -> list[int]: View Source

229def search_notes_source_fks(query: str, limit: int = 50) -> list[int]:
230    """Return distinct SOURCE_FKs of active papers whose note title or content contains query."""
231    pattern = f"%{_escape_like(query)}%"
232    with _connect() as conn:
233        rows = conn.execute(
234            """
235            SELECT n.SOURCE_FK
236            FROM NOTE n
237            JOIN PAPER_ROOTS r ON r.SOURCE_FK = n.SOURCE_FK
238            WHERE r.STATUS = 'active'
239              AND (n.TITLE LIKE ? ESCAPE '\\' OR n.NOTE LIKE ? ESCAPE '\\')
240            GROUP BY n.SOURCE_FK
241            ORDER BY MAX(n.UPDATED_AT) DESC
242            LIMIT ?
243            """,
244            (pattern, pattern, limit),
245        ).fetchall()
246    return [int(row["SOURCE_FK"]) for row in rows]

Return distinct SOURCE_FKs of active papers whose note title or content contains query.