"""Build an enhanced HTML edition of the Thetis SDR manual.

The script downloads the official manual PDF, extracts its text page by
page, converts every page to simple HTML (section headings, paragraphs and
preformatted table-of-contents lines) and appends community notes to each
page whose text mentions a matching keyword.
"""

import html
import os
import re
import urllib.request

try:
    import PyPDF2
except ImportError:
    # Only PDF extraction needs PyPDF2; the HTML helpers stay usable without
    # it. extract_pdf_pages() reports the missing dependency when called.
    PyPDF2 = None

# Output directory for the downloaded PDF and the generated HTML file.
kb_dir = "C:\\thetis_kb"

manual_url = (
    "https://github.com/TAPR/OpenHPSDR-Thetis/raw/master/"
    "Documentation/Radio/Thetis%20manual_1.1.pdf"
)

# keyword -> list of (title, content, source name, source URL).  A note is
# appended to every page whose text contains the keyword (case-insensitive).
community_enhancements = {
    "installation": [
        (
            "Hermes Lite 2 Installation",
            "For Hermes Lite 2 users, there is a specific version of Thetis maintained by Reid Campbell (MI0BOT) that supports the hardware differences.",
            "Hermes Lite 2 Plus",
            "https://www.hermeslite2plus.com/p/thetis-software-versions.html",
        )
    ],
    "puresignal": [
        (
            "PureSignal Support",
            "When using PureSignal with the Hermes Lite 2, note that some software including Thetis may not support this feature directly. Verify compatibility.",
            "Ham Radio Secrets",
            "https://www.hamradiosecrets.com/sdr-ham-radio-transceiver.html",
        )
    ],
}

HTML_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Enhanced Thetis Manual</title>
<style>
.page { border-bottom: 1px solid #ccc; margin-bottom: 1em; }
.enhancement { border-left: 4px solid #2a7; padding-left: 1em; }
</style>
</head>
<body>
<h1>Enhanced Thetis SDR Manual</h1>
<p>This manual combines the official Thetis documentation with community knowledge.</p>
<h2>Table of Contents</h2>
"""

HTML_TAIL = """</body>
</html>
"""

# Compiled once: these are applied to every line of every page.
_PAGE_FOOTER_RE = re.compile(r'^Page \d+( of \d+)?$')
_SECTION_RE = re.compile(r'^[0-9]+(\.[0-9]+)* ')
_TOC_TITLE_RE = re.compile(r'Table of Contents|Contents')
_TOC_ENTRY_RE = re.compile(r'[0-9]+\.[0-9]+\s+.+\s+[0-9]+$', re.MULTILINE)
_LEADER_RE = re.compile(r'\.{3,}|\s{5,}')
_TRAILING_NUMBER_RE = re.compile(r'\d+$')


def download_manual(url, dest_path):
    """Download *url* to *dest_path*.

    The data is written to a temporary ``.part`` file and moved into place
    only after the download succeeds, so an interrupted transfer never
    leaves a truncated PDF behind.  If the download fails and a previous
    copy exists at *dest_path*, that copy is reused; otherwise the original
    ``OSError`` (which includes ``urllib.error.URLError``) propagates.
    """
    partial_path = dest_path + ".part"
    try:
        urllib.request.urlretrieve(url, partial_path)
    except OSError as err:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        if not os.path.exists(dest_path):
            raise
        print(f"Download failed ({err}); using existing copy at {dest_path}")
    else:
        os.replace(partial_path, dest_path)


def extract_pdf_pages(pdf_path):
    """Return the extracted text of every page of *pdf_path*, in page order.

    Pages for which no text could be extracted are returned as ``""``.

    Raises:
        ImportError: if PyPDF2 is not installed.
    """
    if PyPDF2 is None:
        raise ImportError(
            "PyPDF2 is required to read the manual (pip install PyPDF2)"
        )
    with open(pdf_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        return [page.extract_text() or "" for page in reader.pages]


def looks_like_toc(text):
    """Return True if *text* looks like a table-of-contents page.

    A page qualifies when "Contents" appears within its first 100 characters
    or when it contains a numbered entry that ends in a page number
    (for example ``1.2 Setup 14``).
    """
    return bool(_TOC_TITLE_RE.search(text[:100]) or _TOC_ENTRY_RE.search(text))


def render_text_blocks(text):
    """Convert the plain text of one page into a list of HTML fragments.

    Running headers/footers are dropped, numbered lines become ``<h3>``
    headings, dotted-leader lines ending in a number become ``<pre>`` lines,
    and consecutive ordinary lines are joined into ``<p>`` paragraphs that
    end at a blank line, a heading or a leader line.
    """
    fragments = []
    paragraph = []

    def flush_paragraph():
        if paragraph:
            joined = " ".join(paragraph)
            fragments.append(f'<p>{html.escape(joined)}</p>\n')
            paragraph.clear()

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        # Skip page headers/footers.
        if line.startswith("THETIS User Manual") or _PAGE_FOOTER_RE.match(line):
            continue
        if not line:
            flush_paragraph()
        elif _SECTION_RE.match(line):
            flush_paragraph()
            fragments.append(f'<h3>{html.escape(line)}</h3>\n')
        elif _LEADER_RE.search(line) and _TRAILING_NUMBER_RE.search(line):
            flush_paragraph()
            fragments.append(
                f'<pre class="toc-line">{html.escape(line)}</pre>\n'
            )
        else:
            paragraph.append(line)

    flush_paragraph()
    return fragments


def render_enhancements(text, enhancements):
    """Return HTML fragments for every community note whose keyword is in *text*.

    *enhancements* maps a keyword to a list of
    ``(title, content, source, url)`` tuples; matching is case-insensitive.
    All four fields are HTML-escaped, and the source is rendered as a link
    to *url*.
    """
    lowered = text.lower()
    fragments = []
    for keyword, notes in enhancements.items():
        if keyword.lower() not in lowered:
            continue
        for title, content, source, url in notes:
            fragments.append(
                '<div class="enhancement">\n'
                f'<h4>{html.escape(title)}</h4>\n'
                f'<p>{html.escape(content)}</p>\n'
                '<p class="source">Source: '
                f'<a href="{html.escape(url)}">{html.escape(source)}</a></p>\n'
                '</div>\n'
            )
    return fragments


def render_page(page_num, text, enhancements=None):
    """Return the HTML for one manual page.

    Args:
        page_num: 1-based page number, used for the page's ``id`` anchor.
        text: extracted page text; ``None`` is treated as an empty page.
        enhancements: keyword -> notes mapping; defaults to the module-level
            ``community_enhancements``.
    """
    if enhancements is None:
        enhancements = community_enhancements
    text = text or ""

    fragments = [
        f'<div class="page" id="page-{page_num}">\n',
        '<div class="page-content">\n',
    ]
    if looks_like_toc(text):
        # Keep the original line layout of table-of-contents pages.
        fragments.append(f'<pre>{html.escape(text)}</pre>\n')
    else:
        fragments.extend(render_text_blocks(text))
    fragments.extend(render_enhancements(text, enhancements))
    fragments.append('</div>\n')  # Close page-content
    fragments.append('</div>\n')  # Close page
    return "".join(fragments)


def build_html(pages, enhancements=None):
    """Return the complete HTML document for *pages* (a sequence of page texts).

    The document starts with a table of contents linking to each page's
    anchor, followed by the rendered pages in order.
    """
    toc_entries = "".join(
        f'<li><a href="#page-{page_num}">Page {page_num}</a></li>\n'
        for page_num in range(1, len(pages) + 1)
    )
    body = "".join(
        render_page(page_num, text, enhancements)
        for page_num, text in enumerate(pages, start=1)
    )
    return (
        HTML_HEAD
        + '<ul class="toc">\n' + toc_entries + '</ul>\n'
        + body
        + HTML_TAIL
    )


def main():
    """Download the manual, convert it and write ``enhanced_manual.html``."""
    os.makedirs(kb_dir, exist_ok=True)

    print("Downloading manual...")
    manual_path = os.path.join(kb_dir, "thetis_manual.pdf")
    download_manual(manual_url, manual_path)

    print("Processing PDF...")
    pages = extract_pdf_pages(manual_path)

    output_html_path = os.path.join(kb_dir, 'enhanced_manual.html')
    with open(output_html_path, 'w', encoding='utf-8') as f:
        f.write(build_html(pages))

    print(f"Enhanced manual created at {output_html_path}")
    print("Open this file in your browser to view the enhanced manual.")
    try:
        input("Press Enter to close...")
    except EOFError:
        # No interactive stdin (e.g. run from a scheduler): nothing to wait for.
        pass


if __name__ == "__main__":
    main()