Upload files to "/"

2026-06-10 18:55:29 +00:00
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
 venv
 .env
--- a/main.py
+++ b/main.py
@@ -0,0 +1,306 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Interactive R&D Assistant: generates one research idea at a time (user decides when to stop),
 then suggests 3 search terms per idea, searches mithal.space,
 and saves everything in a markdown file.
 """
 import sys
 import time
 import json
 import requests
 from pathlib import Path
 from datetime import datetime
 from urllib.parse import quote
 from dotenv import load_dotenv
 load_dotenv()
 import os
 # =============================================================================
 # 1. LLM configuration
 # =============================================================================
 # Bearer token for the chat API, read from the environment (load_dotenv() has
 # already run above, so a local .env file can supply it).
 # NOTE(review): os.getenv returns None when AUTH_TOKEN is unset, in which case
 # the Authorization header below is literally "Bearer None" - consider
 # validating the token before the first request.
 AUTH_TOKEN = os.getenv("AUTH_TOKEN")  
 # Root URL that every chat API endpoint in this file is built from.
 BASE_URL = "https://chat.cumin.dev/api"
 # Headers sent with every request made to the chat API in this file.
 HEADERS = {
    "Authorization": f"Bearer {AUTH_TOKEN}",
    "Content-Type": "application/json"
 }
 # Prompt asking the LLM for a SINGLE research idea as a JSON object.
 # The four field names requested here ("title", "explanation", "novelty",
 # "reference") are the same keys that parse_single_idea() requires.
 SINGLE_IDEA_PROMPT = """You are an expert R&D assistant specialized in AI, Machine Learning, and LLMs.
 Generate exactly ONE creative, underexplored research direction.
 Respond ONLY with a valid JSON object with exactly these fields:
 - "title": short title in Arabic
 - "explanation": brief explanation (2-3 sentences) in Arabic
 - "novelty": why it's promising and novel evidence (1 sentence) in Arabic
 - "reference": at least one paper or source (as a short text) in Arabic
 Example:
 {
  "title": "عنوان الفكرة",
  "explanation": "شرح موجز للفكرة...",
  "novelty": "لماذا هذه الفكرة جديدة وواعدة...",
  "reference": "مثال: ورقة علمية من arXiv..."
 }
 Output ONLY the JSON object, no extra text."""
 def create_conversation(title="R&D Session", timeout=30):
    """Create a new conversation on the chat API and return its id.

    Args:
        title: Title sent in the JSON body of the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The "id" field of the JSON response, or None when the request
        fails, the status code is not 200/201, or the body is not a JSON
        object. A diagnostic is printed on every failure path.
    """
    url = f"{BASE_URL}/conversations"
    try:
        resp = requests.post(url, headers=HEADERS, json={"title": title}, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures are reported like any other failure so the caller's
        # "return None -> ask the user to retry" flow keeps working.
        print(f"Failed to create conversation: {exc}")
        return None
    if resp.status_code not in (200, 201):
        print(f"Failed to create conversation: {resp.status_code} - {resp.text}")
        return None
    try:
        data = resp.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        print("Failed to create conversation: response body is not valid JSON")
        return None
    if not isinstance(data, dict):
        print("Failed to create conversation: unexpected response shape")
        return None
    return data.get("id")
 def send_message(conv_id, text, timeout=120):
    """Post a user message to an existing conversation.

    Args:
        conv_id: Identifier of the conversation to post to.
        text: Message text sent as the "message" field (no files attached).
        timeout: Seconds to wait for the HTTP request. The default is
            generous because the endpoint may hold the request open while
            the reply is being produced (assumption - confirm against the API).

    Returns:
        True when the API answers with HTTP 200, False on any other status
        or when the request itself fails.
    """
    url = f"{BASE_URL}/conversations/{conv_id}/chat"
    try:
        resp = requests.post(
            url,
            headers=HEADERS,
            json={"message": text, "files": []},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"Failed to send message: {exc}")
        return False
    return resp.status_code == 200
 def get_conversation(conv_id, timeout=30):
    """Fetch a conversation from the chat API.

    Args:
        conv_id: Identifier of the conversation to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body when the API answers with HTTP 200; None on
        any other status, on a network error, or when the body is not
        valid JSON.
    """
    url = f"{BASE_URL}/conversations/{conv_id}"
    try:
        resp = requests.get(url, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Callers poll this function, so a transient network error is treated
        # as "nothing available yet" rather than a crash.
        return None
    if resp.status_code != 200:
        return None
    try:
        return resp.json()
    except ValueError:
        return None
 def wait_for_assistant_reply(conv_id, last_msg_count, timeout=120, poll_interval=2):
    """Poll a conversation until the assistant has replied or the timeout expires.

    Args:
        conv_id: Identifier of the conversation to poll.
        last_msg_count: Number of messages already known; a reply is only
            accepted once the conversation holds more messages than this.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        The "content" of the newest message once that message has the role
        "assistant", or None when the timeout expires first.
    """
    # A monotonic clock keeps the deadline stable if the system time is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        conv = get_conversation(conv_id)
        messages = conv.get("messages") if isinstance(conv, dict) else None
        if messages and len(messages) > last_msg_count:
            last_msg = messages[-1]
            if last_msg.get("role") == "assistant":
                return last_msg.get("content")
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline, so the timeout is honoured closely.
        time.sleep(min(poll_interval, remaining))
    return None
 def get_llm_response(prompt, timeout=120):
    """Send a prompt in a fresh conversation and return the assistant's reply.

    Args:
        prompt: Text sent as the single user message of the conversation.
        timeout: Maximum number of seconds to wait for the reply.

    Returns:
        The reply text, or None when the conversation cannot be created or
        read, the message cannot be sent, or no reply arrives in time.
    """
    conv_id = create_conversation()
    if not conv_id:
        return None
    # Take the message-count baseline BEFORE sending. If it were taken after
    # the send, a reply that had already arrived would be counted as "known"
    # and the wait below would run into its timeout.
    conv_data = get_conversation(conv_id)
    if not conv_data:
        return None
    msg_count = len(conv_data.get("messages", []))
    if not send_message(conv_id, prompt):
        return None
    print(f"  ⏳ Waiting for LLM response (up to {timeout}s)...")
    return wait_for_assistant_reply(conv_id, msg_count, timeout=timeout)
 def parse_single_idea(raw_json_str):
    """Parse the LLM reply for a single idea into a dict.

    The reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence. When the text as a whole is not a usable JSON
    object, the span from the first "{" to the last "}" is tried as a
    fallback so that replies with extra prose around the object still parse.

    Args:
        raw_json_str: Raw reply text from the LLM.

    Returns:
        The parsed dict when it contains the keys "title", "explanation",
        "novelty" and "reference"; otherwise None (a diagnostic is printed).
    """
    required_keys = ("title", "explanation", "novelty", "reference")
    if not isinstance(raw_json_str, str):
        print("  ❌ Failed to parse JSON from LLM.")
        return None
    clean = raw_json_str.strip()
    # Drop an opening fence; the language tag is matched case-insensitively.
    if clean.startswith("```"):
        clean = clean[3:]
        if clean[:4].lower() == "json":
            clean = clean[4:]
    if clean.endswith("```"):
        clean = clean[:-3]
    candidates = [clean]
    start, end = clean.find("{"), clean.rfind("}")
    if 0 <= start < end:
        candidates.append(clean[start:end + 1])
    parsed_any = False
    for candidate in candidates:
        try:
            idea = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        parsed_any = True
        if isinstance(idea, dict) and all(k in idea for k in required_keys):
            return idea
    if parsed_any:
        print("  ❌ LLM response missing required fields.")
    else:
        print("  ❌ Failed to parse JSON from LLM.")
        print("  Raw response (first 500 chars):", raw_json_str[:500])
    return None
 # =============================================================================
 # 2. Search engine integration (no screenshots)
 # =============================================================================
 from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
 # Selectors used by search_mithal() to read the results page.
 # Anchor element of one search result; waited for and then enumerated.
 RESULT_LINK_SELECTOR = "a.result-link"
 # Element inside a result anchor whose inner text is used as the title.
 TITLE_SELECTOR = "h3"
 # Element inside a result anchor whose "data-url" attribute is used as the
 # URL; when it is absent the anchor's own "href" is used instead.
 URL_SELECTOR = "small.arabic-url"
 def search_mithal(query, retry_user_choice=True):
    """Search mithal.space once and return the top results.

    Args:
        query: Search phrase; it is URL-encoded before being sent.
        retry_user_choice: When True, a failed search prompts the user to
            retry, skip the term, or abort the whole script. When False, a
            failed search returns an empty list immediately.

    Returns:
        A list of up to three (title, url) tuples; an empty list when the
        search failed and was skipped.

    Raises:
        SystemExit: When the user chooses to abort at the prompt.
    """
    search_url = f"https://mithal.space/search?q={quote(query, safe='')}"
    print(f"    🔍 Searching: {query}")
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(locale="ar-SA", viewport={"width": 1280, "height": 900})
            page = context.new_page()
            while True:
                try:
                    page.goto(search_url, wait_until="domcontentloaded", timeout=15000)
                    page.wait_for_selector(RESULT_LINK_SELECTOR, timeout=10000)
                    results = []
                    for link in page.query_selector_all(RESULT_LINK_SELECTOR)[:3]:
                        title_elem = link.query_selector(TITLE_SELECTOR)
                        title = title_elem.inner_text() if title_elem else "(بدون عنوان)"
                        url_elem = link.query_selector(URL_SELECTOR)
                        url = url_elem.get_attribute("data-url") if url_elem else link.get_attribute("href")
                        # Fall back to "#" for a missing or whitespace-only URL.
                        results.append((title.strip(), (url or "").strip() or "#"))
                    return results
                except PlaywrightTimeoutError:
                    print(f"    ⚠️ Search timed out for: {query}")
                    prompt = "      Retry (r), Skip this term (s), or Abort whole script (a)? "
                except Exception as e:
                    print(f"    ❌ Error searching: {e}")
                    prompt = "      Retry (r), Skip (s), or Abort (a)? "
                if not retry_user_choice:
                    return []
                choice = input(prompt).strip().lower()
                if choice == 'r':
                    continue
                if choice == 'a':
                    sys.exit(0)
                # 's' and any unrecognised answer both skip this term, for
                # timeouts and generic errors alike.
                return []
        finally:
            # Runs on every exit path (return, abort, unexpected exception) so
            # the browser process is always released.
            browser.close()
 # =============================================================================
 # 3. Search term generation for a given idea
 # =============================================================================
 def get_search_terms_for_idea(idea_title, idea_explanation, timeout=90):
    """Ask the LLM for Arabic search queries related to an idea.

    Args:
        idea_title: Title of the idea, inserted into the prompt.
        idea_explanation: Explanation of the idea, inserted into the prompt.
        timeout: Maximum number of seconds to wait for the LLM reply.

    Returns:
        A list of at most three non-blank query strings, or None when no
        reply arrives or the reply is not a JSON array of strings.
    """
    prompt = f"""Based on the following R&D idea:
 Title: {idea_title}
 Explanation: {idea_explanation}
 Generate exactly 3 Arabic search queries (short phrases) that would help find related research, papers, or projects online.
 Return ONLY a JSON array of strings, e.g. ["استعلام 1", "استعلام 2", "استعلام 3"].
 No extra text."""
    reply = get_llm_response(prompt, timeout=timeout)
    if not reply:
        return None
    if not isinstance(reply, str):
        print(f"    ⚠️ Could not parse search terms for idea: {idea_title}")
        return None
    clean = reply.strip()
    # Remove a surrounding Markdown code fence, if any.
    if clean.startswith("```json"):
        clean = clean[7:]
    elif clean.startswith("```"):
        clean = clean[3:]
    if clean.endswith("```"):
        clean = clean[:-3]
    try:
        terms = json.loads(clean)
    except json.JSONDecodeError:
        # Only parse errors are handled here; Ctrl+C and SystemExit propagate.
        print(f"    ⚠️ Could not parse search terms for idea: {idea_title}")
        return None
    if isinstance(terms, list) and all(isinstance(t, str) for t in terms):
        # Blank entries would only trigger pointless searches, so drop them.
        return [t.strip() for t in terms if t.strip()][:3]
    print(f"    ⚠️ Invalid search terms format for idea: {idea_title}")
    return None
 # =============================================================================
 # 4. Main interactive loop
 # =============================================================================
 def main():
    """Run the interactive session: generate ideas, search, and write the report.

    Each round asks the LLM for one idea, appends it to a timestamped
    markdown file in the current directory, asks the LLM for search terms,
    searches each term and appends the results. The user is then asked
    whether to continue. The file is appended to after every step, so an
    interrupted run keeps what was produced so far.
    """
    print("="*70)
    print("INTERACTIVE R&D ASSISTANT with integrated search")
    print("Type 'stop' when asked for another idea, or press Ctrl+C to abort.")
    print("="*70)
    # Create output file with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_file = Path(f"rd_ideas_{timestamp}.md")
    out_file.write_text(f"# R&D Ideas Report\n*Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n", encoding="utf-8")

    def append_md(text):
        # Append one chunk of markdown to the report file.
        with out_file.open("a", encoding="utf-8") as f:
            f.write(text)

    # Number of ideas written so far; only advanced after a successful parse
    # so failed attempts do not leave gaps in the report numbering.
    idea_counter = 0
    while True:
        idea_number = idea_counter + 1
        print(f"\n📡 Generating idea #{idea_number}...")
        raw_idea = get_llm_response(SINGLE_IDEA_PROMPT, timeout=120)
        if not raw_idea:
            print("❌ Failed to get idea from LLM. Do you want to try again?")
            retry = input("Try again? (y/n): ").strip().lower()
            if retry == 'y':
                continue
            break
        idea = parse_single_idea(raw_idea)
        if not idea:
            print("❌ Could not parse idea. Skipping this one.")
            continue
        idea_counter = idea_number
        title = idea.get("title", "بدون عنوان")
        explanation = idea.get("explanation", "")
        novelty = idea.get("novelty", "")
        reference = idea.get("reference", "")
        # Write idea to markdown file
        append_md(
            f"## فكرة {idea_counter}: {title}\n\n"
            f"**الشرح:** {explanation}\n\n"
            f"**الجدة:** {novelty}\n\n"
            f"**مرجع:** {reference}\n\n"
        )
        print(f"\n✅ Idea {idea_counter}: {title}")
        print("   Generating 3 search terms (LLM)...")
        terms = get_search_terms_for_idea(title, explanation, timeout=90)
        if not terms:
            append_md("**❌ فشل في توليد مصطلحات البحث**\n\n---\n\n")
            print("   ⚠️ No search terms generated. Moving to next idea decision.\n")
        else:
            append_md("### مصطلحات البحث والنتائج\n\n")
            for term_idx, term in enumerate(terms, 1):
                print(f"    Term {term_idx}: {term}")
                # The heading is written before searching so an aborted search
                # still leaves a trace of the term in the report.
                append_md(f"#### مصطلح {term_idx}: `{term}`\n\n")
                results = search_mithal(term, retry_user_choice=True)
                if results:
                    lines = [
                        f"{rank}. [{res_title}]({res_url})  \n"
                        for rank, (res_title, res_url) in enumerate(results, 1)
                    ]
                    append_md("".join(lines) + "\n")
                else:
                    append_md("*لا توجد نتائج*  \n\n")
            append_md("---\n\n")
        # Ask if user wants another idea; 'stop' is accepted as announced in
        # the banner, in addition to 'n'.
        while True:
            again = input("\n🔁 Do you want another research idea? (y/n): ").strip().lower()
            if again in ('y', 'n', 'stop'):
                break
            print("Please answer 'y' or 'n'.")
        if again != 'y':
            break
    print(f"\n🎉 Finished! Results saved to: {out_file.resolve()}")
 if __name__ == "__main__":
    # Script entry point: run the interactive session and turn Ctrl+C into a
    # clean exit with status 0 instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n⚠️ User interrupted. Exiting gracefully.")
        raise SystemExit(0)
--- a/requirements.txt
+++ b/requirements.txt