diff --git a/docs/assets/search.js b/docs/assets/search.js index ae203c1..20acdf9 100644 --- a/docs/assets/search.js +++ b/docs/assets/search.js @@ -1 +1 @@ -window.searchData = "data:application/octet-stream;base64,"; \ No newline at end of file +window.searchData = "data:application/octet-stream;base64,"; \ No newline at end of file diff --git a/docs/classes/torch.html b/docs/classes/torch.html index 247a52f..53edaa0 100644 --- a/docs/classes/torch.html +++ b/docs/classes/torch.html @@ -57,7 +57,7 @@

PyTorch Contributors, Leao, E. et al (2022), See also: Brain.js

-

Properties

Properties

Tensor Parameter add neg @@ -89,9 +89,8 @@

Properties

Tensor: typeof Tensor

Add methods from tensor.js (these methods are accessed with "torch."):

-
Parameter: typeof Parameter
add: ((a: any, b: any) => any)
neg: ((a: any) => any)
mul: ((a: any, b: any) => any)
div: ((a: any, b: any) => any)
matmul: ((a: any, b: any) => any)
exp: ((a: any) => any)
log: ((a: any) => any)
sqrt: ((a: any) => any)
pow: ((a: any, n: any) => any)
mean: ((a: any, dim?: number, keepdims?: boolean) => any)
masked_fill: ((a: any, mask: any, condition: any, value: any) => any)
variance: ((a: any, dim?: number, keepdims?: boolean) => any)
at: ((a: any, idx1: any, idx2: any) => any)
reshape: ((a: any, shape: any) => any)
_reshape: ((a: any, shape: any) => any)
transpose: ((a: any, dim1: any, dim2: any) => any)
tensor: ((data: any, requires_grad?: boolean, device?: string) => Tensor)
randint: ((low?: number, high?: number, shape?: number[], requires_grad?: boolean) => Tensor)
randn: ((shape: any, requires_grad?: boolean, device?: string, xavier?: boolean) => Tensor)
rand: ((shape: any, requires_grad?: boolean, device?: string) => Tensor)
tril: ((shape: any, requires_grad?: boolean, device?: string) => Tensor)
ones: ((shape: any, requires_grad?: boolean, device?: string) => Tensor)
zeros: ((shape: any, requires_grad?: boolean, device?: string) => Tensor)
broadcast: ((a: any, b: any) => Tensor)
save: ((model: any, file: any) => string)
load: ((model: any, loadedData: any) => any)
nn: {
    Module: typeof Module;
    Linear: typeof Linear;
    MultiHeadSelfAttention: typeof MultiHeadSelfAttention;
    FullyConnected: typeof FullyConnected;
    Block: typeof Block;
    Embedding: typeof Embedding;
    PositionalEmbedding: typeof PositionalEmbedding;
    ReLU: typeof ReLU;
    Softmax: typeof Softmax;
    Dropout: typeof Dropout;
    LayerNorm: typeof LayerNorm;
    CrossEntropyLoss: typeof CrossEntropyLoss;
}

Add submodules:

-
optim: {
    Adam: typeof Adam;
}
getShape: ((data: any, shape?: any[]) => any[])

Type declaration

    • (data, shape?): any[]
    • import { GPU } from "@eduardoleao052/gpu"

      -

      Parameters

      • data: any
      • shape: any[] = []

      Returns any[]

diff --git a/docs/functions/scrapeURL.html b/docs/functions/scrapeURL.html index cc68066..bfb4809 100644 --- a/docs/functions/scrapeURL.html +++ b/docs/functions/scrapeURL.html @@ -17,7 +17,7 @@ query string like this: http://localhost:3000/?url=https://example.org

Parameters

  • url: string

    any domain's URL

    -
  • Optionaloptions: {
        timeout: number;
        maxRedirects: number;
        checkBotDetection: number;
        changeReferer: number;
        userAgentIndex: number;
        useCORSProxy: number;
        urlProxy: string;
    } = {}
      +
  • Optionaloptions: {
        timeout: number;
        maxRedirects: number;
        checkBotDetection: number;
        changeReferer: number;
        userAgentIndex: number;
        useCORSProxy: number;
        proxy: string;
    } = {}
    • timeout: number

      default=5 - abort request if not retrieved, in seconds

      @@ -26,7 +26,7 @@
    • changeReferer: number

      default=true - set referer as google

    • userAgentIndex: number

      default=0 - index of [google bot, default chrome]

    • useCORSProxy: number

      default=false - use 60%-working corsproxy.io (in frontend JS)

      -
    • urlProxy: string

      default=false - use proxy url

      +
    • proxy: string

      default=false - use proxy url

Returns Promise<any>

  • HTML, JSON, arraybuffer, or error object
diff --git a/docs/functions/searchWeb.html b/docs/functions/searchWeb.html index 19601a7..9d63db0 100644 --- a/docs/functions/searchWeb.html +++ b/docs/functions/searchWeb.html @@ -4,15 +4,15 @@ SearXNG is a free internet metasearch engine which aggregates results from more than 70 search services.

Parameters

  • query: string

    The search query string.

    -
  • Optionaloptions: {
        category: number;
        recency: number;
        customSearxngDomain: string | boolean;
        maxRetries: number;
        page: number;
    } = {}
    • category: number

      default=0 - ["general", "news", "videos", "images", +

    • Optionaloptions: {
          category: number;
          recency: number;
          customSearxngDomain: string | boolean;
          maxRetries: number;
          page: number;
          proxy: string;
      } = {}
      • category: number

        default=0 - ["general", "news", "videos", "images", "science", "map", "music", "it", "files", "social+media"]

      • recency: number

        default=0 - ["", "day", "week", "month", "year"]

      • customSearxngDomain: string | boolean

        default=null - Use your custom domain SearXNG

      • maxRetries: number

        default=3 - Maximum number of retry attempts if the initial search fails.

      • page: number

        default=1 - The page number to retrieve.

        +
      • proxy: string

        default=false - Use corsproxy.io to access in frontend JS

Returns Promise<{
    title: string;
    url: string;
    snippet: string;
    engines: string[];
}[]>

An array of search result objects.

-

Throws an error if the search fails after all retry attempts.

-
const advancedResults = await searchWeb('Node.js', {
category: 2,
recency: 1,
maxRetries: 5
}); +
const advancedResults = await searchWeb('Node.js', {
category: 2,
recency: 1,
maxRetries: 5
});

Gulakov, A. (2024) diff --git a/docs/index.html b/docs/index.html index 70d6308..408f664 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,6 +42,8 @@

Javascript Docs (airesearch.js.org) Live Demo (qwksearch.com)

+

Critical times call for critical thinkers to create a crowdsourced argument reasoning dataset, for AI models to recommend research quotes, evolve crowdsourced chain-of-thought reasoning, unlock faster ways to read long articles, monitor developments by topic modeling a knowledge base graph, and provide a public service of answers to research.

+

Language Models can distill the essence of collective thought into a vector space where every point has a weighted value representing its contribution to the overall decision-making process, leading to a direct democratic AI economy where public votes reward influence. AI will show its reasoning based on what sentences and cites it used from the collective research, so that people can see it is aligned with our interests. Research Agents recommend articles for human researchers working alongside AI to develop a summarized topic outline as a public service. The agents monitor for any related articles via web searches for keywords associated with that Topic Model. For example, imagine uploading several academic PDFs, then the app finds the citations' full text and creates a topic model and keyword summaries, then monitors that literature base and stores highlights. People will make personal knowledge bases of what influences them online in news and research to create AI assistants cloning their mind-uploaded perspective. Similar apps are Anthropic, Obsidian, SciSpace and Perplexity, which shows that users need this new way of self-organizing information.

@@ -165,7 +167,7 @@

Javascript Docs (airesea
  • Can LLMs Generate Novel Research Ideas?
  • PRs Welcome

    -

    diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 6e1a259..8a3dede 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -2,166 +2,166 @@ https://airesearch.js.org/modules.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/index.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractCite.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractFavicon.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractContentHTML.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractContentHTML2.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertHTMLToBasicHTML.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertMathLaTexToImage.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertHTMLSpecialChars.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertMarkdownToHtml.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/copyHtmlToClipboard.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertPDFToHTML.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/scrapeURL.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extract.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/interfaces/Article.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/embedYoutubePlayer.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertYoutubeToText.html - 2024-09-21T21:41:04.299Z + 
2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/generateLanguageModelReply.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/weighSimilarityByCharacter.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/matchQUASAR.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/weighRelevanceTermFrequency.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/searchSTREAM.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/searchWeb.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/searchWikipedia.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertEmbeddingsToUMAP.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/weighRelevanceConceptVectorAPI.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertTextToEmbedding.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/getEmbeddingModel.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/addEmbeddingVectorsToIndex.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/searchVectorIndex.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/getAllEmbeddings.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/exportEmbeddingsIndex.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/importVectorIndexFromString.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/splitSentences.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z 
https://airesearch.js.org/functions/suggestNextWordCompletions.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/splitTextSemanticChars.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/convertTextToTokens.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/stemWordToRoot.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractSEEKTOPIC.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/functions/extractTopicTermGroupsLDA.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z https://airesearch.js.org/classes/torch.html - 2024-09-21T21:41:04.299Z + 2024-09-24T20:00:23.768Z diff --git a/package.json b/package.json index c289a03..f621696 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ai-research-agent", - "version": "0.9.7", + "version": "0.9.8", "module": "index.js", "type": "module", "author": "vtempest", diff --git a/readme.md b/readme.md index 8fc592e..533b2ae 100644 --- a/readme.md +++ b/readme.md @@ -44,6 +44,14 @@

    Javascript Docs (airesearch.js.org) Live Demo (qwksearch.com)

    +### 🧠💻 Reimagine the Internet as 3D Mind Map + +Critical times call for critical thinkers to create a crowdsourced argument reasoning dataset, for AI models to recommend research quotes, evolve crowdsourced chain-of-thought reasoning, unlock faster ways to read long articles, monitor developments by topic modeling a knowledge base graph, and provide a public service of answers to research. + +Language Models can distill the essence of collective thought into a vector space where every point has a weighted value representing its contribution to the overall decision-making process, leading to a direct democratic AI economy where public votes reward influence. AI will show its reasoning based on what sentences and cites it used from the collective research, so that people can see it is aligned with our interests. Research Agents recommend articles for human researchers working alongside AI to develop a summarized topic outline as a public service. The agents monitor for any related articles via web searches for keywords associated with that Topic Model. For example, imagine uploading several academic PDFs, then the app finds the citations' full text and creates a topic model and keyword summaries, then monitors that literature base and stores highlights. People will make personal knowledge bases of what influences them online in news and research to create AI assistants cloning their mind-uploaded perspective. Similar apps are Anthropic Claude, Obsidian, SciSpace and Perplexity, which shows that users need this new way of self-organizing information. + + + #### 🤖🔎 STREAM: Search with Top Result Extraction & Answer Model

    diff --git a/src/crawler/compose.yml b/src/crawler/compose.yml index d036131..122e287 100644 --- a/src/crawler/compose.yml +++ b/src/crawler/compose.yml @@ -17,6 +17,32 @@ services: - 3000:3000 # environment: # - DEBUG=true + + networks: + - caddy + labels: + caddy: proxy.qwksearch.com + caddy.reverse_proxy: "{{upstreams 3000}}" - \ No newline at end of file + + # Caddy - proxy server router for containers + caddy: + image: lucaslorentz/caddy-docker-proxy:ci-alpine + container_name: caddy + ports: + - 80:80 + - 443:443 + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - /var/lib/caddy/data:/data + restart: unless-stopped + environment: + - CADDY_INGRESS_NETWORKS=caddy + networks: + - caddy + + +networks: + caddy: + external: true \ No newline at end of file diff --git a/src/extractor/html-to-content/html-to-basic-html.js b/src/extractor/html-to-content/html-to-basic-html.js index a2d4481..07318a7 100644 --- a/src/extractor/html-to-content/html-to-basic-html.js +++ b/src/extractor/html-to-content/html-to-basic-html.js @@ -173,12 +173,20 @@ export function convertHTMLToTokens(html) { //insert attr into domElement element .substring(attributesIndex) - .match(/\w+=("(?:[^"\\]|\\.\s)*")/g) + .match(/ \w+=("(?:[^"\\]|\\.\s)*")/g) ?.forEach((attr) => { + attr = attr.trim(); + + var key = attr.split("=")[0]; var value = attr.slice(key.length + 2, -1); + if (key == "srcset"){ + key = "src"; + value = value.split(',')[0].trim().split(' ')[0]; + } - if (key && value) domElement[key] = value?.replace(/"/g, ""); + if (key && value) + domElement[key] = value?.replace(/"/g, ""); }); } diff --git a/src/extractor/url-to-content/scrape-url.js b/src/extractor/url-to-content/scrape-url.js index 424d5de..5d1f70d 100644 --- a/src/extractor/url-to-content/scrape-url.js +++ b/src/extractor/url-to-content/scrape-url.js @@ -30,7 +30,7 @@ * @param {number} options.changeReferer default=true - set referer as google * @param {number} options.userAgentIndex default=0 - index of 
[google bot, default chrome] * @param {number} options.useCORSProxy default=false - use 60%-working corsproxy.io (in frontend JS) - * @param {string} options.urlProxy default=false - use proxy url + * @param {string} options.proxy default=false - use proxy url * @returns {Promise} - HTML, JSON, arraybuffer, or error object * @category Extract * @example await scrapeURL("https://hckrnews.com", {timeout: 5, userAgentIndex: 1}) @@ -45,14 +45,15 @@ export async function scrapeURL(url, options = {}) { changeReferer = 0, userAgentIndex = 0, useCORSProxy = false, - urlProxy = false, + proxy = null, useProxyAsBackup = true, } = options; - if(urlProxy) - url = urlProxy + encodeURIComponent(url); + if(proxy) + url = proxy + url; + console.log(url); var userAgentStrings = ['Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible ; Googlebot/2.1 ; +http://www.google.com/bot.html)', @@ -64,16 +65,6 @@ export async function scrapeURL(url, options = {}) { signal: AbortSignal.timeout(timeout * 1000), "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept-language": "en-US,en;q=0.9", - "cache-control": "max-age=0", - "priority": "u=0, i", - "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Google Chrome\";v=\"128\"", - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-platform": "\"Windows\"", - "sec-fetch-dest": "document", - "sec-fetch-mode": "navigate", - "sec-fetch-site": "same-origin", - "sec-fetch-user": "?1", - "upgrade-insecure-requests": "1" }; if (changeReferer) @@ -118,7 +109,6 @@ export async function scrapeURL(url, options = {}) { * Check html for bot block messages * @param {string} html * @returns {Boolean} true if bot detection message found - */ function isHTMLBotDetection(html) { var commonBlocks = [ diff --git a/src/search/search-web.js b/src/search/search-web.js index f8b7c80..f824327 100644 --- a/src/search/search-web.js +++ b/src/search/search-web.js @@ 
-14,8 +14,8 @@ import { scrapeURL } from "../../index.js"; * @param {string|boolean} options.customSearxngDomain default=null - Use your custom domain SearXNG * @param {number} options.maxRetries default=3 - Maximum number of retry attempts if the initial search fails. * @param {number} options.page default=1 - The page number to retrieve. + * @param {string} options.proxy default=false - Use corsproxy.io to access in frontend JS * @returns {Promise>} An array of search result objects. - * @throws {Error} Throws an error if the search fails after all retry attempts. * @example const advancedResults = await searchWeb('Node.js', { * category: 2, * recency: 1, @@ -29,10 +29,11 @@ export async function searchWeb(query, options = {}) { const { category = 0, recency = 0, - customSearxngDomain = 0, + customSearxngDomain = null, maxRetries = 3, page = 1, language = "en-US", + proxy = null } = options; const CATEGORY_LIST = [ @@ -116,24 +117,28 @@ export async function searchWeb(query, options = {}) { ]; //select a random domain if none is provided - - const searchDomain = - customSearxngDomain || - "https://" + + const searchDomain = customSearxngDomain || "https://" + SEARX_DOMAINS[Math.floor(Math.random() * SEARX_DOMAINS.length)]; const categoryName = CATEGORY_LIST[category]; // Using the first category as default const timeRangeName = RECENCY_LIST[recency]; // Using the first time range as default - const url = `${searchDomain}/search?q=${encodeURIComponent(query)}` + + var url = `${searchDomain}/search?q=${encodeURIComponent(query)}` + `&category_${categoryName}=1&language=${language}&time_range=${timeRangeName}` + `&safesearch=0&pageno=${page}`; - const resultHTML = await scrapeURL(url, { + //on cloudflare to avoid "Too many redirects" change SSL mode to Full + if (proxy) + url = proxy + url; + + const resultHTML = await (await fetch(url, { headers: { "accept-language": language+",en;q=0.9", - }, - }) + } + })).text(); + + console.log(resultHTML); + let results = []; 
const resultRegex = /

    ]*>(.*?)<\/a><\/h3>/; @@ -163,9 +168,9 @@ export async function searchWeb(query, options = {}) { let cached = null; let linkMatch; - while ((linkMatch = linksRegex.exec(resultHtml)) !== null) { - cached = linkMatch[1]; - } + // while ((linkMatch = linksRegex.exec(resultHtml)) !== null) { + // cached = linkMatch[1]; + // } title = convertHTMLSpecialChars(title); snippet = convertHTMLSpecialChars(snippet); @@ -174,12 +179,12 @@ export async function searchWeb(query, options = {}) { } if (results.length === 0 && maxRetries > 0) { - console.log("No results found with ", searchDomain); - results = await searchWeb(query, { - category, - recency, - maxRetries: maxRetries - 1, - }); + console.log(url); + results = await searchWeb(query, + {...options, + maxRetries: maxRetries - 1, + useProxy: true + }); } //filter out url that end with .de diff --git a/web-app/package.json b/web-app/package.json index 5647ee2..dde3fd7 100644 --- a/web-app/package.json +++ b/web-app/package.json @@ -36,6 +36,8 @@ "lucide-svelte": "^0.428.0", "postcss": "^8.4.41", "svelte-splitpanes": "^8.0.5", + "swagger-ui-dist": "^5.17.14", + "swagger-ui-themes": "^3.0.1", "tailwind": "^4.0.0", "tailwindcss": "^3.4.10", "three": "^0.168.0" diff --git a/web-app/src/lib/config/config.js b/web-app/src/lib/config/config.js index 9ab42a4..fd8a1d8 100644 --- a/web-app/src/lib/config/config.js +++ b/web-app/src/lib/config/config.js @@ -25,7 +25,7 @@ export const enumLLMs = [ export const NEXT_PUBLIC_WS_URL = "/api/websocket"; export const NEXT_PUBLIC_API_URL = "/api"; -export const PROXY = PUBLIC_PROXY; +export const proxy = PUBLIC_PROXY; export const APP_NAME = "QwkSearch"; export const APP_EMAIL = "support@qwksearch.com"; export const searxngDomain = "http://ec2-184-169-153-151.us-west-1.compute.amazonaws.com/searxng" diff --git a/web-app/src/routes/api/docs/+page.svelte b/web-app/src/routes/api/docs/+page.svelte new file mode 100644 index 0000000..b98cf8e --- /dev/null +++ 
b/web-app/src/routes/api/docs/+page.svelte @@ -0,0 +1,39 @@ + + +
    + \ No newline at end of file diff --git a/web-app/src/routes/api/extract/+server.js b/web-app/src/routes/api/extract/+server.js index a1c20b6..aaab115 100644 --- a/web-app/src/routes/api/extract/+server.js +++ b/web-app/src/routes/api/extract/+server.js @@ -10,7 +10,7 @@ export async function GET({ url }) { if (!urlToExtract) return json({error: "URL parameter is required" }, { status: 500 }); - if (optionReturnFullHtml) { + if (optionReturnFullHtml == "true") { let html = await scrapeURL(urlToExtract); console.log(html); var domain = urlToExtract.split("://")[1].split("/")[0]; diff --git a/web-app/src/routes/api/model/+server.js b/web-app/src/routes/api/model/+server.js index 6592b0e..2f8cc0f 100644 --- a/web-app/src/routes/api/model/+server.js +++ b/web-app/src/routes/api/model/+server.js @@ -5,34 +5,12 @@ import topicModel from '../../../../../data/wiki-phrases-model-240k.json' export async function GET({ url, fetch }) { - - // const githubUrl = 'https://raw.githubusercontent.com/vtempest/ai-research-agent/master/data/wiki-phrases-model-240k.json' - // if (!githubUrl) { - // return json({ error: 'No GitHub URL provided' }, { status: 400 }); - // } - - try { - // // Fetch the raw content from GitHub - // const response = await fetch(githubUrl); - - // if (!response.ok) { - // throw new Error(`GitHub API responded with status ${response.status}`); - // } - - // const data = await response.json(); - - return json(topicModel, { - headers: { - 'Content-Type': 'application/json', - 'Cache-Control': 'public, max-age=360000' - } - } - ); - - // Return the JSON data - // return json(data); - } catch (error) { - console.error('Error fetching GitHub JSON:', error); - return json({ error: 'Failed to fetch GitHub JSON' }, { status: 500 }); - } + return json(topicModel, { + headers: { + 'Content-Type': 'application/json', + 'Cache-Control': 'public, max-age=360000' + } + } + ); + } \ No newline at end of file diff --git a/web-app/src/routes/api/search/+server.js 
b/web-app/src/routes/api/search/+server.js index 13579ad..0fcaacb 100644 --- a/web-app/src/routes/api/search/+server.js +++ b/web-app/src/routes/api/search/+server.js @@ -1,49 +1,35 @@ -import { searchSTREAM, searchWeb } from "../../../../../"; -import { json } from '@sveltejs/kit'; -import { searxngDomain } from "$lib/config/config.js"; +import { searchSTREAM, searchWeb } from "$airesearchagent"; +import { json } from "@sveltejs/kit"; +import { searxngDomain, proxy } from "$lib/config/config.js"; -export async function GET({ url }) { - const query = url.searchParams.get('q'); - const category = parseInt(url.searchParams.get('cat') || '0'); - const recency = parseInt(url.searchParams.get('time') || '0'); - const maxTopResultsToExtract = parseInt(url.searchParams.get('limitExtract') || '4'); - let startTime = Date.now(); - if (!query) - return json({ error: 'Query parameter is required' }) - +export async function GET({ url }) { + const query = url.searchParams.get("q"); + const category = parseInt(url.searchParams.get("cat") || "0"); + const recency = parseInt(url.searchParams.get("time") || "0"); + const maxTopResultsToExtract = parseInt( + url.searchParams.get("limitExtract") || "4" + ); + const page = parseInt(url.searchParams.get("page") || "1"); + let startTime = Date.now(); + if (!query) return json({ error: "Query parameter is required" }); + + // customSearxngDomain: searxngDomain, - let results = await searchWeb(query, { category, recency, maxRetries: 6, - customSearxngDomain: searxngDomain, + customSearxngDomain: false, + proxy, + page }); - - - // const results = await searchSTREAM(query, { - // categoryIndex, - // recencyIndex, - // maxTopResultsToExtract, - // selectedDomain - // }); - - - if (!results ) { - return json(results, {status: 500}); - } + if (!results) + return json(results, { status: 500 }); + - let elapsedTime = Date.now() - startTime; - let response = {results, elapsedTime} - return new Response(JSON.stringify(response), { - headers: { 
'Content-Type': 'application/json' } - }); - // } catch (error) { - // return new Response(JSON.stringify({ error: 'An error occurred while fetching search results' }), { - // status: 500, - // headers: { 'Content-Type': 'application/json' } - // }); - // } -} \ No newline at end of file + let elapsedTime = Date.now() - startTime; + let response = { results, elapsedTime }; + return json(response); +} diff --git a/web-app/static/openapi.yaml b/web-app/static/openapi.yaml new file mode 100644 index 0000000..9898396 --- /dev/null +++ b/web-app/static/openapi.yaml @@ -0,0 +1,135 @@ +openapi: 3.0.3 +info: + title: QwkSearch API + description: Search and outline a topic base with AI Research Agent + version: 0.9.8 +paths: + /api/search: + get: + summary: Search the web + description: Retrieves web search results based on the query. + parameters: + - name: q + in: query + description: Search query string + required: true + schema: + type: string + - name: cat + in: query + description: Category of search (optional) + required: false + schema: + type: integer + default: 0 + - name: time + in: query + description: Recency filter (optional) + required: false + schema: + type: integer + default: 0 + - name: limitExtract + in: query + description: Max number of top results to extract (optional) + required: false + schema: + type: integer + default: 4 + - name: page + in: query + description: Pagination for results (optional) + required: false + schema: + type: integer + default: 1 + responses: + '200': + description: A list of search results + content: + application/json: + schema: + type: object + properties: + results: + type: array + items: + type: object + properties: + title: + type: string + description: Title of the search result + example: "QwkSearch - Search and outline a topic base with AI Research Agent" + link: + type: string + description: URL of the search result + example: "https://qwksearch.com" + snippet: + type: string + description: Snippet of the search 
result + example: "QwkSearch is a search engine that allows you to..." + elapsedTime: + type: integer + description: Time taken for the search (in ms) + '400': + description: Missing required query parameter `q` + content: + application/json: + schema: + type: object + properties: + error: + type: string + '500': + description: Server error when fetching search results + content: + application/json: + schema: + type: object + properties: + error: + type: string + /api/extract: + get: + summary: Extract content from a URL + description: Extracts content (either full HTML or summary) from a given URL using `extract` or `scrapeURL`. + parameters: + - name: url + in: query + description: URL to extract content from + required: true + schema: + type: string + - name: full + in: query + description: Return full HTML content if set to true (optional) + required: false + schema: + type: boolean + default: false + responses: + '200': + description: Extracted content or HTML + content: + application/json: + schema: + type: object + properties: + html: + type: string + description: Full HTML of the page (if `full` is true) + title: + type: string + description: Title of the page (if `full` is not provided) + content: + type: string + description: Extracted content summary of the page + '500': + description: Server error or missing URL parameter + content: + application/json: + schema: + type: object + properties: + error: + type: string \ No newline at end of file