-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add JSON field extraction and enhanced URL validation #6051
base: main
Are you sure you want to change the base?
Conversation
…on by using dictionary unpacking instead of manual key-value pairs
♻️ (url.py): refactor JSON URL validation for better readability and consistency
…LComponent class ♻️ (url.py): refactor ensure_url method to simplify logic and improve readability 🐛 (url.py): fix error handling in URLComponent class for invalid JSON content
…url-improve`) Here is the optimized version of the provided Python code.
⚡️ Codeflash found optimizations for this PR📄 28% (0.28x) speedup for
|
…t handling of JSON data ✨ (url.py): introduce async validation of JSON content from URLs using aiohttp to improve performance and reliability
@@ -1,18 +1,23 @@ | |||
import asyncio | |||
import json | |||
import re |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
import re | |
import re | |
from functools import cache |
Raises: | ||
ValueError: If the string is not a valid URL. | ||
""" | ||
def ensure_url(self, string: str) -> str: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
def ensure_url(self, string: str) -> str: | |
@cache | |
def ensure_url(self, string: str) -> str: |
⚡️ Codeflash found optimizations for this PR📄 38% (0.38x) speedup for
|
Test | Status |
---|---|
⚙️ Existing Unit Tests | 🔘 None Found |
🌀 Generated Regression Tests | ✅ 3059 Passed |
⏪ Replay Tests | 🔘 None Found |
🔎 Concolic Coverage Tests | 🔘 None Found |
📊 Tests Coverage | undefined |
🌀 Generated Regression Tests Details
import re
# imports
import pytest # used for our unit tests
from langflow.components.data.url import URLComponent
from langflow.custom import Component
# unit tests
@pytest.fixture
def url_component():
    """Build the ``URLComponent`` instance handed to tests that request this fixture."""
    component = URLComponent()
    return component
# Basic Test Cases
def test_valid_urls_with_scheme(url_component):
    """Call ``ensure_url`` on URLs that already carry an http/https scheme.

    No assertion is made on the result; the call only has to complete
    without raising.
    """
    urls = (
        "http://example.com",
        "https://example.com",
        "http://www.example.com",
        "https://www.example.com",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
def test_valid_urls_without_scheme(url_component):
    """Call ``ensure_url`` on scheme-less inputs (bare host, path, port).

    No assertion is made on the result; the call only has to complete
    without raising.
    """
    urls = (
        "example.com",
        "www.example.com",
        "example.com/path",
        "example.com:8080",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
# Edge Test Cases
def test_invalid_urls(url_component):
    """Expect ``ValueError`` from ``ensure_url`` for each malformed URL."""
    malformed = (
        "htp://example.com",
        "http://example",
        "http://.com",
        "http://example..com",
        "http://example.com:abc",
    )
    for url in malformed:
        with pytest.raises(ValueError):
            url_component.ensure_url(url)
def test_edge_cases(url_component):
    """Expect ``ValueError`` for empty, blank, scheme-only and oddly terminated inputs."""
    edge_inputs = (
        "",
        " ",
        "http://",
        "http://example.com/ ",
        "http://example.com/?",
    )
    for url in edge_inputs:
        with pytest.raises(ValueError):
            url_component.ensure_url(url)
def test_urls_with_special_characters(url_component):
    """Call ``ensure_url`` on URLs with query strings, fragments and spaces.

    No assertion is made on the result; the call only has to complete
    without raising.
    """
    urls = (
        "http://example.com/path?query=param&other=param2",
        "http://example.com/path#fragment",
        "http://example.com/path with spaces",
        "http://example.com/path%20with%20encoded%20spaces",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_ports(url_component):
    """Call ``ensure_url`` on URLs that include an explicit numeric port."""
    urls = (
        "http://example.com:80",
        "https://example.com:443",
        "http://example.com:8080/path",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_subdomains(url_component):
    """Call ``ensure_url`` on URLs with one or more subdomain levels."""
    urls = (
        "http://sub.example.com",
        "https://subdomain.example.com",
        "http://sub.sub.example.com",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_complex_paths(url_component):
    """Call ``ensure_url`` on URLs with nested paths, trailing slash, query and fragment."""
    urls = (
        "http://example.com/path/to/resource",
        "http://example.com/path/to/resource/",
        "http://example.com/path/to/resource?query=param",
        "http://example.com/path/to/resource#fragment",
    )
    for url in urls:
        codeflash_output = url_component.ensure_url(url)
def test_large_scale_urls(url_component):
    """Call ``ensure_url`` on URLs whose path, query or fragment is 1000 characters long."""
    filler = "a" * 1000
    long_urls = (
        "http://example.com/" + filler,  # long path
        "http://example.com/path?" + "param=" + filler,  # long query value
        "http://example.com/path#" + filler,  # long fragment
    )
    for url in long_urls:
        codeflash_output = url_component.ensure_url(url)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
import re
# imports
import pytest # used for our unit tests
from langflow.components.data.url import URLComponent
from langflow.custom import Component
# unit tests
# Initialize the URLComponent
# One module-level instance, referenced directly (not via a fixture) by the
# test functions that follow in this module.
url_component = URLComponent()
def test_valid_urls_with_scheme():
    """Call ``ensure_url`` on valid URLs that start with "http://" or "https://".

    No assertion is made on the result; the call only has to complete
    without raising.
    """
    for url in (
        "http://example.com",
        "https://example.com",
        "http://www.example.com",
        "https://www.example.com",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_valid_urls_without_scheme():
    """Call ``ensure_url`` on valid URLs that have no scheme prefix.

    No assertion is made on the result; the call only has to complete
    without raising.
    """
    for url in (
        "example.com",
        "www.example.com",
        "example.com/path",
        "example.com:8080",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_invalid_urls():
    """Expect ``ValueError`` from ``ensure_url`` for each invalid URL."""
    for bad_url in (
        "htp://example.com",
        "http://",
        "example",
        "http://example..com",
        "http://example.com:abc",
    ):
        with pytest.raises(ValueError):
            url_component.ensure_url(bad_url)
def test_edge_cases():
    """Expect ``ValueError`` for less obvious inputs: empty, blank, trailing space, port 65536."""
    for bad_url in (
        "",
        " ",
        "http://example.com/ ",
        "http://example.com:65536",
    ):
        with pytest.raises(ValueError):
            url_component.ensure_url(bad_url)
def test_urls_with_complex_paths():
    """Call ``ensure_url`` on URLs with nested paths, a query string and a fragment."""
    for url in (
        "http://example.com/path/to/resource",
        "http://example.com/path/to/resource?query=param",
        "http://example.com/path/to/resource#fragment",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_ports():
    """Call ``ensure_url`` on URLs that include an explicit port."""
    for url in (
        "http://example.com:80",
        "http://example.com:8080",
        "https://example.com:443",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_subdomains():
    """Call ``ensure_url`` on URLs with one or more subdomain levels."""
    for url in (
        "http://subdomain.example.com",
        "https://subdomain.example.com",
        "http://sub.subdomain.example.com",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_idn():
    """Call ``ensure_url`` on punycode-encoded internationalized domain names."""
    for url in ("http://xn--fsq.com", "https://xn--fsq.com"):
        codeflash_output = url_component.ensure_url(url)
def test_urls_with_special_characters_in_path():
    """Call ``ensure_url`` on URLs whose path contains percent-encoded characters."""
    for url in (
        "http://example.com/path/to/res%20ource",
        "http://example.com/path/to/res%2Fource",
    ):
        codeflash_output = url_component.ensure_url(url)
def test_large_scale_valid_urls():
    """Call ``ensure_url`` on 1000 generated URLs of the form http://example<i>.com."""
    template = "http://example{}.com"
    for index in range(1000):
        codeflash_output = url_component.ensure_url(template.format(index))
def test_large_scale_invalid_urls():
    """Expect ``ValueError`` for 1000 generated URLs using the misspelled "htp" scheme."""
    template = "htp://example{}.com"
    for index in range(1000):
        with pytest.raises(ValueError):
            url_component.ensure_url(template.format(index))
def test_large_scale_mixed_urls():
    """Alternate between valid and invalid generated URLs over 1000 indices.

    Even indices use the "http" scheme and must be accepted; odd indices use
    the misspelled "htp" scheme and must raise ``ValueError``.
    """
    for index in range(1000):
        if index % 2 == 0:
            codeflash_output = url_component.ensure_url("http://example{}.com".format(index))
            continue
        with pytest.raises(ValueError):
            url_component.ensure_url("htp://example{}.com".format(index))
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
…cz/url-improve`) ### Explanation of Optimizations.
⚡️ Codeflash found optimizations for this PR📄 137% (1.37x) speedup for
|
This pull request introduces enhancements to the `URLComponent` class in the `src/backend/base/langflow/components/data/url.py` file, primarily focusing on adding support for JSON format extraction from HTML content. The most important changes include importing the `json` module, updating the output format options, and implementing additional logic to handle JSON content.

Enhancements to the `URLComponent` class:

- `src/backend/base/langflow/components/data/url.py`: Imported the `json` module to handle JSON content.
- `class URLComponent(Component)`: Updated the `DropdownInput` options to include "JSON" and modified the `info` attribute to reflect the new option.
- `def ensure_url(self, string: str) -> str`: Added validation to ensure URLs end with ".json" when the format is set to "JSON".
- `def fetch_content(self) -> list[Data]`: Implemented logic to parse and validate JSON content, and to structure the data accordingly.