mirror of
https://gitee.com/infiniflow/ragflow.git
synced 2025-12-06 07:19:03 +08:00
feat: improve Moodle connector functionality (#11665)
### What problem does this PR solve? Add metadata from moodle data source. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@@ -17,7 +17,11 @@ from common.data_source.exceptions import (
|
||||
InsufficientPermissionsError,
|
||||
ConnectorValidationError,
|
||||
)
|
||||
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch
|
||||
from common.data_source.interfaces import (
|
||||
LoadConnector,
|
||||
PollConnector,
|
||||
SecondsSinceUnixEpoch,
|
||||
)
|
||||
from common.data_source.models import Document
|
||||
from common.data_source.utils import batch_generator, rl_requests
|
||||
|
||||
@@ -42,7 +46,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
delimiter = "&" if "?" in file_url else "?"
|
||||
return f"{file_url}{delimiter}token={token}"
|
||||
|
||||
def _log_error(self, context: str, error: Exception, level: str = "warning") -> None:
|
||||
def _log_error(
|
||||
self, context: str, error: Exception, level: str = "warning"
|
||||
) -> None:
|
||||
"""Simplified logging wrapper"""
|
||||
msg = f"{context}: {error}"
|
||||
if level == "error":
|
||||
@@ -73,7 +79,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
except MoodleException as e:
|
||||
if "invalidtoken" in str(e).lower():
|
||||
raise CredentialExpiredError("Moodle token is invalid or expired")
|
||||
raise ConnectorMissingCredentialError(f"Failed to initialize Moodle client: {e}")
|
||||
raise ConnectorMissingCredentialError(
|
||||
f"Failed to initialize Moodle client: {e}"
|
||||
)
|
||||
|
||||
def validate_connector_settings(self) -> None:
|
||||
if not self.moodle_client:
|
||||
@@ -125,7 +133,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
logger.warning("No courses found to poll")
|
||||
return
|
||||
|
||||
yield from self._yield_in_batches(self._get_updated_content(courses, start, end))
|
||||
yield from self._yield_in_batches(
|
||||
self._get_updated_content(courses, start, end)
|
||||
)
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _get_enrolled_courses(self) -> list:
|
||||
@@ -187,9 +197,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
except Exception as e:
|
||||
self._log_error(f"polling course {course.fullname}", e)
|
||||
|
||||
def _process_module(
|
||||
self, course, section, module
|
||||
) -> Optional[Document]:
|
||||
def _process_module(self, course, section, module) -> Optional[Document]:
|
||||
try:
|
||||
mtype = module.modname
|
||||
if mtype in ["label", "url"]:
|
||||
@@ -224,11 +232,37 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
||||
resp = rl_requests.get(
|
||||
self._add_token_to_url(file_info.fileurl), timeout=60
|
||||
)
|
||||
resp.raise_for_status()
|
||||
blob = resp.content
|
||||
ext = os.path.splitext(file_name)[1] or ".bin"
|
||||
semantic_id = f"{course.fullname} / {section.name} / {file_name}"
|
||||
|
||||
# Create metadata dictionary with relevant information
|
||||
metadata = {
|
||||
"moodle_url": self.moodle_url,
|
||||
"course_id": getattr(course, "id", None),
|
||||
"course_name": getattr(course, "fullname", None),
|
||||
"course_shortname": getattr(course, "shortname", None),
|
||||
"section_id": getattr(section, "id", None),
|
||||
"section_name": getattr(section, "name", None),
|
||||
"section_number": getattr(section, "section", None),
|
||||
"module_id": getattr(module, "id", None),
|
||||
"module_name": getattr(module, "name", None),
|
||||
"module_type": getattr(module, "modname", None),
|
||||
"module_instance": getattr(module, "instance", None),
|
||||
"file_url": getattr(file_info, "fileurl", None),
|
||||
"file_name": file_name,
|
||||
"file_size": getattr(file_info, "filesize", len(blob)),
|
||||
"file_type": getattr(file_info, "mimetype", None),
|
||||
"time_created": getattr(module, "timecreated", None),
|
||||
"time_modified": getattr(module, "timemodified", None),
|
||||
"visible": getattr(module, "visible", None),
|
||||
"groupmode": getattr(module, "groupmode", None),
|
||||
}
|
||||
|
||||
return Document(
|
||||
id=f"moodle_resource_{module.id}",
|
||||
source="moodle",
|
||||
@@ -237,6 +271,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
metadata=metadata,
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"downloading resource {file_name}", e, "error")
|
||||
@@ -247,7 +282,9 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
return None
|
||||
|
||||
try:
|
||||
result = self.moodle_client.mod.forum.get_forum_discussions(forumid=module.instance)
|
||||
result = self.moodle_client.mod.forum.get_forum_discussions(
|
||||
forumid=module.instance
|
||||
)
|
||||
disc_list = getattr(result, "discussions", [])
|
||||
if not disc_list:
|
||||
return None
|
||||
@@ -264,6 +301,38 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
|
||||
blob = "\n".join(markdown).encode("utf-8")
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
|
||||
# Create metadata dictionary with relevant information
|
||||
metadata = {
|
||||
"moodle_url": self.moodle_url,
|
||||
"course_id": getattr(course, "id", None),
|
||||
"course_name": getattr(course, "fullname", None),
|
||||
"course_shortname": getattr(course, "shortname", None),
|
||||
"section_id": getattr(section, "id", None),
|
||||
"section_name": getattr(section, "name", None),
|
||||
"section_number": getattr(section, "section", None),
|
||||
"module_id": getattr(module, "id", None),
|
||||
"module_name": getattr(module, "name", None),
|
||||
"module_type": getattr(module, "modname", None),
|
||||
"forum_id": getattr(module, "instance", None),
|
||||
"discussion_count": len(disc_list),
|
||||
"time_created": getattr(module, "timecreated", None),
|
||||
"time_modified": getattr(module, "timemodified", None),
|
||||
"visible": getattr(module, "visible", None),
|
||||
"groupmode": getattr(module, "groupmode", None),
|
||||
"discussions": [
|
||||
{
|
||||
"id": getattr(d, "id", None),
|
||||
"name": getattr(d, "name", None),
|
||||
"user_id": getattr(d, "userid", None),
|
||||
"user_fullname": getattr(d, "userfullname", None),
|
||||
"time_created": getattr(d, "timecreated", None),
|
||||
"time_modified": getattr(d, "timemodified", None),
|
||||
}
|
||||
for d in disc_list
|
||||
],
|
||||
}
|
||||
|
||||
return Document(
|
||||
id=f"moodle_forum_{module.id}",
|
||||
source="moodle",
|
||||
@@ -272,6 +341,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
metadata=metadata,
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing forum {module.name}", e)
|
||||
@@ -293,11 +363,37 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
||||
resp = rl_requests.get(
|
||||
self._add_token_to_url(file_info.fileurl), timeout=60
|
||||
)
|
||||
resp.raise_for_status()
|
||||
blob = resp.content
|
||||
ext = os.path.splitext(file_name)[1] or ".html"
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
|
||||
# Create metadata dictionary with relevant information
|
||||
metadata = {
|
||||
"moodle_url": self.moodle_url,
|
||||
"course_id": getattr(course, "id", None),
|
||||
"course_name": getattr(course, "fullname", None),
|
||||
"course_shortname": getattr(course, "shortname", None),
|
||||
"section_id": getattr(section, "id", None),
|
||||
"section_name": getattr(section, "name", None),
|
||||
"section_number": getattr(section, "section", None),
|
||||
"module_id": getattr(module, "id", None),
|
||||
"module_name": getattr(module, "name", None),
|
||||
"module_type": getattr(module, "modname", None),
|
||||
"module_instance": getattr(module, "instance", None),
|
||||
"page_url": getattr(file_info, "fileurl", None),
|
||||
"file_name": file_name,
|
||||
"file_size": getattr(file_info, "filesize", len(blob)),
|
||||
"file_type": getattr(file_info, "mimetype", None),
|
||||
"time_created": getattr(module, "timecreated", None),
|
||||
"time_modified": getattr(module, "timemodified", None),
|
||||
"visible": getattr(module, "visible", None),
|
||||
"groupmode": getattr(module, "groupmode", None),
|
||||
}
|
||||
|
||||
return Document(
|
||||
id=f"moodle_page_{module.id}",
|
||||
source="moodle",
|
||||
@@ -306,6 +402,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
metadata=metadata,
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing page {file_name}", e, "error")
|
||||
@@ -326,6 +423,29 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
|
||||
semantic_id = f"{course.fullname} / {section.name} / {mname}"
|
||||
blob = markdown.encode("utf-8")
|
||||
|
||||
# Create metadata dictionary with relevant information
|
||||
metadata = {
|
||||
"moodle_url": self.moodle_url,
|
||||
"course_id": getattr(course, "id", None),
|
||||
"course_name": getattr(course, "fullname", None),
|
||||
"course_shortname": getattr(course, "shortname", None),
|
||||
"section_id": getattr(section, "id", None),
|
||||
"section_name": getattr(section, "name", None),
|
||||
"section_number": getattr(section, "section", None),
|
||||
"module_id": getattr(module, "id", None),
|
||||
"module_name": getattr(module, "name", None),
|
||||
"module_type": getattr(module, "modname", None),
|
||||
"activity_type": mtype,
|
||||
"activity_instance": getattr(module, "instance", None),
|
||||
"description": desc,
|
||||
"time_created": getattr(module, "timecreated", None),
|
||||
"time_modified": getattr(module, "timemodified", None),
|
||||
"added": getattr(module, "added", None),
|
||||
"visible": getattr(module, "visible", None),
|
||||
"groupmode": getattr(module, "groupmode", None),
|
||||
}
|
||||
|
||||
return Document(
|
||||
id=f"moodle_{mtype}_{module.id}",
|
||||
source="moodle",
|
||||
@@ -334,6 +454,7 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
def _process_book(self, course, section, module) -> Optional[Document]:
|
||||
@@ -342,8 +463,10 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
|
||||
contents = module.contents
|
||||
chapters = [
|
||||
c for c in contents
|
||||
if getattr(c, "fileurl", None) and os.path.basename(c.filename) == "index.html"
|
||||
c
|
||||
for c in contents
|
||||
if getattr(c, "fileurl", None)
|
||||
and os.path.basename(c.filename) == "index.html"
|
||||
]
|
||||
if not chapters:
|
||||
return None
|
||||
@@ -356,17 +479,54 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
|
||||
markdown_parts = [f"# {module.name}\n"]
|
||||
chapter_info = []
|
||||
|
||||
for ch in chapters:
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(ch.fileurl), timeout=60)
|
||||
resp.raise_for_status()
|
||||
html = resp.content.decode("utf-8", errors="ignore")
|
||||
markdown_parts.append(md(html) + "\n\n---\n")
|
||||
|
||||
# Collect chapter information for metadata
|
||||
chapter_info.append(
|
||||
{
|
||||
"chapter_id": getattr(ch, "chapterid", None),
|
||||
"title": getattr(ch, "title", None),
|
||||
"filename": getattr(ch, "filename", None),
|
||||
"fileurl": getattr(ch, "fileurl", None),
|
||||
"time_created": getattr(ch, "timecreated", None),
|
||||
"time_modified": getattr(ch, "timemodified", None),
|
||||
"size": getattr(ch, "filesize", None),
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing book chapter {ch.filename}", e)
|
||||
|
||||
blob = "\n".join(markdown_parts).encode("utf-8")
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
|
||||
# Create metadata dictionary with relevant information
|
||||
metadata = {
|
||||
"moodle_url": self.moodle_url,
|
||||
"course_id": getattr(course, "id", None),
|
||||
"course_name": getattr(course, "fullname", None),
|
||||
"course_shortname": getattr(course, "shortname", None),
|
||||
"section_id": getattr(section, "id", None),
|
||||
"section_name": getattr(section, "name", None),
|
||||
"section_number": getattr(section, "section", None),
|
||||
"module_id": getattr(module, "id", None),
|
||||
"module_name": getattr(module, "name", None),
|
||||
"module_type": getattr(module, "modname", None),
|
||||
"book_id": getattr(module, "instance", None),
|
||||
"chapter_count": len(chapters),
|
||||
"chapters": chapter_info,
|
||||
"time_created": getattr(module, "timecreated", None),
|
||||
"time_modified": getattr(module, "timemodified", None),
|
||||
"visible": getattr(module, "visible", None),
|
||||
"groupmode": getattr(module, "groupmode", None),
|
||||
}
|
||||
|
||||
return Document(
|
||||
id=f"moodle_book_{module.id}",
|
||||
source="moodle",
|
||||
@@ -375,4 +535,5 @@ class MoodleConnector(LoadConnector, PollConnector):
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user