diff --git a/poetry.lock b/poetry.lock index c0ebfed1..2a50a23e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4279,6 +4279,20 @@ files = [ [package.dependencies] llama-index-core = ">=0.10.1,<0.11.0" +[[package]] +name = "llama-index-experimental" +version = "0.2.0" +description = "llama-index experimental package" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_experimental-0.2.0-py3-none-any.whl", hash = "sha256:c252e63fb7595a8be7cf107a565ec37fb78cc10df7bc89e215b6c8dece04d733"}, + {file = "llama_index_experimental-0.2.0.tar.gz", hash = "sha256:8d36236dd914fa0bea41c41fd6da1ffb15a650b6d34758e07eed862a01b77a9b"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" + [[package]] name = "llama-index-indices-managed-llama-cloud" version = "0.2.7" @@ -7364,19 +7378,6 @@ files = [ {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"}, {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"}, - {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"}, - {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"}, - {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"}, - {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"}, - {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"}, - {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"}, ] [package.dependencies] @@ -7843,6 +7844,21 @@ files = [ {file = "PyMuPDFb-1.24.9.tar.gz", hash = "sha256:5505f07b3dded6e791ab7d10d01f0687e913fc75edd23fdf2825a582b6651558"}, ] +[[package]] +name = "pymysql" +version = "1.1.1" +description = "Pure Python MySQL Driver" +optional = false +python-versions = ">=3.7" +files = [ + {file = "PyMySQL-1.1.1-py3-none-any.whl", hash = "sha256:4de15da4c61dc132f4fb9ab763063e693d521a80fd0e87943b9a453dd4c19d6c"}, + {file = "pymysql-1.1.1.tar.gz", hash = "sha256:e127611aaf2b417403c60bf4dc570124aeb4a57f5f37b8e95ae399a42f904cd0"}, +] + +[package.extras] +ed25519 = ["PyNaCl (>=1.4.0)"] +rsa = ["cryptography"] + [[package]] name = "pynndescent" version = "0.5.13" @@ -11470,4 +11486,4 @@ testing = ["coverage 
(>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11.0,<3.12" -content-hash = "206936bf48890ed1d64e6949ca35b7601803ea5e0855178c62b1e22ae41e5c56" +content-hash = "46816d86b32b1dd711171f4e893fde3403018b90495c8fc39c497f597c1595ea" diff --git a/pyproject.toml b/pyproject.toml index f3dc7c41..eee8ddd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,8 @@ pgvector = "^0.3.2" pre-commit = "^3.8.0" cn-clip = "^1.5.1" llama-index-llms-paieas = "^0.1.0" +pymysql = "^1.1.1" +llama-index-experimental = "^0.2.0" llama-index-readers-web = "^0.1.23" rapidocr-onnxruntime = "^1.3.24" rapid-table = "^0.1.3" diff --git a/pyproject_gpu.toml b/pyproject_gpu.toml index c9c52597..5f1bd913 100644 --- a/pyproject_gpu.toml +++ b/pyproject_gpu.toml @@ -78,6 +78,8 @@ pgvector = "^0.3.2" pre-commit = "^3.8.0" cn-clip = "^1.5.1" llama-index-llms-paieas = "^0.1.0" +pymysql = "^1.1.1" +llama-index-experimental = "^0.2.0" llama-index-readers-web = "^0.1.23" rapidocr-onnxruntime = "^1.3.24" rapid-table = "^0.1.3" diff --git a/src/pai_rag/app/api/query.py b/src/pai_rag/app/api/query.py index f9eb174a..f1d50f35 100644 --- a/src/pai_rag/app/api/query.py +++ b/src/pai_rag/app/api/query.py @@ -4,6 +4,8 @@ import hashlib import os import tempfile +import shutil +import pandas as pd from pai_rag.core.rag_service import rag_service from pai_rag.app.api.models import ( RagQuery, @@ -11,6 +13,9 @@ LlmResponse, ) from fastapi.responses import StreamingResponse +import logging + +logger = logging.getLogger(__name__) router = APIRouter() @@ -180,3 +185,63 @@ async def upload_oss_data( ) return {"task_id": task_id} + + +@router.post("/upload_datasheet") +async def upload_datasheet( + file: UploadFile, +): + task_id = uuid.uuid4().hex + if not file: + return None + + persist_path = "./localdata/data_analysis" + + os.makedirs(name=persist_path, exist_ok=True) + + # 清空目录中的文件 + for filename in os.listdir(persist_path): + file_path = 
os.path.join(persist_path, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + except Exception as e: + logger.info(f"Failed to delete {file_path}. Reason: {e}") + + # 指定持久化存储位置 + file_name = os.path.basename(file.filename) # 获取文件名 + destination_path = os.path.join(persist_path, file_name) + # 写入文件 + try: + # shutil.copy(file.filename, destination_path) + with open(destination_path, "wb") as f: + shutil.copyfileobj(file.file, f) + logger.info("data analysis file saved successfully") + + if destination_path.endswith(".csv"): + df = pd.read_csv(destination_path) + elif destination_path.endswith(".xlsx"): + df = pd.read_excel(destination_path) + else: + raise TypeError("Unsupported file type.") + + except Exception as e: + return StreamingResponse(status_code=500, content={"message": str(e)}) + + return { + "task_id": task_id, + "destination_path": destination_path, + "data_preview": df.head(10).to_json(orient="records", lines=False), + } + + +@router.post("/query/data_analysis") +async def aquery_analysis(query: RagQuery): + response = await rag_service.aquery_analysis(query) + if not query.stream: + return response + else: + return StreamingResponse( + response, + media_type="text/event-stream", + ) diff --git a/src/pai_rag/app/web/rag_client.py b/src/pai_rag/app/web/rag_client.py index 6ccb0763..8dbfaa35 100644 --- a/src/pai_rag/app/web/rag_client.py +++ b/src/pai_rag/app/web/rag_client.py @@ -43,6 +43,10 @@ def query_url(self): def search_url(self): return f"{self.endpoint}service/query/search" + @property + def data_analysis_url(self): + return f"{self.endpoint}service/query/data_analysis" + @property def llm_url(self): return f"{self.endpoint}service/query/llm" @@ -59,6 +63,10 @@ def config_url(self): def load_data_url(self): return f"{self.endpoint}service/upload_data" + @property + def load_datasheet_url(self): + return f"{self.endpoint}service/upload_datasheet" + @property def load_agent_cfg_url(self): return 
f"{self.endpoint}service/config/agent" @@ -126,6 +134,8 @@ def _format_rag_response(
""" + else: + content = "" content_list.append(content) referenced_docs = "".join(content_list) @@ -193,6 +203,35 @@ def query_search( text, chunk_response, session_id=session_id, stream=stream ) + def query_data_analysis( + self, + text: str, + session_id: str = None, + stream: bool = False, + ): + q = dict( + question=text, + session_id=session_id, + stream=stream, + ) + r = requests.post(self.data_analysis_url, json=q, stream=True) + if r.status_code != HTTPStatus.OK: + raise RagApiError(code=r.status_code, msg=r.text) + if not stream: + response = dotdict(json.loads(r.text)) + yield self._format_rag_response( + text, response, session_id=session_id, stream=stream + ) + else: + full_content = "" + for chunk in r.iter_lines(chunk_size=8192, decode_unicode=True): + chunk_response = dotdict(json.loads(chunk)) + full_content += chunk_response.delta + chunk_response.delta = full_content + yield self._format_rag_response( + text, chunk_response, session_id=session_id, stream=stream + ) + def query_llm( self, text: str, @@ -298,6 +337,30 @@ def add_knowledge( response = dotdict(json.loads(r.text)) return response + def add_datasheet( + self, + input_file: str, + ): + file_obj = open(input_file, "rb") + mimetype = mimetypes.guess_type(input_file)[0] + files = {"file": (input_file, file_obj, mimetype)} + try: + r = requests.post( + self.load_datasheet_url, + files=files, + timeout=DEFAULT_CLIENT_TIME_OUT, + ) + response = dotdict(json.loads(r.text)) + if r.status_code != HTTPStatus.OK: + raise RagApiError(code=r.status_code, msg=response.message) + except Exception as e: + print(f"add_datasheet failed: {e}") + finally: + file_obj.close() + + response = dotdict(json.loads(r.text)) + return response + async def get_knowledge_state(self, task_id: str): async with httpx.AsyncClient(timeout=DEFAULT_CLIENT_TIME_OUT) as client: r = await client.get(self.get_load_state_url, params={"task_id": task_id}) @@ -376,5 +439,46 @@ def evaluate_for_response_stage(self): raise 
RagApiError(code=r.status_code, msg=response.message) print("evaluate_for_response_stage response", response) + def _format_data_analysis_rag_response( + self, question, response, session_id: str = None, stream: bool = False + ): + if stream: + text = response["delta"] + else: + text = response["answer"] + + docs = response.get("docs", []) or [] + is_finished = response.get("is_finished", True) + + referenced_docs = "" + if is_finished and len(docs) == 0 and not text: + response["result"] = EMPTY_KNOWLEDGEBASE_MESSAGE.format(query_str=question) + return response + elif is_finished: + seen_filenames = set() + file_idx = 1 + for i, doc in enumerate(docs): + filename = doc["metadata"].get("file_name", None) + if filename and filename not in seen_filenames: + seen_filenames.add(filename) + formatted_file_name = re.sub("^[0-9a-z]{32}_", "", filename) + title = doc["metadata"].get("title") + if not title: + referenced_docs += f'[{file_idx}]: {formatted_file_name} Score:{doc["score"]} \n' + else: + referenced_docs += f'[{file_idx}]: [{title}]({formatted_file_name}) Score:{doc["score"]} \n' + + file_idx += 1 + formatted_answer = "" + if session_id: + new_query = response["new_query"] + formatted_answer += f"**Query Transformation**: {new_query} \n\n" + formatted_answer += f"**Answer**: {text} \n\n" + if referenced_docs: + formatted_answer += f"**Reference**:\n {referenced_docs}" + + response["result"] = formatted_answer + return response + rag_client = RagWebClient() diff --git a/src/pai_rag/app/web/tabs/chat_tab.py b/src/pai_rag/app/web/tabs/chat_tab.py index 607feb72..cee6fced 100644 --- a/src/pai_rag/app/web/tabs/chat_tab.py +++ b/src/pai_rag/app/web/tabs/chat_tab.py @@ -28,6 +28,10 @@ def respond(input_elements: List[Any]): for element, value in input_elements.items(): update_dict[element.elem_id] = value + if update_dict["retrieval_mode"] == "data_analysis": + update_dict["retrieval_mode"] = "hybrid" + update_dict["synthesizer_type"] = "SimpleSummarize" + # empty 
input. if not update_dict["question"]: yield "", update_dict["chatbot"], 0 diff --git a/src/pai_rag/app/web/tabs/data_analysis_tab.py b/src/pai_rag/app/web/tabs/data_analysis_tab.py new file mode 100644 index 00000000..11bf8903 --- /dev/null +++ b/src/pai_rag/app/web/tabs/data_analysis_tab.py @@ -0,0 +1,255 @@ +import os +import json +import datetime +import re +from typing import Dict, Any, List +import gradio as gr +import pandas as pd +from pai_rag.app.web.rag_client import rag_client, RagApiError + + +DEFAULT_IS_INTERACTIVE = os.environ.get("PAIRAG_RAG__SETTING__interactive", "true") + + +def upload_file_fn(input_file): + if input_file is None: + return None + try: + # 调用接口 + res = rag_client.add_datasheet(input_file.name) + # 更新config + update_dict = { + "analysis_type": "nl2pandas", + "analysis_file_path": res["destination_path"], + } + rag_client.patch_config(update_dict) + + # json_str = res["data_preview"] + + # # 将json字符串加载为列表 + # # data_list = json.loads(json_str) + # # # 将列表转换为 DataFrame + # # df = pd.DataFrame(data_list) + + # df = pd.read_json(json_str) + + if input_file.name.endswith(".csv"): + df = pd.read_csv(input_file.name) + return df.head(10) + elif input_file.name.endswith(".xlsx"): + df = pd.read_excel(input_file.name) + return df.head(10) + else: + return "Unsupported file type." + except RagApiError as api_error: + raise gr.Error(f"HTTP {api_error.code} Error: {api_error.msg}") + + +def connect_database(input_db: List[Any]): + try: + update_dict = {"analysis_type": "nl2sql"} + for element, value in input_db.items(): + if (element.elem_id == "db_tables") and (value != ""): + # 去掉首位空格和末尾逗号 + value = value.strip().rstrip(",") + # 英文逗号和中文逗号作为分隔符进行分割,并去除多余空白字符 + value = [word.strip() for word in re.split(r"\s*,\s*|,\s*", value)] + # 检查是否为列表 + if isinstance(value, list): + print(f"Valid input: {value}") + else: + return "Invalid input: Input must be table_A, table_B,..." 
+ if (element.elem_id == "db_descriptions") and (value != ""): + value = json.loads(value) + # 检查是否为字典 + if isinstance(value, dict): + print(f"Valid input: {value}") + else: + return "Invalid input: Input must be a dictionary." + update_dict[element.elem_id] = value + # print("db_config:", update_dict) + + rag_client.patch_config(update_dict) + return f"[{datetime.datetime.now()}] Connect database success!" + except RagApiError as api_error: + raise gr.Error(f"HTTP {api_error.code} Error: {api_error.msg}") + + +def analysis_respond(question, chatbot): + response_gen = rag_client.query_data_analysis(question, stream=True) + content = "" + chatbot.append((question, content)) + for resp in response_gen: + chatbot[-1] = (question, resp.result) + yield chatbot + + +def clear_history(chatbot): + chatbot = [] + global current_session_id + current_session_id = None + return chatbot + + +def reset_textbox(): + return gr.update(value="") + + +def create_data_analysis_tab() -> Dict[str, Any]: + with gr.Row(): + with gr.Column(scale=4): + data_analysis_type = gr.Dropdown( + choices=[ + "database", + "datafile", + ], + value="datafile", + label="Please choose data analysis type", + elem_id="data_analysis_type", + interactive=DEFAULT_IS_INTERACTIVE.lower() != "false", + ) + + # datafile + with gr.Column( + visible=(data_analysis_type.value == "datafile") + ) as file_col: + upload_file = gr.File( + label="Upload csv/xlsx file for data analysis", + file_count="single", + file_types=[".xlsx", ".csv"], + elem_id="upload_file", + scale=8, + ) + output_text = gr.DataFrame( + label="Data File Preview", + value=pd.DataFrame(), + visible=True, + scale=10, + ) + + upload_file.upload( + fn=upload_file_fn, + inputs=upload_file, + outputs=output_text, + api_name="upload_analysis_file_fn", + ) + + # database + with gr.Column(visible=(data_analysis_type.value == "database")) as db_col: + dialect = gr.Textbox( + label="Dialect", elem_id="db_dialect", value="mysql" + ) + user = 
gr.Textbox(label="Username", elem_id="db_username") + password = gr.Textbox( + label="Password", elem_id="db_password", type="password" + ) + host = gr.Textbox(label="Host", elem_id="db_host") + port = gr.Textbox(label="Port", elem_id="db_port", value=3306) + dbname = gr.Textbox(label="DBname", elem_id="db_name") + tables = gr.Textbox( + label="Tables", + elem_id="db_tables", + placeholder="List db tables, separated by commas, e.g. table_A, table_B, ... , using all tables if blank", + ) + descriptions = gr.Textbox( + label="Descriptions", + lines=5, + elem_id="db_descriptions", + placeholder="A dict of table descriptions, e.g. {'table_A': 'text_description_A', 'table_B': 'text_description_B'}", + ) + + connect_db_button = gr.Button( + "Connect Database" + ) # 点击功能中增加retriever type的选择(如果连接成功的话) + connection_info = gr.Textbox( + label="Connection Info", elem_id="db_connection_info" + ) + + inputs_db = { + dialect, + user, + password, + host, + port, + dbname, + tables, + descriptions, + } + + connect_db_button.click( + fn=connect_database, + inputs=inputs_db, + outputs=connection_info, + api_name="connect_db", + ) + + def data_analysis_type_change(type_value): + if type_value == "datafile": + return { + file_col: gr.update(visible=type_value), + db_col: gr.update(visible=False), + } + elif type_value == "database": + return { + db_col: gr.update(visible=type_value), + file_col: gr.update(visible=False), + } + + data_analysis_type.change( + fn=data_analysis_type_change, + inputs=data_analysis_type, + outputs=[file_col, db_col], + ) + + with gr.Column(scale=6): + chatbot = gr.Chatbot(height=500, elem_id="data_analysis_chatbot") + question = gr.Textbox(label="Enter your question.", elem_id="question") + with gr.Row(): + submitBtn = gr.Button("Submit", variant="primary") + clearBtn = gr.Button("Clear History", variant="secondary") + + submitBtn.click( + fn=analysis_respond, + inputs=[question, chatbot], + outputs=[chatbot], + api_name="analysis_respond_clk", + ) + + # 
绑定Textbox提交事件,当按下Enter,调用respond函数 + question.submit( + analysis_respond, + inputs=[question, chatbot], + outputs=[chatbot], + api_name="analysis_respond_q", + ) + + submitBtn.click( + fn=reset_textbox, + inputs=[], + outputs=[question], + api_name="analysis_reset_clk", + ) + question.submit( + fn=reset_textbox, + inputs=[], + outputs=[question], + api_name="analysis_reset_q", + ) + clearBtn.click( + fn=clear_history, + inputs=[chatbot], + outputs=[chatbot], + api_name="analysi_clear_history", + ) + + return { + upload_file.elem_id: upload_file, + dialect.elem_id: dialect, + user.elem_id: user, + password.elem_id: password, + host.elem_id: host, + port.elem_id: port, + dbname.elem_id: dbname, + tables.elem_id: tables, + descriptions.elem_id: descriptions, + } diff --git a/src/pai_rag/app/web/view_model.py b/src/pai_rag/app/web/view_model.py index ebc8a8f7..2837ee84 100644 --- a/src/pai_rag/app/web/view_model.py +++ b/src/pai_rag/app/web/view_model.py @@ -123,6 +123,18 @@ class ViewModel(BaseModel): search_count: int = 10 search_lang: str = "zh-CN" + # data_analysis + analysis_type: str = "nl2pandas" # nl2sql / nl2pandas + analysis_file_path: str = None + db_dialect: str = "mysql" + db_username: str = None + db_password: str = None + db_host: str = None + db_port: int = 3306 + db_name: str = None + db_tables: list = [] + db_descriptions: dict = {} + # postprocessor reranker_type: str = ( "simple-weighted-reranker" # simple-weighted-reranker / model-based-reranker @@ -278,6 +290,23 @@ def from_app_config(config): elif config["retriever"]["retrieval_mode"] == "keyword": view_model.retrieval_mode = "Keyword Only" + if config["data_analysis"]["analysis_type"] == "nl2pandas": + view_model.analysis_type = "nl2pandas" + elif config["data_analysis"]["analysis_type"] == "nl2sql": + view_model.analysis_type = "nl2sql" + + view_model.analysis_file_path = config["data_analysis"].get( + "analysis_file_path", None + ) + view_model.db_dialect = 
config["data_analysis"].get("dialect", "mysql") + view_model.db_username = config["data_analysis"].get("user", None) + view_model.db_password = config["data_analysis"].get("password", None) + view_model.db_host = config["data_analysis"].get("host", None) + view_model.db_port = config["data_analysis"].get("port", 3306) + view_model.db_name = config["data_analysis"].get("dbname", None) + view_model.db_tables = config["data_analysis"].get("tables", None) + view_model.db_descriptions = config["data_analysis"].get("descriptions", None) + reranker_type = config["postprocessor"].get( "reranker_type", "simple-weighted-reranker" ) @@ -416,6 +445,21 @@ def to_app_config(self): elif self.retrieval_mode == "Keyword Only": config["retriever"]["retrieval_mode"] = "keyword" + if self.analysis_type == "nl2pandas": + config["data_analysis"]["analysis_type"] = "nl2pandas" + elif self.analysis_type == "nl2sql": + config["data_analysis"]["analysis_type"] = "nl2sql" + + config["data_analysis"]["analysis_file_path"] = self.analysis_file_path + config["data_analysis"]["dialect"] = self.db_dialect + config["data_analysis"]["user"] = self.db_username + config["data_analysis"]["password"] = self.db_password + config["data_analysis"]["host"] = self.db_host + config["data_analysis"]["port"] = self.db_port + config["data_analysis"]["dbname"] = self.db_name + config["data_analysis"]["tables"] = self.db_tables + config["data_analysis"]["descriptions"] = self.db_descriptions + config["postprocessor"]["reranker_type"] = self.reranker_type config["postprocessor"]["reranker_model"] = self.reranker_model config["postprocessor"]["keyword_weight"] = self.keyword_weight @@ -617,4 +661,17 @@ def to_component_settings(self) -> Dict[str, Dict[str, Any]]: settings["search_api_key"] = {"value": self.search_api_key} settings["search_lang"] = {"value": self.search_lang} settings["search_count"] = {"value": self.search_count} + + # data_analysis + settings["analysis_type"] = {"value": self.analysis_type} + 
settings["analysis_file_path"] = {"value": self.analysis_file_path} + settings["db_dialect"] = {"value": self.db_dialect} + settings["db_username"] = {"value": self.db_username} + settings["db_password"] = {"value": self.db_password} + settings["db_host"] = {"value": self.db_host} + settings["db_port"] = {"value": self.db_port} + settings["db_name"] = {"value": self.db_name} + settings["db_tables"] = {"value": self.db_tables} + settings["db_descriptions"] = {"value": self.db_descriptions} + return settings diff --git a/src/pai_rag/app/web/webui.py b/src/pai_rag/app/web/webui.py index 7be4f962..748001bb 100644 --- a/src/pai_rag/app/web/webui.py +++ b/src/pai_rag/app/web/webui.py @@ -7,6 +7,7 @@ from pai_rag.app.web.tabs.upload_tab import create_upload_tab from pai_rag.app.web.tabs.chat_tab import create_chat_tab from pai_rag.app.web.tabs.agent_tab import create_agent_tab +from pai_rag.app.web.tabs.data_analysis_tab import create_data_analysis_tab from pai_rag.app.web.element_manager import elem_manager from pai_rag.app.web.ui_constants import ( DEFAULT_CSS_STYPE, @@ -61,7 +62,9 @@ def make_homepage(): with gr.Tab("\N{rocket} Agent"): agent_elements = create_agent_tab() elem_manager.add_elems(agent_elements) - + with gr.Tab("\N{bar chart} Data Analysis"): + analysis_elements = create_data_analysis_tab() + elem_manager.add_elems(analysis_elements) homepage.load( resume_ui, outputs=elem_manager.get_elem_list(), concurrency_limit=None ) diff --git a/src/pai_rag/config/settings.toml b/src/pai_rag/config/settings.toml index aee8dbac..147f8111 100644 --- a/src/pai_rag/config/settings.toml +++ b/src/pai_rag/config/settings.toml @@ -25,6 +25,9 @@ host = "Aliyun-Redis host" password = "Aliyun-Redis user:pwd" persist_path = "localdata/storage" +[rag.data_analysis] +analysis_type = "nl2pandas" + [rag.data_loader] type = "local" diff --git a/src/pai_rag/core/rag_application.py b/src/pai_rag/core/rag_application.py index 73d55ab1..a7c0d5ef 100644 --- 
a/src/pai_rag/core/rag_application.py +++ b/src/pai_rag/core/rag_application.py @@ -414,3 +414,57 @@ async def aevaluate_retrieval_and_response(self, type, overwrite: bool = False): None, f"Evaluation against vector store '{vector_store_type}' is not supported. Only FAISS is supported for now.", ) + + async def aquery_analysis(self, query: RagQuery): + """Query answer from RAG App asynchronously. + + Generate answer from Data Analysis interface. + + Args: + query: RagQuery + + Returns: + RagResponse + """ + session_id = query.session_id or uuid_generator() + self.logger.debug(f"Get session ID: {session_id}.") + if not query.question: + return RagResponse( + answer="Empty query. Please input your question.", session_id=session_id + ) + + sessioned_config = self.config + + analyst = module_registry.get_module_with_config( + "DataAnalysisModule", sessioned_config + ) + if not analyst: + raise ValueError("Data Analysis not enabled. Please specify analysis type.") + + if not query.stream: + response = await analyst.aquery(query.question) + else: + response = await analyst.astream_query(query.question) + + node_results = response.source_nodes + new_query = query.question + + reference_docs = [ + ContextDoc( + text=score_node.node.get_content(), + metadata=score_node.node.metadata, + score=score_node.score, + ) + for score_node in node_results + ] + + result_info = { + "session_id": session_id, + "docs": reference_docs, + "new_query": new_query, + } + + if not query.stream: + return RagResponse(answer=response.response, **result_info) + else: + return event_generator_async(response=response, extra_info=result_info) diff --git a/src/pai_rag/core/rag_configuration.py b/src/pai_rag/core/rag_configuration.py index fa753e0f..80b44e0e 100644 --- a/src/pai_rag/core/rag_configuration.py +++ b/src/pai_rag/core/rag_configuration.py @@ -69,7 +69,7 @@ def persist(self): """Save configuration to file.""" data = self.config.as_dict() os.makedirs("localdata", exist_ok=True) - 
loaders.write(GENERATED_CONFIG_FILE_NAME, DynaBox(data).to_dict(), merge=True) + loaders.write(GENERATED_CONFIG_FILE_NAME, DynaBox(data).to_dict()) def get_config_mtime(self): try: diff --git a/src/pai_rag/core/rag_service.py b/src/pai_rag/core/rag_service.py index 85cda797..a8aac2ef 100644 --- a/src/pai_rag/core/rag_service.py +++ b/src/pai_rag/core/rag_service.py @@ -223,5 +223,13 @@ async def aevaluate_retrieval_and_response( logger.error(traceback.format_exc()) raise UserInputError(f"Query RAG failed: {ex}") + async def aquery_analysis(self, query: RagQuery): + try: + self.check_updates() + return await self.rag.aquery_analysis(query) + except Exception as ex: + logger.error(traceback.format_exc()) + raise UserInputError(f"Query Analysis failed: {ex}") + rag_service = RagService() diff --git a/src/pai_rag/integrations/data_analysis/data_analysis_synthesizer.py b/src/pai_rag/integrations/data_analysis/data_analysis_synthesizer.py new file mode 100644 index 00000000..85da44b0 --- /dev/null +++ b/src/pai_rag/integrations/data_analysis/data_analysis_synthesizer.py @@ -0,0 +1,313 @@ +import logging +from typing import Any, List, Generator, Optional, Sequence, cast, AsyncGenerator + +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.indices.prompt_helper import PromptHelper +from llama_index.core.prompts import BasePromptTemplate, PromptTemplate +from llama_index.core.settings import Settings +from llama_index.core.schema import NodeWithScore, QueryType, QueryBundle +from llama_index.core.prompts.mixin import PromptDictType +from llama_index.core.response_synthesizers.base import BaseSynthesizer +from llama_index.core.service_context import ServiceContext +from llama_index.core.service_context_elements.llm_predictor import LLMPredictorType +from llama_index.core.types import RESPONSE_TEXT_TYPE +from llama_index.core.base.response.schema import ( + RESPONSE_TYPE, + Response, + StreamingResponse, + AsyncStreamingResponse, +) +from 
llama_index.core.instrumentation.events.synthesis import ( + SynthesizeStartEvent, + SynthesizeEndEvent, +) +from llama_index.core.callbacks.schema import CBEventType, EventPayload +import llama_index.core.instrumentation as instrument + +logger = logging.getLogger(__name__) + +dispatcher = instrument.get_dispatcher(__name__) + + +def empty_response_generator() -> Generator[str, None, None]: + yield "Empty Response" + + +async def empty_response_agenerator() -> AsyncGenerator[str, None]: + yield "Empty Response" + + +DEFAULT_RESPONSE_SYNTHESIS_PROMPT_TMPL = ( + "Given an input question, synthesize a response in Chinese from the query results.\n" + "Query: {query_str}\n\n" + "SQL or Python Code Instructions (optional):\n{query_code_instruction}\n\n" + "Code Query Output: {query_output}\n\n" + "Response: " +) + +DEFAULT_RESPONSE_SYNTHESIS_PROMPT = PromptTemplate( + DEFAULT_RESPONSE_SYNTHESIS_PROMPT_TMPL, +) + + +class DataAnalysisSynthesizer(BaseSynthesizer): + def __init__( + self, + llm: Optional[LLMPredictorType] = None, + callback_manager: Optional[CallbackManager] = None, + prompt_helper: Optional[PromptHelper] = None, + response_synthesis_prompt: Optional[BasePromptTemplate] = None, + streaming: bool = False, + # deprecated + service_context: Optional[ServiceContext] = None, + ) -> None: + logger.info("DataAnalysisSynthesizer initialized") + if service_context is not None: + prompt_helper = service_context.prompt_helper + + self._llm = llm or Settings.llm + self._response_synthesis_prompt = ( + response_synthesis_prompt or DEFAULT_RESPONSE_SYNTHESIS_PROMPT + ) + + super().__init__( + llm=llm, + callback_manager=callback_manager, + prompt_helper=prompt_helper, + service_context=service_context, + streaming=streaming, + ) + + def _get_prompts(self) -> PromptDictType: + """Get prompts.""" + return {"response_synthesis_prompt": self._response_synthesis_prompt} + + def _update_prompts(self, prompts: PromptDictType) -> None: + """Update prompts.""" + if 
"response_synthesis_prompt" in prompts: + self._response_synthesis_prompt = prompts["response_synthesis_prompt"] + + async def aget_response( + self, + query_str: str, + retrieved_nodes: List[NodeWithScore], + **response_kwargs: Any, + ) -> RESPONSE_TEXT_TYPE: + query_df_output = [n.node.get_content() for n in retrieved_nodes] + + partial_prompt_tmpl = self._response_synthesis_prompt.partial_format( + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], + ) + truncated_df_output = self._prompt_helper.truncate( + prompt=partial_prompt_tmpl, + text_chunks=["\n".join(query_df_output)], + ) + logger.info(f"truncated_df_output: {str(truncated_df_output)}") + + response: RESPONSE_TEXT_TYPE + if not self._streaming: + response = await self._llm.apredict( + self._response_synthesis_prompt, + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], # sql or pandas query + query_output=truncated_df_output, # query output + **response_kwargs, + ) + else: + response = await self._llm.astream( + self._response_synthesis_prompt, + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], + query_output=truncated_df_output, + **response_kwargs, + ) + + if isinstance(response, str): + response = response or "Empty Response" + else: + response = cast(Generator, response) + + return response + + def get_response( + self, + query_str: str, + retrieved_nodes: List[NodeWithScore], + **kwargs: Any, + ) -> RESPONSE_TEXT_TYPE: + query_df_output = [n.node.get_content() for n in retrieved_nodes] + + partial_prompt_tmpl = self._response_synthesis_prompt.partial_format( + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], + ) + truncated_df_output = self._prompt_helper.truncate( + prompt=partial_prompt_tmpl, + 
text_chunks=["\n".join(query_df_output)], + ) + logger.info(f"truncated_df_output: {truncated_df_output}") + + response: RESPONSE_TEXT_TYPE + if not self._streaming: + response = self._llm.predict( + self._response_synthesis_prompt, + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], # sql or pandas query + query_output=truncated_df_output, # query output + **kwargs, + ) + else: + response = self._llm.stream( + self._response_synthesis_prompt, + query_str=query_str, + query_code_instruction=[ + n.node.metadata["query_code_instruction"] for n in retrieved_nodes + ], + query_output=truncated_df_output, + **kwargs, + ) + + if isinstance(response, str): + response = response or "Empty Response" + else: + response = cast(Generator, response) + + return response + + @dispatcher.span + def synthesize( + self, + query: QueryType, + nodes: List[NodeWithScore], + additional_source_nodes: Optional[Sequence[NodeWithScore]] = None, + **response_kwargs: Any, + ) -> RESPONSE_TYPE: + dispatcher.event( + SynthesizeStartEvent( + query=query, + ) + ) + + if len(nodes) == 0: + if self._streaming: + empty_response = StreamingResponse( + response_gen=empty_response_generator() + ) + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=empty_response, + ) + ) + return empty_response + else: + empty_response = Response("Empty Response") + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=empty_response, + ) + ) + return empty_response + + if isinstance(query, str): + query = QueryBundle(query_str=query) + + with self._callback_manager.event( + CBEventType.SYNTHESIZE, + payload={EventPayload.QUERY_STR: query.query_str}, + ) as event: + response_str = self.get_response( + query_str=query.query_str, + retrieved_nodes=nodes, + **response_kwargs, + ) + + additional_source_nodes = additional_source_nodes or [] + source_nodes = list(nodes) + list(additional_source_nodes) + + response = 
self._prepare_response_output(response_str, source_nodes) + + event.on_end(payload={EventPayload.RESPONSE: response}) + + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=response, + ) + ) + return response + + @dispatcher.span + async def asynthesize( + self, + query: QueryType, + nodes: List[NodeWithScore], + additional_source_nodes: Optional[Sequence[NodeWithScore]] = None, + **response_kwargs: Any, + ) -> RESPONSE_TYPE: + dispatcher.event( + SynthesizeStartEvent( + query=query, + ) + ) + if len(nodes) == 0: + if self._streaming: + empty_response = AsyncStreamingResponse( + response_gen=empty_response_agenerator() + ) + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=empty_response, + ) + ) + return empty_response + else: + empty_response = Response("Empty Response") + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=empty_response, + ) + ) + return empty_response + + if isinstance(query, str): + query = QueryBundle(query_str=query) + + with self._callback_manager.event( + CBEventType.SYNTHESIZE, + payload={EventPayload.QUERY_STR: query.query_str}, + ) as event: + response_str = await self.aget_response( + query_str=query.query_str, + retrieved_nodes=nodes, + **response_kwargs, + ) + + additional_source_nodes = additional_source_nodes or [] + source_nodes = list(nodes) + list(additional_source_nodes) + + response = self._prepare_response_output(response_str, source_nodes) + + event.on_end(payload={EventPayload.RESPONSE: response}) + + dispatcher.event( + SynthesizeEndEvent( + query=query, + response=response, + ) + ) + return response diff --git a/src/pai_rag/integrations/data_analysis/data_analysis_tool.py b/src/pai_rag/integrations/data_analysis/data_analysis_tool.py new file mode 100644 index 00000000..3297dc19 --- /dev/null +++ b/src/pai_rag/integrations/data_analysis/data_analysis_tool.py @@ -0,0 +1,120 @@ +import logging +from typing import Optional, List + + +from llama_index.core.base.base_query_engine 
import BaseQueryEngine +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.callbacks.schema import CBEventType, EventPayload +from llama_index.core.base.base_retriever import BaseRetriever +from llama_index.core.response_synthesizers import BaseSynthesizer +from llama_index.core.base.response.schema import RESPONSE_TYPE +from llama_index.core.llms.llm import LLM +from llama_index.core.prompts.mixin import PromptMixinType +from llama_index.core.schema import QueryBundle, NodeWithScore +from llama_index.core.settings import Settings +import llama_index.core.instrumentation as instrument + + +from pai_rag.integrations.data_analysis.nl2pandas_retriever import PandasQueryRetriever +from pai_rag.integrations.data_analysis.data_analysis_synthesizer import ( + DataAnalysisSynthesizer, +) + +logger = logging.getLogger(__name__) + +dispatcher = instrument.get_dispatcher(__name__) + + +class DataAnalysisTool(BaseQueryEngine): + """ + Used for db or excel/csv file Data Analysis + """ + + def __init__( + self, + llm: Optional[LLM] = None, + analysis_retriever: BaseRetriever = PandasQueryRetriever, + analysis_synthesizer: BaseSynthesizer = DataAnalysisSynthesizer, + callback_manager: Optional[CallbackManager] = None, + ) -> None: + """Initialize params.""" + self._llm = llm or Settings.llm + self._retriever = analysis_retriever + self._synthesizer = analysis_synthesizer + super().__init__(callback_manager=callback_manager) + + def _get_prompt_modules(self) -> PromptMixinType: + """Get prompt sub-modules.""" + return {} + + def retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: + nodes = self._retriever.retrieve(query_bundle) + return nodes + + async def aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: + nodes = await self._retriever.aretrieve(query_bundle) + return nodes + + def synthesize( + self, + query_bundle: QueryBundle, + nodes: List[NodeWithScore], + ) -> RESPONSE_TYPE: + return 
self._synthesizer.synthesize( + query=query_bundle, + nodes=nodes, + ) + + async def asynthesize( + self, + query_bundle: QueryBundle, + nodes: List[NodeWithScore], + ) -> RESPONSE_TYPE: + return await self._synthesizer.asynthesize( + query=query_bundle, + nodes=nodes, + ) + + @dispatcher.span + def _query(self, query_bundle: QueryBundle) -> RESPONSE_TYPE: + """Answer a query.""" + with self.callback_manager.event( + CBEventType.QUERY, payload={EventPayload.QUERY_STR: query_bundle.query_str} + ) as query_event: + nodes = self.retrieve(query_bundle) + response = self._synthesizer.synthesize( + query=query_bundle, + nodes=nodes, + ) + query_event.on_end(payload={EventPayload.RESPONSE: response}) + + return response + + @dispatcher.span + async def _aquery(self, query_bundle: QueryBundle) -> RESPONSE_TYPE: + """Answer a query.""" + with self.callback_manager.event( + CBEventType.QUERY, payload={EventPayload.QUERY_STR: query_bundle.query_str} + ) as query_event: + nodes = await self.aretrieve(query_bundle) + response = await self._synthesizer.asynthesize( + query=query_bundle, + nodes=nodes, + ) + + query_event.on_end(payload={EventPayload.RESPONSE: response}) + + return response + + async def astream_query(self, query_bundle: QueryBundle) -> RESPONSE_TYPE: + streaming = self._synthesizer._streaming + self._synthesizer._streaming = True + + nodes = await self.aretrieve(query_bundle) + + stream_response = await self._synthesizer.asynthesize( + query=query_bundle, nodes=nodes + ) + self._synthesizer._streaming = streaming + + return stream_response diff --git a/src/pai_rag/integrations/data_analysis/nl2pandas_retriever.py b/src/pai_rag/integrations/data_analysis/nl2pandas_retriever.py new file mode 100644 index 00000000..c943d439 --- /dev/null +++ b/src/pai_rag/integrations/data_analysis/nl2pandas_retriever.py @@ -0,0 +1,223 @@ +import logging +from typing import Any, Dict, List, Optional + +import pandas as pd +from llama_index.core.base.base_retriever import 
BaseRetriever +from llama_index.core.llms.llm import LLM +from llama_index.core.prompts import BasePromptTemplate, PromptTemplate, PromptType +from llama_index.core.prompts.mixin import PromptDictType, PromptMixinType +from llama_index.core.schema import NodeWithScore, QueryBundle, QueryType, TextNode +from llama_index.core.settings import Settings +from llama_index.core.callbacks.base import CallbackManager +from llama_index.experimental.query_engine.pandas.output_parser import ( + PandasInstructionParser, +) + + +logger = logging.getLogger(__name__) + +DEFAULT_INSTRUCTION_STR = ( + "1. Convert the query to executable Python code using Pandas.\n" + "2. The final line of code should be a Python expression that can be called with the `eval()` function.\n" + "3. The code should represent a solution to the query.\n" + "4. PRINT ONLY THE EXPRESSION.\n" + "5. Do not quote the expression.\n" +) + +DEFAULT_PANDAS_TMPL = ( + "You are working with a pandas dataframe in Python.\n" + "The name of the dataframe is `df`.\n" + "This is the result of `print(df.head())`:\n" + "{df_str}\n\n" + "Follow these instructions:\n" + "{instruction_str}\n" + "Query: {query_str}\n\n" + "Expression:" +) + +DEFAULT_PANDAS_PROMPT = PromptTemplate( + DEFAULT_PANDAS_TMPL, prompt_type=PromptType.PANDAS +) + + +class PandasQueryRetriever(BaseRetriever): + """ + Pandas query retriever + + Convert natural language to Pandas python code. + + Args: + df (pd.DataFrame): Pandas dataframe to use + instruction_str (Optional[str]): Instruction string to use + output_processor (Optional[Callable[[str], str]]): Output processor + A callable that takes in the output string, pandas DataFrame, + and any output kwargs and returns a string. + eg.kwargs["max_colwidth"] = [int] is used to set the length of text + that each column can display during str(df). Set it to a higher number + if there is possibly long text in the dataframe. + pandas_prompt (Optional[BasePromptTemplate]): Pandas prompt to use. 
+ head (int): Number of rows to show in the table context. + llm (Optional[LLM]): Language model to use. + + """ + + def __init__( + self, + df: pd.DataFrame, + instruction_str: Optional[str] = None, + instruction_parser: Optional[PandasInstructionParser] = None, + pandas_prompt: Optional[BasePromptTemplate] = None, + output_kwargs: Optional[dict] = None, + head: int = 5, + llm: Optional[LLM] = None, + callback_manager: CallbackManager | None = None, + **kwargs: Any, + ) -> None: + """Initialize params.""" + + self._df = df + self._head = head + self._pandas_prompt = pandas_prompt or DEFAULT_PANDAS_PROMPT + self._instruction_str = instruction_str or DEFAULT_INSTRUCTION_STR + self._instruction_parser = instruction_parser or PandasInstructionParser( + self._df, output_kwargs or {} + ) + self._llm = llm or Settings.llm + + super().__init__(callback_manager) + + def _get_prompts(self) -> Dict[str, Any]: + """Get prompts.""" + return { + "pandas_prompt": self._pandas_prompt, + } + + def _update_prompts(self, prompts: PromptDictType) -> None: + """Update prompts.""" + if "pandas_prompt" in prompts: + self._pandas_prompt = prompts["pandas_prompt"] + + def _get_prompt_modules(self) -> PromptMixinType: + """Get prompt modules.""" + return {} + + def _get_table_context(self) -> str: + """Get table context.""" + try: + res = str(self._df.head(self._head)) + except Exception as e: + logger.info(f"No dataframe provided, {e}") + res = None + return res + + def _retrieve(self, query_bundle: QueryType) -> List[NodeWithScore]: + """Retrieve pandas instruction and pandas output.""" + if isinstance(query_bundle, str): + query_bundle = QueryBundle(query_bundle) + else: + query_bundle = query_bundle + + context = self._get_table_context() + logger.info(f"> Table head: {context}\n") + + # get executable python code + pandas_response_str = self._llm.predict( + self._pandas_prompt, + df_str=context, + query_str=query_bundle.query_str, + instruction_str=self._instruction_str, + ) + 
logger.info( + ( + f"> Pandas instructions (query code):\n" + f"```\n{pandas_response_str}\n```\n" + ) + ) + + # get pandas output + pandas_output = self._instruction_parser.parse(pandas_response_str) + logger.info(f"> Pandas output: {pandas_output}\n") + + # check pandas output + if ( + "There was an error running the output as Python code" in pandas_output + ) or (pandas_output == "None"): + pandas_output = str(self._df) + + retrieved_nodes = [ + NodeWithScore( + node=TextNode( + text=str(pandas_output), + metadata={ + "query_code_instruction": pandas_response_str, + "query_output": pandas_output, + }, + excluded_embed_metadata_keys=[ + "query_code_instruction", + "query_output", + ], + excluded_llm_metadata_keys=[ + "query_code_instruction", + "query_output", + ], + ), + score=1.0, + ) + ] + return retrieved_nodes + + async def _aretrieve(self, query_bundle: QueryType) -> List[NodeWithScore]: + """Async pandas instruction and pandas output.""" + if isinstance(query_bundle, str): + query_bundle = QueryBundle(query_bundle) + else: + query_bundle = query_bundle + + context = self._get_table_context() + logger.info(f"> Async Table head: {context}\n") + + # get executable python code + pandas_response_str = await self._llm.apredict( + self._pandas_prompt, + df_str=context, + query_str=query_bundle.query_str, + instruction_str=self._instruction_str, + ) + logger.info( + ( + f"> Async Pandas instructions (query code):\n" + f"```\n{pandas_response_str}\n```\n" + ) + ) + + # get pandas output + pandas_output = self._instruction_parser.parse(pandas_response_str) + logger.info(f"> Async Pandas output: {pandas_output}\n") + + # check pandas output + if ( + "There was an error running the output as Python code" in pandas_output + ) or (pandas_output == "None"): + pandas_output = str(self._df) + + retrieved_nodes = [ + NodeWithScore( + node=TextNode( + text=str(pandas_output), + metadata={ + "query_code_instruction": pandas_response_str, + "query_output": pandas_output, + }, 
+ excluded_embed_metadata_keys=[ + "query_code_instruction", + "query_output", + ], + excluded_llm_metadata_keys=[ + "query_code_instruction", + "query_output", + ], + ), + score=1.0, + ) + ] + return retrieved_nodes diff --git a/src/pai_rag/integrations/data_analysis/nl2sql_retriever.py b/src/pai_rag/integrations/data_analysis/nl2sql_retriever.py new file mode 100644 index 00000000..fa3f55e5 --- /dev/null +++ b/src/pai_rag/integrations/data_analysis/nl2sql_retriever.py @@ -0,0 +1,513 @@ +""" +Modification based on llama-index SQL Retriever, + - add score=1.0 to NodeWithScore to be compatible with my_retriever_query_engine + - add logger for Predicted SQL query & SQL query result for synthesize + - constrain LIMIT on the generated SQL query + - constrain time on run_query + - modify DefaultSQLParser +""" + +import logging +import re +import signal +from abc import ABC, abstractmethod +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast + +from llama_index.core.base.base_retriever import BaseRetriever +from llama_index.core.base.embeddings.base import BaseEmbedding +from llama_index.core.callbacks.base import CallbackManager +from llama_index.core.instrumentation import DispatcherSpanMixin +from llama_index.core.llms.llm import LLM +from llama_index.core.objects.base import ObjectRetriever +from llama_index.core.objects.table_node_mapping import SQLTableSchema +from llama_index.core.prompts import BasePromptTemplate +from llama_index.core.prompts.default_prompts import ( + DEFAULT_TEXT_TO_SQL_PROMPT, +) +from llama_index.core.prompts.mixin import ( + PromptDictType, + PromptMixin, + PromptMixinType, +) +from llama_index.core.schema import NodeWithScore, QueryBundle, QueryType, TextNode +from llama_index.core.service_context import ServiceContext +from llama_index.core.settings import ( + Settings, + callback_manager_from_settings_or_context, + embed_model_from_settings_or_context, + llm_from_settings_or_context, +) +from 
llama_index.core.utilities.sql_wrapper import SQLDatabase +from sqlalchemy import Table + +logger = logging.getLogger(__name__) + + +def timeout_handler(): + raise TimeoutError("Query timed out") + + +class MySQLRetriever(BaseRetriever): + """SQL Retriever. + + Retrieves via raw SQL statements. + + Args: + sql_database (SQLDatabase): SQL database. + return_raw (bool): Whether to return raw results or format results. + Defaults to True. + + """ + + def __init__( + self, + sql_database: SQLDatabase, + return_raw: bool = True, + callback_manager: Optional[CallbackManager] = None, + **kwargs: Any, + ) -> None: + """Initialize params.""" + self._sql_database = sql_database + self._return_raw = return_raw + super().__init__(callback_manager) + + def _format_node_results( + self, results: List[List[Any]], col_keys: List[str] + ) -> List[NodeWithScore]: + """Format node results.""" + nodes = [] + for result in results: + # associate column keys with result tuple + metadata = dict(zip(col_keys, result)) + # NOTE: leave text field blank for now + text_node = TextNode( + text="", + metadata=metadata, + ) + nodes.append(NodeWithScore(node=text_node, score=1.0)) + return nodes + + def _limit_check(self, sql_query: str, max_limit=100): + limit_pattern = r"\bLIMIT\s+(\d+)(?:\s+OFFSET\s+\d+)?\b" + match = re.search(limit_pattern, sql_query, re.IGNORECASE) + + if match: + limit_value = int(match.group(1)) + if limit_value > max_limit: + new_sql_query = re.sub( + limit_pattern, + f"LIMIT {max_limit}", + sql_query, + count=1, + flags=re.IGNORECASE, + ) + return new_sql_query + else: + return sql_query + else: + raise ValueError("check sql query and regular expression") + + def retrieve_with_metadata( + self, str_or_query_bundle: QueryType + ) -> Tuple[List[NodeWithScore], Dict]: + """Retrieve with metadata.""" + if isinstance(str_or_query_bundle, str): + query_bundle = QueryBundle(str_or_query_bundle) + else: + query_bundle = str_or_query_bundle + + # constrain LIMIT in sql_query + 
if ("INSERT" in query_bundle.query_str) or ("CREATE" in query_bundle.query_str): + raise ValueError("ONLY QUERY ALLOWED") + if "limit" not in query_bundle.query_str.lower(): + query_bundle.query_str = query_bundle.query_str + " limit 100" + else: + query_bundle.query_str = self._limit_check(query_bundle.query_str) + logger.info(f"Limited SQL query: {query_bundle.query_str}") + + # set timeout to 5s + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(10) # start + try: + raw_response_str, metadata = self._sql_database.run_sql( + query_bundle.query_str + ) + except TimeoutError: + logger.info("SQL Query Timed Out (>10s)") + raw_response_str = "SQL Query Timed Out (>10s)" + finally: + signal.alarm(0) # cancel + + if self._return_raw: + return [ + NodeWithScore( + node=TextNode( + text=raw_response_str, + metadata={ + "query_code_instruction": query_bundle.query_str, + "query_output": metadata["result"], + "col_keys": metadata["col_keys"], + }, + excluded_embed_metadata_keys=[ + "query_code_instruction", + "query_output", + "col_keys", + ], + excluded_llm_metadata_keys=[ + "query_code_instruction", + "query_output", + "col_keys", + ], + ), + score=1.0, + ), + ], metadata + else: + # return formatted + results = metadata["result"] + col_keys = metadata["col_keys"] + return self._format_node_results(results, col_keys), metadata + + async def aretrieve_with_metadata( + self, str_or_query_bundle: QueryType + ) -> Tuple[List[NodeWithScore], Dict]: + return self.retrieve_with_metadata(str_or_query_bundle) + + def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: + """Retrieve nodes given query.""" + retrieved_nodes, _ = self.retrieve_with_metadata(query_bundle) + return retrieved_nodes + + +class SQLParserMode(str, Enum): + """SQL Parser Mode.""" + + DEFAULT = "default" + PGVECTOR = "pgvector" + + +class BaseSQLParser(DispatcherSpanMixin, ABC): + """Base SQL Parser.""" + + @abstractmethod + def parse_response_to_sql(self, response: str, 
query_bundle: QueryBundle) -> str: + """Parse response to SQL.""" + + +class DefaultSQLParser(BaseSQLParser): + """Default SQL Parser.""" + + def parse_response_to_sql(self, response: str, query_bundle: QueryBundle) -> str: + """Parse response to SQL.""" + sql_query_start = response.find("SQLQuery:") + if sql_query_start != -1: + response = response[sql_query_start:] + # TODO: move to removeprefix after Python 3.9+ + if response.startswith("SQLQuery:"): + response = response[len("SQLQuery:") :] + sql_result_start = response.find("SQLResult:") + if sql_result_start != -1: + response = response[:sql_result_start] + return response.strip().strip("```").strip().strip(";").strip() + + +class MyNLSQLRetriever(BaseRetriever, PromptMixin): + """Text-to-SQL Retriever. + + Retrieves via text. + + Args: + sql_database (SQLDatabase): SQL database. + text_to_sql_prompt (BasePromptTemplate): Prompt template for text-to-sql. + Defaults to DEFAULT_TEXT_TO_SQL_PROMPT. + context_query_kwargs (dict): Mapping from table name to context query. + Defaults to None. + tables (Union[List[str], List[Table]]): List of table names or Table objects. + table_retriever (ObjectRetriever[SQLTableSchema]): Object retriever for + SQLTableSchema objects. Defaults to None. + context_str_prefix (str): Prefix for context string. Defaults to None. + service_context (ServiceContext): Service context. Defaults to None. + return_raw (bool): Whether to return plain-text dump of SQL results, or parsed into Nodes. + handle_sql_errors (bool): Whether to handle SQL errors. Defaults to True. + sql_only (bool) : Whether to get only sql and not the sql query result. + Default to False. + llm (Optional[LLM]): Language model to use. 
+ + """ + + def __init__( + self, + sql_database: SQLDatabase, + text_to_sql_prompt: Optional[BasePromptTemplate] = None, + context_query_kwargs: Optional[dict] = None, + tables: Optional[Union[List[str], List[Table]]] = None, + table_retriever: Optional[ObjectRetriever[SQLTableSchema]] = None, + context_str_prefix: Optional[str] = None, + sql_parser_mode: SQLParserMode = SQLParserMode.DEFAULT, + llm: Optional[LLM] = None, + embed_model: Optional[BaseEmbedding] = None, + service_context: Optional[ServiceContext] = None, + return_raw: bool = True, + handle_sql_errors: bool = True, + sql_only: bool = False, + callback_manager: Optional[CallbackManager] = None, + verbose: bool = False, + **kwargs: Any, + ) -> None: + """Initialize params.""" + self._sql_retriever = MySQLRetriever(sql_database, return_raw=return_raw) + self._sql_database = sql_database + self._get_tables = self._load_get_tables_fn( + sql_database, tables, context_query_kwargs, table_retriever + ) + self._context_str_prefix = context_str_prefix + self._llm = llm or llm_from_settings_or_context(Settings, service_context) + self._text_to_sql_prompt = text_to_sql_prompt or DEFAULT_TEXT_TO_SQL_PROMPT + self._sql_parser_mode = sql_parser_mode + + embed_model = embed_model or embed_model_from_settings_or_context( + Settings, service_context + ) + self._sql_parser = self._load_sql_parser(sql_parser_mode, embed_model) + self._handle_sql_errors = handle_sql_errors + self._sql_only = sql_only + self._verbose = verbose + super().__init__( + callback_manager=callback_manager + or callback_manager_from_settings_or_context(Settings, service_context) + ) + + def _get_prompts(self) -> Dict[str, Any]: + """Get prompts.""" + return { + "text_to_sql_prompt": self._text_to_sql_prompt, + } + + def _update_prompts(self, prompts: PromptDictType) -> None: + """Update prompts.""" + if "text_to_sql_prompt" in prompts: + self._text_to_sql_prompt = prompts["text_to_sql_prompt"] + + def _get_prompt_modules(self) -> PromptMixinType: 
+ """Get prompt modules.""" + return {} + + def _load_sql_parser( + self, sql_parser_mode: SQLParserMode, embed_model: BaseEmbedding + ) -> BaseSQLParser: + """Load SQL parser.""" + if sql_parser_mode == SQLParserMode.DEFAULT: + return DefaultSQLParser() + else: + raise ValueError(f"Unknown SQL parser mode: {sql_parser_mode}") + + def _load_get_tables_fn( + self, + sql_database: SQLDatabase, + tables: Optional[Union[List[str], List[Table]]] = None, + context_query_kwargs: Optional[dict] = None, + table_retriever: Optional[ObjectRetriever[SQLTableSchema]] = None, + ) -> Callable[[str], List[SQLTableSchema]]: + """Load get_tables function.""" + context_query_kwargs = context_query_kwargs or {} + if table_retriever is not None: + return lambda query_str: cast(Any, table_retriever).retrieve(query_str) + else: + if tables is not None: + table_names: List[str] = [ + t.name if isinstance(t, Table) else t for t in tables + ] + else: + table_names = list(sql_database.get_usable_table_names()) + context_strs = [context_query_kwargs.get(t, None) for t in table_names] + table_schemas = [ + SQLTableSchema(table_name=t, context_str=c) + for t, c in zip(table_names, context_strs) + ] + return lambda _: table_schemas + + def retrieve_with_metadata( + self, str_or_query_bundle: QueryType + ) -> Tuple[List[NodeWithScore], Dict]: + """Retrieve with metadata.""" + if isinstance(str_or_query_bundle, str): + query_bundle = QueryBundle(str_or_query_bundle) + else: + query_bundle = str_or_query_bundle + table_desc_str = self._get_table_context(query_bundle) + logger.info(f"> Table desc str: {table_desc_str}\n") + + response_str = self._llm.predict( + self._text_to_sql_prompt, + query_str=query_bundle.query_str, + schema=table_desc_str, + dialect=self._sql_database.dialect, + ) + + sql_query_str = self._sql_parser.parse_response_to_sql( + response_str, query_bundle + ) + # assume that it's a valid SQL query + logger.info(f"> Predicted SQL query: {sql_query_str}\n") + + if self._sql_only: + 
sql_only_node = TextNode(text=f"{sql_query_str}") + retrieved_nodes = [NodeWithScore(node=sql_only_node, score=1.0)] + metadata = {"result": sql_query_str} + else: + try: + ( + retrieved_nodes, + metadata, + ) = self._sql_retriever.retrieve_with_metadata(sql_query_str) + logger.info( + f"> SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + if retrieved_nodes[0].metadata["query_output"] == []: + new_sql_query_str = self._sql_query_modification(sql_query_str) + ( + retrieved_nodes, + metadata, + ) = self._sql_retriever.retrieve_with_metadata(new_sql_query_str) + logger.info( + f"> Whole SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + except BaseException as e: + # if handle_sql_errors is True, then return error message + if self._handle_sql_errors: + logger.info(f"async error info: {e}\n") + + new_sql_query_str = self._sql_query_modification(sql_query_str) + ( + retrieved_nodes, + metadata, + ) = self._sql_retriever.retrieve_with_metadata(new_sql_query_str) + logger.info( + f"> Whole SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + # err_node = TextNode(text=f"Error: {e!s}") + # logger.info(f"async error_node info: {err_node}\n") + # retrieved_nodes = [NodeWithScore(node=err_node, score=1.0)] + # metadata = {} + # else: + # raise + + return retrieved_nodes, {"sql_query": sql_query_str, **metadata} + + async def aretrieve_with_metadata( + self, str_or_query_bundle: QueryType + ) -> Tuple[List[NodeWithScore], Dict]: + """Async retrieve with metadata.""" + if isinstance(str_or_query_bundle, str): + query_bundle = QueryBundle(str_or_query_bundle) + else: + query_bundle = str_or_query_bundle + table_desc_str = self._get_table_context(query_bundle) + logger.info(f"> Table desc str: {table_desc_str}\n") + + response_str = await self._llm.apredict( + self._text_to_sql_prompt, + query_str=query_bundle.query_str, + schema=table_desc_str, + dialect=self._sql_database.dialect, + ) + + sql_query_str = 
self._sql_parser.parse_response_to_sql( + response_str, query_bundle + ) + # assume that it's a valid SQL query + logger.info(f"> Predicted SQL query: {sql_query_str}\n") + + if self._sql_only: + sql_only_node = TextNode(text=f"{sql_query_str}") + retrieved_nodes = [NodeWithScore(node=sql_only_node, score=1.0)] + metadata: Dict[str, Any] = {} + else: + try: + ( + retrieved_nodes, + metadata, + ) = await self._sql_retriever.aretrieve_with_metadata(sql_query_str) + logger.info( + f"> SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + if retrieved_nodes[0].metadata["query_output"] == []: + new_sql_query_str = self._sql_query_modification(sql_query_str) + ( + retrieved_nodes, + metadata, + ) = await self._sql_retriever.aretrieve_with_metadata( + new_sql_query_str + ) + logger.info( + f"> Whole SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + + except BaseException as e: + # if handle_sql_errors is True, then return error message + if self._handle_sql_errors: + logger.info(f"async error info: {e}\n") + + new_sql_query_str = self._sql_query_modification(sql_query_str) + ( + retrieved_nodes, + metadata, + ) = await self._sql_retriever.aretrieve_with_metadata(new_sql_query_str) + logger.info( + f"> Whole SQL query result: {retrieved_nodes[0].metadata['query_output']}\n" + ) + # err_node = TextNode(text=f"Error: {e!s}") + # logger.info(f"async error_node info: {err_node}\n") + # retrieved_nodes = [NodeWithScore(node=err_node, score=1.0)] + # metadata = {} + # else: + # raise + return retrieved_nodes, {"sql_query": sql_query_str, **metadata} + + def _sql_query_modification(self, sql_query_str): + table_pattern = r"FROM\s+(\w+)" + match = re.search(table_pattern, sql_query_str, re.IGNORECASE | re.DOTALL) + if match: + first_table = match.group(1) + new_sql_query_str = f"SELECT * FROM {first_table}" + logger.info(f"use the whole table {first_table} instead if possible") + else: + raise ValueError("No table is matched") + + return 
new_sql_query_str + + def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: + """Retrieve nodes given query.""" + retrieved_nodes, _ = self.retrieve_with_metadata(query_bundle) + return retrieved_nodes + + async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: + """Async retrieve nodes given query.""" + retrieved_nodes, _ = await self.aretrieve_with_metadata(query_bundle) + return retrieved_nodes + + def _get_table_context(self, query_bundle: QueryBundle) -> str: + """Get table context. + + Get tables schema + optional context as a single string. + + """ + table_schema_objs = self._get_tables(query_bundle.query_str) + context_strs = [] + if self._context_str_prefix is not None: + context_strs = [self._context_str_prefix] + + for table_schema_obj in table_schema_objs: + table_info = self._sql_database.get_single_table_info( + table_schema_obj.table_name + ) + + if table_schema_obj.context_str: + table_opt_context = " The table description is: " + table_opt_context += table_schema_obj.context_str + table_info += table_opt_context + + context_strs.append(table_info) + + return "\n\n".join(context_strs) diff --git a/src/pai_rag/modules/__init__.py b/src/pai_rag/modules/__init__.py index 1c3ec9f0..1c21f06b 100644 --- a/src/pai_rag/modules/__init__.py +++ b/src/pai_rag/modules/__init__.py @@ -23,6 +23,7 @@ from pai_rag.modules.intentdetection.intent_detection import IntentDetectionModule from pai_rag.modules.customconfig.custom_config import CustomConfigModule from pai_rag.modules.search.search import SearchModule +from pai_rag.modules.dataanalysis.data_analysis import DataAnalysisModule ALL_MODULES = [ "EmbeddingModule", @@ -50,6 +51,7 @@ "IntentDetectionModule", "CustomConfigModule", "SearchModule", + "DataAnalysisModule", ] __all__ = ALL_MODULES + ["ALL_MODULES"] diff --git a/src/pai_rag/modules/dataanalysis/data_analysis.py b/src/pai_rag/modules/dataanalysis/data_analysis.py new file mode 100644 index 00000000..986ff9dd --- 
/dev/null +++ b/src/pai_rag/modules/dataanalysis/data_analysis.py @@ -0,0 +1,176 @@ +import logging +import os +import glob +from typing import Dict, List, Any +import pandas as pd +from sqlalchemy import create_engine, inspect +from sqlalchemy.engine import URL +from sqlalchemy.pool import QueuePool +from llama_index.core import SQLDatabase + +from pai_rag.modules.base.configurable_module import ConfigurableModule +from pai_rag.modules.base.module_constants import MODULE_PARAM_CONFIG +from pai_rag.utils.prompt_template import ( + DEFAULT_TEXT_TO_SQL_TMPL, + DEFAULT_INSTRUCTION_STR, + DEFAULT_PANDAS_PROMPT, + DEFAULT_RESPONSE_SYNTHESIS_PROMPT, +) +from pai_rag.integrations.data_analysis.data_analysis_tool import DataAnalysisTool +from pai_rag.integrations.data_analysis.nl2sql_retriever import MyNLSQLRetriever +from pai_rag.integrations.data_analysis.nl2pandas_retriever import PandasQueryRetriever +from pai_rag.integrations.data_analysis.data_analysis_synthesizer import ( + DataAnalysisSynthesizer, +) + +logger = logging.getLogger(__name__) + + +class DataAnalysisModule(ConfigurableModule): + @staticmethod + def get_dependencies() -> List[str]: + return ["LlmModule", "EmbeddingModule"] + + def _create_new_instance(self, new_params: Dict[str, Any]): + config = new_params[MODULE_PARAM_CONFIG] or {} + llm = new_params["LlmModule"] + embed_model = new_params["EmbeddingModule"] + data_analysis_type = config.get("analysis_type", "nl2pandas") + + if data_analysis_type == "nl2pandas": + df = self.get_dataframe(config) + analysis_retriever = PandasQueryRetriever( + df=df, + instruction_str=DEFAULT_INSTRUCTION_STR, + pandas_prompt=DEFAULT_PANDAS_PROMPT, + llm=llm, + ) + logger.info("DataAnalysis PandasQueryRetriever used") + + elif data_analysis_type == "nl2sql": + sql_database, tables, table_descriptions = self.db_connection(config) + analysis_retriever = MyNLSQLRetriever( + sql_database=sql_database, + text_to_sql_prompt=DEFAULT_TEXT_TO_SQL_TMPL, + tables=tables, + 
context_query_kwargs=table_descriptions, + sql_only=False, + embed_model=embed_model, + llm=llm, + ) + logger.info("DataAnalysis NL2SQLRetriever used") + + else: + raise ValueError( + "Please specify the correct analysis type, 'nl2pandas' or 'nl2sql'" + ) + + analysis_synthesizer = DataAnalysisSynthesizer( + response_synthesis_prompt=DEFAULT_RESPONSE_SYNTHESIS_PROMPT + ) + logger.info("DataAnalysisSynthesizer used") + + return DataAnalysisTool( + analysis_retriever=analysis_retriever, + analysis_synthesizer=analysis_synthesizer, + ) + + def db_connection(self, config): + # get rds_db config + dialect = config.get("dialect", "sqlite") + user = config.get("user", "") + password = config.get("password", "") + host = config.get("host", "") + port = config.get("port", "") + path = config.get("path", "") + dbname = config.get("dbname", "") + desired_tables = config.get("tables", []) + table_descriptions = config.get("descriptions", {}) + + logger.info(f"desired_tables from ui input: {desired_tables}") + logger.info(f"table_descriptions from ui input: {table_descriptions}") + + if dialect == "sqlite": + db_path = os.path.join(path, dbname) + database_uri = f"{dialect}:///{db_path}" + elif dialect == "mysql": + dd_prefix = f"{dialect}+pymysql" + database_uri = URL.create( + dd_prefix, + username=user, + password=password, + host=host, + port=port, + database=dbname, + ) + else: + raise ValueError(f"not supported SQL dialect: {dialect}") + + # use sqlalchemy engine for db connection + engine = create_engine( + database_uri, + echo=False, + pool_size=5, + max_overflow=10, + pool_timeout=30, + pool_recycle=360, + poolclass=QueuePool, + ) + inspector = inspect(engine) + db_tables = inspector.get_table_names() + if len(db_tables) == 0: + raise ValueError("No database tables") + + if len(desired_tables) > 0: + tables = desired_tables + else: + tables = db_tables + + # create an sqldatabase instance including desired table info + sql_database = SQLDatabase(engine, 
include_tables=tables) + + if len(table_descriptions) > 0: + table_descriptions = table_descriptions + else: + table_descriptions = {} + + return sql_database, tables, table_descriptions + + def get_dataframe(self, config): + file_path = config.get("file_path", "./localdata/data_analysis/") + if not file_path: + file_path = "./localdata/data_analysis/" + + if os.path.isfile(file_path): + return self._read_file(file_path) + elif os.path.isdir(file_path): + first_file_path = self._find_first_csv_or_xlsx_in_directory(file_path) + if first_file_path: + return self._read_file(first_file_path) + else: + # raise FileExistsError("No .csv or .xlsx files found in the directory.") + logger.info("No .csv or .xlsx files found in the directory.") + return + else: + logger.info("Please provide a valid file") + return + + def _find_first_csv_or_xlsx_in_directory(self, directory_path): + # 使用 glob 模块查找第一个 .csv 或 .xlsx 文件 + files = glob.glob(os.path.join(directory_path, "*.csv")) + glob.glob( + os.path.join(directory_path, "*.xlsx") + ) + if files: + return files[0] + else: + return None + + def _read_file(self, file_path): + if file_path.endswith(".csv"): + df = pd.read_csv(file_path) + return df + elif file_path.endswith(".xlsx"): + df = pd.read_excel(file_path) + return df + else: + raise TypeError("Unsupported file type.") diff --git a/src/pai_rag/modules/module_registry.py b/src/pai_rag/modules/module_registry.py index 46141333..85843784 100644 --- a/src/pai_rag/modules/module_registry.py +++ b/src/pai_rag/modules/module_registry.py @@ -31,6 +31,7 @@ "BM25IndexModule": "rag.bm25", "SearchModule": "rag.search", "NodesEnhancementModule": "rag.node_enhancement", + "DataAnalysisModule": "rag.data_analysis", } diff --git a/src/pai_rag/modules/queryengine/query_engine.py b/src/pai_rag/modules/queryengine/query_engine.py index 7c137631..0805d28c 100644 --- a/src/pai_rag/modules/queryengine/query_engine.py +++ b/src/pai_rag/modules/queryengine/query_engine.py @@ -41,7 +41,10 @@ def 
_create_new_instance(self, new_params: Dict[str, Any]): multi_modal_llm = new_params["MultiModalLlmModule"] if config["type"] == "RetrieverQueryEngine": - if not postprocessor: + if (not postprocessor) or any( + name in repr(retriever) for name in ("NLSQLRetriever", "PandasQueryRetriever") + ):  # test each retriever name for membership; the former `"NLSQLRetriever" or ...` was always truthy + logger.info("Query_engine without postprocess created") my_query_engine = MyRetrieverQueryEngine( retriever=retriever, response_synthesizer=synthesizer ) diff --git a/src/pai_rag/modules/retriever/retriever.py b/src/pai_rag/modules/retriever/retriever.py index 64cd8ddf..6815ed3b 100644 --- a/src/pai_rag/modules/retriever/retriever.py +++ b/src/pai_rag/modules/retriever/retriever.py @@ -2,21 +2,25 @@ import logging from typing import Dict, List, Any -from llama_index.core.indices.list.base import SummaryIndex # from llama_index.core.retrievers import QueryFusionRetriever from llama_index.core.tools import RetrieverTool from llama_index.core.selectors import LLMSingleSelector from llama_index.core.retrievers import RouterRetriever from llama_index.core.vector_stores.types import VectorStoreQueryMode +from llama_index.core.indices.list.base import SummaryIndex + from pai_rag.integrations.index.multi_modal_index import MyMultiModalVectorStoreIndex from pai_rag.integrations.retrievers.bm25 import BM25Retriever from pai_rag.modules.base.configurable_module import ConfigurableModule from pai_rag.modules.base.module_constants import MODULE_PARAM_CONFIG -from pai_rag.utils.prompt_template import QUERY_GEN_PROMPT +from pai_rag.utils.prompt_template import ( + QUERY_GEN_PROMPT, +) from pai_rag.modules.retriever.my_vector_index_retriever import MyVectorIndexRetriever from pai_rag.integrations.retrievers.fusion_retriever import MyQueryFusionRetriever + logger = logging.getLogger(__name__) diff --git a/src/pai_rag/modules/synthesizer/synthesizer.py b/src/pai_rag/modules/synthesizer/synthesizer.py index a6260d7a..73984633 100644 --- a/src/pai_rag/modules/synthesizer/synthesizer.py +++
b/src/pai_rag/modules/synthesizer/synthesizer.py @@ -35,9 +35,12 @@ from llama_index.core.types import BasePydanticProgram from pai_rag.modules.base.configurable_module import ConfigurableModule from pai_rag.modules.base.module_constants import MODULE_PARAM_CONFIG -from pai_rag.utils.prompt_template import DEFAULT_TEXT_QA_PROMPT_TMPL +from pai_rag.utils.prompt_template import ( + DEFAULT_TEXT_QA_PROMPT_TMPL, +) from pai_rag.integrations.synthesizer.my_simple_synthesizer import MySimpleSummarize + logger = logging.getLogger(__name__) diff --git a/src/pai_rag/utils/prompt_template.py b/src/pai_rag/utils/prompt_template.py index 98283521..6d1e070f 100644 --- a/src/pai_rag/utils/prompt_template.py +++ b/src/pai_rag/utils/prompt_template.py @@ -131,3 +131,57 @@ "问题: {query_str}\n请返回文字和展示图片,不需要标明图片顺序" "答案: " ) + + +DEFAULT_TEXT_TO_SQL_TMPL = PromptTemplate( + "Given an input question, first create a syntactically correct {dialect} " + "query to run, then look at the results of the query and return the answer. " + "You can order the results by a relevant column to return the most " + "interesting examples in the database.\n\n" + "Never query for all the columns from a specific table, only ask for a " + "few relevant columns given the question.\n\n" + "Pay attention to use only the column names that you can see in the schema " + "description. " + "Be careful to not query for columns that do not exist. " + "Pay attention to which column is in which table. " + "Also, qualify column names with the table name when needed. " + "You are required to use the following format, each taking one line:\n\n" + "Question: Question here\n" + "SQLQuery: SQL Query to run\n" + "SQLResult: Result of the SQLQuery\n" + "Answer: Final answer here\n\n" + "Only use tables listed below.\n" + "{schema}\n\n" + "Question: {query_str}\n" + "SQLQuery: " +) + + +DEFAULT_INSTRUCTION_STR = ( + "1. Convert the query to executable Python code using Pandas.\n" + "2. 
The final line of code should be a Python expression that can be called with the `eval()` function.\n" + "3. The code should represent a solution to the query.\n" + "4. PRINT ONLY THE EXPRESSION.\n" + "5. Do not quote the expression.\n" +) + + +DEFAULT_PANDAS_PROMPT = PromptTemplate( + "You are working with a pandas dataframe in Python.\n" + "The name of the dataframe is `df`.\n" + "This is the result of `print(df.head())`:\n" + "{df_str}\n\n" + "Follow these instructions:\n" + "{instruction_str}\n" + "Query: {query_str}\n\n" + "Expression:" +) + + +DEFAULT_RESPONSE_SYNTHESIS_PROMPT = PromptTemplate( + "Given an input question, synthesize a response in Chinese from the query results.\n" + "Query: {query_str}\n\n" + "SQL or Python Code Instructions (optional):\n{query_code_instruction}\n\n" + "Code Query Output: {query_output}\n\n" + "Response: " +) diff --git a/tests/data_readers/test_csv_reader.py b/tests/data_readers/test_csv_reader.py index f3686834..b3a6dd58 100644 --- a/tests/data_readers/test_csv_reader.py +++ b/tests/data_readers/test_csv_reader.py @@ -25,7 +25,7 @@ def test_csv_reader(): documents = directory_reader.load_data() for doc in documents: print(doc) - assert len(documents) == 7 + assert len(documents) == 897 def test_pandas_csv_reader(): @@ -45,4 +45,4 @@ def test_pandas_csv_reader(): documents = directory_reader.load_data() for doc in documents: print(doc) - assert len(documents) == 7 + assert len(documents) == 897 diff --git a/tests/integrations/test_nl2pandas_retriever.py b/tests/integrations/test_nl2pandas_retriever.py new file mode 100644 index 00000000..7d5055ef --- /dev/null +++ b/tests/integrations/test_nl2pandas_retriever.py @@ -0,0 +1,83 @@ +import os +import pytest +import pandas as pd + +from llama_index.llms.dashscope import DashScope +from llama_index.embeddings.dashscope import DashScopeEmbedding +from llama_index.core import Settings +from llama_index.core.schema import NodeWithScore, TextNode + +from 
pai_rag.integrations.data_analysis.nl2pandas_retriever import PandasQueryRetriever +from pai_rag.integrations.data_analysis.data_analysis_synthesizer import ( + DataAnalysisSynthesizer, +) + + +llm = DashScope(model_name="qwen-max", temperature=0.1) +embed_model = DashScopeEmbedding(embed_batch_size=10) +Settings.llm = llm +Settings.embed_model = embed_model + + +@pytest.mark.skipif( + os.getenv("DASHSCOPE_API_KEY") is None, reason="no llm api key provided" +) +def test_pandas_query_retriever(): + file_path = "./tests/testdata/data/csv_data/titanic_train.csv" + df = pd.read_csv(file_path) + data_analysis_retriever = PandasQueryRetriever(df) + query = "What is the correlation between survival and age?" + + retrieved_res = data_analysis_retriever.retrieve(query) + + assert ( + retrieved_res[0].metadata["query_code_instruction"] + == "df['survived'].corr(df['age'])" + ) + + assert eval(retrieved_res[0].metadata["query_output"]) < 0 + + +@pytest.mark.skipif( + os.getenv("DASHSCOPE_API_KEY") is None, reason="no llm api key provided" +) +def test_data_analysis_synthesizer(): + query = "What is the correlation between survival and age?" 
+ retrieved_nodes = [ + NodeWithScore( + node=TextNode( + id_="77c9cf14-260f-4d00-9575-aced468a70b6", + embedding=None, + metadata={ + "query_code_instruction": "df['survived'].corr(df['age'])", + "query_output": "-0.07722109457217755", + }, + excluded_embed_metadata_keys=["query_code_instruction", "query_output"], + excluded_llm_metadata_keys=["query_code_instruction", "query_output"], + relationships={}, + text="-0.07722109457217755", + mimetype="text/plain", + start_char_idx=None, + end_char_idx=None, + text_template="{metadata_str}\n\n{content}", + metadata_template="{key}: {value}", + metadata_seperator="\n", + ), + score=1.0, + ) + ] + data_analysis_synthesizer = DataAnalysisSynthesizer() + + res_get_response = data_analysis_synthesizer.get_response( + query_str=query, retrieved_nodes=retrieved_nodes + ) + + assert len(res_get_response) > 0 + + res_synthesize = data_analysis_synthesizer.synthesize( + query=query, nodes=retrieved_nodes + ) + + assert len(res_synthesize.response) > 0 + + assert res_synthesize.source_nodes == retrieved_nodes diff --git a/tests/integrations/test_nl2sql_retriever.py b/tests/integrations/test_nl2sql_retriever.py new file mode 100644 index 00000000..2fa0aa1f --- /dev/null +++ b/tests/integrations/test_nl2sql_retriever.py @@ -0,0 +1,110 @@ +import os +import pytest +from dotenv import load_dotenv +from sqlalchemy import create_engine, inspect +from sqlalchemy.engine import URL + +from llama_index.llms.dashscope import DashScope +from llama_index.embeddings.dashscope import DashScopeEmbedding +from llama_index.core import Settings +from llama_index.core import SQLDatabase + +from pai_rag.utils.prompt_template import DEFAULT_TEXT_TO_SQL_TMPL +from pai_rag.integrations.data_analysis.nl2sql_retriever import ( + MyNLSQLRetriever, + MySQLRetriever, +) + +load_dotenv() + +llm = DashScope(model_name="qwen-max", temperature=0.1) +embed_model = DashScopeEmbedding(embed_batch_size=10) +Settings.llm = llm +Settings.embed_model = embed_model + + 
+@pytest.fixture() +def db_connection(): + if os.path.exists("./env"): + dialect = os.getenv("dialect") + user = os.getenv("user") + password = os.getenv("password") + host = os.getenv("host") + port = os.getenv("port") + path = os.getenv("path") + dbname = os.getenv("dbname") + desired_tables = os.getenv("tables") + table_descriptions = os.getenv("descriptions") + else: + dialect = "sqlite" + path = "./tests/testdata/data/db_data" + dbname = "pets.db" + desired_tables = [] + table_descriptions = {} + + if dialect == "sqlite": + db_path = os.path.join(path, dbname) + database_uri = f"{dialect}:///{db_path}" + elif dialect == "mysql": + dd_prefix = f"{dialect}+pymysql" + database_uri = URL.create( + dd_prefix, + username=user, + password=password, + host=host, + port=port, + database=dbname, + ) + else: + raise ValueError(f"not supported SQL dialect: {dialect}") + + # use sqlalchemy engine for db connection + engine = create_engine(database_uri, echo=False) + inspector = inspect(engine) + db_tables = inspector.get_table_names() + if len(db_tables) == 0: + raise ValueError("No database tables") + + if len(desired_tables) > 0: + tables = desired_tables + else: + tables = db_tables + + # create an sqldatabase instance including desired table info + sql_database = SQLDatabase(engine, include_tables=tables) + + if len(table_descriptions) > 0: + table_descriptions = table_descriptions + else: + table_descriptions = {} + + return sql_database, tables, table_descriptions + + +@pytest.mark.skipif( + os.getenv("DASHSCOPE_API_KEY") is None, reason="no llm api key provided" +) +def test_sql_retriever(db_connection): + sql_database, db_tables, table_descriptions = db_connection + sql_retriever = MySQLRetriever(sql_database=sql_database) + sql_query = "SELECT * FROM student" + + res = sql_retriever.retrieve(sql_query) + + assert res[0].metadata["query_code_instruction"] == sql_query + " limit 100" + + +@pytest.mark.skipif( + os.getenv("DASHSCOPE_API_KEY") is None, reason="no llm 
api key provided" +) +def test_nl2sql_retriever(db_connection): + sql_database, db_tables, table_descriptions = db_connection + nl2sql_retriever = MyNLSQLRetriever( + sql_database=sql_database, + text_to_sql_prompt=DEFAULT_TEXT_TO_SQL_TMPL, + tables=db_tables, + ) + + res = nl2sql_retriever.retrieve("找出体重大于10的宠物的数量") + + assert res[0].score == 1 diff --git a/tests/testdata/data/csv_data/titanic_train.csv b/tests/testdata/data/csv_data/titanic_train.csv new file mode 100644 index 00000000..16c5876c --- /dev/null +++ b/tests/testdata/data/csv_data/titanic_train.csv @@ -0,0 +1,892 @@ +survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked +0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S +1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C +1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S +1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S +0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S +0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q +0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S +0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S +1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S +1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C +1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S +1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S +0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S +0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S +0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S +1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S +0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q +1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S +0,3,"Vander Planke, Mrs. 
Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S +1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C +0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S +1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S +1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q +1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S +0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S +1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S +0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C +0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S +1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q +0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S +0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C +1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C +1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q +0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S +0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C +0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S +1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C +0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S +0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S +1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C +0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S +0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S +0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C +1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C +1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q +0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S +0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q +1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q +0,3,"Samaan, Mr. 
Youssef",male,,2,0,2662,21.6792,,C +0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S +0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S +0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S +1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C +1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S +0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C +1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S +1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S +0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C +1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S +0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S +0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C +1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, +0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S +0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S +0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C +1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C +1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S +0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S +1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S +0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S +0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S +0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S +0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S +0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C +1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S +0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S +0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S +0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S +1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S +1,3,"Dowdell, Miss. 
Elizabeth",female,30,0,0,364516,12.475,,S +0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S +1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S +1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q +0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S +1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S +1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S +0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S +0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S +1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S +0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S +0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S +0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S +0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S +0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S +0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S +0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S +0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C +1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C +1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S +0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S +0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S +0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S +0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S +0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S +0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S +0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S +1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S +1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S +0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S +1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q +0,1,"Porter, Mr. 
Walter Chamberlain",male,47,0,0,110465,52,C110,S +0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C +0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S +0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S +0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C +0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S +0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q +0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S +0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C +0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S +0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S +0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S +0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C +1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S +0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S +1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C +0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q +1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S +1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C +0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S +0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C +0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S +0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S +1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S +0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S +0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C +1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S +0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S +0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S +0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C +0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C +1,3,"Nysten, Miss. 
Anna Sofia",female,22,0,0,347081,7.75,,S +1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S +0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q +0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S +0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S +1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S +0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S +0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S +0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S +0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S +1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S +0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S +0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S +0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S +0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C +1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q +0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S +0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S +0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S +0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S +1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S +0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S +0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S +0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S +1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S +1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S +0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S +0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S +0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S +0,1,"Van der hoef, Mr. 
Wyckoff",male,61,0,0,111240,33.5,B19,S +0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q +1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S +0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S +0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C +0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S +0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S +0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C +0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S +0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S +0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S +0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C +0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S +1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S +1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S +0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S +1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q +1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S +0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q +0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S +1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S +0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S +1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S +1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S +1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C +1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C +0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q +0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S +1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q +0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S +0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S +0,3,"Sage, Mr. 
Frederick",male,,8,2,CA. 2343,69.55,,S +0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S +0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C +1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S +0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S +0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S +1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C +1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q +1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C +0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S +1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S +0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S +0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S +0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q +1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C +1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S +0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S +1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C +0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S +1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S +0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S +0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S +0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S +1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S +0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S +1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S +0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S +0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S +0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S +1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S +0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S +0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S +1,3,"Asplund, Miss. 
Lillian Gertrud",female,5,4,2,347077,31.3875,,S +0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S +0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S +0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S +1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S +0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S +0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S +0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C +1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q +0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S +0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S +0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C +0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q +0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S +1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S +1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S +0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S +0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S +0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S +0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S +0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S +0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S +1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C +1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C +1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S +1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C +1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S +0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q +1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S +0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S +0,1,"Harrison, Mr. 
William",male,40,0,0,112059,0,B94,S +0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q +0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S +0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S +1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S +1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S +1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S +0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S +1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S +1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S +0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C +1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q +1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S +0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S +0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S +0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q +1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S +0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q +0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S +0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S +1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S +0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S +0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C +1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S +0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S +1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S +1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q +1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S +1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C +0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C +0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S +0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S +0,1,"Lewy, Mr. 
Ervin G",male,,0,0,PC 17612,27.7208,,C +0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C +0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S +1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S +1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C +1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q +1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q +0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S +1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q +0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S +1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S +1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C +1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C +0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C +1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C +1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C +1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C +0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S +0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S +0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S +1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S +1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S +0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S +1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S +1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C +0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S +0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S +1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q +1,2,"Caldwell, Mrs. 
Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S +0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S +1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C +0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S +1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S +1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S +1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C +1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q +0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S +0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S +0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S +1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S +0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S +0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S +1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C +1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S +0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S +1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S +1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S +0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S +0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S +0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S +1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S +1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S +1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S +1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S +0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S +0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S +0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S +0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C +0,3,"Arnold-Franchi, Mr. 
Josef",male,25,1,0,349237,17.8,,S +0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C +0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S +1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S +0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S +1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q +1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q +0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S +0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C +0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C +0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S +0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q +0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S +1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C +1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C +1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q +1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C +1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C +0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S +0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S +0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C +0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S +1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C +1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S +0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C +0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C +0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S +1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C +1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C +0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S +1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S +0,3,"Plotcharsky, Mr. 
Vasil",male,,0,0,349227,7.8958,,S +0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S +0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S +1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S +0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q +1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C +1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S +1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S +0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S +1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C +1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S +0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S +0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S +0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S +0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S +1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S +1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S +0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S +0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S +0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S +0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S +0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S +0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S +1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S +0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S +0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S +0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S +0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q +1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q +0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S +1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S +0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S +1,2,"Drew, Mrs. 
James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S +1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S +0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S +0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S +0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C +0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q +0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S +0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S +0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S +0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S +1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S +1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S +0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q +1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S +1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S +1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S +1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S +0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S +0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S +1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S +0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S +1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S +0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S +0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S +1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S +0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S +0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S +1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S +1,3,"Johannesen-Bratthammer, Mr. 
Bernt",male,,0,0,65306,8.1125,,S +1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S +1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S +1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S +1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C +1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S +0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S +0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S +0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C +1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C +0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S +1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C +0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S +1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S +1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S +0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q +1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S +0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S +0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S +0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S +0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S +0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S +0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S +0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S +0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q +1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C +0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S +0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S +1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S +1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C +0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S +0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S +0,2,"Renouf, Mr. 
Peter Henry",male,34,1,0,31027,21,,S +0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S +0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S +1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S +0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S +0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S +0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S +1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S +1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C +0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S +1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S +0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C +0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S +1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S +0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S +0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S +0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S +0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C +0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S +0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C +1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C +0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S +0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S +0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S +0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S +0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q +0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q +0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S +1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S +0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C +1,2,"Quick, Mrs. 
Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S +1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S +0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S +1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S +1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q +0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S +1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S +1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C +0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S +0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S +1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S +0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q +1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S +0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S +1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S +0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S +0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C +1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C +0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C +0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q +1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S +0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S +0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S +0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S +1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S +0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C +0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C +1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C +0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S +1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S +0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S +1,1,"LeRoy, Miss. 
Bertha",female,30,0,0,PC 17761,106.425,,C +0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S +1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C +1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S +0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S +0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S +1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S +0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C +0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S +1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S +1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C +0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S +1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S +1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C +0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S +0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q +1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C +1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S +0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S +1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C +0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C +1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S +1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S +0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q +0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S +0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S +0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S +0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S +0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S +0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S +0,3,"Palsson, Mrs. 
Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S +0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C +1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S +1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S +1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S +1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S +1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q +0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S +0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S +1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S +1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S +0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C +1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S +1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S +1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C +0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S +0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C +0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C +1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S +0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S +1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C +0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S +0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S +0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S +1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C +0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S +0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q +0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S +0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S +1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S +0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S +0,3,"Boulos, Mr. 
Hanna",male,,0,0,2664,7.225,,C +1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C +1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S +0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S +0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S +0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S +1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C +0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S +0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S +1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S +1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C +1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S +0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S +0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S +1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q +0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q +0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S +1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S +0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S +0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S +1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S +0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S +0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C +1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S +1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C +0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S +0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S +0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S +0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q +1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S +0,3,"Bostandyeff, Mr. 
Guentcho",male,26,0,0,349224,7.8958,,S +0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q +1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S +0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S +1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C +0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S +0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S +1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S +0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S +0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S +0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S +0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S +0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S +1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C +0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S +1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S +1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C +1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C +0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S +1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C +0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S +1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S +0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S +1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S +0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S +1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q +0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q +0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S +0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S +0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q +0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S +0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C +1,1,"Frauenthal, Dr. 
Henry William",male,50,2,0,PC 17611,133.65,,S +0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C +0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S +0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S +1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S +0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S +0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S +0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S +0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S +1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S +1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S +0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S +0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S +1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S +0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S +0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S +0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S +1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S +0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S +1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C +0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q +1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C +0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S +0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S +0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S +0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C +0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S +0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S +0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S +1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S +1,1,"Dick, Mr. 
Albert Adrian",male,31,1,0,17474,57,B20,S +1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C +1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S +0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C +0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S +0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S +0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S +1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q +0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C +0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S +1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C +1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S +0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C +0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q +0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S +0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S +1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S +1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S +1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S +1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C +1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C +0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S +1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S +0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S +0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S +0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S +1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C +1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S +0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q +0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S +1,2,"Harper, Miss. 
Annie Jessie ""Nina""",female,6,0,1,248727,33,,S +0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S +0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S +0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S +1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S +0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S +1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S +1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q +0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S +0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S +1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S +0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C +0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S +0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S +0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S +0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S +0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S +1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C +0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S +0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S +1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S +0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S +1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C +0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S +1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S +0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S +0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S +1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S +0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S +0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q +1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S +1,3,"Moor, Master. 
Meier",male,6,0,1,392096,12.475,E121,S +0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S +0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S +1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S +1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S +0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S +0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S +0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S +1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S +0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S +0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S +1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C +1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S +0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S +1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S +0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C +0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q +0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q +0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S +0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S +0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S +0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S +0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C +1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S +0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S +0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q +1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S +0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q +1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S +1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C +1,1,"Dick, Mrs. 
Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S +0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S +0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S +0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S +0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S +1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S +0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q +1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S +0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C +0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q +0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S +0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S +0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C +0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S +0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S +1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S +1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S +0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C +0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S +0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S +1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S +1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S +1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C +1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S +0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S +0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S +0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S +0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S +1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S +0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S +0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S +0,2,"Slemen, Mr. 
Richard James",male,35,0,0,28206,10.5,,S +0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S +0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S +0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S +0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S +0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C +0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S +0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S +1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S +1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S +0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S +1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S +0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S +0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q +0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S +1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C +1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q +1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, +1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C +1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S +0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C +0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S +0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S +1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C +0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S +0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S +1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S +1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C +0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S +0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S +1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C +0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C +0,3,"Culumovic, Mr. 
Jeso",male,17,0,0,315090,8.6625,,S +0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S +0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S +0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C +0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S +1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C +0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S +0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S +0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C +1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S +0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S +1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S +1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S +1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S +1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C +0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C +0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S +0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S +1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S +0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S +0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S +1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S +1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C +0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S +0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S +1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S +0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S +1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S +0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S +0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S +1,2,"Abelson, Mrs. 
Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C +1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C +0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S +0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S +0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S +1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C +1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S +0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S +0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S +0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S +0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S +0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q +0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S +1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S +0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S +1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C +0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q diff --git a/tests/testdata/data/db_data/pets.db b/tests/testdata/data/db_data/pets.db new file mode 100644 index 00000000..dcd5ddc1 Binary files /dev/null and b/tests/testdata/data/db_data/pets.db differ