Skip to content

modified mfinder, added github environment skip pytest #266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion api/resources/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
"""

from flask_restx import Namespace, Resource, fields
from flask import request
from flask import request, jsonify
from markupsafe import escape
from api.utils.bar_utils import BARUtils
from api.utils.mfinder_utils import MfinderUtils
from marshmallow import Schema, ValidationError, fields as marshmallow_fields
from api import db
from api.models.rice_interactions import Interactions as RiceInteractions
Expand Down Expand Up @@ -156,3 +157,26 @@ def post(self):
return BARUtils.success_exit(res)
else:
return BARUtils.error_exit("No data for the given species/genes"), 400


@itrns.route("/mfinder")
class MFinder(Resource):
    @itrns.expect(post_int_data)
    def post(self):
        """This endpoint was originally written by Vincent Lau to return mFinder
        results to AGENT in his express node.JS app. However Tianhui Zhao refactored
        to the BAR_API

        Expects a JSON body with a "data" list of gene-ID pairs (edges) and an
        optional "options" object of mFinder settings. Responds with the
        beautified mFinder results, or a 400 error payload when the body, the
        edges or the options fail validation.
        """
        data = request.get_json()
        # Validate json
        try:
            data = MFinderDataSchema().load(data)
        except ValidationError as err:
            return BARUtils.error_exit(err.messages), 400

        # input_validation returns an error string on failure, otherwise the
        # edge list with self-edges and duplicates removed.
        filtered_valid_arr = MfinderUtils.input_validation(data["data"])
        if isinstance(filtered_valid_arr, str):
            return BARUtils.error_exit(filtered_valid_arr), 400
        if not filtered_valid_arr:
            # Every edge was a self-edge, so there is no network to analyse.
            return BARUtils.error_exit("No valid edges after removing self-edges and duplicates"), 400

        # settings_validation reports a bad option as a (message, status) tuple;
        # without this check the tuple would reach create_files_and_mfinder and
        # surface as an unhandled exception instead of a 400.
        settings = MfinderUtils.settings_validation(data.get("options", {}))
        if isinstance(settings, tuple):
            message, status = settings
            return BARUtils.error_exit(message), status

        ret_json = MfinderUtils.create_files_and_mfinder(filtered_valid_arr, settings)
        return jsonify(MfinderUtils.beautify_results(ret_json))
193 changes: 193 additions & 0 deletions api/utils/mfinder_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
from api.utils.bar_utils import BARUtils

import tempfile
import os
import subprocess
from collections import defaultdict


class MfinderUtils:
    """Helpers that validate a request, run the mFinder binary and format its output."""

    @staticmethod
    def uniq_with(arr, comp_func):
        """Return the items of ``arr`` without duplicates, keeping first occurrences.

        An item is a duplicate when ``comp_func(item, kept_item)`` is truthy for
        an item that was already kept. Used to eliminate repeated pairs.
        """
        unique_arr = []
        for item in arr:
            if not any(comp_func(item, unique_item) for unique_item in unique_arr):
                unique_arr.append(item)
        return unique_arr

    @staticmethod
    def is_equal(a, b):
        """Return True when ``a == b``; the default comparator for ``uniq_with``."""
        return a == b

    @staticmethod
    def find_key(d, value):
        """Return the first key of ``d`` whose value equals ``value``.

        Raises StopIteration when no value matches.
        """
        return next(key for key, val in d.items() if val == value)

    @staticmethod
    def input_validation(input):
        """Check that the JSON body data is a 2d array of gene-ID pairs.

        Example: [["AT1G010100", "AT5G01010"], ["AT3G10000", "AT2G03240"]]

        Returns an error string when validation fails. Otherwise returns the
        edge list with self-edges and duplicate edges filtered out (mFinder
        does not accept them).
        """
        if not isinstance(input, list):
            return "invalid JSON, not an arr"

        if len(input) == 0:
            return "arr length 0!"

        # The type check must come before the length check: len() on a
        # non-sized member (e.g. a number) would raise TypeError.
        if not all(isinstance(i, list) for i in input):
            return "invalid JSON, check arr members are arrs!"

        if any(len(i) != 2 for i in input):
            return "inner arr length is not of length 2!"

        if not all(isinstance(j, str) for i in input for j in i):
            return "invalid JSON, check if inside arr members are strings!"

        if not all(BARUtils.is_arabidopsis_gene_valid(j) for i in input for j in i):
            return "Invalid gene ID contained!"

        # filter self-edges and duplicate edges (mFinder does not accept)
        return MfinderUtils.uniq_with([i for i in input if i[0] != i[1]], MfinderUtils.is_equal)

    @staticmethod
    def _is_plain_int(value):
        """Return True for ints but not bools (bool is a subclass of int)."""
        return isinstance(value, int) and not isinstance(value, bool)

    @staticmethod
    def settings_validation(opts):
        """Validate the mFinder options and fill in defaults for missing ones.

        Supported options: nd (non-directed network), r (number of random
        networks to generate), s (motif size), u (uniqueness minimum) and
        z (z-score minimum). The defaults applied here are False, 50, 3, 4
        and 2 respectively; the limits keep the server load reasonable.

        opts: the JSON settings object; may be empty or None, in which case
        only defaults are returned.

        Returns the settings dict on success, or an ``(error message, 400)``
        tuple when an option is invalid. Callers must check for the tuple.
        """
        opts = opts or {}
        if not isinstance(opts, dict):
            return "incorrect options - is it an object?", 400

        # Propagate the injection check result; it used to be discarded.
        injection_error = MfinderUtils.injection_check(opts)
        if injection_error is not None:
            return injection_error

        is_int = MfinderUtils._is_plain_int
        settings_obj = opts.copy()

        if "nd" not in opts:
            settings_obj["nd"] = False
        elif not isinstance(opts["nd"], bool):
            return "incorrect nd setting - is it boolean?", 400

        # r and u are counts, so negative values are rejected as well.
        if "r" not in opts:
            settings_obj["r"] = 50
        elif not is_int(opts["r"]) or opts["r"] < 0 or opts["r"] > 150:
            return "incorrect r setting - is it a number under 151?", 400

        if "s" not in opts:
            settings_obj["s"] = 3
        elif not is_int(opts["s"]) or opts["s"] < 2 or opts["s"] > 4:
            return "incorrect s setting - is it a number between 2 and 4?", 400

        if "u" not in opts:
            settings_obj["u"] = 4
        elif not is_int(opts["u"]) or opts["u"] < 0 or opts["u"] > 999:
            return "incorrect u setting - is it a number or below 1000?", 400

        if "z" not in opts:
            settings_obj["z"] = 2
        elif not is_int(opts["z"]) or opts["z"] > 99:
            return "incorrect z setting - is it a number or below 100?", 400

        return settings_obj

    @staticmethod
    def injection_check(obj):
        """Check for injection by rejecting suspiciously long option values.

        obj: the options dict to validate.

        Returns an ``(error message, 400)`` tuple for the first value whose
        string form is longer than 10 characters, otherwise None.
        """
        for key, value in obj.items():
            if len(str(value)) > 10:
                return f"{key} settings param is too long", 400
        return None

    @staticmethod
    def create_files_and_mfinder(input, opts_obj):
        """Run mFinder on the filtered edges and return its raw output.

        Writes the edges to a temporary text file, runs ``/bartmp/mfinder`` on
        it SYNCHRONOUSLY, then reads the ``_OUT.txt`` and ``_MEMBERS.txt``
        files written next to the input file.

        input: filtered array of gene-ID pairs (edges).
        opts_obj: validated settings dict with keys s, r, u, z and optional nd.

        Returns a dict with keys "hashOfIds", "mFinderStats" and
        "mFinderMembers". Raises subprocess.CalledProcessError when mFinder
        exits with a non-zero status, and OSError when the binary or its
        output files are missing.
        """
        # get a hash of IDs -> numbers for later lookup and writable string
        hash_of_ids, return_str = MfinderUtils.get_gene_id_hash_map(input)

        tmpfile = tempfile.NamedTemporaryFile(mode="w+", suffix=".txt", delete=False)
        base_name = tmpfile.name[:-4]  # strip the ".txt" suffix
        stats_path = base_name + "_OUT.txt"
        members_path = base_name + "_MEMBERS.txt"

        try:
            # Write and close the input file before mFinder reads it.
            with tmpfile:
                # give read/write permissions to user but nada to anybody else
                os.chmod(tmpfile.name, 0o600)
                tmpfile.write(return_str)

            # Pass an argument list (no shell) so option values can never be
            # interpreted as shell syntax.
            command = [
                "/bartmp/mfinder",
                tmpfile.name,
                "-s",
                str(opts_obj["s"]),
                "-r",
                str(opts_obj["r"]),
                "-u",
                str(opts_obj["u"]),
                "-z",
                str(opts_obj["z"]),
            ]
            if opts_obj.get("nd"):
                command.append("-nd")
            command.append("-omem")
            subprocess.run(command, check=True)

            with open(stats_path, "r") as stats_file:
                mfinder_stats = stats_file.read()

            with open(members_path, "r") as members_file:
                mfinder_members = members_file.read()
        finally:
            # Always clean up the input file and both output files, even when
            # mFinder fails; an output file may not exist in that case.
            for path in (tmpfile.name, stats_path, members_path):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass

        return {"hashOfIds": hash_of_ids, "mFinderStats": mfinder_stats, "mFinderMembers": mfinder_members}

    @staticmethod
    def get_gene_id_hash_map(input):
        """Number the unique gene IDs and render the edges in mFinder's input format.

        IDs are numbered from 1 in order of first appearance, so
        [["PHE", "PAT"], ["PAT", "PAN"]] becomes "1 2 1\\n2 3 1\\n".

        input: array of two-element arrays of (hashable) gene-ID strings.

        Returns ``(hash_of_ids, return_str)``, where ``hash_of_ids`` maps each
        number to its gene ID (unknown numbers look up as None) and
        ``return_str`` has one "source target 1" line per edge.
        """
        hash_of_ids = defaultdict(lambda: None)
        # Reverse index (gene ID -> number) so each lookup is O(1) instead of
        # scanning every value of hash_of_ids for every edge.
        number_of_id = {}
        lines = []
        for item in input:
            for gene in (item[0], item[1]):
                if gene not in number_of_id:
                    number = len(number_of_id) + 1
                    number_of_id[gene] = number
                    hash_of_ids[number] = gene
            lines.append(f"{number_of_id[item[0]]} {number_of_id[item[1]]} 1\n")

        return hash_of_ids, "".join(lines)

    @staticmethod
    def beautify_results(mfinder_res_obj):
        """Beautify the mFinder output file string and members file string.

        mfinder_res_obj: dict with keys "mFinderStats", "mFinderMembers" and
        "hashOfIds", as returned by ``create_files_and_mfinder``.

        Returns ``BARUtils.success_exit`` applied to a dict with "sigMotifs"
        (per-motif statistics keyed by motif ID) and "motifList" (per-subgraph
        lists of tab-separated gene IDs). Raises ValueError when the stats
        text lacks the expected "[MILI]" section.
        """
        stats = mfinder_res_obj["mFinderStats"]
        mems = mfinder_res_obj["mFinderMembers"]
        id_map = mfinder_res_obj["hashOfIds"]
        ret_obj = {"sigMotifs": {}, "motifList": {}}

        try:
            sig_motifs_str = stats.split("[MILI]\t\n\n")[1].split("Full")[0].split("\n\n")
        # In case stats has less than 2 parts after split('[MILI]\t\n\n')
        except IndexError as err:
            raise ValueError("Expected delimiter '[MILI]\t\n\n' or 'Full' not found in the stats string.") from err

        # Keep every second chunk and drop the trailing ones.
        sig_motifs_str = sig_motifs_str[: len(sig_motifs_str) - 2 : 2]
        for item in sig_motifs_str:
            split_stats_for_motif_id = item.split("\t")
            ret_obj["sigMotifs"][split_stats_for_motif_id[0]] = {
                "numAppearances": split_stats_for_motif_id[1],
                "numAppearancesRand": split_stats_for_motif_id[2],
                "appearancesZScore": split_stats_for_motif_id[3],
                "pValue": split_stats_for_motif_id[4],
                "uniq": split_stats_for_motif_id[5],
                "conc": split_stats_for_motif_id[6],
            }

        subgraphs_list_str = mems.split("subgraph id = ")[1:]
        for subgraph_str in subgraphs_list_str:
            member_list_split = subgraph_str.split("\n")
            # The first line is the subgraph id; member rows are taken from
            # index 5 up to the last two lines.
            motif_mem_list = [i.rstrip("\t") for i in member_list_split[5:-2]]
            motif_mem_results = []
            for i in motif_mem_list:
                # Translate every node number on the row back to its gene ID,
                # i.e. PAT\tPAN\tEGFR. Rows are not assumed to hold exactly
                # three members because the motif size may be 2 to 4.
                genes = [str(id_map[int(node)]) for node in i.split("\t")]
                motif_mem_results.append("\t".join(genes))
            ret_obj["motifList"][member_list_split[0]] = motif_mem_results

        return BARUtils.success_exit(ret_obj)
63 changes: 63 additions & 0 deletions tests/resources/test_interactions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from api import app
from unittest import TestCase
import json
from json import load
import os


class TestIntegrations(TestCase):
Expand Down Expand Up @@ -131,3 +133,64 @@ def test_post_itrns(self):
"error": "No data for the given species/genes",
}
self.assertEqual(data, expected)

def test_mfinder(self):
    """
    This function test mfinder via POST.

    The valid-request check runs the real mFinder analysis, so it is skipped
    when GITHUB_ACTIONS is "true" (presumably the mFinder binary is not
    installed on the CI runners - TODO confirm). The validation-error checks
    do not reach mFinder and run everywhere.
    """

    def post_mfinder(payload):
        # POST the payload to the endpoint and return the decoded JSON body.
        response = self.app_client.post("/interactions/mfinder", json=payload)
        return json.loads(response.get_data(as_text=True))

    # Valid request
    # skip in github environment; comparing the expected file with itself
    # there would only be a tautology and verify nothing
    if os.getenv("GITHUB_ACTIONS") != "true":
        with open("tests/data/mfinder_input.json") as json_file_1:
            input_data = load(json_file_1)
        with open("tests/data/mfinder_output.json") as json_file_2:
            expected = load(json_file_2)
        self.assertEqual(post_mfinder(input_data), expected)

    # Invalid data structure
    data = post_mfinder({"data": {}})
    expected = {"wasSuccessful": False, "error": {"data": ["Not a valid list."]}}
    self.assertEqual(data, expected)

    data = post_mfinder({"data": []})
    expected = {"wasSuccessful": False, "error": "arr length 0!"}
    self.assertEqual(data, expected)

    data = post_mfinder({"data": [["AT5G67420", "AT1G12110"], ["AT5G67420"]]})
    expected = {"wasSuccessful": False, "error": "inner arr length is not of length 2!"}
    self.assertEqual(data, expected)

    data = post_mfinder({"data": [["AT5G67420", "AT1G12110"], 1]})
    expected = {"wasSuccessful": False, "error": {"data": {"1": ["Not a valid list."]}}}
    self.assertEqual(data, expected)

    data = post_mfinder({"data": [["AT5G67420", "AT1G12110"], ["AT5G67420", 1]]})
    expected = {"wasSuccessful": False, "error": {"data": {"1": {"1": ["Not a valid string."]}}}}
    self.assertEqual(data, expected)

    # Invalid gene ID
    data = post_mfinder({"data": [["AT1G01010", "AT5G01010"], ["001G01030", "AT2G03240"]]})
    expected = {"wasSuccessful": False, "error": "Invalid gene ID contained!"}
    self.assertEqual(data, expected)
Loading