Skip to content
Snippets Groups Projects
Commit bb07e1d4 authored by PIAT LUCIEN
Browse files

Create an index of chromosomes in a yaml file

parent de72d3d3
No related branches found
No related tags found
1 merge request: !10 "Wrapped scripts in Snakemake, added SLURM configuration, and revised the variant generation logic."
......@@ -9,27 +9,30 @@ output_dir = "results/"
# Default target rule. For every sample key in config["samples"] it requests:
#   - the directory of per-sequence FAI files (02_split_fai), and
#   - the per-sample chromosome index (chr_config.yaml).
# The displayed hunk showed the pre-commit and post-commit versions of the
# first expand() back to back; only the post-commit list is kept here.
rule all:
    input:
        expand(os.path.join(output_dir, "{sample}_results", "02_split_fai"), sample=config["samples"].keys()),
        expand(os.path.join(output_dir, "{sample}_results", "chr_config.yaml"), sample=config["samples"].keys())
# Build the FASTA index (.fai) for one sample.
#
# input.fasta : path taken from config["samples"][<sample>]["fasta_gz"]
# output.fai  : <output_dir>/<sample>_results/01_full_fai/<sample>_full.fai
# params.out  : directory that holds output.fai (not referenced by the command)
#
# The command indexes the FASTA, moves the "<fasta>.fai" file it produces to
# output.fai, and then removes "<fasta>.gzi" if one exists.
rule generate_fai:
    input:
        fasta=lambda wildcards: config["samples"][wildcards.sample]["fasta_gz"]
    output:
        fai=os.path.join(output_dir, "{sample}_results", "01_full_fai", "{sample}_full.fai")
    params:
        out=os.path.join(output_dir, "{sample}_results", "01_full_fai")
    container:
        "docker://registry.forgemia.inra.fr/pangepop/mspangepop/samtool:1.21"
    shell:
        # `rm -f` succeeds when the .gzi file is absent. A trailing `|| true`
        # would apply to the whole `&&` chain and hide a failed samtools or
        # mv step, so it is deliberately not used.
        """
        samtools faidx {input.fasta} &&
        mv {input.fasta}.fai {output.fai} &&
        rm -f {input.fasta}.gzi
        """
# Rule to split the FAI file
rule split_fai:
input:
fai=rules.generate_fai.output.fai
output:
directory(os.path.join(output_dir, "{sample}_results", "02_split_fai")) # Directory where split files will be stored
directory(os.path.join(output_dir, "{sample}_results", "02_split_fai"))
params:
out=os.path.join(output_dir, "{sample}_results", "02_split_fai")
shell:
......@@ -38,3 +41,12 @@ rule split_fai:
awk '{{print > "{params.out}/" $1 ".fai"}}' {input.fai}
"""
# Produce the per-sample chromosome index.
#
# input.fai   : the full FASTA index written by rule generate_fai
# output.yaml : <output_dir>/<sample>_results/chr_config.yaml
#
# The conversion itself is delegated to workflow/scripts/fai2yaml.sh, which
# receives the FAI path and the YAML path as its two arguments. The script
# path is relative to the directory Snakemake is run from.
rule create_chr_config:
    input:
        fai=rules.generate_fai.output.fai
    output:
        yaml=os.path.join(output_dir, "{sample}_results", "chr_config.yaml")
    shell:
        """
        bash workflow/scripts/fai2yaml.sh {input.fai} {output.yaml}
        """
\ No newline at end of file
#!/bin/bash
# Author: Lucien Piat
# Date: October 24, 2024
# Project: PangenOak at INRAE
#
# Write the sequence names found in a FASTA index (.fai) to a YAML file as a
# list under the top-level key "chromosomes".
#
# Usage: fai2yaml.sh <input_fai_file> <output_yaml_file>
#
# Arguments:
#   input_fai_file    tab-separated index; the first field of every line is
#                     taken as the sequence name
#   output_yaml_file  destination file; overwritten if it already exists
#
# Exit status:
#   0  the YAML file was written
#   1  wrong number of arguments, or the input file cannot be read

set -euo pipefail

# Exactly two arguments are required
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <input_fai_file> <output_yaml_file>" >&2
    exit 1
fi

input_fai_file="$1"
output_yaml_file="$2"

# Fail before touching the output so a bad input never leaves a stub YAML file
if [ ! -r "$input_fai_file" ]; then
    echo "Error: cannot read input FAI file: $input_fai_file" >&2
    exit 1
fi

# Emit the whole document through a single redirection instead of re-opening
# the output file for every record.
{
    echo "chromosomes:"
    # `|| [ -n "$chromosome" ]` keeps a final record that has no trailing newline
    while IFS=$'\t' read -r chromosome _ || [ -n "$chromosome" ]; do
        # Skip blank lines rather than emitting an empty entry
        [ -n "$chromosome" ] || continue
        # Escape backslashes first, then double quotes, so the name stays a
        # valid YAML double-quoted scalar
        chromosome=${chromosome//\\/\\\\}
        chromosome=${chromosome//\"/\\\"}
        printf ' - "%s"\n' "$chromosome"
    done < "$input_fai_file"
} > "$output_yaml_file"

echo "YAML index file generated: $output_yaml_file"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment